Skip to main content

zeph_core/
instructions.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashSet;
5use std::io::Read as _;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use notify_debouncer_mini::{DebouncedEventKind, new_debouncer};
10use tokio::sync::mpsc;
11
12use crate::config::ProviderKind;
13
/// Notification emitted by [`InstructionWatcher`] towards the consumer of its channel.
///
/// The derives make the event usable in `{:?}` formatting, `assert_eq!`, and
/// by-value copies without any cost (the enum carries no data).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InstructionEvent {
    /// A debounced batch of filesystem events touched at least one `.md` path.
    Changed,
}
17
18pub struct InstructionWatcher {
19    _handle: tokio::task::JoinHandle<()>,
20}
21
22impl InstructionWatcher {
23    /// Start watching directories for instruction file (.md) changes.
24    ///
25    /// Sends `InstructionEvent::Changed` on any `.md` filesystem change (debounced 500ms).
26    ///
27    /// # Errors
28    ///
29    /// Returns an error if the filesystem watcher cannot be initialized.
30    pub fn start(
31        paths: &[PathBuf],
32        tx: mpsc::Sender<InstructionEvent>,
33    ) -> Result<Self, notify::Error> {
34        let (notify_tx, mut notify_rx) = mpsc::channel(16);
35
36        let mut debouncer = new_debouncer(
37            Duration::from_millis(500),
38            move |events: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| {
39                let events = match events {
40                    Ok(events) => events,
41                    Err(e) => {
42                        tracing::warn!("instruction watcher error: {e}");
43                        return;
44                    }
45                };
46
47                let has_md_change = events.iter().any(|e| {
48                    e.kind == DebouncedEventKind::Any
49                        && e.path.extension().is_some_and(|ext| ext == "md")
50                });
51
52                if has_md_change {
53                    let _ = notify_tx.try_send(());
54                }
55            },
56        )?;
57
58        for path in paths {
59            if path.exists()
60                && let Err(e) = debouncer
61                    .watcher()
62                    .watch(path, notify::RecursiveMode::NonRecursive)
63            {
64                tracing::warn!(path = %path.display(), error = %e, "failed to watch instruction path");
65            }
66        }
67
68        tracing::debug!(paths = paths.len(), "starting instruction watcher");
69        let handle = tokio::spawn(async move {
70            let _debouncer = debouncer;
71            while notify_rx.recv().await.is_some() {
72                tracing::debug!("instruction file change detected, signaling reload");
73                if tx.send(InstructionEvent::Changed).await.is_err() {
74                    break;
75                }
76            }
77        });
78
79        Ok(Self { _handle: handle })
80    }
81}
82
83/// Parameters needed to re-run `load_instructions()` on hot-reload.
84pub struct InstructionReloadState {
85    pub base_dir: PathBuf,
86    pub provider_kinds: Vec<ProviderKind>,
87    pub explicit_files: Vec<PathBuf>,
88    pub auto_detect: bool,
89}
90
/// Upper bound, in bytes, on a single instruction file (256 KiB).
///
/// Files larger than this are skipped rather than truncated.
const MAX_FILE_SIZE: u64 = 256 * 1024;
93
/// Text of one instruction file together with the path it was loaded from.
#[derive(Debug, Clone)]
pub struct InstructionBlock {
    /// Candidate path the file was found under, as assembled by
    /// `load_instructions` (joined onto `base_dir`, or an explicit path). It is
    /// not canonicalized, so it is absolute only when the inputs were.
    pub source: PathBuf,
    /// UTF-8 text content of the file.
    pub content: String,
}
102
103/// Load instruction blocks from provider-specific and explicit files.
104///
105/// `base_dir` is resolved as the process working directory at startup via
106/// `std::env::current_dir()`. This matches the directory from which the user
107/// launches `zeph` and is therefore the most natural project root for file
108/// discovery. Non-git projects are fully supported; git root is not used.
109///
110/// Candidate paths are collected in this order:
111/// 1. Always: `base_dir/zeph.md` and `base_dir/.zeph/zeph.md`.
112/// 2. If `auto_detect`, per-provider paths from `detection_paths()` for each kind.
113/// 3. `explicit_files` as provided (trusted — user controls config.toml).
114///
115/// Deduplication uses `fs::canonicalize`. Paths that do not exist are silently
116/// skipped; canonicalize fails on nonexistent paths, so they cannot be deduped
117/// via symlinks against existing paths — this is an acceptable edge case documented here.
118pub fn load_instructions(
119    base_dir: &Path,
120    provider_kinds: &[ProviderKind],
121    explicit_files: &[PathBuf],
122    auto_detect: bool,
123) -> Vec<InstructionBlock> {
124    let canonical_base = match std::fs::canonicalize(base_dir) {
125        Ok(c) => c,
126        Err(e) => {
127            tracing::warn!(path = %base_dir.display(), error = %e, "failed to canonicalize base_dir, skipping all instruction files");
128            return Vec::new();
129        }
130    };
131
132    let mut candidates: Vec<PathBuf> = Vec::new();
133
134    // zeph.md is always checked regardless of provider or auto_detect setting.
135    candidates.push(base_dir.join("zeph.md"));
136    candidates.push(base_dir.join(".zeph").join("zeph.md"));
137
138    if auto_detect {
139        for &kind in provider_kinds {
140            candidates.extend(detection_paths(kind, base_dir));
141        }
142    }
143
144    // Explicit files are trusted (user controls config). Resolve relative to base_dir.
145    for p in explicit_files {
146        if p.is_absolute() {
147            candidates.push(p.clone());
148        } else {
149            candidates.push(base_dir.join(p));
150        }
151    }
152
153    // Deduplicate by canonical path. Only existing paths can be canonicalized.
154    let mut seen: HashSet<PathBuf> = HashSet::new();
155    let mut result: Vec<InstructionBlock> = Vec::new();
156
157    for path in candidates {
158        // Canonicalize first to resolve symlinks before opening — eliminates TOCTOU race.
159        // Nonexistent or unreadable paths are silently skipped.
160        let Ok(canonical) = std::fs::canonicalize(&path) else {
161            continue;
162        };
163
164        if !canonical.starts_with(&canonical_base) {
165            tracing::warn!(path = %canonical.display(), "instruction file escapes project root, skipping");
166            continue;
167        }
168
169        if !seen.insert(canonical.clone()) {
170            // Already loaded this path via a different candidate or symlink.
171            continue;
172        }
173
174        // Open the canonical path after boundary check — no TOCTOU window for symlink swap.
175        let Ok(file) = std::fs::File::open(&canonical) else {
176            continue;
177        };
178
179        let meta = match file.metadata() {
180            Ok(m) => m,
181            Err(e) => {
182                tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file metadata, skipping");
183                continue;
184            }
185        };
186
187        if !meta.is_file() {
188            continue;
189        }
190
191        if meta.len() > MAX_FILE_SIZE {
192            tracing::warn!(
193                path = %path.display(),
194                size = meta.len(),
195                limit = MAX_FILE_SIZE,
196                "instruction file exceeds 256 KiB size limit, skipping"
197            );
198            continue;
199        }
200
201        let mut content = String::new();
202        match std::io::BufReader::new(file).read_to_string(&mut content) {
203            Ok(_) => {}
204            Err(e) => {
205                tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file, skipping");
206                continue;
207            }
208        }
209
210        if content.contains('\0') {
211            tracing::warn!(path = %path.display(), "instruction file contains null bytes, skipping");
212            continue;
213        }
214
215        if content.is_empty() {
216            tracing::debug!(path = %path.display(), "instruction file is empty, skipping");
217            continue;
218        }
219
220        tracing::debug!(path = %path.display(), bytes = content.len(), "loaded instruction file");
221        result.push(InstructionBlock {
222            source: path,
223            content,
224        });
225    }
226
227    result
228}
229
230/// Returns candidate file paths for a given provider.
231///
232/// Uses an exhaustive match — adding a new `ProviderKind` variant will cause
233/// a compile error here, forcing the developer to update the detection table.
234fn detection_paths(kind: ProviderKind, base: &Path) -> Vec<PathBuf> {
235    match kind {
236        ProviderKind::Claude => {
237            let mut paths = vec![
238                base.join("CLAUDE.md"),
239                base.join(".claude").join("CLAUDE.md"),
240            ];
241            // Collect .claude/rules/*.md sorted by name for deterministic order.
242            let rules_dir = base.join(".claude").join("rules");
243            if let Ok(entries) = std::fs::read_dir(&rules_dir) {
244                let mut rule_files: Vec<PathBuf> = entries
245                    .filter_map(std::result::Result::ok)
246                    .map(|e| e.path())
247                    .filter(|p| p.extension().is_some_and(|ext| ext == "md"))
248                    .collect();
249                rule_files.sort();
250                paths.extend(rule_files);
251            }
252            paths
253        }
254        ProviderKind::OpenAi => {
255            vec![base.join("AGENTS.override.md"), base.join("AGENTS.md")]
256        }
257        ProviderKind::Compatible | ProviderKind::Ollama | ProviderKind::Candle => {
258            vec![base.join("AGENTS.md")]
259        }
260        // Router and Orchestrator delegate to their sub-providers; detection
261        // is handled by the caller collecting sub-provider kinds separately.
262        ProviderKind::Router | ProviderKind::Orchestrator => vec![],
263    }
264}
265
/// Integration-style tests for `InstructionWatcher` against a real temporary directory.
///
/// The positive tests assert on the received value itself: `timeout(..)` also
/// returns `Ok` when `recv()` yields `None` because the channel closed, so a bare
/// `is_ok()` would pass even if the watcher task had died.
#[cfg(test)]
mod watcher_tests {
    use super::*;
    use tokio::sync::mpsc;

    #[tokio::test]
    async fn start_with_valid_directory() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[dir.path().to_path_buf()], tx);
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn start_with_empty_paths() {
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[], tx);
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn detects_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "initial").unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::write(&md_path, "updated").unwrap();

        // The 3 s budget comfortably exceeds the watcher's 500 ms debounce window.
        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        assert!(
            matches!(result, Ok(Some(InstructionEvent::Changed))),
            "expected InstructionEvent::Changed within timeout"
        );
    }

    #[tokio::test]
    async fn ignores_non_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let other_path = dir.path().join("notes.txt");
        std::fs::write(&other_path, "content").unwrap();

        // Wait longer than the 500 ms debounce; only a timeout proves nothing was sent.
        let result = tokio::time::timeout(std::time::Duration::from_millis(1500), rx.recv()).await;
        assert!(result.is_err(), "should not receive event for non-.md file");
    }

    #[tokio::test]
    async fn detects_md_file_deletion() {
        let dir = tempfile::tempdir().unwrap();
        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "content").unwrap();

        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::remove_file(&md_path).unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        assert!(
            matches!(result, Ok(Some(InstructionEvent::Changed))),
            "expected InstructionEvent::Changed on .md deletion"
        );
    }
}
337
/// Tests that calling `load_instructions` again reflects on-disk changes.
#[cfg(test)]
mod reload_tests {
    use super::*;

    /// Loads from `dir` with no providers, no explicit files and auto-detect off.
    fn reload(dir: &Path) -> Vec<InstructionBlock> {
        load_instructions(dir, &[], &[], false)
    }

    #[test]
    fn reload_returns_updated_blocks_when_file_changes() {
        let project = tempfile::tempdir().unwrap();
        let instructions = project.path().join("zeph.md");

        std::fs::write(&instructions, "initial content").unwrap();
        let before = reload(project.path());
        assert_eq!(before.len(), 1);
        assert_eq!(before[0].content, "initial content");

        std::fs::write(&instructions, "updated content").unwrap();
        let after = reload(project.path());
        assert_eq!(after.len(), 1);
        assert_eq!(after[0].content, "updated content");
    }

    #[test]
    fn reload_returns_empty_when_file_deleted() {
        let project = tempfile::tempdir().unwrap();
        let instructions = project.path().join("zeph.md");

        std::fs::write(&instructions, "content").unwrap();
        assert_eq!(reload(project.path()).len(), 1);

        std::fs::remove_file(&instructions).unwrap();
        let after = reload(project.path());
        assert!(
            after.is_empty(),
            "deleted file should not be loaded on reload"
        );
    }
}
375
/// Unit tests for `load_instructions` and `detection_paths`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `dir/name`, creating parent directories, and returns the path.
    fn make_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&path, content).unwrap();
        path
    }

    #[test]
    fn zeph_md_loaded_even_when_auto_detect_disabled() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "some content");
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "some content");
    }

    #[test]
    fn empty_when_no_auto_detect_and_no_explicit_and_no_zeph_md() {
        let dir = TempDir::new().unwrap();
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn finds_zeph_md_in_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph instructions");
    }

    #[test]
    fn finds_dot_zeph_zeph_md() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), ".zeph/zeph.md", "nested zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "nested zeph instructions");
    }

    #[test]
    fn detection_paths_claude() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "# Claude");
        make_file(dir.path(), ".claude/CLAUDE.md", "# Dot Claude");
        // Created out of order so the assertion below really exercises the sort.
        make_file(dir.path(), ".claude/rules/b.md", "rule b");
        make_file(dir.path(), ".claude/rules/a.md", "rule a");

        let blocks = load_instructions(dir.path(), &[ProviderKind::Claude], &[], true);

        // Pin the full candidate order: root file, `.claude` file, then rules by name.
        // Contents are compared because both CLAUDE.md files share a file name.
        let contents: Vec<&str> = blocks.iter().map(|b| b.content.as_str()).collect();
        assert_eq!(contents, ["# Claude", "# Dot Claude", "rule a", "rule b"]);
    }

    #[test]
    fn detection_paths_openai() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "# Agents");

        let paths = detection_paths(ProviderKind::OpenAi, dir.path());
        assert!(paths.iter().any(|p| p.file_name().unwrap() == "AGENTS.md"));
        assert!(
            paths
                .iter()
                .any(|p| p.file_name().unwrap() == "AGENTS.override.md")
        );
    }

    #[test]
    fn detection_paths_ollama_and_compatible_and_candle() {
        let dir = TempDir::new().unwrap();
        for kind in [
            ProviderKind::Ollama,
            ProviderKind::Compatible,
            ProviderKind::Candle,
        ] {
            let paths = detection_paths(kind, dir.path());
            assert_eq!(paths.len(), 1);
            assert_eq!(paths[0].file_name().unwrap(), "AGENTS.md");
        }
    }

    #[test]
    fn detection_paths_router_and_orchestrator_empty() {
        let dir = TempDir::new().unwrap();
        assert!(detection_paths(ProviderKind::Router, dir.path()).is_empty());
        assert!(detection_paths(ProviderKind::Orchestrator, dir.path()).is_empty());
    }

    #[test]
    fn deduplication_by_canonical_path() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "content");

        // Both Ollama and Compatible resolve to AGENTS.md — should appear once.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Ollama, ProviderKind::Compatible],
            &[],
            true,
        );
        let agents_count = blocks
            .iter()
            .filter(|b| b.source.file_name().unwrap() == "AGENTS.md")
            .count();
        assert_eq!(agents_count, 1);
    }

    #[test]
    fn skips_files_exceeding_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("big.md");
        // One byte over the limit: the smallest file that must be rejected.
        let limit = usize::try_from(MAX_FILE_SIZE).unwrap();
        fs::write(&path, vec![b'x'; limit + 1]).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn loads_file_at_exact_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("max.md");
        // Exactly at the limit: the largest file that must still be accepted.
        let limit = usize::try_from(MAX_FILE_SIZE).unwrap();
        fs::write(&path, vec![b'x'; limit]).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content.len(), limit);
    }

    #[test]
    fn skips_empty_files() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert!(blocks.is_empty());
    }

    #[test]
    fn nonexistent_paths_are_silently_skipped() {
        let dir = TempDir::new().unwrap();
        let nonexistent = dir.path().join("does_not_exist.md");
        let blocks = load_instructions(dir.path(), &[], &[nonexistent], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn explicit_relative_path_resolved_against_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "custom.md", "custom content");
        let blocks = load_instructions(dir.path(), &[], &[PathBuf::from("custom.md")], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "custom content");
    }

    #[test]
    fn invalid_utf8_file_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("bad.md");
        // Write bytes that are not valid UTF-8.
        fs::write(&path, b"\xff\xfe invalid utf8 \x80\x81").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiple_providers_union_without_overlap() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        make_file(dir.path(), "AGENTS.md", "agents content");

        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude, ProviderKind::OpenAi],
            &[],
            true,
        );
        let names: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        assert!(names.contains(&"CLAUDE.md"), "Claude file missing");
        assert!(names.contains(&"AGENTS.md"), "OpenAI file missing");
    }

    #[test]
    fn zeph_md_always_loaded_with_provider_auto_detect() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph rules");
        // OpenAI provider has no AGENTS.md present, only zeph.md.
        let blocks = load_instructions(dir.path(), &[ProviderKind::OpenAi], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph rules");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_deduplication() {
        use std::os::unix::fs::symlink;
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        symlink(
            dir.path().join("CLAUDE.md"),
            dir.path().join("CLAUDE_link.md"),
        )
        .unwrap();

        // Load the original and the symlink — should appear only once after dedup.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude],
            &[PathBuf::from("CLAUDE_link.md")],
            true,
        );
        let claude_count = blocks
            .iter()
            .filter(|b| b.content == "claude content")
            .count();
        assert_eq!(claude_count, 1, "symlink should be deduped with original");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_escaping_project_root_is_rejected() {
        use std::os::unix::fs::symlink;
        let outside = TempDir::new().unwrap();
        let inside = TempDir::new().unwrap();
        make_file(outside.path(), "secret.md", "secret content");

        // Create a symlink inside the project dir pointing outside.
        let link = inside.path().join("evil.md");
        symlink(outside.path().join("secret.md"), &link).unwrap();

        let blocks = load_instructions(inside.path(), &[], &[link], false);
        assert!(
            blocks.is_empty(),
            "file escaping project root must be rejected"
        );
    }

    #[test]
    fn file_with_null_bytes_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("null.md");
        fs::write(&path, b"content\x00more").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }
}