Skip to main content

zeph_core/
instructions.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashSet;
5use std::io::Read as _;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use notify_debouncer_mini::{DebouncedEventKind, new_debouncer};
10use tokio::sync::mpsc;
11
12use crate::config::ProviderKind;
13
/// Notification sent by [`InstructionWatcher`] to its consumer.
///
/// The event carries no payload: it only signals that instruction files should be
/// reloaded, not which file changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InstructionEvent {
    /// A debounced batch of filesystem events touched at least one `.md` path.
    Changed,
}
17
18pub struct InstructionWatcher {
19    _handle: tokio::task::JoinHandle<()>,
20}
21
22impl InstructionWatcher {
23    /// Start watching directories for instruction file (.md) changes.
24    ///
25    /// Sends `InstructionEvent::Changed` on any `.md` filesystem change (debounced 500ms).
26    ///
27    /// # Errors
28    ///
29    /// Returns an error if the filesystem watcher cannot be initialized.
30    pub fn start(
31        paths: &[PathBuf],
32        tx: mpsc::Sender<InstructionEvent>,
33    ) -> Result<Self, notify::Error> {
34        let (notify_tx, mut notify_rx) = mpsc::channel(16);
35
36        let mut debouncer = new_debouncer(
37            Duration::from_millis(500),
38            move |events: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| {
39                let events = match events {
40                    Ok(events) => events,
41                    Err(e) => {
42                        tracing::warn!("instruction watcher error: {e}");
43                        return;
44                    }
45                };
46
47                let has_md_change = events.iter().any(|e| {
48                    e.kind == DebouncedEventKind::Any
49                        && e.path.extension().is_some_and(|ext| ext == "md")
50                });
51
52                if has_md_change {
53                    let _ = notify_tx.try_send(());
54                }
55            },
56        )?;
57
58        for path in paths {
59            if path.exists()
60                && let Err(e) = debouncer
61                    .watcher()
62                    .watch(path, notify::RecursiveMode::NonRecursive)
63            {
64                tracing::warn!(path = %path.display(), error = %e, "failed to watch instruction path");
65            }
66        }
67
68        tracing::debug!(paths = paths.len(), "starting instruction watcher");
69        let handle = tokio::spawn(async move {
70            let _debouncer = debouncer;
71            while notify_rx.recv().await.is_some() {
72                tracing::debug!("instruction file change detected, signaling reload");
73                if tx.send(InstructionEvent::Changed).await.is_err() {
74                    break;
75                }
76            }
77        });
78
79        Ok(Self { _handle: handle })
80    }
81}
82
/// Parameters needed to re-run `load_instructions()` on hot-reload.
///
/// Each field has the same name as the corresponding `load_instructions()` argument.
pub struct InstructionReloadState {
    /// Directory the candidate paths are joined onto (the `base_dir` argument).
    pub base_dir: PathBuf,
    /// Providers whose detection paths are consulted when `auto_detect` is set.
    pub provider_kinds: Vec<ProviderKind>,
    /// Additional instruction files; relative entries are resolved against `base_dir`.
    pub explicit_files: Vec<PathBuf>,
    /// Whether provider-specific instruction files are searched for.
    pub auto_detect: bool,
}
90
/// Maximum size, in bytes, of a single instruction file. Files exceeding this limit are skipped.
///
/// `load_instructions` compares this value against the length reported by the file's
/// metadata; a file whose length equals the limit is still accepted.
const MAX_FILE_SIZE: u64 = 256 * 1024; // 256 KiB
93
/// A loaded instruction block from a single file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InstructionBlock {
    /// Path the file was discovered at.
    ///
    /// This is the candidate path as built by `load_instructions` (joined onto
    /// `base_dir`, or the explicit path itself), not its canonicalized form. It is
    /// absolute whenever `base_dir` is absolute.
    pub source: PathBuf,
    /// UTF-8 text content of the file. Blocks produced by `load_instructions` are never
    /// empty and never contain NUL bytes.
    pub content: String,
}
102
103/// Load instruction blocks from provider-specific and explicit files.
104///
105/// `base_dir` is resolved as the process working directory at startup via
106/// `std::env::current_dir()`. This matches the directory from which the user
107/// launches `zeph` and is therefore the most natural project root for file
108/// discovery. Non-git projects are fully supported; git root is not used.
109///
110/// Candidate paths are collected in this order:
111/// 1. Always: `base_dir/zeph.md` and `base_dir/.zeph/zeph.md`.
112/// 2. If `auto_detect`, per-provider paths from `detection_paths()` for each kind.
113/// 3. `explicit_files` as provided (trusted — user controls config.toml).
114///
115/// Deduplication uses `fs::canonicalize`. Paths that do not exist are silently
116/// skipped; canonicalize fails on nonexistent paths, so they cannot be deduped
117/// via symlinks against existing paths — this is an acceptable edge case documented here.
118pub fn load_instructions(
119    base_dir: &Path,
120    provider_kinds: &[ProviderKind],
121    explicit_files: &[PathBuf],
122    auto_detect: bool,
123) -> Vec<InstructionBlock> {
124    let canonical_base = match std::fs::canonicalize(base_dir) {
125        Ok(c) => c,
126        Err(e) => {
127            tracing::warn!(path = %base_dir.display(), error = %e, "failed to canonicalize base_dir, skipping all instruction files");
128            return Vec::new();
129        }
130    };
131
132    let mut candidates: Vec<PathBuf> = Vec::new();
133
134    // zeph.md is always checked regardless of provider or auto_detect setting.
135    candidates.push(base_dir.join("zeph.md"));
136    candidates.push(base_dir.join(".zeph").join("zeph.md"));
137
138    if auto_detect {
139        for &kind in provider_kinds {
140            candidates.extend(detection_paths(kind, base_dir));
141        }
142    }
143
144    // Explicit files are trusted (user controls config). Resolve relative to base_dir.
145    for p in explicit_files {
146        if p.is_absolute() {
147            candidates.push(p.clone());
148        } else {
149            candidates.push(base_dir.join(p));
150        }
151    }
152
153    // Deduplicate by canonical path. Only existing paths can be canonicalized.
154    let mut seen: HashSet<PathBuf> = HashSet::new();
155    let mut result: Vec<InstructionBlock> = Vec::new();
156
157    for path in candidates {
158        // Canonicalize first to resolve symlinks before opening — eliminates TOCTOU race.
159        // Nonexistent or unreadable paths are silently skipped.
160        let Ok(canonical) = std::fs::canonicalize(&path) else {
161            continue;
162        };
163
164        if !canonical.starts_with(&canonical_base) {
165            tracing::warn!(path = %canonical.display(), "instruction file escapes project root, skipping");
166            continue;
167        }
168
169        if !seen.insert(canonical.clone()) {
170            // Already loaded this path via a different candidate or symlink.
171            continue;
172        }
173
174        // Open the canonical path after boundary check — no TOCTOU window for symlink swap.
175        let Ok(file) = std::fs::File::open(&canonical) else {
176            continue;
177        };
178
179        let meta = match file.metadata() {
180            Ok(m) => m,
181            Err(e) => {
182                tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file metadata, skipping");
183                continue;
184            }
185        };
186
187        if !meta.is_file() {
188            continue;
189        }
190
191        if meta.len() > MAX_FILE_SIZE {
192            tracing::warn!(
193                path = %path.display(),
194                size = meta.len(),
195                limit = MAX_FILE_SIZE,
196                "instruction file exceeds 256 KiB size limit, skipping"
197            );
198            continue;
199        }
200
201        let mut content = String::new();
202        match std::io::BufReader::new(file).read_to_string(&mut content) {
203            Ok(_) => {}
204            Err(e) => {
205                tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file, skipping");
206                continue;
207            }
208        }
209
210        if content.contains('\0') {
211            tracing::warn!(path = %path.display(), "instruction file contains null bytes, skipping");
212            continue;
213        }
214
215        if content.is_empty() {
216            tracing::debug!(path = %path.display(), "instruction file is empty, skipping");
217            continue;
218        }
219
220        tracing::debug!(path = %path.display(), bytes = content.len(), "loaded instruction file");
221        result.push(InstructionBlock {
222            source: path,
223            content,
224        });
225    }
226
227    result
228}
229
230/// Returns candidate file paths for a given provider.
231///
232/// Uses an exhaustive match — adding a new `ProviderKind` variant will cause
233/// a compile error here, forcing the developer to update the detection table.
234fn detection_paths(kind: ProviderKind, base: &Path) -> Vec<PathBuf> {
235    match kind {
236        ProviderKind::Claude => {
237            let mut paths = vec![
238                base.join("CLAUDE.md"),
239                base.join(".claude").join("CLAUDE.md"),
240            ];
241            // Collect .claude/rules/*.md sorted by name for deterministic order.
242            let rules_dir = base.join(".claude").join("rules");
243            if let Ok(entries) = std::fs::read_dir(&rules_dir) {
244                let mut rule_files: Vec<PathBuf> = entries
245                    .filter_map(std::result::Result::ok)
246                    .map(|e| e.path())
247                    .filter(|p| p.extension().is_some_and(|ext| ext == "md"))
248                    .collect();
249                rule_files.sort();
250                paths.extend(rule_files);
251            }
252            paths
253        }
254        ProviderKind::OpenAi => {
255            vec![base.join("AGENTS.override.md"), base.join("AGENTS.md")]
256        }
257        ProviderKind::Compatible
258        | ProviderKind::Ollama
259        | ProviderKind::Candle
260        | ProviderKind::Gemini
261        | ProviderKind::Gonka
262        | ProviderKind::Cocoon => {
263            vec![base.join("AGENTS.md")]
264        }
265    }
266}
267
#[cfg(test)]
mod watcher_tests {
    //! Tests for `InstructionWatcher::start`, driven by real filesystem events in a
    //! temporary directory.

    use super::*;
    use tokio::sync::mpsc;

    #[tokio::test]
    async fn start_with_valid_directory() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[dir.path().to_path_buf()], tx);
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn start_with_empty_paths() {
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[], tx);
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn detects_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "initial").unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::write(&md_path, "updated").unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        // `Ok(None)` means the channel closed (the watcher task ended) and must not
        // be mistaken for a delivered event.
        assert!(
            matches!(result, Ok(Some(InstructionEvent::Changed))),
            "expected InstructionEvent::Changed within timeout"
        );
    }

    #[tokio::test]
    async fn ignores_non_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let other_path = dir.path().join("notes.txt");
        std::fs::write(&other_path, "content").unwrap();

        // The timeout elapsing (`Err`) is the success case: nothing was delivered.
        let result = tokio::time::timeout(std::time::Duration::from_millis(1500), rx.recv()).await;
        assert!(result.is_err(), "should not receive event for non-.md file");
    }

    #[tokio::test]
    async fn detects_md_file_deletion() {
        let dir = tempfile::tempdir().unwrap();
        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "content").unwrap();

        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::remove_file(&md_path).unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        // As above, a closed channel does not count as a change notification.
        assert!(
            matches!(result, Ok(Some(InstructionEvent::Changed))),
            "expected InstructionEvent::Changed on .md deletion"
        );
    }
}
339
#[cfg(test)]
mod reload_tests {
    //! Checks that calling `load_instructions` again reflects on-disk changes.

    use super::*;

    /// Loads with no providers, no explicit files and auto-detection off, so only
    /// the always-checked `zeph.md` locations are considered.
    fn reload(root: &Path) -> Vec<InstructionBlock> {
        load_instructions(root, &[], &[], false)
    }

    #[test]
    fn reload_returns_updated_blocks_when_file_changes() {
        let project = tempfile::tempdir().unwrap();
        let instructions = project.path().join("zeph.md");

        // Write, load and verify twice: once for the initial text, once after the edit.
        for expected in ["initial content", "updated content"] {
            std::fs::write(&instructions, expected).unwrap();
            let loaded = reload(project.path());
            assert_eq!(loaded.len(), 1);
            assert_eq!(loaded[0].content, expected);
        }
    }

    #[test]
    fn reload_returns_empty_when_file_deleted() {
        let project = tempfile::tempdir().unwrap();
        let instructions = project.path().join("zeph.md");
        std::fs::write(&instructions, "content").unwrap();

        let before = reload(project.path());
        assert_eq!(before.len(), 1);

        std::fs::remove_file(&instructions).unwrap();
        let after = reload(project.path());
        assert!(
            after.is_empty(),
            "deleted file should not be loaded on reload"
        );
    }
}
377
#[cfg(test)]
mod tests {
    //! Tests for `load_instructions` and `detection_paths` against temporary directories.

    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `dir/name`, creating parent directories as needed.
    fn make_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&path, content).unwrap();
        path
    }

    #[test]
    fn zeph_md_loaded_even_when_auto_detect_disabled() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "some content");
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "some content");
    }

    #[test]
    fn empty_when_no_auto_detect_and_no_explicit_and_no_zeph_md() {
        let dir = TempDir::new().unwrap();
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn finds_zeph_md_in_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph instructions");
    }

    #[test]
    fn finds_dot_zeph_zeph_md() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), ".zeph/zeph.md", "nested zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "nested zeph instructions");
    }

    #[test]
    fn detection_paths_claude() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "# Claude");
        make_file(dir.path(), ".claude/CLAUDE.md", "# Dot Claude");
        make_file(dir.path(), ".claude/rules/a.md", "rule a");
        make_file(dir.path(), ".claude/rules/b.md", "rule b");

        let blocks = load_instructions(dir.path(), &[ProviderKind::Claude], &[], true);
        let sources: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        assert!(sources.contains(&"CLAUDE.md"));
        assert!(sources.contains(&"a.md"));
        assert!(sources.contains(&"b.md"));
    }

    #[test]
    fn detection_paths_openai() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "# Agents");

        let paths = detection_paths(ProviderKind::OpenAi, dir.path());
        assert!(paths.iter().any(|p| p.file_name().unwrap() == "AGENTS.md"));
        assert!(
            paths
                .iter()
                .any(|p| p.file_name().unwrap() == "AGENTS.override.md")
        );
    }

    #[test]
    fn detection_paths_ollama_and_compatible_and_candle() {
        let dir = TempDir::new().unwrap();
        for kind in [
            ProviderKind::Ollama,
            ProviderKind::Compatible,
            ProviderKind::Candle,
        ] {
            let paths = detection_paths(kind, dir.path());
            assert_eq!(paths.len(), 1);
            assert_eq!(paths[0].file_name().unwrap(), "AGENTS.md");
        }
    }

    #[test]
    fn deduplication_by_canonical_path() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "content");

        // Both Ollama and Compatible resolve to AGENTS.md — should appear once.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Ollama, ProviderKind::Compatible],
            &[],
            true,
        );
        let agents_count = blocks
            .iter()
            .filter(|b| b.source.file_name().unwrap() == "AGENTS.md")
            .count();
        assert_eq!(agents_count, 1);
    }

    #[test]
    fn skips_files_exceeding_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("big.md");
        // One byte over the limit is the smallest file that must be rejected. Deriving
        // the size from the constant keeps the test valid if the limit changes.
        let big = vec![b'x'; usize::try_from(MAX_FILE_SIZE).unwrap() + 1];
        fs::write(&path, &big).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn loads_file_exactly_at_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("limit.md");
        // The limit is inclusive: a file of exactly MAX_FILE_SIZE bytes is accepted.
        let exact = vec![b'x'; usize::try_from(MAX_FILE_SIZE).unwrap()];
        fs::write(&path, &exact).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content.len(), exact.len());
    }

    #[test]
    fn skips_empty_files() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert!(blocks.is_empty());
    }

    #[test]
    fn nonexistent_paths_are_silently_skipped() {
        let dir = TempDir::new().unwrap();
        let nonexistent = dir.path().join("does_not_exist.md");
        let blocks = load_instructions(dir.path(), &[], &[nonexistent], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn explicit_relative_path_resolved_against_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "custom.md", "custom content");
        let blocks = load_instructions(dir.path(), &[], &[PathBuf::from("custom.md")], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "custom content");
    }

    #[test]
    fn invalid_utf8_file_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("bad.md");
        // Write bytes that are not valid UTF-8.
        fs::write(&path, b"\xff\xfe invalid utf8 \x80\x81").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiple_providers_union_without_overlap() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        make_file(dir.path(), "AGENTS.md", "agents content");

        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude, ProviderKind::OpenAi],
            &[],
            true,
        );
        let names: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        assert!(names.contains(&"CLAUDE.md"), "Claude file missing");
        assert!(names.contains(&"AGENTS.md"), "OpenAI file missing");
    }

    #[test]
    fn zeph_md_always_loaded_with_provider_auto_detect() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph rules");
        // OpenAI provider has no AGENTS.md present, only zeph.md.
        let blocks = load_instructions(dir.path(), &[ProviderKind::OpenAi], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph rules");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_deduplication() {
        use std::os::unix::fs::symlink;
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        symlink(
            dir.path().join("CLAUDE.md"),
            dir.path().join("CLAUDE_link.md"),
        )
        .unwrap();

        // Load the original and the symlink — should appear only once after dedup.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude],
            &[PathBuf::from("CLAUDE_link.md")],
            true,
        );
        let claude_count = blocks
            .iter()
            .filter(|b| b.content == "claude content")
            .count();
        assert_eq!(claude_count, 1, "symlink should be deduped with original");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_escaping_project_root_is_rejected() {
        use std::os::unix::fs::symlink;
        let outside = TempDir::new().unwrap();
        let inside = TempDir::new().unwrap();
        make_file(outside.path(), "secret.md", "secret content");

        // Create a symlink inside the project dir pointing outside.
        let link = inside.path().join("evil.md");
        symlink(outside.path().join("secret.md"), &link).unwrap();

        let blocks = load_instructions(inside.path(), &[], &[link], false);
        assert!(
            blocks.is_empty(),
            "file escaping project root must be rejected"
        );
    }

    #[test]
    fn file_with_null_bytes_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("null.md");
        fs::write(&path, b"content\x00more").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }
}
621}