// zeph_core/instructions.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::HashSet;
5use std::io::Read as _;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use notify_debouncer_mini::{DebouncedEventKind, new_debouncer};
10use tokio::sync::mpsc;
11
12use crate::config::ProviderKind;
13
/// Notification sent by the instruction watcher to its consumer.
///
/// The event carries no payload: it only says that instruction files should be
/// re-read, not which file changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InstructionEvent {
    /// At least one watched `.md` file was reported as changed.
    Changed,
}
17
18pub struct InstructionWatcher {
19    _handle: tokio::task::JoinHandle<()>,
20}
21
22impl InstructionWatcher {
23    /// Start watching directories for instruction file (.md) changes.
24    ///
25    /// Sends `InstructionEvent::Changed` on any `.md` filesystem change (debounced 500ms).
26    ///
27    /// # Errors
28    ///
29    /// Returns an error if the filesystem watcher cannot be initialized.
30    pub fn start(
31        paths: &[PathBuf],
32        tx: mpsc::Sender<InstructionEvent>,
33    ) -> Result<Self, notify::Error> {
34        let (notify_tx, mut notify_rx) = mpsc::channel(16);
35
36        let mut debouncer = new_debouncer(
37            Duration::from_millis(500),
38            move |events: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| {
39                let events = match events {
40                    Ok(events) => events,
41                    Err(e) => {
42                        tracing::warn!("instruction watcher error: {e}");
43                        return;
44                    }
45                };
46
47                let has_md_change = events.iter().any(|e| {
48                    e.kind == DebouncedEventKind::Any
49                        && e.path.extension().is_some_and(|ext| ext == "md")
50                });
51
52                if has_md_change {
53                    let _ = notify_tx.try_send(());
54                }
55            },
56        )?;
57
58        for path in paths {
59            if path.exists()
60                && let Err(e) = debouncer
61                    .watcher()
62                    .watch(path, notify::RecursiveMode::NonRecursive)
63            {
64                tracing::warn!(path = %path.display(), error = %e, "failed to watch instruction path");
65            }
66        }
67
68        tracing::debug!(paths = paths.len(), "starting instruction watcher");
69        let handle = tokio::spawn(async move {
70            let _debouncer = debouncer;
71            while notify_rx.recv().await.is_some() {
72                tracing::debug!("instruction file change detected, signaling reload");
73                if tx.send(InstructionEvent::Changed).await.is_err() {
74                    break;
75                }
76            }
77        });
78
79        Ok(Self { _handle: handle })
80    }
81}
82
/// Parameters needed to re-run `load_instructions()` on hot-reload.
///
/// Each field corresponds to the `load_instructions` parameter of the same name,
/// stored in owned form so the call can be repeated later.
pub struct InstructionReloadState {
    /// Directory used as the project root for instruction discovery.
    pub base_dir: PathBuf,
    /// Provider kinds whose conventional instruction files are considered.
    pub provider_kinds: Vec<ProviderKind>,
    /// Additional instruction files listed explicitly.
    pub explicit_files: Vec<PathBuf>,
    /// Whether per-provider instruction files are looked up.
    pub auto_detect: bool,
}
90
/// Maximum size, in bytes, of a single instruction file. Files exceeding this limit are skipped.
///
/// `load_instructions` compares the length reported by the file's metadata
/// against this value; a file of exactly this size is still accepted.
const MAX_FILE_SIZE: u64 = 256 * 1024; // 256 KiB
93
/// A loaded instruction block from a single file.
#[derive(Debug, Clone)]
pub struct InstructionBlock {
    /// Path of the source file as it was discovered.
    ///
    /// `load_instructions` stores the candidate path (`base_dir` joined with the
    /// file name, or an absolute explicit path) rather than the canonicalized one,
    /// so this is absolute only when that candidate path was absolute.
    pub source: PathBuf,
    /// UTF-8 text content of the file.
    pub content: String,
}
102
103/// Load instruction blocks from provider-specific and explicit files.
104///
105/// `base_dir` is resolved as the process working directory at startup via
106/// `std::env::current_dir()`. This matches the directory from which the user
107/// launches `zeph` and is therefore the most natural project root for file
108/// discovery. Non-git projects are fully supported; git root is not used.
109///
110/// Candidate paths are collected in this order:
111/// 1. Always: `base_dir/zeph.md` and `base_dir/.zeph/zeph.md`.
112/// 2. If `auto_detect`, per-provider paths from `detection_paths()` for each kind.
113/// 3. `explicit_files` as provided (trusted — user controls config.toml).
114///
115/// Deduplication uses `fs::canonicalize`. Paths that do not exist are silently
116/// skipped; canonicalize fails on nonexistent paths, so they cannot be deduped
117/// via symlinks against existing paths — this is an acceptable edge case documented here.
118pub fn load_instructions(
119    base_dir: &Path,
120    provider_kinds: &[ProviderKind],
121    explicit_files: &[PathBuf],
122    auto_detect: bool,
123) -> Vec<InstructionBlock> {
124    let canonical_base = match std::fs::canonicalize(base_dir) {
125        Ok(c) => c,
126        Err(e) => {
127            tracing::warn!(path = %base_dir.display(), error = %e, "failed to canonicalize base_dir, skipping all instruction files");
128            return Vec::new();
129        }
130    };
131
132    let mut candidates: Vec<PathBuf> = Vec::new();
133
134    // zeph.md is always checked regardless of provider or auto_detect setting.
135    candidates.push(base_dir.join("zeph.md"));
136    candidates.push(base_dir.join(".zeph").join("zeph.md"));
137
138    if auto_detect {
139        for &kind in provider_kinds {
140            candidates.extend(detection_paths(kind, base_dir));
141        }
142    }
143
144    // Explicit files are trusted (user controls config). Resolve relative to base_dir.
145    for p in explicit_files {
146        if p.is_absolute() {
147            candidates.push(p.clone());
148        } else {
149            candidates.push(base_dir.join(p));
150        }
151    }
152
153    // Deduplicate by canonical path. Only existing paths can be canonicalized.
154    let mut seen: HashSet<PathBuf> = HashSet::new();
155    let mut result: Vec<InstructionBlock> = Vec::new();
156
157    for path in candidates {
158        // Canonicalize first to resolve symlinks before opening — eliminates TOCTOU race.
159        // Nonexistent or unreadable paths are silently skipped.
160        let Ok(canonical) = std::fs::canonicalize(&path) else {
161            continue;
162        };
163
164        if !canonical.starts_with(&canonical_base) {
165            tracing::warn!(path = %canonical.display(), "instruction file escapes project root, skipping");
166            continue;
167        }
168
169        if !seen.insert(canonical.clone()) {
170            // Already loaded this path via a different candidate or symlink.
171            continue;
172        }
173
174        // Open the canonical path after boundary check — no TOCTOU window for symlink swap.
175        let Ok(file) = std::fs::File::open(&canonical) else {
176            continue;
177        };
178
179        let meta = match file.metadata() {
180            Ok(m) => m,
181            Err(e) => {
182                tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file metadata, skipping");
183                continue;
184            }
185        };
186
187        if !meta.is_file() {
188            continue;
189        }
190
191        if meta.len() > MAX_FILE_SIZE {
192            tracing::warn!(
193                path = %path.display(),
194                size = meta.len(),
195                limit = MAX_FILE_SIZE,
196                "instruction file exceeds 256 KiB size limit, skipping"
197            );
198            continue;
199        }
200
201        let mut content = String::new();
202        match std::io::BufReader::new(file).read_to_string(&mut content) {
203            Ok(_) => {}
204            Err(e) => {
205                tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file, skipping");
206                continue;
207            }
208        }
209
210        if content.contains('\0') {
211            tracing::warn!(path = %path.display(), "instruction file contains null bytes, skipping");
212            continue;
213        }
214
215        if content.is_empty() {
216            tracing::debug!(path = %path.display(), "instruction file is empty, skipping");
217            continue;
218        }
219
220        tracing::debug!(path = %path.display(), bytes = content.len(), "loaded instruction file");
221        result.push(InstructionBlock {
222            source: path,
223            content,
224        });
225    }
226
227    result
228}
229
230/// Returns candidate file paths for a given provider.
231///
232/// Uses an exhaustive match — adding a new `ProviderKind` variant will cause
233/// a compile error here, forcing the developer to update the detection table.
234fn detection_paths(kind: ProviderKind, base: &Path) -> Vec<PathBuf> {
235    match kind {
236        ProviderKind::Claude => {
237            let mut paths = vec![
238                base.join("CLAUDE.md"),
239                base.join(".claude").join("CLAUDE.md"),
240            ];
241            // Collect .claude/rules/*.md sorted by name for deterministic order.
242            let rules_dir = base.join(".claude").join("rules");
243            if let Ok(entries) = std::fs::read_dir(&rules_dir) {
244                let mut rule_files: Vec<PathBuf> = entries
245                    .filter_map(std::result::Result::ok)
246                    .map(|e| e.path())
247                    .filter(|p| p.extension().is_some_and(|ext| ext == "md"))
248                    .collect();
249                rule_files.sort();
250                paths.extend(rule_files);
251            }
252            paths
253        }
254        ProviderKind::OpenAi => {
255            vec![base.join("AGENTS.override.md"), base.join("AGENTS.md")]
256        }
257        ProviderKind::Compatible
258        | ProviderKind::Ollama
259        | ProviderKind::Candle
260        | ProviderKind::Gemini
261        | ProviderKind::Gonka => {
262            vec![base.join("AGENTS.md")]
263        }
264    }
265}
266
/// Tests for `InstructionWatcher`. These use real filesystem notifications and
/// wall-clock timeouts, so the sleeps and timeout values are part of the test design.
#[cfg(test)]
mod watcher_tests {
    use super::*;
    use tokio::sync::mpsc;

    #[tokio::test]
    async fn start_with_valid_directory() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[dir.path().to_path_buf()], tx);
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn start_with_empty_paths() {
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[], tx);
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn detects_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "initial").unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::write(&md_path, "updated").unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        // `Ok(None)` would mean the channel closed (forwarding task gone), which
        // must not count as a detected change.
        assert!(
            matches!(result, Ok(Some(InstructionEvent::Changed))),
            "expected InstructionEvent::Changed within timeout"
        );
    }

    #[tokio::test]
    async fn ignores_non_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let other_path = dir.path().join("notes.txt");
        std::fs::write(&other_path, "content").unwrap();

        // The timeout must outlast the 500 ms debounce window; elapsing is the success case.
        let result = tokio::time::timeout(std::time::Duration::from_millis(1500), rx.recv()).await;
        assert!(result.is_err(), "should not receive event for non-.md file");
    }

    #[tokio::test]
    async fn detects_md_file_deletion() {
        let dir = tempfile::tempdir().unwrap();
        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "content").unwrap();

        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::remove_file(&md_path).unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        // As above: a closed channel (`Ok(None)`) is not a deletion event.
        assert!(
            matches!(result, Ok(Some(InstructionEvent::Changed))),
            "expected InstructionEvent::Changed on .md deletion"
        );
    }
}
338
/// Tests that calling `load_instructions` again reflects on-disk changes.
#[cfg(test)]
mod reload_tests {
    use super::*;

    /// Runs discovery on `root` with no providers, no explicit files and
    /// auto-detection off, so only the always-checked `zeph.md` paths apply.
    fn reload(root: &Path) -> Vec<InstructionBlock> {
        load_instructions(root, &[], &[], false)
    }

    #[test]
    fn reload_returns_updated_blocks_when_file_changes() {
        let project = tempfile::tempdir().unwrap();
        let instructions = project.path().join("zeph.md");

        std::fs::write(&instructions, "initial content").unwrap();
        let before = reload(project.path());
        assert_eq!(before.len(), 1);
        assert_eq!(before[0].content, "initial content");

        std::fs::write(&instructions, "updated content").unwrap();
        let after = reload(project.path());
        assert_eq!(after.len(), 1);
        assert_eq!(after[0].content, "updated content");
    }

    #[test]
    fn reload_returns_empty_when_file_deleted() {
        let project = tempfile::tempdir().unwrap();
        let instructions = project.path().join("zeph.md");

        std::fs::write(&instructions, "content").unwrap();
        assert_eq!(reload(project.path()).len(), 1);

        std::fs::remove_file(&instructions).unwrap();
        let after = reload(project.path());
        assert!(
            after.is_empty(),
            "deleted file should not be loaded on reload"
        );
    }
}
376
/// Tests for instruction discovery, deduplication and per-file validation.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `dir/name`, creating parent directories as needed,
    /// and returns the full path.
    fn make_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&path, content).unwrap();
        path
    }

    #[test]
    fn zeph_md_loaded_even_when_auto_detect_disabled() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "some content");
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "some content");
    }

    #[test]
    fn empty_when_no_auto_detect_and_no_explicit_and_no_zeph_md() {
        let dir = TempDir::new().unwrap();
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn finds_zeph_md_in_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph instructions");
    }

    #[test]
    fn finds_dot_zeph_zeph_md() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), ".zeph/zeph.md", "nested zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "nested zeph instructions");
    }

    #[test]
    fn detection_paths_claude() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "# Claude");
        make_file(dir.path(), ".claude/CLAUDE.md", "# Dot Claude");
        make_file(dir.path(), ".claude/rules/a.md", "rule a");
        make_file(dir.path(), ".claude/rules/b.md", "rule b");

        let blocks = load_instructions(dir.path(), &[ProviderKind::Claude], &[], true);
        let sources: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        // Both CLAUDE.md files come first, followed by the rule files sorted by name.
        assert_eq!(sources, ["CLAUDE.md", "CLAUDE.md", "a.md", "b.md"]);
    }

    #[test]
    fn detection_paths_openai() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "# Agents");

        let paths = detection_paths(ProviderKind::OpenAi, dir.path());
        assert!(paths.iter().any(|p| p.file_name().unwrap() == "AGENTS.md"));
        assert!(
            paths
                .iter()
                .any(|p| p.file_name().unwrap() == "AGENTS.override.md")
        );
    }

    #[test]
    fn detection_paths_ollama_and_compatible_and_candle() {
        let dir = TempDir::new().unwrap();
        for kind in [
            ProviderKind::Ollama,
            ProviderKind::Compatible,
            ProviderKind::Candle,
        ] {
            let paths = detection_paths(kind, dir.path());
            assert_eq!(paths.len(), 1);
            assert_eq!(paths[0].file_name().unwrap(), "AGENTS.md");
        }
    }

    #[test]
    fn detection_paths_gemini_and_gonka() {
        let dir = TempDir::new().unwrap();
        for kind in [ProviderKind::Gemini, ProviderKind::Gonka] {
            let paths = detection_paths(kind, dir.path());
            assert_eq!(paths.len(), 1);
            assert_eq!(paths[0].file_name().unwrap(), "AGENTS.md");
        }
    }

    #[test]
    fn deduplication_by_canonical_path() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "content");

        // Both Ollama and Compatible resolve to AGENTS.md — should appear once.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Ollama, ProviderKind::Compatible],
            &[],
            true,
        );
        let agents_count = blocks
            .iter()
            .filter(|b| b.source.file_name().unwrap() == "AGENTS.md")
            .count();
        assert_eq!(agents_count, 1);
    }

    #[test]
    fn skips_files_exceeding_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("big.md");
        // One byte over MAX_FILE_SIZE is the smallest file that must be rejected.
        let len = usize::try_from(MAX_FILE_SIZE).unwrap() + 1;
        fs::write(&path, vec![b'x'; len]).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn loads_file_exactly_at_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("limit.md");
        // The limit is inclusive: a file of exactly MAX_FILE_SIZE bytes is accepted.
        let len = usize::try_from(MAX_FILE_SIZE).unwrap();
        fs::write(&path, vec![b'x'; len]).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content.len(), len);
    }

    #[test]
    fn skips_empty_files() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert!(blocks.is_empty());
    }

    #[test]
    fn nonexistent_paths_are_silently_skipped() {
        let dir = TempDir::new().unwrap();
        let nonexistent = dir.path().join("does_not_exist.md");
        let blocks = load_instructions(dir.path(), &[], &[nonexistent], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn explicit_relative_path_resolved_against_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "custom.md", "custom content");
        let blocks = load_instructions(dir.path(), &[], &[PathBuf::from("custom.md")], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "custom content");
    }

    #[test]
    fn invalid_utf8_file_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("bad.md");
        // Write bytes that are not valid UTF-8.
        fs::write(&path, b"\xff\xfe invalid utf8 \x80\x81").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiple_providers_union_without_overlap() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        make_file(dir.path(), "AGENTS.md", "agents content");

        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude, ProviderKind::OpenAi],
            &[],
            true,
        );
        let names: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        assert!(names.contains(&"CLAUDE.md"), "Claude file missing");
        assert!(names.contains(&"AGENTS.md"), "OpenAI file missing");
    }

    #[test]
    fn zeph_md_always_loaded_with_provider_auto_detect() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph rules");
        // OpenAI provider has no AGENTS.md present, only zeph.md.
        let blocks = load_instructions(dir.path(), &[ProviderKind::OpenAi], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph rules");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_deduplication() {
        use std::os::unix::fs::symlink;
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        symlink(
            dir.path().join("CLAUDE.md"),
            dir.path().join("CLAUDE_link.md"),
        )
        .unwrap();

        // Load the original and the symlink — should appear only once after dedup.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude],
            &[PathBuf::from("CLAUDE_link.md")],
            true,
        );
        let claude_count = blocks
            .iter()
            .filter(|b| b.content == "claude content")
            .count();
        assert_eq!(claude_count, 1, "symlink should be deduped with original");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_escaping_project_root_is_rejected() {
        use std::os::unix::fs::symlink;
        let outside = TempDir::new().unwrap();
        let inside = TempDir::new().unwrap();
        make_file(outside.path(), "secret.md", "secret content");

        // Create a symlink inside the project dir pointing outside.
        let link = inside.path().join("evil.md");
        symlink(outside.path().join("secret.md"), &link).unwrap();

        let blocks = load_instructions(inside.path(), &[], &[link], false);
        assert!(
            blocks.is_empty(),
            "file escaping project root must be rejected"
        );
    }

    #[test]
    fn file_with_null_bytes_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("null.md");
        fs::write(&path, b"content\x00more").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }
}