1use std::collections::HashSet;
5use std::io::Read as _;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use notify_debouncer_mini::{DebouncedEventKind, new_debouncer};
10use tokio::sync::mpsc;
11
12use crate::config::ProviderKind;
13
/// Notification delivered by [`InstructionWatcher`] over its event channel.
///
/// The enum is `#[non_exhaustive]`, leaving room for additional variants.
#[non_exhaustive]
pub enum InstructionEvent {
    /// A debounced file-system event touched a path with the `md` extension.
    Changed,
}
18
19pub struct InstructionWatcher {
20 _handle: tokio::task::JoinHandle<()>,
21}
22
23impl InstructionWatcher {
24 pub fn start(
32 paths: &[PathBuf],
33 tx: mpsc::Sender<InstructionEvent>,
34 ) -> Result<Self, notify::Error> {
35 let (notify_tx, mut notify_rx) = mpsc::channel(16);
36
37 let mut debouncer = new_debouncer(
38 Duration::from_millis(500),
39 move |events: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| {
40 let events = match events {
41 Ok(events) => events,
42 Err(e) => {
43 tracing::warn!("instruction watcher error: {e}");
44 return;
45 }
46 };
47
48 let has_md_change = events.iter().any(|e| {
49 e.kind == DebouncedEventKind::Any
50 && e.path.extension().is_some_and(|ext| ext == "md")
51 });
52
53 if has_md_change {
54 let _ = notify_tx.try_send(());
55 }
56 },
57 )?;
58
59 for path in paths {
60 if path.exists()
61 && let Err(e) = debouncer
62 .watcher()
63 .watch(path, notify::RecursiveMode::NonRecursive)
64 {
65 tracing::warn!(path = %path.display(), error = %e, "failed to watch instruction path");
66 }
67 }
68
69 tracing::debug!(paths = paths.len(), "starting instruction watcher");
70 let handle = tokio::spawn(async move {
71 let _debouncer = debouncer;
72 while notify_rx.recv().await.is_some() {
73 tracing::debug!("instruction file change detected, signaling reload");
74 if tx.send(InstructionEvent::Changed).await.is_err() {
75 break;
76 }
77 }
78 });
79
80 Ok(Self { _handle: handle })
81 }
82}
83
84pub struct InstructionReloadState {
86 pub base_dir: PathBuf,
87 pub provider_kinds: Vec<ProviderKind>,
88 pub explicit_files: Vec<PathBuf>,
89 pub auto_detect: bool,
90}
91
/// Largest instruction file, in bytes, that is accepted for loading (256 KiB).
const MAX_FILE_SIZE: u64 = 256 * 1024;

/// Contents of a single loaded instruction file.
#[derive(Debug, Clone)]
pub struct InstructionBlock {
    /// Path the file was loaded from, as discovered (not the canonicalized form).
    pub source: PathBuf,
    /// Full text of the file.
    pub content: String,
}
103
104pub fn load_instructions(
120 base_dir: &Path,
121 provider_kinds: &[ProviderKind],
122 explicit_files: &[PathBuf],
123 auto_detect: bool,
124) -> Vec<InstructionBlock> {
125 let canonical_base = match std::fs::canonicalize(base_dir) {
126 Ok(c) => c,
127 Err(e) => {
128 tracing::warn!(path = %base_dir.display(), error = %e, "failed to canonicalize base_dir, skipping all instruction files");
129 return Vec::new();
130 }
131 };
132
133 let mut candidates: Vec<PathBuf> = Vec::new();
134
135 candidates.push(base_dir.join("zeph.md"));
137 candidates.push(base_dir.join(".zeph").join("zeph.md"));
138
139 if auto_detect {
140 for &kind in provider_kinds {
141 candidates.extend(detection_paths(kind, base_dir));
142 }
143 }
144
145 for p in explicit_files {
147 if p.is_absolute() {
148 candidates.push(p.clone());
149 } else {
150 candidates.push(base_dir.join(p));
151 }
152 }
153
154 let mut seen: HashSet<PathBuf> = HashSet::new();
156 let mut result: Vec<InstructionBlock> = Vec::new();
157
158 for path in candidates {
159 let Ok(canonical) = std::fs::canonicalize(&path) else {
162 continue;
163 };
164
165 if !canonical.starts_with(&canonical_base) {
166 tracing::warn!(path = %canonical.display(), "instruction file escapes project root, skipping");
167 continue;
168 }
169
170 if !seen.insert(canonical.clone()) {
171 continue;
173 }
174
175 let Ok(file) = std::fs::File::open(&canonical) else {
177 continue;
178 };
179
180 let meta = match file.metadata() {
181 Ok(m) => m,
182 Err(e) => {
183 tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file metadata, skipping");
184 continue;
185 }
186 };
187
188 if !meta.is_file() {
189 continue;
190 }
191
192 if meta.len() > MAX_FILE_SIZE {
193 tracing::warn!(
194 path = %path.display(),
195 size = meta.len(),
196 limit = MAX_FILE_SIZE,
197 "instruction file exceeds 256 KiB size limit, skipping"
198 );
199 continue;
200 }
201
202 let mut content = String::new();
203 match std::io::BufReader::new(file).read_to_string(&mut content) {
204 Ok(_) => {}
205 Err(e) => {
206 tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file, skipping");
207 continue;
208 }
209 }
210
211 if content.contains('\0') {
212 tracing::warn!(path = %path.display(), "instruction file contains null bytes, skipping");
213 continue;
214 }
215
216 if content.is_empty() {
217 tracing::debug!(path = %path.display(), "instruction file is empty, skipping");
218 continue;
219 }
220
221 tracing::debug!(path = %path.display(), bytes = content.len(), "loaded instruction file");
222 result.push(InstructionBlock {
223 source: path,
224 content,
225 });
226 }
227
228 result
229}
230
231fn detection_paths(kind: ProviderKind, base: &Path) -> Vec<PathBuf> {
236 match kind {
237 ProviderKind::Claude => {
238 let mut paths = vec![
239 base.join("CLAUDE.md"),
240 base.join(".claude").join("CLAUDE.md"),
241 ];
242 let rules_dir = base.join(".claude").join("rules");
244 if let Ok(entries) = std::fs::read_dir(&rules_dir) {
245 let mut rule_files: Vec<PathBuf> = entries
246 .filter_map(std::result::Result::ok)
247 .map(|e| e.path())
248 .filter(|p| p.extension().is_some_and(|ext| ext == "md"))
249 .collect();
250 rule_files.sort();
251 paths.extend(rule_files);
252 }
253 paths
254 }
255 ProviderKind::OpenAi => {
256 vec![base.join("AGENTS.override.md"), base.join("AGENTS.md")]
257 }
258 ProviderKind::Compatible
259 | ProviderKind::Ollama
260 | ProviderKind::Candle
261 | ProviderKind::Gemini
262 | ProviderKind::Gonka
263 | ProviderKind::Cocoon => {
264 vec![base.join("AGENTS.md")]
265 }
266 _ => vec![base.join("AGENTS.md")],
267 }
268}
269
270pub async fn load_instructions_async(
275 base_dir: PathBuf,
276 provider_kinds: Vec<ProviderKind>,
277 explicit_files: Vec<PathBuf>,
278 auto_detect: bool,
279) -> Vec<InstructionBlock> {
280 tokio::task::spawn_blocking(move || {
281 load_instructions(&base_dir, &provider_kinds, &explicit_files, auto_detect)
282 })
283 .await
284 .unwrap_or_else(|e| {
285 tracing::error!(
286 error = %e,
287 "load_instructions_async: blocking task panicked, returning empty blocks"
288 );
289 Vec::new()
290 })
291}
292
#[cfg(test)]
mod watcher_tests {
    use super::*;
    use std::time::Duration;
    use tokio::sync::mpsc;

    /// Short pause inserted before the file-system operation under test.
    const SETTLE: Duration = Duration::from_millis(100);
    /// Upper bound when waiting for an event that is expected to arrive.
    const EVENT_DEADLINE: Duration = Duration::from_secs(3);
    /// How long to wait before concluding that no event will arrive.
    const QUIET_WINDOW: Duration = Duration::from_millis(1500);

    /// Starts a watcher on `dir` and returns it with the receiving end of its event channel.
    fn watch_dir(dir: &Path) -> (InstructionWatcher, mpsc::Receiver<InstructionEvent>) {
        let (tx, rx) = mpsc::channel(16);
        let watcher = InstructionWatcher::start(&[dir.to_path_buf()], tx).unwrap();
        (watcher, rx)
    }

    /// Starting on an existing directory succeeds.
    #[tokio::test]
    async fn start_with_valid_directory() {
        let tmp = tempfile::tempdir().unwrap();
        let (tx, _rx) = mpsc::channel(16);
        let watched = [tmp.path().to_path_buf()];
        assert!(InstructionWatcher::start(&watched, tx).is_ok());
    }

    /// Starting with nothing to watch succeeds as well.
    #[tokio::test]
    async fn start_with_empty_paths() {
        let (tx, _rx) = mpsc::channel(16);
        assert!(InstructionWatcher::start(&[], tx).is_ok());
    }

    /// Creating and then rewriting a `.md` file produces an event.
    #[tokio::test]
    async fn detects_md_file_change() {
        let tmp = tempfile::tempdir().unwrap();
        let (_watcher, mut rx) = watch_dir(tmp.path());

        let target = tmp.path().join("zeph.md");
        std::fs::write(&target, "initial").unwrap();

        tokio::time::sleep(SETTLE).await;
        std::fs::write(&target, "updated").unwrap();

        let outcome = tokio::time::timeout(EVENT_DEADLINE, rx.recv()).await;
        assert!(
            outcome.is_ok(),
            "expected InstructionEvent::Changed within timeout"
        );
    }

    /// A file without the `md` extension must not produce an event.
    #[tokio::test]
    async fn ignores_non_md_file_change() {
        let tmp = tempfile::tempdir().unwrap();
        let (_watcher, mut rx) = watch_dir(tmp.path());

        std::fs::write(tmp.path().join("notes.txt"), "content").unwrap();

        let outcome = tokio::time::timeout(QUIET_WINDOW, rx.recv()).await;
        assert!(outcome.is_err(), "should not receive event for non-.md file");
    }

    /// Removing a `.md` file that existed before the watcher started produces an event.
    #[tokio::test]
    async fn detects_md_file_deletion() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("zeph.md");
        std::fs::write(&target, "content").unwrap();

        let (_watcher, mut rx) = watch_dir(tmp.path());

        tokio::time::sleep(SETTLE).await;
        std::fs::remove_file(&target).unwrap();

        let outcome = tokio::time::timeout(EVENT_DEADLINE, rx.recv()).await;
        assert!(
            outcome.is_ok(),
            "expected InstructionEvent::Changed on .md deletion"
        );
    }
}
364
#[cfg(test)]
mod reload_tests {
    use super::*;

    /// Loading again after the file was rewritten yields the new content.
    #[test]
    fn reload_returns_updated_blocks_when_file_changes() {
        let tmp = tempfile::tempdir().unwrap();
        let instructions = tmp.path().join("zeph.md");
        let reload = || load_instructions(tmp.path(), &[], &[], false);

        std::fs::write(&instructions, "initial content").unwrap();
        let first = reload();
        assert_eq!(first.len(), 1);
        assert_eq!(first[0].content, "initial content");

        std::fs::write(&instructions, "updated content").unwrap();
        let second = reload();
        assert_eq!(second.len(), 1);
        assert_eq!(second[0].content, "updated content");
    }

    /// Loading again after the file was removed yields nothing.
    #[test]
    fn reload_returns_empty_when_file_deleted() {
        let tmp = tempfile::tempdir().unwrap();
        let instructions = tmp.path().join("zeph.md");
        let reload = || load_instructions(tmp.path(), &[], &[], false);

        std::fs::write(&instructions, "content").unwrap();
        assert_eq!(reload().len(), 1);

        std::fs::remove_file(&instructions).unwrap();
        let after_delete = reload();
        assert!(
            after_delete.is_empty(),
            "deleted file should not be loaded on reload"
        );
    }
}
402
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `dir/name`, creating missing parent directories, and returns the path.
    fn make_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let target = dir.join(name);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&target, content).unwrap();
        target
    }

    /// Contents of the loaded blocks, in load order.
    fn contents(blocks: &[InstructionBlock]) -> Vec<&str> {
        blocks.iter().map(|b| b.content.as_str()).collect()
    }

    /// File names of the loaded blocks, in load order.
    fn file_names(blocks: &[InstructionBlock]) -> Vec<&str> {
        blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect()
    }

    #[test]
    fn zeph_md_loaded_even_when_auto_detect_disabled() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "zeph.md", "some content");

        let loaded = load_instructions(tmp.path(), &[], &[], false);
        assert_eq!(contents(&loaded), ["some content"]);
    }

    #[test]
    fn empty_when_no_auto_detect_and_no_explicit_and_no_zeph_md() {
        let tmp = TempDir::new().unwrap();
        assert!(load_instructions(tmp.path(), &[], &[], false).is_empty());
    }

    #[test]
    fn finds_zeph_md_in_base_dir() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "zeph.md", "zeph instructions");

        let loaded = load_instructions(tmp.path(), &[], &[], true);
        assert_eq!(contents(&loaded), ["zeph instructions"]);
    }

    #[test]
    fn finds_dot_zeph_zeph_md() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), ".zeph/zeph.md", "nested zeph instructions");

        let loaded = load_instructions(tmp.path(), &[], &[], true);
        assert_eq!(contents(&loaded), ["nested zeph instructions"]);
    }

    #[test]
    fn detection_paths_claude() {
        let tmp = TempDir::new().unwrap();
        let fixtures = [
            ("CLAUDE.md", "# Claude"),
            (".claude/CLAUDE.md", "# Dot Claude"),
            (".claude/rules/a.md", "rule a"),
            (".claude/rules/b.md", "rule b"),
        ];
        for (name, body) in fixtures {
            make_file(tmp.path(), name, body);
        }

        let loaded = load_instructions(tmp.path(), &[ProviderKind::Claude], &[], true);
        let names = file_names(&loaded);
        assert!(names.contains(&"CLAUDE.md"));
        assert!(names.contains(&"a.md"));
        assert!(names.contains(&"b.md"));
    }

    #[test]
    fn detection_paths_openai() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "AGENTS.md", "# Agents");

        let candidates = detection_paths(ProviderKind::OpenAi, tmp.path());
        let has_name =
            |wanted: &str| candidates.iter().any(|p| p.file_name().unwrap() == wanted);
        assert!(has_name("AGENTS.md"));
        assert!(has_name("AGENTS.override.md"));
    }

    #[test]
    fn detection_paths_ollama_and_compatible_and_candle() {
        let tmp = TempDir::new().unwrap();
        let kinds = [
            ProviderKind::Ollama,
            ProviderKind::Compatible,
            ProviderKind::Candle,
        ];
        for kind in kinds {
            let candidates = detection_paths(kind, tmp.path());
            assert_eq!(candidates.len(), 1);
            assert_eq!(candidates[0].file_name().unwrap(), "AGENTS.md");
        }
    }

    #[test]
    fn deduplication_by_canonical_path() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "AGENTS.md", "content");

        // Both providers point at the same AGENTS.md; it must be loaded once.
        let providers = [ProviderKind::Ollama, ProviderKind::Compatible];
        let loaded = load_instructions(tmp.path(), &providers, &[], true);

        let mut agents_count = 0;
        for block in &loaded {
            if block.source.file_name().unwrap() == "AGENTS.md" {
                agents_count += 1;
            }
        }
        assert_eq!(agents_count, 1);
    }

    #[test]
    fn skips_files_exceeding_size_limit() {
        let tmp = TempDir::new().unwrap();
        let oversized = tmp.path().join("big.md");
        fs::write(&oversized, vec![b'x'; 513 * 1024]).unwrap();

        let loaded = load_instructions(tmp.path(), &[], &[oversized], false);
        assert!(loaded.is_empty());
    }

    #[test]
    fn skips_empty_files() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "zeph.md", "");

        assert!(load_instructions(tmp.path(), &[], &[], true).is_empty());
    }

    #[test]
    fn nonexistent_paths_are_silently_skipped() {
        let tmp = TempDir::new().unwrap();
        let missing = tmp.path().join("does_not_exist.md");

        let loaded = load_instructions(tmp.path(), &[], &[missing], false);
        assert!(loaded.is_empty());
    }

    #[test]
    fn explicit_relative_path_resolved_against_base_dir() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "custom.md", "custom content");

        let explicit = [PathBuf::from("custom.md")];
        let loaded = load_instructions(tmp.path(), &[], &explicit, false);
        assert_eq!(contents(&loaded), ["custom content"]);
    }

    #[test]
    fn invalid_utf8_file_is_skipped() {
        let tmp = TempDir::new().unwrap();
        let garbled = tmp.path().join("bad.md");
        fs::write(&garbled, b"\xff\xfe invalid utf8 \x80\x81").unwrap();

        let loaded = load_instructions(tmp.path(), &[], &[garbled], false);
        assert!(loaded.is_empty());
    }

    #[test]
    fn multiple_providers_union_without_overlap() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "CLAUDE.md", "claude content");
        make_file(tmp.path(), "AGENTS.md", "agents content");

        let providers = [ProviderKind::Claude, ProviderKind::OpenAi];
        let loaded = load_instructions(tmp.path(), &providers, &[], true);
        let names = file_names(&loaded);
        assert!(names.contains(&"CLAUDE.md"), "Claude file missing");
        assert!(names.contains(&"AGENTS.md"), "OpenAI file missing");
    }

    #[test]
    fn zeph_md_always_loaded_with_provider_auto_detect() {
        let tmp = TempDir::new().unwrap();
        make_file(tmp.path(), "zeph.md", "zeph rules");

        let loaded = load_instructions(tmp.path(), &[ProviderKind::OpenAi], &[], true);
        assert_eq!(contents(&loaded), ["zeph rules"]);
    }

    #[cfg(unix)]
    #[test]
    fn symlink_deduplication() {
        use std::os::unix::fs::symlink;

        let tmp = TempDir::new().unwrap();
        let original = make_file(tmp.path(), "CLAUDE.md", "claude content");
        symlink(original, tmp.path().join("CLAUDE_link.md")).unwrap();

        // The symlink is passed explicitly while the original is found by auto-detection.
        let explicit = [PathBuf::from("CLAUDE_link.md")];
        let loaded = load_instructions(tmp.path(), &[ProviderKind::Claude], &explicit, true);

        let claude_count = contents(&loaded)
            .into_iter()
            .filter(|text| *text == "claude content")
            .count();
        assert_eq!(claude_count, 1, "symlink should be deduped with original");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_escaping_project_root_is_rejected() {
        use std::os::unix::fs::symlink;

        let outside = TempDir::new().unwrap();
        let inside = TempDir::new().unwrap();
        let secret = make_file(outside.path(), "secret.md", "secret content");

        // The link lives inside the project root but resolves outside of it.
        let link = inside.path().join("evil.md");
        symlink(secret, &link).unwrap();

        let loaded = load_instructions(inside.path(), &[], &[link], false);
        assert!(
            loaded.is_empty(),
            "file escaping project root must be rejected"
        );
    }

    #[test]
    fn file_with_null_bytes_is_skipped() {
        let tmp = TempDir::new().unwrap();
        let with_nul = tmp.path().join("null.md");
        fs::write(&with_nul, b"content\x00more").unwrap();

        let loaded = load_instructions(tmp.path(), &[], &[with_nul], false);
        assert!(loaded.is_empty());
    }
}