1use std::collections::HashSet;
5use std::io::Read as _;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use notify_debouncer_mini::{DebouncedEventKind, new_debouncer};
10use tokio::sync::mpsc;
11
12use crate::config::ProviderKind;
13
/// Notification sent by [`InstructionWatcher`] on its output channel.
///
/// The common traits are derived so that the event can be logged, compared in
/// tests, and duplicated without callers writing their own impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InstructionEvent {
    /// At least one debounced filesystem event touched a path with the `.md`
    /// extension under a watched location.
    Changed,
}
17
/// Handle for the background task that forwards instruction-file change
/// notifications.
///
/// Created by [`InstructionWatcher::start`]; its only field is the
/// `JoinHandle` of the spawned forwarding task.
pub struct InstructionWatcher {
    // NOTE(review): dropping a tokio `JoinHandle` detaches the task instead of
    // cancelling it, so dropping this struct presumably does not stop the
    // watcher by itself — confirm whether callers rely on drop for shutdown.
    _handle: tokio::task::JoinHandle<()>,
}
21
22impl InstructionWatcher {
23 pub fn start(
31 paths: &[PathBuf],
32 tx: mpsc::Sender<InstructionEvent>,
33 ) -> Result<Self, notify::Error> {
34 let (notify_tx, mut notify_rx) = mpsc::channel(16);
35
36 let mut debouncer = new_debouncer(
37 Duration::from_millis(500),
38 move |events: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| {
39 let events = match events {
40 Ok(events) => events,
41 Err(e) => {
42 tracing::warn!("instruction watcher error: {e}");
43 return;
44 }
45 };
46
47 let has_md_change = events.iter().any(|e| {
48 e.kind == DebouncedEventKind::Any
49 && e.path.extension().is_some_and(|ext| ext == "md")
50 });
51
52 if has_md_change {
53 let _ = notify_tx.try_send(());
54 }
55 },
56 )?;
57
58 for path in paths {
59 if path.exists()
60 && let Err(e) = debouncer
61 .watcher()
62 .watch(path, notify::RecursiveMode::NonRecursive)
63 {
64 tracing::warn!(path = %path.display(), error = %e, "failed to watch instruction path");
65 }
66 }
67
68 tracing::debug!(paths = paths.len(), "starting instruction watcher");
69 let handle = tokio::spawn(async move {
70 let _debouncer = debouncer;
71 while notify_rx.recv().await.is_some() {
72 tracing::debug!("instruction file change detected, signaling reload");
73 if tx.send(InstructionEvent::Changed).await.is_err() {
74 break;
75 }
76 }
77 });
78
79 Ok(Self { _handle: handle })
80 }
81}
82
/// Bundle of the inputs needed to call [`load_instructions`] again.
///
/// The fields mirror that function's parameters one-to-one.
// NOTE(review): presumably stored by the caller so instructions can be reloaded
// after an `InstructionEvent::Changed` — confirm against the call sites.
pub struct InstructionReloadState {
    /// Project root passed as `base_dir`.
    pub base_dir: PathBuf,
    /// Provider kinds passed as `provider_kinds`.
    pub provider_kinds: Vec<ProviderKind>,
    /// Explicitly configured instruction files passed as `explicit_files`.
    pub explicit_files: Vec<PathBuf>,
    /// Value passed as `auto_detect`.
    pub auto_detect: bool,
}
90
/// Upper bound, in bytes, on the size of a single instruction file (256 KiB).
const MAX_FILE_SIZE: u64 = 256 * 1024;

/// Contents of one instruction file that was loaded successfully.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InstructionBlock {
    /// Path the file was loaded from, as built by the loader (not canonicalized).
    pub source: PathBuf,
    /// Full text of the file.
    pub content: String,
}
102
103pub fn load_instructions(
119 base_dir: &Path,
120 provider_kinds: &[ProviderKind],
121 explicit_files: &[PathBuf],
122 auto_detect: bool,
123) -> Vec<InstructionBlock> {
124 let canonical_base = match std::fs::canonicalize(base_dir) {
125 Ok(c) => c,
126 Err(e) => {
127 tracing::warn!(path = %base_dir.display(), error = %e, "failed to canonicalize base_dir, skipping all instruction files");
128 return Vec::new();
129 }
130 };
131
132 let mut candidates: Vec<PathBuf> = Vec::new();
133
134 candidates.push(base_dir.join("zeph.md"));
136 candidates.push(base_dir.join(".zeph").join("zeph.md"));
137
138 if auto_detect {
139 for &kind in provider_kinds {
140 candidates.extend(detection_paths(kind, base_dir));
141 }
142 }
143
144 for p in explicit_files {
146 if p.is_absolute() {
147 candidates.push(p.clone());
148 } else {
149 candidates.push(base_dir.join(p));
150 }
151 }
152
153 let mut seen: HashSet<PathBuf> = HashSet::new();
155 let mut result: Vec<InstructionBlock> = Vec::new();
156
157 for path in candidates {
158 let Ok(canonical) = std::fs::canonicalize(&path) else {
161 continue;
162 };
163
164 if !canonical.starts_with(&canonical_base) {
165 tracing::warn!(path = %canonical.display(), "instruction file escapes project root, skipping");
166 continue;
167 }
168
169 if !seen.insert(canonical.clone()) {
170 continue;
172 }
173
174 let Ok(file) = std::fs::File::open(&canonical) else {
176 continue;
177 };
178
179 let meta = match file.metadata() {
180 Ok(m) => m,
181 Err(e) => {
182 tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file metadata, skipping");
183 continue;
184 }
185 };
186
187 if !meta.is_file() {
188 continue;
189 }
190
191 if meta.len() > MAX_FILE_SIZE {
192 tracing::warn!(
193 path = %path.display(),
194 size = meta.len(),
195 limit = MAX_FILE_SIZE,
196 "instruction file exceeds 256 KiB size limit, skipping"
197 );
198 continue;
199 }
200
201 let mut content = String::new();
202 match std::io::BufReader::new(file).read_to_string(&mut content) {
203 Ok(_) => {}
204 Err(e) => {
205 tracing::warn!(path = %path.display(), error = %e, "failed to read instruction file, skipping");
206 continue;
207 }
208 }
209
210 if content.contains('\0') {
211 tracing::warn!(path = %path.display(), "instruction file contains null bytes, skipping");
212 continue;
213 }
214
215 if content.is_empty() {
216 tracing::debug!(path = %path.display(), "instruction file is empty, skipping");
217 continue;
218 }
219
220 tracing::debug!(path = %path.display(), bytes = content.len(), "loaded instruction file");
221 result.push(InstructionBlock {
222 source: path,
223 content,
224 });
225 }
226
227 result
228}
229
230fn detection_paths(kind: ProviderKind, base: &Path) -> Vec<PathBuf> {
235 match kind {
236 ProviderKind::Claude => {
237 let mut paths = vec![
238 base.join("CLAUDE.md"),
239 base.join(".claude").join("CLAUDE.md"),
240 ];
241 let rules_dir = base.join(".claude").join("rules");
243 if let Ok(entries) = std::fs::read_dir(&rules_dir) {
244 let mut rule_files: Vec<PathBuf> = entries
245 .filter_map(std::result::Result::ok)
246 .map(|e| e.path())
247 .filter(|p| p.extension().is_some_and(|ext| ext == "md"))
248 .collect();
249 rule_files.sort();
250 paths.extend(rule_files);
251 }
252 paths
253 }
254 ProviderKind::OpenAi => {
255 vec![base.join("AGENTS.override.md"), base.join("AGENTS.md")]
256 }
257 ProviderKind::Compatible
258 | ProviderKind::Ollama
259 | ProviderKind::Candle
260 | ProviderKind::Gemini
261 | ProviderKind::Gonka
262 | ProviderKind::Cocoon => {
263 vec![base.join("AGENTS.md")]
264 }
265 _ => vec![base.join("AGENTS.md")],
266 }
267}
268
269pub async fn load_instructions_async(
274 base_dir: PathBuf,
275 provider_kinds: Vec<ProviderKind>,
276 explicit_files: Vec<PathBuf>,
277 auto_detect: bool,
278) -> Vec<InstructionBlock> {
279 tokio::task::spawn_blocking(move || {
280 load_instructions(&base_dir, &provider_kinds, &explicit_files, auto_detect)
281 })
282 .await
283 .unwrap_or_else(|e| {
284 tracing::error!(
285 error = %e,
286 "load_instructions_async: blocking task panicked, returning empty blocks"
287 );
288 Vec::new()
289 })
290}
291
// Tests for `InstructionWatcher`. They touch the real filesystem and rely on
// wall-clock timeouts that are longer than the watcher's 500 ms debounce window.
#[cfg(test)]
mod watcher_tests {
    use super::*;
    use tokio::sync::mpsc;

    // Starting a watcher on an existing directory succeeds.
    #[tokio::test]
    async fn start_with_valid_directory() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[dir.path().to_path_buf()], tx);
        assert!(result.is_ok());
    }

    // Starting with no paths at all is allowed.
    #[tokio::test]
    async fn start_with_empty_paths() {
        let (tx, _rx) = mpsc::channel(16);
        let result = InstructionWatcher::start(&[], tx);
        assert!(result.is_ok());
    }

    // Writing to a `.md` file inside the watched directory yields an event
    // before the timeout expires.
    #[tokio::test]
    async fn detects_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "initial").unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::write(&md_path, "updated").unwrap();

        // NOTE(review): `is_ok()` only proves the timeout did not elapse; it
        // would also pass if `recv` returned `None` because the channel closed.
        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        assert!(
            result.is_ok(),
            "expected InstructionEvent::Changed within timeout"
        );
    }

    // A change to a non-`.md` file must not produce an event: the receive is
    // expected to time out.
    #[tokio::test]
    async fn ignores_non_md_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        let other_path = dir.path().join("notes.txt");
        std::fs::write(&other_path, "content").unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_millis(1500), rx.recv()).await;
        assert!(result.is_err(), "should not receive event for non-.md file");
    }

    // Deleting a `.md` file that existed before the watcher started yields an event.
    #[tokio::test]
    async fn detects_md_file_deletion() {
        let dir = tempfile::tempdir().unwrap();
        let md_path = dir.path().join("zeph.md");
        std::fs::write(&md_path, "content").unwrap();

        let (tx, mut rx) = mpsc::channel(16);
        let _watcher = InstructionWatcher::start(&[dir.path().to_path_buf()], tx).unwrap();

        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
        std::fs::remove_file(&md_path).unwrap();

        let result = tokio::time::timeout(std::time::Duration::from_secs(3), rx.recv()).await;
        assert!(
            result.is_ok(),
            "expected InstructionEvent::Changed on .md deletion"
        );
    }
}
363
// Tests that calling `load_instructions` again reflects on-disk changes.
#[cfg(test)]
mod reload_tests {
    use super::*;

    /// Loads with auto-detection off and no explicit files, so only the
    /// always-checked `zeph.md` candidates are considered.
    fn load_defaults(base: &Path) -> Vec<InstructionBlock> {
        load_instructions(base, &[], &[], false)
    }

    #[test]
    fn reload_returns_updated_blocks_when_file_changes() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("zeph.md");

        std::fs::write(&target, "initial content").unwrap();
        let before = load_defaults(tmp.path());
        assert_eq!(before.len(), 1);
        assert_eq!(before[0].content, "initial content");

        std::fs::write(&target, "updated content").unwrap();
        let after = load_defaults(tmp.path());
        assert_eq!(after.len(), 1);
        assert_eq!(after[0].content, "updated content");
    }

    #[test]
    fn reload_returns_empty_when_file_deleted() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("zeph.md");

        std::fs::write(&target, "content").unwrap();
        let before = load_defaults(tmp.path());
        assert_eq!(before.len(), 1);

        std::fs::remove_file(&target).unwrap();
        let after = load_defaults(tmp.path());
        assert!(
            after.is_empty(),
            "deleted file should not be loaded on reload"
        );
    }
}
401
// Unit tests for `load_instructions` and `detection_paths`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `dir/name`, creating parent directories as needed,
    /// and returns the full path.
    fn make_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&path, content).unwrap();
        path
    }

    #[test]
    fn zeph_md_loaded_even_when_auto_detect_disabled() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "some content");
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "some content");
    }

    #[test]
    fn empty_when_no_auto_detect_and_no_explicit_and_no_zeph_md() {
        let dir = TempDir::new().unwrap();
        let blocks = load_instructions(dir.path(), &[], &[], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn finds_zeph_md_in_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph instructions");
    }

    #[test]
    fn finds_dot_zeph_zeph_md() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), ".zeph/zeph.md", "nested zeph instructions");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "nested zeph instructions");
    }

    #[test]
    fn detection_paths_claude() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "# Claude");
        make_file(dir.path(), ".claude/CLAUDE.md", "# Dot Claude");
        make_file(dir.path(), ".claude/rules/a.md", "rule a");
        make_file(dir.path(), ".claude/rules/b.md", "rule b");

        let blocks = load_instructions(dir.path(), &[ProviderKind::Claude], &[], true);
        let sources: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        assert!(sources.contains(&"CLAUDE.md"));
        assert!(sources.contains(&"a.md"));
        assert!(sources.contains(&"b.md"));
    }

    #[test]
    fn detection_paths_openai() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "# Agents");

        let paths = detection_paths(ProviderKind::OpenAi, dir.path());
        assert!(paths.iter().any(|p| p.file_name().unwrap() == "AGENTS.md"));
        assert!(
            paths
                .iter()
                .any(|p| p.file_name().unwrap() == "AGENTS.override.md")
        );
    }

    #[test]
    fn detection_paths_ollama_and_compatible_and_candle() {
        let dir = TempDir::new().unwrap();
        for kind in [
            ProviderKind::Ollama,
            ProviderKind::Compatible,
            ProviderKind::Candle,
        ] {
            let paths = detection_paths(kind, dir.path());
            assert_eq!(paths.len(), 1);
            assert_eq!(paths[0].file_name().unwrap(), "AGENTS.md");
        }
    }

    #[test]
    fn deduplication_by_canonical_path() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "AGENTS.md", "content");

        // Both provider kinds propose the same AGENTS.md path.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Ollama, ProviderKind::Compatible],
            &[],
            true,
        );
        let agents_count = blocks
            .iter()
            .filter(|b| b.source.file_name().unwrap() == "AGENTS.md")
            .count();
        assert_eq!(agents_count, 1);
    }

    #[test]
    fn skips_files_exceeding_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("big.md");
        // One byte past the limit: the smallest file that must be rejected.
        let big = vec![b'x'; usize::try_from(MAX_FILE_SIZE).unwrap() + 1];
        fs::write(&path, &big).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn accepts_file_exactly_at_size_limit() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("limit.md");
        // Exactly at the limit: the largest file that must still be loaded.
        let len = usize::try_from(MAX_FILE_SIZE).unwrap();
        fs::write(&path, vec![b'x'; len]).unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content.len(), len);
    }

    #[test]
    fn skips_empty_files() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "");
        let blocks = load_instructions(dir.path(), &[], &[], true);
        assert!(blocks.is_empty());
    }

    #[test]
    fn nonexistent_paths_are_silently_skipped() {
        let dir = TempDir::new().unwrap();
        let nonexistent = dir.path().join("does_not_exist.md");
        let blocks = load_instructions(dir.path(), &[], &[nonexistent], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn explicit_relative_path_resolved_against_base_dir() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "custom.md", "custom content");
        let blocks = load_instructions(dir.path(), &[], &[PathBuf::from("custom.md")], false);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "custom content");
    }

    #[test]
    fn invalid_utf8_file_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("bad.md");
        // These bytes are not a valid UTF-8 sequence.
        fs::write(&path, b"\xff\xfe invalid utf8 \x80\x81").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiple_providers_union_without_overlap() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        make_file(dir.path(), "AGENTS.md", "agents content");

        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude, ProviderKind::OpenAi],
            &[],
            true,
        );
        let names: Vec<_> = blocks
            .iter()
            .map(|b| b.source.file_name().unwrap().to_str().unwrap())
            .collect();
        assert!(names.contains(&"CLAUDE.md"), "Claude file missing");
        assert!(names.contains(&"AGENTS.md"), "OpenAI file missing");
    }

    #[test]
    fn zeph_md_always_loaded_with_provider_auto_detect() {
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "zeph.md", "zeph rules");
        let blocks = load_instructions(dir.path(), &[ProviderKind::OpenAi], &[], true);
        // No AGENTS files exist, so zeph.md is the only block.
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].content, "zeph rules");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_deduplication() {
        use std::os::unix::fs::symlink;
        let dir = TempDir::new().unwrap();
        make_file(dir.path(), "CLAUDE.md", "claude content");
        symlink(
            dir.path().join("CLAUDE.md"),
            dir.path().join("CLAUDE_link.md"),
        )
        .unwrap();

        // The symlink and its target share one canonical path.
        let blocks = load_instructions(
            dir.path(),
            &[ProviderKind::Claude],
            &[PathBuf::from("CLAUDE_link.md")],
            true,
        );
        let claude_count = blocks
            .iter()
            .filter(|b| b.content == "claude content")
            .count();
        assert_eq!(claude_count, 1, "symlink should be deduped with original");
    }

    #[cfg(unix)]
    #[test]
    fn symlink_escaping_project_root_is_rejected() {
        use std::os::unix::fs::symlink;
        let outside = TempDir::new().unwrap();
        let inside = TempDir::new().unwrap();
        make_file(outside.path(), "secret.md", "secret content");

        let link = inside.path().join("evil.md");
        // The link lives inside the project but points outside of it.
        symlink(outside.path().join("secret.md"), &link).unwrap();

        let blocks = load_instructions(inside.path(), &[], &[link], false);
        assert!(
            blocks.is_empty(),
            "file escaping project root must be rejected"
        );
    }

    #[test]
    fn file_with_null_bytes_is_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("null.md");
        fs::write(&path, b"content\x00more").unwrap();
        let blocks = load_instructions(dir.path(), &[], &[path], false);
        assert!(blocks.is_empty());
    }
}
645}