Skip to main content

kardo_core/scanner/
discovery.rs

1//! File discovery: walk a project directory, respect .gitignore, hash content.
2
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::time::SystemTime;
6
7use chrono::{DateTime, Utc};
8use ignore::WalkBuilder;
9use serde::Serialize;
10use sha2::{Digest, Sha256};
11
/// File extensions (lowercase, without the leading dot) that mark a file as
/// documentation-relevant when no directory or special-name rule applies.
const DOC_EXTENSIONS: &[&str] = &[
    // Prose formats
    "md", "txt", "rst", "adoc",
    // Structured / configuration formats
    "yml", "yaml", "json", "toml",
];
16
/// Well-known file names that are always included, whatever their extension.
///
/// Each entry is compared case-insensitively against both the full file name
/// and the file stem, so `README`, `readme.md` and `.cursorrules` all match.
const SPECIAL_NAMES: &[&str] = &[
    // Conventional project documents
    "README",
    "CHANGELOG",
    "LICENSE",
    "CONTRIBUTING",
    // AI-assistant instruction files
    "CLAUDE",
    "AGENTS",
    // Project policy documents
    "SECURITY",
    "CODE_OF_CONDUCT",
    // Editor/agent rule files (dot-files without an extension)
    ".cursorrules",
    ".clinerules",
];
30
/// Root-relative directory prefixes whose files are always treated as
/// documentation, regardless of file name or extension.
///
/// Every entry ends with `/` so that it only matches a whole directory name.
const DOC_DIRECTORIES: &[&str] = &["docs/", "doc/", "documentation/", ".claude/"];
38
39/// A file discovered during project scanning.
40#[derive(Debug, Clone, Serialize)]
41pub struct DiscoveredFile {
42    /// Absolute path to the file.
43    pub path: PathBuf,
44    /// Path relative to the project root.
45    pub relative_path: String,
46    /// File size in bytes.
47    pub size: u64,
48    /// Last modification time.
49    pub modified_at: Option<DateTime<Utc>>,
50    /// File extension (lowercase, without dot).
51    pub extension: Option<String>,
52    /// Whether this is a Markdown file.
53    pub is_markdown: bool,
54    /// SHA-256 hash of file content.
55    pub content_hash: String,
56}
57
/// Project file scanner. Walks a directory tree, respecting `.gitignore`
/// and the configured include/exclude rules.
///
/// Configure it with the `with_*` builder methods, then call `discover`.
#[derive(Debug, Clone)]
pub struct FileDiscovery {
    /// Directory the scan starts from.
    project_root: PathBuf,
    /// Glob patterns for paths to drop from the scan.
    excludes: Vec<String>,
    /// Glob patterns for paths to keep (whitelist, or exceptions to `excludes`).
    includes: Vec<String>,
    /// Whether git ignore rules are honoured.
    use_gitignore: bool,
    /// Optional extra ignore file applied on top of the other rules.
    custom_ignore_file: Option<PathBuf>,
}
66
67impl FileDiscovery {
68    /// Create a new scanner for the given project root.
69    pub fn new(project_root: impl AsRef<Path>) -> Self {
70        Self {
71            project_root: project_root.as_ref().to_path_buf(),
72            excludes: Vec::new(),
73            includes: Vec::new(),
74            use_gitignore: true,
75            custom_ignore_file: None,
76        }
77    }
78
79    /// Add glob patterns to exclude from scanning.
80    pub fn with_excludes(mut self, excludes: Vec<String>) -> Self {
81        self.excludes = excludes;
82        self
83    }
84
85    /// Add glob patterns to include (whitelist mode, or override excludes).
86    pub fn with_includes(mut self, includes: Vec<String>) -> Self {
87        self.includes = includes;
88        self
89    }
90
91    /// Enable or disable .gitignore processing.
92    pub fn with_gitignore(mut self, use_gitignore: bool) -> Self {
93        self.use_gitignore = use_gitignore;
94        self
95    }
96
97    /// Add a custom ignore file path (e.g. a shared .kardo.ignore).
98    pub fn with_custom_ignore_file(mut self, path: impl AsRef<Path>) -> Self {
99        self.custom_ignore_file = Some(path.as_ref().to_path_buf());
100        self
101    }
102
103    /// Scan project directory, respecting .gitignore and filtering rules.
104    /// Returns all relevant documentation files sorted by relative_path.
105    pub fn discover(&self) -> Result<Vec<DiscoveredFile>, ScanError> {
106        let project_root = self
107            .project_root
108            .canonicalize()
109            .map_err(|e| ScanError::Io(format!("Cannot resolve project root: {}", e)))?;
110
111        let mut builder = WalkBuilder::new(&project_root);
112        builder
113            .hidden(false)
114            .git_ignore(self.use_gitignore)
115            .git_global(self.use_gitignore)
116            .git_exclude(self.use_gitignore)
117            .follow_links(false);
118
119        // Layer 3: auto-detect .kardo.ignore in any directory
120        builder.add_custom_ignore_filename(".kardo.ignore");
121
122        // Custom ignore file (--ignore-path)
123        if let Some(ref ignore_path) = self.custom_ignore_file {
124            if let Some(err) = builder.add_ignore(ignore_path) {
125                return Err(ScanError::Io(format!(
126                    "Cannot read ignore file '{}': {}",
127                    ignore_path.display(),
128                    err
129                )));
130            }
131        }
132
133        // Layer 4: CLI --exclude / --include via overrides
134        if !self.excludes.is_empty() || !self.includes.is_empty() {
135            let mut ob = ignore::overrides::OverrideBuilder::new(&project_root);
136
137            // When both excludes and includes are present, add catch-all whitelist
138            // so excludes act as blacklist (not whitelist-only mode)
139            if !self.excludes.is_empty() && !self.includes.is_empty() {
140                ob.add("**").map_err(|e| {
141                    ScanError::Io(format!("Failed to add catch-all override: {}", e))
142                })?;
143            }
144
145            for pattern in &self.excludes {
146                let negated = format!("!{}", pattern);
147                ob.add(&negated).map_err(|e| {
148                    ScanError::Io(format!("Invalid exclude pattern '{}': {}", pattern, e))
149                })?;
150            }
151            for pattern in &self.includes {
152                ob.add(pattern).map_err(|e| {
153                    ScanError::Io(format!("Invalid include pattern '{}': {}", pattern, e))
154                })?;
155            }
156
157            let overrides = ob
158                .build()
159                .map_err(|e| ScanError::Io(format!("Failed to build overrides: {}", e)))?;
160            builder.overrides(overrides);
161        }
162
163        let walker = builder.build();
164
165        let mut files = Vec::new();
166
167        for entry in walker {
168            let entry = match entry {
169                Ok(e) => e,
170                Err(err) => {
171                    log::warn!("Skipping entry: {}", err);
172                    continue;
173                }
174            };
175
176            if entry.file_type().map_or(true, |ft| !ft.is_file()) {
177                continue;
178            }
179
180            let abs_path = entry.path().to_path_buf();
181            let relative = abs_path
182                .strip_prefix(&project_root)
183                .unwrap_or(&abs_path)
184                .to_string_lossy()
185                .to_string();
186
187            if !Self::is_relevant_file(&abs_path, &relative) {
188                continue;
189            }
190
191            match Self::build_discovered_file(&abs_path, &relative) {
192                Ok(df) => files.push(df),
193                Err(err) => {
194                    log::warn!("Skipping {}: {}", relative, err);
195                }
196            }
197        }
198
199        files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
200        Ok(files)
201    }
202
203    /// Check if a file is relevant for documentation analysis.
204    fn is_relevant_file(abs_path: &Path, relative: &str) -> bool {
205        // Skip .kardo directory
206        if relative.starts_with(".kardo/") || relative.starts_with(".kardo\\") {
207            return false;
208        }
209
210        // Files inside doc directories are always relevant
211        for dir in DOC_DIRECTORIES {
212            if relative.starts_with(dir) {
213                return true;
214            }
215        }
216
217        // Check special filename stems (case-insensitive on stem)
218        let file_name = abs_path
219            .file_name()
220            .and_then(|n| n.to_str())
221            .unwrap_or("");
222        let stem = abs_path
223            .file_stem()
224            .and_then(|s| s.to_str())
225            .unwrap_or("");
226
227        for special in SPECIAL_NAMES {
228            if file_name.eq_ignore_ascii_case(special)
229                || stem.eq_ignore_ascii_case(special)
230            {
231                return true;
232            }
233        }
234
235        // Check extension
236        if let Some(ext) = abs_path.extension().and_then(|e| e.to_str()) {
237            let ext_lower = ext.to_lowercase();
238            return DOC_EXTENSIONS.contains(&ext_lower.as_str());
239        }
240
241        false
242    }
243
244    /// Compute SHA-256 hex hash of a file's contents.
245    fn hash_file(path: &Path) -> Result<String, ScanError> {
246        let content = fs::read(path)
247            .map_err(|e| ScanError::Io(format!("Cannot read {}: {}", path.display(), e)))?;
248        let mut hasher = Sha256::new();
249        hasher.update(&content);
250        Ok(format!("{:x}", hasher.finalize()))
251    }
252
253    /// Build a DiscoveredFile from a path, computing hash and metadata.
254    fn build_discovered_file(abs_path: &Path, relative: &str) -> Result<DiscoveredFile, ScanError> {
255        let metadata = fs::metadata(abs_path)
256            .map_err(|e| ScanError::Io(format!("Cannot read metadata for {}: {}", relative, e)))?;
257
258        let size = metadata.len();
259
260        let modified_at: Option<DateTime<Utc>> = metadata.modified().ok().and_then(|t| {
261            t.duration_since(SystemTime::UNIX_EPOCH)
262                .ok()
263                .and_then(|d| {
264                    DateTime::from_timestamp(d.as_secs() as i64, d.subsec_nanos())
265                })
266        });
267
268        let extension = abs_path
269            .extension()
270            .and_then(|e| e.to_str())
271            .map(|e| e.to_lowercase());
272
273        let is_markdown = extension.as_deref() == Some("md");
274
275        let content_hash = Self::hash_file(abs_path)?;
276
277        Ok(DiscoveredFile {
278            path: abs_path.to_path_buf(),
279            relative_path: relative.to_string(),
280            size,
281            modified_at,
282            extension,
283            is_markdown,
284            content_hash,
285        })
286    }
287}
288
289/// Errors during file scanning.
290#[derive(Debug, thiserror::Error)]
291pub enum ScanError {
292    #[error("IO error: {0}")]
293    Io(String),
294}
295
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Lay down the minimal `.git` skeleton so the directory counts as a repository.
    fn init_git_repo(root: &Path) {
        let git_dir = root.join(".git");
        fs::create_dir_all(&git_dir).unwrap();
        fs::create_dir_all(git_dir.join("objects")).unwrap();
        fs::create_dir_all(git_dir.join("refs")).unwrap();
        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n").unwrap();
        fs::write(
            git_dir.join("config"),
            "[core]\n\trepositoryformatversion = 0\n\tfilemode = true\n\tbare = false\n",
        )
        .unwrap();
    }

    /// Write `content` to `root/rel`, creating parent directories as needed.
    fn write_file(root: &Path, rel: &str, content: &str) {
        let target = root.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, content).unwrap();
    }

    /// Project the relative paths out of a scan result.
    fn paths_of(files: &[DiscoveredFile]) -> Vec<&str> {
        files.iter().map(|f| f.relative_path.as_str()).collect()
    }

    /// Look up a discovered file by its relative path, panicking when absent.
    fn by_path<'a>(files: &'a [DiscoveredFile], rel: &str) -> &'a DiscoveredFile {
        files.iter().find(|f| f.relative_path == rel).unwrap()
    }

    /// Populate a repository with doc files, source files and git-ignored files.
    fn create_test_project(tmp: &TempDir) {
        let root = tmp.path();
        init_git_repo(root);

        let fixtures = [
            // Documentation
            ("README.md", "# Test Project\n"),
            ("CLAUDE.md", "# Claude instructions\n"),
            ("docs/guide.md", "# Guide\nSome content"),
            ("docs/config.yml", "key: value\n"),
            ("docs/data.json", "{}"),
            ("docs/settings.toml", "[settings]\n"),
            // Non-doc files
            ("src/main.rs", "fn main() {}"),
            ("src/lib.rs", "pub mod foo;"),
            // .gitignore
            (".gitignore", "target/\n*.log\n"),
            // Ignored files
            ("target/debug.log", "log"),
            ("build.log", "log"),
        ];
        for (rel, content) in fixtures {
            write_file(root, rel, content);
        }
    }

    #[test]
    fn test_discover_finds_doc_files() {
        let tmp = TempDir::new().unwrap();
        create_test_project(&tmp);

        let files = FileDiscovery::new(tmp.path()).discover().unwrap();
        let rel_paths = paths_of(&files);

        assert!(rel_paths.contains(&"README.md"), "Missing README.md, found: {:?}", rel_paths);
        assert!(rel_paths.contains(&"CLAUDE.md"), "Missing CLAUDE.md, found: {:?}", rel_paths);
        for expected in [
            "docs/guide.md",
            "docs/config.yml",
            "docs/data.json",
            "docs/settings.toml",
        ] {
            assert!(rel_paths.contains(&expected));
        }

        // Non-doc files excluded
        for unexpected in ["src/main.rs", "src/lib.rs"] {
            assert!(!rel_paths.contains(&unexpected));
        }
    }

    #[test]
    fn test_gitignore_respected() {
        let tmp = TempDir::new().unwrap();
        create_test_project(&tmp);

        let files = FileDiscovery::new(tmp.path()).discover().unwrap();
        let rel_paths = paths_of(&files);

        for ignored in ["target/debug.log", "build.log"] {
            assert!(!rel_paths.contains(&ignored));
        }
    }

    #[test]
    fn test_is_relevant_file_filter() {
        let relevant = |name: &str| FileDiscovery::is_relevant_file(Path::new(name), name);

        // Doc extensions
        for name in ["guide.md", "notes.txt", "doc.rst", "doc.adoc"] {
            assert!(relevant(name));
        }

        // Non-doc extensions
        for name in ["main.rs", "app.py", "style.css"] {
            assert!(!relevant(name));
        }

        // Special names
        for name in [
            "README",
            "README.md",
            "LICENSE",
            "CLAUDE.md",
            "AGENTS.md",
            ".cursorrules",
            ".clinerules",
        ] {
            assert!(relevant(name));
        }

        // .kardo excluded
        assert!(!relevant(".kardo/config.json"));
    }

    #[test]
    fn test_content_hashing() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        // Same content = same hash
        write_file(root, "a.md", "identical content");
        write_file(root, "b.md", "identical content");
        // Different content = different hash
        write_file(root, "c.md", "different content");

        let files = FileDiscovery::new(root).discover().unwrap();
        let (a, b, c) = (
            by_path(&files, "a.md"),
            by_path(&files, "b.md"),
            by_path(&files, "c.md"),
        );

        assert_eq!(a.content_hash, b.content_hash);
        assert_ne!(a.content_hash, c.content_hash);

        // Verify hash format: 64 hex chars
        assert_eq!(a.content_hash.len(), 64);
        assert!(a.content_hash.chars().all(|ch| ch.is_ascii_hexdigit()));
    }

    #[test]
    fn test_relative_paths_correct() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, "docs/deep/nested/file.md", "deep");
        write_file(root, "README.md", "root");

        let files = FileDiscovery::new(root).discover().unwrap();

        let readme = by_path(&files, "README.md");
        assert!(readme.path.is_absolute());

        let nested = by_path(&files, "docs/deep/nested/file.md");
        assert!(nested.path.is_absolute());
        assert!(!nested.relative_path.starts_with('/'));
    }

    #[test]
    fn test_hidden_claude_dir_discovered() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, ".claude/instructions", "AI instructions");
        write_file(root, ".claude/research/notes.md", "notes");

        let files = FileDiscovery::new(root).discover().unwrap();
        let rel_paths = paths_of(&files);

        assert!(
            rel_paths.contains(&".claude/instructions"),
            "Missing .claude/instructions, found: {:?}",
            rel_paths
        );
        assert!(rel_paths.contains(&".claude/research/notes.md"));
    }

    #[test]
    fn test_kardo_dir_excluded() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, ".kardo/kardo.db", "fake db");
        write_file(root, ".kardo/config.json", "{}");
        write_file(root, "README.md", "hi");

        let files = FileDiscovery::new(root).discover().unwrap();
        let rel_paths = paths_of(&files);

        assert!(!rel_paths.iter().any(|p| p.starts_with(".kardo/")));
    }

    #[test]
    fn test_file_metadata() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, "README.md", "# Hello\n");

        let files = FileDiscovery::new(root).discover().unwrap();
        let readme = by_path(&files, "README.md");

        assert!(readme.is_markdown);
        assert_eq!(readme.extension.as_deref(), Some("md"));
        assert_eq!(readme.size, 8); // "# Hello\n"
        assert!(readme.modified_at.is_some());
        assert!(readme.path.is_absolute());
    }

    #[test]
    fn test_sorted_output() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, "z-file.md", "z");
        write_file(root, "a-file.md", "a");
        write_file(root, "m/middle.md", "m");

        let files = FileDiscovery::new(root).discover().unwrap();
        let rel_paths = paths_of(&files);

        let mut sorted = rel_paths.clone();
        sorted.sort();
        assert_eq!(rel_paths, sorted);
    }

    #[test]
    fn test_custom_kardo_ignore_file() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, "README.md", "# Project");
        write_file(root, ".claude/research/calibration/data.md", "cal");
        write_file(root, ".claude/research/notes.md", "notes");

        // Create .kardo.ignore that excludes calibration dir
        write_file(root, ".kardo.ignore", ".claude/research/calibration/\n");

        let files = FileDiscovery::new(root).discover().unwrap();
        let rel_paths = paths_of(&files);

        assert!(rel_paths.contains(&"README.md"));
        assert!(rel_paths.contains(&".claude/research/notes.md"));
        assert!(
            !rel_paths.contains(&".claude/research/calibration/data.md"),
            "calibration data should be excluded by .kardo.ignore, found: {:?}",
            rel_paths
        );
    }

    #[test]
    fn test_exclude_pattern_via_builder() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, "README.md", "# Project");
        write_file(root, ".claude/research/notes.md", "notes");
        write_file(root, "docs/guide.md", "guide");

        let files = FileDiscovery::new(root)
            .with_excludes(vec![".claude/research/**".into()])
            .discover()
            .unwrap();
        let rel_paths = paths_of(&files);

        assert!(rel_paths.contains(&"README.md"));
        assert!(rel_paths.contains(&"docs/guide.md"));
        assert!(
            !rel_paths.contains(&".claude/research/notes.md"),
            "research notes should be excluded, found: {:?}",
            rel_paths
        );
    }

    #[test]
    fn test_include_overrides_exclude() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, "README.md", "# Project");
        write_file(root, "docs/guide.md", "guide");
        write_file(root, "docs/important.md", "important");
        write_file(root, "docs/draft.md", "draft");

        let files = FileDiscovery::new(root)
            .with_excludes(vec!["docs/**".into()])
            .with_includes(vec!["docs/important.md".into()])
            .discover()
            .unwrap();
        let rel_paths = paths_of(&files);

        // README.md should still be included (catch-all ** protects it)
        assert!(
            rel_paths.contains(&"README.md"),
            "README.md should be included, found: {:?}",
            rel_paths
        );
        // docs/important.md should survive despite docs/** exclude
        assert!(
            rel_paths.contains(&"docs/important.md"),
            "important.md should override exclude, found: {:?}",
            rel_paths
        );
        // Other docs should be excluded
        for dropped in ["docs/guide.md", "docs/draft.md"] {
            assert!(!rel_paths.contains(&dropped));
        }
    }

    #[test]
    fn test_no_gitignore_flag() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        init_git_repo(root);

        write_file(root, ".gitignore", "ignored.md\n");
        write_file(root, "README.md", "# Project");
        write_file(root, "ignored.md", "this is ignored by git");

        // With gitignore (default) — ignored.md should not appear
        let default_files = FileDiscovery::new(root).discover().unwrap();
        let paths = paths_of(&default_files);
        assert!(!paths.contains(&"ignored.md"), "should be gitignored");

        // Without gitignore — ignored.md should appear
        let unfiltered_files = FileDiscovery::new(root)
            .with_gitignore(false)
            .discover()
            .unwrap();
        let paths = paths_of(&unfiltered_files);
        assert!(
            paths.contains(&"ignored.md"),
            "should appear with gitignore disabled, found: {:?}",
            paths
        );
    }

    #[test]
    fn test_builder_backward_compat() {
        let tmp = TempDir::new().unwrap();
        create_test_project(&tmp);

        // Old way: FileDiscovery::new(&root).discover()
        let files = FileDiscovery::new(tmp.path()).discover().unwrap();
        let rel_paths = paths_of(&files);

        // Should behave identically to before: find doc files, respect gitignore
        for present in ["README.md", "CLAUDE.md", "docs/guide.md"] {
            assert!(rel_paths.contains(&present));
        }
        for absent in ["src/main.rs", "target/debug.log", "build.log"] {
            assert!(!rel_paths.contains(&absent));
        }
    }
}