Skip to main content

crap_core/core/
walker.rs

1//! Filesystem walker — discovers source files for analysis,
2//! respecting `.gitignore` and user-provided exclude patterns.
3//!
4//! Adapter-agnostic: the extension filter is parameter-driven via
5//! `AnalyzeOptions::extensions` so crap4rs passes `&["rs"]`, crap4ts
6//! passes `&["ts","tsx","js","jsx","mjs","cjs"]`, and future adapters
7//! supply their own. AST-purity gate satisfied — `ignore::WalkBuilder`
8//! is filesystem-walking machinery with no `syn` / `tree_sitter` /
9//! `swc` / `oxc` coupling.
10//!
11use std::path::{Path, PathBuf};
12
13use anyhow::{Context, Result};
14use ignore::WalkBuilder;
15
16/// Walk the source directory and collect all files whose extension
17/// matches one in `extensions` (case-sensitive), respecting
18/// `.gitignore` and user-provided exclude patterns.
19///
20/// `extensions` is the bare suffix without the leading dot
21/// (`"rs"`, `"ts"`, `"tsx"`). An empty slice matches no files (the
22/// caller is expected to short-circuit on
23/// `AnalyzeOptions::extensions.is_empty()` upstream, but we don't
24/// crash here).
25pub fn discover_source_files(
26    src: &Path,
27    exclude: &[String],
28    respect_gitignore: bool,
29    extensions: &[&str],
30) -> Result<Vec<PathBuf>> {
31    let mut builder = WalkBuilder::new(src);
32    builder.git_ignore(respect_gitignore);
33
34    // Add exclude patterns as overrides
35    if !exclude.is_empty() {
36        let mut overrides = ignore::overrides::OverrideBuilder::new(src);
37        for pattern in exclude {
38            overrides
39                .add(&format!("!{pattern}"))
40                .with_context(|| format!("invalid exclude pattern: {pattern}"))?;
41        }
42        builder.overrides(overrides.build()?);
43    }
44
45    let mut files = Vec::new();
46    for entry in builder.build() {
47        let entry = entry?;
48        if entry.file_type().is_some_and(|ft| ft.is_file())
49            && let Some(ext) = entry.path().extension()
50            && extensions.iter().any(|e| ext == *e)
51        {
52            files.push(entry.into_path());
53        }
54    }
55
56    // Sort for deterministic output
57    files.sort();
58    Ok(files)
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates `<tempdir>/src` and populates it with empty files at the
    /// given `src`-relative paths, creating parent directories as
    /// needed. The temp-dir guard is returned alongside the `src` path
    /// so the tree stays on disk for the duration of the test.
    fn scratch_tree(relative_files: &[&str]) -> (tempfile::TempDir, PathBuf) {
        let guard = tempfile::tempdir().unwrap();
        let root = guard.path().join("src");
        fs::create_dir_all(&root).unwrap();
        for rel in relative_files {
            let target = root.join(rel);
            if let Some(parent) = target.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&target, "").unwrap();
        }
        (guard, root)
    }

    /// Projects each path onto its final component, skipping any path
    /// without a UTF-8 file name.
    fn base_names(paths: &[PathBuf]) -> Vec<&str> {
        paths
            .iter()
            .filter_map(|p| p.file_name()?.to_str())
            .collect()
    }

    /// Both the top-level and the nested `.rs` file are found, while
    /// the `.txt` sibling is left out.
    #[test]
    fn discover_source_files_finds_nested_rust_extension() {
        let (_guard, src) = scratch_tree(&["lib.rs", "sub/mod.rs", "readme.txt"]);

        let found = discover_source_files(&src, &[], false, &["rs"]).unwrap();
        assert_eq!(found.len(), 2);
        assert!(!found.iter().any(|p| p.extension().unwrap() != "rs"));
    }

    /// Results come back in sorted order even though the files were
    /// created out of order.
    #[test]
    fn discover_source_files_sorted_deterministically() {
        let (_guard, src) = scratch_tree(&["z.rs", "a.rs", "m.rs"]);

        let found = discover_source_files(&src, &[], false, &["rs"]).unwrap();
        let names: Vec<_> = found.iter().map(|p| p.file_name().unwrap()).collect();
        assert_eq!(names, vec!["a.rs", "m.rs", "z.rs"]);
    }

    /// The extension allow-list is fully caller-driven rather than
    /// hard-wired to `.rs`, so crap4ts can later land its oxc walker
    /// against the same filesystem layer.
    #[test]
    fn discover_source_files_finds_typescript_extensions() {
        let (_guard, src) = scratch_tree(&[
            "a.ts",
            "b.tsx",
            "sub/c.js",
            "d.rs", // wrong-language sibling
            "notes.md",
        ]);

        let found =
            discover_source_files(&src, &[], false, &["ts", "tsx", "js", "jsx", "mjs", "cjs"])
                .unwrap();
        let names = base_names(&found);
        assert_eq!(names, vec!["a.ts", "b.tsx", "c.js"]);
        assert!(
            names.iter().all(|n| !n.ends_with(".rs")),
            "walker must not pick up `.rs` when it's not in the extension allow-list"
        );
    }

    /// Excluding `**/*.d.ts` removes TypeScript declaration files at
    /// every depth while ordinary `.ts` siblings survive — pins the
    /// `ignore::overrides` contract the crap4ts `forced_excludes`
    /// wiring in `cli::merge_exclude` relies on (crap-rs#253). The
    /// adapter passes `**/*.d.ts` via `AdapterMeta::forced_excludes`;
    /// this test confirms the walker mechanism honors that glob shape.
    #[test]
    fn discover_source_files_dts_excluded_by_glob() {
        let (_guard, src) =
            scratch_tree(&["app.ts", "types.d.ts", "sub/nested.d.ts", "sub/nested.ts"]);

        let exclude = vec!["**/*.d.ts".to_string()];
        let found =
            discover_source_files(&src, &exclude, false, &["ts", "tsx", "js", "jsx"]).unwrap();
        let names = base_names(&found);
        assert_eq!(names, vec!["app.ts", "nested.ts"]);
        assert!(
            names.iter().all(|n| !n.ends_with(".d.ts")),
            "`**/*.d.ts` glob must drop every declaration file in the tree, including nested ones",
        );
    }

    /// With no extensions configured nothing is returned — the only
    /// sane behavior when the caller forgot to set
    /// `AnalyzeOptions::extensions`.
    /// `core::ensure_source_files_found` then surfaces the diagnostic.
    #[test]
    fn discover_source_files_empty_extensions_returns_empty() {
        let (_guard, src) = scratch_tree(&["lib.rs"]);

        let found = discover_source_files(&src, &[], false, &[]).unwrap();
        assert!(
            found.is_empty(),
            "no extensions configured ⇒ no files (caller surfaces diagnostic upstream)"
        );
    }
}