Skip to main content

alint_core/
walker.rs

1use std::path::{Path, PathBuf};
2
3use ignore::{WalkBuilder, overrides::OverrideBuilder};
4
5use crate::error::{Error, Result};
6
/// One filesystem entry — a file or a directory — recorded by the walker.
#[derive(Clone, Debug)]
pub struct FileEntry {
    /// Location of the entry, expressed relative to the repository root.
    pub path: PathBuf,
    /// `true` when the entry's metadata reports a directory.
    pub is_dir: bool,
    /// Length in bytes. `walk` stores `0` for anything that is not a
    /// regular file.
    pub size: u64,
}
15
16/// The indexed result of one filesystem walk. All rules share this index —
17/// the walk happens once per `alint check` invocation.
18#[derive(Debug, Default)]
19pub struct FileIndex {
20    pub entries: Vec<FileEntry>,
21}
22
23impl FileIndex {
24    pub fn files(&self) -> impl Iterator<Item = &FileEntry> {
25        self.entries.iter().filter(|e| !e.is_dir)
26    }
27
28    pub fn dirs(&self) -> impl Iterator<Item = &FileEntry> {
29        self.entries.iter().filter(|e| e.is_dir)
30    }
31
32    pub fn total_size(&self) -> u64 {
33        self.files().map(|f| f.size).sum()
34    }
35
36    /// Find a file entry by its exact relative path. Linear scan — acceptable
37    /// at the scales we target today; revisit with a `HashSet` / `HashMap`
38    /// index if cross-file-rule benches start to show it.
39    pub fn find_file(&self, rel: &Path) -> Option<&FileEntry> {
40        self.files().find(|e| e.path == rel)
41    }
42}
43
/// Settings that steer a `walk` over the repository tree.
#[derive(Clone, Debug)]
pub struct WalkOptions {
    /// Handed to the walker's `standard_filters` switch: `true` turns the
    /// standard (gitignore-style) filtering on, `false` turns it off.
    pub respect_gitignore: bool,
    /// Extra glob patterns to exclude. `walk` registers each one as an
    /// override, prefixing it with `!` unless it already starts with one.
    pub extra_ignores: Vec<String>,
}
49
50impl Default for WalkOptions {
51    fn default() -> Self {
52        Self {
53            respect_gitignore: true,
54            extra_ignores: Vec::new(),
55        }
56    }
57}
58
59pub fn walk(root: &Path, opts: &WalkOptions) -> Result<FileIndex> {
60    let mut builder = WalkBuilder::new(root);
61    builder
62        .standard_filters(opts.respect_gitignore)
63        .hidden(false)
64        .follow_links(true)
65        .require_git(false);
66
67    // Always exclude `.git/` — descending into git's internal
68    // packfiles + loose objects is wasted work for every alint
69    // rule (none of them target `.git/objects/*`), and it races
70    // git's auto-gc / pack-rewrite on large repos. We set
71    // `hidden(false)` and `require_git(false)` so the `ignore`
72    // crate doesn't apply its own implicit `.git/` exclusion;
73    // this override puts it back.
74    let mut overrides_builder = OverrideBuilder::new(root);
75    overrides_builder
76        .add("!.git")
77        .map_err(|e| Error::Other(format!("ignore pattern .git: {e}")))?;
78    for pattern in &opts.extra_ignores {
79        let pattern = if pattern.starts_with('!') {
80            pattern.clone()
81        } else {
82            format!("!{pattern}")
83        };
84        overrides_builder
85            .add(&pattern)
86            .map_err(|e| Error::Other(format!("ignore pattern {pattern:?}: {e}")))?;
87    }
88    let overrides = overrides_builder
89        .build()
90        .map_err(|e| Error::Other(format!("failed to build overrides: {e}")))?;
91    builder.overrides(overrides);
92
93    let mut entries = Vec::new();
94    for result in builder.build() {
95        let entry = result?;
96        let abs = entry.path();
97        let Ok(rel) = abs.strip_prefix(root) else {
98            continue;
99        };
100        if rel.as_os_str().is_empty() {
101            continue;
102        }
103        let metadata = entry.metadata().map_err(|e| Error::Io {
104            path: abs.to_path_buf(),
105            source: std::io::Error::other(e.to_string()),
106        })?;
107        entries.push(FileEntry {
108            path: rel.to_path_buf(),
109            is_dir: metadata.is_dir(),
110            size: if metadata.is_file() {
111                metadata.len()
112            } else {
113                0
114            },
115        });
116    }
117    Ok(FileIndex { entries })
118}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a scratch directory with a recognisable name prefix.
    fn td() -> tempfile::TempDir {
        let mut builder = tempfile::Builder::new();
        builder.prefix("alint-walker-test-");
        builder.tempdir().unwrap()
    }

    /// Writes `content` to `root/rel`, creating parent directories first.
    fn touch(root: &Path, rel: &str, content: &[u8]) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            std::fs::create_dir_all(dir).unwrap();
        }
        std::fs::write(&target, content).unwrap();
    }

    /// Renders every indexed path as a string with forward slashes, so
    /// assertions can compare against literals such as `"src/foo.rs"` on
    /// any host OS (Windows' `Path::display()` emits `src\foo.rs`).
    fn paths(idx: &FileIndex) -> Vec<String> {
        let mut rendered = Vec::with_capacity(idx.entries.len());
        for entry in &idx.entries {
            rendered.push(entry.path.display().to_string().replace('\\', "/"));
        }
        rendered
    }

    /// Builds a non-directory entry for hand-made indexes.
    fn file(path: &str, size: u64) -> FileEntry {
        FileEntry {
            path: PathBuf::from(path),
            is_dir: false,
            size,
        }
    }

    /// Builds a directory entry for hand-made indexes.
    fn dir(path: &str, size: u64) -> FileEntry {
        FileEntry {
            path: PathBuf::from(path),
            is_dir: true,
            size,
        }
    }

    /// Runs `walk` with explicitly spelled-out options.
    fn walk_with(root: &Path, respect_gitignore: bool, extra: &[&str]) -> Result<FileIndex> {
        let opts = WalkOptions {
            respect_gitignore,
            extra_ignores: extra.iter().map(|s| (*s).to_string()).collect(),
        };
        walk(root, &opts)
    }

    #[test]
    fn fileindex_files_filters_directories_out() {
        let idx = FileIndex {
            entries: vec![dir("a", 0), file("a/x.rs", 5)],
        };
        let only_files: Vec<_> = idx.files().collect();
        assert_eq!(only_files.len(), 1);
        assert_eq!(only_files[0].path, Path::new("a/x.rs"));
    }

    #[test]
    fn fileindex_dirs_filters_files_out() {
        let idx = FileIndex {
            entries: vec![dir("a", 0), file("a/x.rs", 5)],
        };
        let only_dirs: Vec<_> = idx.dirs().collect();
        assert_eq!(only_dirs.len(), 1);
        assert_eq!(only_dirs[0].path, Path::new("a"));
    }

    #[test]
    fn fileindex_total_size_sums_files_only() {
        let idx = FileIndex {
            entries: vec![
                // `walk` reports 0 for directories; the bogus 999 proves
                // the sum leaves directories out regardless.
                dir("a", 999),
                file("a/x.rs", 100),
                file("a/y.rs", 50),
            ],
        };
        assert_eq!(idx.total_size(), 150);
    }

    #[test]
    fn fileindex_find_file_returns_match_or_none() {
        let idx = FileIndex {
            entries: vec![file("a/x.rs", 0), dir("b", 0)],
        };
        assert!(idx.find_file(Path::new("a/x.rs")).is_some());
        assert!(idx.find_file(Path::new("missing.rs")).is_none());
        // Directories are filtered out, so asking for a known
        // directory yields None.
        assert!(idx.find_file(Path::new("b")).is_none());
    }

    #[test]
    fn walk_excludes_dot_git_directory() {
        let tmp = td();
        let root = tmp.path();
        touch(root, "README.md", b"# demo\n");
        // Fake `.git/` content that must never reach the index.
        touch(root, ".git/config", b"[core]\n");
        touch(root, ".git/HEAD", b"ref: refs/heads/main\n");

        let idx = walk_with(root, false, &[]).unwrap();

        let p = paths(&idx);
        assert!(p.iter().any(|s| s == "README.md"), "missing README.md: {p:?}");
        assert!(
            p.iter().all(|s| !s.starts_with(".git")),
            ".git was not excluded: {p:?}",
        );
    }

    #[test]
    fn walk_respects_gitignore_when_enabled() {
        let tmp = td();
        let root = tmp.path();
        touch(root, ".gitignore", b"target/\nignored.txt\n");
        touch(root, "src/main.rs", b"fn main() {}\n");
        touch(root, "target/debug/build.log", b"junk");
        touch(root, "ignored.txt", b"junk");

        let idx = walk_with(root, true, &[]).unwrap();

        let p = paths(&idx);
        assert!(p.iter().any(|s| s == "src/main.rs"));
        assert!(
            p.iter().all(|s| !s.starts_with("target")),
            "target/ should be ignored: {p:?}",
        );
        assert!(
            p.iter().all(|s| s != "ignored.txt"),
            "ignored.txt should be filtered: {p:?}",
        );
    }

    #[test]
    fn walk_includes_gitignored_paths_when_respect_gitignore_false() {
        let tmp = td();
        let root = tmp.path();
        touch(root, ".gitignore", b"ignored.txt\n");
        touch(root, "ignored.txt", b"x");
        touch(root, "kept.txt", b"y");

        let idx = walk_with(root, false, &[]).unwrap();
        let p = paths(&idx);
        assert!(
            p.iter().any(|s| s == "ignored.txt"),
            "respect_gitignore=false should include it: {p:?}",
        );
        assert!(p.iter().any(|s| s == "kept.txt"));
    }

    #[test]
    fn walk_applies_extra_ignores_as_excludes() {
        let tmp = td();
        let root = tmp.path();
        touch(root, "src/keep.rs", b"x");
        touch(root, "vendor/skip.rs", b"y");

        let idx = walk_with(root, false, &["vendor/**"]).unwrap();
        let p = paths(&idx);
        assert!(p.iter().any(|s| s == "src/keep.rs"));
        // `vendor/**` removes the contents, but the directory entry
        // itself may still show up; the rule layer's `path_scope`
        // handles the dir-vs-file distinction. The point here is
        // that no FILE under vendor/ made it into the index.
        let indexed_files: Vec<&FileEntry> = idx.files().collect();
        assert!(
            indexed_files.iter().all(|e| !e.path.starts_with("vendor")),
            "no file under vendor/ should be indexed: {p:?}",
        );
    }

    #[test]
    fn walk_invalid_extra_ignore_pattern_surfaces_error() {
        let tmp = td();
        touch(tmp.path(), "a.txt", b"x");
        let err = walk_with(tmp.path(), false, &["[unterminated"]);
        assert!(err.is_err(), "bad pattern should fail: {err:?}");
    }

    #[test]
    fn walk_emits_files_with_correct_size() {
        let tmp = td();
        let payload = [0u8; 1024];
        touch(tmp.path(), "a.txt", &payload);
        let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
        let wanted = Path::new("a.txt");
        let entry = idx
            .files()
            .find(|candidate| candidate.path == wanted)
            .expect("a.txt entry");
        assert_eq!(entry.size, 1024);
        assert!(!entry.is_dir);
    }

    #[test]
    fn default_walk_options_respects_gitignore_and_no_extra_ignores() {
        let WalkOptions {
            respect_gitignore,
            extra_ignores,
        } = WalkOptions::default();
        assert!(respect_gitignore);
        assert!(extra_ignores.is_empty());
    }
}