Skip to main content

alint_core/
walker.rs

1use std::path::{Path, PathBuf};
2
3use ignore::{WalkBuilder, overrides::OverrideBuilder};
4
5use crate::error::{Error, Result};
6
/// A single filesystem entry discovered by the walker.
#[derive(Debug, Clone)]
pub struct FileEntry {
    /// Path relative to the repository root.
    pub path: PathBuf,
    /// `true` when the entry's metadata reports a directory.
    pub is_dir: bool,
    /// Length in bytes for regular files; `0` for every other kind of entry.
    pub size: u64,
}
15
16/// The indexed result of one filesystem walk. All rules share this index —
17/// the walk happens once per `alint check` invocation.
18#[derive(Debug, Default)]
19pub struct FileIndex {
20    pub entries: Vec<FileEntry>,
21}
22
23impl FileIndex {
24    pub fn files(&self) -> impl Iterator<Item = &FileEntry> {
25        self.entries.iter().filter(|e| !e.is_dir)
26    }
27
28    pub fn dirs(&self) -> impl Iterator<Item = &FileEntry> {
29        self.entries.iter().filter(|e| e.is_dir)
30    }
31
32    pub fn total_size(&self) -> u64 {
33        self.files().map(|f| f.size).sum()
34    }
35
36    /// Find a file entry by its exact relative path. Linear scan — acceptable
37    /// at the scales we target today; revisit with a `HashSet` / `HashMap`
38    /// index if cross-file-rule benches start to show it.
39    pub fn find_file(&self, rel: &Path) -> Option<&FileEntry> {
40        self.files().find(|e| e.path == rel)
41    }
42}
43
/// Options controlling how [`walk`] traverses the tree.
#[derive(Debug, Clone)]
pub struct WalkOptions {
    /// Forwarded to `WalkBuilder::standard_filters`; `true` by default.
    pub respect_gitignore: bool,
    /// Additional patterns to exclude from the walk. Each one is registered
    /// as an ignore override; a leading `!` is added when it is missing.
    pub extra_ignores: Vec<String>,
}
49
50impl Default for WalkOptions {
51    fn default() -> Self {
52        Self {
53            respect_gitignore: true,
54            extra_ignores: Vec::new(),
55        }
56    }
57}
58
59pub fn walk(root: &Path, opts: &WalkOptions) -> Result<FileIndex> {
60    let mut builder = WalkBuilder::new(root);
61    builder
62        .standard_filters(opts.respect_gitignore)
63        .hidden(false)
64        .follow_links(true)
65        .require_git(false);
66
67    // Always exclude `.git/` — descending into git's internal
68    // packfiles + loose objects is wasted work for every alint
69    // rule (none of them target `.git/objects/*`), and it races
70    // git's auto-gc / pack-rewrite on large repos. We set
71    // `hidden(false)` and `require_git(false)` so the `ignore`
72    // crate doesn't apply its own implicit `.git/` exclusion;
73    // this override puts it back.
74    let mut overrides_builder = OverrideBuilder::new(root);
75    overrides_builder
76        .add("!.git")
77        .map_err(|e| Error::Other(format!("ignore pattern .git: {e}")))?;
78    for pattern in &opts.extra_ignores {
79        let pattern = if pattern.starts_with('!') {
80            pattern.clone()
81        } else {
82            format!("!{pattern}")
83        };
84        overrides_builder
85            .add(&pattern)
86            .map_err(|e| Error::Other(format!("ignore pattern {pattern:?}: {e}")))?;
87    }
88    let overrides = overrides_builder
89        .build()
90        .map_err(|e| Error::Other(format!("failed to build overrides: {e}")))?;
91    builder.overrides(overrides);
92
93    let mut entries = Vec::new();
94    for result in builder.build() {
95        let entry = result?;
96        let abs = entry.path();
97        let Ok(rel) = abs.strip_prefix(root) else {
98            continue;
99        };
100        if rel.as_os_str().is_empty() {
101            continue;
102        }
103        let metadata = entry.metadata().map_err(|e| Error::Io {
104            path: abs.to_path_buf(),
105            source: std::io::Error::other(e.to_string()),
106        })?;
107        entries.push(FileEntry {
108            path: rel.to_path_buf(),
109            is_dir: metadata.is_dir(),
110            size: if metadata.is_file() {
111                metadata.len()
112            } else {
113                0
114            },
115        });
116    }
117    Ok(FileIndex { entries })
118}