use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use ignore::{
ParallelVisitor, ParallelVisitorBuilder, WalkBuilder, WalkState, overrides::OverrideBuilder,
};
use crate::error::{Error, Result};
/// One filesystem entry recorded in a [`FileIndex`].
///
/// Entries produced by `walk` are built in `result_to_entry` from the
/// metadata of each visited path.
#[derive(Debug, Clone)]
pub struct FileEntry {
/// Path of the entry. `result_to_entry` stores it with the walk root
/// stripped, i.e. relative to the root that was walked.
pub path: Arc<Path>,
/// `true` when the entry's metadata reports a directory.
pub is_dir: bool,
/// Byte length from the metadata for regular files; `result_to_entry`
/// records `0` for anything that is not a regular file.
pub size: u64,
}
/// Flat collection of [`FileEntry`] values (files and directories together).
///
/// `walk` returns the entries sorted by path; the field is public, so an
/// index built by hand carries whatever order the caller supplied.
#[derive(Debug, Default)]
pub struct FileIndex {
/// All recorded entries, directories included.
pub entries: Vec<FileEntry>,
}
impl FileIndex {
    /// Iterates over every entry that is not a directory, in index order.
    pub fn files(&self) -> impl Iterator<Item = &FileEntry> {
        self.entries.iter().filter(|entry| !entry.is_dir)
    }

    /// Iterates over every directory entry, in index order.
    pub fn dirs(&self) -> impl Iterator<Item = &FileEntry> {
        self.entries.iter().filter(|entry| entry.is_dir)
    }

    /// Sums the `size` of all non-directory entries.
    ///
    /// Sizes stored on directory entries are ignored.
    pub fn total_size(&self) -> u64 {
        self.entries
            .iter()
            .filter(|entry| !entry.is_dir)
            .map(|entry| entry.size)
            .sum::<u64>()
    }

    /// Returns the first non-directory entry whose path equals `rel`.
    ///
    /// Directories never match, even when their path is equal to `rel`.
    pub fn find_file(&self, rel: &Path) -> Option<&FileEntry> {
        self.entries
            .iter()
            .find(|entry| !entry.is_dir && *entry.path == *rel)
    }
}
/// Options controlling how `walk` filters the tree.
#[derive(Debug, Clone)]
pub struct WalkOptions {
/// Forwarded to `WalkBuilder::standard_filters` when the walker is
/// configured in `build_walk_builder`.
pub respect_gitignore: bool,
/// Extra glob patterns to exclude. `build_walk_builder` registers each one
/// as an override, adding a leading `!` when the pattern lacks one.
pub extra_ignores: Vec<String>,
}
impl Default for WalkOptions {
fn default() -> Self {
Self {
respect_gitignore: true,
extra_ignores: Vec::new(),
}
}
}
pub fn walk(root: &Path, opts: &WalkOptions) -> Result<FileIndex> {
let builder = build_walk_builder(root, opts)?;
let out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>> = Arc::new(Mutex::new(Vec::new()));
let error_slot: Arc<Mutex<Option<Error>>> = Arc::new(Mutex::new(None));
let root_owned: Arc<PathBuf> = Arc::new(root.to_path_buf());
let mut visitor_builder = WalkVisitorBuilder {
root: Arc::clone(&root_owned),
error_slot: Arc::clone(&error_slot),
out_entries: Arc::clone(&out_entries),
};
builder.build_parallel().visit(&mut visitor_builder);
if let Some(err) = error_slot.lock().expect("walker error slot lock").take() {
return Err(err);
}
let mut entries: Vec<FileEntry> = out_entries
.lock()
.expect("walker out-entries lock")
.drain(..)
.flatten()
.collect();
entries.sort_unstable_by(|a, b| a.path.cmp(&b.path));
Ok(FileIndex { entries })
}
/// Builds the `WalkBuilder` used by `walk`.
///
/// Standard filters follow `opts.respect_gitignore`; the hidden-file filter
/// is then switched off, symlinks are followed, and a git repository is not
/// required. `.git` and every pattern in `opts.extra_ignores` are registered
/// as `!`-prefixed overrides.
///
/// # Errors
///
/// Returns `Error::Other` when a pattern is rejected or the override set
/// cannot be built.
fn build_walk_builder(root: &Path, opts: &WalkOptions) -> Result<WalkBuilder> {
    let mut excludes = OverrideBuilder::new(root);
    excludes
        .add("!.git")
        .map_err(|e| Error::Other(format!("ignore pattern .git: {e}")))?;

    for raw in &opts.extra_ignores {
        // Normalize to exactly the original text with one leading `!`
        // guaranteed: an existing `!` is kept, a missing one is added.
        let pattern = format!("!{}", raw.strip_prefix('!').unwrap_or(raw.as_str()));
        excludes
            .add(&pattern)
            .map_err(|e| Error::Other(format!("ignore pattern {pattern:?}: {e}")))?;
    }

    let overrides = excludes
        .build()
        .map_err(|e| Error::Other(format!("failed to build overrides: {e}")))?;

    let mut builder = WalkBuilder::new(root);
    builder
        .standard_filters(opts.respect_gitignore)
        .hidden(false)
        .follow_links(true)
        .require_git(false)
        .overrides(overrides);
    Ok(builder)
}
fn result_to_entry(
root: &Path,
result: std::result::Result<ignore::DirEntry, ignore::Error>,
) -> Result<Option<FileEntry>> {
let entry = result?;
let abs = entry.path();
let Ok(rel) = abs.strip_prefix(root) else {
return Ok(None);
};
if rel.as_os_str().is_empty() {
return Ok(None);
}
let metadata = entry.metadata().map_err(|e| Error::Io {
path: abs.to_path_buf(),
source: std::io::Error::other(e.to_string()),
})?;
Ok(Some(FileEntry {
path: Arc::from(rel),
is_dir: metadata.is_dir(),
size: if metadata.is_file() {
metadata.len()
} else {
0
},
}))
}
/// Visitor created by `WalkVisitorBuilder::build` for the parallel walk.
///
/// It collects entries into a local buffer and shares an error slot and an
/// output list with the other visitors of the same walk.
struct WalkVisitor {
/// Root of the walk; passed to `result_to_entry` to relativize paths.
root: Arc<PathBuf>,
/// Entries collected by this visitor; moved into `out_entries` on drop.
entries: Vec<FileEntry>,
/// Shared slot holding the first error recorded by any visitor.
error_slot: Arc<Mutex<Option<Error>>>,
/// Shared list receiving one batch of entries per dropped visitor.
out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
}
impl ParallelVisitor for WalkVisitor {
    /// Handles one walker result.
    ///
    /// Quits immediately when an error is already recorded in the shared
    /// slot. Otherwise a converted entry is buffered locally; on failure the
    /// error is stored in the slot (unless one is already there) and the walk
    /// is asked to quit.
    ///
    /// # Panics
    ///
    /// Panics if the error-slot mutex was poisoned.
    fn visit(&mut self, result: std::result::Result<ignore::DirEntry, ignore::Error>) -> WalkState {
        let already_failed = self
            .error_slot
            .lock()
            .expect("walker error slot lock")
            .is_some();
        if already_failed {
            return WalkState::Quit;
        }

        let failure = match result_to_entry(&self.root, result) {
            Ok(found) => {
                // `found` is an `Option`: this pushes zero or one entry.
                self.entries.extend(found);
                return WalkState::Continue;
            }
            Err(err) => err,
        };

        // Keep only the first error; later ones are dropped.
        let mut slot = self.error_slot.lock().expect("walker error slot lock");
        slot.get_or_insert(failure);
        WalkState::Quit
    }
}
impl Drop for WalkVisitor {
    /// Hands the locally buffered entries to the shared output list.
    ///
    /// Nothing is pushed when the buffer is empty. If the output mutex is
    /// poisoned the batch is discarded rather than panicking inside `drop`.
    fn drop(&mut self) {
        if self.entries.is_empty() {
            return;
        }
        let batch = std::mem::take(&mut self.entries);
        match self.out_entries.lock() {
            Ok(mut sink) => sink.push(batch),
            Err(_) => {}
        }
    }
}
/// Factory for [`WalkVisitor`] values; `walk` passes it to the parallel
/// walker's `visit` call.
///
/// Every visitor it builds shares the same root, error slot and output list.
struct WalkVisitorBuilder {
/// Root of the walk, cloned into each visitor.
root: Arc<PathBuf>,
/// Shared first-error slot, cloned into each visitor.
error_slot: Arc<Mutex<Option<Error>>>,
/// Shared list of per-visitor entry batches, cloned into each visitor.
out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
}
impl<'s> ParallelVisitorBuilder<'s> for WalkVisitorBuilder {
    /// Creates a fresh [`WalkVisitor`] with an empty local buffer that shares
    /// this builder's root, error slot and output list.
    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
        let Self {
            root,
            error_slot,
            out_entries,
        } = &*self;
        let visitor = WalkVisitor {
            root: Arc::clone(root),
            entries: Vec::new(),
            error_slot: Arc::clone(error_slot),
            out_entries: Arc::clone(out_entries),
        };
        Box::new(visitor)
    }
}
// Unit tests for the `FileIndex` accessors and for `walk` against real
// temporary directory trees.
#[cfg(test)]
mod tests {
use super::*;
// Creates a fresh temporary directory with a recognizable prefix.
fn td() -> tempfile::TempDir {
tempfile::Builder::new()
.prefix("alint-walker-test-")
.tempdir()
.unwrap()
}
// Writes `content` to `root/rel`, creating parent directories as needed.
fn touch(root: &Path, rel: &str, content: &[u8]) {
let abs = root.join(rel);
if let Some(parent) = abs.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(abs, content).unwrap();
}
// Renders every indexed path as a string with `/` separators so the
// assertions read the same on platforms that use `\`.
fn paths(idx: &FileIndex) -> Vec<String> {
idx.entries
.iter()
.map(|e| e.path.display().to_string().replace('\\', "/"))
.collect()
}
// `files()` must yield only non-directory entries.
#[test]
fn fileindex_files_filters_directories_out() {
let idx = FileIndex {
entries: vec![
FileEntry {
path: Path::new("a").into(),
is_dir: true,
size: 0,
},
FileEntry {
path: Path::new("a/x.rs").into(),
is_dir: false,
size: 5,
},
],
};
let files: Vec<_> = idx.files().collect();
assert_eq!(files.len(), 1);
assert_eq!(&*files[0].path, Path::new("a/x.rs"));
}
// `dirs()` must yield only directory entries.
#[test]
fn fileindex_dirs_filters_files_out() {
let idx = FileIndex {
entries: vec![
FileEntry {
path: Path::new("a").into(),
is_dir: true,
size: 0,
},
FileEntry {
path: Path::new("a/x.rs").into(),
is_dir: false,
size: 5,
},
],
};
let dirs: Vec<_> = idx.dirs().collect();
assert_eq!(dirs.len(), 1);
assert_eq!(&*dirs[0].path, Path::new("a"));
}
// `total_size()` sums file sizes only.
#[test]
fn fileindex_total_size_sums_files_only() {
let idx = FileIndex {
entries: vec![
FileEntry {
path: Path::new("a").into(),
is_dir: true,
// A non-zero size on a directory must not count toward the total.
size: 999, },
FileEntry {
path: Path::new("a/x.rs").into(),
is_dir: false,
size: 100,
},
FileEntry {
path: Path::new("a/y.rs").into(),
is_dir: false,
size: 50,
},
],
};
assert_eq!(idx.total_size(), 150);
}
// `find_file()` matches files by path and never returns a directory.
#[test]
fn fileindex_find_file_returns_match_or_none() {
let idx = FileIndex {
entries: vec![
FileEntry {
path: Path::new("a/x.rs").into(),
is_dir: false,
size: 0,
},
FileEntry {
path: Path::new("b").into(),
is_dir: true,
size: 0,
},
],
};
assert!(idx.find_file(Path::new("a/x.rs")).is_some());
assert!(idx.find_file(Path::new("missing.rs")).is_none());
assert!(idx.find_file(Path::new("b")).is_none());
}
// `.git` is excluded even when gitignore handling is turned off.
#[test]
fn walk_excludes_dot_git_directory() {
let tmp = td();
touch(tmp.path(), "README.md", b"# demo\n");
touch(tmp.path(), ".git/config", b"[core]\n");
touch(tmp.path(), ".git/HEAD", b"ref: refs/heads/main\n");
let idx = walk(
tmp.path(),
&WalkOptions {
respect_gitignore: false,
extra_ignores: Vec::new(),
},
)
.unwrap();
let p = paths(&idx);
assert!(p.contains(&"README.md".into()), "missing README.md: {p:?}");
assert!(
!p.iter().any(|s| s.starts_with(".git")),
".git was not excluded: {p:?}",
);
}
// With `respect_gitignore: true`, paths listed in `.gitignore` are skipped.
#[test]
fn walk_respects_gitignore_when_enabled() {
let tmp = td();
touch(tmp.path(), ".gitignore", b"target/\nignored.txt\n");
touch(tmp.path(), "src/main.rs", b"fn main() {}\n");
touch(tmp.path(), "target/debug/build.log", b"junk");
touch(tmp.path(), "ignored.txt", b"junk");
let idx = walk(
tmp.path(),
&WalkOptions {
respect_gitignore: true,
extra_ignores: Vec::new(),
},
)
.unwrap();
let p = paths(&idx);
assert!(p.contains(&"src/main.rs".into()));
assert!(
!p.iter().any(|s| s.starts_with("target")),
"target/ should be ignored: {p:?}",
);
assert!(
!p.contains(&"ignored.txt".into()),
"ignored.txt should be filtered: {p:?}",
);
}
// With `respect_gitignore: false`, `.gitignore` rules are not applied.
#[test]
fn walk_includes_gitignored_paths_when_respect_gitignore_false() {
let tmp = td();
touch(tmp.path(), ".gitignore", b"ignored.txt\n");
touch(tmp.path(), "ignored.txt", b"x");
touch(tmp.path(), "kept.txt", b"y");
let idx = walk(
tmp.path(),
&WalkOptions {
respect_gitignore: false,
extra_ignores: Vec::new(),
},
)
.unwrap();
let p = paths(&idx);
assert!(
p.contains(&"ignored.txt".into()),
"respect_gitignore=false should include it: {p:?}",
);
assert!(p.contains(&"kept.txt".into()));
}
// Patterns in `extra_ignores` act as excludes. Only files are checked
// here; the assertion does not cover the `vendor` directory entry itself.
#[test]
fn walk_applies_extra_ignores_as_excludes() {
let tmp = td();
touch(tmp.path(), "src/keep.rs", b"x");
touch(tmp.path(), "vendor/skip.rs", b"y");
let idx = walk(
tmp.path(),
&WalkOptions {
respect_gitignore: false,
extra_ignores: vec!["vendor/**".to_string()],
},
)
.unwrap();
let p = paths(&idx);
assert!(p.contains(&"src/keep.rs".into()));
let file_paths: Vec<&FileEntry> = idx.files().collect();
assert!(
!file_paths.iter().any(|e| e.path.starts_with("vendor")),
"no file under vendor/ should be indexed: {p:?}",
);
}
// A malformed glob in `extra_ignores` must surface as an error from `walk`.
#[test]
fn walk_invalid_extra_ignore_pattern_surfaces_error() {
let tmp = td();
touch(tmp.path(), "a.txt", b"x");
let err = walk(
tmp.path(),
&WalkOptions {
respect_gitignore: false,
extra_ignores: vec!["[unterminated".to_string()],
},
);
assert!(err.is_err(), "bad pattern should fail: {err:?}");
}
// File entries carry the on-disk byte length.
#[test]
fn walk_emits_files_with_correct_size() {
let tmp = td();
touch(tmp.path(), "a.txt", &[0u8; 1024]);
let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
let entry = idx
.files()
.find(|e| &*e.path == Path::new("a.txt"))
.expect("a.txt entry");
assert_eq!(entry.size, 1024);
assert!(!entry.is_dir);
}
// Pins the values produced by `WalkOptions::default()`.
#[test]
fn default_walk_options_respects_gitignore_and_no_extra_ignores() {
let opts = WalkOptions::default();
assert!(opts.respect_gitignore);
assert!(opts.extra_ignores.is_empty());
}
// Two walks over the same tree must return identical path lists.
#[test]
fn walk_output_is_deterministic_across_runs() {
let tmp = td();
for i in 0..50 {
touch(
tmp.path(),
&format!("dir_{}/file_{i}.rs", i % 5),
b"// hello\n",
);
}
let opts = WalkOptions::default();
let a = walk(tmp.path(), &opts).unwrap();
let b = walk(tmp.path(), &opts).unwrap();
assert_eq!(paths(&a), paths(&b));
}
// The returned entries are already in path order.
#[test]
fn walk_output_is_alphabetically_sorted() {
let tmp = td();
touch(tmp.path(), "z.txt", b"z");
touch(tmp.path(), "a.txt", b"a");
touch(tmp.path(), "m.txt", b"m");
touch(tmp.path(), "sub/b.txt", b"b");
touch(tmp.path(), "sub/a.txt", b"a");
let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
let actual: Vec<_> = idx.entries.iter().map(|e| e.path.clone()).collect();
let mut expected = actual.clone();
expected.sort_unstable();
assert_eq!(actual, expected, "walker output must be path-sorted");
}
// A larger tree: every file must be indexed exactly once and the file
// list must still be sorted.
#[test]
fn walk_handles_thousand_files() {
let tmp = td();
let n = 1_000usize;
for i in 0..n {
touch(tmp.path(), &format!("d{}/f{i:04}.txt", i % 16), b"x");
}
let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
let file_paths: Vec<_> = idx.files().map(|e| e.path.clone()).collect();
assert_eq!(
file_paths.len(),
n,
"expected {n} files, got {}",
file_paths.len(),
);
let mut expected = file_paths.clone();
expected.sort_unstable();
assert_eq!(
file_paths, expected,
"concurrent walker output must remain path-sorted",
);
}
}