1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3use std::sync::{Arc, Mutex, OnceLock};
4
5use ignore::{
6 ParallelVisitor, ParallelVisitorBuilder, WalkBuilder, WalkState, overrides::OverrideBuilder,
7};
8
9use crate::error::{Error, Result};
10
11#[derive(Debug, Clone)]
19pub struct FileEntry {
20 pub path: Arc<Path>,
22 pub is_dir: bool,
23 pub size: u64,
24}
25
26#[derive(Debug, Default)]
39pub struct FileIndex {
40 pub entries: Vec<FileEntry>,
41 path_set: OnceLock<HashSet<Arc<Path>>>,
42}
43
44impl FileIndex {
45 pub fn from_entries(entries: Vec<FileEntry>) -> Self {
50 Self {
51 entries,
52 path_set: OnceLock::new(),
53 }
54 }
55
56 pub fn files(&self) -> impl Iterator<Item = &FileEntry> {
57 self.entries.iter().filter(|e| !e.is_dir)
58 }
59
60 pub fn dirs(&self) -> impl Iterator<Item = &FileEntry> {
61 self.entries.iter().filter(|e| e.is_dir)
62 }
63
64 pub fn total_size(&self) -> u64 {
65 self.files().map(|f| f.size).sum()
66 }
67
68 pub fn file_path_set(&self) -> &HashSet<Arc<Path>> {
73 self.path_set.get_or_init(|| {
74 self.entries
75 .iter()
76 .filter(|e| !e.is_dir)
77 .map(|e| Arc::clone(&e.path))
78 .collect()
79 })
80 }
81
82 pub fn contains_file(&self, rel: &Path) -> bool {
88 self.file_path_set().contains(rel)
89 }
90
91 pub fn find_file(&self, rel: &Path) -> Option<&FileEntry> {
98 if !self.contains_file(rel) {
99 return None;
100 }
101 self.files().find(|e| &*e.path == rel)
102 }
103}
104
105#[derive(Debug, Clone)]
106pub struct WalkOptions {
107 pub respect_gitignore: bool,
108 pub extra_ignores: Vec<String>,
109}
110
111impl Default for WalkOptions {
112 fn default() -> Self {
113 Self {
114 respect_gitignore: true,
115 extra_ignores: Vec::new(),
116 }
117 }
118}
119
120pub fn walk(root: &Path, opts: &WalkOptions) -> Result<FileIndex> {
121 let builder = build_walk_builder(root, opts)?;
122
123 let out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>> = Arc::new(Mutex::new(Vec::new()));
130 let error_slot: Arc<Mutex<Option<Error>>> = Arc::new(Mutex::new(None));
131 let root_owned: Arc<PathBuf> = Arc::new(root.to_path_buf());
132
133 let mut visitor_builder = WalkVisitorBuilder {
134 root: Arc::clone(&root_owned),
135 error_slot: Arc::clone(&error_slot),
136 out_entries: Arc::clone(&out_entries),
137 };
138 builder.build_parallel().visit(&mut visitor_builder);
139
140 if let Some(err) = error_slot.lock().expect("walker error slot lock").take() {
141 return Err(err);
142 }
143
144 let mut entries: Vec<FileEntry> = out_entries
150 .lock()
151 .expect("walker out-entries lock")
152 .drain(..)
153 .flatten()
154 .collect();
155 entries.sort_unstable_by(|a, b| a.path.cmp(&b.path));
156 Ok(FileIndex::from_entries(entries))
157}
158
159fn build_walk_builder(root: &Path, opts: &WalkOptions) -> Result<WalkBuilder> {
164 let mut builder = WalkBuilder::new(root);
165 builder
166 .standard_filters(opts.respect_gitignore)
167 .hidden(false)
168 .follow_links(true)
169 .require_git(false);
170
171 let mut overrides_builder = OverrideBuilder::new(root);
179 overrides_builder
180 .add("!.git")
181 .map_err(|e| Error::Other(format!("ignore pattern .git: {e}")))?;
182 for pattern in &opts.extra_ignores {
183 let pattern = if pattern.starts_with('!') {
184 pattern.clone()
185 } else {
186 format!("!{pattern}")
187 };
188 overrides_builder
189 .add(&pattern)
190 .map_err(|e| Error::Other(format!("ignore pattern {pattern:?}: {e}")))?;
191 }
192 let overrides = overrides_builder
193 .build()
194 .map_err(|e| Error::Other(format!("failed to build overrides: {e}")))?;
195 builder.overrides(overrides);
196 Ok(builder)
197}
198
199fn result_to_entry(
206 root: &Path,
207 result: std::result::Result<ignore::DirEntry, ignore::Error>,
208) -> Result<Option<FileEntry>> {
209 let entry = result?;
210 let abs = entry.path();
211 let Ok(rel) = abs.strip_prefix(root) else {
212 return Ok(None);
213 };
214 if rel.as_os_str().is_empty() {
215 return Ok(None);
216 }
217 let metadata = entry.metadata().map_err(|e| Error::Io {
218 path: abs.to_path_buf(),
219 source: std::io::Error::other(e.to_string()),
220 })?;
221 Ok(Some(FileEntry {
222 path: Arc::from(rel),
223 is_dir: metadata.is_dir(),
224 size: if metadata.is_file() {
225 metadata.len()
226 } else {
227 0
228 },
229 }))
230}
231
232struct WalkVisitor {
238 root: Arc<PathBuf>,
239 entries: Vec<FileEntry>,
240 error_slot: Arc<Mutex<Option<Error>>>,
241 out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
242}
243
244impl ParallelVisitor for WalkVisitor {
245 fn visit(&mut self, result: std::result::Result<ignore::DirEntry, ignore::Error>) -> WalkState {
246 if self
250 .error_slot
251 .lock()
252 .expect("walker error slot lock")
253 .is_some()
254 {
255 return WalkState::Quit;
256 }
257 match result_to_entry(&self.root, result) {
258 Ok(Some(entry)) => {
259 self.entries.push(entry);
260 WalkState::Continue
261 }
262 Ok(None) => WalkState::Continue,
263 Err(err) => {
264 let mut slot = self.error_slot.lock().expect("walker error slot lock");
265 if slot.is_none() {
266 *slot = Some(err);
267 }
268 WalkState::Quit
269 }
270 }
271 }
272}
273
274impl Drop for WalkVisitor {
275 fn drop(&mut self) {
276 let local = std::mem::take(&mut self.entries);
277 if local.is_empty() {
278 return;
279 }
280 if let Ok(mut out) = self.out_entries.lock() {
281 out.push(local);
282 }
283 }
284}
285
286struct WalkVisitorBuilder {
287 root: Arc<PathBuf>,
288 error_slot: Arc<Mutex<Option<Error>>>,
289 out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
290}
291
292impl<'s> ParallelVisitorBuilder<'s> for WalkVisitorBuilder {
293 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
294 Box::new(WalkVisitor {
295 root: Arc::clone(&self.root),
296 entries: Vec::new(),
297 error_slot: Arc::clone(&self.error_slot),
298 out_entries: Arc::clone(&self.out_entries),
299 })
300 }
301}
302
303#[cfg(test)]
304mod tests {
305 use super::*;
306
307 fn td() -> tempfile::TempDir {
308 tempfile::Builder::new()
309 .prefix("alint-walker-test-")
310 .tempdir()
311 .unwrap()
312 }
313
314 fn touch(root: &Path, rel: &str, content: &[u8]) {
315 let abs = root.join(rel);
316 if let Some(parent) = abs.parent() {
317 std::fs::create_dir_all(parent).unwrap();
318 }
319 std::fs::write(abs, content).unwrap();
320 }
321
322 fn paths(idx: &FileIndex) -> Vec<String> {
323 idx.entries
327 .iter()
328 .map(|e| e.path.display().to_string().replace('\\', "/"))
329 .collect()
330 }
331
332 #[test]
333 fn fileindex_files_filters_directories_out() {
334 let idx = FileIndex::from_entries(vec![
335 FileEntry {
336 path: Path::new("a").into(),
337 is_dir: true,
338 size: 0,
339 },
340 FileEntry {
341 path: Path::new("a/x.rs").into(),
342 is_dir: false,
343 size: 5,
344 },
345 ]);
346 let files: Vec<_> = idx.files().collect();
347 assert_eq!(files.len(), 1);
348 assert_eq!(&*files[0].path, Path::new("a/x.rs"));
349 }
350
351 #[test]
352 fn fileindex_dirs_filters_files_out() {
353 let idx = FileIndex::from_entries(vec![
354 FileEntry {
355 path: Path::new("a").into(),
356 is_dir: true,
357 size: 0,
358 },
359 FileEntry {
360 path: Path::new("a/x.rs").into(),
361 is_dir: false,
362 size: 5,
363 },
364 ]);
365 let dirs: Vec<_> = idx.dirs().collect();
366 assert_eq!(dirs.len(), 1);
367 assert_eq!(&*dirs[0].path, Path::new("a"));
368 }
369
370 #[test]
371 fn fileindex_total_size_sums_files_only() {
372 let idx = FileIndex::from_entries(vec![
373 FileEntry {
374 path: Path::new("a").into(),
375 is_dir: true,
376 size: 999, },
378 FileEntry {
379 path: Path::new("a/x.rs").into(),
380 is_dir: false,
381 size: 100,
382 },
383 FileEntry {
384 path: Path::new("a/y.rs").into(),
385 is_dir: false,
386 size: 50,
387 },
388 ]);
389 assert_eq!(idx.total_size(), 150);
392 }
393
394 #[test]
395 fn fileindex_find_file_returns_match_or_none() {
396 let idx = FileIndex::from_entries(vec![
397 FileEntry {
398 path: Path::new("a/x.rs").into(),
399 is_dir: false,
400 size: 0,
401 },
402 FileEntry {
403 path: Path::new("b").into(),
404 is_dir: true,
405 size: 0,
406 },
407 ]);
408 assert!(idx.find_file(Path::new("a/x.rs")).is_some());
409 assert!(idx.find_file(Path::new("missing.rs")).is_none());
410 assert!(idx.find_file(Path::new("b")).is_none());
413 }
414
415 #[test]
416 fn walk_excludes_dot_git_directory() {
417 let tmp = td();
418 touch(tmp.path(), "README.md", b"# demo\n");
419 touch(tmp.path(), ".git/config", b"[core]\n");
421 touch(tmp.path(), ".git/HEAD", b"ref: refs/heads/main\n");
422
423 let idx = walk(
424 tmp.path(),
425 &WalkOptions {
426 respect_gitignore: false,
427 extra_ignores: Vec::new(),
428 },
429 )
430 .unwrap();
431
432 let p = paths(&idx);
433 assert!(p.contains(&"README.md".into()), "missing README.md: {p:?}");
434 assert!(
435 !p.iter().any(|s| s.starts_with(".git")),
436 ".git was not excluded: {p:?}",
437 );
438 }
439
440 #[test]
441 fn walk_respects_gitignore_when_enabled() {
442 let tmp = td();
443 touch(tmp.path(), ".gitignore", b"target/\nignored.txt\n");
444 touch(tmp.path(), "src/main.rs", b"fn main() {}\n");
445 touch(tmp.path(), "target/debug/build.log", b"junk");
446 touch(tmp.path(), "ignored.txt", b"junk");
447
448 let idx = walk(
449 tmp.path(),
450 &WalkOptions {
451 respect_gitignore: true,
452 extra_ignores: Vec::new(),
453 },
454 )
455 .unwrap();
456
457 let p = paths(&idx);
458 assert!(p.contains(&"src/main.rs".into()));
459 assert!(
460 !p.iter().any(|s| s.starts_with("target")),
461 "target/ should be ignored: {p:?}",
462 );
463 assert!(
464 !p.contains(&"ignored.txt".into()),
465 "ignored.txt should be filtered: {p:?}",
466 );
467 }
468
469 #[test]
470 fn walk_includes_gitignored_paths_when_respect_gitignore_false() {
471 let tmp = td();
472 touch(tmp.path(), ".gitignore", b"ignored.txt\n");
473 touch(tmp.path(), "ignored.txt", b"x");
474 touch(tmp.path(), "kept.txt", b"y");
475
476 let idx = walk(
477 tmp.path(),
478 &WalkOptions {
479 respect_gitignore: false,
480 extra_ignores: Vec::new(),
481 },
482 )
483 .unwrap();
484 let p = paths(&idx);
485 assert!(
486 p.contains(&"ignored.txt".into()),
487 "respect_gitignore=false should include it: {p:?}",
488 );
489 assert!(p.contains(&"kept.txt".into()));
490 }
491
492 #[test]
493 fn walk_applies_extra_ignores_as_excludes() {
494 let tmp = td();
495 touch(tmp.path(), "src/keep.rs", b"x");
496 touch(tmp.path(), "vendor/skip.rs", b"y");
497
498 let idx = walk(
499 tmp.path(),
500 &WalkOptions {
501 respect_gitignore: false,
502 extra_ignores: vec!["vendor/**".to_string()],
503 },
504 )
505 .unwrap();
506 let p = paths(&idx);
507 assert!(p.contains(&"src/keep.rs".into()));
508 let file_paths: Vec<&FileEntry> = idx.files().collect();
513 assert!(
514 !file_paths.iter().any(|e| e.path.starts_with("vendor")),
515 "no file under vendor/ should be indexed: {p:?}",
516 );
517 }
518
519 #[test]
520 fn walk_invalid_extra_ignore_pattern_surfaces_error() {
521 let tmp = td();
522 touch(tmp.path(), "a.txt", b"x");
523 let err = walk(
524 tmp.path(),
525 &WalkOptions {
526 respect_gitignore: false,
527 extra_ignores: vec!["[unterminated".to_string()],
528 },
529 );
530 assert!(err.is_err(), "bad pattern should fail: {err:?}");
531 }
532
533 #[test]
534 fn walk_emits_files_with_correct_size() {
535 let tmp = td();
536 touch(tmp.path(), "a.txt", &[0u8; 1024]);
537 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
538 let entry = idx
539 .files()
540 .find(|e| &*e.path == Path::new("a.txt"))
541 .expect("a.txt entry");
542 assert_eq!(entry.size, 1024);
543 assert!(!entry.is_dir);
544 }
545
546 #[test]
547 fn default_walk_options_respects_gitignore_and_no_extra_ignores() {
548 let opts = WalkOptions::default();
549 assert!(opts.respect_gitignore);
550 assert!(opts.extra_ignores.is_empty());
551 }
552
553 #[test]
554 fn walk_output_is_deterministic_across_runs() {
555 let tmp = td();
561 for i in 0..50 {
562 touch(
563 tmp.path(),
564 &format!("dir_{}/file_{i}.rs", i % 5),
565 b"// hello\n",
566 );
567 }
568 let opts = WalkOptions::default();
569 let a = walk(tmp.path(), &opts).unwrap();
570 let b = walk(tmp.path(), &opts).unwrap();
571 assert_eq!(paths(&a), paths(&b));
572 }
573
574 #[test]
575 fn walk_output_is_alphabetically_sorted() {
576 let tmp = td();
581 touch(tmp.path(), "z.txt", b"z");
582 touch(tmp.path(), "a.txt", b"a");
583 touch(tmp.path(), "m.txt", b"m");
584 touch(tmp.path(), "sub/b.txt", b"b");
585 touch(tmp.path(), "sub/a.txt", b"a");
586
587 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
588 let actual: Vec<_> = idx.entries.iter().map(|e| e.path.clone()).collect();
589 let mut expected = actual.clone();
590 expected.sort_unstable();
591 assert_eq!(actual, expected, "walker output must be path-sorted");
592 }
593
594 #[test]
595 fn walk_handles_thousand_files() {
596 let tmp = td();
602 let n = 1_000usize;
603 for i in 0..n {
604 touch(tmp.path(), &format!("d{}/f{i:04}.txt", i % 16), b"x");
605 }
606 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
607
608 let file_paths: Vec<_> = idx.files().map(|e| e.path.clone()).collect();
609 assert_eq!(
610 file_paths.len(),
611 n,
612 "expected {n} files, got {}",
613 file_paths.len(),
614 );
615
616 let mut expected = file_paths.clone();
617 expected.sort_unstable();
618 assert_eq!(
619 file_paths, expected,
620 "concurrent walker output must remain path-sorted",
621 );
622 }
623}