1use std::path::{Path, PathBuf};
2use std::sync::{Arc, Mutex};
3
4use ignore::{
5 ParallelVisitor, ParallelVisitorBuilder, WalkBuilder, WalkState, overrides::OverrideBuilder,
6};
7
8use crate::error::{Error, Result};
9
10#[derive(Debug, Clone)]
18pub struct FileEntry {
19 pub path: Arc<Path>,
21 pub is_dir: bool,
22 pub size: u64,
23}
24
25#[derive(Debug, Default)]
28pub struct FileIndex {
29 pub entries: Vec<FileEntry>,
30}
31
32impl FileIndex {
33 pub fn files(&self) -> impl Iterator<Item = &FileEntry> {
34 self.entries.iter().filter(|e| !e.is_dir)
35 }
36
37 pub fn dirs(&self) -> impl Iterator<Item = &FileEntry> {
38 self.entries.iter().filter(|e| e.is_dir)
39 }
40
41 pub fn total_size(&self) -> u64 {
42 self.files().map(|f| f.size).sum()
43 }
44
45 pub fn find_file(&self, rel: &Path) -> Option<&FileEntry> {
49 self.files().find(|e| &*e.path == rel)
50 }
51}
52
53#[derive(Debug, Clone)]
54pub struct WalkOptions {
55 pub respect_gitignore: bool,
56 pub extra_ignores: Vec<String>,
57}
58
59impl Default for WalkOptions {
60 fn default() -> Self {
61 Self {
62 respect_gitignore: true,
63 extra_ignores: Vec::new(),
64 }
65 }
66}
67
68pub fn walk(root: &Path, opts: &WalkOptions) -> Result<FileIndex> {
69 let builder = build_walk_builder(root, opts)?;
70
71 let out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>> = Arc::new(Mutex::new(Vec::new()));
78 let error_slot: Arc<Mutex<Option<Error>>> = Arc::new(Mutex::new(None));
79 let root_owned: Arc<PathBuf> = Arc::new(root.to_path_buf());
80
81 let mut visitor_builder = WalkVisitorBuilder {
82 root: Arc::clone(&root_owned),
83 error_slot: Arc::clone(&error_slot),
84 out_entries: Arc::clone(&out_entries),
85 };
86 builder.build_parallel().visit(&mut visitor_builder);
87
88 if let Some(err) = error_slot.lock().expect("walker error slot lock").take() {
89 return Err(err);
90 }
91
92 let mut entries: Vec<FileEntry> = out_entries
98 .lock()
99 .expect("walker out-entries lock")
100 .drain(..)
101 .flatten()
102 .collect();
103 entries.sort_unstable_by(|a, b| a.path.cmp(&b.path));
104 Ok(FileIndex { entries })
105}
106
107fn build_walk_builder(root: &Path, opts: &WalkOptions) -> Result<WalkBuilder> {
112 let mut builder = WalkBuilder::new(root);
113 builder
114 .standard_filters(opts.respect_gitignore)
115 .hidden(false)
116 .follow_links(true)
117 .require_git(false);
118
119 let mut overrides_builder = OverrideBuilder::new(root);
127 overrides_builder
128 .add("!.git")
129 .map_err(|e| Error::Other(format!("ignore pattern .git: {e}")))?;
130 for pattern in &opts.extra_ignores {
131 let pattern = if pattern.starts_with('!') {
132 pattern.clone()
133 } else {
134 format!("!{pattern}")
135 };
136 overrides_builder
137 .add(&pattern)
138 .map_err(|e| Error::Other(format!("ignore pattern {pattern:?}: {e}")))?;
139 }
140 let overrides = overrides_builder
141 .build()
142 .map_err(|e| Error::Other(format!("failed to build overrides: {e}")))?;
143 builder.overrides(overrides);
144 Ok(builder)
145}
146
147fn result_to_entry(
154 root: &Path,
155 result: std::result::Result<ignore::DirEntry, ignore::Error>,
156) -> Result<Option<FileEntry>> {
157 let entry = result?;
158 let abs = entry.path();
159 let Ok(rel) = abs.strip_prefix(root) else {
160 return Ok(None);
161 };
162 if rel.as_os_str().is_empty() {
163 return Ok(None);
164 }
165 let metadata = entry.metadata().map_err(|e| Error::Io {
166 path: abs.to_path_buf(),
167 source: std::io::Error::other(e.to_string()),
168 })?;
169 Ok(Some(FileEntry {
170 path: Arc::from(rel),
171 is_dir: metadata.is_dir(),
172 size: if metadata.is_file() {
173 metadata.len()
174 } else {
175 0
176 },
177 }))
178}
179
180struct WalkVisitor {
186 root: Arc<PathBuf>,
187 entries: Vec<FileEntry>,
188 error_slot: Arc<Mutex<Option<Error>>>,
189 out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
190}
191
192impl ParallelVisitor for WalkVisitor {
193 fn visit(&mut self, result: std::result::Result<ignore::DirEntry, ignore::Error>) -> WalkState {
194 if self
198 .error_slot
199 .lock()
200 .expect("walker error slot lock")
201 .is_some()
202 {
203 return WalkState::Quit;
204 }
205 match result_to_entry(&self.root, result) {
206 Ok(Some(entry)) => {
207 self.entries.push(entry);
208 WalkState::Continue
209 }
210 Ok(None) => WalkState::Continue,
211 Err(err) => {
212 let mut slot = self.error_slot.lock().expect("walker error slot lock");
213 if slot.is_none() {
214 *slot = Some(err);
215 }
216 WalkState::Quit
217 }
218 }
219 }
220}
221
222impl Drop for WalkVisitor {
223 fn drop(&mut self) {
224 let local = std::mem::take(&mut self.entries);
225 if local.is_empty() {
226 return;
227 }
228 if let Ok(mut out) = self.out_entries.lock() {
229 out.push(local);
230 }
231 }
232}
233
234struct WalkVisitorBuilder {
235 root: Arc<PathBuf>,
236 error_slot: Arc<Mutex<Option<Error>>>,
237 out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
238}
239
240impl<'s> ParallelVisitorBuilder<'s> for WalkVisitorBuilder {
241 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
242 Box::new(WalkVisitor {
243 root: Arc::clone(&self.root),
244 entries: Vec::new(),
245 error_slot: Arc::clone(&self.error_slot),
246 out_entries: Arc::clone(&self.out_entries),
247 })
248 }
249}
250
251#[cfg(test)]
252mod tests {
253 use super::*;
254
255 fn td() -> tempfile::TempDir {
256 tempfile::Builder::new()
257 .prefix("alint-walker-test-")
258 .tempdir()
259 .unwrap()
260 }
261
262 fn touch(root: &Path, rel: &str, content: &[u8]) {
263 let abs = root.join(rel);
264 if let Some(parent) = abs.parent() {
265 std::fs::create_dir_all(parent).unwrap();
266 }
267 std::fs::write(abs, content).unwrap();
268 }
269
270 fn paths(idx: &FileIndex) -> Vec<String> {
271 idx.entries
275 .iter()
276 .map(|e| e.path.display().to_string().replace('\\', "/"))
277 .collect()
278 }
279
280 #[test]
281 fn fileindex_files_filters_directories_out() {
282 let idx = FileIndex {
283 entries: vec![
284 FileEntry {
285 path: Path::new("a").into(),
286 is_dir: true,
287 size: 0,
288 },
289 FileEntry {
290 path: Path::new("a/x.rs").into(),
291 is_dir: false,
292 size: 5,
293 },
294 ],
295 };
296 let files: Vec<_> = idx.files().collect();
297 assert_eq!(files.len(), 1);
298 assert_eq!(&*files[0].path, Path::new("a/x.rs"));
299 }
300
301 #[test]
302 fn fileindex_dirs_filters_files_out() {
303 let idx = FileIndex {
304 entries: vec![
305 FileEntry {
306 path: Path::new("a").into(),
307 is_dir: true,
308 size: 0,
309 },
310 FileEntry {
311 path: Path::new("a/x.rs").into(),
312 is_dir: false,
313 size: 5,
314 },
315 ],
316 };
317 let dirs: Vec<_> = idx.dirs().collect();
318 assert_eq!(dirs.len(), 1);
319 assert_eq!(&*dirs[0].path, Path::new("a"));
320 }
321
322 #[test]
323 fn fileindex_total_size_sums_files_only() {
324 let idx = FileIndex {
325 entries: vec![
326 FileEntry {
327 path: Path::new("a").into(),
328 is_dir: true,
329 size: 999, },
331 FileEntry {
332 path: Path::new("a/x.rs").into(),
333 is_dir: false,
334 size: 100,
335 },
336 FileEntry {
337 path: Path::new("a/y.rs").into(),
338 is_dir: false,
339 size: 50,
340 },
341 ],
342 };
343 assert_eq!(idx.total_size(), 150);
346 }
347
348 #[test]
349 fn fileindex_find_file_returns_match_or_none() {
350 let idx = FileIndex {
351 entries: vec![
352 FileEntry {
353 path: Path::new("a/x.rs").into(),
354 is_dir: false,
355 size: 0,
356 },
357 FileEntry {
358 path: Path::new("b").into(),
359 is_dir: true,
360 size: 0,
361 },
362 ],
363 };
364 assert!(idx.find_file(Path::new("a/x.rs")).is_some());
365 assert!(idx.find_file(Path::new("missing.rs")).is_none());
366 assert!(idx.find_file(Path::new("b")).is_none());
369 }
370
371 #[test]
372 fn walk_excludes_dot_git_directory() {
373 let tmp = td();
374 touch(tmp.path(), "README.md", b"# demo\n");
375 touch(tmp.path(), ".git/config", b"[core]\n");
377 touch(tmp.path(), ".git/HEAD", b"ref: refs/heads/main\n");
378
379 let idx = walk(
380 tmp.path(),
381 &WalkOptions {
382 respect_gitignore: false,
383 extra_ignores: Vec::new(),
384 },
385 )
386 .unwrap();
387
388 let p = paths(&idx);
389 assert!(p.contains(&"README.md".into()), "missing README.md: {p:?}");
390 assert!(
391 !p.iter().any(|s| s.starts_with(".git")),
392 ".git was not excluded: {p:?}",
393 );
394 }
395
396 #[test]
397 fn walk_respects_gitignore_when_enabled() {
398 let tmp = td();
399 touch(tmp.path(), ".gitignore", b"target/\nignored.txt\n");
400 touch(tmp.path(), "src/main.rs", b"fn main() {}\n");
401 touch(tmp.path(), "target/debug/build.log", b"junk");
402 touch(tmp.path(), "ignored.txt", b"junk");
403
404 let idx = walk(
405 tmp.path(),
406 &WalkOptions {
407 respect_gitignore: true,
408 extra_ignores: Vec::new(),
409 },
410 )
411 .unwrap();
412
413 let p = paths(&idx);
414 assert!(p.contains(&"src/main.rs".into()));
415 assert!(
416 !p.iter().any(|s| s.starts_with("target")),
417 "target/ should be ignored: {p:?}",
418 );
419 assert!(
420 !p.contains(&"ignored.txt".into()),
421 "ignored.txt should be filtered: {p:?}",
422 );
423 }
424
425 #[test]
426 fn walk_includes_gitignored_paths_when_respect_gitignore_false() {
427 let tmp = td();
428 touch(tmp.path(), ".gitignore", b"ignored.txt\n");
429 touch(tmp.path(), "ignored.txt", b"x");
430 touch(tmp.path(), "kept.txt", b"y");
431
432 let idx = walk(
433 tmp.path(),
434 &WalkOptions {
435 respect_gitignore: false,
436 extra_ignores: Vec::new(),
437 },
438 )
439 .unwrap();
440 let p = paths(&idx);
441 assert!(
442 p.contains(&"ignored.txt".into()),
443 "respect_gitignore=false should include it: {p:?}",
444 );
445 assert!(p.contains(&"kept.txt".into()));
446 }
447
448 #[test]
449 fn walk_applies_extra_ignores_as_excludes() {
450 let tmp = td();
451 touch(tmp.path(), "src/keep.rs", b"x");
452 touch(tmp.path(), "vendor/skip.rs", b"y");
453
454 let idx = walk(
455 tmp.path(),
456 &WalkOptions {
457 respect_gitignore: false,
458 extra_ignores: vec!["vendor/**".to_string()],
459 },
460 )
461 .unwrap();
462 let p = paths(&idx);
463 assert!(p.contains(&"src/keep.rs".into()));
464 let file_paths: Vec<&FileEntry> = idx.files().collect();
469 assert!(
470 !file_paths.iter().any(|e| e.path.starts_with("vendor")),
471 "no file under vendor/ should be indexed: {p:?}",
472 );
473 }
474
475 #[test]
476 fn walk_invalid_extra_ignore_pattern_surfaces_error() {
477 let tmp = td();
478 touch(tmp.path(), "a.txt", b"x");
479 let err = walk(
480 tmp.path(),
481 &WalkOptions {
482 respect_gitignore: false,
483 extra_ignores: vec!["[unterminated".to_string()],
484 },
485 );
486 assert!(err.is_err(), "bad pattern should fail: {err:?}");
487 }
488
489 #[test]
490 fn walk_emits_files_with_correct_size() {
491 let tmp = td();
492 touch(tmp.path(), "a.txt", &[0u8; 1024]);
493 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
494 let entry = idx
495 .files()
496 .find(|e| &*e.path == Path::new("a.txt"))
497 .expect("a.txt entry");
498 assert_eq!(entry.size, 1024);
499 assert!(!entry.is_dir);
500 }
501
502 #[test]
503 fn default_walk_options_respects_gitignore_and_no_extra_ignores() {
504 let opts = WalkOptions::default();
505 assert!(opts.respect_gitignore);
506 assert!(opts.extra_ignores.is_empty());
507 }
508
509 #[test]
510 fn walk_output_is_deterministic_across_runs() {
511 let tmp = td();
517 for i in 0..50 {
518 touch(
519 tmp.path(),
520 &format!("dir_{}/file_{i}.rs", i % 5),
521 b"// hello\n",
522 );
523 }
524 let opts = WalkOptions::default();
525 let a = walk(tmp.path(), &opts).unwrap();
526 let b = walk(tmp.path(), &opts).unwrap();
527 assert_eq!(paths(&a), paths(&b));
528 }
529
530 #[test]
531 fn walk_output_is_alphabetically_sorted() {
532 let tmp = td();
537 touch(tmp.path(), "z.txt", b"z");
538 touch(tmp.path(), "a.txt", b"a");
539 touch(tmp.path(), "m.txt", b"m");
540 touch(tmp.path(), "sub/b.txt", b"b");
541 touch(tmp.path(), "sub/a.txt", b"a");
542
543 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
544 let actual: Vec<_> = idx.entries.iter().map(|e| e.path.clone()).collect();
545 let mut expected = actual.clone();
546 expected.sort_unstable();
547 assert_eq!(actual, expected, "walker output must be path-sorted");
548 }
549
550 #[test]
551 fn walk_handles_thousand_files() {
552 let tmp = td();
558 let n = 1_000usize;
559 for i in 0..n {
560 touch(tmp.path(), &format!("d{}/f{i:04}.txt", i % 16), b"x");
561 }
562 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
563
564 let file_paths: Vec<_> = idx.files().map(|e| e.path.clone()).collect();
565 assert_eq!(
566 file_paths.len(),
567 n,
568 "expected {n} files, got {}",
569 file_paths.len(),
570 );
571
572 let mut expected = file_paths.clone();
573 expected.sort_unstable();
574 assert_eq!(
575 file_paths, expected,
576 "concurrent walker output must remain path-sorted",
577 );
578 }
579}