1use std::path::{Path, PathBuf};
2use std::sync::{Arc, Mutex};
3
4use ignore::{
5 ParallelVisitor, ParallelVisitorBuilder, WalkBuilder, WalkState, overrides::OverrideBuilder,
6};
7
8use crate::error::{Error, Result};
9
10#[derive(Debug, Clone)]
12pub struct FileEntry {
13 pub path: PathBuf,
15 pub is_dir: bool,
16 pub size: u64,
17}
18
19#[derive(Debug, Default)]
22pub struct FileIndex {
23 pub entries: Vec<FileEntry>,
24}
25
26impl FileIndex {
27 pub fn files(&self) -> impl Iterator<Item = &FileEntry> {
28 self.entries.iter().filter(|e| !e.is_dir)
29 }
30
31 pub fn dirs(&self) -> impl Iterator<Item = &FileEntry> {
32 self.entries.iter().filter(|e| e.is_dir)
33 }
34
35 pub fn total_size(&self) -> u64 {
36 self.files().map(|f| f.size).sum()
37 }
38
39 pub fn find_file(&self, rel: &Path) -> Option<&FileEntry> {
43 self.files().find(|e| e.path == rel)
44 }
45}
46
47#[derive(Debug, Clone)]
48pub struct WalkOptions {
49 pub respect_gitignore: bool,
50 pub extra_ignores: Vec<String>,
51}
52
53impl Default for WalkOptions {
54 fn default() -> Self {
55 Self {
56 respect_gitignore: true,
57 extra_ignores: Vec::new(),
58 }
59 }
60}
61
62pub fn walk(root: &Path, opts: &WalkOptions) -> Result<FileIndex> {
63 let builder = build_walk_builder(root, opts)?;
64
65 let out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>> = Arc::new(Mutex::new(Vec::new()));
72 let error_slot: Arc<Mutex<Option<Error>>> = Arc::new(Mutex::new(None));
73 let root_owned: Arc<PathBuf> = Arc::new(root.to_path_buf());
74
75 let mut visitor_builder = WalkVisitorBuilder {
76 root: Arc::clone(&root_owned),
77 error_slot: Arc::clone(&error_slot),
78 out_entries: Arc::clone(&out_entries),
79 };
80 builder.build_parallel().visit(&mut visitor_builder);
81
82 if let Some(err) = error_slot.lock().expect("walker error slot lock").take() {
83 return Err(err);
84 }
85
86 let mut entries: Vec<FileEntry> = out_entries
92 .lock()
93 .expect("walker out-entries lock")
94 .drain(..)
95 .flatten()
96 .collect();
97 entries.sort_unstable_by(|a, b| a.path.cmp(&b.path));
98 Ok(FileIndex { entries })
99}
100
101fn build_walk_builder(root: &Path, opts: &WalkOptions) -> Result<WalkBuilder> {
106 let mut builder = WalkBuilder::new(root);
107 builder
108 .standard_filters(opts.respect_gitignore)
109 .hidden(false)
110 .follow_links(true)
111 .require_git(false);
112
113 let mut overrides_builder = OverrideBuilder::new(root);
121 overrides_builder
122 .add("!.git")
123 .map_err(|e| Error::Other(format!("ignore pattern .git: {e}")))?;
124 for pattern in &opts.extra_ignores {
125 let pattern = if pattern.starts_with('!') {
126 pattern.clone()
127 } else {
128 format!("!{pattern}")
129 };
130 overrides_builder
131 .add(&pattern)
132 .map_err(|e| Error::Other(format!("ignore pattern {pattern:?}: {e}")))?;
133 }
134 let overrides = overrides_builder
135 .build()
136 .map_err(|e| Error::Other(format!("failed to build overrides: {e}")))?;
137 builder.overrides(overrides);
138 Ok(builder)
139}
140
141fn result_to_entry(
148 root: &Path,
149 result: std::result::Result<ignore::DirEntry, ignore::Error>,
150) -> Result<Option<FileEntry>> {
151 let entry = result?;
152 let abs = entry.path();
153 let Ok(rel) = abs.strip_prefix(root) else {
154 return Ok(None);
155 };
156 if rel.as_os_str().is_empty() {
157 return Ok(None);
158 }
159 let metadata = entry.metadata().map_err(|e| Error::Io {
160 path: abs.to_path_buf(),
161 source: std::io::Error::other(e.to_string()),
162 })?;
163 Ok(Some(FileEntry {
164 path: rel.to_path_buf(),
165 is_dir: metadata.is_dir(),
166 size: if metadata.is_file() {
167 metadata.len()
168 } else {
169 0
170 },
171 }))
172}
173
174struct WalkVisitor {
180 root: Arc<PathBuf>,
181 entries: Vec<FileEntry>,
182 error_slot: Arc<Mutex<Option<Error>>>,
183 out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
184}
185
186impl ParallelVisitor for WalkVisitor {
187 fn visit(&mut self, result: std::result::Result<ignore::DirEntry, ignore::Error>) -> WalkState {
188 if self
192 .error_slot
193 .lock()
194 .expect("walker error slot lock")
195 .is_some()
196 {
197 return WalkState::Quit;
198 }
199 match result_to_entry(&self.root, result) {
200 Ok(Some(entry)) => {
201 self.entries.push(entry);
202 WalkState::Continue
203 }
204 Ok(None) => WalkState::Continue,
205 Err(err) => {
206 let mut slot = self.error_slot.lock().expect("walker error slot lock");
207 if slot.is_none() {
208 *slot = Some(err);
209 }
210 WalkState::Quit
211 }
212 }
213 }
214}
215
216impl Drop for WalkVisitor {
217 fn drop(&mut self) {
218 let local = std::mem::take(&mut self.entries);
219 if local.is_empty() {
220 return;
221 }
222 if let Ok(mut out) = self.out_entries.lock() {
223 out.push(local);
224 }
225 }
226}
227
228struct WalkVisitorBuilder {
229 root: Arc<PathBuf>,
230 error_slot: Arc<Mutex<Option<Error>>>,
231 out_entries: Arc<Mutex<Vec<Vec<FileEntry>>>>,
232}
233
234impl<'s> ParallelVisitorBuilder<'s> for WalkVisitorBuilder {
235 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
236 Box::new(WalkVisitor {
237 root: Arc::clone(&self.root),
238 entries: Vec::new(),
239 error_slot: Arc::clone(&self.error_slot),
240 out_entries: Arc::clone(&self.out_entries),
241 })
242 }
243}
244
245#[cfg(test)]
246mod tests {
247 use super::*;
248
249 fn td() -> tempfile::TempDir {
250 tempfile::Builder::new()
251 .prefix("alint-walker-test-")
252 .tempdir()
253 .unwrap()
254 }
255
256 fn touch(root: &Path, rel: &str, content: &[u8]) {
257 let abs = root.join(rel);
258 if let Some(parent) = abs.parent() {
259 std::fs::create_dir_all(parent).unwrap();
260 }
261 std::fs::write(abs, content).unwrap();
262 }
263
264 fn paths(idx: &FileIndex) -> Vec<String> {
265 idx.entries
269 .iter()
270 .map(|e| e.path.display().to_string().replace('\\', "/"))
271 .collect()
272 }
273
274 #[test]
275 fn fileindex_files_filters_directories_out() {
276 let idx = FileIndex {
277 entries: vec![
278 FileEntry {
279 path: "a".into(),
280 is_dir: true,
281 size: 0,
282 },
283 FileEntry {
284 path: "a/x.rs".into(),
285 is_dir: false,
286 size: 5,
287 },
288 ],
289 };
290 let files: Vec<_> = idx.files().collect();
291 assert_eq!(files.len(), 1);
292 assert_eq!(files[0].path, Path::new("a/x.rs"));
293 }
294
295 #[test]
296 fn fileindex_dirs_filters_files_out() {
297 let idx = FileIndex {
298 entries: vec![
299 FileEntry {
300 path: "a".into(),
301 is_dir: true,
302 size: 0,
303 },
304 FileEntry {
305 path: "a/x.rs".into(),
306 is_dir: false,
307 size: 5,
308 },
309 ],
310 };
311 let dirs: Vec<_> = idx.dirs().collect();
312 assert_eq!(dirs.len(), 1);
313 assert_eq!(dirs[0].path, Path::new("a"));
314 }
315
316 #[test]
317 fn fileindex_total_size_sums_files_only() {
318 let idx = FileIndex {
319 entries: vec![
320 FileEntry {
321 path: "a".into(),
322 is_dir: true,
323 size: 999, },
325 FileEntry {
326 path: "a/x.rs".into(),
327 is_dir: false,
328 size: 100,
329 },
330 FileEntry {
331 path: "a/y.rs".into(),
332 is_dir: false,
333 size: 50,
334 },
335 ],
336 };
337 assert_eq!(idx.total_size(), 150);
340 }
341
342 #[test]
343 fn fileindex_find_file_returns_match_or_none() {
344 let idx = FileIndex {
345 entries: vec![
346 FileEntry {
347 path: "a/x.rs".into(),
348 is_dir: false,
349 size: 0,
350 },
351 FileEntry {
352 path: "b".into(),
353 is_dir: true,
354 size: 0,
355 },
356 ],
357 };
358 assert!(idx.find_file(Path::new("a/x.rs")).is_some());
359 assert!(idx.find_file(Path::new("missing.rs")).is_none());
360 assert!(idx.find_file(Path::new("b")).is_none());
363 }
364
365 #[test]
366 fn walk_excludes_dot_git_directory() {
367 let tmp = td();
368 touch(tmp.path(), "README.md", b"# demo\n");
369 touch(tmp.path(), ".git/config", b"[core]\n");
371 touch(tmp.path(), ".git/HEAD", b"ref: refs/heads/main\n");
372
373 let idx = walk(
374 tmp.path(),
375 &WalkOptions {
376 respect_gitignore: false,
377 extra_ignores: Vec::new(),
378 },
379 )
380 .unwrap();
381
382 let p = paths(&idx);
383 assert!(p.contains(&"README.md".into()), "missing README.md: {p:?}");
384 assert!(
385 !p.iter().any(|s| s.starts_with(".git")),
386 ".git was not excluded: {p:?}",
387 );
388 }
389
390 #[test]
391 fn walk_respects_gitignore_when_enabled() {
392 let tmp = td();
393 touch(tmp.path(), ".gitignore", b"target/\nignored.txt\n");
394 touch(tmp.path(), "src/main.rs", b"fn main() {}\n");
395 touch(tmp.path(), "target/debug/build.log", b"junk");
396 touch(tmp.path(), "ignored.txt", b"junk");
397
398 let idx = walk(
399 tmp.path(),
400 &WalkOptions {
401 respect_gitignore: true,
402 extra_ignores: Vec::new(),
403 },
404 )
405 .unwrap();
406
407 let p = paths(&idx);
408 assert!(p.contains(&"src/main.rs".into()));
409 assert!(
410 !p.iter().any(|s| s.starts_with("target")),
411 "target/ should be ignored: {p:?}",
412 );
413 assert!(
414 !p.contains(&"ignored.txt".into()),
415 "ignored.txt should be filtered: {p:?}",
416 );
417 }
418
419 #[test]
420 fn walk_includes_gitignored_paths_when_respect_gitignore_false() {
421 let tmp = td();
422 touch(tmp.path(), ".gitignore", b"ignored.txt\n");
423 touch(tmp.path(), "ignored.txt", b"x");
424 touch(tmp.path(), "kept.txt", b"y");
425
426 let idx = walk(
427 tmp.path(),
428 &WalkOptions {
429 respect_gitignore: false,
430 extra_ignores: Vec::new(),
431 },
432 )
433 .unwrap();
434 let p = paths(&idx);
435 assert!(
436 p.contains(&"ignored.txt".into()),
437 "respect_gitignore=false should include it: {p:?}",
438 );
439 assert!(p.contains(&"kept.txt".into()));
440 }
441
442 #[test]
443 fn walk_applies_extra_ignores_as_excludes() {
444 let tmp = td();
445 touch(tmp.path(), "src/keep.rs", b"x");
446 touch(tmp.path(), "vendor/skip.rs", b"y");
447
448 let idx = walk(
449 tmp.path(),
450 &WalkOptions {
451 respect_gitignore: false,
452 extra_ignores: vec!["vendor/**".to_string()],
453 },
454 )
455 .unwrap();
456 let p = paths(&idx);
457 assert!(p.contains(&"src/keep.rs".into()));
458 let file_paths: Vec<&FileEntry> = idx.files().collect();
463 assert!(
464 !file_paths.iter().any(|e| e.path.starts_with("vendor")),
465 "no file under vendor/ should be indexed: {p:?}",
466 );
467 }
468
469 #[test]
470 fn walk_invalid_extra_ignore_pattern_surfaces_error() {
471 let tmp = td();
472 touch(tmp.path(), "a.txt", b"x");
473 let err = walk(
474 tmp.path(),
475 &WalkOptions {
476 respect_gitignore: false,
477 extra_ignores: vec!["[unterminated".to_string()],
478 },
479 );
480 assert!(err.is_err(), "bad pattern should fail: {err:?}");
481 }
482
483 #[test]
484 fn walk_emits_files_with_correct_size() {
485 let tmp = td();
486 touch(tmp.path(), "a.txt", &[0u8; 1024]);
487 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
488 let entry = idx
489 .files()
490 .find(|e| e.path == Path::new("a.txt"))
491 .expect("a.txt entry");
492 assert_eq!(entry.size, 1024);
493 assert!(!entry.is_dir);
494 }
495
496 #[test]
497 fn default_walk_options_respects_gitignore_and_no_extra_ignores() {
498 let opts = WalkOptions::default();
499 assert!(opts.respect_gitignore);
500 assert!(opts.extra_ignores.is_empty());
501 }
502
503 #[test]
504 fn walk_output_is_deterministic_across_runs() {
505 let tmp = td();
511 for i in 0..50 {
512 touch(
513 tmp.path(),
514 &format!("dir_{}/file_{i}.rs", i % 5),
515 b"// hello\n",
516 );
517 }
518 let opts = WalkOptions::default();
519 let a = walk(tmp.path(), &opts).unwrap();
520 let b = walk(tmp.path(), &opts).unwrap();
521 assert_eq!(paths(&a), paths(&b));
522 }
523
524 #[test]
525 fn walk_output_is_alphabetically_sorted() {
526 let tmp = td();
531 touch(tmp.path(), "z.txt", b"z");
532 touch(tmp.path(), "a.txt", b"a");
533 touch(tmp.path(), "m.txt", b"m");
534 touch(tmp.path(), "sub/b.txt", b"b");
535 touch(tmp.path(), "sub/a.txt", b"a");
536
537 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
538 let actual: Vec<_> = idx.entries.iter().map(|e| e.path.clone()).collect();
539 let mut expected = actual.clone();
540 expected.sort_unstable();
541 assert_eq!(actual, expected, "walker output must be path-sorted");
542 }
543
544 #[test]
545 fn walk_handles_thousand_files() {
546 let tmp = td();
552 let n = 1_000usize;
553 for i in 0..n {
554 touch(tmp.path(), &format!("d{}/f{i:04}.txt", i % 16), b"x");
555 }
556 let idx = walk(tmp.path(), &WalkOptions::default()).unwrap();
557
558 let file_paths: Vec<_> = idx.files().map(|e| e.path.clone()).collect();
559 assert_eq!(
560 file_paths.len(),
561 n,
562 "expected {n} files, got {}",
563 file_paths.len(),
564 );
565
566 let mut expected = file_paths.clone();
567 expected.sort_unstable();
568 assert_eq!(
569 file_paths, expected,
570 "concurrent walker output must remain path-sorted",
571 );
572 }
573}