ripvec_core/walk.rs
//! Parallel directory traversal using the `ignore` crate.
//!
//! Respects `.gitignore` rules, skips hidden files, and optionally filters
//! by file type; without a filter, files of every extension are collected.
//! Uses `build_parallel()` for multi-threaded file discovery.
6
7use ignore::WalkBuilder;
8use std::path::{Path, PathBuf};
9use std::sync::Mutex;
10
11/// Walk a directory tree in parallel and collect file paths.
12///
13/// Respects `.gitignore` rules and skips hidden files and directories.
14/// Collects all files — the chunking phase decides whether to use
15/// tree-sitter (known extensions) or sliding-window fallback (unknown).
16///
17/// When `file_type` is `Some`, only files matching that type (using
18/// ripgrep's built-in type database, e.g. "rust", "python", "js") are
19/// collected.
20///
21/// Uses the `ignore` crate's parallel walker for multi-threaded traversal.
22#[must_use]
23pub fn collect_files(root: &Path, file_type: Option<&str>) -> Vec<PathBuf> {
24 let files = Mutex::new(Vec::new());
25
26 let mut builder = WalkBuilder::new(root);
27 builder.hidden(true).git_ignore(true).git_global(true);
28
29 if let Some(ft) = file_type {
30 let mut types_builder = ignore::types::TypesBuilder::new();
31 types_builder.add_defaults();
32 types_builder.select(ft);
33 if let Ok(types) = types_builder.build() {
34 builder.types(types);
35 }
36 }
37
38 builder.build_parallel().run(|| {
39 Box::new(|entry| {
40 let Ok(entry) = entry else {
41 return ignore::WalkState::Continue;
42 };
43 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
44 return ignore::WalkState::Continue;
45 }
46 // Skip known generated/binary files that add noise to the index
47 if let Some(name) = entry.path().file_name().and_then(|n| n.to_str())
48 && matches!(
49 name,
50 "Cargo.lock"
51 | "package-lock.json"
52 | "yarn.lock"
53 | "pnpm-lock.yaml"
54 | "poetry.lock"
55 | "Gemfile.lock"
56 | "go.sum"
57 )
58 {
59 return ignore::WalkState::Continue;
60 }
61 if let Ok(mut files) = files.lock() {
62 files.push(entry.into_path());
63 }
64 ignore::WalkState::Continue
65 })
66 });
67
68 let mut files = files.into_inner().unwrap_or_default();
69 files.sort();
70 files
71}