Skip to main content

ripvec_core/
walk.rs

//! Parallel directory traversal using the `ignore` crate.
//!
//! Respects `.gitignore` rules, skips hidden files, and can optionally
//! restrict results to a single ripgrep file type; files are not filtered
//! by extension here. Uses `build_parallel()` for multi-threaded file
//! discovery.
6
7use ignore::WalkBuilder;
8use std::path::{Path, PathBuf};
9use std::sync::Mutex;
10
11/// Walk a directory tree in parallel and collect file paths.
12///
13/// Respects `.gitignore` rules and skips hidden files and directories.
14/// Collects all files — the chunking phase decides whether to use
15/// tree-sitter (known extensions) or sliding-window fallback (unknown).
16///
17/// When `file_type` is `Some`, only files matching that type (using
18/// ripgrep's built-in type database, e.g. "rust", "python", "js") are
19/// collected.
20///
21/// Uses the `ignore` crate's parallel walker for multi-threaded traversal.
22#[must_use]
23pub fn collect_files(root: &Path, file_type: Option<&str>) -> Vec<PathBuf> {
24    let files = Mutex::new(Vec::new());
25
26    let mut builder = WalkBuilder::new(root);
27    builder.hidden(true).git_ignore(true).git_global(true);
28
29    if let Some(ft) = file_type {
30        let mut types_builder = ignore::types::TypesBuilder::new();
31        types_builder.add_defaults();
32        types_builder.select(ft);
33        if let Ok(types) = types_builder.build() {
34            builder.types(types);
35        }
36    }
37
38    builder.build_parallel().run(|| {
39        Box::new(|entry| {
40            let Ok(entry) = entry else {
41                return ignore::WalkState::Continue;
42            };
43            if !entry.file_type().is_some_and(|ft| ft.is_file()) {
44                return ignore::WalkState::Continue;
45            }
46            // Skip known generated/binary files that add noise to the index
47            if let Some(name) = entry.path().file_name().and_then(|n| n.to_str())
48                && matches!(
49                    name,
50                    "Cargo.lock"
51                        | "package-lock.json"
52                        | "yarn.lock"
53                        | "pnpm-lock.yaml"
54                        | "poetry.lock"
55                        | "Gemfile.lock"
56                        | "go.sum"
57                )
58            {
59                return ignore::WalkState::Continue;
60            }
61            if let Ok(mut files) = files.lock() {
62                files.push(entry.into_path());
63            }
64            ignore::WalkState::Continue
65        })
66    });
67
68    let mut files = files.into_inner().unwrap_or_default();
69    files.sort();
70    files
71}