Skip to main content

vtcode_file_search/
lib.rs

1//! Fast fuzzy file search library for VT Code.
2//!
3//! Uses the `ignore` crate (same as ripgrep) for parallel directory traversal
4//! and `nucleo-matcher` for fuzzy matching.
5//!
6//! # Example
7//!
8//! ```ignore
9//! use std::num::NonZero;
10//! use std::path::Path;
11//! use std::sync::Arc;
12//! use std::sync::atomic::AtomicBool;
13//! use vtcode_file_search::run;
14//!
15//! let results = run(
16//!     "main",
17//!     NonZero::new(100).unwrap(),
18//!     Path::new("."),
19//!     vec![],
20//!     NonZero::new(4).unwrap(),
21//!     Arc::new(AtomicBool::new(false)),
22//!     false,
23//!     true,
24//! )?;
25//!
26//! for m in results.matches {
27//!     println!("{}: {}", m.path, m.score);
28//! }
29//! # Ok::<(), anyhow::Error>(())
30//! ```
31
32use parking_lot::Mutex;
33use serde::Serialize;
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::sync::Arc;
38use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
39
40/// A single file match result.
41///
42/// Fields:
43/// - `score`: Relevance score from fuzzy matching (higher is better)
44/// - `path`: File path relative to the search directory
45/// - `indices`: Optional character positions for highlighting matched characters
46#[derive(Debug, Clone, Serialize)]
47pub struct FileMatch {
48    pub score: u32,
49    pub path: String,
50    #[serde(skip_serializing_if = "Option::is_none")]
51    pub indices: Option<Vec<u32>>,
52}
53
54/// Complete search results with total match count.
55#[derive(Debug)]
56pub struct FileSearchResults {
57    pub matches: Vec<FileMatch>,
58    pub total_match_count: usize,
59}
60
61/// Configuration for file search operations.
62pub struct FileSearchConfig {
63    pub pattern_text: String,
64    pub limit: NonZero<usize>,
65    pub search_directory: std::path::PathBuf,
66    pub exclude: Vec<String>,
67    pub threads: NonZero<usize>,
68    pub cancel_flag: Arc<AtomicBool>,
69    pub compute_indices: bool,
70    pub respect_gitignore: bool,
71}
72
73pub use vtcode_commons::paths::file_name_from_path;
74
75/// Best matches list per worker thread (lock-free collection).
76///
77/// Each worker thread gets its own instance to avoid locking during
78/// directory traversal. Results are merged at the end.
79struct BestMatchesList {
80    matches: BinaryHeap<Reverse<(u32, String)>>,
81    limit: usize,
82    matcher: nucleo_matcher::Matcher,
83    haystack_buf: Vec<char>,
84    pattern_buf: Vec<char>,
85    pattern_text: String,
86}
87
88impl BestMatchesList {
89    fn new(limit: usize, pattern_text: &str) -> Self {
90        Self {
91            matches: BinaryHeap::new(),
92            limit,
93            matcher: nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT),
94            haystack_buf: Vec::with_capacity(256),
95            pattern_buf: Vec::with_capacity(pattern_text.len()),
96            pattern_text: pattern_text.to_string(),
97        }
98    }
99
100    /// Record a matching path while preserving the worker-local top-K heap.
101    ///
102    /// Returns true when the path matches the search pattern, even if it
103    /// does not survive the top-K cutoff.
104    fn record_match(&mut self, path: &str) -> bool {
105        let haystack = nucleo_matcher::Utf32Str::new(path, &mut self.haystack_buf);
106        let needle = nucleo_matcher::Utf32Str::new(&self.pattern_text, &mut self.pattern_buf);
107        let Some(score) = self.matcher.fuzzy_match(haystack, needle) else {
108            return false;
109        };
110
111        push_top_match(
112            &mut self.matches,
113            self.limit,
114            score as u32,
115            path.to_string(),
116        );
117        true
118    }
119}
120
121fn push_top_match(
122    matches: &mut BinaryHeap<Reverse<(u32, String)>>,
123    limit: usize,
124    score: u32,
125    path: String,
126) -> bool {
127    if matches.len() < limit {
128        matches.push(Reverse((score, path)));
129        return true;
130    }
131
132    let Some(min_score) = matches.peek().map(|entry| entry.0.0) else {
133        return false;
134    };
135
136    if score <= min_score {
137        return false;
138    }
139
140    matches.pop();
141    matches.push(Reverse((score, path)));
142    true
143}
144
145/// Run fuzzy file search with parallel traversal.
146///
147/// # Arguments
148///
149/// * `config` - File search configuration containing all search parameters
150///
151/// # Returns
152///
153/// FileSearchResults containing matched files and total match count.
154pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
155    let limit = config.limit.get();
156    let search_directory = &config.search_directory;
157    let exclude = &config.exclude;
158    let threads = config.threads.get();
159    let cancel_flag = &config.cancel_flag;
160    let compute_indices = config.compute_indices;
161    let respect_gitignore = config.respect_gitignore;
162    // Store pattern text for cloning across threads
163    // (Pattern is parsed per-match to work with Utf32Str)
164
165    // Build the directory walker
166    let mut walk_builder = ignore::WalkBuilder::new(search_directory);
167    walk_builder
168        .threads(threads)
169        .hidden(false)
170        .follow_links(true)
171        .require_git(false);
172
173    if !respect_gitignore {
174        walk_builder
175            .git_ignore(false)
176            .git_global(false)
177            .git_exclude(false)
178            .ignore(false)
179            .parents(false);
180    }
181
182    // Add exclusion patterns
183    if !exclude.is_empty() {
184        let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
185        for exclude_pattern in exclude {
186            let pattern = format!("!{}", exclude_pattern);
187            override_builder.add(&pattern)?;
188        }
189        let override_matcher = override_builder.build()?;
190        walk_builder.overrides(override_matcher);
191    }
192
193    let walker = walk_builder.build_parallel();
194
195    // Create per-worker result collection using Arc + Mutex for thread safety
196    let best_matchers_per_worker: Vec<Arc<Mutex<BestMatchesList>>> = (0..threads)
197        .map(|_| {
198            Arc::new(Mutex::new(BestMatchesList::new(
199                limit,
200                &config.pattern_text,
201            )))
202        })
203        .collect();
204
205    let index_counter = AtomicUsize::new(0);
206    let total_match_count = Arc::new(AtomicUsize::new(0));
207
208    // Run parallel traversal
209    walker.run(|| {
210        let worker_id =
211            index_counter.fetch_add(1, Ordering::Relaxed) % best_matchers_per_worker.len();
212        let best_list = best_matchers_per_worker[worker_id].clone();
213        let cancel_flag_clone = cancel_flag.clone();
214        let total_match_count_clone = total_match_count.clone();
215
216        Box::new(move |result| {
217            // Check cancellation flag periodically
218            if cancel_flag_clone.load(Ordering::Relaxed) {
219                return ignore::WalkState::Quit;
220            }
221
222            let entry = match result {
223                Ok(e) => e,
224                Err(_) => return ignore::WalkState::Continue,
225            };
226
227            // Skip directories
228            if entry.metadata().map_or(true, |m| m.is_dir()) {
229                return ignore::WalkState::Continue;
230            }
231
232            let path = match entry.path().to_str() {
233                Some(p) => p,
234                None => return ignore::WalkState::Continue,
235            };
236
237            // Try to add to results
238            {
239                let mut list = best_list.lock();
240                if list.record_match(path) {
241                    total_match_count_clone.fetch_add(1, Ordering::Relaxed);
242                }
243            }
244
245            ignore::WalkState::Continue
246        })
247    });
248
249    // Merge worker-local top-K heaps into one final top-K heap.
250    let mut merged_matches = BinaryHeap::with_capacity(limit);
251    for arc in best_matchers_per_worker {
252        let mut list = arc.lock();
253        for Reverse((score, path)) in std::mem::take(&mut list.matches).into_vec() {
254            push_top_match(&mut merged_matches, limit, score, path);
255        }
256    }
257
258    // Build final results
259    let matches = merged_matches
260        .into_sorted_vec()
261        .into_iter()
262        .map(|Reverse((score, path))| FileMatch {
263            score,
264            path,
265            indices: if compute_indices {
266                Some(Vec::new())
267            } else {
268                None
269            },
270        })
271        .collect();
272
273    Ok(FileSearchResults {
274        matches,
275        total_match_count: total_match_count.load(Ordering::Relaxed),
276    })
277}
278
279#[cfg(test)]
280mod tests {
281    use super::*;
282    use std::fs;
283    use tempfile::TempDir;
284
285    #[test]
286    fn test_file_name_from_path() {
287        assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
288        assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
289        assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
290        assert_eq!(file_name_from_path("file.txt"), "file.txt");
291        assert_eq!(file_name_from_path(""), "");
292    }
293
294    #[test]
295    fn test_run_search() -> anyhow::Result<()> {
296        let temp = TempDir::new()?;
297        fs::write(temp.path().join("hello.rs"), "fn main() {}")?;
298        fs::write(temp.path().join("world.txt"), "world")?;
299
300        let results = run(FileSearchConfig {
301            pattern_text: "hello".to_string(),
302            limit: NonZero::new(10).unwrap(),
303            search_directory: temp.path().to_path_buf(),
304            exclude: vec![],
305            threads: NonZero::new(1).unwrap(),
306            cancel_flag: Arc::new(AtomicBool::new(false)),
307            compute_indices: false,
308            respect_gitignore: false,
309        })?;
310
311        assert_eq!(results.matches.len(), 1);
312        assert!(results.matches[0].path.contains("hello"));
313
314        Ok(())
315    }
316
317    #[test]
318    fn test_multiple_matches() -> anyhow::Result<()> {
319        let temp = TempDir::new()?;
320        fs::write(temp.path().join("test1.rs"), "")?;
321        fs::write(temp.path().join("test2.rs"), "")?;
322        fs::write(temp.path().join("test3.rs"), "")?;
323        fs::write(temp.path().join("other.txt"), "")?;
324
325        let results = run(FileSearchConfig {
326            pattern_text: "test".to_string(),
327            limit: NonZero::new(10).unwrap(),
328            search_directory: temp.path().to_path_buf(),
329            exclude: vec![],
330            threads: NonZero::new(2).unwrap(),
331            cancel_flag: Arc::new(AtomicBool::new(false)),
332            compute_indices: false,
333            respect_gitignore: false,
334        })?;
335
336        assert_eq!(results.matches.len(), 3);
337        assert!(results.matches.iter().all(|m| m.path.contains("test")));
338
339        Ok(())
340    }
341
342    #[test]
343    fn test_limit_is_respected_across_workers() -> anyhow::Result<()> {
344        let temp = TempDir::new()?;
345        for name in ["alpha.rs", "alphabet.rs", "alphanumeric.rs", "alpaca.rs"] {
346            fs::write(temp.path().join(name), "")?;
347        }
348
349        let results = run(FileSearchConfig {
350            pattern_text: "alpha".to_string(),
351            limit: NonZero::new(2).unwrap(),
352            search_directory: temp.path().to_path_buf(),
353            exclude: vec![],
354            threads: NonZero::new(4).unwrap(),
355            cancel_flag: Arc::new(AtomicBool::new(false)),
356            compute_indices: false,
357            respect_gitignore: false,
358        })?;
359
360        assert_eq!(results.matches.len(), 2);
361        assert!(
362            results
363                .matches
364                .windows(2)
365                .all(|window| window[0].score >= window[1].score)
366        );
367
368        Ok(())
369    }
370
371    #[test]
372    fn test_exclusion_patterns() -> anyhow::Result<()> {
373        let temp = TempDir::new()?;
374        fs::write(temp.path().join("keep.rs"), "")?;
375        fs::create_dir(temp.path().join("target"))?;
376        fs::write(temp.path().join("target/ignore.rs"), "")?;
377
378        let results = run(FileSearchConfig {
379            pattern_text: "rs".to_string(),
380            limit: NonZero::new(10).unwrap(),
381            search_directory: temp.path().to_path_buf(),
382            exclude: vec!["target/**".to_string()],
383            threads: NonZero::new(2).unwrap(),
384            cancel_flag: Arc::new(AtomicBool::new(false)),
385            compute_indices: false,
386            respect_gitignore: false,
387        })?;
388
389        assert_eq!(results.matches.len(), 1);
390        assert!(results.matches[0].path.contains("keep.rs"));
391
392        Ok(())
393    }
394
395    #[test]
396    fn test_cancellation() -> anyhow::Result<()> {
397        let temp = TempDir::new()?;
398        for i in 0..10 {
399            fs::write(temp.path().join(format!("file{}.rs", i)), "")?;
400        }
401
402        let cancel_flag = Arc::new(AtomicBool::new(true));
403        let results = run(FileSearchConfig {
404            pattern_text: "file".to_string(),
405            limit: NonZero::new(10).unwrap(),
406            search_directory: temp.path().to_path_buf(),
407            exclude: vec![],
408            threads: NonZero::new(1).unwrap(),
409            cancel_flag,
410            compute_indices: false,
411            respect_gitignore: false,
412        })?;
413
414        // Should return early due to cancellation
415        assert!(results.matches.is_empty());
416
417        Ok(())
418    }
419}