Skip to main content

vtcode_file_search/
lib.rs

1//! Fast fuzzy file search library for VT Code.
2//!
3//! Uses the `ignore` crate (same as ripgrep) for parallel directory traversal
4//! and `nucleo-matcher` for fuzzy matching.
5//!
6//! # Example
7//!
8//! ```ignore
9//! use std::num::NonZero;
10//! use std::path::Path;
11//! use std::sync::Arc;
12//! use std::sync::atomic::AtomicBool;
13//! use vtcode_file_search::run;
14//!
15//! let results = run(
16//!     "main",
17//!     NonZero::new(100).unwrap(),
18//!     Path::new("."),
19//!     vec![],
20//!     NonZero::new(4).unwrap(),
21//!     Arc::new(AtomicBool::new(false)),
22//!     false,
23//!     true,
24//! )?;
25//!
26//! for m in results.matches {
27//!     println!("{}: {}", m.path, m.score);
28//! }
29//! # Ok::<(), anyhow::Error>(())
30//! ```
31
32use nucleo_matcher::{Matcher, Utf32Str};
33use serde::Serialize;
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::sync::Arc;
38use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
39
40/// A single file match result.
41///
42/// Fields:
43/// - `score`: Relevance score from fuzzy matching (higher is better)
44/// - `path`: File path relative to the search directory
45/// - `indices`: Optional character positions for highlighting matched characters
46#[derive(Debug, Clone, Serialize)]
47pub struct FileMatch {
48    pub score: u32,
49    pub path: String,
50    #[serde(skip_serializing_if = "Option::is_none")]
51    pub indices: Option<Vec<u32>>,
52}
53
54/// Complete search results with total match count.
55#[derive(Debug)]
56pub struct FileSearchResults {
57    pub matches: Vec<FileMatch>,
58    pub total_match_count: usize,
59}
60
61/// Configuration for file search operations.
62pub struct FileSearchConfig {
63    pub pattern_text: String,
64    pub limit: NonZero<usize>,
65    pub search_directory: std::path::PathBuf,
66    pub exclude: Vec<String>,
67    pub threads: NonZero<usize>,
68    pub cancel_flag: Arc<AtomicBool>,
69    pub compute_indices: bool,
70    pub respect_gitignore: bool,
71}
72
73pub use vtcode_commons::paths::file_name_from_path;
74
75/// Best matches list per worker thread (lock-free collection).
76///
77/// Each worker thread gets its own instance to avoid locking during
78/// directory traversal. Results are merged at the end.
79struct BestMatchesList {
80    matches: BinaryHeap<Reverse<(u32, String)>>,
81    limit: usize,
82    matcher: Matcher,
83    haystack_buf: Vec<char>,
84}
85
86impl BestMatchesList {
87    fn new(limit: usize) -> Self {
88        Self {
89            matches: BinaryHeap::new(),
90            limit,
91            matcher: Matcher::new(nucleo_matcher::Config::DEFAULT),
92            haystack_buf: Vec::new(),
93        }
94    }
95
96    /// Try to add a match, maintaining top-K results.
97    ///
98    /// Returns Some(score) if the match was added or if it would replace
99    /// a lower-scoring match.
100    fn try_add(&mut self, path: &str, pattern_text: &str) -> Option<u32> {
101        self.haystack_buf.clear();
102        let haystack = Utf32Str::new(path, &mut self.haystack_buf);
103        let mut pattern_buf = Vec::new();
104        let needle = Utf32Str::new(pattern_text, &mut pattern_buf);
105        let score = self.matcher.fuzzy_match(haystack, needle)? as u32;
106
107        if self.matches.len() < self.limit {
108            self.matches.push(Reverse((score, path.to_string())));
109            Some(score)
110        } else {
111            let min_score = self.matches.peek().unwrap().0.0;
112            if score > min_score {
113                self.matches.pop();
114                self.matches.push(Reverse((score, path.to_string())));
115                Some(score)
116            } else {
117                None
118            }
119        }
120    }
121
122    /// Convert into sorted matches (highest score first).
123    #[allow(dead_code)]
124    fn into_matches(self) -> Vec<(u32, String)> {
125        self.matches
126            .into_sorted_vec()
127            .into_iter()
128            .map(|Reverse((score, path))| (score, path))
129            .collect()
130    }
131
132    /// Clone current matches without consuming self.
133    fn clone_matches(&self) -> Vec<(u32, String)> {
134        self.matches
135            .iter()
136            .map(|Reverse((score, path))| (*score, path.clone()))
137            .collect()
138    }
139}
140
141/// Run fuzzy file search with parallel traversal.
142///
143/// # Arguments
144///
145/// * `config` - File search configuration containing all search parameters
146///
147/// # Returns
148///
149/// FileSearchResults containing matched files and total match count.
150pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
151    let pattern_text = &config.pattern_text;
152    let limit = config.limit;
153    let search_directory = &config.search_directory;
154    let exclude = &config.exclude;
155    let threads = config.threads;
156    let cancel_flag = &config.cancel_flag;
157    let compute_indices = config.compute_indices;
158    let respect_gitignore = config.respect_gitignore;
159    // Store pattern text for cloning across threads
160    // (Pattern is parsed per-match to work with Utf32Str)
161
162    // Build the directory walker
163    let mut walk_builder = ignore::WalkBuilder::new(search_directory);
164    walk_builder
165        .threads(threads.get())
166        .hidden(false)
167        .follow_links(true)
168        .require_git(false);
169
170    if !respect_gitignore {
171        walk_builder
172            .git_ignore(false)
173            .git_global(false)
174            .git_exclude(false)
175            .ignore(false)
176            .parents(false);
177    }
178
179    // Add exclusion patterns
180    if !exclude.is_empty() {
181        let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
182        for exclude_pattern in exclude {
183            let pattern = format!("!{}", exclude_pattern);
184            override_builder.add(&pattern)?;
185        }
186        let override_matcher = override_builder.build()?;
187        walk_builder.overrides(override_matcher);
188    }
189
190    let walker = walk_builder.build_parallel();
191
192    // Create per-worker result collection using Arc + Mutex for thread safety
193    let num_workers = threads.get();
194    let best_matchers_per_worker: Vec<Arc<std::sync::Mutex<BestMatchesList>>> = (0..num_workers)
195        .map(|_| Arc::new(std::sync::Mutex::new(BestMatchesList::new(limit.get()))))
196        .collect();
197
198    let index_counter = AtomicUsize::new(0);
199    let total_match_count = Arc::new(AtomicUsize::new(0));
200    let pattern_text = pattern_text.to_string();
201
202    // Run parallel traversal
203    walker.run(|| {
204        let worker_id =
205            index_counter.fetch_add(1, Ordering::Relaxed) % best_matchers_per_worker.len();
206        let best_list = best_matchers_per_worker[worker_id].clone();
207        let cancel_flag_clone = cancel_flag.clone();
208        let pattern_text_clone = pattern_text.clone();
209        let total_match_count_clone = total_match_count.clone();
210
211        Box::new(move |result| {
212            // Check cancellation flag periodically
213            if cancel_flag_clone.load(Ordering::Relaxed) {
214                return ignore::WalkState::Quit;
215            }
216
217            let entry = match result {
218                Ok(e) => e,
219                Err(_) => return ignore::WalkState::Continue,
220            };
221
222            // Skip directories
223            if entry.metadata().map_or(true, |m| m.is_dir()) {
224                return ignore::WalkState::Continue;
225            }
226
227            let path = match entry.path().to_str() {
228                Some(p) => p,
229                None => return ignore::WalkState::Continue,
230            };
231
232            // Try to add to results
233            if let Ok(mut list) = best_list.lock() {
234                if list.try_add(path, &pattern_text_clone).is_some() {
235                    total_match_count_clone.fetch_add(1, Ordering::Relaxed);
236                }
237            }
238
239            ignore::WalkState::Continue
240        })
241    });
242
243    // Merge results from all workers
244    let mut all_matches = Vec::new();
245    for arc in best_matchers_per_worker {
246        if let Ok(list) = arc.lock() {
247            let matches = list.clone_matches();
248            all_matches.extend(matches);
249        }
250    }
251
252    // Sort by score (descending) and limit
253    all_matches.sort_by(|a, b| b.0.cmp(&a.0));
254    all_matches.truncate(limit.get());
255
256    // Build final results
257    let matches = all_matches
258        .into_iter()
259        .map(|(score, path)| FileMatch {
260            score,
261            path,
262            indices: if compute_indices {
263                Some(Vec::new())
264            } else {
265                None
266            },
267        })
268        .collect();
269
270    Ok(FileSearchResults {
271        matches,
272        total_match_count: total_match_count.as_ref().load(Ordering::Relaxed),
273    })
274}
275
276#[cfg(test)]
277mod tests {
278    use super::*;
279    use std::fs;
280    use tempfile::TempDir;
281
282    #[test]
283    fn test_file_name_from_path() {
284        assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
285        assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
286        assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
287        assert_eq!(file_name_from_path("file.txt"), "file.txt");
288        assert_eq!(file_name_from_path(""), "");
289    }
290
291    #[test]
292    fn test_run_search() -> anyhow::Result<()> {
293        let temp = TempDir::new()?;
294        fs::write(temp.path().join("hello.rs"), "fn main() {}")?;
295        fs::write(temp.path().join("world.txt"), "world")?;
296
297        let results = run(FileSearchConfig {
298            pattern_text: "hello".to_string(),
299            limit: NonZero::new(10).unwrap(),
300            search_directory: temp.path().to_path_buf(),
301            exclude: vec![],
302            threads: NonZero::new(1).unwrap(),
303            cancel_flag: Arc::new(AtomicBool::new(false)),
304            compute_indices: false,
305            respect_gitignore: false,
306        })?;
307
308        assert_eq!(results.matches.len(), 1);
309        assert!(results.matches[0].path.contains("hello"));
310
311        Ok(())
312    }
313
314    #[test]
315    fn test_multiple_matches() -> anyhow::Result<()> {
316        let temp = TempDir::new()?;
317        fs::write(temp.path().join("test1.rs"), "")?;
318        fs::write(temp.path().join("test2.rs"), "")?;
319        fs::write(temp.path().join("test3.rs"), "")?;
320        fs::write(temp.path().join("other.txt"), "")?;
321
322        let results = run(FileSearchConfig {
323            pattern_text: "test".to_string(),
324            limit: NonZero::new(10).unwrap(),
325            search_directory: temp.path().to_path_buf(),
326            exclude: vec![],
327            threads: NonZero::new(2).unwrap(),
328            cancel_flag: Arc::new(AtomicBool::new(false)),
329            compute_indices: false,
330            respect_gitignore: false,
331        })?;
332
333        assert_eq!(results.matches.len(), 3);
334        assert!(results.matches.iter().all(|m| m.path.contains("test")));
335
336        Ok(())
337    }
338
339    #[test]
340    fn test_exclusion_patterns() -> anyhow::Result<()> {
341        let temp = TempDir::new()?;
342        fs::write(temp.path().join("keep.rs"), "")?;
343        fs::create_dir(temp.path().join("target"))?;
344        fs::write(temp.path().join("target/ignore.rs"), "")?;
345
346        let results = run(FileSearchConfig {
347            pattern_text: "rs".to_string(),
348            limit: NonZero::new(10).unwrap(),
349            search_directory: temp.path().to_path_buf(),
350            exclude: vec!["target/**".to_string()],
351            threads: NonZero::new(2).unwrap(),
352            cancel_flag: Arc::new(AtomicBool::new(false)),
353            compute_indices: false,
354            respect_gitignore: false,
355        })?;
356
357        assert_eq!(results.matches.len(), 1);
358        assert!(results.matches[0].path.contains("keep.rs"));
359
360        Ok(())
361    }
362
363    #[test]
364    fn test_cancellation() -> anyhow::Result<()> {
365        let temp = TempDir::new()?;
366        for i in 0..10 {
367            fs::write(temp.path().join(format!("file{}.rs", i)), "")?;
368        }
369
370        let cancel_flag = Arc::new(AtomicBool::new(true));
371        let results = run(FileSearchConfig {
372            pattern_text: "file".to_string(),
373            limit: NonZero::new(10).unwrap(),
374            search_directory: temp.path().to_path_buf(),
375            exclude: vec![],
376            threads: NonZero::new(1).unwrap(),
377            cancel_flag,
378            compute_indices: false,
379            respect_gitignore: false,
380        })?;
381
382        // Should return early due to cancellation
383        assert!(results.matches.is_empty());
384
385        Ok(())
386    }
387}