vtcode_file_search/
lib.rs

1//! Fast fuzzy file search library for VT Code.
2//!
3//! Uses the `ignore` crate (same as ripgrep) for parallel directory traversal
4//! and `nucleo-matcher` for fuzzy matching.
5//!
6//! # Example
7//!
8//! ```ignore
9//! use std::num::NonZero;
10//! use std::path::Path;
11//! use std::sync::Arc;
12//! use std::sync::atomic::AtomicBool;
13//! use vtcode_file_search::run;
14//!
15//! let results = run(
16//!     "main",
17//!     NonZero::new(100).unwrap(),
18//!     Path::new("."),
19//!     vec![],
20//!     NonZero::new(4).unwrap(),
21//!     Arc::new(AtomicBool::new(false)),
22//!     false,
23//!     true,
24//! )?;
25//!
26//! for m in results.matches {
27//!     println!("{}: {}", m.path, m.score);
28//! }
29//! # Ok::<(), anyhow::Error>(())
30//! ```
31
32use nucleo_matcher::{Matcher, Utf32Str};
33use serde::Serialize;
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::path::Path;
38use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
39use std::sync::Arc;
40
41/// A single file match result.
42///
43/// Fields:
44/// - `score`: Relevance score from fuzzy matching (higher is better)
45/// - `path`: File path relative to the search directory
46/// - `indices`: Optional character positions for highlighting matched characters
47#[derive(Debug, Clone, Serialize)]
48pub struct FileMatch {
49    pub score: u32,
50    pub path: String,
51    #[serde(skip_serializing_if = "Option::is_none")]
52    pub indices: Option<Vec<u32>>,
53}
54
55/// Complete search results with total match count.
56#[derive(Debug)]
57pub struct FileSearchResults {
58    pub matches: Vec<FileMatch>,
59    pub total_match_count: usize,
60}
61
62/// Extract the filename from a path, with fallback to the full path.
63///
64/// # Examples
65///
66/// ```
67/// use vtcode_file_search::file_name_from_path;
68///
69/// assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
70/// assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
71/// assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
72/// ```
73pub fn file_name_from_path(path: &str) -> String {
74    Path::new(path)
75        .file_name()
76        .and_then(|name| name.to_str())
77        .map(|s| s.to_string())
78        .unwrap_or_else(|| path.to_string())
79}
80
81/// Best matches list per worker thread (lock-free collection).
82///
83/// Each worker thread gets its own instance to avoid locking during
84/// directory traversal. Results are merged at the end.
85struct BestMatchesList {
86    matches: BinaryHeap<Reverse<(u32, String)>>,
87    limit: usize,
88    matcher: Matcher,
89    haystack_buf: Vec<char>,
90}
91
92impl BestMatchesList {
93    fn new(limit: usize) -> Self {
94        Self {
95            matches: BinaryHeap::new(),
96            limit,
97            matcher: Matcher::new(nucleo_matcher::Config::DEFAULT),
98            haystack_buf: Vec::new(),
99        }
100    }
101
102    /// Try to add a match, maintaining top-K results.
103    ///
104    /// Returns Some(score) if the match was added or if it would replace
105    /// a lower-scoring match.
106    fn try_add(&mut self, path: &str, pattern_text: &str) -> Option<u32> {
107        self.haystack_buf.clear();
108        let haystack = Utf32Str::new(path, &mut self.haystack_buf);
109        let mut pattern_buf = Vec::new();
110        let needle = Utf32Str::new(pattern_text, &mut pattern_buf);
111        let score = self.matcher.fuzzy_match(haystack, needle)? as u32;
112
113        if self.matches.len() < self.limit {
114            self.matches.push(Reverse((score, path.to_string())));
115            Some(score)
116        } else {
117            let min_score = self.matches.peek().unwrap().0 .0;
118            if score > min_score {
119                self.matches.pop();
120                self.matches.push(Reverse((score, path.to_string())));
121                Some(score)
122            } else {
123                None
124            }
125        }
126    }
127
128    /// Convert into sorted matches (highest score first).
129    #[allow(dead_code)]
130    fn into_matches(self) -> Vec<(u32, String)> {
131        self.matches
132            .into_sorted_vec()
133            .into_iter()
134            .map(|Reverse((score, path))| (score, path))
135            .collect()
136    }
137
138    /// Clone current matches without consuming self.
139    fn clone_matches(&self) -> Vec<(u32, String)> {
140        self.matches
141            .iter()
142            .map(|Reverse((score, path))| (*score, path.clone()))
143            .collect()
144    }
145}
146
147/// Run fuzzy file search with parallel traversal.
148///
149/// # Arguments
150///
151/// * `pattern_text` - Fuzzy search pattern
152/// * `limit` - Maximum number of results
153/// * `search_directory` - Root directory to search
154/// * `exclude` - Exclusion patterns (glob-style)
155/// * `threads` - Number of worker threads
156/// * `cancel_flag` - Atomic flag for cancellation
157/// * `compute_indices` - Whether to compute character indices for highlighting
158/// * `respect_gitignore` - Whether to respect .gitignore files
159///
160/// # Returns
161///
162/// FileSearchResults containing matched files and total match count.
163pub fn run(
164    pattern_text: &str,
165    limit: NonZero<usize>,
166    search_directory: &Path,
167    exclude: Vec<String>,
168    threads: NonZero<usize>,
169    cancel_flag: Arc<AtomicBool>,
170    compute_indices: bool,
171    respect_gitignore: bool,
172) -> anyhow::Result<FileSearchResults> {
173    // Store pattern text for cloning across threads
174    // (Pattern is parsed per-match to work with Utf32Str)
175
176    // Build the directory walker
177    let mut walk_builder = ignore::WalkBuilder::new(search_directory);
178    walk_builder
179        .threads(threads.get())
180        .hidden(false)
181        .follow_links(true)
182        .require_git(false);
183
184    if !respect_gitignore {
185        walk_builder
186            .git_ignore(false)
187            .git_global(false)
188            .git_exclude(false)
189            .ignore(false)
190            .parents(false);
191    }
192
193    // Add exclusion patterns
194    if !exclude.is_empty() {
195        let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
196        for exclude_pattern in exclude {
197            let pattern = format!("!{}", exclude_pattern);
198            override_builder.add(&pattern)?;
199        }
200        let override_matcher = override_builder.build()?;
201        walk_builder.overrides(override_matcher);
202    }
203
204    let walker = walk_builder.build_parallel();
205
206    // Create per-worker result collection using Arc + Mutex for thread safety
207    let num_workers = threads.get();
208    let best_matchers_per_worker: Vec<Arc<std::sync::Mutex<BestMatchesList>>> = (0..num_workers)
209        .map(|_| Arc::new(std::sync::Mutex::new(BestMatchesList::new(limit.get()))))
210        .collect();
211
212    let index_counter = AtomicUsize::new(0);
213    let total_match_count = Arc::new(AtomicUsize::new(0));
214    let pattern_text = pattern_text.to_string();
215
216    // Run parallel traversal
217    walker.run(|| {
218        let worker_id =
219            index_counter.fetch_add(1, Ordering::Relaxed) % best_matchers_per_worker.len();
220        let best_list = best_matchers_per_worker[worker_id].clone();
221        let cancel_flag_clone = cancel_flag.clone();
222        let pattern_text_clone = pattern_text.clone();
223        let total_match_count_clone = total_match_count.clone();
224
225        Box::new(move |result| {
226            // Check cancellation flag periodically
227            if cancel_flag_clone.load(Ordering::Relaxed) {
228                return ignore::WalkState::Quit;
229            }
230
231            let entry = match result {
232                Ok(e) => e,
233                Err(_) => return ignore::WalkState::Continue,
234            };
235
236            // Skip directories
237            if entry.metadata().map_or(true, |m| m.is_dir()) {
238                return ignore::WalkState::Continue;
239            }
240
241            let path = match entry.path().to_str() {
242                Some(p) => p,
243                None => return ignore::WalkState::Continue,
244            };
245
246            // Try to add to results
247            if let Ok(mut list) = best_list.lock() {
248                if list.try_add(path, &pattern_text_clone).is_some() {
249                    total_match_count_clone.fetch_add(1, Ordering::Relaxed);
250                }
251            }
252
253            ignore::WalkState::Continue
254        })
255    });
256
257    // Merge results from all workers
258    let mut all_matches = Vec::new();
259    for arc in best_matchers_per_worker {
260        if let Ok(list) = arc.lock() {
261            let matches = list.clone_matches();
262            all_matches.extend(matches);
263        }
264    }
265
266    // Sort by score (descending) and limit
267    all_matches.sort_by(|a, b| b.0.cmp(&a.0));
268    all_matches.truncate(limit.get());
269
270    // Build final results
271    let matches = all_matches
272        .into_iter()
273        .map(|(score, path)| FileMatch {
274            score,
275            path,
276            indices: if compute_indices {
277                Some(Vec::new())
278            } else {
279                None
280            },
281        })
282        .collect();
283
284    Ok(FileSearchResults {
285        matches,
286        total_match_count: total_match_count.as_ref().load(Ordering::Relaxed),
287    })
288}
289
290#[cfg(test)]
291mod tests {
292    use super::*;
293    use std::fs;
294    use tempfile::TempDir;
295
296    #[test]
297    fn test_file_name_from_path() {
298        assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
299        assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
300        assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
301        assert_eq!(file_name_from_path("file.txt"), "file.txt");
302        assert_eq!(file_name_from_path(""), "");
303    }
304
305    #[test]
306    fn test_run_search() -> anyhow::Result<()> {
307        let temp = TempDir::new()?;
308        fs::write(temp.path().join("hello.rs"), "fn main() {}")?;
309        fs::write(temp.path().join("world.txt"), "world")?;
310
311        let results = run(
312            "hello",
313            NonZero::new(10).unwrap(),
314            temp.path(),
315            vec![],
316            NonZero::new(1).unwrap(),
317            Arc::new(AtomicBool::new(false)),
318            false,
319            false,
320        )?;
321
322        assert_eq!(results.matches.len(), 1);
323        assert!(results.matches[0].path.contains("hello"));
324
325        Ok(())
326    }
327
328    #[test]
329    fn test_multiple_matches() -> anyhow::Result<()> {
330        let temp = TempDir::new()?;
331        fs::write(temp.path().join("test1.rs"), "")?;
332        fs::write(temp.path().join("test2.rs"), "")?;
333        fs::write(temp.path().join("test3.rs"), "")?;
334        fs::write(temp.path().join("other.txt"), "")?;
335
336        let results = run(
337            "test",
338            NonZero::new(10).unwrap(),
339            temp.path(),
340            vec![],
341            NonZero::new(2).unwrap(),
342            Arc::new(AtomicBool::new(false)),
343            false,
344            false,
345        )?;
346
347        assert_eq!(results.matches.len(), 3);
348        assert!(results.matches.iter().all(|m| m.path.contains("test")));
349
350        Ok(())
351    }
352
353    #[test]
354    fn test_exclusion_patterns() -> anyhow::Result<()> {
355        let temp = TempDir::new()?;
356        fs::write(temp.path().join("keep.rs"), "")?;
357        fs::create_dir(temp.path().join("target"))?;
358        fs::write(temp.path().join("target/ignore.rs"), "")?;
359
360        let results = run(
361            "rs",
362            NonZero::new(10).unwrap(),
363            temp.path(),
364            vec!["target/**".to_string()],
365            NonZero::new(2).unwrap(),
366            Arc::new(AtomicBool::new(false)),
367            false,
368            false,
369        )?;
370
371        assert_eq!(results.matches.len(), 1);
372        assert!(results.matches[0].path.contains("keep.rs"));
373
374        Ok(())
375    }
376
377    #[test]
378    fn test_cancellation() -> anyhow::Result<()> {
379        let temp = TempDir::new()?;
380        for i in 0..10 {
381            fs::write(temp.path().join(format!("file{}.rs", i)), "")?;
382        }
383
384        let cancel_flag = Arc::new(AtomicBool::new(true));
385        let results = run(
386            "file",
387            NonZero::new(10).unwrap(),
388            temp.path(),
389            vec![],
390            NonZero::new(1).unwrap(),
391            cancel_flag,
392            false,
393            false,
394        )?;
395
396        // Should return early due to cancellation
397        assert!(results.matches.is_empty());
398
399        Ok(())
400    }
401}