vtcode_file_search/
lib.rs

1//! Fast fuzzy file search library for VT Code.
2//!
3//! Uses the `ignore` crate (same as ripgrep) for parallel directory traversal
4//! and `nucleo-matcher` for fuzzy matching.
5//!
6//! # Example
7//!
8//! ```ignore
9//! use std::num::NonZero;
10//! use std::path::Path;
11//! use std::sync::Arc;
12//! use std::sync::atomic::AtomicBool;
13//! use vtcode_file_search::run;
14//!
15//! let results = run(
16//!     "main",
17//!     NonZero::new(100).unwrap(),
18//!     Path::new("."),
19//!     vec![],
20//!     NonZero::new(4).unwrap(),
21//!     Arc::new(AtomicBool::new(false)),
22//!     false,
23//!     true,
24//! )?;
25//!
26//! for m in results.matches {
27//!     println!("{}: {}", m.path, m.score);
28//! }
29//! # Ok::<(), anyhow::Error>(())
30//! ```
31
32use nucleo_matcher::{Matcher, Utf32Str};
33use serde::Serialize;
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::path::Path;
38use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
39use std::sync::Arc;
40
41/// A single file match result.
42///
43/// Fields:
44/// - `score`: Relevance score from fuzzy matching (higher is better)
45/// - `path`: File path relative to the search directory
46/// - `indices`: Optional character positions for highlighting matched characters
47#[derive(Debug, Clone, Serialize)]
48pub struct FileMatch {
49    pub score: u32,
50    pub path: String,
51    #[serde(skip_serializing_if = "Option::is_none")]
52    pub indices: Option<Vec<u32>>,
53}
54
55/// Complete search results with total match count.
56#[derive(Debug)]
57pub struct FileSearchResults {
58    pub matches: Vec<FileMatch>,
59    pub total_match_count: usize,
60}
61
62/// Configuration for file search operations.
63pub struct FileSearchConfig {
64    pub pattern_text: String,
65    pub limit: NonZero<usize>,
66    pub search_directory: std::path::PathBuf,
67    pub exclude: Vec<String>,
68    pub threads: NonZero<usize>,
69    pub cancel_flag: Arc<AtomicBool>,
70    pub compute_indices: bool,
71    pub respect_gitignore: bool,
72}
73
74/// Extract the filename from a path, with fallback to the full path.
75///
76/// # Examples
77///
78/// ```
79/// use vtcode_file_search::file_name_from_path;
80///
81/// assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
82/// assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
83/// assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
84/// ```
85pub fn file_name_from_path(path: &str) -> String {
86    Path::new(path)
87        .file_name()
88        .and_then(|name| name.to_str())
89        .map(|s| s.to_string())
90        .unwrap_or_else(|| path.to_string())
91}
92
93/// Best matches list per worker thread (lock-free collection).
94///
95/// Each worker thread gets its own instance to avoid locking during
96/// directory traversal. Results are merged at the end.
97struct BestMatchesList {
98    matches: BinaryHeap<Reverse<(u32, String)>>,
99    limit: usize,
100    matcher: Matcher,
101    haystack_buf: Vec<char>,
102}
103
104impl BestMatchesList {
105    fn new(limit: usize) -> Self {
106        Self {
107            matches: BinaryHeap::new(),
108            limit,
109            matcher: Matcher::new(nucleo_matcher::Config::DEFAULT),
110            haystack_buf: Vec::new(),
111        }
112    }
113
114    /// Try to add a match, maintaining top-K results.
115    ///
116    /// Returns Some(score) if the match was added or if it would replace
117    /// a lower-scoring match.
118    fn try_add(&mut self, path: &str, pattern_text: &str) -> Option<u32> {
119        self.haystack_buf.clear();
120        let haystack = Utf32Str::new(path, &mut self.haystack_buf);
121        let mut pattern_buf = Vec::new();
122        let needle = Utf32Str::new(pattern_text, &mut pattern_buf);
123        let score = self.matcher.fuzzy_match(haystack, needle)? as u32;
124
125        if self.matches.len() < self.limit {
126            self.matches.push(Reverse((score, path.to_string())));
127            Some(score)
128        } else {
129            let min_score = self.matches.peek().unwrap().0 .0;
130            if score > min_score {
131                self.matches.pop();
132                self.matches.push(Reverse((score, path.to_string())));
133                Some(score)
134            } else {
135                None
136            }
137        }
138    }
139
140    /// Convert into sorted matches (highest score first).
141    #[allow(dead_code)]
142    fn into_matches(self) -> Vec<(u32, String)> {
143        self.matches
144            .into_sorted_vec()
145            .into_iter()
146            .map(|Reverse((score, path))| (score, path))
147            .collect()
148    }
149
150    /// Clone current matches without consuming self.
151    fn clone_matches(&self) -> Vec<(u32, String)> {
152        self.matches
153            .iter()
154            .map(|Reverse((score, path))| (*score, path.clone()))
155            .collect()
156    }
157}
158
159/// Run fuzzy file search with parallel traversal.
160///
161/// # Arguments
162///
163/// * `config` - File search configuration containing all search parameters
164///
165/// # Returns
166///
167/// FileSearchResults containing matched files and total match count.
168pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
169    let pattern_text = &config.pattern_text;
170    let limit = config.limit;
171    let search_directory = &config.search_directory;
172    let exclude = &config.exclude;
173    let threads = config.threads;
174    let cancel_flag = &config.cancel_flag;
175    let compute_indices = config.compute_indices;
176    let respect_gitignore = config.respect_gitignore;
177    // Store pattern text for cloning across threads
178    // (Pattern is parsed per-match to work with Utf32Str)
179
180    // Build the directory walker
181    let mut walk_builder = ignore::WalkBuilder::new(search_directory);
182    walk_builder
183        .threads(threads.get())
184        .hidden(false)
185        .follow_links(true)
186        .require_git(false);
187
188    if !respect_gitignore {
189        walk_builder
190            .git_ignore(false)
191            .git_global(false)
192            .git_exclude(false)
193            .ignore(false)
194            .parents(false);
195    }
196
197    // Add exclusion patterns
198    if !exclude.is_empty() {
199        let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
200        for exclude_pattern in exclude {
201            let pattern = format!("!{}", exclude_pattern);
202            override_builder.add(&pattern)?;
203        }
204        let override_matcher = override_builder.build()?;
205        walk_builder.overrides(override_matcher);
206    }
207
208    let walker = walk_builder.build_parallel();
209
210    // Create per-worker result collection using Arc + Mutex for thread safety
211    let num_workers = threads.get();
212    let best_matchers_per_worker: Vec<Arc<std::sync::Mutex<BestMatchesList>>> = (0..num_workers)
213        .map(|_| Arc::new(std::sync::Mutex::new(BestMatchesList::new(limit.get()))))
214        .collect();
215
216    let index_counter = AtomicUsize::new(0);
217    let total_match_count = Arc::new(AtomicUsize::new(0));
218    let pattern_text = pattern_text.to_string();
219
220    // Run parallel traversal
221    walker.run(|| {
222        let worker_id =
223            index_counter.fetch_add(1, Ordering::Relaxed) % best_matchers_per_worker.len();
224        let best_list = best_matchers_per_worker[worker_id].clone();
225        let cancel_flag_clone = cancel_flag.clone();
226        let pattern_text_clone = pattern_text.clone();
227        let total_match_count_clone = total_match_count.clone();
228
229        Box::new(move |result| {
230            // Check cancellation flag periodically
231            if cancel_flag_clone.load(Ordering::Relaxed) {
232                return ignore::WalkState::Quit;
233            }
234
235            let entry = match result {
236                Ok(e) => e,
237                Err(_) => return ignore::WalkState::Continue,
238            };
239
240            // Skip directories
241            if entry.metadata().map_or(true, |m| m.is_dir()) {
242                return ignore::WalkState::Continue;
243            }
244
245            let path = match entry.path().to_str() {
246                Some(p) => p,
247                None => return ignore::WalkState::Continue,
248            };
249
250            // Try to add to results
251            if let Ok(mut list) = best_list.lock() {
252                if list.try_add(path, &pattern_text_clone).is_some() {
253                    total_match_count_clone.fetch_add(1, Ordering::Relaxed);
254                }
255            }
256
257            ignore::WalkState::Continue
258        })
259    });
260
261    // Merge results from all workers
262    let mut all_matches = Vec::new();
263    for arc in best_matchers_per_worker {
264        if let Ok(list) = arc.lock() {
265            let matches = list.clone_matches();
266            all_matches.extend(matches);
267        }
268    }
269
270    // Sort by score (descending) and limit
271    all_matches.sort_by(|a, b| b.0.cmp(&a.0));
272    all_matches.truncate(limit.get());
273
274    // Build final results
275    let matches = all_matches
276        .into_iter()
277        .map(|(score, path)| FileMatch {
278            score,
279            path,
280            indices: if compute_indices {
281                Some(Vec::new())
282            } else {
283                None
284            },
285        })
286        .collect();
287
288    Ok(FileSearchResults {
289        matches,
290        total_match_count: total_match_count.as_ref().load(Ordering::Relaxed),
291    })
292}
293
294#[cfg(test)]
295mod tests {
296    use super::*;
297    use std::fs;
298    use tempfile::TempDir;
299
300    #[test]
301    fn test_file_name_from_path() {
302        assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
303        assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
304        assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
305        assert_eq!(file_name_from_path("file.txt"), "file.txt");
306        assert_eq!(file_name_from_path(""), "");
307    }
308
309    #[test]
310    fn test_run_search() -> anyhow::Result<()> {
311        let temp = TempDir::new()?;
312        fs::write(temp.path().join("hello.rs"), "fn main() {}")?;
313        fs::write(temp.path().join("world.txt"), "world")?;
314
315        let results = run(
316            "hello",
317            NonZero::new(10).unwrap(),
318            temp.path(),
319            vec![],
320            NonZero::new(1).unwrap(),
321            Arc::new(AtomicBool::new(false)),
322            false,
323            false,
324        )?;
325
326        assert_eq!(results.matches.len(), 1);
327        assert!(results.matches[0].path.contains("hello"));
328
329        Ok(())
330    }
331
332    #[test]
333    fn test_multiple_matches() -> anyhow::Result<()> {
334        let temp = TempDir::new()?;
335        fs::write(temp.path().join("test1.rs"), "")?;
336        fs::write(temp.path().join("test2.rs"), "")?;
337        fs::write(temp.path().join("test3.rs"), "")?;
338        fs::write(temp.path().join("other.txt"), "")?;
339
340        let results = run(
341            "test",
342            NonZero::new(10).unwrap(),
343            temp.path(),
344            vec![],
345            NonZero::new(2).unwrap(),
346            Arc::new(AtomicBool::new(false)),
347            false,
348            false,
349        )?;
350
351        assert_eq!(results.matches.len(), 3);
352        assert!(results.matches.iter().all(|m| m.path.contains("test")));
353
354        Ok(())
355    }
356
357    #[test]
358    fn test_exclusion_patterns() -> anyhow::Result<()> {
359        let temp = TempDir::new()?;
360        fs::write(temp.path().join("keep.rs"), "")?;
361        fs::create_dir(temp.path().join("target"))?;
362        fs::write(temp.path().join("target/ignore.rs"), "")?;
363
364        let results = run(
365            "rs",
366            NonZero::new(10).unwrap(),
367            temp.path(),
368            vec!["target/**".to_string()],
369            NonZero::new(2).unwrap(),
370            Arc::new(AtomicBool::new(false)),
371            false,
372            false,
373        )?;
374
375        assert_eq!(results.matches.len(), 1);
376        assert!(results.matches[0].path.contains("keep.rs"));
377
378        Ok(())
379    }
380
381    #[test]
382    fn test_cancellation() -> anyhow::Result<()> {
383        let temp = TempDir::new()?;
384        for i in 0..10 {
385            fs::write(temp.path().join(format!("file{}.rs", i)), "")?;
386        }
387
388        let cancel_flag = Arc::new(AtomicBool::new(true));
389        let results = run(
390            "file",
391            NonZero::new(10).unwrap(),
392            temp.path(),
393            vec![],
394            NonZero::new(1).unwrap(),
395            cancel_flag,
396            false,
397            false,
398        )?;
399
400        // Should return early due to cancellation
401        assert!(results.matches.is_empty());
402
403        Ok(())
404    }
405}