1use parking_lot::Mutex;
33use serde::Serialize;
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::sync::Arc;
38use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
39
40#[derive(Debug, Clone, Serialize)]
47pub struct FileMatch {
48 pub score: u32,
49 pub path: String,
50 #[serde(skip_serializing_if = "Option::is_none")]
51 pub indices: Option<Vec<u32>>,
52}
53
54#[derive(Debug)]
56pub struct FileSearchResults {
57 pub matches: Vec<FileMatch>,
58 pub total_match_count: usize,
59}
60
61pub struct FileSearchConfig {
63 pub pattern_text: String,
64 pub limit: NonZero<usize>,
65 pub search_directory: std::path::PathBuf,
66 pub exclude: Vec<String>,
67 pub threads: NonZero<usize>,
68 pub cancel_flag: Arc<AtomicBool>,
69 pub compute_indices: bool,
70 pub respect_gitignore: bool,
71}
72
73pub use vtcode_commons::paths::file_name_from_path;
74
75struct BestMatchesList {
80 matches: BinaryHeap<Reverse<(u32, String)>>,
81 limit: usize,
82 matcher: nucleo_matcher::Matcher,
83 haystack_buf: Vec<char>,
84 pattern_buf: Vec<char>,
85 pattern_text: String,
86}
87
88impl BestMatchesList {
89 fn new(limit: usize, pattern_text: &str) -> Self {
90 Self {
91 matches: BinaryHeap::new(),
92 limit,
93 matcher: nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT),
94 haystack_buf: Vec::with_capacity(256),
95 pattern_buf: Vec::with_capacity(pattern_text.len()),
96 pattern_text: pattern_text.to_string(),
97 }
98 }
99
100 fn record_match(&mut self, path: &str) -> bool {
105 let haystack = nucleo_matcher::Utf32Str::new(path, &mut self.haystack_buf);
106 let needle = nucleo_matcher::Utf32Str::new(&self.pattern_text, &mut self.pattern_buf);
107 let Some(score) = self.matcher.fuzzy_match(haystack, needle) else {
108 return false;
109 };
110
111 push_top_match(
112 &mut self.matches,
113 self.limit,
114 score as u32,
115 path.to_string(),
116 );
117 true
118 }
119}
120
121fn push_top_match(
122 matches: &mut BinaryHeap<Reverse<(u32, String)>>,
123 limit: usize,
124 score: u32,
125 path: String,
126) -> bool {
127 if matches.len() < limit {
128 matches.push(Reverse((score, path)));
129 return true;
130 }
131
132 let Some(min_score) = matches.peek().map(|entry| entry.0.0) else {
133 return false;
134 };
135
136 if score <= min_score {
137 return false;
138 }
139
140 matches.pop();
141 matches.push(Reverse((score, path)));
142 true
143}
144
145pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
155 let limit = config.limit.get();
156 let search_directory = &config.search_directory;
157 let exclude = &config.exclude;
158 let threads = config.threads.get();
159 let cancel_flag = &config.cancel_flag;
160 let compute_indices = config.compute_indices;
161 let respect_gitignore = config.respect_gitignore;
162 let mut walk_builder = ignore::WalkBuilder::new(search_directory);
167 walk_builder
168 .threads(threads)
169 .hidden(false)
170 .follow_links(true)
171 .require_git(false);
172
173 if !respect_gitignore {
174 walk_builder
175 .git_ignore(false)
176 .git_global(false)
177 .git_exclude(false)
178 .ignore(false)
179 .parents(false);
180 }
181
182 if !exclude.is_empty() {
184 let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
185 for exclude_pattern in exclude {
186 let pattern = format!("!{}", exclude_pattern);
187 override_builder.add(&pattern)?;
188 }
189 let override_matcher = override_builder.build()?;
190 walk_builder.overrides(override_matcher);
191 }
192
193 let walker = walk_builder.build_parallel();
194
195 let best_matchers_per_worker: Vec<Arc<Mutex<BestMatchesList>>> = (0..threads)
197 .map(|_| {
198 Arc::new(Mutex::new(BestMatchesList::new(
199 limit,
200 &config.pattern_text,
201 )))
202 })
203 .collect();
204
205 let index_counter = AtomicUsize::new(0);
206 let total_match_count = Arc::new(AtomicUsize::new(0));
207
208 walker.run(|| {
210 let worker_id =
211 index_counter.fetch_add(1, Ordering::Relaxed) % best_matchers_per_worker.len();
212 let best_list = best_matchers_per_worker[worker_id].clone();
213 let cancel_flag_clone = cancel_flag.clone();
214 let total_match_count_clone = total_match_count.clone();
215
216 Box::new(move |result| {
217 if cancel_flag_clone.load(Ordering::Relaxed) {
219 return ignore::WalkState::Quit;
220 }
221
222 let entry = match result {
223 Ok(e) => e,
224 Err(_) => return ignore::WalkState::Continue,
225 };
226
227 if entry.metadata().map_or(true, |m| m.is_dir()) {
229 return ignore::WalkState::Continue;
230 }
231
232 let path = match entry.path().to_str() {
233 Some(p) => p,
234 None => return ignore::WalkState::Continue,
235 };
236
237 {
239 let mut list = best_list.lock();
240 if list.record_match(path) {
241 total_match_count_clone.fetch_add(1, Ordering::Relaxed);
242 }
243 }
244
245 ignore::WalkState::Continue
246 })
247 });
248
249 let mut merged_matches = BinaryHeap::with_capacity(limit);
251 for arc in best_matchers_per_worker {
252 let mut list = arc.lock();
253 for Reverse((score, path)) in std::mem::take(&mut list.matches).into_vec() {
254 push_top_match(&mut merged_matches, limit, score, path);
255 }
256 }
257
258 let matches = merged_matches
260 .into_sorted_vec()
261 .into_iter()
262 .map(|Reverse((score, path))| FileMatch {
263 score,
264 path,
265 indices: if compute_indices {
266 Some(Vec::new())
267 } else {
268 None
269 },
270 })
271 .collect();
272
273 Ok(FileSearchResults {
274 matches,
275 total_match_count: total_match_count.load(Ordering::Relaxed),
276 })
277}
278
279#[cfg(test)]
280mod tests {
281 use super::*;
282 use std::fs;
283 use tempfile::TempDir;
284
285 #[test]
286 fn test_file_name_from_path() {
287 assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
288 assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
289 assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
290 assert_eq!(file_name_from_path("file.txt"), "file.txt");
291 assert_eq!(file_name_from_path(""), "");
292 }
293
294 #[test]
295 fn test_run_search() -> anyhow::Result<()> {
296 let temp = TempDir::new()?;
297 fs::write(temp.path().join("hello.rs"), "fn main() {}")?;
298 fs::write(temp.path().join("world.txt"), "world")?;
299
300 let results = run(FileSearchConfig {
301 pattern_text: "hello".to_string(),
302 limit: NonZero::new(10).unwrap(),
303 search_directory: temp.path().to_path_buf(),
304 exclude: vec![],
305 threads: NonZero::new(1).unwrap(),
306 cancel_flag: Arc::new(AtomicBool::new(false)),
307 compute_indices: false,
308 respect_gitignore: false,
309 })?;
310
311 assert_eq!(results.matches.len(), 1);
312 assert!(results.matches[0].path.contains("hello"));
313
314 Ok(())
315 }
316
317 #[test]
318 fn test_multiple_matches() -> anyhow::Result<()> {
319 let temp = TempDir::new()?;
320 fs::write(temp.path().join("test1.rs"), "")?;
321 fs::write(temp.path().join("test2.rs"), "")?;
322 fs::write(temp.path().join("test3.rs"), "")?;
323 fs::write(temp.path().join("other.txt"), "")?;
324
325 let results = run(FileSearchConfig {
326 pattern_text: "test".to_string(),
327 limit: NonZero::new(10).unwrap(),
328 search_directory: temp.path().to_path_buf(),
329 exclude: vec![],
330 threads: NonZero::new(2).unwrap(),
331 cancel_flag: Arc::new(AtomicBool::new(false)),
332 compute_indices: false,
333 respect_gitignore: false,
334 })?;
335
336 assert_eq!(results.matches.len(), 3);
337 assert!(results.matches.iter().all(|m| m.path.contains("test")));
338
339 Ok(())
340 }
341
342 #[test]
343 fn test_limit_is_respected_across_workers() -> anyhow::Result<()> {
344 let temp = TempDir::new()?;
345 for name in ["alpha.rs", "alphabet.rs", "alphanumeric.rs", "alpaca.rs"] {
346 fs::write(temp.path().join(name), "")?;
347 }
348
349 let results = run(FileSearchConfig {
350 pattern_text: "alpha".to_string(),
351 limit: NonZero::new(2).unwrap(),
352 search_directory: temp.path().to_path_buf(),
353 exclude: vec![],
354 threads: NonZero::new(4).unwrap(),
355 cancel_flag: Arc::new(AtomicBool::new(false)),
356 compute_indices: false,
357 respect_gitignore: false,
358 })?;
359
360 assert_eq!(results.matches.len(), 2);
361 assert!(
362 results
363 .matches
364 .windows(2)
365 .all(|window| window[0].score >= window[1].score)
366 );
367
368 Ok(())
369 }
370
371 #[test]
372 fn test_exclusion_patterns() -> anyhow::Result<()> {
373 let temp = TempDir::new()?;
374 fs::write(temp.path().join("keep.rs"), "")?;
375 fs::create_dir(temp.path().join("target"))?;
376 fs::write(temp.path().join("target/ignore.rs"), "")?;
377
378 let results = run(FileSearchConfig {
379 pattern_text: "rs".to_string(),
380 limit: NonZero::new(10).unwrap(),
381 search_directory: temp.path().to_path_buf(),
382 exclude: vec!["target/**".to_string()],
383 threads: NonZero::new(2).unwrap(),
384 cancel_flag: Arc::new(AtomicBool::new(false)),
385 compute_indices: false,
386 respect_gitignore: false,
387 })?;
388
389 assert_eq!(results.matches.len(), 1);
390 assert!(results.matches[0].path.contains("keep.rs"));
391
392 Ok(())
393 }
394
395 #[test]
396 fn test_cancellation() -> anyhow::Result<()> {
397 let temp = TempDir::new()?;
398 for i in 0..10 {
399 fs::write(temp.path().join(format!("file{}.rs", i)), "")?;
400 }
401
402 let cancel_flag = Arc::new(AtomicBool::new(true));
403 let results = run(FileSearchConfig {
404 pattern_text: "file".to_string(),
405 limit: NonZero::new(10).unwrap(),
406 search_directory: temp.path().to_path_buf(),
407 exclude: vec![],
408 threads: NonZero::new(1).unwrap(),
409 cancel_flag,
410 compute_indices: false,
411 respect_gitignore: false,
412 })?;
413
414 assert!(results.matches.is_empty());
416
417 Ok(())
418 }
419}