1use parking_lot::Mutex;
33use serde::{Deserialize, Serialize};
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::sync::Arc;
38use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
39
40#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
48#[serde(rename_all = "lowercase")]
49pub enum MatchType {
50 File,
51 Directory,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct FileMatch {
56 pub score: u32,
57 pub path: String,
58 pub match_type: MatchType,
59 #[serde(skip_serializing_if = "Option::is_none")]
60 pub indices: Option<Vec<u32>>,
61}
62
63#[derive(Debug)]
65pub struct FileSearchResults {
66 pub matches: Vec<FileMatch>,
67 pub total_match_count: usize,
68}
69
70pub struct FileSearchConfig {
72 pub pattern_text: String,
73 pub limit: NonZero<usize>,
74 pub search_directory: std::path::PathBuf,
75 pub exclude: Vec<String>,
76 pub threads: NonZero<usize>,
77 pub cancel_flag: Arc<AtomicBool>,
78 pub compute_indices: bool,
79 pub respect_gitignore: bool,
80}
81
82pub use vtcode_commons::paths::file_name_from_path;
83
84struct BestMatchesList {
89 matches: BinaryHeap<Reverse<(u32, String, MatchType)>>,
90 limit: usize,
91 matcher: nucleo_matcher::Matcher,
92 haystack_buf: Vec<char>,
93 pattern_buf: Vec<char>,
94 pattern_text: String,
95}
96
97impl BestMatchesList {
98 fn new(limit: usize, pattern_text: &str) -> Self {
99 Self {
100 matches: BinaryHeap::new(),
101 limit,
102 matcher: nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT),
103 haystack_buf: Vec::with_capacity(256),
104 pattern_buf: Vec::with_capacity(pattern_text.len()),
105 pattern_text: pattern_text.to_string(),
106 }
107 }
108
109 fn record_match(&mut self, path: &str, match_type: MatchType) -> bool {
114 let haystack = nucleo_matcher::Utf32Str::new(path, &mut self.haystack_buf);
115 let needle = nucleo_matcher::Utf32Str::new(&self.pattern_text, &mut self.pattern_buf);
116 let Some(score) = self.matcher.fuzzy_match(haystack, needle) else {
117 return false;
118 };
119
120 push_top_match(
121 &mut self.matches,
122 self.limit,
123 score as u32,
124 path.to_string(),
125 match_type,
126 );
127 true
128 }
129}
130
131fn push_top_match(
132 matches: &mut BinaryHeap<Reverse<(u32, String, MatchType)>>,
133 limit: usize,
134 score: u32,
135 path: String,
136 match_type: MatchType,
137) -> bool {
138 if matches.len() < limit {
139 matches.push(Reverse((score, path, match_type)));
140 return true;
141 }
142
143 let Some(min_score) = matches.peek().map(|entry| entry.0.0) else {
144 return false;
145 };
146
147 if score <= min_score {
148 return false;
149 }
150
151 matches.pop();
152 matches.push(Reverse((score, path, match_type)));
153 true
154}
155
156pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
166 let limit = config.limit.get();
167 let search_directory = &config.search_directory;
168 let exclude = &config.exclude;
169 let threads = config.threads.get();
170 let cancel_flag = &config.cancel_flag;
171 let compute_indices = config.compute_indices;
172 let respect_gitignore = config.respect_gitignore;
173 let mut walk_builder = ignore::WalkBuilder::new(search_directory);
178 walk_builder
179 .threads(threads)
180 .hidden(false)
181 .follow_links(true)
182 .require_git(false);
183
184 if !respect_gitignore {
185 walk_builder
186 .git_ignore(false)
187 .git_global(false)
188 .git_exclude(false)
189 .ignore(false)
190 .parents(false);
191 }
192
193 if !exclude.is_empty() {
195 let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
196 for exclude_pattern in exclude {
197 let pattern = format!("!{}", exclude_pattern);
198 override_builder.add(&pattern)?;
199 }
200 let override_matcher = override_builder.build()?;
201 walk_builder.overrides(override_matcher);
202 }
203
204 let walker = walk_builder.build_parallel();
205
206 let best_matchers_per_worker: Vec<Arc<Mutex<BestMatchesList>>> = (0..threads)
208 .map(|_| {
209 Arc::new(Mutex::new(BestMatchesList::new(
210 limit,
211 &config.pattern_text,
212 )))
213 })
214 .collect();
215
216 let index_counter = AtomicUsize::new(0);
217 let total_match_count = Arc::new(AtomicUsize::new(0));
218
219 walker.run(|| {
221 let worker_id =
222 index_counter.fetch_add(1, Ordering::Relaxed) % best_matchers_per_worker.len();
223 let best_list = best_matchers_per_worker[worker_id].clone();
224 let cancel_flag_clone = cancel_flag.clone();
225 let total_match_count_clone = total_match_count.clone();
226
227 Box::new(move |result| {
228 if cancel_flag_clone.load(Ordering::Relaxed) {
230 return ignore::WalkState::Quit;
231 }
232
233 let entry = match result {
234 Ok(e) => e,
235 Err(_) => return ignore::WalkState::Continue,
236 };
237
238 let path = match entry.path().to_str() {
239 Some(p) => p,
240 None => return ignore::WalkState::Continue,
241 };
242 let match_type = if entry.path().is_dir() {
243 MatchType::Directory
244 } else {
245 MatchType::File
246 };
247
248 {
250 let mut list = best_list.lock();
251 if list.record_match(path, match_type) {
252 total_match_count_clone.fetch_add(1, Ordering::Relaxed);
253 }
254 }
255
256 ignore::WalkState::Continue
257 })
258 });
259
260 let mut merged_matches = BinaryHeap::with_capacity(limit);
262 for arc in best_matchers_per_worker {
263 let mut list = arc.lock();
264 for Reverse((score, path, match_type)) in std::mem::take(&mut list.matches).into_vec() {
265 push_top_match(&mut merged_matches, limit, score, path, match_type);
266 }
267 }
268
269 let matches = merged_matches
271 .into_sorted_vec()
272 .into_iter()
273 .map(|Reverse((score, path, match_type))| FileMatch {
274 score,
275 path,
276 match_type,
277 indices: if compute_indices {
278 Some(Vec::new())
279 } else {
280 None
281 },
282 })
283 .collect();
284
285 Ok(FileSearchResults {
286 matches,
287 total_match_count: total_match_count.load(Ordering::Relaxed),
288 })
289}
290
291#[cfg(test)]
292mod tests {
293 use super::*;
294 use std::fs;
295 use tempfile::TempDir;
296
297 #[test]
298 fn test_file_name_from_path() {
299 assert_eq!(file_name_from_path("src/main.rs"), "main.rs");
300 assert_eq!(file_name_from_path("Cargo.toml"), "Cargo.toml");
301 assert_eq!(file_name_from_path("/absolute/path/file.txt"), "file.txt");
302 assert_eq!(file_name_from_path("file.txt"), "file.txt");
303 assert_eq!(file_name_from_path(""), "");
304 }
305
306 #[test]
307 fn test_run_search() -> anyhow::Result<()> {
308 let temp = TempDir::new()?;
309 fs::write(temp.path().join("hello.rs"), "fn main() {}")?;
310 fs::write(temp.path().join("world.txt"), "world")?;
311
312 let results = run(FileSearchConfig {
313 pattern_text: "hello".to_string(),
314 limit: NonZero::new(10).unwrap(),
315 search_directory: temp.path().to_path_buf(),
316 exclude: vec![],
317 threads: NonZero::new(1).unwrap(),
318 cancel_flag: Arc::new(AtomicBool::new(false)),
319 compute_indices: false,
320 respect_gitignore: false,
321 })?;
322
323 assert_eq!(results.matches.len(), 1);
324 assert!(results.matches[0].path.contains("hello"));
325 assert_eq!(results.matches[0].match_type, MatchType::File);
326
327 Ok(())
328 }
329
330 #[test]
331 fn test_multiple_matches() -> anyhow::Result<()> {
332 let temp = TempDir::new()?;
333 fs::write(temp.path().join("test1.rs"), "")?;
334 fs::write(temp.path().join("test2.rs"), "")?;
335 fs::write(temp.path().join("test3.rs"), "")?;
336 fs::write(temp.path().join("other.txt"), "")?;
337
338 let results = run(FileSearchConfig {
339 pattern_text: "test".to_string(),
340 limit: NonZero::new(10).unwrap(),
341 search_directory: temp.path().to_path_buf(),
342 exclude: vec![],
343 threads: NonZero::new(2).unwrap(),
344 cancel_flag: Arc::new(AtomicBool::new(false)),
345 compute_indices: false,
346 respect_gitignore: false,
347 })?;
348
349 assert_eq!(results.matches.len(), 3);
350 assert!(results.matches.iter().all(|m| m.path.contains("test")));
351 assert!(
352 results
353 .matches
354 .iter()
355 .all(|m| matches!(m.match_type, MatchType::File))
356 );
357
358 Ok(())
359 }
360
361 #[test]
362 fn test_limit_is_respected_across_workers() -> anyhow::Result<()> {
363 let temp = TempDir::new()?;
364 for name in ["alpha.rs", "alphabet.rs", "alphanumeric.rs", "alpaca.rs"] {
365 fs::write(temp.path().join(name), "")?;
366 }
367
368 let results = run(FileSearchConfig {
369 pattern_text: "alpha".to_string(),
370 limit: NonZero::new(2).unwrap(),
371 search_directory: temp.path().to_path_buf(),
372 exclude: vec![],
373 threads: NonZero::new(4).unwrap(),
374 cancel_flag: Arc::new(AtomicBool::new(false)),
375 compute_indices: false,
376 respect_gitignore: false,
377 })?;
378
379 assert_eq!(results.matches.len(), 2);
380 assert!(
381 results
382 .matches
383 .windows(2)
384 .all(|window| window[0].score >= window[1].score)
385 );
386
387 Ok(())
388 }
389
390 #[test]
391 fn test_exclusion_patterns() -> anyhow::Result<()> {
392 let temp = TempDir::new()?;
393 fs::write(temp.path().join("keep.rs"), "")?;
394 fs::create_dir(temp.path().join("target"))?;
395 fs::write(temp.path().join("target/ignore.rs"), "")?;
396
397 let results = run(FileSearchConfig {
398 pattern_text: "rs".to_string(),
399 limit: NonZero::new(10).unwrap(),
400 search_directory: temp.path().to_path_buf(),
401 exclude: vec!["target/**".to_string()],
402 threads: NonZero::new(2).unwrap(),
403 cancel_flag: Arc::new(AtomicBool::new(false)),
404 compute_indices: false,
405 respect_gitignore: false,
406 })?;
407
408 assert_eq!(results.matches.len(), 1);
409 assert!(results.matches[0].path.contains("keep.rs"));
410 assert_eq!(results.matches[0].match_type, MatchType::File);
411
412 Ok(())
413 }
414
415 #[test]
416 fn test_cancellation() -> anyhow::Result<()> {
417 let temp = TempDir::new()?;
418 for i in 0..10 {
419 fs::write(temp.path().join(format!("file{}.rs", i)), "")?;
420 }
421
422 let cancel_flag = Arc::new(AtomicBool::new(true));
423 let results = run(FileSearchConfig {
424 pattern_text: "file".to_string(),
425 limit: NonZero::new(10).unwrap(),
426 search_directory: temp.path().to_path_buf(),
427 exclude: vec![],
428 threads: NonZero::new(1).unwrap(),
429 cancel_flag,
430 compute_indices: false,
431 respect_gitignore: false,
432 })?;
433
434 assert!(results.matches.is_empty());
436
437 Ok(())
438 }
439
440 #[test]
441 fn test_directory_matches_are_returned() -> anyhow::Result<()> {
442 let temp = TempDir::new()?;
443 fs::create_dir_all(temp.path().join("docs/guides"))?;
444 fs::write(temp.path().join("docs/guides/intro.md"), "intro")?;
445 fs::write(temp.path().join("docs/readme.md"), "readme")?;
446
447 let results = run(FileSearchConfig {
448 pattern_text: "guides".to_string(),
449 limit: NonZero::new(10).unwrap(),
450 search_directory: temp.path().to_path_buf(),
451 exclude: vec![],
452 threads: NonZero::new(2).unwrap(),
453 cancel_flag: Arc::new(AtomicBool::new(false)),
454 compute_indices: false,
455 respect_gitignore: false,
456 })?;
457
458 assert!(results.matches.iter().any(
459 |m| m.path.ends_with("docs/guides") && matches!(m.match_type, MatchType::Directory)
460 ));
461
462 Ok(())
463 }
464}