1use parking_lot::Mutex;
33use serde::{Deserialize, Serialize};
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::path::Path;
38use std::sync::Arc;
39use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
40use tokio::sync::RwLock;
41
42pub struct FileIndex {
47 files: Vec<String>,
49 directories: Vec<String>,
51 last_built: std::time::Instant,
53}
54
55fn build_parallel_walker(
57 search_directory: &Path,
58 exclude: &[String],
59 threads: usize,
60 respect_gitignore: bool,
61) -> anyhow::Result<ignore::WalkParallel> {
62 let mut walk_builder = ignore::WalkBuilder::new(search_directory);
63 vtcode_commons::walk::apply_defaults(&mut walk_builder);
64
65 walk_builder.threads(threads);
67 walk_builder.follow_links(true); walk_builder.require_git(false); if !respect_gitignore {
71 walk_builder
72 .git_ignore(false)
73 .git_global(false)
74 .git_exclude(false)
75 .ignore(false)
76 .parents(false);
77 }
78
79 if !exclude.is_empty() {
80 let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
81 for exclude_pattern in exclude {
82 let pattern = format!("!{}", exclude_pattern);
83 override_builder.add(&pattern)?;
84 }
85 walk_builder.overrides(override_builder.build()?);
86 }
87
88 Ok(walk_builder.build_parallel())
89}
90
91impl FileIndex {
92 fn build_from_directory(
95 search_directory: &Path,
96 exclude: &[String],
97 respect_gitignore: bool,
98 threads: usize,
99 ) -> anyhow::Result<Self> {
100 let walker = build_parallel_walker(search_directory, exclude, threads, respect_gitignore)?;
101
102 let files_arc = Arc::new(Mutex::new(Vec::new()));
104 let dirs_arc = Arc::new(Mutex::new(Vec::new()));
105
106 walker.run(|| {
107 let files_clone = files_arc.clone();
108 let dirs_clone = dirs_arc.clone();
109 let search_dir = search_directory.to_path_buf();
110
111 Box::new(move |result| {
112 let entry = match result {
113 Ok(e) => e,
114 Err(_) => return ignore::WalkState::Continue,
115 };
116
117 if let Some(rel_path) = entry
119 .path()
120 .strip_prefix(&search_dir)
121 .ok()
122 .and_then(|p| p.to_str())
123 && !rel_path.is_empty()
124 {
125 if entry.path().is_dir() {
126 dirs_clone.lock().push(rel_path.to_string());
127 } else {
128 files_clone.lock().push(rel_path.to_string());
129 }
130 }
131
132 ignore::WalkState::Continue
133 })
134 });
135
136 let files = Arc::try_unwrap(files_arc)
137 .map_err(|arc| {
138 anyhow::anyhow!(
139 "failed to unwrap files arc, {} references remain",
140 Arc::strong_count(&arc)
141 )
142 })?
143 .into_inner();
144 let directories = Arc::try_unwrap(dirs_arc)
145 .map_err(|arc| {
146 anyhow::anyhow!(
147 "failed to unwrap dirs arc, {} references remain",
148 Arc::strong_count(&arc)
149 )
150 })?
151 .into_inner();
152
153 Ok(Self {
154 files,
155 directories,
156 last_built: std::time::Instant::now(),
157 })
158 }
159
160 fn query(
163 &self,
164 pattern_text: &str,
165 limit: usize,
166 match_type_filter: Option<MatchType>,
167 ) -> Vec<(u32, String, MatchType)> {
168 let mut results = BinaryHeap::with_capacity(limit);
169
170 let pattern_storage = if pattern_text.is_ascii() {
174 PatternStorage::Ascii(pattern_text.to_ascii_lowercase().into_bytes())
175 } else {
176 PatternStorage::Unicode(pattern_text.to_lowercase().chars().collect())
177 };
178
179 let mut matcher = nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT);
181 let mut haystack_buf = Vec::with_capacity(256);
182
183 if match_type_filter.is_none_or(|t| t == MatchType::File) {
185 for path in &self.files {
186 if let Some(score) =
187 self.score_path(path, &pattern_storage, &mut matcher, &mut haystack_buf)
188 {
189 push_top_match(&mut results, limit, score, path.clone(), MatchType::File);
190 }
191 }
192 }
193
194 if match_type_filter.is_none_or(|t| t == MatchType::Directory) {
196 for path in &self.directories {
197 if let Some(score) =
198 self.score_path(path, &pattern_storage, &mut matcher, &mut haystack_buf)
199 {
200 push_top_match(
201 &mut results,
202 limit,
203 score,
204 path.clone(),
205 MatchType::Directory,
206 );
207 }
208 }
209 }
210
211 results
212 .into_sorted_vec()
213 .into_iter()
214 .map(|Reverse(item)| item)
215 .collect()
216 }
217
218 fn score_path(
219 &self,
220 path: &str,
221 pattern: &PatternStorage,
222 matcher: &mut nucleo_matcher::Matcher,
223 haystack_buf: &mut Vec<char>,
224 ) -> Option<u32> {
225 let haystack = nucleo_matcher::Utf32Str::new(path, haystack_buf);
226
227 let needle = match pattern {
228 PatternStorage::Ascii(bytes) => nucleo_matcher::Utf32Str::Ascii(bytes),
229 PatternStorage::Unicode(chars) => nucleo_matcher::Utf32Str::Unicode(chars),
230 };
231
232 matcher.fuzzy_match(haystack, needle).map(|s| s as u32)
233 }
234}
235
236pub struct FileIndexCache {
238 cache: Arc<RwLock<Option<Arc<FileIndex>>>>,
239 search_directory: std::path::PathBuf,
240 exclude: Vec<String>,
241 respect_gitignore: bool,
242 threads: usize,
243}
244
245impl FileIndexCache {
246 pub fn new(
247 search_directory: std::path::PathBuf,
248 exclude: impl IntoIterator<Item = String>,
249 respect_gitignore: bool,
250 threads: usize,
251 ) -> Self {
252 Self {
253 cache: Arc::new(RwLock::new(None)),
254 search_directory,
255 exclude: exclude.into_iter().collect(),
256 respect_gitignore,
257 threads,
258 }
259 }
260
261 pub async fn get_or_build(&self) -> anyhow::Result<Arc<FileIndex>> {
263 {
265 let guard = self.cache.read().await;
266 if let Some(index) = guard.as_ref() {
267 if index.last_built.elapsed() < std::time::Duration::from_secs(300) {
269 return Ok(Arc::clone(index));
270 }
271 }
272 }
273
274 let index = Arc::new(FileIndex::build_from_directory(
276 &self.search_directory,
277 &self.exclude,
278 self.respect_gitignore,
279 self.threads,
280 )?);
281
282 {
284 let mut guard = self.cache.write().await;
285 *guard = Some(Arc::clone(&index));
286 }
287 Ok(index)
288 }
289
290 pub fn refresh_background(&self) -> Option<Arc<FileIndex>> {
293 let search_directory = self.search_directory.clone();
295 let exclude = self.exclude.clone();
296 let respect_gitignore = self.respect_gitignore;
297 let threads = self.threads;
298 let cache = self.cache.clone();
299
300 tokio::spawn(async move {
301 match FileIndex::build_from_directory(
302 &search_directory,
303 &exclude,
304 respect_gitignore,
305 threads,
306 ) {
307 Ok(new_index) => {
308 let mut guard = cache.write().await;
309 *guard = Some(Arc::new(new_index));
310 }
311 Err(e) => {
312 tracing::error!("failed to rebuild file index: {e}");
313 }
314 }
315 });
316
317 let guard = self.cache.blocking_read();
319 guard.as_ref().map(Arc::clone)
320 }
321
322 pub fn update_file(&self, path: &str, is_added: bool) {
325 let mut guard = self.cache.blocking_write();
326 let Some(existing) = guard.take() else { return };
327
328 let mut index = Arc::try_unwrap(existing).unwrap_or_else(|arc| (*arc).clone());
329 if is_added {
330 if Path::new(path).is_dir() {
331 index.directories.push(path.to_string());
332 } else {
333 index.files.push(path.to_string());
334 }
335 } else {
336 index.files.retain(|p| p != path);
337 index.directories.retain(|p| p != path);
338 }
339 index.last_built = std::time::Instant::now();
340 *guard = Some(Arc::new(index));
341 }
342
343 pub async fn index_age(&self) -> Option<std::time::Duration> {
345 let guard = self.cache.read().await;
346 guard.as_ref().map(|idx| idx.last_built.elapsed())
347 }
348}
349
350impl Clone for FileIndex {
352 fn clone(&self) -> Self {
353 Self {
354 files: self.files.clone(),
355 directories: self.directories.clone(),
356 last_built: self.last_built,
357 }
358 }
359}
360
361#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
369#[serde(rename_all = "lowercase")]
370pub enum MatchType {
371 File,
372 Directory,
373}
374
375#[derive(Debug, Clone, Serialize, Deserialize)]
376pub struct FileMatch {
377 pub score: u32,
378 pub path: String,
379 pub match_type: MatchType,
380 #[serde(skip_serializing_if = "Option::is_none")]
381 pub indices: Option<Vec<u32>>,
382}
383
384#[derive(Debug)]
386pub struct FileSearchResults {
387 pub matches: Vec<FileMatch>,
388 pub total_match_count: usize,
389}
390
391pub struct FileSearchConfig {
393 pub pattern_text: String,
394 pub limit: NonZero<usize>,
395 pub search_directory: std::path::PathBuf,
396 pub exclude: Vec<String>,
397 pub threads: NonZero<usize>,
398 pub cancel_flag: Arc<AtomicBool>,
399 pub compute_indices: bool,
400 pub respect_gitignore: bool,
401}
402
403pub use vtcode_commons::paths::file_name_from_path;
404
405struct BestMatchesList {
410 matches: BinaryHeap<Reverse<(u32, String, MatchType)>>,
411 limit: usize,
412 matcher: nucleo_matcher::Matcher,
413 haystack_buf: Vec<char>,
414 pattern: PatternStorage,
416}
417
418enum PatternStorage {
420 Ascii(Vec<u8>),
422 Unicode(Vec<char>),
424}
425
426impl BestMatchesList {
427 fn new(limit: usize, pattern_text: &str) -> Self {
428 let pattern = if pattern_text.is_ascii() {
432 PatternStorage::Ascii(pattern_text.to_ascii_lowercase().into_bytes())
433 } else {
434 PatternStorage::Unicode(pattern_text.to_lowercase().chars().collect())
435 };
436
437 Self {
438 matches: BinaryHeap::new(),
439 limit,
440 matcher: nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT),
441 haystack_buf: Vec::with_capacity(256),
442 pattern,
443 }
444 }
445
446 fn record_match(&mut self, path: &str, match_type: MatchType) -> bool {
451 let haystack = nucleo_matcher::Utf32Str::new(path, &mut self.haystack_buf);
453 let needle = match &self.pattern {
454 PatternStorage::Ascii(bytes) => nucleo_matcher::Utf32Str::Ascii(bytes),
455 PatternStorage::Unicode(chars) => nucleo_matcher::Utf32Str::Unicode(chars),
456 };
457 let Some(score) = self.matcher.fuzzy_match(haystack, needle) else {
458 return false;
459 };
460
461 push_top_match(
462 &mut self.matches,
463 self.limit,
464 score as u32,
465 path.to_string(),
466 match_type,
467 );
468 true
469 }
470}
471
472fn push_top_match(
473 matches: &mut BinaryHeap<Reverse<(u32, String, MatchType)>>,
474 limit: usize,
475 score: u32,
476 path: String,
477 match_type: MatchType,
478) -> bool {
479 if matches.len() < limit {
480 matches.push(Reverse((score, path, match_type)));
481 return true;
482 }
483
484 let Some(min_score) = matches.peek().map(|entry| entry.0.0) else {
485 return false;
486 };
487
488 if score <= min_score {
489 return false;
490 }
491
492 matches.pop();
493 matches.push(Reverse((score, path, match_type)));
494 true
495}
496
497pub async fn run_with_index(
511 config: FileSearchConfig,
512 index_cache: &FileIndexCache,
513) -> anyhow::Result<FileSearchResults> {
514 let limit = config.limit.get();
515 let cancel_flag = &config.cancel_flag;
516 let compute_indices = config.compute_indices;
517
518 let index = index_cache.get_or_build().await?;
520
521 if cancel_flag.load(Ordering::Relaxed) {
523 return Ok(FileSearchResults {
524 matches: Vec::new(),
525 total_match_count: 0,
526 });
527 }
528
529 let matched_paths = index.query(&config.pattern_text, limit, None);
531 let total_match_count = matched_paths.len();
532
533 let matches = matched_paths
535 .into_iter()
536 .map(|(score, path, match_type)| FileMatch {
537 score,
538 path,
539 match_type,
540 indices: if compute_indices {
541 Some(Vec::new())
542 } else {
543 None
544 },
545 })
546 .collect();
547
548 Ok(FileSearchResults {
549 matches,
550 total_match_count,
551 })
552}
553
554pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
564 let limit = config.limit.get();
565 let search_directory = &config.search_directory;
566 let exclude = &config.exclude;
567 let threads = config.threads.get();
568 let cancel_flag = &config.cancel_flag;
569 let compute_indices = config.compute_indices;
570 let respect_gitignore = config.respect_gitignore;
571
572 let walker = build_parallel_walker(search_directory, exclude, threads, respect_gitignore)?;
573
574 let best_matchers_per_worker: Vec<Arc<Mutex<BestMatchesList>>> = (0..threads)
577 .map(|_| {
578 Arc::new(Mutex::new(BestMatchesList::new(
579 limit,
580 &config.pattern_text,
581 )))
582 })
583 .collect();
584
585 let total_match_count = Arc::new(AtomicUsize::new(0));
586
587 let worker_counter = AtomicUsize::new(0);
590 let worker_count = best_matchers_per_worker.len();
591 walker.run(|| {
592 let worker_id = worker_counter.fetch_add(1, Ordering::Relaxed) % worker_count;
593 let best_list = best_matchers_per_worker[worker_id].clone();
594 let cancel_flag_clone = cancel_flag.clone();
595 let total_match_count_clone = total_match_count.clone();
596
597 Box::new(move |result| {
598 if cancel_flag_clone.load(Ordering::Relaxed) {
600 return ignore::WalkState::Quit;
601 }
602
603 let entry = match result {
604 Ok(e) => e,
605 Err(_) => return ignore::WalkState::Continue,
606 };
607
608 let relative_path = entry
610 .path()
611 .strip_prefix(search_directory)
612 .ok()
613 .and_then(|p| p.to_str());
614
615 let path_to_match = match relative_path {
616 Some(p) if !p.is_empty() => p,
617 _ => return ignore::WalkState::Continue, };
619
620 let match_type = if entry.path().is_dir() {
621 MatchType::Directory
622 } else {
623 MatchType::File
624 };
625
626 {
628 let mut list = best_list.lock();
629 if list.record_match(path_to_match, match_type) {
630 total_match_count_clone.fetch_add(1, Ordering::Relaxed);
631 }
632 }
633
634 ignore::WalkState::Continue
635 })
636 });
637
638 let mut merged_matches = BinaryHeap::with_capacity(limit);
640 for arc in best_matchers_per_worker {
641 let mut list = arc.lock();
642 for Reverse((score, path, match_type)) in std::mem::take(&mut list.matches).into_vec() {
643 push_top_match(&mut merged_matches, limit, score, path, match_type);
644 }
645 }
646
647 let matches = merged_matches
649 .into_sorted_vec()
650 .into_iter()
651 .map(|Reverse((score, path, match_type))| FileMatch {
652 score,
653 path,
654 match_type,
655 indices: if compute_indices {
656 Some(Vec::new())
657 } else {
658 None
659 },
660 })
661 .collect();
662
663 Ok(FileSearchResults {
664 matches,
665 total_match_count: total_match_count.load(Ordering::Relaxed),
666 })
667}