Skip to main content

vtcode_file_search/
lib.rs

//! Fast fuzzy file search library for VT Code.
//!
//! Uses the `ignore` crate (same as ripgrep) for parallel directory traversal
//! and `nucleo-matcher` for fuzzy matching.
//!
//! # Example
//!
//! ```ignore
//! use std::num::NonZero;
//! use std::path::PathBuf;
//! use std::sync::Arc;
//! use std::sync::atomic::AtomicBool;
//! use vtcode_file_search::{FileSearchConfig, run};
//!
//! let results = run(FileSearchConfig {
//!     pattern_text: "main".to_string(),
//!     limit: NonZero::new(100).unwrap(),
//!     search_directory: PathBuf::from("."),
//!     exclude: vec![],
//!     threads: NonZero::new(4).unwrap(),
//!     cancel_flag: Arc::new(AtomicBool::new(false)),
//!     compute_indices: false,
//!     respect_gitignore: true,
//! })?;
//!
//! for m in results.matches {
//!     println!("{}: {}", m.path, m.score);
//! }
//! # Ok::<(), anyhow::Error>(())
//! ```
31
32use parking_lot::Mutex;
33use serde::{Deserialize, Serialize};
34use std::cmp::Reverse;
35use std::collections::BinaryHeap;
36use std::num::NonZero;
37use std::path::Path;
38use std::sync::Arc;
39use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
40use tokio::sync::RwLock;
41
/// In-memory snapshot of the paths found under a search directory.
///
/// The snapshot is produced by one directory traversal so that later
/// searches can be answered from memory instead of walking the filesystem
/// again.
pub struct FileIndex {
    /// Relative paths of every indexed entry that is not a directory.
    files: Vec<String>,
    /// Relative paths of every indexed directory.
    directories: Vec<String>,
    /// Moment at which this snapshot was built or last updated.
    last_built: std::time::Instant,
}
54
55/// Build a parallel walker with the given configuration.
56fn build_parallel_walker(
57    search_directory: &Path,
58    exclude: &[String],
59    threads: usize,
60    respect_gitignore: bool,
61) -> anyhow::Result<ignore::WalkParallel> {
62    let mut walk_builder = ignore::WalkBuilder::new(search_directory);
63    walk_builder
64        .threads(threads)
65        .hidden(false)
66        .follow_links(true)
67        .require_git(false);
68
69    if !respect_gitignore {
70        walk_builder
71            .git_ignore(false)
72            .git_global(false)
73            .git_exclude(false)
74            .ignore(false)
75            .parents(false);
76    }
77
78    if !exclude.is_empty() {
79        let mut override_builder = ignore::overrides::OverrideBuilder::new(search_directory);
80        for exclude_pattern in exclude {
81            let pattern = format!("!{}", exclude_pattern);
82            override_builder.add(&pattern)?;
83        }
84        walk_builder.overrides(override_builder.build()?);
85    }
86
87    Ok(walk_builder.build_parallel())
88}
89
90impl FileIndex {
91    /// Build a file index by traversing the directory tree.
92    /// This is expensive but only done once.
93    fn build_from_directory(
94        search_directory: &Path,
95        exclude: &[String],
96        respect_gitignore: bool,
97        threads: usize,
98    ) -> anyhow::Result<Self> {
99        let walker = build_parallel_walker(search_directory, exclude, threads, respect_gitignore)?;
100
101        // Collect all files and directories
102        let files_arc = Arc::new(Mutex::new(Vec::new()));
103        let dirs_arc = Arc::new(Mutex::new(Vec::new()));
104
105        walker.run(|| {
106            let files_clone = files_arc.clone();
107            let dirs_clone = dirs_arc.clone();
108            let search_dir = search_directory.to_path_buf();
109
110            Box::new(move |result| {
111                let entry = match result {
112                    Ok(e) => e,
113                    Err(_) => return ignore::WalkState::Continue,
114                };
115
116                // Make path relative to search directory
117                if let Some(rel_path) = entry
118                    .path()
119                    .strip_prefix(&search_dir)
120                    .ok()
121                    .and_then(|p| p.to_str())
122                    && !rel_path.is_empty()
123                {
124                    if entry.path().is_dir() {
125                        dirs_clone.lock().push(rel_path.to_string());
126                    } else {
127                        files_clone.lock().push(rel_path.to_string());
128                    }
129                }
130
131                ignore::WalkState::Continue
132            })
133        });
134
135        let files = Arc::try_unwrap(files_arc)
136            .map_err(|arc| {
137                anyhow::anyhow!(
138                    "failed to unwrap files arc, {} references remain",
139                    Arc::strong_count(&arc)
140                )
141            })?
142            .into_inner();
143        let directories = Arc::try_unwrap(dirs_arc)
144            .map_err(|arc| {
145                anyhow::anyhow!(
146                    "failed to unwrap dirs arc, {} references remain",
147                    Arc::strong_count(&arc)
148                )
149            })?
150            .into_inner();
151
152        Ok(Self {
153            files,
154            directories,
155            last_built: std::time::Instant::now(),
156        })
157    }
158
159    /// Query the index for matching paths.
160    /// Much faster than re-traversing the filesystem.
161    fn query(
162        &self,
163        pattern_text: &str,
164        limit: usize,
165        match_type_filter: Option<MatchType>,
166    ) -> Vec<(u32, String, MatchType)> {
167        let mut results = BinaryHeap::with_capacity(limit);
168
169        // Normalize pattern to lowercase to work around a nucleo-matcher bug:
170        // its prefilter only does case-insensitive search for lowercase needle
171        // chars, not uppercase. See https://github.com/openai/codex/pull/15772.
172        let pattern_storage = if pattern_text.is_ascii() {
173            PatternStorage::Ascii(pattern_text.to_ascii_lowercase().into_bytes())
174        } else {
175            PatternStorage::Unicode(pattern_text.to_lowercase().chars().collect())
176        };
177
178        // Reuse single matcher across all queries (mem-reuse-collections)
179        let mut matcher = nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT);
180        let mut haystack_buf = Vec::with_capacity(256);
181
182        // Iterate over files
183        if match_type_filter.is_none_or(|t| t == MatchType::File) {
184            for path in &self.files {
185                if let Some(score) =
186                    self.score_path(path, &pattern_storage, &mut matcher, &mut haystack_buf)
187                {
188                    push_top_match(&mut results, limit, score, path.clone(), MatchType::File);
189                }
190            }
191        }
192
193        // Iterate over directories
194        if match_type_filter.is_none_or(|t| t == MatchType::Directory) {
195            for path in &self.directories {
196                if let Some(score) =
197                    self.score_path(path, &pattern_storage, &mut matcher, &mut haystack_buf)
198                {
199                    push_top_match(
200                        &mut results,
201                        limit,
202                        score,
203                        path.clone(),
204                        MatchType::Directory,
205                    );
206                }
207            }
208        }
209
210        results
211            .into_sorted_vec()
212            .into_iter()
213            .map(|Reverse(item)| item)
214            .collect()
215    }
216
217    fn score_path(
218        &self,
219        path: &str,
220        pattern: &PatternStorage,
221        matcher: &mut nucleo_matcher::Matcher,
222        haystack_buf: &mut Vec<char>,
223    ) -> Option<u32> {
224        let haystack = nucleo_matcher::Utf32Str::new(path, haystack_buf);
225
226        let needle = match pattern {
227            PatternStorage::Ascii(bytes) => nucleo_matcher::Utf32Str::Ascii(bytes),
228            PatternStorage::Unicode(chars) => nucleo_matcher::Utf32Str::Unicode(chars),
229        };
230
231        matcher.fuzzy_match(haystack, needle).map(|s| s as u32)
232    }
233}
234
235/// A cached file index that can be shared across searches.
236pub struct FileIndexCache {
237    cache: Arc<RwLock<Option<Arc<FileIndex>>>>,
238    search_directory: std::path::PathBuf,
239    exclude: Vec<String>,
240    respect_gitignore: bool,
241    threads: usize,
242}
243
244impl FileIndexCache {
245    pub fn new(
246        search_directory: std::path::PathBuf,
247        exclude: impl IntoIterator<Item = String>,
248        respect_gitignore: bool,
249        threads: usize,
250    ) -> Self {
251        Self {
252            cache: Arc::new(RwLock::new(None)),
253            search_directory,
254            exclude: exclude.into_iter().collect(),
255            respect_gitignore,
256            threads,
257        }
258    }
259
260    /// Get or build the file index.
261    pub async fn get_or_build(&self) -> anyhow::Result<Arc<FileIndex>> {
262        // Check if we have a cached index
263        {
264            let guard = self.cache.read().await;
265            if let Some(index) = guard.as_ref() {
266                // Check if index is stale (older than 5 minutes)
267                if index.last_built.elapsed() < std::time::Duration::from_secs(300) {
268                    return Ok(Arc::clone(index));
269                }
270            }
271        }
272
273        // Build a new index
274        let index = Arc::new(FileIndex::build_from_directory(
275            &self.search_directory,
276            &self.exclude,
277            self.respect_gitignore,
278            self.threads,
279        )?);
280
281        // Cache and return
282        {
283            let mut guard = self.cache.write().await;
284            *guard = Some(Arc::clone(&index));
285        }
286        Ok(index)
287    }
288
289    /// Force refresh the index in the background.
290    /// Returns the old index immediately while rebuilding happens asynchronously.
291    pub fn refresh_background(&self) -> Option<Arc<FileIndex>> {
292        // Build new index asynchronously
293        let search_directory = self.search_directory.clone();
294        let exclude = self.exclude.clone();
295        let respect_gitignore = self.respect_gitignore;
296        let threads = self.threads;
297        let cache = self.cache.clone();
298
299        tokio::spawn(async move {
300            match FileIndex::build_from_directory(
301                &search_directory,
302                &exclude,
303                respect_gitignore,
304                threads,
305            ) {
306                Ok(new_index) => {
307                    let mut guard = cache.write().await;
308                    *guard = Some(Arc::new(new_index));
309                }
310                Err(e) => {
311                    tracing::error!("failed to rebuild file index: {e}");
312                }
313            }
314        });
315
316        // Return old index if available
317        let guard = self.cache.blocking_read();
318        guard.as_ref().map(Arc::clone)
319    }
320
321    /// Incrementally update the index when a file change is detected.
322    /// This is faster than a full rebuild for single file changes.
323    pub fn update_file(&self, path: &str, is_added: bool) {
324        let mut guard = self.cache.blocking_write();
325        let Some(existing) = guard.take() else { return };
326
327        let mut index = Arc::try_unwrap(existing).unwrap_or_else(|arc| (*arc).clone());
328        if is_added {
329            if Path::new(path).is_dir() {
330                index.directories.push(path.to_string());
331            } else {
332                index.files.push(path.to_string());
333            }
334        } else {
335            index.files.retain(|p| p != path);
336            index.directories.retain(|p| p != path);
337        }
338        index.last_built = std::time::Instant::now();
339        *guard = Some(Arc::new(index));
340    }
341
342    /// Get the age of the current index.
343    pub async fn index_age(&self) -> Option<std::time::Duration> {
344        let guard = self.cache.read().await;
345        guard.as_ref().map(|idx| idx.last_built.elapsed())
346    }
347}
348
349// Make FileIndex cloneable
350impl Clone for FileIndex {
351    fn clone(&self) -> Self {
352        Self {
353            files: self.files.clone(),
354            directories: self.directories.clone(),
355            last_built: self.last_built,
356        }
357    }
358}
359
360/// A single file match result.
361///
362/// Fields:
363/// - `score`: Relevance score from fuzzy matching (higher is better)
364/// - `path`: Path relative to the search directory
365/// - `match_type`: Whether the match is a file or directory
366/// - `indices`: Optional character positions for highlighting matched characters
367#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
368#[serde(rename_all = "lowercase")]
369pub enum MatchType {
370    File,
371    Directory,
372}
373
374#[derive(Debug, Clone, Serialize, Deserialize)]
375pub struct FileMatch {
376    pub score: u32,
377    pub path: String,
378    pub match_type: MatchType,
379    #[serde(skip_serializing_if = "Option::is_none")]
380    pub indices: Option<Vec<u32>>,
381}
382
383/// Complete search results with total match count.
384#[derive(Debug)]
385pub struct FileSearchResults {
386    pub matches: Vec<FileMatch>,
387    pub total_match_count: usize,
388}
389
/// Parameters for one file search.
pub struct FileSearchConfig {
    /// Fuzzy pattern to look for; it is lowercased before matching.
    pub pattern_text: String,
    /// Maximum number of matches to return.
    pub limit: NonZero<usize>,
    /// Root of the traversal; returned paths are relative to it.
    pub search_directory: std::path::PathBuf,
    /// Patterns to leave out; each one is applied as a `!pattern` override.
    pub exclude: Vec<String>,
    /// Number of walker threads used by `run`.
    pub threads: NonZero<usize>,
    /// Set to `true` to make the search stop.
    pub cancel_flag: Arc<AtomicBool>,
    /// When `true`, every match carries `Some` indices (currently always an
    /// empty list); when `false`, indices are `None`.
    pub compute_indices: bool,
    /// When `false`, ignore files (git ignore, global git ignore, git
    /// exclude, `.ignore`, parent directories) are not consulted.
    pub respect_gitignore: bool,
}
401
402pub use vtcode_commons::paths::file_name_from_path;
403
404/// Best matches list per worker thread (lock-free collection).
405///
406/// Each worker thread gets its own instance to avoid locking during
407/// directory traversal. Results are merged at the end.
408struct BestMatchesList {
409    matches: BinaryHeap<Reverse<(u32, String, MatchType)>>,
410    limit: usize,
411    matcher: nucleo_matcher::Matcher,
412    haystack_buf: Vec<char>,
413    /// Pre-computed pattern - avoids per-match UTF-32 conversion
414    pattern: PatternStorage,
415}
416
/// Search pattern kept in the representation `Utf32Str` can borrow directly.
enum PatternStorage {
    /// Pattern made of ASCII bytes only, usable as `Utf32Str::Ascii`.
    Ascii(Vec<u8>),
    /// Pattern containing non-ASCII text, kept as chars for
    /// `Utf32Str::Unicode`.
    Unicode(Vec<char>),
}
424
425impl BestMatchesList {
426    fn new(limit: usize, pattern_text: &str) -> Self {
427        // Normalize pattern to lowercase to work around a nucleo-matcher bug:
428        // its prefilter only does case-insensitive search for lowercase needle
429        // chars, not uppercase. See https://github.com/openai/codex/pull/15772.
430        let pattern = if pattern_text.is_ascii() {
431            PatternStorage::Ascii(pattern_text.to_ascii_lowercase().into_bytes())
432        } else {
433            PatternStorage::Unicode(pattern_text.to_lowercase().chars().collect())
434        };
435
436        Self {
437            matches: BinaryHeap::new(),
438            limit,
439            matcher: nucleo_matcher::Matcher::new(nucleo_matcher::Config::DEFAULT),
440            haystack_buf: Vec::with_capacity(256),
441            pattern,
442        }
443    }
444
445    /// Record a matching path while preserving the worker-local top-K heap.
446    ///
447    /// Returns true when the path matches the search pattern, even if it
448    /// does not survive the top-K cutoff.
449    fn record_match(&mut self, path: &str, match_type: MatchType) -> bool {
450        // Use pre-computed pattern directly - zero allocation per match
451        let haystack = nucleo_matcher::Utf32Str::new(path, &mut self.haystack_buf);
452        let needle = match &self.pattern {
453            PatternStorage::Ascii(bytes) => nucleo_matcher::Utf32Str::Ascii(bytes),
454            PatternStorage::Unicode(chars) => nucleo_matcher::Utf32Str::Unicode(chars),
455        };
456        let Some(score) = self.matcher.fuzzy_match(haystack, needle) else {
457            return false;
458        };
459
460        push_top_match(
461            &mut self.matches,
462            self.limit,
463            score as u32,
464            path.to_string(),
465            match_type,
466        );
467        true
468    }
469}
470
471fn push_top_match(
472    matches: &mut BinaryHeap<Reverse<(u32, String, MatchType)>>,
473    limit: usize,
474    score: u32,
475    path: String,
476    match_type: MatchType,
477) -> bool {
478    if matches.len() < limit {
479        matches.push(Reverse((score, path, match_type)));
480        return true;
481    }
482
483    let Some(min_score) = matches.peek().map(|entry| entry.0.0) else {
484        return false;
485    };
486
487    if score <= min_score {
488        return false;
489    }
490
491    matches.pop();
492    matches.push(Reverse((score, path, match_type)));
493    true
494}
495
496/// Run fuzzy file search using a pre-computed file index.
497///
498/// This is much faster than `run()` for repeated queries on the same
499/// directory because it avoids re-traversing the filesystem.
500///
501/// # Arguments
502///
503/// * `config` - File search configuration
504/// * `index_cache` - Shared cache for the pre-computed file index
505///
506/// # Returns
507///
508/// FileSearchResults containing matched files and total match count.
509pub async fn run_with_index(
510    config: FileSearchConfig,
511    index_cache: &FileIndexCache,
512) -> anyhow::Result<FileSearchResults> {
513    let limit = config.limit.get();
514    let cancel_flag = &config.cancel_flag;
515    let compute_indices = config.compute_indices;
516
517    // Get or build the file index
518    let index = index_cache.get_or_build().await?;
519
520    // Check cancellation
521    if cancel_flag.load(Ordering::Relaxed) {
522        return Ok(FileSearchResults {
523            matches: Vec::new(),
524            total_match_count: 0,
525        });
526    }
527
528    // Query the index
529    let matched_paths = index.query(&config.pattern_text, limit, None);
530    let total_match_count = matched_paths.len();
531
532    // Build final results
533    let matches = matched_paths
534        .into_iter()
535        .map(|(score, path, match_type)| FileMatch {
536            score,
537            path,
538            match_type,
539            indices: if compute_indices {
540                Some(Vec::new())
541            } else {
542                None
543            },
544        })
545        .collect();
546
547    Ok(FileSearchResults {
548        matches,
549        total_match_count,
550    })
551}
552
553/// Run fuzzy file search with parallel traversal.
554///
555/// # Arguments
556///
557/// * `config` - File search configuration containing all search parameters
558///
559/// # Returns
560///
561/// FileSearchResults containing matched files and total match count.
562pub fn run(config: FileSearchConfig) -> anyhow::Result<FileSearchResults> {
563    let limit = config.limit.get();
564    let search_directory = &config.search_directory;
565    let exclude = &config.exclude;
566    let threads = config.threads.get();
567    let cancel_flag = &config.cancel_flag;
568    let compute_indices = config.compute_indices;
569    let respect_gitignore = config.respect_gitignore;
570
571    let walker = build_parallel_walker(search_directory, exclude, threads, respect_gitignore)?;
572
573    // Create per-worker result collection using Arc + Mutex for thread safety.
574    // Each worker gets exactly one instance - no sharing between workers.
575    let best_matchers_per_worker: Vec<Arc<Mutex<BestMatchesList>>> = (0..threads)
576        .map(|_| {
577            Arc::new(Mutex::new(BestMatchesList::new(
578                limit,
579                &config.pattern_text,
580            )))
581        })
582        .collect();
583
584    let total_match_count = Arc::new(AtomicUsize::new(0));
585
586    // Run parallel traversal - the closure is called once per worker thread.
587    // We use a local counter to assign each worker a unique index.
588    let worker_counter = AtomicUsize::new(0);
589    let worker_count = best_matchers_per_worker.len();
590    walker.run(|| {
591        let worker_id = worker_counter.fetch_add(1, Ordering::Relaxed) % worker_count;
592        let best_list = best_matchers_per_worker[worker_id].clone();
593        let cancel_flag_clone = cancel_flag.clone();
594        let total_match_count_clone = total_match_count.clone();
595
596        Box::new(move |result| {
597            // Check cancellation flag periodically
598            if cancel_flag_clone.load(Ordering::Relaxed) {
599                return ignore::WalkState::Quit;
600            }
601
602            let entry = match result {
603                Ok(e) => e,
604                Err(_) => return ignore::WalkState::Continue,
605            };
606
607            // Make path relative to search directory
608            let relative_path = entry
609                .path()
610                .strip_prefix(search_directory)
611                .ok()
612                .and_then(|p| p.to_str());
613
614            let path_to_match = match relative_path {
615                Some(p) if !p.is_empty() => p,
616                _ => return ignore::WalkState::Continue, // Skip root and non-relative paths
617            };
618
619            let match_type = if entry.path().is_dir() {
620                MatchType::Directory
621            } else {
622                MatchType::File
623            };
624
625            // Try to add to results - no contention with other workers
626            {
627                let mut list = best_list.lock();
628                if list.record_match(path_to_match, match_type) {
629                    total_match_count_clone.fetch_add(1, Ordering::Relaxed);
630                }
631            }
632
633            ignore::WalkState::Continue
634        })
635    });
636
637    // Merge worker-local top-K heaps into one final top-K heap.
638    let mut merged_matches = BinaryHeap::with_capacity(limit);
639    for arc in best_matchers_per_worker {
640        let mut list = arc.lock();
641        for Reverse((score, path, match_type)) in std::mem::take(&mut list.matches).into_vec() {
642            push_top_match(&mut merged_matches, limit, score, path, match_type);
643        }
644    }
645
646    // Build final results
647    let matches = merged_matches
648        .into_sorted_vec()
649        .into_iter()
650        .map(|Reverse((score, path, match_type))| FileMatch {
651            score,
652            path,
653            match_type,
654            indices: if compute_indices {
655                Some(Vec::new())
656            } else {
657                None
658            },
659        })
660        .collect();
661
662    Ok(FileSearchResults {
663        matches,
664        total_match_count: total_match_count.load(Ordering::Relaxed),
665    })
666}
667
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Search configuration with the settings shared by these tests: no
    /// excludes, cancellation off, no indices, gitignore handling disabled.
    fn search_config(root: &Path, pattern: &str, limit: usize, threads: usize) -> FileSearchConfig {
        FileSearchConfig {
            pattern_text: pattern.to_string(),
            limit: NonZero::new(limit).unwrap(),
            search_directory: root.to_path_buf(),
            exclude: vec![],
            threads: NonZero::new(threads).unwrap(),
            cancel_flag: Arc::new(AtomicBool::new(false)),
            compute_indices: false,
            respect_gitignore: false,
        }
    }

    /// Single-threaded Tokio runtime for driving the async cache API.
    fn current_thread_runtime() -> anyhow::Result<tokio::runtime::Runtime> {
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()?;
        Ok(runtime)
    }

    /// Create one empty file per name directly under `root`.
    fn touch_all(root: &Path, names: &[&str]) -> anyhow::Result<()> {
        for name in names {
            fs::write(root.join(name), "")?;
        }
        Ok(())
    }

    #[test]
    fn test_file_name_from_path() {
        let cases = [
            ("src/main.rs", "main.rs"),
            ("Cargo.toml", "Cargo.toml"),
            ("/absolute/path/file.txt", "file.txt"),
            ("file.txt", "file.txt"),
            ("", ""),
        ];
        for (input, expected) in cases {
            assert_eq!(file_name_from_path(input), expected);
        }
    }

    #[test]
    fn test_run_search() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        fs::write(root.join("hello.rs"), "fn main() {}")?;
        fs::write(root.join("world.txt"), "world")?;

        let results = run(search_config(root, "hello", 10, 1))?;

        assert_eq!(results.matches.len(), 1);
        let only = &results.matches[0];
        assert!(only.path.contains("hello"));
        assert_eq!(only.match_type, MatchType::File);

        Ok(())
    }

    #[test]
    fn test_multiple_matches() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        touch_all(root, &["test1.rs", "test2.rs", "test3.rs", "other.txt"])?;

        let results = run(search_config(root, "test", 10, 2))?;

        assert_eq!(results.matches.len(), 3);
        for found in &results.matches {
            assert!(found.path.contains("test"));
            assert!(matches!(found.match_type, MatchType::File));
        }

        Ok(())
    }

    #[test]
    fn test_limit_is_respected_across_workers() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        touch_all(
            root,
            &["alpha.rs", "alphabet.rs", "alphanumeric.rs", "alpaca.rs"],
        )?;

        let results = run(search_config(root, "alpha", 2, 4))?;

        assert_eq!(results.matches.len(), 2);
        // Results must come back ordered from best to worst score.
        for pair in results.matches.windows(2) {
            assert!(pair[0].score >= pair[1].score);
        }

        Ok(())
    }

    #[test]
    fn test_exclusion_patterns() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        fs::write(root.join("keep.rs"), "")?;
        fs::create_dir(root.join("target"))?;
        fs::write(root.join("target/ignore.rs"), "")?;

        let results = run(FileSearchConfig {
            exclude: vec!["target/**".to_string()],
            ..search_config(root, "rs", 10, 2)
        })?;

        assert_eq!(results.matches.len(), 1);
        let only = &results.matches[0];
        assert!(only.path.contains("keep.rs"));
        assert_eq!(only.match_type, MatchType::File);

        Ok(())
    }

    #[test]
    fn test_cancellation() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        for i in 0..10 {
            fs::write(root.join(format!("file{}.rs", i)), "")?;
        }

        // The flag is already raised before the search starts.
        let results = run(FileSearchConfig {
            cancel_flag: Arc::new(AtomicBool::new(true)),
            ..search_config(root, "file", 10, 1)
        })?;

        // Should return early due to cancellation
        assert!(results.matches.is_empty());

        Ok(())
    }

    #[test]
    fn test_directory_matches_are_returned() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        fs::create_dir_all(root.join("docs/guides"))?;
        fs::write(root.join("docs/guides/intro.md"), "intro")?;
        fs::write(root.join("docs/readme.md"), "readme")?;

        let results = run(search_config(root, "guides", 10, 2))?;

        let found_directory = results.matches.iter().any(|m| {
            m.path.ends_with("docs/guides") && matches!(m.match_type, MatchType::Directory)
        });
        assert!(found_directory);

        Ok(())
    }

    #[test]
    fn test_file_index_cache_basic() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        touch_all(root, &["main.rs", "lib.rs"])?;
        fs::create_dir(root.join("src"))?;

        let cache = FileIndexCache::new(root.to_path_buf(), vec![], false, 2);
        let rt = current_thread_runtime()?;

        // First call should build the index
        let first = rt.block_on(cache.get_or_build())?;
        assert_eq!(first.files.len(), 2);
        assert_eq!(first.directories.len(), 1);

        // Second call should return cached index
        let second = rt.block_on(cache.get_or_build())?;
        assert_eq!(second.files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_file_index_incremental_update() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        fs::write(root.join("main.rs"), "")?;

        let cache = FileIndexCache::new(root.to_path_buf(), vec![], false, 1);
        let rt = current_thread_runtime()?;

        let _ = rt.block_on(cache.get_or_build())?;

        // Add a new file
        let added = root.join("new.rs");
        fs::write(&added, "")?;
        let new_path = added.to_string_lossy().to_string();
        cache.update_file(&new_path, true);

        // Verify index was updated
        let index = rt.block_on(cache.get_or_build())?;
        assert!(index.files.iter().any(|p| p.contains("new.rs")));

        Ok(())
    }

    #[test]
    fn test_file_index_query() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        touch_all(root, &["hello_world.rs", "hello_test.rs", "other.txt"])?;

        let cache = FileIndexCache::new(root.to_path_buf(), vec![], false, 1);
        let rt = current_thread_runtime()?;
        let index = rt.block_on(cache.get_or_build())?;

        // Query for "hello" should match both hello files
        let both = index.query("hello", 10, None);
        assert_eq!(both.len(), 2);
        assert!(both.iter().all(|(_, path, _)| path.contains("hello")));

        // Query with limit
        let limited = index.query("hello", 1, None);
        assert_eq!(limited.len(), 1);

        // Query for non-existent pattern
        let none = index.query("nonexistent", 10, None);
        assert!(none.is_empty());

        Ok(())
    }

    #[test]
    fn test_run_with_index() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let root = temp.path();
        fs::write(root.join("main.rs"), "fn main() {}")?;
        fs::write(root.join("lib.rs"), "pub fn lib() {}")?;

        let cache = FileIndexCache::new(root.to_path_buf(), vec![], false, 1);
        let rt = current_thread_runtime()?;
        let config = search_config(root, "main", 10, 1);

        let results = rt.block_on(run_with_index(config, &cache))?;
        assert_eq!(results.matches.len(), 1);
        assert!(results.matches[0].path.contains("main.rs"));

        Ok(())
    }
}