vtcode_core/tools/
file_search.rs

//! Recursive file search functionality for VTCode
//!
//! This module provides utilities for recursively searching files in a project workspace,
//! with support for fuzzy path matching, glob-based exclusions, and content searching.
5
6use anyhow::{Context, Result};
7use glob::Pattern as GlobPattern;
8use ignore::WalkBuilder;
9use nucleo_matcher::pattern::{AtomKind, CaseMatching, Normalization, Pattern as FuzzyPattern};
10use nucleo_matcher::{Matcher, Utf32Str};
11use serde_json::{Value, json};
12use std::collections::HashSet;
13use std::fs::{self, Metadata};
14use std::path::{Path, PathBuf};
15
16/// Configuration for file search operations
17#[derive(Debug, Clone)]
18pub struct FileSearchConfig {
19    /// Maximum number of results to return
20    pub max_results: usize,
21    /// Whether to follow symbolic links
22    pub follow_links: bool,
23    /// Whether to include hidden files
24    pub include_hidden: bool,
25    /// File extensions to include (if empty, include all)
26    pub include_extensions: HashSet<String>,
27    /// File extensions to exclude
28    pub exclude_extensions: HashSet<String>,
29    /// File names/patterns to exclude
30    pub exclude_patterns: Vec<GlobPattern>,
31    /// Maximum file size in bytes (0 = no limit)
32    pub max_file_size: u64,
33}
34
35impl Default for FileSearchConfig {
36    fn default() -> Self {
37        Self {
38            max_results: 1000,
39            follow_links: false,
40            include_hidden: false,
41            include_extensions: HashSet::new(),
42            exclude_extensions: HashSet::new(),
43            exclude_patterns: Vec::new(),
44            max_file_size: 0,
45        }
46    }
47}
48
49/// Result of a file search operation
50#[derive(Debug, Clone)]
51pub struct FileSearchResult {
52    /// Path to the file
53    pub path: PathBuf,
54    /// File name
55    pub name: String,
56    /// File extension
57    pub extension: Option<String>,
58    /// File size in bytes
59    pub size: u64,
60    /// Whether the file is a directory
61    pub is_dir: bool,
62    /// Content matches (if searched for content)
63    pub content_matches: Vec<ContentMatch>,
64}
65
/// One occurrence of a content pattern inside a file.
#[derive(Clone, Debug)]
pub struct ContentMatch {
    /// One-based number of the line containing the match.
    pub line_number: usize,
    /// Full text of the line containing the match.
    pub content: String,
    /// Zero-based byte offset at which the match starts (unlike `line_number`, not 1-based).
    pub column: usize,
}
76
77/// File searcher for recursive file operations
78pub struct FileSearcher {
79    root: PathBuf,
80    config: FileSearchConfig,
81}
82
83impl FileSearcher {
84    /// Create a new file searcher
85    pub fn new(root: PathBuf, config: FileSearchConfig) -> Self {
86        Self { root, config }
87    }
88
89    /// Create a searcher with default configuration
90    pub fn with_default_config(root: PathBuf) -> Self {
91        Self::new(root, FileSearchConfig::default())
92    }
93
94    fn build_walk_builder(&self) -> WalkBuilder {
95        let mut builder = WalkBuilder::new(&self.root);
96        builder.follow_links(self.config.follow_links);
97        builder.hidden(!self.config.include_hidden);
98        builder.require_git(false);
99        builder.git_ignore(true);
100        builder.git_global(true);
101        builder.git_exclude(true);
102        builder
103    }
104
105    fn relative_path_string(&self, path: &Path) -> String {
106        path.strip_prefix(&self.root)
107            .unwrap_or(path)
108            .to_string_lossy()
109            .into_owned()
110    }
111
112    /// Recursively search for files matching the given pattern
113    pub fn search_files(&self, pattern: Option<&str>) -> Result<Vec<FileSearchResult>> {
114        let mut entries: Vec<(String, FileSearchResult)> = Vec::new();
115        let max_results = self.config.max_results;
116        let compiled_pattern = pattern.and_then(compile_fuzzy_pattern);
117
118        for entry_result in self.build_walk_builder().build() {
119            let entry = match entry_result {
120                Ok(entry) => entry,
121                Err(_) => continue,
122            };
123
124            if entry.depth() == 0 {
125                continue;
126            }
127
128            let file_type = match entry.file_type() {
129                Some(file_type) => file_type,
130                None => continue,
131            };
132
133            let metadata = match entry.metadata() {
134                Ok(meta) => meta,
135                Err(_) => continue,
136            };
137
138            if self.should_exclude_entry(entry.path(), Some(&file_type), &metadata)? {
139                continue;
140            }
141
142            let path = entry.path();
143            let result = FileSearchResult {
144                path: path.to_path_buf(),
145                name: path
146                    .file_name()
147                    .and_then(|n| n.to_str())
148                    .unwrap_or("")
149                    .to_string(),
150                extension: path
151                    .extension()
152                    .and_then(|ext| ext.to_str())
153                    .map(|ext| ext.to_string()),
154                size: metadata.len(),
155                is_dir: file_type.is_dir(),
156                content_matches: Vec::new(),
157            };
158
159            let rel_path = self.relative_path_string(path);
160            entries.push((rel_path, result));
161        }
162
163        if let Some(pattern) = compiled_pattern {
164            let mut matcher = Matcher::new(nucleo_matcher::Config::DEFAULT);
165            let mut buffer = Vec::<char>::new();
166            let mut scored = Vec::new();
167
168            for (rel_path, result) in entries {
169                buffer.clear();
170                let haystack = Utf32Str::new(rel_path.as_str(), &mut buffer);
171                if let Some(score) = pattern.score(haystack, &mut matcher) {
172                    scored.push((score, rel_path, result));
173                }
174            }
175
176            scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)));
177            Ok(scored
178                .into_iter()
179                .take(max_results)
180                .map(|(_, _, result)| result)
181                .collect())
182        } else {
183            entries.sort_by(|a, b| a.0.cmp(&b.0));
184            Ok(entries
185                .into_iter()
186                .take(max_results)
187                .map(|(_, result)| result)
188                .collect())
189        }
190    }
191
192    /// Search for files containing specific content
193    pub fn search_files_with_content(
194        &self,
195        content_pattern: &str,
196        file_pattern: Option<&str>,
197    ) -> Result<Vec<FileSearchResult>> {
198        let mut results = Vec::new();
199        let max_results = self.config.max_results;
200        for entry_result in self.build_walk_builder().build() {
201            if results.len() >= max_results {
202                break;
203            }
204
205            let entry = match entry_result {
206                Ok(entry) => entry,
207                Err(_) => continue,
208            };
209
210            if entry.depth() == 0 {
211                continue;
212            }
213
214            let path = entry.path();
215
216            let file_type = match entry.file_type() {
217                Some(file_type) if file_type.is_file() => file_type,
218                _ => continue,
219            };
220
221            let metadata = match entry.metadata() {
222                Ok(meta) => meta,
223                Err(_) => continue,
224            };
225
226            if self.should_exclude_entry(path, Some(&file_type), &metadata)? {
227                continue;
228            }
229
230            if let Some(pattern) = file_pattern {
231                if !self.path_matches_pattern(path, pattern)? {
232                    continue;
233                }
234            }
235
236            match self.search_content_in_file(path, content_pattern) {
237                Ok(content_matches) => {
238                    if content_matches.is_empty() {
239                        continue;
240                    }
241
242                    let file_result = FileSearchResult {
243                        path: path.to_path_buf(),
244                        name: path
245                            .file_name()
246                            .and_then(|n| n.to_str())
247                            .unwrap_or("")
248                            .to_string(),
249                        extension: path
250                            .extension()
251                            .and_then(|ext| ext.to_str())
252                            .map(|ext| ext.to_string()),
253                        size: metadata.len(),
254                        is_dir: false,
255                        content_matches,
256                    };
257
258                    results.push(file_result);
259                }
260                Err(_) => continue,
261            }
262        }
263
264        Ok(results)
265    }
266
267    /// Find a specific file by name (recursively)
268    pub fn find_file_by_name(&self, file_name: &str) -> Result<Option<PathBuf>> {
269        for entry_result in self.build_walk_builder().build() {
270            let entry = match entry_result {
271                Ok(entry) => entry,
272                Err(_) => continue,
273            };
274
275            if entry.depth() == 0 {
276                continue;
277            }
278
279            let path = entry.path();
280
281            let file_type = match entry.file_type() {
282                Some(file_type) => file_type,
283                None => continue,
284            };
285
286            let metadata = match entry.metadata() {
287                Ok(meta) => meta,
288                Err(_) => continue,
289            };
290
291            if self.should_exclude_entry(path, Some(&file_type), &metadata)? {
292                continue;
293            }
294
295            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
296                if name == file_name {
297                    return Ok(Some(path.to_path_buf()));
298                }
299            }
300        }
301
302        Ok(None)
303    }
304
305    /// Check if a path should be excluded based on configuration
306    fn should_exclude_entry(
307        &self,
308        path: &Path,
309        file_type: Option<&std::fs::FileType>,
310        metadata: &Metadata,
311    ) -> Result<bool> {
312        let path_str = path.to_string_lossy();
313
314        let is_effective_file = metadata.is_file() || file_type.map_or(false, |ft| ft.is_file());
315
316        if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) {
317            let extension_lower = extension.to_lowercase();
318
319            if self.config.exclude_extensions.contains(&extension_lower) {
320                return Ok(true);
321            }
322
323            if !self.config.include_extensions.is_empty()
324                && !self.config.include_extensions.contains(&extension_lower)
325            {
326                return Ok(true);
327            }
328        } else if !self.config.include_extensions.is_empty() && is_effective_file {
329            return Ok(true);
330        }
331
332        for pattern in &self.config.exclude_patterns {
333            if pattern.matches(path_str.as_ref()) {
334                return Ok(true);
335            }
336        }
337
338        if is_effective_file
339            && self.config.max_file_size > 0
340            && metadata.len() > self.config.max_file_size
341        {
342            return Ok(true);
343        }
344
345        Ok(false)
346    }
347
348    /// Check if a path matches a pattern
349    fn path_matches_pattern(&self, path: &Path, pattern: &str) -> Result<bool> {
350        if let Some(compiled) = compile_fuzzy_pattern(pattern) {
351            let mut matcher = Matcher::new(nucleo_matcher::Config::DEFAULT);
352            let mut buffer = Vec::<char>::new();
353            let relative = self.relative_path_string(path);
354            let haystack = Utf32Str::new(relative.as_str(), &mut buffer);
355            Ok(compiled.score(haystack, &mut matcher).is_some())
356        } else {
357            Ok(true)
358        }
359    }
360
361    /// Search for content within a file
362    fn search_content_in_file(&self, path: &Path, pattern: &str) -> Result<Vec<ContentMatch>> {
363        let content = fs::read_to_string(path)
364            .with_context(|| format!("Failed to read file: {}", path.display()))?;
365
366        let mut matches = Vec::new();
367        let pattern_lower = pattern.to_lowercase();
368
369        for (line_num, line) in content.lines().enumerate() {
370            let line_lower = line.to_lowercase();
371            if line_lower.contains(&pattern_lower) {
372                // Find all occurrences in the line
373                let mut start = 0;
374                while let Some(pos) = line_lower[start..].find(&pattern_lower) {
375                    let actual_pos = start + pos;
376                    matches.push(ContentMatch {
377                        line_number: line_num + 1,
378                        content: line.to_string(),
379                        column: actual_pos,
380                    });
381                    start = actual_pos + pattern.len();
382                }
383            }
384        }
385
386        Ok(matches)
387    }
388
389    /// Convert search results to JSON format
390    pub fn results_to_json(results: Vec<FileSearchResult>) -> Value {
391        let json_results: Vec<Value> = results
392            .into_iter()
393            .map(|result| {
394                json!({
395                    "path": result.path.to_string_lossy(),
396                    "name": result.name,
397                    "extension": result.extension,
398                    "size": result.size,
399                    "is_dir": result.is_dir,
400                    "content_matches": result.content_matches.iter().map(|m| json!({
401                        "line_number": m.line_number,
402                        "content": m.content,
403                        "column": m.column,
404                    })).collect::<Vec<Value>>()
405                })
406            })
407            .collect();
408
409        json!({
410            "success": true,
411            "results": json_results,
412            "count": json_results.len()
413        })
414    }
415}
416
417fn compile_fuzzy_pattern(pattern: &str) -> Option<FuzzyPattern> {
418    let trimmed = pattern.trim();
419    if trimmed.is_empty() {
420        None
421    } else {
422        Some(FuzzyPattern::new(
423            trimmed,
424            CaseMatching::Smart,
425            Normalization::Smart,
426            AtomKind::Fuzzy,
427        ))
428    }
429}
430
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Returns each result path relative to `root`, dropping results that are not below it.
    fn collect_relative_paths(results: &[FileSearchResult], root: &Path) -> Vec<PathBuf> {
        let mut relative = Vec::with_capacity(results.len());
        for result in results {
            if let Ok(stripped) = result.path.strip_prefix(root) {
                relative.push(stripped.to_path_buf());
            }
        }
        relative
    }

    /// The searcher stores the root it was created with.
    #[test]
    fn test_file_searcher_creation() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        let searcher = FileSearcher::with_default_config(root.to_path_buf());

        assert_eq!(searcher.root, root);
    }

    /// A file in the root is found by its exact name.
    #[test]
    fn test_find_file_by_name() -> Result<()> {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let target = root.join("test.txt");
        fs::write(&target, "test content").unwrap();

        let found = FileSearcher::with_default_config(root.to_path_buf())
            .find_file_by_name("test.txt")?;

        assert_eq!(found, Some(target));

        Ok(())
    }

    /// Without a pattern, every entry is returned ordered by relative path.
    #[test]
    fn test_search_files_without_pattern_returns_sorted_entries() -> Result<()> {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let subdir = root.join("subdir");

        fs::write(root.join("b_file.rs"), "content").unwrap();
        fs::write(root.join("a_file.txt"), "content").unwrap();
        fs::create_dir(&subdir).unwrap();
        fs::write(subdir.join("nested.txt"), "content").unwrap();

        let results = FileSearcher::with_default_config(root.to_path_buf()).search_files(None)?;

        let expected: Vec<PathBuf> = ["a_file.txt", "b_file.rs", "subdir", "subdir/nested.txt"]
            .into_iter()
            .map(PathBuf::from)
            .collect();

        assert_eq!(collect_relative_paths(&results, root), expected);

        Ok(())
    }

    /// A non-contiguous query matches the path it is a subsequence of and nothing else.
    #[test]
    fn test_search_files_uses_fuzzy_matching() -> Result<()> {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let src = root.join("src");

        fs::create_dir(&src).unwrap();
        fs::write(src.join("lib.rs"), "content").unwrap();
        fs::write(root.join("README.md"), "docs").unwrap();

        let results =
            FileSearcher::with_default_config(root.to_path_buf()).search_files(Some("srlb"))?;

        let files: Vec<FileSearchResult> = results
            .into_iter()
            .filter(|result| !result.is_dir)
            .collect();
        let file_paths = collect_relative_paths(&files, root);

        assert!(file_paths.contains(&PathBuf::from("src/lib.rs")));
        assert!(!file_paths.contains(&PathBuf::from("README.md")));

        Ok(())
    }

    /// Entries ignored by a `.gitignore` in the root are not returned.
    #[test]
    fn test_search_files_respects_gitignore() -> Result<()> {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let ignored = root.join("ignored");

        fs::write(root.join(".gitignore"), "ignored/\n").unwrap();
        fs::create_dir(&ignored).unwrap();
        fs::write(ignored.join("skip.txt"), "skip").unwrap();
        fs::write(root.join("include.txt"), "include").unwrap();

        let results = FileSearcher::with_default_config(root.to_path_buf()).search_files(None)?;
        let relative = collect_relative_paths(&results, root);

        assert!(relative.contains(&PathBuf::from("include.txt")));
        assert!(!relative.contains(&PathBuf::from("ignored/skip.txt")));

        Ok(())
    }
}