vtcode_core/tools/
file_search.rs

1//! Recursive file search functionality for VTCode
2//!
3//! This module provides utilities for recursively searching files in a project workspace,
4//! with support for glob patterns, exclusions, and content searching.
5
6use anyhow::{Context, Result};
7use glob::Pattern;
8use serde_json::{Value, json};
9use std::collections::HashSet;
10use std::fs;
11use std::path::{Path, PathBuf};
12use walkdir::WalkDir;
13
14/// Configuration for file search operations
15#[derive(Debug, Clone)]
16pub struct FileSearchConfig {
17    /// Maximum number of results to return
18    pub max_results: usize,
19    /// Whether to follow symbolic links
20    pub follow_links: bool,
21    /// Whether to include hidden files
22    pub include_hidden: bool,
23    /// File extensions to include (if empty, include all)
24    pub include_extensions: HashSet<String>,
25    /// File extensions to exclude
26    pub exclude_extensions: HashSet<String>,
27    /// File names/patterns to exclude
28    pub exclude_patterns: Vec<Pattern>,
29    /// Maximum file size in bytes (0 = no limit)
30    pub max_file_size: u64,
31}
32
33impl Default for FileSearchConfig {
34    fn default() -> Self {
35        Self {
36            max_results: 1000,
37            follow_links: false,
38            include_hidden: false,
39            include_extensions: HashSet::new(),
40            exclude_extensions: HashSet::new(),
41            exclude_patterns: Vec::new(),
42            max_file_size: 0,
43        }
44    }
45}
46
47/// Result of a file search operation
48#[derive(Debug, Clone)]
49pub struct FileSearchResult {
50    /// Path to the file
51    pub path: PathBuf,
52    /// File name
53    pub name: String,
54    /// File extension
55    pub extension: Option<String>,
56    /// File size in bytes
57    pub size: u64,
58    /// Whether the file is a directory
59    pub is_dir: bool,
60    /// Content matches (if searched for content)
61    pub content_matches: Vec<ContentMatch>,
62}
63
/// A single occurrence of a content pattern inside a file.
#[derive(Clone, Debug)]
pub struct ContentMatch {
    /// 1-based number of the line containing the occurrence.
    pub line_number: usize,
    /// Full text of that line, in its original casing.
    pub content: String,
    /// 0-based byte offset of the occurrence within the lowercased line.
    pub column: usize,
}
74
75/// File searcher for recursive file operations
76pub struct FileSearcher {
77    root: PathBuf,
78    config: FileSearchConfig,
79}
80
81impl FileSearcher {
82    /// Create a new file searcher
83    pub fn new(root: PathBuf, config: FileSearchConfig) -> Self {
84        Self { root, config }
85    }
86
87    /// Create a searcher with default configuration
88    pub fn with_default_config(root: PathBuf) -> Self {
89        Self::new(root, FileSearchConfig::default())
90    }
91
92    /// Recursively search for files matching the given pattern
93    pub fn search_files(&self, pattern: Option<&str>) -> Result<Vec<FileSearchResult>> {
94        let mut results = Vec::new();
95        let max_results = self.config.max_results;
96
97        for entry in WalkDir::new(&self.root)
98            .follow_links(self.config.follow_links)
99            .into_iter()
100            .filter_map(|e| e.ok())
101        {
102            if results.len() >= max_results {
103                break;
104            }
105
106            let path = entry.path();
107
108            // Skip if should be excluded
109            if self.should_exclude_path(path)? {
110                continue;
111            }
112
113            // Check if path matches pattern (if pattern is provided)
114            if let Some(pattern_str) = pattern {
115                if !pattern_str.is_empty() && !self.path_matches_pattern(path, pattern_str)? {
116                    continue;
117                }
118            }
119
120            let metadata = match entry.metadata() {
121                Ok(meta) => meta,
122                Err(_) => continue, // Skip files we can't read metadata for
123            };
124
125            let file_result = FileSearchResult {
126                path: path.to_path_buf(),
127                name: path
128                    .file_name()
129                    .and_then(|n| n.to_str())
130                    .unwrap_or("")
131                    .to_string(),
132                extension: path
133                    .extension()
134                    .and_then(|ext| ext.to_str())
135                    .map(|ext| ext.to_string()),
136                size: metadata.len(),
137                is_dir: metadata.is_dir(),
138                content_matches: Vec::new(),
139            };
140
141            results.push(file_result);
142        }
143
144        Ok(results)
145    }
146
147    /// Search for files containing specific content
148    pub fn search_files_with_content(
149        &self,
150        content_pattern: &str,
151        file_pattern: Option<&str>,
152    ) -> Result<Vec<FileSearchResult>> {
153        let mut results = Vec::new();
154        let max_results = self.config.max_results;
155
156        for entry in WalkDir::new(&self.root)
157            .follow_links(self.config.follow_links)
158            .into_iter()
159            .filter_map(|e| e.ok())
160        {
161            if results.len() >= max_results {
162                break;
163            }
164
165            let path = entry.path();
166
167            // Skip if should be excluded
168            if self.should_exclude_path(path)? {
169                continue;
170            }
171
172            // Skip directories for content search
173            if entry.metadata().map(|m| m.is_dir()).unwrap_or(false) {
174                continue;
175            }
176
177            // Check file pattern if specified
178            if let Some(pattern_str) = file_pattern {
179                if !self.path_matches_pattern(path, pattern_str)? {
180                    continue;
181                }
182            }
183
184            // Search for content in the file
185            match self.search_content_in_file(path, content_pattern) {
186                Ok(content_matches) => {
187                    if !content_matches.is_empty() {
188                        let metadata = match entry.metadata() {
189                            Ok(meta) => meta,
190                            Err(_) => continue,
191                        };
192
193                        let file_result = FileSearchResult {
194                            path: path.to_path_buf(),
195                            name: path
196                                .file_name()
197                                .and_then(|n| n.to_str())
198                                .unwrap_or("")
199                                .to_string(),
200                            extension: path
201                                .extension()
202                                .and_then(|ext| ext.to_str())
203                                .map(|ext| ext.to_string()),
204                            size: metadata.len(),
205                            is_dir: metadata.is_dir(),
206                            content_matches,
207                        };
208
209                        results.push(file_result);
210                    }
211                }
212                Err(_) => {
213                    // Skip files we can't read
214                    continue;
215                }
216            }
217        }
218
219        Ok(results)
220    }
221
222    /// Find a specific file by name (recursively)
223    pub fn find_file_by_name(&self, file_name: &str) -> Result<Option<PathBuf>> {
224        for entry in WalkDir::new(&self.root)
225            .follow_links(self.config.follow_links)
226            .into_iter()
227            .filter_map(|e| e.ok())
228        {
229            let path = entry.path();
230
231            // Skip if should be excluded
232            if self.should_exclude_path(path)? {
233                continue;
234            }
235
236            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
237                if name == file_name {
238                    return Ok(Some(path.to_path_buf()));
239                }
240            }
241        }
242
243        Ok(None)
244    }
245
246    /// Check if a path should be excluded based on configuration
247    fn should_exclude_path(&self, path: &Path) -> Result<bool> {
248        let path_str = path.to_string_lossy();
249
250        // Skip hidden files if not included
251        if !self.config.include_hidden {
252            // Check if any component of the path is hidden (starts with '.')
253            for component in path.components() {
254                if let std::path::Component::Normal(name) = component {
255                    if let Some(name_str) = name.to_str() {
256                        if name_str.starts_with('.') {
257                            return Ok(true);
258                        }
259                    }
260                }
261            }
262        }
263
264        // Check file extensions
265        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
266            let ext_lower = ext.to_lowercase();
267
268            // Check exclude extensions
269            if self.config.exclude_extensions.contains(&ext_lower) {
270                return Ok(true);
271            }
272
273            // Check include extensions (if specified)
274            if !self.config.include_extensions.is_empty()
275                && !self.config.include_extensions.contains(&ext_lower)
276            {
277                return Ok(true);
278            }
279        }
280
281        // Check exclude patterns
282        for pattern in &self.config.exclude_patterns {
283            if pattern.matches(&path_str) {
284                return Ok(true);
285            }
286        }
287
288        // Check file size
289        if self.config.max_file_size > 0 {
290            if let Ok(metadata) = fs::metadata(path) {
291                if metadata.len() > self.config.max_file_size {
292                    return Ok(true);
293                }
294            }
295        }
296
297        Ok(false)
298    }
299
300    /// Check if a path matches a pattern
301    fn path_matches_pattern(&self, path: &Path, pattern: &str) -> Result<bool> {
302        // If pattern is empty, match everything
303        if pattern.is_empty() {
304            return Ok(true);
305        }
306
307        // Convert to lowercase for case-insensitive matching
308        let path_str = path.to_string_lossy().to_lowercase();
309        let pattern_lower = pattern.to_lowercase();
310
311        // Handle wildcard patterns
312        if pattern_lower.contains('*') || pattern_lower.contains('?') {
313            // Use glob matching for patterns with wildcards
314            if let Ok(glob_pattern) = Pattern::new(&format!("*{}*", pattern_lower)) {
315                return Ok(glob_pattern.matches(&path_str));
316            }
317        }
318
319        // Simple substring match for basic patterns
320        Ok(path_str.contains(&pattern_lower))
321    }
322
323    /// Search for content within a file
324    fn search_content_in_file(&self, path: &Path, pattern: &str) -> Result<Vec<ContentMatch>> {
325        let content = fs::read_to_string(path)
326            .with_context(|| format!("Failed to read file: {}", path.display()))?;
327
328        let mut matches = Vec::new();
329        let pattern_lower = pattern.to_lowercase();
330
331        for (line_num, line) in content.lines().enumerate() {
332            let line_lower = line.to_lowercase();
333            if line_lower.contains(&pattern_lower) {
334                // Find all occurrences in the line
335                let mut start = 0;
336                while let Some(pos) = line_lower[start..].find(&pattern_lower) {
337                    let actual_pos = start + pos;
338                    matches.push(ContentMatch {
339                        line_number: line_num + 1,
340                        content: line.to_string(),
341                        column: actual_pos,
342                    });
343                    start = actual_pos + pattern.len();
344                }
345            }
346        }
347
348        Ok(matches)
349    }
350
351    /// Convert search results to JSON format
352    pub fn results_to_json(results: Vec<FileSearchResult>) -> Value {
353        let json_results: Vec<Value> = results
354            .into_iter()
355            .map(|result| {
356                json!({
357                    "path": result.path.to_string_lossy(),
358                    "name": result.name,
359                    "extension": result.extension,
360                    "size": result.size,
361                    "is_dir": result.is_dir,
362                    "content_matches": result.content_matches.iter().map(|m| json!({
363                        "line_number": m.line_number,
364                        "content": m.content,
365                        "column": m.column,
366                    })).collect::<Vec<Value>>()
367                })
368            })
369            .collect();
370
371        json!({
372            "success": true,
373            "results": json_results,
374            "count": json_results.len()
375        })
376    }
377}
378
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// The searcher keeps the root it was constructed with.
    #[test]
    fn test_file_searcher_creation() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path().to_path_buf();

        let searcher = FileSearcher::with_default_config(root);

        assert_eq!(searcher.root, workspace.path());
    }

    /// A file placed directly in the root is located by its exact name.
    #[test]
    fn test_find_file_by_name() -> Result<()> {
        let workspace = TempDir::new().unwrap();
        let expected = workspace.path().join("test.txt");
        fs::write(&expected, "test content").unwrap();

        let searcher = FileSearcher::with_default_config(workspace.path().to_path_buf());
        let found = searcher.find_file_by_name("test.txt")?;

        assert!(found.is_some());
        assert_eq!(found.unwrap(), expected);

        Ok(())
    }

    /// Without a pattern, every file and directory below the root is reported.
    #[test]
    fn test_search_files() -> Result<()> {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let nested = root.join("subdir");

        // Lay out two top-level files plus one sub-directory holding a third file.
        fs::write(root.join("file1.txt"), "content1").unwrap();
        fs::write(root.join("file2.rs"), "content2").unwrap();
        fs::create_dir(&nested).unwrap();
        fs::write(nested.join("file3.txt"), "content3").unwrap();

        let searcher = FileSearcher::with_default_config(root.to_path_buf());
        let results = searcher.search_files(None)?;

        // 2 files + 1 subdir + 1 file in subdir
        assert_eq!(results.len(), 4);

        Ok(())
    }
}
423}