vtcode_core/tools/
advanced_search.rs

1//! Advanced search tools with enhanced case-insensitive capabilities
2
3use super::traits::Tool;
4use crate::tools::grep_search::GrepSearchManager;
5use anyhow::{Result, anyhow};
6use async_trait::async_trait;
7use regex::Regex;
8use serde_json::{Value, json};
9use std::collections::HashMap;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12
13/// Enhanced search tool with advanced case-insensitive features
/// Content search over a workspace directory tree.
///
/// The tool resolves every requested search path against `workspace_root`
/// and reports matched files relative to that root when possible.
pub struct AdvancedSearchTool {
    /// Directory that search paths are joined onto and that is stripped from
    /// reported file paths.
    workspace_root: PathBuf,
}
17
/// Knobs that control how a single search is executed and filtered.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// When `false`, matching ignores letter case.
    pub case_sensitive: bool,
    /// When `true`, the query must be delimited by word boundaries.
    pub whole_word: bool,
    /// When `true`, the query is interpreted as a regular expression instead
    /// of literal text.
    pub regex: bool,
    /// When `false`, entries whose name starts with `.` are skipped.
    pub include_hidden: bool,
    /// Upper bound on the number of matches returned.
    pub max_results: usize,
    /// Number of surrounding lines attached to each match (`0` disables
    /// context).
    pub context_lines: usize,
    /// Patterns a file must match to be searched; empty means "all files".
    pub file_patterns: Vec<String>,
    /// Patterns whose matching files are removed from the results.
    pub exclude_patterns: Vec<String>,
}
29
30impl Default for SearchOptions {
31    fn default() -> Self {
32        Self {
33            case_sensitive: false,
34            whole_word: false,
35            regex: false,
36            include_hidden: false,
37            max_results: 100,
38            context_lines: 0,
39            file_patterns: vec![],
40            exclude_patterns: vec![],
41        }
42    }
43}
44
45impl AdvancedSearchTool {
46    pub fn new(workspace_root: PathBuf, _grep_search: Arc<GrepSearchManager>) -> Self {
47        // grep_search was unused; keep constructor signature for compatibility but drop the field
48        Self { workspace_root }
49    }
50
51    /// Perform advanced search with multiple options
52    pub async fn search(&self, query: &str, path: &str, options: SearchOptions) -> Result<Value> {
53        let results = if options.regex {
54            // Use regex search
55            self.regex_search(query, path, &options).await?
56        } else {
57            // Use pattern-based search
58            self.pattern_search(query, path, &options).await?
59        };
60
61        // Apply post-processing filters
62        let filtered_results = self.apply_filters(results, &options);
63
64        Ok(json!({
65            "success": true,
66            "query": query,
67            "path": path,
68            "options": {
69                "case_sensitive": options.case_sensitive,
70                "whole_word": options.whole_word,
71                "regex": options.regex,
72                "include_hidden": options.include_hidden,
73                "max_results": options.max_results,
74                "context_lines": options.context_lines,
75                "file_patterns": options.file_patterns,
76                "exclude_patterns": options.exclude_patterns
77            },
78            "results": filtered_results,
79            "total_matches": filtered_results.len()
80        }))
81    }
82
83    /// Perform regex-based search
84    async fn regex_search(
85        &self,
86        pattern: &str,
87        path: &str,
88        options: &SearchOptions,
89    ) -> Result<Vec<Value>> {
90        let regex_flags = if options.case_sensitive { "" } else { "(?i)" };
91        let regex_pattern = if options.whole_word {
92            format!(r"{}\b{}\b", regex_flags, regex::escape(pattern))
93        } else {
94            format!(r"{}{}", regex_flags, pattern)
95        };
96
97        let regex =
98            Regex::new(&regex_pattern).map_err(|e| anyhow!("Invalid regex pattern: {}", e))?;
99
100        let mut results = Vec::new();
101        let search_path = self.workspace_root.join(path);
102
103        self.search_files_recursive(&search_path, &regex, options, &mut results)
104            .await?;
105
106        Ok(results)
107    }
108
109    /// Perform pattern-based search
110    async fn pattern_search(
111        &self,
112        pattern: &str,
113        path: &str,
114        options: &SearchOptions,
115    ) -> Result<Vec<Value>> {
116        let search_pattern = if options.whole_word {
117            format!(r"\b{}\b", regex::escape(pattern))
118        } else {
119            regex::escape(pattern)
120        };
121
122        let regex_flags = if options.case_sensitive { "" } else { "(?i)" };
123        let regex_pattern = format!(r"{}{}", regex_flags, search_pattern);
124
125        let regex =
126            Regex::new(&regex_pattern).map_err(|e| anyhow!("Invalid search pattern: {}", e))?;
127
128        let mut results = Vec::new();
129        let search_path = self.workspace_root.join(path);
130
131        self.search_files_recursive(&search_path, &regex, options, &mut results)
132            .await?;
133
134        Ok(results)
135    }
136
137    /// Recursively search files
138    async fn search_files_recursive(
139        &self,
140        dir: &Path,
141        regex: &Regex,
142        options: &SearchOptions,
143        results: &mut Vec<Value>,
144    ) -> Result<()> {
145        if !dir.exists() {
146            return Ok(());
147        }
148
149        let mut entries = tokio::fs::read_dir(dir).await?;
150
151        while let Some(entry) = entries.next_entry().await? {
152            let path = entry.path();
153
154            // Skip hidden files unless explicitly included
155            if !options.include_hidden
156                && path
157                    .file_name()
158                    .and_then(|n| n.to_str())
159                    .map(|n| n.starts_with('.'))
160                    .unwrap_or(false)
161            {
162                continue;
163            }
164
165            if path.is_dir() {
166                // Skip common directories that shouldn't be searched
167                if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) {
168                    if matches!(
169                        dir_name,
170                        "node_modules" | ".git" | "target" | "__pycache__" | ".next"
171                    ) {
172                        continue;
173                    }
174                }
175
176                // Recurse into subdirectories
177                Box::pin(self.search_files_recursive(&path, regex, options, results)).await?;
178            } else if path.is_file() {
179                // Check file pattern filters
180                if !self.matches_file_patterns(&path, options) {
181                    continue;
182                }
183
184                // Search file content
185                match self.search_file_content(&path, regex, options).await {
186                    Ok(file_results) => {
187                        results.extend(file_results);
188                        // file_count += 1; // Not used, so removed
189
190                        // Check if we've hit the max results limit
191                        if results.len() >= options.max_results {
192                            break;
193                        }
194                    }
195                    Err(_) => continue, // Skip files that can't be read
196                }
197            }
198        }
199
200        Ok(())
201    }
202
203    /// Search content of a single file
204    async fn search_file_content(
205        &self,
206        file_path: &Path,
207        regex: &Regex,
208        options: &SearchOptions,
209    ) -> Result<Vec<Value>> {
210        let content = tokio::fs::read_to_string(file_path).await?;
211        let lines: Vec<&str> = content.lines().collect();
212        let mut file_results = Vec::new();
213
214        for (line_num, line) in lines.iter().enumerate() {
215            if regex.is_match(line) {
216                let result = json!({
217                    "file": file_path.strip_prefix(&self.workspace_root)
218                        .unwrap_or(file_path)
219                        .to_string_lossy(),
220                    "line": line_num + 1,
221                    "content": line.trim(),
222                    "context": if options.context_lines > 0 {
223                        self.get_context_lines(&lines, line_num, options.context_lines)
224                    } else {
225                        Value::Null
226                    }
227                });
228
229                file_results.push(result);
230
231                if file_results.len() >= options.max_results {
232                    break;
233                }
234            }
235        }
236
237        Ok(file_results)
238    }
239
240    /// Get context lines around a match
241    fn get_context_lines(&self, lines: &[&str], match_line: usize, context_lines: usize) -> Value {
242        let start = match_line.saturating_sub(context_lines);
243        let end = (match_line + context_lines + 1).min(lines.len());
244
245        let context: Vec<Value> = lines[start..end]
246            .iter()
247            .enumerate()
248            .map(|(i, line)| {
249                let line_num = start + i + 1;
250                let is_match = line_num == match_line + 1;
251                json!({
252                    "line": line_num,
253                    "content": line.trim(),
254                    "is_match": is_match
255                })
256            })
257            .collect();
258
259        Value::Array(context)
260    }
261
262    /// Check if file matches the specified patterns
263    fn matches_file_patterns(&self, file_path: &Path, options: &SearchOptions) -> bool {
264        if options.file_patterns.is_empty() {
265            return true;
266        }
267
268        let file_name = file_path.to_string_lossy();
269
270        for pattern in &options.file_patterns {
271            if self.matches_glob_pattern(&file_name, pattern) {
272                return true;
273            }
274        }
275
276        false
277    }
278
279    /// Simple glob pattern matching
280    fn matches_glob_pattern(&self, text: &str, pattern: &str) -> bool {
281        if pattern.contains('*') {
282            let regex_pattern = pattern
283                .replace('.', r"\.")
284                .replace('*', ".*")
285                .replace('?', ".");
286            Regex::new(&format!("^{}$", regex_pattern))
287                .map(|r| r.is_match(text))
288                .unwrap_or(false)
289        } else {
290            text.contains(pattern)
291        }
292    }
293
294    /// Apply post-processing filters to results
295    fn apply_filters(&self, results: Vec<Value>, options: &SearchOptions) -> Vec<Value> {
296        let mut filtered = results;
297
298        // Apply exclude patterns
299        if !options.exclude_patterns.is_empty() {
300            filtered = filtered
301                .into_iter()
302                .filter(|result| {
303                    if let Some(file) = result.get("file").and_then(|f| f.as_str()) {
304                        !options
305                            .exclude_patterns
306                            .iter()
307                            .any(|pattern| self.matches_glob_pattern(file, pattern))
308                    } else {
309                        true
310                    }
311                })
312                .collect();
313        }
314
315        // Limit results
316        if filtered.len() > options.max_results {
317            filtered.truncate(options.max_results);
318        }
319
320        filtered
321    }
322
323    /// Perform case-insensitive search with smart defaults
324    pub async fn smart_search(&self, query: &str, path: &str) -> Result<Value> {
325        let options = SearchOptions {
326            case_sensitive: false,
327            whole_word: false,
328            regex: false,
329            include_hidden: false,
330            max_results: 50,
331            context_lines: 2,
332            file_patterns: vec![],
333            exclude_patterns: vec![
334                "*.log".to_string(),
335                "*.min.js".to_string(),
336                "*.min.css".to_string(),
337                "node_modules/**".to_string(),
338                ".git/**".to_string(),
339                "target/**".to_string(),
340            ],
341        };
342
343        self.search(query, path, options).await
344    }
345
346    /// Search for multiple terms with case-insensitive matching
347    pub async fn multi_term_search(
348        &self,
349        terms: &[String],
350        path: &str,
351        require_all: bool,
352    ) -> Result<Value> {
353        let mut all_results = Vec::new();
354        let mut term_matches = HashMap::new();
355
356        // Search for each term
357        for term in terms {
358            let result = self.smart_search(term, path).await?;
359            if let Some(results) = result.get("results").and_then(|r| r.as_array()) {
360                term_matches.insert(term.clone(), results.clone());
361                all_results.extend(results.clone());
362            }
363        }
364
365        // Filter results based on require_all flag
366        let filtered_results = if require_all {
367            self.filter_require_all(all_results, &term_matches, terms)
368        } else {
369            self.deduplicate_results(all_results)
370        };
371
372        Ok(json!({
373            "success": true,
374            "query_terms": terms,
375            "require_all": require_all,
376            "results": filtered_results,
377            "total_matches": filtered_results.len()
378        }))
379    }
380
381    /// Filter results to only include files that contain all search terms
382    fn filter_require_all(
383        &self,
384        results: Vec<Value>,
385        term_matches: &HashMap<String, Vec<Value>>,
386        terms: &[String],
387    ) -> Vec<Value> {
388        let mut file_groups: HashMap<String, Vec<Value>> = HashMap::new();
389
390        // Group results by file
391        for result in results {
392            if let Some(file) = result.get("file").and_then(|f| f.as_str()) {
393                file_groups
394                    .entry(file.to_string())
395                    .or_insert_with(Vec::new)
396                    .push(result);
397            }
398        }
399
400        // Filter files that contain all terms
401        file_groups
402            .into_iter()
403            .filter(|(_, file_results)| {
404                let _file_path = file_results
405                    .first()
406                    .and_then(|r| r.get("file"))
407                    .and_then(|f| f.as_str())
408                    .unwrap_or("");
409
410                terms.iter().all(|term| {
411                    file_results.iter().any(|result| {
412                        result
413                            .get("content")
414                            .and_then(|c| c.as_str())
415                            .map(|content| {
416                                if term_matches.contains_key(term) {
417                                    content.to_lowercase().contains(&term.to_lowercase())
418                                } else {
419                                    false
420                                }
421                            })
422                            .unwrap_or(false)
423                    })
424                })
425            })
426            .flat_map(|(_, results)| results)
427            .collect()
428    }
429
430    /// Remove duplicate results
431    fn deduplicate_results(&self, results: Vec<Value>) -> Vec<Value> {
432        let mut seen = std::collections::HashSet::new();
433
434        results
435            .into_iter()
436            .filter(|result| {
437                let key = format!(
438                    "{}:{}",
439                    result.get("file").and_then(|f| f.as_str()).unwrap_or(""),
440                    result.get("line").and_then(|l| l.as_u64()).unwrap_or(0)
441                );
442
443                seen.insert(key)
444            })
445            .collect()
446    }
447}
448
449#[async_trait]
450impl Tool for AdvancedSearchTool {
451    fn name(&self) -> &'static str {
452        "advanced_search"
453    }
454
455    fn description(&self) -> &'static str {
456        "Advanced search tool with case-insensitive matching, regex support, and smart filtering"
457    }
458
459    async fn execute(&self, args: Value) -> Result<Value> {
460        let query = args
461            .get("query")
462            .and_then(|q| q.as_str())
463            .ok_or_else(|| anyhow!("Missing query parameter"))?;
464
465        let path = args.get("path").and_then(|p| p.as_str()).unwrap_or(".");
466
467        let options = SearchOptions {
468            case_sensitive: args
469                .get("case_sensitive")
470                .and_then(|c| c.as_bool())
471                .unwrap_or(false),
472            whole_word: args
473                .get("whole_word")
474                .and_then(|w| w.as_bool())
475                .unwrap_or(false),
476            regex: args.get("regex").and_then(|r| r.as_bool()).unwrap_or(false),
477            include_hidden: args
478                .get("include_hidden")
479                .and_then(|h| h.as_bool())
480                .unwrap_or(false),
481            max_results: args
482                .get("max_results")
483                .and_then(|m| m.as_u64())
484                .unwrap_or(100) as usize,
485            context_lines: args
486                .get("context_lines")
487                .and_then(|c| c.as_u64())
488                .unwrap_or(0) as usize,
489            file_patterns: args
490                .get("file_patterns")
491                .and_then(|fp| fp.as_array())
492                .map(|arr| {
493                    arr.iter()
494                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
495                        .collect()
496                })
497                .unwrap_or_default(),
498            exclude_patterns: args
499                .get("exclude_patterns")
500                .and_then(|ep| ep.as_array())
501                .map(|arr| {
502                    arr.iter()
503                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
504                        .collect()
505                })
506                .unwrap_or_default(),
507        };
508
509        self.search(query, path, options).await
510    }
511}
512
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Creates a temporary workspace containing `test.txt` with `contents`
    /// and a search tool rooted at it.
    ///
    /// The returned `TempDir` must be kept alive for as long as the tool is
    /// used; dropping it deletes the workspace.
    async fn tool_with_file(contents: &str) -> (TempDir, AdvancedSearchTool) {
        let temp_dir = TempDir::new().unwrap();
        let workspace_root = temp_dir.path().to_path_buf();

        tokio::fs::write(workspace_root.join("test.txt"), contents)
            .await
            .unwrap();

        let rp_search = Arc::new(GrepSearchManager::new(workspace_root.clone()));
        let search_tool = AdvancedSearchTool::new(workspace_root, rp_search);

        (temp_dir, search_tool)
    }

    /// Extracts the 1-based line numbers of all matches in a search report.
    fn matched_lines(report: &Value) -> Vec<u64> {
        report
            .get("results")
            .and_then(Value::as_array)
            .unwrap()
            .iter()
            .map(|entry| entry.get("line").and_then(Value::as_u64).unwrap())
            .collect()
    }

    #[tokio::test]
    async fn test_case_insensitive_search() {
        let (_workspace, search_tool) =
            tool_with_file("Hello World\nHELLO world\nhello WORLD").await;

        let options = SearchOptions {
            case_sensitive: false,
            ..Default::default()
        };

        let result = search_tool.search("hello", ".", options).await.unwrap();

        // Every line contains "hello" in some letter case.
        assert_eq!(matched_lines(&result), vec![1, 2, 3]);
    }

    #[tokio::test]
    async fn test_whole_word_search() {
        let (_workspace, search_tool) =
            tool_with_file("hello world\nhelloworld\nhello-world\nhello_world").await;

        let options = SearchOptions {
            case_sensitive: false,
            whole_word: true,
            ..Default::default()
        };

        let result = search_tool.search("hello", ".", options).await.unwrap();

        // `\b` sits between a word character and a non-word character, so the
        // space and the hyphen both end the word "hello". A following letter
        // or underscore (both word characters) does not.
        assert_eq!(matched_lines(&result), vec![1, 3]);
    }
}
569}