Skip to main content

matrixcode_core/tools/
grep.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4
5use super::{Tool, ToolDefinition};
6
/// Grep-style content search tool.
///
/// The type is a stateless unit struct: all inputs arrive through the
/// parameters passed to its `Tool` implementation. The common traits are
/// derived so the tool can be debug-printed, copied and default-constructed
/// like any other plain value.
#[derive(Debug, Clone, Copy, Default)]
pub struct GrepTool;
9
10#[async_trait]
11impl Tool for GrepTool {
12    fn definition(&self) -> ToolDefinition {
13        ToolDefinition {
14            name: "grep".to_string(),
15            description: "高性能内容搜索工具,适用于任意规模代码库。支持正则表达式、文件类型过滤和多种输出模式。".to_string(),
16            parameters: json!({
17                "type": "object",
18                "properties": {
19                    "pattern": {
20                        "type": "string",
21                        "description": "要搜索的正则表达式模式"
22                    },
23                    "path": {
24                        "type": "string",
25                        "description": "搜索的文件或目录(默认当前目录)"
26                    },
27                    "glob": {
28                        "type": "string",
29                        "description": "Glob 文件过滤模式(如 '*.ts'、'**/*.rs')"
30                    },
31                    "type": {
32                        "type": "string",
33                        "enum": ["js", "ts", "py", "rs", "go", "java", "c", "cpp", "md", "json", "yaml", "html", "css"],
34                        "description": "按文件类型搜索(映射到常用扩展名)"
35                    },
36                    "output_mode": {
37                        "type": "string",
38                        "enum": ["content", "files_with_matches", "count"],
39                        "default": "content",
40                        "description": "输出模式:'content' 显示匹配行,'files_with_matches' 列出文件,'count' 显示匹配数"
41                    },
42                    "-i": {
43                        "type": "boolean",
44                        "default": false,
45                        "description": "忽略大小写"
46                    },
47                    "-n": {
48                        "type": "boolean",
49                        "default": true,
50                        "description": "显示行号"
51                    },
52                    "-C": {
53                        "type": "integer",
54                        "default": 0,
55                        "description": "匹配行前后显示的上下文行数"
56                    },
57                    "head_limit": {
58                        "type": "integer",
59                        "default": 100,
60                        "description": "最大返回结果数"
61                    }
62                },
63                "required": ["pattern"]
64            }),
65        }
66    }
67
68    async fn execute(&self, params: Value) -> Result<String> {
69        let pattern = params["pattern"].as_str().ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?;
70        let path = params["path"].as_str().unwrap_or(".");
71        let glob_pattern = params["glob"].as_str();
72        let file_type = params["type"].as_str();
73        let output_mode = params["output_mode"].as_str().unwrap_or("content");
74        let case_insensitive = params["-i"].as_bool().unwrap_or(false);
75        let show_line_numbers = params["-n"].as_bool().unwrap_or(true);
76        let context_lines = params["-C"].as_u64().unwrap_or(0) as usize;
77        let head_limit = params["head_limit"].as_u64().unwrap_or(100) as usize;
78
79        let pattern = pattern.to_string();
80        let path = path.to_string();
81        let glob_pattern = glob_pattern.map(|s| s.to_string());
82        let file_type = file_type.map(|s| s.to_string());
83        let output_mode = output_mode.to_string();
84
85        tokio::task::spawn_blocking(move || {
86            grep_search(
87                &pattern, &path, glob_pattern.as_deref(),
88                file_type.as_deref(), &output_mode,
89                case_insensitive, show_line_numbers, context_lines, head_limit
90            )
91        }).await?
92    }
93}
94
/// Maps a short file-type name (for example `"ts"`) to the file extensions
/// that belong to it.
///
/// Extensions are returned without a leading dot. An unrecognised type name
/// yields an empty vector.
fn get_extensions_for_type(file_type: &str) -> Vec<&'static str> {
    // One row per supported type: (type name, extensions).
    const EXTENSIONS_BY_TYPE: &[(&str, &[&str])] = &[
        ("js", &["js", "jsx", "mjs", "cjs"]),
        ("ts", &["ts", "tsx", "mts", "cts"]),
        ("py", &["py", "pyw", "pyi"]),
        ("rs", &["rs"]),
        ("go", &["go"]),
        ("java", &["java"]),
        ("c", &["c", "h"]),
        ("cpp", &["cpp", "cc", "cxx", "hpp", "hh", "hxx"]),
        ("md", &["md", "markdown"]),
        ("json", &["json", "json5", "jsonc"]),
        ("yaml", &["yaml", "yml"]),
        ("html", &["html", "htm", "xhtml"]),
        ("css", &["css", "scss", "sass", "less"]),
    ];

    EXTENSIONS_BY_TYPE
        .iter()
        .find(|(name, _)| *name == file_type)
        .map(|(_, extensions)| extensions.to_vec())
        .unwrap_or_default()
}
114
115fn grep_search(
116    pattern: &str,
117    path: &str,
118    glob_pattern: Option<&str>,
119    file_type: Option<&str>,
120    output_mode: &str,
121    case_insensitive: bool,
122    show_line_numbers: bool,
123    context_lines: usize,
124    head_limit: usize,
125) -> Result<String> {
126    use std::fs;
127    use std::path::Path;
128
129    // Build regex with case-insensitive option
130    let regex_pattern = if case_insensitive {
131        regex::RegexBuilder::new(pattern).case_insensitive(true).build()?
132    } else {
133        regex::Regex::new(pattern)?
134    };
135
136    let root = Path::new(path);
137    let mut results: Vec<String> = Vec::new();
138    let mut match_count = 0;
139    let mut files_with_matches: Vec<String> = Vec::new();
140
141    // Get file extensions for type filter
142    let type_extensions = file_type.map(get_extensions_for_type);
143
144    let entries = collect_grep_files(root, glob_pattern, type_extensions.as_deref())?;
145
146    for file_path in entries {
147        if results.len() >= head_limit && output_mode == "content" {
148            results.push(format!("... (limited to {} results)", head_limit));
149            break;
150        }
151
152        let content = match fs::read_to_string(&file_path) {
153            Ok(c) => c,
154            Err(_) => continue,
155        };
156
157        let lines: Vec<&str> = content.lines().collect();
158        let mut file_has_match = false;
159        let mut file_match_count = 0;
160
161        for (line_idx, line) in lines.iter().enumerate() {
162            if regex_pattern.is_match(line) {
163                file_has_match = true;
164                file_match_count += 1;
165                match_count += 1;
166
167                if output_mode == "content" && results.len() < head_limit {
168                    // Add context lines before the match first
169                    if context_lines > 0 {
170                        // Before context (in correct order)
171                        let start_ctx = line_idx.saturating_sub(context_lines);
172                        for ctx_idx in start_ctx..line_idx {
173                            let ctx_line_num = ctx_idx + 1;
174                            let ctx_formatted = if show_line_numbers {
175                                format!("{}:{}- {}", file_path.display(), ctx_line_num, lines[ctx_idx].trim())
176                            } else {
177                                format!("{}- {}", file_path.display(), lines[ctx_idx].trim())
178                            };
179                            results.push(ctx_formatted);
180                        }
181                    }
182
183                    // Then add the matching line
184                    let line_num = line_idx + 1;
185                    let formatted = if show_line_numbers {
186                        format!("{}:{}: {}", file_path.display(), line_num, line.trim())
187                    } else {
188                        format!("{}: {}", file_path.display(), line.trim())
189                    };
190                    results.push(formatted);
191
192                    // Add context lines after the match
193                    if context_lines > 0 {
194                        for ctx_idx in (line_idx + 1)..=(line_idx + context_lines).min(lines.len() - 1) {
195                            let ctx_line_num = ctx_idx + 1;
196                            let ctx_formatted = if show_line_numbers {
197                                format!("{}:{}- {}", file_path.display(), ctx_line_num, lines[ctx_idx].trim())
198                            } else {
199                                format!("{}- {}", file_path.display(), lines[ctx_idx].trim())
200                            };
201                            results.push(ctx_formatted);
202                        }
203                    }
204                }
205            }
206        }
207
208        if file_has_match && output_mode == "files_with_matches" {
209            files_with_matches.push(file_path.display().to_string());
210        }
211
212        if output_mode == "count" && file_match_count > 0 {
213            results.push(format!("{}: {} matches", file_path.display(), file_match_count));
214        }
215    }
216
217    // Format output based on mode
218    match output_mode {
219        "files_with_matches" => {
220            if files_with_matches.is_empty() {
221                Ok("No files matched.".to_string())
222            } else {
223                Ok(files_with_matches.join("\n"))
224            }
225        }
226        "count" => {
227            if results.is_empty() {
228                Ok("No matches found.".to_string())
229            } else {
230                Ok(format!("Total: {} matches\n{}", match_count, results.join("\n")))
231            }
232        }
233        _ => { // content
234            if results.is_empty() {
235                Ok("No matches found.".to_string())
236            } else {
237                Ok(results.join("\n"))
238            }
239        }
240    }
241}
242
243fn collect_grep_files(
244    root: &std::path::Path,
245    glob_pattern: Option<&str>,
246    type_extensions: Option<&[&str]>,
247) -> Result<Vec<std::path::PathBuf>> {
248    let mut files = Vec::new();
249
250    if root.is_file() {
251        files.push(root.to_path_buf());
252        return Ok(files);
253    }
254
255    // Build glob matcher
256    let glob_matcher = if let Some(pattern) = glob_pattern {
257        // Handle **/*.ext patterns
258        if pattern.starts_with("**/") {
259            Some(glob::Pattern::new(pattern)?)
260        } else {
261            Some(glob::Pattern::new(pattern)?)
262        }
263    } else {
264        None
265    };
266
267    let walker = walkdir_grep(root)?;
268
269    for entry in walker {
270        let path = entry;
271
272        // Check glob pattern
273        if let Some(ref matcher) = glob_matcher {
274            let relative = path.strip_prefix(root).unwrap_or(&path);
275            let relative_str = relative.to_string_lossy();
276            if !matcher.matches(&relative_str) {
277                // Also try just the filename
278                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
279                    if !matcher.matches(name) {
280                        continue;
281                    }
282                } else {
283                    continue;
284                }
285            }
286        }
287
288        // Check file type extensions
289        if let Some(ref extensions) = type_extensions {
290            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
291            if !extensions.contains(&ext) {
292                continue;
293            }
294        }
295
296        files.push(path);
297    }
298
299    Ok(files)
300}
301
302fn walkdir_grep(root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
303    use std::fs;
304
305    let mut files = Vec::new();
306    let mut stack = vec![root.to_path_buf()];
307
308    // Directories to skip
309    const SKIP_DIRS: &[&str] = &[
310        ".git", ".svn", ".hg",
311        "node_modules", "vendor",
312        "target", "build", "dist", "out",
313        ".cache", ".npm", ".cargo",
314        "__pycache__", ".venv", "venv",
315        ".idea", ".vscode",
316    ];
317
318    while let Some(dir) = stack.pop() {
319        let entries = match fs::read_dir(&dir) {
320            Ok(e) => e,
321            Err(_) => continue,
322        };
323
324        for entry in entries.flatten() {
325            let path = entry.path();
326            let name = entry.file_name();
327            let name_str = name.to_string_lossy();
328
329            // Skip hidden files and known directories
330            if name_str.starts_with('.') || SKIP_DIRS.contains(&name_str.as_ref()) {
331                continue;
332            }
333
334            if path.is_dir() {
335                stack.push(path);
336            } else if path.is_file() {
337                files.push(path);
338            }
339        }
340    }
341
342    Ok(files)
343}