//! Regex-based text search tool (`grep`).
//!
//! Source path: `matrixcode_core/tools/grep.rs`

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4
5use super::{Tool, ToolDefinition};
6
/// Grep search options bundled into a single struct.
///
/// Built from the tool-call JSON by `GrepOptions::from_params` and consumed by
/// `grep_search`.
#[derive(Debug, Clone)]
struct GrepOptions {
    /// Regular expression that each line is tested against.
    pattern: String,
    /// File or directory to search; `from_params` defaults it to `"."`.
    path: String,
    /// Optional glob used to filter candidate files (JSON key `glob`).
    glob_pattern: Option<String>,
    /// Optional file-type key (JSON key `type`), mapped to extensions by
    /// `get_extensions_for_type`.
    file_type: Option<String>,
    /// One of `"content"`, `"files_with_matches"` or `"count"`.
    output_mode: String,
    /// When true the pattern is compiled case-insensitively (JSON key `-i`).
    case_insensitive: bool,
    /// When true every emitted line carries its 1-based line number (JSON key `-n`).
    show_line_numbers: bool,
    /// Number of context lines shown before and after each match (JSON key `-C`).
    context_lines: usize,
    /// Maximum number of output lines collected in content mode.
    head_limit: usize,
}
19
20impl GrepOptions {
21    fn from_params(params: &Value) -> Result<Self> {
22        let pattern = params["pattern"]
23            .as_str()
24            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?
25            .to_string();
26        let path = params["path"].as_str().unwrap_or(".").to_string();
27        let glob_pattern = params["glob"].as_str().map(|s| s.to_string());
28        let file_type = params["type"].as_str().map(|s| s.to_string());
29        let output_mode = params["output_mode"]
30            .as_str()
31            .unwrap_or("content")
32            .to_string();
33        let case_insensitive = params["-i"].as_bool().unwrap_or(false);
34        let show_line_numbers = params["-n"].as_bool().unwrap_or(true);
35        let context_lines = params["-C"].as_u64().unwrap_or(0) as usize;
36        let head_limit = params["head_limit"].as_u64().unwrap_or(100) as usize;
37
38        Ok(Self {
39            pattern,
40            path,
41            glob_pattern,
42            file_type,
43            output_mode,
44            case_insensitive,
45            show_line_numbers,
46            context_lines,
47            head_limit,
48        })
49    }
50}
51
/// High-performance grep tool with advanced filtering options.
///
/// A stateless unit struct: all per-call configuration arrives through the
/// JSON parameters passed to `execute`.
#[derive(Debug, Clone, Copy, Default)]
pub struct GrepTool;
54
55#[async_trait]
56impl Tool for GrepTool {
57    fn definition(&self) -> ToolDefinition {
58        ToolDefinition {
59            name: "grep".to_string(),
60            description: "搜索文本内容(错误消息、注释、字符串等)。
61
62适用场景:
63- 搜错误信息(如 'failed to connect'、'panic')
64- 找注释内容(如 'TODO'、'FIXME')
65- 搜字符串常量、日志文本
66- 搜索任意文本模式(正则表达式)
67
68不适用场景:
69- ❌ 找函数定义 → 用 code_search(快10-100倍)
70- ❌ 找类定义、变量声明 → 用 code_search
71- ❌ 查谁调用了某方法 → 用 code_callers
72
73优先级:[中] 文本搜索首选工具".to_string(),
74            parameters: json!({
75                "type": "object",
76                "properties": {
77                    "pattern": {
78                        "type": "string",
79                        "description": "要搜索的正则表达式模式"
80                    },
81                    "path": {
82                        "type": "string",
83                        "description": "搜索的文件或目录(默认当前目录)"
84                    },
85                    "glob": {
86                        "type": "string",
87                        "description": "Glob 文件过滤模式(如 '*.ts'、'**/*.rs')"
88                    },
89                    "type": {
90                        "type": "string",
91                        "enum": ["js", "ts", "py", "rs", "go", "java", "c", "cpp", "md", "json", "yaml", "html", "css"],
92                        "description": "按文件类型搜索(映射到常用扩展名)"
93                    },
94                    "output_mode": {
95                        "type": "string",
96                        "enum": ["content", "files_with_matches", "count"],
97                        "default": "content",
98                        "description": "输出模式:'content' 显示匹配行,'files_with_matches' 列出文件,'count' 显示匹配数"
99                    },
100                    "-i": {
101                        "type": "boolean",
102                        "default": false,
103                        "description": "忽略大小写"
104                    },
105                    "-n": {
106                        "type": "boolean",
107                        "default": true,
108                        "description": "显示行号"
109                    },
110                    "-C": {
111                        "type": "integer",
112                        "default": 0,
113                        "description": "匹配行前后显示的上下文行数"
114                    },
115                    "head_limit": {
116                        "type": "integer",
117                        "default": 100,
118                        "description": "最大返回结果数"
119                    }
120                },
121                "required": ["pattern"]
122            }),
123            ..Default::default()
124        }
125    }
126
127    async fn execute(&self, params: Value) -> Result<String> {
128        let opts = GrepOptions::from_params(&params)?;
129
130        tokio::task::spawn_blocking(move || grep_search(&opts)).await?
131    }
132}
133
/// Maps a file-type key (e.g. `"ts"`) to the file extensions it covers.
///
/// Extensions are returned without a leading dot. An unrecognised key yields
/// an empty list.
fn get_extensions_for_type(file_type: &str) -> Vec<&'static str> {
    let known: &[&'static str] = match file_type {
        "js" => &["js", "jsx", "mjs", "cjs"],
        "ts" => &["ts", "tsx", "mts", "cts"],
        "py" => &["py", "pyw", "pyi"],
        "rs" => &["rs"],
        "go" => &["go"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "cpp" => &["cpp", "cc", "cxx", "hpp", "hh", "hxx"],
        "md" => &["md", "markdown"],
        "json" => &["json", "json5", "jsonc"],
        "yaml" => &["yaml", "yml"],
        "html" => &["html", "htm", "xhtml"],
        "css" => &["css", "scss", "sass", "less"],
        _ => &[],
    };
    known.to_vec()
}
153
154fn grep_search(opts: &GrepOptions) -> Result<String> {
155    use std::fs;
156    use std::path::Path;
157
158    // Build regex with case-insensitive option
159    let regex_pattern = if opts.case_insensitive {
160        regex::RegexBuilder::new(&opts.pattern)
161            .case_insensitive(true)
162            .build()?
163    } else {
164        regex::Regex::new(&opts.pattern)?
165    };
166
167    let root = Path::new(&opts.path);
168    let mut results: Vec<String> = Vec::new();
169    let mut match_count = 0;
170    let mut files_with_matches: Vec<String> = Vec::new();
171
172    // Get file extensions for type filter
173    let type_extensions = opts.file_type.as_deref().map(get_extensions_for_type);
174
175    let entries = collect_grep_files(
176        root,
177        opts.glob_pattern.as_deref(),
178        type_extensions.as_deref(),
179    )?;
180
181    for file_path in entries {
182        if results.len() >= opts.head_limit && opts.output_mode == "content" {
183            results.push(format!("... (limited to {} results)", opts.head_limit));
184            break;
185        }
186
187        let content = match fs::read_to_string(&file_path) {
188            Ok(c) => c,
189            Err(_) => continue,
190        };
191
192        let lines: Vec<&str> = content.lines().collect();
193        let mut file_has_match = false;
194        let mut file_match_count = 0;
195
196        for (line_idx, line) in lines.iter().enumerate() {
197            if regex_pattern.is_match(line) {
198                file_has_match = true;
199                file_match_count += 1;
200                match_count += 1;
201
202                if opts.output_mode == "content" && results.len() < opts.head_limit {
203                    // Add context lines before the match
204                    if opts.context_lines > 0 {
205                        let start_ctx = line_idx.saturating_sub(opts.context_lines);
206                        for (ctx_idx, ctx_line) in lines
207                            .iter()
208                            .enumerate()
209                            .skip(start_ctx)
210                            .take(line_idx - start_ctx)
211                        {
212                            results.push(format_line(
213                                &file_path,
214                                ctx_idx + 1,
215                                ctx_line,
216                                opts.show_line_numbers,
217                                true,
218                            ));
219                        }
220                    }
221
222                    // Add the matching line
223                    results.push(format_line(
224                        &file_path,
225                        line_idx + 1,
226                        line,
227                        opts.show_line_numbers,
228                        false,
229                    ));
230
231                    // Add context lines after the match
232                    if opts.context_lines > 0 {
233                        let end_ctx = (line_idx + opts.context_lines).min(lines.len() - 1);
234                        for (ctx_idx, ctx_line) in lines
235                            .iter()
236                            .enumerate()
237                            .skip(line_idx + 1)
238                            .take(end_ctx - line_idx)
239                        {
240                            results.push(format_line(
241                                &file_path,
242                                ctx_idx + 1,
243                                ctx_line,
244                                opts.show_line_numbers,
245                                true,
246                            ));
247                        }
248                    }
249                }
250            }
251        }
252
253        if file_has_match && opts.output_mode == "files_with_matches" {
254            files_with_matches.push(file_path.display().to_string());
255        }
256
257        if opts.output_mode == "count" && file_match_count > 0 {
258            results.push(format!(
259                "{}: {} matches",
260                file_path.display(),
261                file_match_count
262            ));
263        }
264    }
265
266    // Format output based on mode
267    match opts.output_mode.as_str() {
268        "files_with_matches" => {
269            if files_with_matches.is_empty() {
270                Ok("No files matched.".to_string())
271            } else {
272                Ok(files_with_matches.join("\n"))
273            }
274        }
275        "count" => {
276            if results.is_empty() {
277                Ok("No matches found.".to_string())
278            } else {
279                Ok(format!(
280                    "Total: {} matches\n{}",
281                    match_count,
282                    results.join("\n")
283                ))
284            }
285        }
286        _ => {
287            // content
288            if results.is_empty() {
289                Ok("No matches found.".to_string())
290            } else {
291                Ok(results.join("\n"))
292            }
293        }
294    }
295}
296
/// Renders one output line as `path[:line_num]<marker> text`.
///
/// The marker is `-` for context lines and `:` for matching lines. The line
/// number is included only when `show_line_numbers` is set, and the line text
/// is trimmed of surrounding whitespace.
fn format_line(
    file_path: &std::path::Path,
    line_num: usize,
    line: &str,
    show_line_numbers: bool,
    is_context: bool,
) -> String {
    let separator = match is_context {
        true => "-",
        false => ":",
    };
    let shown_path = file_path.display();
    let text = line.trim();

    if !show_line_numbers {
        return format!("{}{} {}", shown_path, separator, text);
    }
    format!("{}:{}{} {}", shown_path, line_num, separator, text)
}
318
319fn collect_grep_files(
320    root: &std::path::Path,
321    glob_pattern: Option<&str>,
322    type_extensions: Option<&[&str]>,
323) -> Result<Vec<std::path::PathBuf>> {
324    let mut files = Vec::new();
325
326    if root.is_file() {
327        files.push(root.to_path_buf());
328        return Ok(files);
329    }
330
331    // Build glob matcher
332    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
333
334    let walker = walkdir_grep(root)?;
335
336    for entry in walker {
337        let path = entry;
338
339        // Check glob pattern
340        if let Some(ref matcher) = glob_matcher {
341            let relative = path.strip_prefix(root).unwrap_or(&path);
342            let relative_str = relative.to_string_lossy();
343            if !matcher.matches(&relative_str) {
344                // Also try just the filename
345                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
346                    if !matcher.matches(name) {
347                        continue;
348                    }
349                } else {
350                    continue;
351                }
352            }
353        }
354
355        // Check file type extensions
356        if let Some(extensions) = type_extensions {
357            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
358            if !extensions.contains(&ext) {
359                continue;
360            }
361        }
362
363        files.push(path);
364    }
365
366    Ok(files)
367}
368
369fn walkdir_grep(root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
370    use std::fs;
371
372    let mut files = Vec::new();
373    let mut stack = vec![root.to_path_buf()];
374
375    // Directories to skip
376    const SKIP_DIRS: &[&str] = &[
377        ".git",
378        ".svn",
379        ".hg",
380        "node_modules",
381        "vendor",
382        "target",
383        "build",
384        "dist",
385        "out",
386        ".cache",
387        ".npm",
388        ".cargo",
389        "__pycache__",
390        ".venv",
391        "venv",
392        ".idea",
393        ".vscode",
394    ];
395
396    while let Some(dir) = stack.pop() {
397        let entries = match fs::read_dir(&dir) {
398            Ok(e) => e,
399            Err(_) => continue,
400        };
401
402        for entry in entries.flatten() {
403            let path = entry.path();
404            let name = entry.file_name();
405            let name_str = name.to_string_lossy();
406
407            // Skip hidden files and known directories
408            if name_str.starts_with('.') || SKIP_DIRS.contains(&name_str.as_ref()) {
409                continue;
410            }
411
412            if path.is_dir() {
413                stack.push(path);
414            } else if path.is_file() {
415                files.push(path);
416            }
417        }
418    }
419
420    Ok(files)
421}