Skip to main content

matrixcode_core/tools/
grep.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4
5use super::{Tool, ToolDefinition, ToolContext};
6
/// Grep search options bundled into a single struct.
///
/// Built from the tool's JSON parameters by `GrepOptions::from_params` and consumed by
/// `grep_search`.
#[derive(Debug, Clone)]
struct GrepOptions {
    /// Regular expression to search for.
    pattern: String,
    /// File or directory to search.
    path: String,
    /// Optional glob used to filter candidate files.
    glob_pattern: Option<String>,
    /// Optional file-type key (e.g. `"rs"`) that is mapped to a set of file extensions.
    file_type: Option<String>,
    /// Output mode: `"content"`, `"files_with_matches"` or `"count"`.
    output_mode: String,
    /// Whether matching ignores case.
    case_insensitive: bool,
    /// Whether content output is prefixed with line numbers.
    show_line_numbers: bool,
    /// Number of context lines emitted before and after each matching line.
    context_lines: usize,
    /// Maximum number of output lines collected in content mode.
    head_limit: usize,
}
19
20impl GrepOptions {
21    fn from_params(params: &Value) -> Result<Self> {
22        let pattern = params["pattern"]
23            .as_str()
24            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?
25            .to_string();
26        let path = params["path"].as_str().unwrap_or(".").to_string();
27        let glob_pattern = params["glob"].as_str().map(|s| s.to_string());
28        let file_type = params["type"].as_str().map(|s| s.to_string());
29        let output_mode = params["output_mode"]
30            .as_str()
31            .unwrap_or("content")
32            .to_string();
33        let case_insensitive = params["-i"].as_bool().unwrap_or(false);
34        let show_line_numbers = params["-n"].as_bool().unwrap_or(true);
35        let context_lines = params["-C"].as_u64().unwrap_or(0) as usize;
36        let head_limit = params["head_limit"].as_u64().unwrap_or(100) as usize;
37
38        Ok(Self {
39            pattern,
40            path,
41            glob_pattern,
42            file_type,
43            output_mode,
44            case_insensitive,
45            show_line_numbers,
46            context_lines,
47            head_limit,
48        })
49    }
50}
51
/// Regex-based text search tool with glob, file-type, context and output-mode options.
///
/// Stateless unit type; all behaviour lives in its `Tool` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct GrepTool;
54
55#[async_trait]
56impl Tool for GrepTool {
57    fn definition_with_context(&self, ctx: &ToolContext) -> ToolDefinition {
58        // Dynamic description based on CodeGraph availability
59        let not_applicable = if ctx.codegraph_available {
60            "不适用场景:
61- ❌ 找函数定义 → code_search(快10-100倍)
62- ❌ 找类定义、变量声明 → code_search
63- ❌ 查谁调用了某方法 → code_callers"
64        } else {
65            "不适用场景:
66- ❌ 找函数定义 → 用 grep 搜索 'fn func_name' 或 'class ClassName'
67- ❌ 找类定义、变量声明 → 用 grep 搜索 class/struct 名
68- ❌ 查调用关系 → 用 grep 搜索函数名(不精确)"
69        };
70        
71        let description = format!("搜索文本内容(错误消息、注释、字符串等)。
72
73适用场景:
74- 搜错误信息(如 'failed to connect'、'panic')
75- 找注释内容(如 'TODO'、'FIXME')
76- 搜字符串常量、日志文本
77- 搜索任意文本模式(正则表达式)
78
79{}
80
81优先级:[中] 文本搜索首选工具", not_applicable);
82        
83        ToolDefinition {
84            name: "grep".to_string(),
85            description,
86            parameters: json!({
87                "type": "object",
88                "properties": {
89                    "pattern": {
90                        "type": "string",
91                        "description": "要搜索的正则表达式模式"
92                    },
93                    "path": {
94                        "type": "string",
95                        "description": "搜索的文件或目录(默认当前目录)"
96                    },
97                    "glob": {
98                        "type": "string",
99                        "description": "Glob 文件过滤模式(如 '*.ts'、'**/*.rs')"
100                    },
101                    "type": {
102                        "type": "string",
103                        "enum": ["js", "ts", "py", "rs", "go", "java", "c", "cpp", "md", "json", "yaml", "html", "css"],
104                        "description": "按文件类型搜索(映射到常用扩展名)"
105                    },
106                    "output_mode": {
107                        "type": "string",
108                        "enum": ["content", "files_with_matches", "count"],
109                        "default": "content",
110                        "description": "输出模式:'content' 显示匹配行,'files_with_matches' 列出文件,'count' 显示匹配数"
111                    },
112                    "-i": {
113                        "type": "boolean",
114                        "default": false,
115                        "description": "忽略大小写"
116                    },
117                    "-n": {
118                        "type": "boolean",
119                        "default": true,
120                        "description": "显示行号"
121                    },
122                    "-C": {
123                        "type": "integer",
124                        "default": 0,
125                        "description": "匹配行前后显示的上下文行数"
126                    },
127                    "head_limit": {
128                        "type": "integer",
129                        "default": 100,
130                        "description": "最大返回结果数"
131                    }
132                },
133                "required": ["pattern"]
134            }),
135            ..Default::default()
136        }
137    }
138    
139    fn definition(&self) -> ToolDefinition {
140        self.definition_with_context(&ToolContext::default())
141    }
142
143    async fn execute(&self, params: Value) -> Result<String> {
144        let opts = GrepOptions::from_params(&params)?;
145
146        tokio::task::spawn_blocking(move || grep_search(&opts)).await?
147    }
148}
149
/// Maps a file-type key (such as `"ts"` or `"cpp"`) to the file extensions it covers.
///
/// Extensions are returned without a leading dot. Unknown keys yield an empty vector.
fn get_extensions_for_type(file_type: &str) -> Vec<&'static str> {
    let extensions: &[&'static str] = match file_type {
        "js" => &["js", "jsx", "mjs", "cjs"],
        "ts" => &["ts", "tsx", "mts", "cts"],
        "py" => &["py", "pyw", "pyi"],
        "rs" => &["rs"],
        "go" => &["go"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "cpp" => &["cpp", "cc", "cxx", "hpp", "hh", "hxx"],
        "md" => &["md", "markdown"],
        "json" => &["json", "json5", "jsonc"],
        "yaml" => &["yaml", "yml"],
        "html" => &["html", "htm", "xhtml"],
        "css" => &["css", "scss", "sass", "less"],
        _ => &[],
    };
    extensions.to_vec()
}
169
170fn grep_search(opts: &GrepOptions) -> Result<String> {
171    use std::fs;
172    use std::path::Path;
173
174    // Build regex with case-insensitive option
175    let regex_pattern = if opts.case_insensitive {
176        regex::RegexBuilder::new(&opts.pattern)
177            .case_insensitive(true)
178            .build()?
179    } else {
180        regex::Regex::new(&opts.pattern)?
181    };
182
183    let root = Path::new(&opts.path);
184    let mut results: Vec<String> = Vec::new();
185    let mut match_count = 0;
186    let mut files_with_matches: Vec<String> = Vec::new();
187
188    // Get file extensions for type filter
189    let type_extensions = opts.file_type.as_deref().map(get_extensions_for_type);
190
191    let entries = collect_grep_files(
192        root,
193        opts.glob_pattern.as_deref(),
194        type_extensions.as_deref(),
195    )?;
196
197    for file_path in entries {
198        if results.len() >= opts.head_limit && opts.output_mode == "content" {
199            results.push(format!("... (limited to {} results)", opts.head_limit));
200            break;
201        }
202
203        let content = match fs::read_to_string(&file_path) {
204            Ok(c) => c,
205            Err(_) => continue,
206        };
207
208        let lines: Vec<&str> = content.lines().collect();
209        let mut file_has_match = false;
210        let mut file_match_count = 0;
211
212        for (line_idx, line) in lines.iter().enumerate() {
213            if regex_pattern.is_match(line) {
214                file_has_match = true;
215                file_match_count += 1;
216                match_count += 1;
217
218                if opts.output_mode == "content" && results.len() < opts.head_limit {
219                    // Add context lines before the match
220                    if opts.context_lines > 0 {
221                        let start_ctx = line_idx.saturating_sub(opts.context_lines);
222                        for (ctx_idx, ctx_line) in lines
223                            .iter()
224                            .enumerate()
225                            .skip(start_ctx)
226                            .take(line_idx - start_ctx)
227                        {
228                            results.push(format_line(
229                                &file_path,
230                                ctx_idx + 1,
231                                ctx_line,
232                                opts.show_line_numbers,
233                                true,
234                            ));
235                        }
236                    }
237
238                    // Add the matching line
239                    results.push(format_line(
240                        &file_path,
241                        line_idx + 1,
242                        line,
243                        opts.show_line_numbers,
244                        false,
245                    ));
246
247                    // Add context lines after the match
248                    if opts.context_lines > 0 {
249                        let end_ctx = (line_idx + opts.context_lines).min(lines.len() - 1);
250                        for (ctx_idx, ctx_line) in lines
251                            .iter()
252                            .enumerate()
253                            .skip(line_idx + 1)
254                            .take(end_ctx - line_idx)
255                        {
256                            results.push(format_line(
257                                &file_path,
258                                ctx_idx + 1,
259                                ctx_line,
260                                opts.show_line_numbers,
261                                true,
262                            ));
263                        }
264                    }
265                }
266            }
267        }
268
269        if file_has_match && opts.output_mode == "files_with_matches" {
270            files_with_matches.push(file_path.display().to_string());
271        }
272
273        if opts.output_mode == "count" && file_match_count > 0 {
274            results.push(format!(
275                "{}: {} matches",
276                file_path.display(),
277                file_match_count
278            ));
279        }
280    }
281
282    // Format output based on mode
283    match opts.output_mode.as_str() {
284        "files_with_matches" => {
285            if files_with_matches.is_empty() {
286                Ok("No files matched.".to_string())
287            } else {
288                Ok(files_with_matches.join("\n"))
289            }
290        }
291        "count" => {
292            if results.is_empty() {
293                Ok("No matches found.".to_string())
294            } else {
295                Ok(format!(
296                    "Total: {} matches\n{}",
297                    match_count,
298                    results.join("\n")
299                ))
300            }
301        }
302        _ => {
303            // content
304            if results.is_empty() {
305                Ok("No matches found.".to_string())
306            } else {
307                Ok(results.join("\n"))
308            }
309        }
310    }
311}
312
/// Renders one output line for content mode.
///
/// The result is `path:LINE<marker> text` when `show_line_numbers` is set and
/// `path<marker> text` otherwise, where the marker is `-` for context lines and `:` for
/// matching lines. Leading and trailing whitespace of `line` is stripped.
fn format_line(
    file_path: &std::path::Path,
    line_num: usize,
    line: &str,
    show_line_numbers: bool,
    is_context: bool,
) -> String {
    let location = file_path.display();
    let text = line.trim();
    let marker = match is_context {
        true => "-",
        false => ":",
    };

    if !show_line_numbers {
        return format!("{}{} {}", location, marker, text);
    }
    format!("{}:{}{} {}", location, line_num, marker, text)
}
334
335fn collect_grep_files(
336    root: &std::path::Path,
337    glob_pattern: Option<&str>,
338    type_extensions: Option<&[&str]>,
339) -> Result<Vec<std::path::PathBuf>> {
340    let mut files = Vec::new();
341
342    if root.is_file() {
343        files.push(root.to_path_buf());
344        return Ok(files);
345    }
346
347    // Build glob matcher
348    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
349
350    let walker = walkdir_grep(root)?;
351
352    for entry in walker {
353        let path = entry;
354
355        // Check glob pattern
356        if let Some(ref matcher) = glob_matcher {
357            let relative = path.strip_prefix(root).unwrap_or(&path);
358            let relative_str = relative.to_string_lossy();
359            if !matcher.matches(&relative_str) {
360                // Also try just the filename
361                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
362                    if !matcher.matches(name) {
363                        continue;
364                    }
365                } else {
366                    continue;
367                }
368            }
369        }
370
371        // Check file type extensions
372        if let Some(extensions) = type_extensions {
373            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
374            if !extensions.contains(&ext) {
375                continue;
376            }
377        }
378
379        files.push(path);
380    }
381
382    Ok(files)
383}
384
/// Recursively lists the regular files below `root`, returned in sorted order.
///
/// Skipped while walking:
/// - every entry whose name starts with `.` (hidden files and directories);
/// - directories whose name is in `SKIP_DIRS` (VCS metadata, dependency and build output);
/// - directories that cannot be read;
/// - directories whose canonical path was already visited, so symlink cycles and aliased
///   directories are traversed only once.
///
/// Symlinks to files and directories are followed. The function currently never returns
/// `Err`; unreadable entries are ignored.
fn walkdir_grep(root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
    use std::collections::HashSet;
    use std::fs;

    // Directories to skip
    const SKIP_DIRS: &[&str] = &[
        ".git",
        ".svn",
        ".hg",
        "node_modules",
        "vendor",
        "target",
        "build",
        "dist",
        "out",
        ".cache",
        ".npm",
        ".cargo",
        "__pycache__",
        ".venv",
        "venv",
        ".idea",
        ".vscode",
    ];

    let mut files = Vec::new();
    let mut stack = vec![root.to_path_buf()];
    let mut visited: HashSet<std::path::PathBuf> = HashSet::new();

    while let Some(dir) = stack.pop() {
        // `is_dir` below follows symlinks, so a link pointing at an ancestor would be
        // walked again and again. Key each directory by its canonical path and visit once.
        let canonical = fs::canonicalize(&dir).unwrap_or_else(|_| dir.clone());
        if !visited.insert(canonical) {
            continue;
        }

        let entries = match fs::read_dir(&dir) {
            Ok(e) => e,
            Err(_) => continue,
        };

        for entry in entries.flatten() {
            let name = entry.file_name();
            let name_str = name.to_string_lossy();

            // Skip hidden files and directories
            if name_str.starts_with('.') {
                continue;
            }

            let path = entry.path();
            if path.is_dir() {
                // The skip list names directories only; a regular file that happens to be
                // called e.g. `build` is still searched.
                if !SKIP_DIRS.contains(&name_str.as_ref()) {
                    stack.push(path);
                }
            } else if path.is_file() {
                files.push(path);
            }
        }
    }

    // `read_dir` order is platform-dependent; sort so output (and any truncation applied
    // by the caller) is deterministic.
    files.sort();

    Ok(files)
}