Skip to main content

matrixcode_core/tools/
grep.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4
5use super::{Tool, ToolContext, ToolDefinition};
6
/// Grep search options bundled into a single struct.
///
/// Built from the tool's JSON parameters by `GrepOptions::from_params` and
/// consumed by `grep_search`. Deriving `Debug` and `Clone` lets the options
/// be logged and duplicated without hand-written boilerplate.
#[derive(Debug, Clone)]
struct GrepOptions {
    /// Regular expression to search for (compiled by `grep_search`).
    pattern: String,
    /// File or directory to search.
    path: String,
    /// Optional glob used to filter the files found under `path`.
    glob_pattern: Option<String>,
    /// Optional file-type name, mapped to extensions by `get_extensions_for_type`.
    file_type: Option<String>,
    /// Output mode: `"content"`, `"files_with_matches"` or `"count"`.
    output_mode: String,
    /// Whether the pattern is matched case-insensitively.
    case_insensitive: bool,
    /// Whether output lines carry their 1-based line number.
    show_line_numbers: bool,
    /// Number of context lines shown before and after each match.
    context_lines: usize,
    /// Limit on the amount of output produced in `"content"` mode.
    head_limit: usize,
}
19
20impl GrepOptions {
21    fn from_params(params: &Value) -> Result<Self> {
22        let pattern = params["pattern"]
23            .as_str()
24            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?
25            .to_string();
26        let path = params["path"].as_str().unwrap_or(".").to_string();
27        let glob_pattern = params["glob"].as_str().map(|s| s.to_string());
28        let file_type = params["type"].as_str().map(|s| s.to_string());
29        let output_mode = params["output_mode"]
30            .as_str()
31            .unwrap_or("content")
32            .to_string();
33        let case_insensitive = params["-i"].as_bool().unwrap_or(false);
34        let show_line_numbers = params["-n"].as_bool().unwrap_or(true);
35        let context_lines = params["-C"].as_u64().unwrap_or(0) as usize;
36        let head_limit = params["head_limit"].as_u64().unwrap_or(100) as usize;
37
38        Ok(Self {
39            pattern,
40            path,
41            glob_pattern,
42            file_type,
43            output_mode,
44            case_insensitive,
45            show_line_numbers,
46            context_lines,
47            head_limit,
48        })
49    }
50}
51
52/// High-performance grep tool with advanced filtering options
53pub struct GrepTool;
54
55#[async_trait]
56impl Tool for GrepTool {
57    fn definition_with_context(&self, ctx: &ToolContext) -> ToolDefinition {
58        // Dynamic description based on CodeGraph availability
59        let not_applicable = if ctx.codegraph_available {
60            "不适用场景:
61- ❌ 找函数定义 → code_search(快10-100倍)
62- ❌ 找类定义、变量声明 → code_search
63- ❌ 查谁调用了某方法 → code_callers"
64        } else {
65            "不适用场景:
66- ❌ 找函数定义 → 用 grep 搜索 'fn func_name' 或 'class ClassName'
67- ❌ 找类定义、变量声明 → 用 grep 搜索 class/struct 名
68- ❌ 查调用关系 → 用 grep 搜索函数名(不精确)"
69        };
70
71        let description = format!(
72            "搜索文本内容(错误消息、注释、字符串等)。
73
74适用场景:
75- 搜错误信息(如 'failed to connect'、'panic')
76- 找注释内容(如 'TODO'、'FIXME')
77- 搜字符串常量、日志文本
78- 搜索任意文本模式(正则表达式)
79
80{}
81
82优先级:[中] 文本搜索首选工具",
83            not_applicable
84        );
85
86        ToolDefinition {
87            name: "grep".to_string(),
88            description,
89            parameters: json!({
90                "type": "object",
91                "properties": {
92                    "pattern": {
93                        "type": "string",
94                        "description": "要搜索的正则表达式模式"
95                    },
96                    "path": {
97                        "type": "string",
98                        "description": "搜索的文件或目录(默认当前目录)"
99                    },
100                    "glob": {
101                        "type": "string",
102                        "description": "Glob 文件过滤模式(如 '*.ts'、'**/*.rs')"
103                    },
104                    "type": {
105                        "type": "string",
106                        "enum": ["js", "ts", "py", "rs", "go", "java", "c", "cpp", "md", "json", "yaml", "html", "css"],
107                        "description": "按文件类型搜索(映射到常用扩展名)"
108                    },
109                    "output_mode": {
110                        "type": "string",
111                        "enum": ["content", "files_with_matches", "count"],
112                        "default": "content",
113                        "description": "输出模式:'content' 显示匹配行,'files_with_matches' 列出文件,'count' 显示匹配数"
114                    },
115                    "-i": {
116                        "type": "boolean",
117                        "default": false,
118                        "description": "忽略大小写"
119                    },
120                    "-n": {
121                        "type": "boolean",
122                        "default": true,
123                        "description": "显示行号"
124                    },
125                    "-C": {
126                        "type": "integer",
127                        "default": 0,
128                        "description": "匹配行前后显示的上下文行数"
129                    },
130                    "head_limit": {
131                        "type": "integer",
132                        "default": 100,
133                        "description": "最大返回结果数"
134                    }
135                },
136                "required": ["pattern"]
137            }),
138            ..Default::default()
139        }
140    }
141
142    fn definition(&self) -> ToolDefinition {
143        self.definition_with_context(&ToolContext::default())
144    }
145
146    async fn execute(&self, params: Value) -> Result<String> {
147        let opts = GrepOptions::from_params(&params)?;
148
149        tokio::task::spawn_blocking(move || grep_search(&opts)).await?
150    }
151}
152
/// Returns the file extensions (without the leading dot) that belong to a
/// file-type name such as `"js"` or `"cpp"`.
///
/// The lookup is case-sensitive; an unknown name yields an empty vector.
fn get_extensions_for_type(file_type: &str) -> Vec<&'static str> {
    // (type name, extensions) pairs.
    const EXTENSIONS_BY_TYPE: &[(&str, &[&str])] = &[
        ("js", &["js", "jsx", "mjs", "cjs"]),
        ("ts", &["ts", "tsx", "mts", "cts"]),
        ("py", &["py", "pyw", "pyi"]),
        ("rs", &["rs"]),
        ("go", &["go"]),
        ("java", &["java"]),
        ("c", &["c", "h"]),
        ("cpp", &["cpp", "cc", "cxx", "hpp", "hh", "hxx"]),
        ("md", &["md", "markdown"]),
        ("json", &["json", "json5", "jsonc"]),
        ("yaml", &["yaml", "yml"]),
        ("html", &["html", "htm", "xhtml"]),
        ("css", &["css", "scss", "sass", "less"]),
    ];

    EXTENSIONS_BY_TYPE
        .iter()
        .find(|(name, _)| *name == file_type)
        .map(|(_, extensions)| extensions.to_vec())
        .unwrap_or_default()
}
172
173fn grep_search(opts: &GrepOptions) -> Result<String> {
174    use std::fs;
175    use std::path::Path;
176
177    // Build regex with case-insensitive option
178    let regex_pattern = if opts.case_insensitive {
179        regex::RegexBuilder::new(&opts.pattern)
180            .case_insensitive(true)
181            .build()?
182    } else {
183        regex::Regex::new(&opts.pattern)?
184    };
185
186    let root = Path::new(&opts.path);
187    let mut results: Vec<String> = Vec::new();
188    let mut match_count = 0;
189    let mut files_with_matches: Vec<String> = Vec::new();
190
191    // Get file extensions for type filter
192    let type_extensions = opts.file_type.as_deref().map(get_extensions_for_type);
193
194    let entries = collect_grep_files(
195        root,
196        opts.glob_pattern.as_deref(),
197        type_extensions.as_deref(),
198    )?;
199
200    for file_path in entries {
201        if results.len() >= opts.head_limit && opts.output_mode == "content" {
202            results.push(format!("... (limited to {} results)", opts.head_limit));
203            break;
204        }
205
206        let content = match fs::read_to_string(&file_path) {
207            Ok(c) => c,
208            Err(_) => continue,
209        };
210
211        let lines: Vec<&str> = content.lines().collect();
212        let mut file_has_match = false;
213        let mut file_match_count = 0;
214
215        for (line_idx, line) in lines.iter().enumerate() {
216            if regex_pattern.is_match(line) {
217                file_has_match = true;
218                file_match_count += 1;
219                match_count += 1;
220
221                if opts.output_mode == "content" && results.len() < opts.head_limit {
222                    // Add context lines before the match
223                    if opts.context_lines > 0 {
224                        let start_ctx = line_idx.saturating_sub(opts.context_lines);
225                        for (ctx_idx, ctx_line) in lines
226                            .iter()
227                            .enumerate()
228                            .skip(start_ctx)
229                            .take(line_idx - start_ctx)
230                        {
231                            results.push(format_line(
232                                &file_path,
233                                ctx_idx + 1,
234                                ctx_line,
235                                opts.show_line_numbers,
236                                true,
237                            ));
238                        }
239                    }
240
241                    // Add the matching line
242                    results.push(format_line(
243                        &file_path,
244                        line_idx + 1,
245                        line,
246                        opts.show_line_numbers,
247                        false,
248                    ));
249
250                    // Add context lines after the match
251                    if opts.context_lines > 0 {
252                        let end_ctx = (line_idx + opts.context_lines).min(lines.len() - 1);
253                        for (ctx_idx, ctx_line) in lines
254                            .iter()
255                            .enumerate()
256                            .skip(line_idx + 1)
257                            .take(end_ctx - line_idx)
258                        {
259                            results.push(format_line(
260                                &file_path,
261                                ctx_idx + 1,
262                                ctx_line,
263                                opts.show_line_numbers,
264                                true,
265                            ));
266                        }
267                    }
268                }
269            }
270        }
271
272        if file_has_match && opts.output_mode == "files_with_matches" {
273            files_with_matches.push(file_path.display().to_string());
274        }
275
276        if opts.output_mode == "count" && file_match_count > 0 {
277            results.push(format!(
278                "{}: {} matches",
279                file_path.display(),
280                file_match_count
281            ));
282        }
283    }
284
285    // Format output based on mode
286    match opts.output_mode.as_str() {
287        "files_with_matches" => {
288            if files_with_matches.is_empty() {
289                Ok("No files matched.".to_string())
290            } else {
291                Ok(files_with_matches.join("\n"))
292            }
293        }
294        "count" => {
295            if results.is_empty() {
296                Ok("No matches found.".to_string())
297            } else {
298                Ok(format!(
299                    "Total: {} matches\n{}",
300                    match_count,
301                    results.join("\n")
302                ))
303            }
304        }
305        _ => {
306            // content
307            if results.is_empty() {
308                Ok("No matches found.".to_string())
309            } else {
310                Ok(results.join("\n"))
311            }
312        }
313    }
314}
315
/// Renders one output line as `path[:line_num]<marker> text`.
///
/// The marker is `-` for context lines and `:` for matching lines. The line
/// number is included only when `show_line_numbers` is set, and the text is
/// trimmed of surrounding whitespace.
fn format_line(
    file_path: &std::path::Path,
    line_num: usize,
    line: &str,
    show_line_numbers: bool,
    is_context: bool,
) -> String {
    let marker = if is_context { '-' } else { ':' };

    // The location is either `path:line` or just `path`.
    let location = if show_line_numbers {
        format!("{}:{}", file_path.display(), line_num)
    } else {
        file_path.display().to_string()
    };

    format!("{}{} {}", location, marker, line.trim())
}
337
338fn collect_grep_files(
339    root: &std::path::Path,
340    glob_pattern: Option<&str>,
341    type_extensions: Option<&[&str]>,
342) -> Result<Vec<std::path::PathBuf>> {
343    let mut files = Vec::new();
344
345    if root.is_file() {
346        files.push(root.to_path_buf());
347        return Ok(files);
348    }
349
350    // Build glob matcher
351    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
352
353    let walker = walkdir_grep(root)?;
354
355    for entry in walker {
356        let path = entry;
357
358        // Check glob pattern
359        if let Some(ref matcher) = glob_matcher {
360            let relative = path.strip_prefix(root).unwrap_or(&path);
361            let relative_str = relative.to_string_lossy();
362            if !matcher.matches(&relative_str) {
363                // Also try just the filename
364                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
365                    if !matcher.matches(name) {
366                        continue;
367                    }
368                } else {
369                    continue;
370                }
371            }
372        }
373
374        // Check file type extensions
375        if let Some(extensions) = type_extensions {
376            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
377            if !extensions.contains(&ext) {
378                continue;
379            }
380        }
381
382        files.push(path);
383    }
384
385    Ok(files)
386}
387
/// Recursively lists the regular files below `root`, in sorted order.
///
/// * Entries whose name starts with `.` are skipped (files and directories).
/// * Directories named in `SKIP_DIRS` are not descended into; a regular file
///   that merely shares one of those names is still listed.
/// * Symbolic links are followed only when they point at a file. Linked
///   directories are not descended into, so a link cycle cannot make the
///   walk revisit the same tree.
/// * Directories that cannot be read are silently skipped.
///
/// The result is sorted so that callers which truncate the list see a
/// stable order regardless of the order the OS returns directory entries.
fn walkdir_grep(root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
    use std::fs;

    // Directories to skip
    const SKIP_DIRS: &[&str] = &[
        ".git",
        ".svn",
        ".hg",
        "node_modules",
        "vendor",
        "target",
        "build",
        "dist",
        "out",
        ".cache",
        ".npm",
        ".cargo",
        "__pycache__",
        ".venv",
        "venv",
        ".idea",
        ".vscode",
    ];

    let mut files = Vec::new();
    let mut stack = vec![root.to_path_buf()];

    while let Some(dir) = stack.pop() {
        let entries = match fs::read_dir(&dir) {
            Ok(e) => e,
            Err(_) => continue,
        };

        for entry in entries.flatten() {
            let name = entry.file_name();
            let name_str = name.to_string_lossy();

            // Skip hidden files and directories
            if name_str.starts_with('.') {
                continue;
            }

            // `DirEntry::file_type` does not follow symlinks, unlike `Path::is_dir`.
            let file_type = match entry.file_type() {
                Ok(t) => t,
                Err(_) => continue,
            };
            let path = entry.path();

            if file_type.is_dir() {
                if !SKIP_DIRS.contains(&name_str.as_ref()) {
                    stack.push(path);
                }
            } else if file_type.is_file() || (file_type.is_symlink() && path.is_file()) {
                // Regular file, or a symlink that resolves to one.
                files.push(path);
            }
        }
    }

    files.sort();
    Ok(files)
}