Skip to main content

matrixcode_core/tools/
grep.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4
5use super::{Tool, ToolDefinition};
6
7/// Grep search options bundled into a single struct.
8struct GrepOptions {
9    pattern: String,
10    path: String,
11    glob_pattern: Option<String>,
12    file_type: Option<String>,
13    output_mode: String,
14    case_insensitive: bool,
15    show_line_numbers: bool,
16    context_lines: usize,
17    head_limit: usize,
18}
19
20impl GrepOptions {
21    fn from_params(params: &Value) -> Result<Self> {
22        let pattern = params["pattern"]
23            .as_str()
24            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?
25            .to_string();
26        let path = params["path"].as_str().unwrap_or(".").to_string();
27        let glob_pattern = params["glob"].as_str().map(|s| s.to_string());
28        let file_type = params["type"].as_str().map(|s| s.to_string());
29        let output_mode = params["output_mode"]
30            .as_str()
31            .unwrap_or("content")
32            .to_string();
33        let case_insensitive = params["-i"].as_bool().unwrap_or(false);
34        let show_line_numbers = params["-n"].as_bool().unwrap_or(true);
35        let context_lines = params["-C"].as_u64().unwrap_or(0) as usize;
36        let head_limit = params["head_limit"].as_u64().unwrap_or(100) as usize;
37
38        Ok(Self {
39            pattern,
40            path,
41            glob_pattern,
42            file_type,
43            output_mode,
44            case_insensitive,
45            show_line_numbers,
46            context_lines,
47            head_limit,
48        })
49    }
50}
51
52/// High-performance grep tool with advanced filtering options
53pub struct GrepTool;
54
55#[async_trait]
56impl Tool for GrepTool {
57    fn definition(&self) -> ToolDefinition {
58        ToolDefinition {
59            name: "grep".to_string(),
60            description: "高性能内容搜索工具,适用于任意规模代码库。支持正则表达式、文件类型过滤和多种输出模式。".to_string(),
61            parameters: json!({
62                "type": "object",
63                "properties": {
64                    "pattern": {
65                        "type": "string",
66                        "description": "要搜索的正则表达式模式"
67                    },
68                    "path": {
69                        "type": "string",
70                        "description": "搜索的文件或目录(默认当前目录)"
71                    },
72                    "glob": {
73                        "type": "string",
74                        "description": "Glob 文件过滤模式(如 '*.ts'、'**/*.rs')"
75                    },
76                    "type": {
77                        "type": "string",
78                        "enum": ["js", "ts", "py", "rs", "go", "java", "c", "cpp", "md", "json", "yaml", "html", "css"],
79                        "description": "按文件类型搜索(映射到常用扩展名)"
80                    },
81                    "output_mode": {
82                        "type": "string",
83                        "enum": ["content", "files_with_matches", "count"],
84                        "default": "content",
85                        "description": "输出模式:'content' 显示匹配行,'files_with_matches' 列出文件,'count' 显示匹配数"
86                    },
87                    "-i": {
88                        "type": "boolean",
89                        "default": false,
90                        "description": "忽略大小写"
91                    },
92                    "-n": {
93                        "type": "boolean",
94                        "default": true,
95                        "description": "显示行号"
96                    },
97                    "-C": {
98                        "type": "integer",
99                        "default": 0,
100                        "description": "匹配行前后显示的上下文行数"
101                    },
102                    "head_limit": {
103                        "type": "integer",
104                        "default": 100,
105                        "description": "最大返回结果数"
106                    }
107                },
108                "required": ["pattern"]
109            }),
110            ..Default::default()
111        }
112    }
113
114    async fn execute(&self, params: Value) -> Result<String> {
115        let opts = GrepOptions::from_params(&params)?;
116
117        tokio::task::spawn_blocking(move || grep_search(&opts)).await?
118    }
119}
120
/// Maps a file-type key (such as `"ts"` or `"cpp"`) to the file extensions it covers.
///
/// Extensions are returned without the leading dot. An unrecognised key
/// yields an empty vector.
fn get_extensions_for_type(file_type: &str) -> Vec<&'static str> {
    let known: &[&'static str] = match file_type {
        "js" => &["js", "jsx", "mjs", "cjs"],
        "ts" => &["ts", "tsx", "mts", "cts"],
        "py" => &["py", "pyw", "pyi"],
        "rs" => &["rs"],
        "go" => &["go"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "cpp" => &["cpp", "cc", "cxx", "hpp", "hh", "hxx"],
        "md" => &["md", "markdown"],
        "json" => &["json", "json5", "jsonc"],
        "yaml" => &["yaml", "yml"],
        "html" => &["html", "htm", "xhtml"],
        "css" => &["css", "scss", "sass", "less"],
        _ => &[],
    };
    known.to_vec()
}
140
141fn grep_search(opts: &GrepOptions) -> Result<String> {
142    use std::fs;
143    use std::path::Path;
144
145    // Build regex with case-insensitive option
146    let regex_pattern = if opts.case_insensitive {
147        regex::RegexBuilder::new(&opts.pattern)
148            .case_insensitive(true)
149            .build()?
150    } else {
151        regex::Regex::new(&opts.pattern)?
152    };
153
154    let root = Path::new(&opts.path);
155    let mut results: Vec<String> = Vec::new();
156    let mut match_count = 0;
157    let mut files_with_matches: Vec<String> = Vec::new();
158
159    // Get file extensions for type filter
160    let type_extensions = opts.file_type.as_deref().map(get_extensions_for_type);
161
162    let entries = collect_grep_files(
163        root,
164        opts.glob_pattern.as_deref(),
165        type_extensions.as_deref(),
166    )?;
167
168    for file_path in entries {
169        if results.len() >= opts.head_limit && opts.output_mode == "content" {
170            results.push(format!("... (limited to {} results)", opts.head_limit));
171            break;
172        }
173
174        let content = match fs::read_to_string(&file_path) {
175            Ok(c) => c,
176            Err(_) => continue,
177        };
178
179        let lines: Vec<&str> = content.lines().collect();
180        let mut file_has_match = false;
181        let mut file_match_count = 0;
182
183        for (line_idx, line) in lines.iter().enumerate() {
184            if regex_pattern.is_match(line) {
185                file_has_match = true;
186                file_match_count += 1;
187                match_count += 1;
188
189                if opts.output_mode == "content" && results.len() < opts.head_limit {
190                    // Add context lines before the match
191                    if opts.context_lines > 0 {
192                        let start_ctx = line_idx.saturating_sub(opts.context_lines);
193                        for (ctx_idx, ctx_line) in lines
194                            .iter()
195                            .enumerate()
196                            .skip(start_ctx)
197                            .take(line_idx - start_ctx)
198                        {
199                            results.push(format_line(
200                                &file_path,
201                                ctx_idx + 1,
202                                ctx_line,
203                                opts.show_line_numbers,
204                                true,
205                            ));
206                        }
207                    }
208
209                    // Add the matching line
210                    results.push(format_line(
211                        &file_path,
212                        line_idx + 1,
213                        line,
214                        opts.show_line_numbers,
215                        false,
216                    ));
217
218                    // Add context lines after the match
219                    if opts.context_lines > 0 {
220                        let end_ctx = (line_idx + opts.context_lines).min(lines.len() - 1);
221                        for (ctx_idx, ctx_line) in lines
222                            .iter()
223                            .enumerate()
224                            .skip(line_idx + 1)
225                            .take(end_ctx - line_idx)
226                        {
227                            results.push(format_line(
228                                &file_path,
229                                ctx_idx + 1,
230                                ctx_line,
231                                opts.show_line_numbers,
232                                true,
233                            ));
234                        }
235                    }
236                }
237            }
238        }
239
240        if file_has_match && opts.output_mode == "files_with_matches" {
241            files_with_matches.push(file_path.display().to_string());
242        }
243
244        if opts.output_mode == "count" && file_match_count > 0 {
245            results.push(format!(
246                "{}: {} matches",
247                file_path.display(),
248                file_match_count
249            ));
250        }
251    }
252
253    // Format output based on mode
254    match opts.output_mode.as_str() {
255        "files_with_matches" => {
256            if files_with_matches.is_empty() {
257                Ok("No files matched.".to_string())
258            } else {
259                Ok(files_with_matches.join("\n"))
260            }
261        }
262        "count" => {
263            if results.is_empty() {
264                Ok("No matches found.".to_string())
265            } else {
266                Ok(format!(
267                    "Total: {} matches\n{}",
268                    match_count,
269                    results.join("\n")
270                ))
271            }
272        }
273        _ => {
274            // content
275            if results.is_empty() {
276                Ok("No matches found.".to_string())
277            } else {
278                Ok(results.join("\n"))
279            }
280        }
281    }
282}
283
/// Renders one output line for a file.
///
/// The result is `path:line<marker> text` when `show_line_numbers` is set and
/// `path<marker> text` otherwise, where the marker is `-` for a context line
/// and `:` for a matching line. Leading and trailing whitespace of `line` is
/// trimmed.
fn format_line(
    file_path: &std::path::Path,
    line_num: usize,
    line: &str,
    show_line_numbers: bool,
    is_context: bool,
) -> String {
    let location = if show_line_numbers {
        format!("{}:{}", file_path.display(), line_num)
    } else {
        file_path.display().to_string()
    };
    let separator = match is_context {
        true => '-',
        false => ':',
    };
    format!("{}{} {}", location, separator, line.trim())
}
305
306fn collect_grep_files(
307    root: &std::path::Path,
308    glob_pattern: Option<&str>,
309    type_extensions: Option<&[&str]>,
310) -> Result<Vec<std::path::PathBuf>> {
311    let mut files = Vec::new();
312
313    if root.is_file() {
314        files.push(root.to_path_buf());
315        return Ok(files);
316    }
317
318    // Build glob matcher
319    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
320
321    let walker = walkdir_grep(root)?;
322
323    for entry in walker {
324        let path = entry;
325
326        // Check glob pattern
327        if let Some(ref matcher) = glob_matcher {
328            let relative = path.strip_prefix(root).unwrap_or(&path);
329            let relative_str = relative.to_string_lossy();
330            if !matcher.matches(&relative_str) {
331                // Also try just the filename
332                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
333                    if !matcher.matches(name) {
334                        continue;
335                    }
336                } else {
337                    continue;
338                }
339            }
340        }
341
342        // Check file type extensions
343        if let Some(extensions) = type_extensions {
344            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
345            if !extensions.contains(&ext) {
346                continue;
347            }
348        }
349
350        files.push(path);
351    }
352
353    Ok(files)
354}
355
/// Recursively lists the files under `root`, sorted by path.
///
/// * Entries whose name starts with `.` are skipped, files and directories alike.
/// * Directories named in `SKIP_DIRS` are not descended into. The names only
///   apply to directories, so a regular file called `build` or `out` is kept.
/// * Symbolic links to directories are not followed, which keeps a link cycle
///   from being walked repeatedly. Symbolic links to files are still reported.
/// * Directories and entries that cannot be read are skipped silently.
///
/// The result is sorted so that callers which truncate their output see the
/// same files on every run, independent of the order the OS lists them in.
///
/// The current implementation never returns `Err`; the `Result` return type
/// is kept so callers can keep using `?`.
fn walkdir_grep(root: &std::path::Path) -> Result<Vec<std::path::PathBuf>> {
    use std::fs;

    // Directory names that are never searched. The dot-prefixed ones are also
    // covered by the hidden-entry rule below.
    const SKIP_DIRS: &[&str] = &[
        ".git",
        ".svn",
        ".hg",
        "node_modules",
        "vendor",
        "target",
        "build",
        "dist",
        "out",
        ".cache",
        ".npm",
        ".cargo",
        "__pycache__",
        ".venv",
        "venv",
        ".idea",
        ".vscode",
    ];

    let mut files = Vec::new();
    let mut stack = vec![root.to_path_buf()];

    while let Some(dir) = stack.pop() {
        let entries = match fs::read_dir(&dir) {
            Ok(listing) => listing,
            Err(_) => continue,
        };

        for entry in entries.flatten() {
            let raw_name = entry.file_name();
            let name_lossy = raw_name.to_string_lossy();
            let name: &str = &name_lossy;

            if name.starts_with('.') {
                continue;
            }

            // `DirEntry::file_type` does not follow symlinks, unlike `Path::is_dir`.
            let file_type = match entry.file_type() {
                Ok(kind) => kind,
                Err(_) => continue,
            };
            let path = entry.path();

            if file_type.is_dir() {
                if !SKIP_DIRS.contains(&name) {
                    stack.push(path);
                }
            } else if file_type.is_file() || (file_type.is_symlink() && path.is_file()) {
                files.push(path);
            }
        }
    }

    files.sort();
    Ok(files)
}