Skip to main content

matrixcode_core/tools/
search.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4use tokio::time::{Duration, timeout};
5
6use super::{Tool, ToolContext, ToolDefinition};
7
8pub struct SearchTool;
9
10#[async_trait]
11impl Tool for SearchTool {
12    fn definition_with_context(&self, ctx: &ToolContext) -> ToolDefinition {
13        // Dynamic description based on CodeGraph availability
14        let prefer_section = if ctx.codegraph_available {
15            "【优先使用 code_search 的场景】
16- 查找函数/类/方法/变量的定义 → code_search(快10-100倍)
17- 查找符号的调用关系 → code_callers/callees"
18        } else {
19            "【search 的适用场景】
20- 搜索非代码文本(错误消息、日志文本)
21- 搜索字符串常量
22- 需要搜索特定文件类型的内容"
23        };
24
25        let description = format!(
26            "在文件内容中搜索匹配的文本模式。
27
28适用场景:
29- 搜索文本内容(错误消息、日志、注释)
30- 搜索字符串常量
31- 不确定目标是否是代码符号
32
33{}",
34            prefer_section
35        );
36
37        ToolDefinition {
38            name: "search".to_string(),
39            description,
40            parameters: json!({
41                "type": "object",
42                "properties": {
43                    "pattern": {
44                        "type": "string",
45                        "description": "要搜索的正则表达式模式"
46                    },
47                    "path": {
48                        "type": "string",
49                        "description": "搜索的目录或文件路径(默认 '.')"
50                    },
51                    "glob": {
52                        "type": "string",
53                        "description": "文件过滤的 glob 模式(如 '*.rs')"
54                    }
55                },
56                "required": ["pattern"]
57            }),
58            ..Default::default()
59        }
60    }
61
62    fn definition(&self) -> ToolDefinition {
63        self.definition_with_context(&ToolContext::default())
64    }
65
66    async fn execute(&self, params: Value) -> Result<String> {
67        let pattern = params["pattern"]
68            .as_str()
69            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?;
70        let path = params["path"].as_str().unwrap_or(".");
71        let glob_pattern = params["glob"].as_str();
72
73        let pattern = pattern.to_string();
74        let path = path.to_string();
75        let glob_pattern = glob_pattern.map(|s| s.to_string());
76
77        // Use timeout to prevent hanging on large directories
78        timeout(Duration::from_secs(30), async {
79            tokio::task::spawn_blocking(move || {
80                search_files(&pattern, &path, glob_pattern.as_deref())
81            })
82            .await?
83        })
84        .await
85        .map_err(|_| anyhow::anyhow!("Search timeout (30s) - directory may be too large"))?
86    }
87}
88
89/// Maximum files to search before stopping.
90const MAX_FILES: usize = 500;
91
92fn search_files(pattern: &str, path: &str, glob_pattern: Option<&str>) -> Result<String> {
93    use std::fs;
94    use std::path::Path;
95
96    let regex = regex::Regex::new(pattern)?;
97    let mut results = Vec::new();
98    let root = Path::new(path);
99
100    let entries = collect_files(root, glob_pattern)?;
101
102    for file_path in entries {
103        // Skip very large files (> 1MB)
104        match fs::metadata(&file_path) {
105            Ok(meta) if meta.len() > 1_000_000 => continue,
106            Err(_) => continue,
107            Ok(_) => {}
108        }
109
110        let content = match fs::read_to_string(&file_path) {
111            Ok(c) => c,
112            Err(_) => continue,
113        };
114
115        for (line_num, line) in content.lines().enumerate() {
116            if regex.is_match(line) {
117                results.push(format!(
118                    "{}:{}: {}",
119                    file_path.display(),
120                    line_num + 1,
121                    line.trim()
122                ));
123            }
124        }
125
126        if results.len() > 200 {
127            results.push("... (truncated, too many results)".to_string());
128            break;
129        }
130    }
131
132    if results.is_empty() {
133        Ok("No matches found.".to_string())
134    } else {
135        Ok(results.join("\n"))
136    }
137}
138
139fn collect_files(
140    root: &std::path::Path,
141    glob_pattern: Option<&str>,
142) -> Result<Vec<std::path::PathBuf>> {
143    let mut files = Vec::new();
144
145    if root.is_file() {
146        files.push(root.to_path_buf());
147        return Ok(files);
148    }
149
150    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
151
152    let mut stack = vec![root.to_path_buf()];
153
154    while let Some(dir) = stack.pop() {
155        let entries = match std::fs::read_dir(&dir) {
156            Ok(e) => e,
157            Err(_) => continue,
158        };
159
160        for entry in entries.flatten() {
161            let path = entry.path();
162            let name = entry.file_name();
163            let name_str = name.to_string_lossy();
164
165            // Skip hidden dirs and common large directories
166            if name_str.starts_with('.')
167                || name_str == "node_modules"
168                || name_str == "target"
169                || name_str == "dist"
170                || name_str == "build"
171                || name_str == ".git"
172            {
173                continue;
174            }
175
176            // Check glob pattern for files
177            if let Some(ref matcher) = glob_matcher
178                && path.is_file()
179                && let Some(name) = path.file_name().and_then(|n| n.to_str())
180                && !matcher.matches(name)
181            {
182                continue;
183            }
184
185            if path.is_dir() {
186                stack.push(path);
187            } else if path.is_file() {
188                files.push(path);
189                // Limit number of files to search
190                if files.len() >= MAX_FILES {
191                    return Ok(files);
192                }
193            }
194        }
195    }
196
197    Ok(files)
198}