// matrixcode_core/tools/search.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4use tokio::time::{Duration, timeout};
5
6use super::{Tool, ToolDefinition};
7
/// Tool that runs a regular-expression search over the contents of a file, or of
/// the files found beneath a directory, and reports matching lines.
pub struct SearchTool;
9
10#[async_trait]
11impl Tool for SearchTool {
12    fn definition(&self) -> ToolDefinition {
13        ToolDefinition {
14            name: "search".to_string(),
15            description: r#"在文件内容中搜索匹配的文本模式。
16
17适用场景:
18- 搜索文本内容(错误消息、日志、注释)
19- 搜索字符串常量
20- 不确定目标是否是代码符号
21
22【优先使用 code_search 的场景】
23如果 CodeGraph 可用(系统提示中有 CodeGraph 工具),以下场景应该用 code_search:
24- 查找函数/类/方法/变量的定义 → code_search(快 10-100 倍)
25- 查找符号的调用关系 → code_callers/callees
26- 查找谁调用了某函数 → code_callers
27
28【使用 search 的场景】
29- 搜索非代码文本(错误消息、日志文本)
30- CodeGraph 未初始化或不可用
31- 需要搜索特定文件类型的内容"#.to_string(),
32            parameters: json!({
33                "type": "object",
34                "properties": {
35                    "pattern": {
36                        "type": "string",
37                        "description": "要搜索的正则表达式模式"
38                    },
39                    "path": {
40                        "type": "string",
41                        "description": "搜索的目录或文件路径(默认 '.')"
42                    },
43                    "glob": {
44                        "type": "string",
45                        "description": "文件过滤的 glob 模式(如 '*.rs')"
46                    }
47                },
48                "required": ["pattern"]
49            }),
50            ..Default::default()
51        }
52    }
53
54    async fn execute(&self, params: Value) -> Result<String> {
55        let pattern = params["pattern"]
56            .as_str()
57            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?;
58        let path = params["path"].as_str().unwrap_or(".");
59        let glob_pattern = params["glob"].as_str();
60
61        let pattern = pattern.to_string();
62        let path = path.to_string();
63        let glob_pattern = glob_pattern.map(|s| s.to_string());
64
65        // Use timeout to prevent hanging on large directories
66        timeout(Duration::from_secs(30), async {
67            tokio::task::spawn_blocking(move || {
68                search_files(&pattern, &path, glob_pattern.as_deref())
69            })
70            .await?
71        })
72        .await
73        .map_err(|_| anyhow::anyhow!("Search timeout (30s) - directory may be too large"))?
74    }
75}
76
/// Maximum number of files gathered for a single search. Once `collect_files`
/// has collected this many files it stops walking and returns the list as-is.
const MAX_FILES: usize = 500;
79
80fn search_files(pattern: &str, path: &str, glob_pattern: Option<&str>) -> Result<String> {
81    use std::fs;
82    use std::path::Path;
83
84    let regex = regex::Regex::new(pattern)?;
85    let mut results = Vec::new();
86    let root = Path::new(path);
87
88    let entries = collect_files(root, glob_pattern)?;
89
90    for file_path in entries {
91        // Skip very large files (> 1MB)
92        match fs::metadata(&file_path) {
93            Ok(meta) if meta.len() > 1_000_000 => continue,
94            Err(_) => continue,
95            Ok(_) => {}
96        }
97
98        let content = match fs::read_to_string(&file_path) {
99            Ok(c) => c,
100            Err(_) => continue,
101        };
102
103        for (line_num, line) in content.lines().enumerate() {
104            if regex.is_match(line) {
105                results.push(format!(
106                    "{}:{}: {}",
107                    file_path.display(),
108                    line_num + 1,
109                    line.trim()
110                ));
111            }
112        }
113
114        if results.len() > 200 {
115            results.push("... (truncated, too many results)".to_string());
116            break;
117        }
118    }
119
120    if results.is_empty() {
121        Ok("No matches found.".to_string())
122    } else {
123        Ok(results.join("\n"))
124    }
125}
126
127fn collect_files(
128    root: &std::path::Path,
129    glob_pattern: Option<&str>,
130) -> Result<Vec<std::path::PathBuf>> {
131    let mut files = Vec::new();
132
133    if root.is_file() {
134        files.push(root.to_path_buf());
135        return Ok(files);
136    }
137
138    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
139
140    let mut stack = vec![root.to_path_buf()];
141
142    while let Some(dir) = stack.pop() {
143        let entries = match std::fs::read_dir(&dir) {
144            Ok(e) => e,
145            Err(_) => continue,
146        };
147
148        for entry in entries.flatten() {
149            let path = entry.path();
150            let name = entry.file_name();
151            let name_str = name.to_string_lossy();
152
153            // Skip hidden dirs and common large directories
154            if name_str.starts_with('.')
155                || name_str == "node_modules"
156                || name_str == "target"
157                || name_str == "dist"
158                || name_str == "build"
159                || name_str == ".git"
160            {
161                continue;
162            }
163
164            // Check glob pattern for files
165            if let Some(ref matcher) = glob_matcher
166                && path.is_file()
167                && let Some(name) = path.file_name().and_then(|n| n.to_str())
168                && !matcher.matches(name)
169            {
170                continue;
171            }
172
173            if path.is_dir() {
174                stack.push(path);
175            } else if path.is_file() {
176                files.push(path);
177                // Limit number of files to search
178                if files.len() >= MAX_FILES {
179                    return Ok(files);
180                }
181            }
182        }
183    }
184
185    Ok(files)
186}