Skip to main content

matrixcode_core/tools/
search.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use serde_json::{Value, json};
4use tokio::time::{Duration, timeout};
5
6use super::{Tool, ToolDefinition, ToolContext};
7
/// Tool that searches file contents for lines matching a regular expression.
///
/// The type carries no state; all behavior lives in its `Tool` implementation.
pub struct SearchTool;
9
10#[async_trait]
11impl Tool for SearchTool {
12    fn definition_with_context(&self, ctx: &ToolContext) -> ToolDefinition {
13        // Dynamic description based on CodeGraph availability
14        let prefer_section = if ctx.codegraph_available {
15            "【优先使用 code_search 的场景】
16- 查找函数/类/方法/变量的定义 → code_search(快10-100倍)
17- 查找符号的调用关系 → code_callers/callees"
18        } else {
19            "【search 的适用场景】
20- 搜索非代码文本(错误消息、日志文本)
21- 搜索字符串常量
22- 需要搜索特定文件类型的内容"
23        };
24        
25        let description = format!("在文件内容中搜索匹配的文本模式。
26
27适用场景:
28- 搜索文本内容(错误消息、日志、注释)
29- 搜索字符串常量
30- 不确定目标是否是代码符号
31
32{}", prefer_section);
33        
34        ToolDefinition {
35            name: "search".to_string(),
36            description,
37            parameters: json!({
38                "type": "object",
39                "properties": {
40                    "pattern": {
41                        "type": "string",
42                        "description": "要搜索的正则表达式模式"
43                    },
44                    "path": {
45                        "type": "string",
46                        "description": "搜索的目录或文件路径(默认 '.')"
47                    },
48                    "glob": {
49                        "type": "string",
50                        "description": "文件过滤的 glob 模式(如 '*.rs')"
51                    }
52                },
53                "required": ["pattern"]
54            }),
55            ..Default::default()
56        }
57    }
58    
59    fn definition(&self) -> ToolDefinition {
60        self.definition_with_context(&ToolContext::default())
61    }
62
63    async fn execute(&self, params: Value) -> Result<String> {
64        let pattern = params["pattern"]
65            .as_str()
66            .ok_or_else(|| anyhow::anyhow!("missing 'pattern'"))?;
67        let path = params["path"].as_str().unwrap_or(".");
68        let glob_pattern = params["glob"].as_str();
69
70        let pattern = pattern.to_string();
71        let path = path.to_string();
72        let glob_pattern = glob_pattern.map(|s| s.to_string());
73
74        // Use timeout to prevent hanging on large directories
75        timeout(Duration::from_secs(30), async {
76            tokio::task::spawn_blocking(move || {
77                search_files(&pattern, &path, glob_pattern.as_deref())
78            })
79            .await?
80        })
81        .await
82        .map_err(|_| anyhow::anyhow!("Search timeout (30s) - directory may be too large"))?
83    }
84}
85
/// Maximum number of files `collect_files` gathers before it stops walking the
/// directory tree; files beyond this cap are never searched.
const MAX_FILES: usize = 500;
88
89fn search_files(pattern: &str, path: &str, glob_pattern: Option<&str>) -> Result<String> {
90    use std::fs;
91    use std::path::Path;
92
93    let regex = regex::Regex::new(pattern)?;
94    let mut results = Vec::new();
95    let root = Path::new(path);
96
97    let entries = collect_files(root, glob_pattern)?;
98
99    for file_path in entries {
100        // Skip very large files (> 1MB)
101        match fs::metadata(&file_path) {
102            Ok(meta) if meta.len() > 1_000_000 => continue,
103            Err(_) => continue,
104            Ok(_) => {}
105        }
106
107        let content = match fs::read_to_string(&file_path) {
108            Ok(c) => c,
109            Err(_) => continue,
110        };
111
112        for (line_num, line) in content.lines().enumerate() {
113            if regex.is_match(line) {
114                results.push(format!(
115                    "{}:{}: {}",
116                    file_path.display(),
117                    line_num + 1,
118                    line.trim()
119                ));
120            }
121        }
122
123        if results.len() > 200 {
124            results.push("... (truncated, too many results)".to_string());
125            break;
126        }
127    }
128
129    if results.is_empty() {
130        Ok("No matches found.".to_string())
131    } else {
132        Ok(results.join("\n"))
133    }
134}
135
136fn collect_files(
137    root: &std::path::Path,
138    glob_pattern: Option<&str>,
139) -> Result<Vec<std::path::PathBuf>> {
140    let mut files = Vec::new();
141
142    if root.is_file() {
143        files.push(root.to_path_buf());
144        return Ok(files);
145    }
146
147    let glob_matcher = glob_pattern.map(glob::Pattern::new).transpose()?;
148
149    let mut stack = vec![root.to_path_buf()];
150
151    while let Some(dir) = stack.pop() {
152        let entries = match std::fs::read_dir(&dir) {
153            Ok(e) => e,
154            Err(_) => continue,
155        };
156
157        for entry in entries.flatten() {
158            let path = entry.path();
159            let name = entry.file_name();
160            let name_str = name.to_string_lossy();
161
162            // Skip hidden dirs and common large directories
163            if name_str.starts_with('.')
164                || name_str == "node_modules"
165                || name_str == "target"
166                || name_str == "dist"
167                || name_str == "build"
168                || name_str == ".git"
169            {
170                continue;
171            }
172
173            // Check glob pattern for files
174            if let Some(ref matcher) = glob_matcher
175                && path.is_file()
176                && let Some(name) = path.file_name().and_then(|n| n.to_str())
177                && !matcher.matches(name)
178            {
179                continue;
180            }
181
182            if path.is_dir() {
183                stack.push(path);
184            } else if path.is_file() {
185                files.push(path);
186                // Limit number of files to search
187                if files.len() >= MAX_FILES {
188                    return Ok(files);
189                }
190            }
191        }
192    }
193
194    Ok(files)
195}