codetether_agent/tool/
search.rs1use super::{Tool, ToolResult};
4use anyhow::Result;
5use async_trait::async_trait;
6use ignore::WalkBuilder;
7use regex::Regex;
8use serde_json::{Value, json};
9use std::time::{Duration, Instant};
10
/// Number of matches returned when the caller does not supply `limit`.
const DEFAULT_GREP_LIMIT: usize = 50;
/// Largest `limit` a caller may request; bigger values are clamped to this.
const MAX_GREP_LIMIT: usize = 500;
/// Search time budget, in seconds, when neither the call nor the environment sets one.
const DEFAULT_GREP_TIMEOUT_SECS: u64 = 15;
/// Longest search time budget, in seconds; bigger values are clamped to this.
const MAX_GREP_TIMEOUT_SECS: u64 = 120;
/// Default cap on how many candidate files a single search may visit.
const DEFAULT_GREP_MAX_SCANNED_FILES: usize = 10_000;
/// Default per-file size cap in bytes (1 MiB); larger files are skipped.
const DEFAULT_GREP_MAX_FILE_BYTES: u64 = 1_048_576;
17
/// Stateless tool that searches file contents for literal text or a regex.
///
/// The type carries no data; `GrepTool::new()` and `GrepTool::default()`
/// produce the same value.
#[derive(Default)]
pub struct GrepTool;

impl GrepTool {
    /// Creates the tool.
    pub fn new() -> Self {
        GrepTool
    }
}
32
33#[async_trait]
34impl Tool for GrepTool {
35 fn id(&self) -> &str {
36 "grep"
37 }
38
39 fn name(&self) -> &str {
40 "Grep Search"
41 }
42
43 fn description(&self) -> &str {
44 "grep(pattern: string, path?: string, is_regex?: bool, include?: string, limit?: int) - Search for text or regex patterns in files. Respects .gitignore by default."
45 }
46
47 fn parameters(&self) -> Value {
48 json!({
49 "type": "object",
50 "properties": {
51 "pattern": {
52 "type": "string",
53 "description": "The text or regex pattern to search for"
54 },
55 "path": {
56 "type": "string",
57 "description": "Directory or file to search in (default: current directory)"
58 },
59 "is_regex": {
60 "type": "boolean",
61 "description": "Whether the pattern is a regex (default: false)"
62 },
63 "include": {
64 "type": "string",
65 "description": "Glob pattern to include files (e.g., *.rs)"
66 },
67 "limit": {
68 "type": "integer",
69 "description": "Maximum number of matches to return"
70 },
71 "timeout_secs": {
72 "type": "integer",
73 "description": "Maximum search time in seconds before returning partial results"
74 }
75 },
76 "required": ["pattern"],
77 "example": {
78 "pattern": "fn main",
79 "path": "src/",
80 "include": "*.rs"
81 }
82 })
83 }
84
85 async fn execute(&self, args: Value) -> Result<ToolResult> {
86 let pattern = match args["pattern"].as_str() {
87 Some(p) => p,
88 None => {
89 return Ok(ToolResult::structured_error(
90 "INVALID_ARGUMENT",
91 "grep",
92 "pattern is required",
93 Some(vec!["pattern"]),
94 Some(json!({"pattern": "search text", "path": "src/"})),
95 ));
96 }
97 };
98 let search_path = args["path"].as_str().unwrap_or(".");
99 let is_regex = args["is_regex"].as_bool().unwrap_or(false);
100 let include = args["include"].as_str();
101 let limit = args["limit"]
102 .as_u64()
103 .map(|n| n as usize)
104 .unwrap_or(DEFAULT_GREP_LIMIT)
105 .clamp(1, MAX_GREP_LIMIT);
106 let timeout_secs = args["timeout_secs"]
107 .as_u64()
108 .or_else(|| env_u64("CODETETHER_GREP_TIMEOUT_SECS"))
109 .unwrap_or(DEFAULT_GREP_TIMEOUT_SECS)
110 .clamp(1, MAX_GREP_TIMEOUT_SECS);
111 let max_scanned_files = env_usize("CODETETHER_GREP_MAX_SCANNED_FILES")
112 .unwrap_or(DEFAULT_GREP_MAX_SCANNED_FILES)
113 .max(1);
114 let max_file_bytes = env_u64("CODETETHER_GREP_MAX_FILE_BYTES")
115 .unwrap_or(DEFAULT_GREP_MAX_FILE_BYTES)
116 .max(1);
117
118 let regex = if is_regex {
119 Regex::new(pattern)?
120 } else {
121 Regex::new(®ex::escape(pattern))?
122 };
123 let include_pattern = include.and_then(|pattern| glob::Pattern::new(pattern).ok());
124
125 let started = Instant::now();
126 let deadline = started + Duration::from_secs(timeout_secs);
127 let mut results = Vec::new();
128 let mut scanned_files = 0usize;
129 let mut skipped_oversize = 0usize;
130 let mut skipped_unreadable = 0usize;
131 let mut timed_out = false;
132 let mut scan_limit_reached = false;
133 let mut walker = WalkBuilder::new(search_path);
134 walker.hidden(false).git_ignore(true);
135
136 for entry in walker.build() {
137 if Instant::now() >= deadline {
138 timed_out = true;
139 break;
140 }
141
142 if results.len() >= limit {
143 break;
144 }
145
146 let entry = match entry {
147 Ok(e) => e,
148 Err(_) => continue,
149 };
150
151 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
152 continue;
153 }
154
155 let path = entry.path();
156
157 if let Some(include_pattern) = &include_pattern
159 && !include_pattern.matches_path(path)
160 {
161 continue;
162 }
163
164 scanned_files += 1;
165 if scanned_files > max_scanned_files {
166 scan_limit_reached = true;
167 break;
168 }
169
170 let Some(remaining) = remaining_duration(deadline) else {
171 timed_out = true;
172 break;
173 };
174 let metadata = match tokio::time::timeout(remaining, tokio::fs::metadata(path)).await {
175 Ok(Ok(metadata)) => metadata,
176 Ok(Err(_)) => {
177 skipped_unreadable += 1;
178 continue;
179 }
180 Err(_) => {
181 timed_out = true;
182 break;
183 }
184 };
185 if metadata.len() > max_file_bytes {
186 skipped_oversize += 1;
187 continue;
188 }
189
190 let Some(remaining) = remaining_duration(deadline) else {
192 timed_out = true;
193 break;
194 };
195 let content =
196 match tokio::time::timeout(remaining, tokio::fs::read_to_string(path)).await {
197 Ok(Ok(content)) => content,
198 Ok(Err(_)) => {
199 skipped_unreadable += 1;
200 continue;
201 }
202 Err(_) => {
203 timed_out = true;
204 break;
205 }
206 };
207
208 for (line_num, line) in content.lines().enumerate() {
209 if results.len() >= limit {
210 break;
211 }
212
213 if regex.is_match(line) {
214 results.push(format!(
215 "{}:{}: {}",
216 path.display(),
217 line_num + 1,
218 line.trim()
219 ));
220 }
221 }
222 }
223
224 let result_limit_reached = results.len() >= limit;
225 let truncated = result_limit_reached || timed_out || scan_limit_reached;
226 let mut output = results.join("\n");
227 if output.is_empty() {
228 output = "No matches found".to_string();
229 }
230 if timed_out {
231 output.push_str(&format!(
232 "\n[grep stopped after {timeout_secs}s; scanned {scanned_files} files. Narrow path/include or raise timeout_secs.]"
233 ));
234 } else if scan_limit_reached {
235 output.push_str(&format!(
236 "\n[grep stopped after scanning {max_scanned_files} files. Narrow path/include.]"
237 ));
238 }
239
240 let result = if timed_out || scan_limit_reached {
241 ToolResult::error(output)
242 } else {
243 ToolResult::success(output)
244 };
245
246 Ok(result
247 .with_metadata("count", json!(results.len()))
248 .with_metadata("truncated", json!(truncated))
249 .with_metadata("scanned_files", json!(scanned_files))
250 .with_metadata("skipped_oversize", json!(skipped_oversize))
251 .with_metadata("skipped_unreadable", json!(skipped_unreadable))
252 .with_metadata("timed_out", json!(timed_out))
253 .with_metadata("scan_limit_reached", json!(scan_limit_reached)))
254 }
255}
256
/// Reads the environment variable `name` and parses it as a `u64`.
///
/// Surrounding whitespace is ignored, so a value such as `"30\n"` is accepted
/// instead of being silently discarded. Returns `None` when the variable is
/// unset, is not valid Unicode, or does not hold an unsigned integer.
fn env_u64(name: &str) -> Option<u64> {
    std::env::var(name).ok()?.trim().parse().ok()
}
260
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Surrounding whitespace is ignored, so a value such as `"500\n"` is accepted
/// instead of being silently discarded. Returns `None` when the variable is
/// unset, is not valid Unicode, or does not hold an unsigned integer.
fn env_usize(name: &str) -> Option<usize> {
    std::env::var(name).ok()?.trim().parse().ok()
}
264
/// Returns the time left until `deadline`.
///
/// Yields `None` once the deadline lies in the past; a deadline equal to the
/// current instant yields `Some(Duration::ZERO)`.
fn remaining_duration(deadline: Instant) -> Option<Duration> {
    let now = Instant::now();
    (deadline >= now).then(|| deadline.duration_since(now))
}
268
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tokio::io::AsyncWriteExt;

    /// Runs a fresh `GrepTool` with `args`, panicking if execution itself fails.
    async fn run_grep(args: Value) -> ToolResult {
        GrepTool::new().execute(args).await.expect("grep executes")
    }

    /// Three matching lines exist across two files, but `limit: 1` must cap
    /// the output at one match and flag the result as truncated.
    #[tokio::test]
    async fn grep_honors_result_limit() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let fixtures = [
            ("a.txt", "needle one\nneedle two\n"),
            ("b.txt", "needle three\n"),
        ];
        for (file_name, body) in fixtures {
            tokio::fs::write(workspace.path().join(file_name), body)
                .await
                .expect("write fixture");
        }

        let outcome = run_grep(json!({
            "pattern": "needle",
            "path": workspace.path().to_string_lossy(),
            "limit": 1
        }))
        .await;

        assert!(outcome.success);
        assert_eq!(outcome.metadata["count"], json!(1));
        assert_eq!(outcome.metadata["truncated"], json!(true));
        assert!(outcome.output.contains("needle"));
    }

    /// A file one byte over the default size cap is skipped and counted in
    /// `skipped_oversize` rather than being read.
    #[tokio::test]
    async fn grep_skips_oversized_files() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let oversized_len = (DEFAULT_GREP_MAX_FILE_BYTES + 1) as usize;
        let payload = vec![b'x'; oversized_len];

        let mut handle = tokio::fs::File::create(workspace.path().join("large.txt"))
            .await
            .expect("create fixture");
        handle.write_all(&payload).await.expect("write fixture");
        handle.flush().await.expect("flush fixture");

        let outcome = run_grep(json!({
            "pattern": "needle",
            "path": workspace.path().to_string_lossy()
        }))
        .await;

        assert!(outcome.success);
        assert_eq!(outcome.metadata["count"], json!(0));
        assert_eq!(outcome.metadata["skipped_oversize"], json!(1));
        assert!(outcome.output.contains("No matches found"));
    }
}