Skip to main content

codetether_agent/tool/
search.rs

1//! Search tools: grep
2
3use super::{Tool, ToolResult};
4use anyhow::Result;
5use async_trait::async_trait;
6use ignore::WalkBuilder;
7use regex::Regex;
8use serde_json::{Value, json};
9use std::time::{Duration, Instant};
10
/// Number of matches returned when the caller does not supply `limit`.
11const DEFAULT_GREP_LIMIT: usize = 50;
/// Upper bound that a caller-supplied `limit` is clamped to.
12const MAX_GREP_LIMIT: usize = 500;
/// Search time budget in seconds, used when neither the `timeout_secs`
/// argument nor `CODETETHER_GREP_TIMEOUT_SECS` provides a value.
13const DEFAULT_GREP_TIMEOUT_SECS: u64 = 15;
/// Upper bound that the effective timeout (in seconds) is clamped to.
14const MAX_GREP_TIMEOUT_SECS: u64 = 120;
/// Maximum number of files examined per search when
/// `CODETETHER_GREP_MAX_SCANNED_FILES` is not set.
15const DEFAULT_GREP_MAX_SCANNED_FILES: usize = 10_000;
/// Files larger than this many bytes (1 MiB) are skipped when
/// `CODETETHER_GREP_MAX_FILE_BYTES` is not set.
16const DEFAULT_GREP_MAX_FILE_BYTES: u64 = 1024 * 1024;
17
/// Search for text in files.
///
/// The tool carries no state, so it is a zero-sized unit struct; `new` and
/// `default` both produce the same value.
#[derive(Debug, Clone, Copy, Default)]
pub struct GrepTool;

impl GrepTool {
    /// Creates a new `GrepTool`.
    pub const fn new() -> Self {
        Self
    }
}
32
33#[async_trait]
34impl Tool for GrepTool {
35    fn id(&self) -> &str {
36        "grep"
37    }
38
39    fn name(&self) -> &str {
40        "Grep Search"
41    }
42
43    fn description(&self) -> &str {
44        "grep(pattern: string, path?: string, is_regex?: bool, include?: string, limit?: int) - Search for text or regex patterns in files. Respects .gitignore by default."
45    }
46
47    fn parameters(&self) -> Value {
48        json!({
49            "type": "object",
50            "properties": {
51                "pattern": {
52                    "type": "string",
53                    "description": "The text or regex pattern to search for"
54                },
55                "path": {
56                    "type": "string",
57                    "description": "Directory or file to search in (default: current directory)"
58                },
59                "is_regex": {
60                    "type": "boolean",
61                    "description": "Whether the pattern is a regex (default: false)"
62                },
63                "include": {
64                    "type": "string",
65                    "description": "Glob pattern to include files (e.g., *.rs)"
66                },
67                "limit": {
68                    "type": "integer",
69                    "description": "Maximum number of matches to return"
70                },
71                "timeout_secs": {
72                    "type": "integer",
73                    "description": "Maximum search time in seconds before returning partial results"
74                }
75            },
76            "required": ["pattern"],
77            "example": {
78                "pattern": "fn main",
79                "path": "src/",
80                "include": "*.rs"
81            }
82        })
83    }
84
85    async fn execute(&self, args: Value) -> Result<ToolResult> {
86        let pattern = match args["pattern"].as_str() {
87            Some(p) => p,
88            None => {
89                return Ok(ToolResult::structured_error(
90                    "INVALID_ARGUMENT",
91                    "grep",
92                    "pattern is required",
93                    Some(vec!["pattern"]),
94                    Some(json!({"pattern": "search text", "path": "src/"})),
95                ));
96            }
97        };
98        let search_path = args["path"].as_str().unwrap_or(".");
99        let is_regex = args["is_regex"].as_bool().unwrap_or(false);
100        let include = args["include"].as_str();
101        let limit = args["limit"]
102            .as_u64()
103            .map(|n| n as usize)
104            .unwrap_or(DEFAULT_GREP_LIMIT)
105            .clamp(1, MAX_GREP_LIMIT);
106        let timeout_secs = args["timeout_secs"]
107            .as_u64()
108            .or_else(|| env_u64("CODETETHER_GREP_TIMEOUT_SECS"))
109            .unwrap_or(DEFAULT_GREP_TIMEOUT_SECS)
110            .clamp(1, MAX_GREP_TIMEOUT_SECS);
111        let max_scanned_files = env_usize("CODETETHER_GREP_MAX_SCANNED_FILES")
112            .unwrap_or(DEFAULT_GREP_MAX_SCANNED_FILES)
113            .max(1);
114        let max_file_bytes = env_u64("CODETETHER_GREP_MAX_FILE_BYTES")
115            .unwrap_or(DEFAULT_GREP_MAX_FILE_BYTES)
116            .max(1);
117
118        let regex = if is_regex {
119            Regex::new(pattern)?
120        } else {
121            Regex::new(&regex::escape(pattern))?
122        };
123        let include_pattern = include.and_then(|pattern| glob::Pattern::new(pattern).ok());
124
125        let started = Instant::now();
126        let deadline = started + Duration::from_secs(timeout_secs);
127        let mut results = Vec::new();
128        let mut scanned_files = 0usize;
129        let mut skipped_oversize = 0usize;
130        let mut skipped_unreadable = 0usize;
131        let mut timed_out = false;
132        let mut scan_limit_reached = false;
133        let mut walker = WalkBuilder::new(search_path);
134        walker.hidden(false).git_ignore(true);
135
136        for entry in walker.build() {
137            if Instant::now() >= deadline {
138                timed_out = true;
139                break;
140            }
141
142            if results.len() >= limit {
143                break;
144            }
145
146            let entry = match entry {
147                Ok(e) => e,
148                Err(_) => continue,
149            };
150
151            if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
152                continue;
153            }
154
155            let path = entry.path();
156
157            // Check include pattern
158            if let Some(include_pattern) = &include_pattern
159                && !include_pattern.matches_path(path)
160            {
161                continue;
162            }
163
164            scanned_files += 1;
165            if scanned_files > max_scanned_files {
166                scan_limit_reached = true;
167                break;
168            }
169
170            let Some(remaining) = remaining_duration(deadline) else {
171                timed_out = true;
172                break;
173            };
174            let metadata = match tokio::time::timeout(remaining, tokio::fs::metadata(path)).await {
175                Ok(Ok(metadata)) => metadata,
176                Ok(Err(_)) => {
177                    skipped_unreadable += 1;
178                    continue;
179                }
180                Err(_) => {
181                    timed_out = true;
182                    break;
183                }
184            };
185            if metadata.len() > max_file_bytes {
186                skipped_oversize += 1;
187                continue;
188            }
189
190            // Read and search file
191            let Some(remaining) = remaining_duration(deadline) else {
192                timed_out = true;
193                break;
194            };
195            let content =
196                match tokio::time::timeout(remaining, tokio::fs::read_to_string(path)).await {
197                    Ok(Ok(content)) => content,
198                    Ok(Err(_)) => {
199                        skipped_unreadable += 1;
200                        continue;
201                    }
202                    Err(_) => {
203                        timed_out = true;
204                        break;
205                    }
206                };
207
208            for (line_num, line) in content.lines().enumerate() {
209                if results.len() >= limit {
210                    break;
211                }
212
213                if regex.is_match(line) {
214                    results.push(format!(
215                        "{}:{}: {}",
216                        path.display(),
217                        line_num + 1,
218                        line.trim()
219                    ));
220                }
221            }
222        }
223
224        let result_limit_reached = results.len() >= limit;
225        let truncated = result_limit_reached || timed_out || scan_limit_reached;
226        let mut output = results.join("\n");
227        if output.is_empty() {
228            output = "No matches found".to_string();
229        }
230        if timed_out {
231            output.push_str(&format!(
232                "\n[grep stopped after {timeout_secs}s; scanned {scanned_files} files. Narrow path/include or raise timeout_secs.]"
233            ));
234        } else if scan_limit_reached {
235            output.push_str(&format!(
236                "\n[grep stopped after scanning {max_scanned_files} files. Narrow path/include.]"
237            ));
238        }
239
240        let result = if timed_out || scan_limit_reached {
241            ToolResult::error(output)
242        } else {
243            ToolResult::success(output)
244        };
245
246        Ok(result
247            .with_metadata("count", json!(results.len()))
248            .with_metadata("truncated", json!(truncated))
249            .with_metadata("scanned_files", json!(scanned_files))
250            .with_metadata("skipped_oversize", json!(skipped_oversize))
251            .with_metadata("skipped_unreadable", json!(skipped_unreadable))
252            .with_metadata("timed_out", json!(timed_out))
253            .with_metadata("scan_limit_reached", json!(scan_limit_reached)))
254    }
255}
256
/// Reads the environment variable `name` and parses it as a `u64`.
///
/// Returns `None` when the variable cannot be read or its value is not an
/// unsigned integer. Surrounding whitespace is ignored so that a value such
/// as `"30 "` is honoured rather than silently replaced by the default.
fn env_u64(name: &str) -> Option<u64> {
    std::env::var(name).ok()?.trim().parse().ok()
}
260
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable cannot be read or its value is not an
/// unsigned integer. Surrounding whitespace is ignored so that a value such
/// as `"5000 "` is honoured rather than silently replaced by the default.
fn env_usize(name: &str) -> Option<usize> {
    std::env::var(name).ok()?.trim().parse().ok()
}
264
/// Returns the time left until `deadline`, or `None` once it has passed.
fn remaining_duration(deadline: Instant) -> Option<Duration> {
    let now = Instant::now();
    deadline.checked_duration_since(now)
}
268
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tokio::io::AsyncWriteExt;

    /// With three matching lines spread over two files, a `limit` of 1 must
    /// yield exactly one match and mark the result as truncated.
    #[tokio::test]
    async fn grep_honors_result_limit() {
        let fixture_dir = tempfile::tempdir().expect("tempdir");
        let fixtures = [
            ("a.txt", "needle one\nneedle two\n"),
            ("b.txt", "needle three\n"),
        ];
        for (file_name, body) in fixtures {
            tokio::fs::write(fixture_dir.path().join(file_name), body)
                .await
                .expect("write fixture");
        }

        let args = json!({
            "pattern": "needle",
            "path": fixture_dir.path().to_string_lossy(),
            "limit": 1
        });
        let outcome = GrepTool::new()
            .execute(args)
            .await
            .expect("grep executes");

        assert!(outcome.success);
        assert_eq!(outcome.metadata["count"], json!(1));
        assert_eq!(outcome.metadata["truncated"], json!(true));
        assert!(outcome.output.contains("needle"));
    }

    /// A file one byte over the default size cap must be skipped and counted
    /// in `skipped_oversize` instead of being searched.
    #[tokio::test]
    async fn grep_skips_oversized_files() {
        let fixture_dir = tempfile::tempdir().expect("tempdir");
        let payload = vec![b'x'; (DEFAULT_GREP_MAX_FILE_BYTES + 1) as usize];
        let mut big_file = tokio::fs::File::create(fixture_dir.path().join("large.txt"))
            .await
            .expect("create fixture");
        big_file.write_all(&payload).await.expect("write fixture");
        big_file.flush().await.expect("flush fixture");

        let args = json!({
            "pattern": "needle",
            "path": fixture_dir.path().to_string_lossy()
        });
        let outcome = GrepTool::new()
            .execute(args)
            .await
            .expect("grep executes");

        assert!(outcome.success);
        assert_eq!(outcome.metadata["count"], json!(0));
        assert_eq!(outcome.metadata["skipped_oversize"], json!(1));
        assert!(outcome.output.contains("No matches found"));
    }
}
324}