1use agentzero_core::{Tool, ToolContext, ToolResult};
2use anyhow::{anyhow, Context};
3use async_trait::async_trait;
4use serde::Deserialize;
5use std::path::{Component, Path, PathBuf};
6
/// Default maximum number of matches returned when the caller omits `limit`
/// (or passes 0, which `execute` also treats as "use the default").
const DEFAULT_LIMIT: usize = 50;
/// Maximum number of bytes of a matching line included in the output before
/// it is cut short with a trailing `...`.
const MAX_LINE_DISPLAY: usize = 200;
9
/// Deserialized JSON input for the `content_search` tool.
#[derive(Debug, Deserialize)]
struct ContentSearchInput {
    // Regex pattern to search for (required, must be non-empty).
    pattern: String,
    // Optional workspace-relative subdirectory to restrict the search to.
    #[serde(default)]
    path: Option<String>,
    // Optional file-name glob filter, e.g. "*.rs".
    #[serde(default)]
    glob: Option<String>,
    // Maximum number of matches to return; defaults to `DEFAULT_LIMIT`.
    #[serde(default = "default_limit")]
    limit: usize,
    // When true, the pattern is matched case-insensitively.
    #[serde(default)]
    case_insensitive: bool,
}
22
/// Serde default for `ContentSearchInput::limit`.
fn default_limit() -> usize {
    DEFAULT_LIMIT
}
26
/// Tool that searches file contents under the workspace root for a regex
/// pattern, reporting matches as `path:line_number:line`.
#[derive(Debug, Default, Clone, Copy)]
pub struct ContentSearchTool;
29
30impl ContentSearchTool {
31 fn resolve_base(input_path: Option<&str>, workspace_root: &str) -> anyhow::Result<PathBuf> {
32 let base = match input_path {
33 Some(p) if !p.trim().is_empty() => {
34 let rel = Path::new(p);
35 if rel.is_absolute() {
36 return Err(anyhow!("absolute paths are not allowed"));
37 }
38 if rel.components().any(|c| matches!(c, Component::ParentDir)) {
39 return Err(anyhow!("path traversal is not allowed"));
40 }
41 Path::new(workspace_root).join(rel)
42 }
43 _ => PathBuf::from(workspace_root),
44 };
45
46 let canonical = base
47 .canonicalize()
48 .with_context(|| format!("unable to resolve search base: {}", base.display()))?;
49 let canonical_root = Path::new(workspace_root)
50 .canonicalize()
51 .context("unable to resolve workspace root")?;
52 if !canonical.starts_with(&canonical_root) {
53 return Err(anyhow!("search path is outside workspace root"));
54 }
55 Ok(canonical)
56 }
57
58 fn looks_binary(bytes: &[u8]) -> bool {
59 let check_len = bytes.len().min(8192);
60 bytes[..check_len].contains(&0)
61 }
62
63 fn walk_files(
64 base: &Path,
65 glob_pattern: Option<&str>,
66 workspace_root: &Path,
67 ) -> anyhow::Result<Vec<PathBuf>> {
68 let mut files = Vec::new();
69 Self::walk_recursive(base, glob_pattern, workspace_root, &mut files)?;
70 files.sort();
71 Ok(files)
72 }
73
74 fn walk_recursive(
75 dir: &Path,
76 glob_pattern: Option<&str>,
77 workspace_root: &Path,
78 files: &mut Vec<PathBuf>,
79 ) -> anyhow::Result<()> {
80 let entries = std::fs::read_dir(dir)
81 .with_context(|| format!("unable to read directory: {}", dir.display()))?;
82
83 for entry in entries {
84 let entry = entry?;
85 let path = entry.path();
86 let file_name = entry.file_name().to_string_lossy().to_string();
87
88 if file_name.starts_with('.')
90 || file_name == "node_modules"
91 || file_name == "target"
92 || file_name == "__pycache__"
93 {
94 continue;
95 }
96
97 if path.is_dir() {
98 Self::walk_recursive(&path, glob_pattern, workspace_root, files)?;
99 } else if path.is_file() {
100 if let Some(pattern) = glob_pattern {
101 let glob = glob::Pattern::new(pattern)
102 .with_context(|| format!("invalid glob pattern: {pattern}"))?;
103 if !glob.matches(&file_name) {
104 continue;
105 }
106 }
107 if let Ok(canonical) = path.canonicalize() {
109 if canonical.starts_with(workspace_root) {
110 files.push(canonical);
111 }
112 }
113 }
114 }
115 Ok(())
116 }
117}
118
119#[async_trait]
120impl Tool for ContentSearchTool {
121 fn name(&self) -> &'static str {
122 "content_search"
123 }
124
125 fn description(&self) -> &'static str {
126 "Search file contents for a regex pattern. Returns matching lines with file paths and line numbers."
127 }
128
129 fn input_schema(&self) -> Option<serde_json::Value> {
130 Some(serde_json::json!({
131 "type": "object",
132 "properties": {
133 "pattern": {
134 "type": "string",
135 "description": "Regex pattern to search for"
136 },
137 "path": {
138 "type": "string",
139 "description": "Subdirectory to search within (optional)"
140 },
141 "glob": {
142 "type": "string",
143 "description": "File glob filter (e.g. \"*.rs\", \"*.py\")"
144 },
145 "limit": {
146 "type": "integer",
147 "description": "Maximum number of matches to return (default: 50)"
148 },
149 "case_insensitive": {
150 "type": "boolean",
151 "description": "If true, perform case-insensitive matching"
152 }
153 },
154 "required": ["pattern"]
155 }))
156 }
157
158 async fn execute(&self, input: &str, ctx: &ToolContext) -> anyhow::Result<ToolResult> {
159 let request: ContentSearchInput = serde_json::from_str(input).context(
160 "content_search expects JSON: {\"pattern\", \"path\"?, \"glob\"?, \"limit\"?, \"case_insensitive\"?}",
161 )?;
162
163 if request.pattern.is_empty() {
164 return Err(anyhow!("pattern must not be empty"));
165 }
166
167 let regex = if request.case_insensitive {
168 regex::RegexBuilder::new(&request.pattern)
169 .case_insensitive(true)
170 .build()
171 } else {
172 regex::Regex::new(&request.pattern)
173 }
174 .with_context(|| format!("invalid regex pattern: {}", request.pattern))?;
175
176 let workspace_root = PathBuf::from(&ctx.workspace_root);
177 let base = Self::resolve_base(request.path.as_deref(), &ctx.workspace_root)?;
178 let canonical_root = workspace_root
179 .canonicalize()
180 .context("unable to resolve workspace root")?;
181
182 let files = Self::walk_files(&base, request.glob.as_deref(), &canonical_root)?;
183
184 let limit = if request.limit == 0 {
185 DEFAULT_LIMIT
186 } else {
187 request.limit
188 };
189
190 let mut results = Vec::new();
191 'outer: for file_path in &files {
192 let bytes = match std::fs::read(file_path) {
193 Ok(b) => b,
194 Err(_) => continue,
195 };
196
197 if Self::looks_binary(&bytes) {
198 continue;
199 }
200
201 let content = match std::str::from_utf8(&bytes) {
202 Ok(s) => s,
203 Err(_) => continue,
204 };
205
206 let relative = file_path.strip_prefix(&canonical_root).unwrap_or(file_path);
207
208 for (line_num, line) in content.lines().enumerate() {
209 if regex.is_match(line) {
210 let display_line = if line.len() > MAX_LINE_DISPLAY {
211 format!("{}...", &line[..MAX_LINE_DISPLAY])
212 } else {
213 line.to_string()
214 };
215 results.push(format!(
216 "{}:{}:{}",
217 relative.display(),
218 line_num + 1,
219 display_line
220 ));
221 if results.len() >= limit {
222 break 'outer;
223 }
224 }
225 }
226 }
227
228 if results.is_empty() {
229 return Ok(ToolResult {
230 output: "no matches found".to_string(),
231 });
232 }
233
234 let truncated = results.len() >= limit;
235 let mut output = results.join("\n");
236 if truncated {
237 output.push_str(&format!("\n<truncated at {} results>", limit));
238 }
239
240 Ok(ToolResult { output })
241 }
242}
243
#[cfg(test)]
mod tests {
    use super::ContentSearchTool;
    use agentzero_core::{Tool, ToolContext, ToolResult};
    use std::fs;
    use std::path::{Path, PathBuf};
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Per-process counter so tests spawned within the same nanosecond still
    // receive distinct scratch directories.
    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create a fresh, uniquely named scratch directory under the system
    /// temp dir.
    fn temp_dir() -> PathBuf {
        let stamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("clock")
            .as_nanos();
        let sequence = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
        let scratch = std::env::temp_dir().join(format!(
            "agentzero-content-search-{}-{stamp}-{sequence}",
            std::process::id()
        ));
        fs::create_dir_all(&scratch).expect("temp dir should be created");
        scratch
    }

    /// Run the tool once against `root` with the raw JSON `input`.
    async fn search(root: &Path, input: &str) -> anyhow::Result<ToolResult> {
        ContentSearchTool
            .execute(input, &ToolContext::new(root.to_string_lossy().to_string()))
            .await
    }

    #[tokio::test]
    async fn content_search_finds_matches() {
        let root = temp_dir();
        fs::write(
            root.join("main.rs"),
            "fn main() {\n println!(\"hello\");\n}\n",
        )
        .unwrap();
        fs::write(root.join("lib.rs"), "pub fn helper() {}\n").unwrap();

        let result = search(&root, r#"{"pattern": "fn \\w+"}"#)
            .await
            .expect("search should succeed");
        assert!(result.output.contains("main.rs:1:fn main()"));
        assert!(result.output.contains("lib.rs:1:pub fn helper()"));
        fs::remove_dir_all(root).ok();
    }

    #[tokio::test]
    async fn content_search_case_insensitive() {
        let root = temp_dir();
        fs::write(root.join("test.txt"), "Hello World\nhello world\n").unwrap();

        let result = search(&root, r#"{"pattern": "HELLO", "case_insensitive": true}"#)
            .await
            .expect("case insensitive search should succeed");
        assert!(result.output.contains(":1:"));
        assert!(result.output.contains(":2:"));
        fs::remove_dir_all(root).ok();
    }

    #[tokio::test]
    async fn content_search_with_glob_filter() {
        let root = temp_dir();
        fs::write(root.join("match.rs"), "fn test() {}\n").unwrap();
        fs::write(root.join("skip.txt"), "fn test() {}\n").unwrap();

        let result = search(&root, r#"{"pattern": "fn test", "glob": "*.rs"}"#)
            .await
            .expect("filtered search should succeed");
        assert!(result.output.contains("match.rs"));
        assert!(!result.output.contains("skip.txt"));
        fs::remove_dir_all(root).ok();
    }

    #[tokio::test]
    async fn content_search_no_matches() {
        let root = temp_dir();
        fs::write(root.join("test.txt"), "nothing relevant here\n").unwrap();

        let result = search(&root, r#"{"pattern": "nonexistent_pattern_xyz"}"#)
            .await
            .expect("no matches should succeed");
        assert!(result.output.contains("no matches"));
        fs::remove_dir_all(root).ok();
    }

    #[tokio::test]
    async fn content_search_rejects_invalid_regex_negative_path() {
        let root = temp_dir();

        let error = search(&root, r#"{"pattern": "[invalid"}"#)
            .await
            .expect_err("invalid regex should fail");
        assert!(error.to_string().contains("invalid regex"));
        fs::remove_dir_all(root).ok();
    }

    #[tokio::test]
    async fn content_search_rejects_empty_pattern_negative_path() {
        let root = temp_dir();

        let error = search(&root, r#"{"pattern": ""}"#)
            .await
            .expect_err("empty pattern should fail");
        assert!(error.to_string().contains("pattern must not be empty"));
        fs::remove_dir_all(root).ok();
    }

    #[tokio::test]
    async fn content_search_skips_binary_files() {
        let root = temp_dir();
        fs::write(root.join("text.txt"), "searchable content\n").unwrap();
        fs::write(root.join("binary.bin"), [0u8, 1, 2, 3, 0, 5, 6]).unwrap();

        let result = search(&root, r#"{"pattern": "."}"#)
            .await
            .expect("search should succeed");
        assert!(result.output.contains("text.txt"));
        assert!(!result.output.contains("binary.bin"));
        fs::remove_dir_all(root).ok();
    }
}