// agentzero_tools/content_search.rs
1use agentzero_core::{Tool, ToolContext, ToolResult};
2use anyhow::{anyhow, Context};
3use async_trait::async_trait;
4use serde::Deserialize;
5use std::path::{Component, Path, PathBuf};
6
/// Maximum number of matches returned when the caller omits `limit` (or passes 0).
const DEFAULT_LIMIT: usize = 50;
/// Matched lines longer than this many bytes are truncated for display.
const MAX_LINE_DISPLAY: usize = 200;
9
/// Deserialized JSON input for the `content_search` tool.
#[derive(Debug, Deserialize)]
struct ContentSearchInput {
    /// Regex pattern to search for (required, must be non-empty).
    pattern: String,
    /// Subdirectory to search within, relative to the workspace root.
    #[serde(default)]
    path: Option<String>,
    /// Glob filter applied to file names (e.g. `*.rs`).
    #[serde(default)]
    glob: Option<String>,
    /// Maximum number of matches to return; defaults to `DEFAULT_LIMIT`.
    #[serde(default = "default_limit")]
    limit: usize,
    /// If true, the regex is built case-insensitively.
    #[serde(default)]
    case_insensitive: bool,
}
22
/// Serde default for `ContentSearchInput::limit`.
fn default_limit() -> usize {
    DEFAULT_LIMIT
}
26
/// Tool that searches file contents under the workspace root for a regex pattern.
#[derive(Debug, Default, Clone, Copy)]
pub struct ContentSearchTool;
29
30impl ContentSearchTool {
31    fn resolve_base(input_path: Option<&str>, workspace_root: &str) -> anyhow::Result<PathBuf> {
32        let base = match input_path {
33            Some(p) if !p.trim().is_empty() => {
34                let rel = Path::new(p);
35                if rel.is_absolute() {
36                    return Err(anyhow!("absolute paths are not allowed"));
37                }
38                if rel.components().any(|c| matches!(c, Component::ParentDir)) {
39                    return Err(anyhow!("path traversal is not allowed"));
40                }
41                Path::new(workspace_root).join(rel)
42            }
43            _ => PathBuf::from(workspace_root),
44        };
45
46        let canonical = base
47            .canonicalize()
48            .with_context(|| format!("unable to resolve search base: {}", base.display()))?;
49        let canonical_root = Path::new(workspace_root)
50            .canonicalize()
51            .context("unable to resolve workspace root")?;
52        if !canonical.starts_with(&canonical_root) {
53            return Err(anyhow!("search path is outside workspace root"));
54        }
55        Ok(canonical)
56    }
57
58    fn looks_binary(bytes: &[u8]) -> bool {
59        let check_len = bytes.len().min(8192);
60        bytes[..check_len].contains(&0)
61    }
62
63    fn walk_files(
64        base: &Path,
65        glob_pattern: Option<&str>,
66        workspace_root: &Path,
67    ) -> anyhow::Result<Vec<PathBuf>> {
68        let mut files = Vec::new();
69        Self::walk_recursive(base, glob_pattern, workspace_root, &mut files)?;
70        files.sort();
71        Ok(files)
72    }
73
74    fn walk_recursive(
75        dir: &Path,
76        glob_pattern: Option<&str>,
77        workspace_root: &Path,
78        files: &mut Vec<PathBuf>,
79    ) -> anyhow::Result<()> {
80        let entries = std::fs::read_dir(dir)
81            .with_context(|| format!("unable to read directory: {}", dir.display()))?;
82
83        for entry in entries {
84            let entry = entry?;
85            let path = entry.path();
86            let file_name = entry.file_name().to_string_lossy().to_string();
87
88            // Skip hidden directories and common noise.
89            if file_name.starts_with('.')
90                || file_name == "node_modules"
91                || file_name == "target"
92                || file_name == "__pycache__"
93            {
94                continue;
95            }
96
97            if path.is_dir() {
98                Self::walk_recursive(&path, glob_pattern, workspace_root, files)?;
99            } else if path.is_file() {
100                if let Some(pattern) = glob_pattern {
101                    let glob = glob::Pattern::new(pattern)
102                        .with_context(|| format!("invalid glob pattern: {pattern}"))?;
103                    if !glob.matches(&file_name) {
104                        continue;
105                    }
106                }
107                // Verify still within workspace.
108                if let Ok(canonical) = path.canonicalize() {
109                    if canonical.starts_with(workspace_root) {
110                        files.push(canonical);
111                    }
112                }
113            }
114        }
115        Ok(())
116    }
117}
118
119#[async_trait]
120impl Tool for ContentSearchTool {
121    fn name(&self) -> &'static str {
122        "content_search"
123    }
124
125    fn description(&self) -> &'static str {
126        "Search file contents for a regex pattern. Returns matching lines with file paths and line numbers."
127    }
128
129    fn input_schema(&self) -> Option<serde_json::Value> {
130        Some(serde_json::json!({
131            "type": "object",
132            "properties": {
133                "pattern": {
134                    "type": "string",
135                    "description": "Regex pattern to search for"
136                },
137                "path": {
138                    "type": "string",
139                    "description": "Subdirectory to search within (optional)"
140                },
141                "glob": {
142                    "type": "string",
143                    "description": "File glob filter (e.g. \"*.rs\", \"*.py\")"
144                },
145                "limit": {
146                    "type": "integer",
147                    "description": "Maximum number of matches to return (default: 50)"
148                },
149                "case_insensitive": {
150                    "type": "boolean",
151                    "description": "If true, perform case-insensitive matching"
152                }
153            },
154            "required": ["pattern"]
155        }))
156    }
157
158    async fn execute(&self, input: &str, ctx: &ToolContext) -> anyhow::Result<ToolResult> {
159        let request: ContentSearchInput = serde_json::from_str(input).context(
160            "content_search expects JSON: {\"pattern\", \"path\"?, \"glob\"?, \"limit\"?, \"case_insensitive\"?}",
161        )?;
162
163        if request.pattern.is_empty() {
164            return Err(anyhow!("pattern must not be empty"));
165        }
166
167        let regex = if request.case_insensitive {
168            regex::RegexBuilder::new(&request.pattern)
169                .case_insensitive(true)
170                .build()
171        } else {
172            regex::Regex::new(&request.pattern)
173        }
174        .with_context(|| format!("invalid regex pattern: {}", request.pattern))?;
175
176        let workspace_root = PathBuf::from(&ctx.workspace_root);
177        let base = Self::resolve_base(request.path.as_deref(), &ctx.workspace_root)?;
178        let canonical_root = workspace_root
179            .canonicalize()
180            .context("unable to resolve workspace root")?;
181
182        let files = Self::walk_files(&base, request.glob.as_deref(), &canonical_root)?;
183
184        let limit = if request.limit == 0 {
185            DEFAULT_LIMIT
186        } else {
187            request.limit
188        };
189
190        let mut results = Vec::new();
191        'outer: for file_path in &files {
192            let bytes = match std::fs::read(file_path) {
193                Ok(b) => b,
194                Err(_) => continue,
195            };
196
197            if Self::looks_binary(&bytes) {
198                continue;
199            }
200
201            let content = match std::str::from_utf8(&bytes) {
202                Ok(s) => s,
203                Err(_) => continue,
204            };
205
206            let relative = file_path.strip_prefix(&canonical_root).unwrap_or(file_path);
207
208            for (line_num, line) in content.lines().enumerate() {
209                if regex.is_match(line) {
210                    let display_line = if line.len() > MAX_LINE_DISPLAY {
211                        format!("{}...", &line[..MAX_LINE_DISPLAY])
212                    } else {
213                        line.to_string()
214                    };
215                    results.push(format!(
216                        "{}:{}:{}",
217                        relative.display(),
218                        line_num + 1,
219                        display_line
220                    ));
221                    if results.len() >= limit {
222                        break 'outer;
223                    }
224                }
225            }
226        }
227
228        if results.is_empty() {
229            return Ok(ToolResult {
230                output: "no matches found".to_string(),
231            });
232        }
233
234        let truncated = results.len() >= limit;
235        let mut output = results.join("\n");
236        if truncated {
237            output.push_str(&format!("\n<truncated at {} results>", limit));
238        }
239
240        Ok(ToolResult { output })
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::ContentSearchTool;
    use agentzero_core::{Tool, ToolContext};
    use std::fs;
    use std::path::{Path, PathBuf};
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Monotonic counter so concurrently-running tests never collide on a
    // temp directory name even within the same nanosecond.
    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create a unique scratch directory under the system temp dir.
    fn temp_dir() -> PathBuf {
        let stamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("clock")
            .as_nanos();
        let seq = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
        let name = format!(
            "agentzero-content-search-{}-{stamp}-{seq}",
            std::process::id()
        );
        let dir = std::env::temp_dir().join(name);
        fs::create_dir_all(&dir).expect("temp dir should be created");
        dir
    }

    /// Run the tool once against `dir` with the given raw JSON input.
    async fn search(dir: &Path, input: &str) -> anyhow::Result<agentzero_core::ToolResult> {
        ContentSearchTool
            .execute(input, &ToolContext::new(dir.to_string_lossy().to_string()))
            .await
    }

    #[tokio::test]
    async fn content_search_finds_matches() {
        let dir = temp_dir();
        fs::write(
            dir.join("main.rs"),
            "fn main() {\n    println!(\"hello\");\n}\n",
        )
        .unwrap();
        fs::write(dir.join("lib.rs"), "pub fn helper() {}\n").unwrap();

        let result = search(&dir, r#"{"pattern": "fn \\w+"}"#)
            .await
            .expect("search should succeed");
        assert!(result.output.contains("main.rs:1:fn main()"));
        assert!(result.output.contains("lib.rs:1:pub fn helper()"));
        fs::remove_dir_all(dir).ok();
    }

    #[tokio::test]
    async fn content_search_case_insensitive() {
        let dir = temp_dir();
        fs::write(dir.join("test.txt"), "Hello World\nhello world\n").unwrap();

        let result = search(&dir, r#"{"pattern": "HELLO", "case_insensitive": true}"#)
            .await
            .expect("case insensitive search should succeed");
        // Both casings must match, one per line.
        assert!(result.output.contains(":1:"));
        assert!(result.output.contains(":2:"));
        fs::remove_dir_all(dir).ok();
    }

    #[tokio::test]
    async fn content_search_with_glob_filter() {
        let dir = temp_dir();
        fs::write(dir.join("match.rs"), "fn test() {}\n").unwrap();
        fs::write(dir.join("skip.txt"), "fn test() {}\n").unwrap();

        let result = search(&dir, r#"{"pattern": "fn test", "glob": "*.rs"}"#)
            .await
            .expect("filtered search should succeed");
        assert!(result.output.contains("match.rs"));
        assert!(!result.output.contains("skip.txt"));
        fs::remove_dir_all(dir).ok();
    }

    #[tokio::test]
    async fn content_search_no_matches() {
        let dir = temp_dir();
        fs::write(dir.join("test.txt"), "nothing relevant here\n").unwrap();

        let result = search(&dir, r#"{"pattern": "nonexistent_pattern_xyz"}"#)
            .await
            .expect("no matches should succeed");
        assert!(result.output.contains("no matches"));
        fs::remove_dir_all(dir).ok();
    }

    #[tokio::test]
    async fn content_search_rejects_invalid_regex_negative_path() {
        let dir = temp_dir();

        let err = search(&dir, r#"{"pattern": "[invalid"}"#)
            .await
            .expect_err("invalid regex should fail");
        assert!(err.to_string().contains("invalid regex"));
        fs::remove_dir_all(dir).ok();
    }

    #[tokio::test]
    async fn content_search_rejects_empty_pattern_negative_path() {
        let dir = temp_dir();

        let err = search(&dir, r#"{"pattern": ""}"#)
            .await
            .expect_err("empty pattern should fail");
        assert!(err.to_string().contains("pattern must not be empty"));
        fs::remove_dir_all(dir).ok();
    }

    #[tokio::test]
    async fn content_search_skips_binary_files() {
        let dir = temp_dir();
        fs::write(dir.join("text.txt"), "searchable content\n").unwrap();
        // NUL bytes in the first 8 KiB mark a file as binary.
        fs::write(dir.join("binary.bin"), [0u8, 1, 2, 3, 0, 5, 6]).unwrap();

        let result = search(&dir, r#"{"pattern": "."}"#)
            .await
            .expect("search should succeed");
        assert!(result.output.contains("text.txt"));
        assert!(!result.output.contains("binary.bin"));
        fs::remove_dir_all(dir).ok();
    }
}