Skip to main content

pawan/tools/
search.rs

1//! Search tools (glob and grep)
2
3use super::Tool;
4use async_trait::async_trait;
5use serde_json::{json, Value};
6use std::path::PathBuf;
7
/// File-discovery tool: lists files whose path matches a glob pattern.
///
/// All searches are anchored at `workspace_root`; reported paths are made
/// relative to it whenever possible.
pub struct GlobSearchTool {
    /// Directory that relative `path` arguments are resolved against.
    workspace_root: PathBuf,
}

impl GlobSearchTool {
    /// Creates a glob search tool rooted at `workspace_root`.
    pub fn new(workspace_root: PathBuf) -> Self {
        GlobSearchTool { workspace_root }
    }
}
18
19#[async_trait]
20impl Tool for GlobSearchTool {
21    fn name(&self) -> &str {
22        "glob_search"
23    }
24
25    fn description(&self) -> &str {
26        "Find files matching a glob pattern. Respects .gitignore. \
27         Examples: '**/*.rs', 'src/**/*.toml', 'Cargo.*'"
28    }
29
30    fn parameters_schema(&self) -> Value {
31        json!({
32            "type": "object",
33            "properties": {
34                "pattern": {
35                    "type": "string",
36                    "description": "Glob pattern to match files"
37                },
38                "path": {
39                    "type": "string",
40                    "description": "Directory to search in (optional, defaults to workspace root)"
41                },
42                "max_results": {
43                    "type": "integer",
44                    "description": "Maximum number of results (default: 100)"
45                }
46            },
47            "required": ["pattern"]
48        })
49    }
50
51    fn thulp_definition(&self) -> thulp_core::ToolDefinition {
52        use thulp_core::{Parameter, ParameterType};
53        thulp_core::ToolDefinition::builder("glob_search")
54            .description(self.description())
55            .parameter(Parameter::builder("pattern").param_type(ParameterType::String).required(true)
56                .description("Glob pattern to match files").build())
57            .parameter(Parameter::builder("path").param_type(ParameterType::String).required(false)
58                .description("Directory to search in (optional, defaults to workspace root)").build())
59            .parameter(Parameter::builder("max_results").param_type(ParameterType::Integer).required(false)
60                .description("Maximum number of results (default: 100)").build())
61            .build()
62    }
63
64    async fn execute(&self, args: Value) -> crate::Result<Value> {
65        let pattern = args["pattern"]
66            .as_str()
67            .ok_or_else(|| crate::PawanError::Tool("pattern is required".into()))?;
68
69        let base_path = args["path"]
70            .as_str()
71            .map(|p| self.workspace_root.join(p))
72            .unwrap_or_else(|| self.workspace_root.clone());
73
74        let max_results = args["max_results"].as_u64().unwrap_or(100) as usize;
75
76        // Use ignore crate to respect .gitignore
77        let mut builder = ignore::WalkBuilder::new(&base_path);
78        builder.hidden(false); // Include hidden files if explicitly matched
79
80        let mut matches = Vec::new();
81        let glob_matcher = glob::Pattern::new(pattern)
82            .map_err(|e| crate::PawanError::Tool(format!("Invalid glob pattern: {}", e)))?;
83
84        for result in builder.build() {
85            if matches.len() >= max_results {
86                break;
87            }
88
89            if let Ok(entry) = result {
90                let path = entry.path();
91                if path.is_file() {
92                    let relative = path.strip_prefix(&self.workspace_root).unwrap_or(path);
93                    let relative_str = relative.to_string_lossy();
94
95                    if glob_matcher.matches(&relative_str) {
96                        let metadata = path.metadata().ok();
97                        let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
98                        let modified = metadata.and_then(|m| m.modified().ok()).map(|t| {
99                            t.duration_since(std::time::UNIX_EPOCH)
100                                .map(|d| d.as_secs())
101                                .unwrap_or(0)
102                        });
103                        matches.push(json!({
104                            "path": relative_str,
105                            "size": size,
106                            "modified": modified
107                        }));
108                    }
109                }
110            }
111        }
112
113        // Sort by modification time (newest first)
114        matches.sort_by(|a, b| {
115            let a_mod = a["modified"].as_u64().unwrap_or(0);
116            let b_mod = b["modified"].as_u64().unwrap_or(0);
117            b_mod.cmp(&a_mod)
118        });
119
120        Ok(json!({
121            "pattern": pattern,
122            "matches": matches,
123            "count": matches.len(),
124            "truncated": matches.len() >= max_results
125        }))
126    }
127}
128
/// Content-search tool: finds lines matching a regular expression in files.
///
/// All searches are anchored at `workspace_root`; reported paths are made
/// relative to it whenever possible.
pub struct GrepSearchTool {
    /// Directory that relative `path` arguments are resolved against.
    workspace_root: PathBuf,
}

impl GrepSearchTool {
    /// Creates a grep search tool rooted at `workspace_root`.
    pub fn new(workspace_root: PathBuf) -> Self {
        GrepSearchTool { workspace_root }
    }
}
139
140#[async_trait]
141impl Tool for GrepSearchTool {
142    fn name(&self) -> &str {
143        "grep_search"
144    }
145
146    fn description(&self) -> &str {
147        "Search file contents for a pattern. Supports regex. \
148         Returns file paths and line numbers with matches."
149    }
150
151    fn parameters_schema(&self) -> Value {
152        json!({
153            "type": "object",
154            "properties": {
155                "pattern": {
156                    "type": "string",
157                    "description": "Pattern to search for (supports regex)"
158                },
159                "path": {
160                    "type": "string",
161                    "description": "Directory to search in (optional, defaults to workspace root)"
162                },
163                "include": {
164                    "type": "string",
165                    "description": "File pattern to include (e.g., '*.rs', '*.{ts,tsx}')"
166                },
167                "max_results": {
168                    "type": "integer",
169                    "description": "Maximum number of matching files (default: 50)"
170                },
171                "context_lines": {
172                    "type": "integer",
173                    "description": "Lines of context around matches (default: 0)"
174                }
175            },
176            "required": ["pattern"]
177        })
178    }
179
180    fn thulp_definition(&self) -> thulp_core::ToolDefinition {
181        use thulp_core::{Parameter, ParameterType};
182        thulp_core::ToolDefinition::builder("grep_search")
183            .description(self.description())
184            .parameter(Parameter::builder("pattern").param_type(ParameterType::String).required(true)
185                .description("Pattern to search for (supports regex)").build())
186            .parameter(Parameter::builder("path").param_type(ParameterType::String).required(false)
187                .description("Directory to search in (optional, defaults to workspace root)").build())
188            .parameter(Parameter::builder("include").param_type(ParameterType::String).required(false)
189                .description("File pattern to include (e.g., '*.rs', '*.{ts,tsx}')").build())
190            .parameter(Parameter::builder("max_results").param_type(ParameterType::Integer).required(false)
191                .description("Maximum number of matching files (default: 50)").build())
192            .parameter(Parameter::builder("context_lines").param_type(ParameterType::Integer).required(false)
193                .description("Lines of context around matches (default: 0)").build())
194            .build()
195    }
196
197    async fn execute(&self, args: Value) -> crate::Result<Value> {
198        let pattern = args["pattern"]
199            .as_str()
200            .ok_or_else(|| crate::PawanError::Tool("pattern is required".into()))?;
201
202        let base_path = args["path"]
203            .as_str()
204            .map(|p| self.workspace_root.join(p))
205            .unwrap_or_else(|| self.workspace_root.clone());
206
207        let include = args["include"].as_str();
208        let max_results = args["max_results"].as_u64().unwrap_or(50) as usize;
209        let context_lines = args["context_lines"].as_u64().unwrap_or(0) as usize;
210
211        // Build regex
212        let regex = regex::Regex::new(pattern)
213            .map_err(|e| crate::PawanError::Tool(format!("Invalid regex: {}", e)))?;
214
215        // Build glob matcher for include filter
216        let include_matcher = include
217            .map(glob::Pattern::new)
218            .transpose()
219            .map_err(|e| crate::PawanError::Tool(format!("Invalid include pattern: {}", e)))?;
220
221        let mut file_matches = Vec::new();
222
223        // Walk directory
224        let mut builder = ignore::WalkBuilder::new(&base_path);
225        builder.hidden(false);
226
227        for result in builder.build() {
228            if file_matches.len() >= max_results {
229                break;
230            }
231
232            if let Ok(entry) = result {
233                let path = entry.path();
234                if !path.is_file() {
235                    continue;
236                }
237
238                let relative = path.strip_prefix(&self.workspace_root).unwrap_or(path);
239                let relative_str = relative.to_string_lossy();
240
241                // Check include filter
242                if let Some(ref matcher) = include_matcher {
243                    // Match against filename only
244                    let filename = path
245                        .file_name()
246                        .map(|n| n.to_string_lossy())
247                        .unwrap_or_default();
248                    if !matcher.matches(&filename) && !matcher.matches(&relative_str) {
249                        continue;
250                    }
251                }
252
253                // Read and search file
254                if let Ok(content) = std::fs::read_to_string(path) {
255                    let mut line_matches = Vec::new();
256                    let lines: Vec<&str> = content.lines().collect();
257
258                    for (line_num, line) in lines.iter().enumerate() {
259                        if regex.is_match(line) {
260                            let mut match_info = json!({
261                                "line": line_num + 1,
262                                "content": line.chars().take(200).collect::<String>()
263                            });
264
265                            // Add context if requested
266                            if context_lines > 0 {
267                                let start = line_num.saturating_sub(context_lines);
268                                let end = (line_num + context_lines + 1).min(lines.len());
269                                let context: Vec<String> = lines[start..end]
270                                    .iter()
271                                    .enumerate()
272                                    .map(|(i, l)| format!("{}: {}", start + i + 1, l))
273                                    .collect();
274                                match_info["context"] = json!(context);
275                            }
276
277                            line_matches.push(match_info);
278                        }
279                    }
280
281                    if !line_matches.is_empty() {
282                        file_matches.push(json!({
283                            "path": relative_str,
284                            "matches": line_matches,
285                            "match_count": line_matches.len()
286                        }));
287                    }
288                }
289            }
290        }
291
292        // Sort by match count (most matches first)
293        file_matches.sort_by(|a, b| {
294            let a_count = a["match_count"].as_u64().unwrap_or(0);
295            let b_count = b["match_count"].as_u64().unwrap_or(0);
296            b_count.cmp(&a_count)
297        });
298
299        let total_matches: u64 = file_matches
300            .iter()
301            .map(|f| f["match_count"].as_u64().unwrap_or(0))
302            .sum();
303
304        Ok(json!({
305            "pattern": pattern,
306            "files": file_matches,
307            "file_count": file_matches.len(),
308            "total_matches": total_matches,
309            "truncated": file_matches.len() >= max_results
310        }))
311    }
312}
313
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fresh temporary directory used as the workspace root for one test.
    fn workspace() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Writes `contents` to `name`, resolved relative to the workspace.
    fn put(ws: &TempDir, name: &str, contents: &str) {
        std::fs::write(ws.path().join(name), contents).unwrap();
    }

    /// Glob tool rooted at the workspace.
    fn glob_in(ws: &TempDir) -> GlobSearchTool {
        GlobSearchTool::new(ws.path().to_path_buf())
    }

    /// Grep tool rooted at the workspace.
    fn grep_in(ws: &TempDir) -> GrepSearchTool {
        GrepSearchTool::new(ws.path().to_path_buf())
    }

    #[tokio::test]
    async fn test_glob_search() {
        let ws = workspace();
        put(&ws, "file1.rs", "rust code");
        put(&ws, "file2.rs", "more rust");
        put(&ws, "file3.txt", "text file");

        let out = glob_in(&ws)
            .execute(json!({"pattern": "*.rs"}))
            .await
            .unwrap();

        assert_eq!(out["count"], 2);
    }

    #[tokio::test]
    async fn test_grep_search() {
        let ws = workspace();
        put(&ws, "test.rs", "fn main() {\n    println!(\"hello\");\n}");

        let query = json!({
            "pattern": "println",
            "include": "*.rs"
        });
        let out = grep_in(&ws).execute(query).await.unwrap();

        assert_eq!(out["file_count"], 1);
        assert_eq!(out["total_matches"], 1);
    }

    // --- GlobSearchTool expanded tests ---

    #[tokio::test]
    async fn test_glob_no_matches() {
        let ws = workspace();
        put(&ws, "file.txt", "text");

        let out = glob_in(&ws)
            .execute(json!({"pattern": "*.rs"}))
            .await
            .unwrap();

        assert_eq!(out["count"], 0);
        assert_eq!(out["truncated"], false);
    }

    #[tokio::test]
    async fn test_glob_invalid_pattern() {
        let ws = workspace();
        let outcome = glob_in(&ws).execute(json!({"pattern": "[invalid"})).await;
        assert!(outcome.is_err(), "Invalid glob should error");
    }

    #[tokio::test]
    async fn test_glob_missing_pattern() {
        let ws = workspace();
        let outcome = glob_in(&ws).execute(json!({})).await;
        assert!(outcome.is_err(), "Missing pattern should error");
    }

    #[tokio::test]
    async fn test_glob_max_results() {
        let ws = workspace();
        for i in 0..10 {
            put(&ws, &format!("f{}.rs", i), "code");
        }

        let out = glob_in(&ws)
            .execute(json!({"pattern": "*.rs", "max_results": 3}))
            .await
            .unwrap();

        assert_eq!(out["count"], 3);
        assert_eq!(out["truncated"], true);
    }

    #[tokio::test]
    async fn test_glob_subdirectory() {
        let ws = workspace();
        std::fs::create_dir(ws.path().join("sub")).unwrap();
        put(&ws, "sub/a.rs", "code");
        put(&ws, "b.rs", "code");

        // Restrict the walk to sub/ so the top-level b.rs is never visited.
        let out = glob_in(&ws)
            .execute(json!({"pattern": "*.rs", "path": "sub"}))
            .await
            .unwrap();

        assert_eq!(out["count"], 1);
    }

    #[tokio::test]
    async fn test_glob_result_has_metadata() {
        let ws = workspace();
        put(&ws, "f.rs", "hello world");

        let out = glob_in(&ws)
            .execute(json!({"pattern": "*.rs"}))
            .await
            .unwrap();

        let hit = &out["matches"][0];
        assert!(hit["path"].as_str().is_some());
        assert!(hit["size"].as_u64().unwrap() > 0);
        assert!(hit["modified"].as_u64().is_some());
    }

    // --- GrepSearchTool expanded tests ---

    #[tokio::test]
    async fn test_grep_no_matches() {
        let ws = workspace();
        put(&ws, "f.rs", "fn main() {}");

        let out = grep_in(&ws)
            .execute(json!({"pattern": "nonexistent_string_xyz"}))
            .await
            .unwrap();

        assert_eq!(out["file_count"], 0);
        assert_eq!(out["total_matches"], 0);
    }

    #[tokio::test]
    async fn test_grep_regex() {
        let ws = workspace();
        put(&ws, "f.rs", "fn foo() {}\nfn bar() {}\nfn baz() {}");

        let out = grep_in(&ws)
            .execute(json!({"pattern": "fn \\w+\\(\\)"}))
            .await
            .unwrap();

        assert_eq!(out["total_matches"], 3);
    }

    #[tokio::test]
    async fn test_grep_invalid_regex() {
        let ws = workspace();
        let outcome = grep_in(&ws).execute(json!({"pattern": "[invalid"})).await;
        assert!(outcome.is_err(), "Invalid regex should error");
    }

    #[tokio::test]
    async fn test_grep_missing_pattern() {
        let ws = workspace();
        let outcome = grep_in(&ws).execute(json!({})).await;
        assert!(outcome.is_err(), "Missing pattern should error");
    }

    #[tokio::test]
    async fn test_grep_include_filter() {
        let ws = workspace();
        put(&ws, "a.rs", "hello");
        put(&ws, "b.txt", "hello");

        let out = grep_in(&ws)
            .execute(json!({"pattern": "hello", "include": "*.rs"}))
            .await
            .unwrap();

        assert_eq!(out["file_count"], 1);
        let hit_path = out["files"][0]["path"].as_str().unwrap();
        assert!(hit_path.ends_with(".rs"));
    }

    #[tokio::test]
    async fn test_grep_context_lines() {
        let ws = workspace();
        put(&ws, "f.rs", "line1\nline2\nTARGET\nline4\nline5");

        let out = grep_in(&ws)
            .execute(json!({"pattern": "TARGET", "context_lines": 1}))
            .await
            .unwrap();

        let hits = out["files"][0]["matches"].as_array().unwrap();
        assert!(hits[0]["context"].is_array());
        let window = hits[0]["context"].as_array().unwrap();
        assert_eq!(window.len(), 3); // 1 before + match + 1 after
    }

    #[tokio::test]
    async fn test_grep_max_results() {
        let ws = workspace();
        for i in 0..10 {
            put(&ws, &format!("f{}.rs", i), "match_me");
        }

        let out = grep_in(&ws)
            .execute(json!({"pattern": "match_me", "max_results": 3}))
            .await
            .unwrap();

        assert_eq!(out["file_count"], 3);
        assert_eq!(out["truncated"], true);
    }

    #[tokio::test]
    async fn test_grep_multiple_matches_in_file() {
        let ws = workspace();
        put(&ws, "f.rs", "foo\nbar\nfoo\nbaz\nfoo");

        let out = grep_in(&ws)
            .execute(json!({"pattern": "foo"}))
            .await
            .unwrap();

        assert_eq!(out["files"][0]["match_count"], 3);
        assert_eq!(out["total_matches"], 3);
    }

    #[tokio::test]
    async fn test_grep_line_truncation() {
        let ws = workspace();
        put(&ws, "f.rs", &"x".repeat(500));

        let out = grep_in(&ws)
            .execute(json!({"pattern": "x+"}))
            .await
            .unwrap();

        let shown = out["files"][0]["matches"][0]["content"].as_str().unwrap();
        assert_eq!(shown.len(), 200, "Line content should be truncated to 200 chars");
    }

    #[tokio::test]
    async fn test_grep_sorted_by_match_count() {
        let ws = workspace();
        put(&ws, "few.rs", "x");
        put(&ws, "many.rs", "x\nx\nx\nx\nx");

        let out = grep_in(&ws)
            .execute(json!({"pattern": "x"}))
            .await
            .unwrap();

        let ranked = out["files"].as_array().unwrap();
        assert_eq!(ranked.len(), 2);
        // The file with more matching lines must come first.
        let leading = ranked[0]["match_count"].as_u64().unwrap();
        let trailing = ranked[1]["match_count"].as_u64().unwrap();
        assert!(leading >= trailing, "Results should be sorted by match count desc");
    }
}