spec_ai_core/tools/builtin/
rg.rs

1use crate::tools::{Tool, ToolResult};
2use anyhow::{anyhow, Context, Result};
3use async_trait::async_trait;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use std::path::PathBuf;
7use std::process::Command;
8
/// Upper bound, in bytes, on the search output handed back to the caller.
///
/// Longer output is cut off so that a huge result set cannot overflow the context.
const MAX_OUTPUT_BYTES: usize = 1024 * 100; // 100 KiB
11
12#[derive(Debug, Deserialize)]
13struct RgArgs {
14    /// Pattern to search for
15    pattern: String,
16    /// File or directory to search in
17    path: Option<String>,
18    /// Glob pattern to filter files (e.g., "*.rs")
19    #[serde(default)]
20    glob: Option<String>,
21    /// File type to search (e.g., "rust", "py", "js")
22    #[serde(rename = "type")]
23    #[serde(default)]
24    file_type: Option<String>,
25    /// Case insensitive search
26    #[serde(default)]
27    case_insensitive: bool,
28    /// Match whole words only
29    #[serde(default)]
30    word_regexp: bool,
31    /// Treat pattern as literal string (not regex)
32    #[serde(default)]
33    fixed_strings: bool,
34    /// Lines of context before and after match
35    #[serde(default)]
36    context: Option<usize>,
37    /// Lines of context before match
38    #[serde(default)]
39    before_context: Option<usize>,
40    /// Lines of context after match
41    #[serde(default)]
42    after_context: Option<usize>,
43    /// Max matches per file
44    #[serde(default)]
45    max_count: Option<usize>,
46    /// Search hidden files
47    #[serde(default)]
48    hidden: bool,
49    /// Don't respect .gitignore
50    #[serde(default)]
51    no_ignore: bool,
52    /// Multiline mode
53    #[serde(default)]
54    multiline: bool,
55}
56
57#[derive(Debug, Serialize)]
58struct RgResponse {
59    success: bool,
60    output: String,
61    truncated: bool,
62    match_count: usize,
63}
64
/// Tool that wraps the external `rg` (ripgrep) binary.
///
/// This tool provides access to ripgrep's powerful search capabilities
/// by shelling out to the `rg` command.
///
/// `Debug` and `Clone` are derived so the tool can be logged and duplicated
/// like any other plain value type.
#[derive(Debug, Clone)]
pub struct RgTool {
    /// Directory searched when a call does not supply its own `path`.
    root: PathBuf,
}
72
73impl RgTool {
74    pub fn new() -> Self {
75        let root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
76        Self { root }
77    }
78
79    pub fn with_root(mut self, root: impl Into<PathBuf>) -> Self {
80        self.root = root.into();
81        self
82    }
83
84    fn resolve_path(&self, override_path: &Option<String>) -> PathBuf {
85        override_path
86            .as_ref()
87            .map(PathBuf::from)
88            .unwrap_or_else(|| self.root.clone())
89    }
90
91    fn build_command(&self, args: &RgArgs) -> Command {
92        let mut cmd = Command::new("rg");
93
94        // Always use these flags for consistent output
95        cmd.arg("--line-number"); // Show line numbers
96        cmd.arg("--with-filename"); // Always show filename
97        cmd.arg("--color=never"); // No ANSI colors
98
99        // Pattern matching options
100        if args.case_insensitive {
101            cmd.arg("-i");
102        }
103        if args.word_regexp {
104            cmd.arg("-w");
105        }
106        if args.fixed_strings {
107            cmd.arg("-F");
108        }
109        if args.multiline {
110            cmd.arg("-U");
111        }
112
113        // Context options
114        if let Some(ctx) = args.context {
115            cmd.arg("-C").arg(ctx.to_string());
116        } else {
117            if let Some(before) = args.before_context {
118                cmd.arg("-B").arg(before.to_string());
119            }
120            if let Some(after) = args.after_context {
121                cmd.arg("-A").arg(after.to_string());
122            }
123        }
124
125        // File filtering
126        if let Some(ref glob) = args.glob {
127            cmd.arg("-g").arg(glob);
128        }
129        if let Some(ref file_type) = args.file_type {
130            cmd.arg("-t").arg(file_type);
131        }
132
133        // Max matches per file
134        if let Some(max) = args.max_count {
135            cmd.arg("-m").arg(max.to_string());
136        }
137
138        // Hidden and ignore options
139        if args.hidden {
140            cmd.arg("--hidden");
141        }
142        if args.no_ignore {
143            cmd.arg("--no-ignore");
144        }
145
146        // The pattern
147        cmd.arg(&args.pattern);
148
149        // The search path
150        let search_path = self.resolve_path(&args.path);
151        cmd.arg(&search_path);
152
153        cmd
154    }
155}
156
157impl Default for RgTool {
158    fn default() -> Self {
159        Self::new()
160    }
161}
162
163#[async_trait]
164impl Tool for RgTool {
165    fn name(&self) -> &str {
166        "rg"
167    }
168
169    fn description(&self) -> &str {
170        "Search for patterns in files using ripgrep (rg). Requires the 'rg' binary to be installed on the system. Returns matching lines with file paths and line numbers."
171    }
172
173    fn parameters(&self) -> Value {
174        serde_json::json!({
175            "type": "object",
176            "properties": {
177                "pattern": {
178                    "type": "string",
179                    "description": "Pattern to search for (regex by default, or literal if fixed_strings=true)"
180                },
181                "path": {
182                    "type": "string",
183                    "description": "File or directory to search in (defaults to current workspace)"
184                },
185                "glob": {
186                    "type": "string",
187                    "description": "Glob pattern to filter files (e.g., '*.rs', '*.{js,ts}')"
188                },
189                "type": {
190                    "type": "string",
191                    "description": "File type to search (e.g., 'rust', 'py', 'js', 'ts'). Use 'rg --type-list' to see all types."
192                },
193                "case_insensitive": {
194                    "type": "boolean",
195                    "description": "Case insensitive search (-i)",
196                    "default": false
197                },
198                "word_regexp": {
199                    "type": "boolean",
200                    "description": "Match whole words only (-w)",
201                    "default": false
202                },
203                "fixed_strings": {
204                    "type": "boolean",
205                    "description": "Treat pattern as literal string, not regex (-F)",
206                    "default": false
207                },
208                "context": {
209                    "type": "integer",
210                    "description": "Lines of context before and after each match (-C)"
211                },
212                "before_context": {
213                    "type": "integer",
214                    "description": "Lines of context before each match (-B)"
215                },
216                "after_context": {
217                    "type": "integer",
218                    "description": "Lines of context after each match (-A)"
219                },
220                "max_count": {
221                    "type": "integer",
222                    "description": "Maximum matches per file (-m)"
223                },
224                "hidden": {
225                    "type": "boolean",
226                    "description": "Search hidden files and directories (--hidden)",
227                    "default": false
228                },
229                "no_ignore": {
230                    "type": "boolean",
231                    "description": "Don't respect .gitignore and other ignore files (--no-ignore)",
232                    "default": false
233                },
234                "multiline": {
235                    "type": "boolean",
236                    "description": "Enable multiline matching (-U)",
237                    "default": false
238                }
239            },
240            "required": ["pattern"]
241        })
242    }
243
244    async fn execute(&self, args: Value) -> Result<ToolResult> {
245        let args: RgArgs =
246            serde_json::from_value(args).context("Failed to parse rg arguments")?;
247
248        if args.pattern.trim().is_empty() {
249            return Err(anyhow!("rg pattern cannot be empty"));
250        }
251
252        let search_path = self.resolve_path(&args.path);
253        if !search_path.exists() {
254            return Err(anyhow!(
255                "Search path {} does not exist",
256                search_path.display()
257            ));
258        }
259
260        let mut cmd = self.build_command(&args);
261
262        let output = cmd.output().context(
263            "Failed to execute 'rg' command. Is ripgrep installed? Install with: brew install ripgrep (macOS), apt install ripgrep (Debian/Ubuntu), or cargo install ripgrep",
264        )?;
265
266        let stdout = String::from_utf8_lossy(&output.stdout);
267        let stderr = String::from_utf8_lossy(&output.stderr);
268
269        // rg exits with code 1 when no matches found (not an error)
270        // rg exits with code 2 for actual errors
271        if !output.status.success() && output.status.code() == Some(2) {
272            return Err(anyhow!("rg error: {}", stderr));
273        }
274
275        let mut result_output = stdout.to_string();
276        let mut truncated = false;
277
278        // Count matches (lines that look like file:line:content)
279        let match_count = result_output
280            .lines()
281            .filter(|line| {
282                // Context lines start with file:line-, match lines start with file:line:
283                line.contains(':') && !line.starts_with("--")
284            })
285            .count();
286
287        // Truncate if too large
288        if result_output.len() > MAX_OUTPUT_BYTES {
289            result_output.truncate(MAX_OUTPUT_BYTES);
290            // Try to truncate at a line boundary
291            if let Some(last_newline) = result_output.rfind('\n') {
292                result_output.truncate(last_newline);
293            }
294            result_output.push_str("\n... [output truncated]");
295            truncated = true;
296        }
297
298        if result_output.is_empty() {
299            result_output = "No matches found.".to_string();
300        }
301
302        let response = RgResponse {
303            success: true,
304            output: result_output,
305            truncated,
306            match_count,
307        };
308
309        Ok(ToolResult::success(
310            serde_json::to_string(&response).context("Failed to serialize rg results")?,
311        ))
312    }
313}
314
#[cfg(test)]
mod tests {
    use super::*;

    /// Arguments with only `pattern` set; every option is left switched off.
    fn args_for(pattern: &str) -> RgArgs {
        RgArgs {
            pattern: pattern.to_string(),
            path: None,
            glob: None,
            file_type: None,
            case_insensitive: false,
            word_regexp: false,
            fixed_strings: false,
            context: None,
            before_context: None,
            after_context: None,
            max_count: None,
            hidden: false,
            no_ignore: false,
            multiline: false,
        }
    }

    /// Returns the argument list of `cmd` as owned strings.
    fn arg_list(cmd: &std::process::Command) -> Vec<String> {
        cmd.get_args()
            .map(|arg| arg.to_string_lossy().into_owned())
            .collect()
    }

    /// Asserts that each entry of `expected` appears somewhere in `actual`.
    fn assert_has_all(actual: &[String], expected: &[&str]) {
        for wanted in expected {
            assert!(
                actual.iter().any(|arg| arg == wanted),
                "missing argument {}",
                wanted
            );
        }
    }

    /// A bare search invokes `rg` with the fixed output flags, pattern and path.
    #[test]
    fn test_build_command_basic() {
        let mut args = args_for("test");
        args.path = Some("/tmp".to_string());

        let cmd = RgTool::new().build_command(&args);
        assert_eq!(cmd.get_program().to_string_lossy(), "rg");

        assert_has_all(
            &arg_list(&cmd),
            &["--line-number", "--with-filename", "--color=never", "test", "/tmp"],
        );
    }

    /// Every optional setting is translated into its rg flag (and value).
    #[test]
    fn test_build_command_with_options() {
        let args = RgArgs {
            glob: Some("*.rs".to_string()),
            file_type: Some("rust".to_string()),
            case_insensitive: true,
            word_regexp: true,
            fixed_strings: true,
            context: Some(3),
            max_count: Some(10),
            hidden: true,
            no_ignore: true,
            multiline: true,
            ..args_for("TODO")
        };

        let cmd = RgTool::new().build_command(&args);

        assert_has_all(
            &arg_list(&cmd),
            &[
                "-i",
                "-w",
                "-F",
                "-U",
                "-C",
                "3",
                "-g",
                "*.rs",
                "-t",
                "rust",
                "-m",
                "10",
                "--hidden",
                "--no-ignore",
            ],
        );
    }

    /// One-sided context settings map to `-B` and `-A` when `context` is unset.
    #[test]
    fn test_build_command_before_after_context() {
        let args = RgArgs {
            before_context: Some(2),
            after_context: Some(5),
            ..args_for("test")
        };

        let cmd = RgTool::new().build_command(&args);

        assert_has_all(&arg_list(&cmd), &["-B", "2", "-A", "5"]);
    }
}