spec_ai_core/tools/builtin/
rg.rs

1use crate::tools::{Tool, ToolResult};
2use anyhow::{anyhow, Context, Result};
3use async_trait::async_trait;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use std::path::PathBuf;
7use std::process::Command;
8
/// Upper bound, in bytes, on the search output handed back to the caller.
///
/// Keeping the output below this size prevents a single search from
/// overflowing the consumer's context.
const MAX_OUTPUT_BYTES: usize = 1024 * 100; // 100 KiB
11
12#[derive(Debug, Deserialize)]
13struct RgArgs {
14    /// Pattern to search for
15    pattern: String,
16    /// File or directory to search in
17    path: Option<String>,
18    /// Glob pattern to filter files (e.g., "*.rs")
19    #[serde(default)]
20    glob: Option<String>,
21    /// File type to search (e.g., "rust", "py", "js")
22    #[serde(rename = "type")]
23    #[serde(default)]
24    file_type: Option<String>,
25    /// Case insensitive search
26    #[serde(default)]
27    case_insensitive: bool,
28    /// Match whole words only
29    #[serde(default)]
30    word_regexp: bool,
31    /// Treat pattern as literal string (not regex)
32    #[serde(default)]
33    fixed_strings: bool,
34    /// Lines of context before and after match
35    #[serde(default)]
36    context: Option<usize>,
37    /// Lines of context before match
38    #[serde(default)]
39    before_context: Option<usize>,
40    /// Lines of context after match
41    #[serde(default)]
42    after_context: Option<usize>,
43    /// Max matches per file
44    #[serde(default)]
45    max_count: Option<usize>,
46    /// Search hidden files
47    #[serde(default)]
48    hidden: bool,
49    /// Don't respect .gitignore
50    #[serde(default)]
51    no_ignore: bool,
52    /// Multiline mode
53    #[serde(default)]
54    multiline: bool,
55}
56
57#[derive(Debug, Serialize)]
58struct RgResponse {
59    success: bool,
60    output: String,
61    truncated: bool,
62    match_count: usize,
63}
64
/// Tool that wraps the external `rg` (ripgrep) binary.
///
/// This tool provides access to ripgrep's powerful search capabilities
/// by shelling out to the `rg` command.
///
/// The tool is cheap to clone and implements `Debug` so it can be logged and
/// embedded in other debuggable types.
#[derive(Debug, Clone)]
pub struct RgTool {
    /// Directory searched when a call does not name a path of its own.
    root: PathBuf,
}
72
73impl RgTool {
74    pub fn new() -> Self {
75        let root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
76        Self { root }
77    }
78
79    pub fn with_root(mut self, root: impl Into<PathBuf>) -> Self {
80        self.root = root.into();
81        self
82    }
83
84    fn resolve_path(&self, override_path: &Option<String>) -> PathBuf {
85        override_path
86            .as_ref()
87            .map(PathBuf::from)
88            .unwrap_or_else(|| self.root.clone())
89    }
90
91    fn build_command(&self, args: &RgArgs) -> Command {
92        let mut cmd = Command::new("rg");
93
94        // Always use these flags for consistent output
95        cmd.arg("--line-number"); // Show line numbers
96        cmd.arg("--with-filename"); // Always show filename
97        cmd.arg("--color=never"); // No ANSI colors
98
99        // Pattern matching options
100        if args.case_insensitive {
101            cmd.arg("-i");
102        }
103        if args.word_regexp {
104            cmd.arg("-w");
105        }
106        if args.fixed_strings {
107            cmd.arg("-F");
108        }
109        if args.multiline {
110            cmd.arg("-U");
111        }
112
113        // Context options
114        if let Some(ctx) = args.context {
115            cmd.arg("-C").arg(ctx.to_string());
116        } else {
117            if let Some(before) = args.before_context {
118                cmd.arg("-B").arg(before.to_string());
119            }
120            if let Some(after) = args.after_context {
121                cmd.arg("-A").arg(after.to_string());
122            }
123        }
124
125        // File filtering
126        if let Some(ref glob) = args.glob {
127            cmd.arg("-g").arg(glob);
128        }
129        if let Some(ref file_type) = args.file_type {
130            cmd.arg("-t").arg(file_type);
131        }
132
133        // Max matches per file
134        if let Some(max) = args.max_count {
135            cmd.arg("-m").arg(max.to_string());
136        }
137
138        // Hidden and ignore options
139        if args.hidden {
140            cmd.arg("--hidden");
141        }
142        if args.no_ignore {
143            cmd.arg("--no-ignore");
144        }
145
146        // The pattern
147        cmd.arg(&args.pattern);
148
149        // The search path
150        let search_path = self.resolve_path(&args.path);
151        cmd.arg(&search_path);
152
153        cmd
154    }
155}
156
157impl Default for RgTool {
158    fn default() -> Self {
159        Self::new()
160    }
161}
162
163#[async_trait]
164impl Tool for RgTool {
165    fn name(&self) -> &str {
166        "rg"
167    }
168
169    fn description(&self) -> &str {
170        "Search for patterns in files using ripgrep (rg). Requires the 'rg' binary to be installed on the system. Returns matching lines with file paths and line numbers."
171    }
172
173    fn parameters(&self) -> Value {
174        serde_json::json!({
175            "type": "object",
176            "properties": {
177                "pattern": {
178                    "type": "string",
179                    "description": "Pattern to search for (regex by default, or literal if fixed_strings=true)"
180                },
181                "path": {
182                    "type": "string",
183                    "description": "File or directory to search in (defaults to current workspace)"
184                },
185                "glob": {
186                    "type": "string",
187                    "description": "Glob pattern to filter files (e.g., '*.rs', '*.{js,ts}')"
188                },
189                "type": {
190                    "type": "string",
191                    "description": "File type to search (e.g., 'rust', 'py', 'js', 'ts'). Use 'rg --type-list' to see all types."
192                },
193                "case_insensitive": {
194                    "type": "boolean",
195                    "description": "Case insensitive search (-i)",
196                    "default": false
197                },
198                "word_regexp": {
199                    "type": "boolean",
200                    "description": "Match whole words only (-w)",
201                    "default": false
202                },
203                "fixed_strings": {
204                    "type": "boolean",
205                    "description": "Treat pattern as literal string, not regex (-F)",
206                    "default": false
207                },
208                "context": {
209                    "type": "integer",
210                    "description": "Lines of context before and after each match (-C)"
211                },
212                "before_context": {
213                    "type": "integer",
214                    "description": "Lines of context before each match (-B)"
215                },
216                "after_context": {
217                    "type": "integer",
218                    "description": "Lines of context after each match (-A)"
219                },
220                "max_count": {
221                    "type": "integer",
222                    "description": "Maximum matches per file (-m)"
223                },
224                "hidden": {
225                    "type": "boolean",
226                    "description": "Search hidden files and directories (--hidden)",
227                    "default": false
228                },
229                "no_ignore": {
230                    "type": "boolean",
231                    "description": "Don't respect .gitignore and other ignore files (--no-ignore)",
232                    "default": false
233                },
234                "multiline": {
235                    "type": "boolean",
236                    "description": "Enable multiline matching (-U)",
237                    "default": false
238                }
239            },
240            "required": ["pattern"]
241        })
242    }
243
244    async fn execute(&self, args: Value) -> Result<ToolResult> {
245        let args: RgArgs = serde_json::from_value(args).context("Failed to parse rg arguments")?;
246
247        if args.pattern.trim().is_empty() {
248            return Err(anyhow!("rg pattern cannot be empty"));
249        }
250
251        let search_path = self.resolve_path(&args.path);
252        if !search_path.exists() {
253            return Err(anyhow!(
254                "Search path {} does not exist",
255                search_path.display()
256            ));
257        }
258
259        let mut cmd = self.build_command(&args);
260
261        let output = cmd.output().context(
262            "Failed to execute 'rg' command. Is ripgrep installed? Install with: brew install ripgrep (macOS), apt install ripgrep (Debian/Ubuntu), or cargo install ripgrep",
263        )?;
264
265        let stdout = String::from_utf8_lossy(&output.stdout);
266        let stderr = String::from_utf8_lossy(&output.stderr);
267
268        // rg exits with code 1 when no matches found (not an error)
269        // rg exits with code 2 for actual errors
270        if !output.status.success() && output.status.code() == Some(2) {
271            return Err(anyhow!("rg error: {}", stderr));
272        }
273
274        let mut result_output = stdout.to_string();
275        let mut truncated = false;
276
277        // Count matches (lines that look like file:line:content)
278        let match_count = result_output
279            .lines()
280            .filter(|line| {
281                // Context lines start with file:line-, match lines start with file:line:
282                line.contains(':') && !line.starts_with("--")
283            })
284            .count();
285
286        // Truncate if too large
287        if result_output.len() > MAX_OUTPUT_BYTES {
288            result_output.truncate(MAX_OUTPUT_BYTES);
289            // Try to truncate at a line boundary
290            if let Some(last_newline) = result_output.rfind('\n') {
291                result_output.truncate(last_newline);
292            }
293            result_output.push_str("\n... [output truncated]");
294            truncated = true;
295        }
296
297        if result_output.is_empty() {
298            result_output = "No matches found.".to_string();
299        }
300
301        let response = RgResponse {
302            success: true,
303            output: result_output,
304            truncated,
305            match_count,
306        };
307
308        Ok(ToolResult::success(
309            serde_json::to_string(&response).context("Failed to serialize rg results")?,
310        ))
311    }
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Arguments with the given pattern and path and every option switched off.
    fn plain_args(pattern: &str, path: Option<&str>) -> RgArgs {
        RgArgs {
            pattern: pattern.to_string(),
            path: path.map(str::to_string),
            glob: None,
            file_type: None,
            case_insensitive: false,
            word_regexp: false,
            fixed_strings: false,
            context: None,
            before_context: None,
            after_context: None,
            max_count: None,
            hidden: false,
            no_ignore: false,
            multiline: false,
        }
    }

    /// Collects the arguments of `cmd` as owned strings.
    fn argv(cmd: &std::process::Command) -> Vec<String> {
        cmd.get_args()
            .map(|arg| arg.to_string_lossy().into_owned())
            .collect()
    }

    /// Asserts that each of `expected` occurs somewhere in `actual`.
    fn assert_has_all(actual: &[String], expected: &[&str]) {
        for want in expected {
            assert!(actual.iter().any(|have| have == want));
        }
    }

    #[test]
    fn test_build_command_basic() {
        let cmd = RgTool::new().build_command(&plain_args("test", Some("/tmp")));

        assert_eq!(cmd.get_program().to_string_lossy(), "rg");
        assert_has_all(
            &argv(&cmd),
            &[
                "--line-number",
                "--with-filename",
                "--color=never",
                "test",
                "/tmp",
            ],
        );
    }

    #[test]
    fn test_build_command_with_options() {
        let args = RgArgs {
            glob: Some("*.rs".to_string()),
            file_type: Some("rust".to_string()),
            case_insensitive: true,
            word_regexp: true,
            fixed_strings: true,
            context: Some(3),
            max_count: Some(10),
            hidden: true,
            no_ignore: true,
            multiline: true,
            ..plain_args("TODO", None)
        };

        let cmd = RgTool::new().build_command(&args);

        assert_has_all(
            &argv(&cmd),
            &[
                "-i",
                "-w",
                "-F",
                "-U",
                "-C",
                "3",
                "-g",
                "*.rs",
                "-t",
                "rust",
                "-m",
                "10",
                "--hidden",
                "--no-ignore",
            ],
        );
    }

    #[test]
    fn test_build_command_before_after_context() {
        let args = RgArgs {
            before_context: Some(2),
            after_context: Some(5),
            ..plain_args("test", None)
        };

        let cmd = RgTool::new().build_command(&args);

        assert_has_all(&argv(&cmd), &["-B", "2", "-A", "5"]);
    }
}