Skip to main content

dci_tool/
tools.rs

1//! Discrete, typed corpus-interaction tools exposed to a rig [`Agent`].
2//!
3//! Each tool is a thin, strongly-typed wrapper over [`crate::engine`] that
4//! runs the blocking engine work on a worker thread under a wall-clock
5//! timeout. The tools are intentionally narrow (search / find / read / list)
6//! rather than a single "run a shell command" tool: bounded, structured
7//! commands are what state-of-the-art agentic-search systems converged on, and
8//! they remove the shell-injection surface entirely.
9//!
10//! [`Agent`]: rig_core::agent::Agent
11
12use rig_core::completion::ToolDefinition;
13use rig_core::tool::Tool;
14use serde::Deserialize;
15use serde_json::json;
16
17use crate::engine::{
18    self, FindQuery, FindResult, ListResult, ReadResult, SearchQuery, SearchResult,
19};
20use crate::error::DciError;
21use crate::sandbox::CorpusRoot;
22
23/// Run a blocking engine operation on a worker thread, bounded by the corpus
24/// timeout.
25///
26/// The engine walks enforce the same `timeout` cooperatively (stopping between
27/// files and returning partial, `truncated` results). This outer bound is a
28/// backstop with a grace margin for the rare case where a single operation
29/// cannot reach a cancellation point in time; it should seldom fire.
30async fn run_blocking<T, F>(corpus: CorpusRoot, op: F) -> Result<T, DciError>
31where
32    F: FnOnce(&CorpusRoot) -> Result<T, DciError> + Send + 'static,
33    T: Send + 'static,
34{
35    let timeout = corpus.limits().timeout;
36    // Backstop margin: let the engine's cooperative deadline win in the normal
37    // case so callers get partial results instead of a hard timeout error.
38    let backstop = timeout + std::time::Duration::from_secs(5);
39    let handle = tokio::task::spawn_blocking(move || op(&corpus));
40    match tokio::time::timeout(backstop, handle).await {
41        Ok(Ok(result)) => result,
42        Ok(Err(join_err)) => Err(DciError::Worker(join_err.to_string())),
43        Err(_) => Err(DciError::Timeout {
44            millis: backstop.as_millis() as u64,
45        }),
46    }
47}
48
49/// Regex search across the corpus, returning `file:line` evidence.
50#[derive(Clone)]
51pub struct SearchTool {
52    corpus: CorpusRoot,
53}
54
55impl SearchTool {
56    /// Create a search tool bound to `corpus`.
57    pub fn new(corpus: CorpusRoot) -> Self {
58        Self { corpus }
59    }
60}
61
62/// Arguments for [`SearchTool`].
63#[derive(Debug, serde::Serialize, Deserialize)]
64pub struct SearchArgs {
65    /// Regular expression to match (ripgrep/Rust regex syntax).
66    pub pattern: String,
67    /// Optional glob restricting which files are searched (e.g. `**/*.log`).
68    #[serde(default)]
69    pub path_glob: Option<String>,
70    /// Case-insensitive matching. Defaults to `false`.
71    #[serde(default)]
72    pub case_insensitive: Option<bool>,
73    /// Lines of surrounding context to include per match. Defaults to `0`.
74    #[serde(default)]
75    pub context_lines: Option<usize>,
76    /// Cap on the number of matches returned.
77    #[serde(default)]
78    pub max_results: Option<usize>,
79}
80
81impl Tool for SearchTool {
82    const NAME: &'static str = "corpus_search";
83    type Error = DciError;
84    type Args = SearchArgs;
85    type Output = SearchResult;
86
87    async fn definition(&self, _prompt: String) -> ToolDefinition {
88        ToolDefinition {
89            name: Self::NAME.to_string(),
90            description: "Search the corpus with a regular expression and return matching \
91                          file paths, line numbers, and line text. Use this first to locate \
92                          evidence, then narrow with path_glob or read the surrounding lines."
93                .to_string(),
94            parameters: json!({
95                "type": "object",
96                "properties": {
97                    "pattern": {
98                        "type": "string",
99                        "description": "Regular expression to search for (Rust/ripgrep syntax)."
100                    },
101                    "path_glob": {
102                        "type": "string",
103                        "description": "Optional glob to restrict files, e.g. '**/*.log' or 'auth*'."
104                    },
105                    "case_insensitive": {
106                        "type": "boolean",
107                        "description": "Match case-insensitively. Default false."
108                    },
109                    "context_lines": {
110                        "type": "integer",
111                        "description": "Lines of context to include on each side of a match. Default 0."
112                    },
113                    "max_results": {
114                        "type": "integer",
115                        "description": "Maximum number of matching lines to return."
116                    }
117                },
118                "required": ["pattern"]
119            }),
120        }
121    }
122
123    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
124        let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
125        crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
126            let query = SearchQuery {
127                pattern: args.pattern,
128                path_glob: args.path_glob,
129                case_insensitive: args.case_insensitive.unwrap_or(false),
130                context_lines: args.context_lines.unwrap_or(0),
131                max_results: args.max_results,
132            };
133            run_blocking(self.corpus.clone(), move |c| engine::search(c, &query)).await
134        })
135        .await
136    }
137}
138
139/// Locate files by a glob over their corpus-relative path.
140#[derive(Clone)]
141pub struct FindTool {
142    corpus: CorpusRoot,
143}
144
145impl FindTool {
146    /// Create a find tool bound to `corpus`.
147    pub fn new(corpus: CorpusRoot) -> Self {
148        Self { corpus }
149    }
150}
151
152/// Arguments for [`FindTool`].
153#[derive(Debug, serde::Serialize, Deserialize)]
154pub struct FindArgs {
155    /// Glob to match against corpus-relative paths (e.g. `**/*.rs`, `auth*`).
156    pub glob: String,
157    /// Cap on the number of paths returned.
158    #[serde(default)]
159    pub max_results: Option<usize>,
160}
161
162impl Tool for FindTool {
163    const NAME: &'static str = "corpus_find";
164    type Error = DciError;
165    type Args = FindArgs;
166    type Output = FindResult;
167
168    async fn definition(&self, _prompt: String) -> ToolDefinition {
169        ToolDefinition {
170            name: Self::NAME.to_string(),
171            description: "Find files in the corpus whose path matches a glob. Use this to \
172                          discover where relevant files live before searching or reading them."
173                .to_string(),
174            parameters: json!({
175                "type": "object",
176                "properties": {
177                    "glob": {
178                        "type": "string",
179                        "description": "Glob over relative paths, e.g. '**/*.log', 'src/**/*.rs', or 'passwd'."
180                    },
181                    "max_results": {
182                        "type": "integer",
183                        "description": "Maximum number of paths to return."
184                    }
185                },
186                "required": ["glob"]
187            }),
188        }
189    }
190
191    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
192        let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
193        crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
194            let query = FindQuery {
195                glob: args.glob,
196                max_results: args.max_results,
197            };
198            run_blocking(self.corpus.clone(), move |c| engine::find(c, &query)).await
199        })
200        .await
201    }
202}
203
204/// Read a bounded, line-numbered window from one file.
205#[derive(Clone)]
206pub struct ReadTool {
207    corpus: CorpusRoot,
208}
209
210impl ReadTool {
211    /// Create a read tool bound to `corpus`.
212    pub fn new(corpus: CorpusRoot) -> Self {
213        Self { corpus }
214    }
215}
216
217/// Arguments for [`ReadTool`].
218#[derive(Debug, serde::Serialize, Deserialize)]
219pub struct ReadArgs {
220    /// Corpus-relative path of the file to read.
221    pub path: String,
222    /// 1-based line to start at. Defaults to 1.
223    #[serde(default)]
224    pub start_line: Option<usize>,
225    /// Number of lines to return. Clamped to the configured read limit.
226    #[serde(default)]
227    pub line_count: Option<usize>,
228}
229
230impl Tool for ReadTool {
231    const NAME: &'static str = "corpus_read";
232    type Error = DciError;
233    type Args = ReadArgs;
234    type Output = ReadResult;
235
236    async fn definition(&self, _prompt: String) -> ToolDefinition {
237        ToolDefinition {
238            name: Self::NAME.to_string(),
239            description: "Read a bounded, line-numbered window from a single corpus file. Use \
240                          this to inspect the exact lines around a search hit and quote evidence."
241                .to_string(),
242            parameters: json!({
243                "type": "object",
244                "properties": {
245                    "path": {
246                        "type": "string",
247                        "description": "Corpus-relative path to read."
248                    },
249                    "start_line": {
250                        "type": "integer",
251                        "description": "1-based line to start at. Default 1."
252                    },
253                    "line_count": {
254                        "type": "integer",
255                        "description": "Number of lines to return (clamped to the read limit)."
256                    }
257                },
258                "required": ["path"]
259            }),
260        }
261    }
262
263    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
264        let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
265        crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
266            run_blocking(self.corpus.clone(), move |c| {
267                engine::read_range(c, &args.path, args.start_line, args.line_count)
268            })
269            .await
270        })
271        .await
272    }
273}
274
275/// List the immediate entries of a corpus directory.
276#[derive(Clone)]
277pub struct ListTool {
278    corpus: CorpusRoot,
279}
280
281impl ListTool {
282    /// Create a list tool bound to `corpus`.
283    pub fn new(corpus: CorpusRoot) -> Self {
284        Self { corpus }
285    }
286}
287
288/// Arguments for [`ListTool`].
289#[derive(Debug, serde::Serialize, Deserialize)]
290pub struct ListArgs {
291    /// Corpus-relative directory to list. Defaults to the corpus root.
292    #[serde(default)]
293    pub path: Option<String>,
294}
295
296impl Tool for ListTool {
297    const NAME: &'static str = "corpus_list";
298    type Error = DciError;
299    type Args = ListArgs;
300    type Output = ListResult;
301
302    async fn definition(&self, _prompt: String) -> ToolDefinition {
303        ToolDefinition {
304            name: Self::NAME.to_string(),
305            description: "List the files and subdirectories of a corpus directory to orient \
306                          yourself before searching."
307                .to_string(),
308            parameters: json!({
309                "type": "object",
310                "properties": {
311                    "path": {
312                        "type": "string",
313                        "description": "Corpus-relative directory to list. Defaults to the root."
314                    }
315                }
316            }),
317        }
318    }
319
320    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
321        let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
322        crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
323            run_blocking(self.corpus.clone(), move |c| {
324                engine::list_dir(c, args.path.as_deref())
325            })
326            .await
327        })
328        .await
329    }
330}
331
332/// Bundle of the four corpus tools, all sharing one [`CorpusRoot`].
333///
334/// Used by [`crate::agent`] to register the full toolset on an agent in one
335/// call, and available to callers who want the tools individually.
336#[derive(Clone)]
337pub struct CorpusTools {
338    /// The regex search tool.
339    pub search: SearchTool,
340    /// The glob find tool.
341    pub find: FindTool,
342    /// The bounded read tool.
343    pub read: ReadTool,
344    /// The directory listing tool.
345    pub list: ListTool,
346}
347
348impl CorpusTools {
349    /// Build the full toolset over a shared corpus root.
350    pub fn new(corpus: CorpusRoot) -> Self {
351        Self {
352            search: SearchTool::new(corpus.clone()),
353            find: FindTool::new(corpus.clone()),
354            read: ReadTool::new(corpus.clone()),
355            list: ListTool::new(corpus),
356        }
357    }
358}