1use rig_core::completion::ToolDefinition;
13use rig_core::tool::Tool;
14use serde::Deserialize;
15use serde_json::json;
16
17use crate::engine::{
18 self, FindQuery, FindResult, ListResult, ReadResult, SearchQuery, SearchResult,
19};
20use crate::error::DciError;
21use crate::sandbox::CorpusRoot;
22
23async fn run_blocking<T, F>(corpus: CorpusRoot, op: F) -> Result<T, DciError>
31where
32 F: FnOnce(&CorpusRoot) -> Result<T, DciError> + Send + 'static,
33 T: Send + 'static,
34{
35 let timeout = corpus.limits().timeout;
36 let backstop = timeout + std::time::Duration::from_secs(5);
39 let handle = tokio::task::spawn_blocking(move || op(&corpus));
40 match tokio::time::timeout(backstop, handle).await {
41 Ok(Ok(result)) => result,
42 Ok(Err(join_err)) => Err(DciError::Worker(join_err.to_string())),
43 Err(_) => Err(DciError::Timeout {
44 millis: backstop.as_millis() as u64,
45 }),
46 }
47}
48
49#[derive(Clone)]
51pub struct SearchTool {
52 corpus: CorpusRoot,
53}
54
55impl SearchTool {
56 pub fn new(corpus: CorpusRoot) -> Self {
58 Self { corpus }
59 }
60}
61
62#[derive(Debug, serde::Serialize, Deserialize)]
64pub struct SearchArgs {
65 pub pattern: String,
67 #[serde(default)]
69 pub path_glob: Option<String>,
70 #[serde(default)]
72 pub case_insensitive: Option<bool>,
73 #[serde(default)]
75 pub context_lines: Option<usize>,
76 #[serde(default)]
78 pub max_results: Option<usize>,
79}
80
81impl Tool for SearchTool {
82 const NAME: &'static str = "corpus_search";
83 type Error = DciError;
84 type Args = SearchArgs;
85 type Output = SearchResult;
86
87 async fn definition(&self, _prompt: String) -> ToolDefinition {
88 ToolDefinition {
89 name: Self::NAME.to_string(),
90 description: "Search the corpus with a regular expression and return matching \
91 file paths, line numbers, and line text. Use this first to locate \
92 evidence, then narrow with path_glob or read the surrounding lines."
93 .to_string(),
94 parameters: json!({
95 "type": "object",
96 "properties": {
97 "pattern": {
98 "type": "string",
99 "description": "Regular expression to search for (Rust/ripgrep syntax)."
100 },
101 "path_glob": {
102 "type": "string",
103 "description": "Optional glob to restrict files, e.g. '**/*.log' or 'auth*'."
104 },
105 "case_insensitive": {
106 "type": "boolean",
107 "description": "Match case-insensitively. Default false."
108 },
109 "context_lines": {
110 "type": "integer",
111 "description": "Lines of context to include on each side of a match. Default 0."
112 },
113 "max_results": {
114 "type": "integer",
115 "description": "Maximum number of matching lines to return."
116 }
117 },
118 "required": ["pattern"]
119 }),
120 }
121 }
122
123 async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
124 let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
125 crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
126 let query = SearchQuery {
127 pattern: args.pattern,
128 path_glob: args.path_glob,
129 case_insensitive: args.case_insensitive.unwrap_or(false),
130 context_lines: args.context_lines.unwrap_or(0),
131 max_results: args.max_results,
132 };
133 run_blocking(self.corpus.clone(), move |c| engine::search(c, &query)).await
134 })
135 .await
136 }
137}
138
139#[derive(Clone)]
141pub struct FindTool {
142 corpus: CorpusRoot,
143}
144
145impl FindTool {
146 pub fn new(corpus: CorpusRoot) -> Self {
148 Self { corpus }
149 }
150}
151
152#[derive(Debug, serde::Serialize, Deserialize)]
154pub struct FindArgs {
155 pub glob: String,
157 #[serde(default)]
159 pub max_results: Option<usize>,
160}
161
162impl Tool for FindTool {
163 const NAME: &'static str = "corpus_find";
164 type Error = DciError;
165 type Args = FindArgs;
166 type Output = FindResult;
167
168 async fn definition(&self, _prompt: String) -> ToolDefinition {
169 ToolDefinition {
170 name: Self::NAME.to_string(),
171 description: "Find files in the corpus whose path matches a glob. Use this to \
172 discover where relevant files live before searching or reading them."
173 .to_string(),
174 parameters: json!({
175 "type": "object",
176 "properties": {
177 "glob": {
178 "type": "string",
179 "description": "Glob over relative paths, e.g. '**/*.log', 'src/**/*.rs', or 'passwd'."
180 },
181 "max_results": {
182 "type": "integer",
183 "description": "Maximum number of paths to return."
184 }
185 },
186 "required": ["glob"]
187 }),
188 }
189 }
190
191 async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
192 let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
193 crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
194 let query = FindQuery {
195 glob: args.glob,
196 max_results: args.max_results,
197 };
198 run_blocking(self.corpus.clone(), move |c| engine::find(c, &query)).await
199 })
200 .await
201 }
202}
203
204#[derive(Clone)]
206pub struct ReadTool {
207 corpus: CorpusRoot,
208}
209
210impl ReadTool {
211 pub fn new(corpus: CorpusRoot) -> Self {
213 Self { corpus }
214 }
215}
216
217#[derive(Debug, serde::Serialize, Deserialize)]
219pub struct ReadArgs {
220 pub path: String,
222 #[serde(default)]
224 pub start_line: Option<usize>,
225 #[serde(default)]
227 pub line_count: Option<usize>,
228}
229
230impl Tool for ReadTool {
231 const NAME: &'static str = "corpus_read";
232 type Error = DciError;
233 type Args = ReadArgs;
234 type Output = ReadResult;
235
236 async fn definition(&self, _prompt: String) -> ToolDefinition {
237 ToolDefinition {
238 name: Self::NAME.to_string(),
239 description: "Read a bounded, line-numbered window from a single corpus file. Use \
240 this to inspect the exact lines around a search hit and quote evidence."
241 .to_string(),
242 parameters: json!({
243 "type": "object",
244 "properties": {
245 "path": {
246 "type": "string",
247 "description": "Corpus-relative path to read."
248 },
249 "start_line": {
250 "type": "integer",
251 "description": "1-based line to start at. Default 1."
252 },
253 "line_count": {
254 "type": "integer",
255 "description": "Number of lines to return (clamped to the read limit)."
256 }
257 },
258 "required": ["path"]
259 }),
260 }
261 }
262
263 async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
264 let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
265 crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
266 run_blocking(self.corpus.clone(), move |c| {
267 engine::read_range(c, &args.path, args.start_line, args.line_count)
268 })
269 .await
270 })
271 .await
272 }
273}
274
275#[derive(Clone)]
277pub struct ListTool {
278 corpus: CorpusRoot,
279}
280
281impl ListTool {
282 pub fn new(corpus: CorpusRoot) -> Self {
284 Self { corpus }
285 }
286}
287
288#[derive(Debug, serde::Serialize, Deserialize)]
290pub struct ListArgs {
291 #[serde(default)]
293 pub path: Option<String>,
294}
295
296impl Tool for ListTool {
297 const NAME: &'static str = "corpus_list";
298 type Error = DciError;
299 type Args = ListArgs;
300 type Output = ListResult;
301
302 async fn definition(&self, _prompt: String) -> ToolDefinition {
303 ToolDefinition {
304 name: Self::NAME.to_string(),
305 description: "List the files and subdirectories of a corpus directory to orient \
306 yourself before searching."
307 .to_string(),
308 parameters: json!({
309 "type": "object",
310 "properties": {
311 "path": {
312 "type": "string",
313 "description": "Corpus-relative directory to list. Defaults to the root."
314 }
315 }
316 }),
317 }
318 }
319
320 async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
321 let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
322 crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
323 run_blocking(self.corpus.clone(), move |c| {
324 engine::list_dir(c, args.path.as_deref())
325 })
326 .await
327 })
328 .await
329 }
330}
331
332#[derive(Clone)]
337pub struct CorpusTools {
338 pub search: SearchTool,
340 pub find: FindTool,
342 pub read: ReadTool,
344 pub list: ListTool,
346}
347
348impl CorpusTools {
349 pub fn new(corpus: CorpusRoot) -> Self {
351 Self {
352 search: SearchTool::new(corpus.clone()),
353 find: FindTool::new(corpus.clone()),
354 read: ReadTool::new(corpus.clone()),
355 list: ListTool::new(corpus),
356 }
357 }
358}