use rig_core::completion::ToolDefinition;
use rig_core::tool::Tool;
use serde::Deserialize;
use serde_json::json;
use crate::engine::{
self, FindQuery, FindResult, ListResult, ReadResult, SearchQuery, SearchResult,
};
use crate::error::DciError;
use crate::sandbox::CorpusRoot;
async fn run_blocking<T, F>(corpus: CorpusRoot, op: F) -> Result<T, DciError>
where
F: FnOnce(&CorpusRoot) -> Result<T, DciError> + Send + 'static,
T: Send + 'static,
{
let timeout = corpus.limits().timeout;
let backstop = timeout + std::time::Duration::from_secs(5);
let handle = tokio::task::spawn_blocking(move || op(&corpus));
match tokio::time::timeout(backstop, handle).await {
Ok(Ok(result)) => result,
Ok(Err(join_err)) => Err(DciError::Worker(join_err.to_string())),
Err(_) => Err(DciError::Timeout {
millis: backstop.as_millis() as u64,
}),
}
}
/// Tool handle that owns the corpus root used by `corpus_search` calls.
#[derive(Clone)]
pub struct SearchTool {
    /// Corpus root cloned into each blocking search job.
    corpus: CorpusRoot,
}

impl SearchTool {
    /// Creates a search tool bound to `corpus`.
    pub fn new(corpus: CorpusRoot) -> Self {
        SearchTool { corpus }
    }
}
/// Arguments accepted by the `corpus_search` tool.
///
/// Only `pattern` is required; every other field is marked `#[serde(default)]`
/// and therefore deserializes to `None` when absent. The type is also
/// serializable so that a call can be rendered as JSON for telemetry.
#[derive(Debug, serde::Serialize, Deserialize)]
pub struct SearchArgs {
/// Regular expression to search for.
pub pattern: String,
/// Optional glob restricting which files are searched.
#[serde(default)]
pub path_glob: Option<String>,
/// Whether to match case-insensitively; `None` is treated as `false` by the tool.
#[serde(default)]
pub case_insensitive: Option<bool>,
/// Lines of context around each match; `None` is treated as `0` by the tool.
#[serde(default)]
pub context_lines: Option<usize>,
/// Optional cap on the number of results; forwarded unchanged to the search query.
#[serde(default)]
pub max_results: Option<usize>,
}
impl Tool for SearchTool {
const NAME: &'static str = "corpus_search";
type Error = DciError;
type Args = SearchArgs;
type Output = SearchResult;
async fn definition(&self, _prompt: String) -> ToolDefinition {
ToolDefinition {
name: Self::NAME.to_string(),
description: "Search the corpus with a regular expression and return matching \
file paths, line numbers, and line text. Use this first to locate \
evidence, then narrow with path_glob or read the surrounding lines."
.to_string(),
parameters: json!({
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "Regular expression to search for (Rust/ripgrep syntax)."
},
"path_glob": {
"type": "string",
"description": "Optional glob to restrict files, e.g. '**/*.log' or 'auth*'."
},
"case_insensitive": {
"type": "boolean",
"description": "Match case-insensitively. Default false."
},
"context_lines": {
"type": "integer",
"description": "Lines of context to include on each side of a match. Default 0."
},
"max_results": {
"type": "integer",
"description": "Maximum number of matching lines to return."
}
},
"required": ["pattern"]
}),
}
}
async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
let query = SearchQuery {
pattern: args.pattern,
path_glob: args.path_glob,
case_insensitive: args.case_insensitive.unwrap_or(false),
context_lines: args.context_lines.unwrap_or(0),
max_results: args.max_results,
};
run_blocking(self.corpus.clone(), move |c| engine::search(c, &query)).await
})
.await
}
}
/// Tool handle that owns the corpus root used by `corpus_find` calls.
#[derive(Clone)]
pub struct FindTool {
    /// Corpus root cloned into each blocking find job.
    corpus: CorpusRoot,
}

impl FindTool {
    /// Creates a find tool bound to `corpus`.
    pub fn new(corpus: CorpusRoot) -> Self {
        FindTool { corpus }
    }
}
/// Arguments accepted by the `corpus_find` tool.
///
/// `glob` is required; `max_results` is marked `#[serde(default)]` and
/// deserializes to `None` when absent. The type is also serializable so that a
/// call can be rendered as JSON for telemetry.
#[derive(Debug, serde::Serialize, Deserialize)]
pub struct FindArgs {
/// Glob matched against paths in the corpus.
pub glob: String,
/// Optional cap on the number of results; forwarded unchanged to the find query.
#[serde(default)]
pub max_results: Option<usize>,
}
impl Tool for FindTool {
const NAME: &'static str = "corpus_find";
type Error = DciError;
type Args = FindArgs;
type Output = FindResult;
async fn definition(&self, _prompt: String) -> ToolDefinition {
ToolDefinition {
name: Self::NAME.to_string(),
description: "Find files in the corpus whose path matches a glob. Use this to \
discover where relevant files live before searching or reading them."
.to_string(),
parameters: json!({
"type": "object",
"properties": {
"glob": {
"type": "string",
"description": "Glob over relative paths, e.g. '**/*.log', 'src/**/*.rs', or 'passwd'."
},
"max_results": {
"type": "integer",
"description": "Maximum number of paths to return."
}
},
"required": ["glob"]
}),
}
}
async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
let query = FindQuery {
glob: args.glob,
max_results: args.max_results,
};
run_blocking(self.corpus.clone(), move |c| engine::find(c, &query)).await
})
.await
}
}
/// Tool handle that owns the corpus root used by `corpus_read` calls.
#[derive(Clone)]
pub struct ReadTool {
    /// Corpus root cloned into each blocking read job.
    corpus: CorpusRoot,
}

impl ReadTool {
    /// Creates a read tool bound to `corpus`.
    pub fn new(corpus: CorpusRoot) -> Self {
        ReadTool { corpus }
    }
}
/// Arguments accepted by the `corpus_read` tool.
///
/// `path` is required; the two window fields are marked `#[serde(default)]`
/// and deserialize to `None` when absent. The type is also serializable so
/// that a call can be rendered as JSON for telemetry.
#[derive(Debug, serde::Serialize, Deserialize)]
pub struct ReadArgs {
/// Path of the file to read (the tool schema describes it as corpus-relative).
pub path: String,
/// First line of the window. The tool schema advertises this as 1-based with
/// a default of 1; the option is forwarded unchanged to `engine::read_range`.
#[serde(default)]
pub start_line: Option<usize>,
/// Number of lines requested; forwarded unchanged to `engine::read_range`.
#[serde(default)]
pub line_count: Option<usize>,
}
impl Tool for ReadTool {
const NAME: &'static str = "corpus_read";
type Error = DciError;
type Args = ReadArgs;
type Output = ReadResult;
async fn definition(&self, _prompt: String) -> ToolDefinition {
ToolDefinition {
name: Self::NAME.to_string(),
description: "Read a bounded, line-numbered window from a single corpus file. Use \
this to inspect the exact lines around a search hit and quote evidence."
.to_string(),
parameters: json!({
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Corpus-relative path to read."
},
"start_line": {
"type": "integer",
"description": "1-based line to start at. Default 1."
},
"line_count": {
"type": "integer",
"description": "Number of lines to return (clamped to the read limit)."
}
},
"required": ["path"]
}),
}
}
async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
run_blocking(self.corpus.clone(), move |c| {
engine::read_range(c, &args.path, args.start_line, args.line_count)
})
.await
})
.await
}
}
/// Tool handle that owns the corpus root used by `corpus_list` calls.
#[derive(Clone)]
pub struct ListTool {
    /// Corpus root cloned into each blocking list job.
    corpus: CorpusRoot,
}

impl ListTool {
    /// Creates a list tool bound to `corpus`.
    pub fn new(corpus: CorpusRoot) -> Self {
        ListTool { corpus }
    }
}
/// Arguments accepted by the `corpus_list` tool.
///
/// The single field is marked `#[serde(default)]`, so an empty argument object
/// deserializes successfully. The type is also serializable so that a call can
/// be rendered as JSON for telemetry.
#[derive(Debug, serde::Serialize, Deserialize)]
pub struct ListArgs {
/// Directory to list. The tool schema says an absent value means the corpus
/// root; the option is forwarded unchanged to `engine::list_dir`.
#[serde(default)]
pub path: Option<String>,
}
impl Tool for ListTool {
const NAME: &'static str = "corpus_list";
type Error = DciError;
type Args = ListArgs;
type Output = ListResult;
async fn definition(&self, _prompt: String) -> ToolDefinition {
ToolDefinition {
name: Self::NAME.to_string(),
description: "List the files and subdirectories of a corpus directory to orient \
yourself before searching."
.to_string(),
parameters: json!({
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Corpus-relative directory to list. Defaults to the root."
}
}
}),
}
}
async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
let args_str = serde_json::to_string(&args).unwrap_or_else(|_| "{}".to_string());
crate::telemetry::record_tool_call(Self::NAME, &args_str, || async {
run_blocking(self.corpus.clone(), move |c| {
engine::list_dir(c, args.path.as_deref())
})
.await
})
.await
}
}
/// Bundle of the four corpus tools, all constructed over the same corpus root.
#[derive(Clone)]
pub struct CorpusTools {
    /// Regex search tool.
    pub search: SearchTool,
    /// Glob-based file finder.
    pub find: FindTool,
    /// Line-window file reader.
    pub read: ReadTool,
    /// Directory lister.
    pub list: ListTool,
}

impl CorpusTools {
    /// Builds all four tools from `corpus`, giving each its own copy of the root.
    pub fn new(corpus: CorpusRoot) -> Self {
        let search = SearchTool::new(corpus.clone());
        let find = FindTool::new(corpus.clone());
        let read = ReadTool::new(corpus.clone());
        // The final tool takes ownership of the original, so only three clones are made.
        let list = ListTool::new(corpus);

        Self {
            search,
            find,
            read,
            list,
        }
    }
}