Skip to main content

dci_tool/
agent.rs

1//! The Direct Corpus Interaction agent: a rig [`Agent`] pre-wired with the four
2//! corpus tools and a preamble that teaches the search → narrow → read → cite
3//! investigation loop.
4//!
5//! The agent is generic over any [`CompletionModel`], so callers bring their
6//! own model/provider. Nothing here is tied to a specific vendor.
7
8use rig_core::agent::{Agent, AgentBuilder};
9use rig_core::completion::{CompletionModel, Prompt, PromptError};
10
11use crate::sandbox::CorpusRoot;
12use crate::tools::CorpusTools;
13
/// Default number of tool-calling turns allowed per investigation.
///
/// [`DciAgentBuilder::new`] starts with this budget; override it with
/// [`DciAgentBuilder::max_turns`].
pub const DEFAULT_MAX_TURNS: usize = 24;
16
/// The default system preamble. It frames the corpus as something to be
/// *interrogated directly* with commands rather than retrieved from an index.
///
/// [`DciAgentBuilder::build`] uses this text unless a replacement was supplied
/// through [`DciAgentBuilder::preamble`]; anything added with
/// [`DciAgentBuilder::append_preamble`] is joined after it, separated by a
/// blank line.
///
/// The literal is sent to the model as-is, so edits here change agent
/// behaviour. A trailing `\` joins the next source line onto the current one;
/// plain line breaks inside the literal are preserved as newlines.
pub const DEFAULT_PREAMBLE: &str = "\
You are a Direct Corpus Interaction (DCI) analyst. You answer questions about a \
corpus of files (code, logs, documents) by issuing search commands directly \
against the raw text — there is no vector database and no pre-built index.

You have four tools:
- corpus_list: list a directory to orient yourself.
- corpus_find: locate files by a path glob (e.g. '**/*.log').
- corpus_search: search file contents with a regular expression; returns
  file:line:text evidence.
- corpus_read: read a bounded, line-numbered window from one file.

Method:
1. Start broad: search for the most specific term or pattern in the question.
2. Narrow using path globs and follow-up searches; pivot on identifiers,
   error codes, IPs, hashes, or usernames you discover.
3. Read the surrounding lines of promising hits to confirm before concluding.
4. Cite concrete evidence as `path:line` for every claim. If the corpus does
   not support a conclusion, say so plainly rather than guessing.

Prefer precise regular expressions over broad ones, and prefer reading a few
lines of real evidence over speculating.";
41
42/// Builder for a [`DciAgent`].
43pub struct DciAgentBuilder<M: CompletionModel> {
44    model: M,
45    corpus: CorpusRoot,
46    preamble: Option<String>,
47    appended: Vec<String>,
48    max_turns: usize,
49    temperature: Option<f64>,
50    max_tokens: Option<u64>,
51    model_label: Option<String>,
52}
53
54impl<M: CompletionModel + 'static> DciAgentBuilder<M> {
55    /// Start building a DCI agent over `model` and `corpus`.
56    pub fn new(model: M, corpus: CorpusRoot) -> Self {
57        Self {
58            model,
59            corpus,
60            preamble: None,
61            appended: Vec::new(),
62            max_turns: DEFAULT_MAX_TURNS,
63            temperature: None,
64            max_tokens: None,
65            model_label: None,
66        }
67    }
68
69    /// Replace the default preamble entirely.
70    pub fn preamble(mut self, preamble: impl Into<String>) -> Self {
71        self.preamble = Some(preamble.into());
72        self
73    }
74
75    /// Append domain guidance (e.g. a cyber-investigation playbook) after the
76    /// base preamble.
77    pub fn append_preamble(mut self, extra: impl Into<String>) -> Self {
78        self.appended.push(extra.into());
79        self
80    }
81
82    /// Set the maximum number of tool-calling turns per investigation.
83    pub fn max_turns(mut self, turns: usize) -> Self {
84        self.max_turns = turns;
85        self
86    }
87
88    /// Set the model sampling temperature.
89    pub fn temperature(mut self, temperature: f64) -> Self {
90        self.temperature = Some(temperature);
91        self
92    }
93
94    /// Set the maximum number of output tokens per turn. Required by some
95    /// providers (e.g. Anthropic).
96    pub fn max_tokens(mut self, max_tokens: u64) -> Self {
97        self.max_tokens = Some(max_tokens);
98        self
99    }
100
101    /// Set a human-readable model label recorded on telemetry events (e.g.
102    /// `"gpt-4o"`). Defaults to `"unknown"`.
103    pub fn model_label(mut self, label: impl Into<String>) -> Self {
104        self.model_label = Some(label.into());
105        self
106    }
107
108    /// Finish building, registering the four corpus tools on the agent.
109    pub fn build(self) -> DciAgent<M> {
110        let tools = CorpusTools::new(self.corpus);
111
112        let mut preamble = self
113            .preamble
114            .unwrap_or_else(|| DEFAULT_PREAMBLE.to_string());
115        for extra in &self.appended {
116            preamble.push_str("\n\n");
117            preamble.push_str(extra);
118        }
119
120        let mut builder = AgentBuilder::new(self.model).preamble(&preamble);
121        if let Some(temp) = self.temperature {
122            builder = builder.temperature(temp);
123        }
124        if let Some(max_tokens) = self.max_tokens {
125            builder = builder.max_tokens(max_tokens);
126        }
127        let agent = builder
128            .tool(tools.search)
129            .tool(tools.find)
130            .tool(tools.read)
131            .tool(tools.list)
132            .build();
133
134        DciAgent {
135            agent,
136            max_turns: self.max_turns,
137            model_label: self.model_label.unwrap_or_else(|| "unknown".to_string()),
138        }
139    }
140}
141
142/// A ready-to-run Direct Corpus Interaction agent.
143pub struct DciAgent<M: CompletionModel, P: rig_core::agent::PromptHook<M> = ()> {
144    agent: Agent<M, P>,
145    max_turns: usize,
146    model_label: String,
147}
148
149impl<M: CompletionModel + 'static> DciAgent<M, ()> {
150    /// Start building a DCI agent.
151    pub fn builder(model: M, corpus: CorpusRoot) -> DciAgentBuilder<M> {
152        DciAgentBuilder::new(model, corpus)
153    }
154}
155
156impl<M: CompletionModel + 'static, P: rig_core::agent::PromptHook<M> + 'static> DciAgent<M, P> {
157    /// Borrow the underlying rig agent (e.g. to register additional tools or
158    /// wrap it as a delegate in later phases).
159    pub fn agent(&self) -> &Agent<M, P> {
160        &self.agent
161    }
162
163    /// The configured per-investigation turn budget.
164    pub fn max_turns(&self) -> usize {
165        self.max_turns
166    }
167
168    /// Run an investigation: prompt the agent and let it interact with the
169    /// corpus across multiple tool-calling turns until it produces an answer.
170    ///
171    /// Token usage for the whole run is emitted as a telemetry
172    /// `prompt.completed` event (see [`crate::telemetry`]).
173    pub async fn investigate(&self, question: &str) -> Result<String, PromptError> {
174        let start = std::time::Instant::now();
175        let response = self
176            .agent
177            .prompt(question)
178            .max_turns(self.max_turns)
179            .extended_details()
180            .await?;
181        let usage = &response.usage;
182        crate::telemetry::record_prompt(
183            &self.model_label,
184            usage.input_tokens,
185            usage.output_tokens,
186            usage.cached_input_tokens,
187            usage.reasoning_tokens,
188            start.elapsed().as_millis() as u64,
189        );
190        Ok(response.output)
191    }
192}