Skip to main content

defect_agent/
tool.rs

//! Tool abstraction.
//!
//! Both builtin tools (`defect-tools`) and MCP adapters (`defect-mcp`) integrate
//! into the agent main loop by implementing the [`Tool`] trait.
//!
//! ## ACP alignment
//!
//! [`Tool::describe`] and [`ToolEvent::Progress`] / [`ToolEvent::Completed`]
//! directly reuse ACP's [`ToolCallUpdateFields`] to avoid duplicating fields.
//! The main loop enriches the fields produced by the tool with metadata such as
//! [`ToolCallId`] and [`raw_input`], then forwards them as `session/update` and
//! `session/request_permission`.
//!
//! [`ToolCallId`]: agent_client_protocol_schema::ToolCallId
//! [`ToolCallUpdateFields`]: agent_client_protocol_schema::ToolCallUpdateFields
//! [`raw_input`]: agent_client_protocol_schema::ToolCallUpdateFields::raw_input
17
18use std::path::Path;
19use std::pin::Pin;
20use std::sync::Arc;
21
22use agent_client_protocol_schema::{ToolCallId, ToolCallUpdateFields};
23use futures::Stream;
24use futures::future::BoxFuture;
25use serde::{Deserialize, Serialize};
26use thiserror::Error;
27use tokio_util::sync::CancellationToken;
28
29use crate::error::BoxError;
30use crate::fs::FsBackend;
31use crate::http::HttpClient;
32use crate::session::EventEmitter;
33use crate::shell::ShellBackend;
34
35mod background_tasks;
36mod goal_done;
37mod skill;
38mod spawn_agent;
39pub use background_tasks::{CancelBackgroundTaskTool, InspectBackgroundTaskTool};
40pub use goal_done::{GOAL_DONE_TOOL_NAME, GoalDoneTool};
41pub use skill::{SkillEntry, SkillTool, SkillTriggers};
42pub(crate) use spawn_agent::SPAWN_AGENT_TOOL_NAME;
43pub use spawn_agent::{SpawnAgentTool, SubagentProfile};
44
45/// Tool's "public face": describes the parameter shape without any execution capability.
46///
47/// [`crate::llm::CompletionRequest::tools`] accepts `Vec<ToolSchema>`.
48/// Providers don't hold `dyn Tool`; they serialize schemas into wire JSON.
49#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
50pub struct ToolSchema {
51    pub name: String,
52    pub description: String,
53    /// JSON Schema for the parameters. Uses a subset of Draft 2020-12 (the exact subset
54    /// and escaping rules are documented in `tool-trait.md`).
55    pub input_schema: serde_json::Value,
56}
57
58/// Self-description of a tool call, directly mapping to ACP's [`ToolCallUpdateFields`].
59///
60/// Purpose (the same data drives three ACP messages):
61/// - First push of a `ToolCall` (`status = Pending`)
62/// - The `tool_call` field in a `RequestPermission` request
63/// - Baseline for incremental updates via [`ToolEvent::Progress`]
64///
65/// Field conventions:
66/// - `tool_call_id` is not in this struct; it is assigned uniformly by the main loop
67///   (using the LLM's `tool_use_id` or a self-generated UUID). The tool does not care
68///   about it.
69/// - `raw_input` is filled by the main loop with the original args. Tool implementations
70///   must not set it themselves, to avoid divergence from the real parameters on the
71///   wire.
72/// - `status` is inferred from the [`ToolEvent`] variant: `Progress` → `InProgress`,
73///   `Completed` → `Completed`, `Failed` → `Failed`. Tools must not set it themselves.
74///
75/// [`ToolCallUpdateFields`]: agent_client_protocol_schema::ToolCallUpdateFields
76#[derive(Debug, Clone)]
77pub struct ToolCallDescription {
78    pub fields: ToolCallUpdateFields,
79}
80
81/// Safety level for a tool.
82///
83/// This is only a **hint** fed to the external sandbox policy; the final Allow / Deny /
84/// Ask decision is made by the policy (in combination with user configuration, prior
85/// authorization, etc.). The trait itself does not enforce any policy.
86///
87/// The `serde` representation uses `snake_case` (`read_only` / `mutating` / `destructive`
88/// / `network`), so that `defect-config` can deserialize it directly from TOML in hook
89/// matchers and similar contexts.
90#[non_exhaustive]
91#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
92#[serde(rename_all = "snake_case")]
93pub enum SafetyClass {
94    /// Read-only: list directories, read files, query metadata.
95    ReadOnly,
96    /// Mutating: writes files or modifies state; side effects may or may not be
97    /// reversible.
98    Mutating,
99    /// Destructive: deleting files, moving, executing commands.
100    Destructive,
101    /// Outbound network: HTTP / DNS / any remote I/O.
102    Network,
103}
104
105/// Elements of the event stream produced by [`Tool::execute`].
106///
107/// Terminal semantics: the stream contains **at most one** [`ToolEvent::Completed`] or
108/// [`ToolEvent::Failed`], and it must be the last event in the stream. When the main
109/// loop encounters a terminal event, it considers the tool call finished and does not
110/// consume any further elements.
111#[non_exhaustive]
112#[derive(Debug)]
113pub enum ToolEvent {
114    /// Progress delta: the main loop forwards this as a `tool_call_update` in an ACP
115    /// `session/update`.
116    /// Contains only the fields that changed, matching the "patch" semantics of
117    /// [`ToolCallUpdateFields`].
118    ///
119    /// [`ToolCallUpdateFields`]: agent_client_protocol_schema::ToolCallUpdateFields
120    Progress(ToolCallUpdateFields),
121
122    /// Successful completion. `fields` contains the remaining final-state fields (e.g.,
123    /// final content, locations, raw_output); the main loop is responsible for setting
124    /// `status` to `Completed`.
125    Completed(ToolCallUpdateFields),
126
127    /// Terminal failure. Carries the Rust-side error so the caller can retry or log it;
128    /// when mapping to ACP, the main loop sets `status` to `Failed` and places the
129    /// [`ToolError`] text into `content`.
130    Failed(ToolError),
131}
132
133/// Event stream for [`Tool::execute`]. Type-erased so that `dyn Tool` can be used
134/// directly.
135pub type ToolStream = Pin<Box<dyn Stream<Item = ToolEvent> + Send>>;
136
137/// The execution environment injected into [`Tool::execute`].
138///
139/// An explicit struct rather than environment variables or thread-locals, making it easy
140/// to construct in tests and avoiding implicit global state. Fields are marked
141/// `non_exhaustive` to allow future additions (sandbox handles, ACP backchannels, etc.)
142/// without breaking existing implementations.
143#[non_exhaustive]
144pub struct ToolContext<'a> {
145    /// The default working directory for the tool (typically the ACP session's `cwd`).
146    pub cwd: &'a Path,
147    /// Cancellation token: triggered by upstream `session/cancel`, user Ctrl+C, timeout,
148    /// etc.
149    /// Tool implementations should check `cancel.is_cancelled()` at long loops or await
150    /// points and exit as soon as possible.
151    pub cancel: CancellationToken,
152    /// Filesystem backend. The `fs` tool family (`read_file` / `write_file` /
153    /// `edit_file`) reads and writes files through it. During assembly, `defect-acp`
154    /// selects either `LocalFsBackend` or `AcpFsBackend` based on the client-negotiated
155    /// [`FileSystemCapabilities`]; tool implementations are completely unaware of this.
156    ///
157    /// Uses [`Arc`] instead of a borrow: `Tool::execute` returns a `'static` future /
158    /// stream, and tools typically `clone` the fs into async tasks. A borrow cannot
159    /// survive across `.await`.
160    ///
161    /// [`FileSystemCapabilities`]: agent_client_protocol_schema::FileSystemCapabilities
162    pub fs: Arc<dyn FsBackend>,
163    /// Shell execution backend. The `bash` tool uses it to create a terminal and run
164    /// commands; during assembly, `defect-acp` selects either `LocalShellBackend` or
165    /// `AcpShellBackend` based on the client-negotiated [`ClientCapabilities::terminal`],
166    /// and tool implementations are unaware of the choice.
167    ///
168    /// Same `Arc` trade-off as `fs` — `Tool::execute` returns a `'static` future.
169    ///
170    /// [`ClientCapabilities::terminal`]: agent_client_protocol_schema::ClientCapabilities
171    pub shell: Arc<dyn ShellBackend>,
172    /// HTTP fetch backend. The `fetch` tool uses it to perform network reads; it is set
173    /// up at the CLI entry point (constructed from `HttpClientConfig` as a process-level
174    /// [`HttpClient`] instance and reused). Tool implementations receive an [`Arc`]
175    /// clone; `Tool::execute` is a `'static` future, so borrowing cannot survive across
176    /// await points.
177    pub http: Arc<dyn HttpClient>,
178    /// The model id selected for the current turn. Most tools do not need this; the
179    /// `spawn_agent` sub-agent tool uses it to "fall back the model to the parent
180    /// session's current selection" — `ToolContext` does not carry a provider registry,
181    /// but carrying this string is enough for `spawn_agent` to call `entry_for_model` on
182    /// its own captured registry to resolve the provider the parent is currently using.
183    /// Populated from `config.model` by [`TurnRunner`](crate::session::TurnRunner) when
184    /// constructing the context.
185    pub current_model: &'a str,
186    /// The provider vendor selected for the current turn. Together with
187    /// [`Self::current_model`] this forms a `(vendor, model)` selection pair — when a
188    /// `spawn_agent` sub-agent falls back to the parent's choice, it uses this pair to
189    /// call `entry_for` on the registry for exact resolution (avoiding provider
190    /// mis-selection when multiple gateways serve the same model name). An empty string
191    /// means the value was not injected (legacy/test paths); in that case `spawn_agent`
192    /// falls back to looking up the first entry by bare model id. Populated by the turn
193    /// runner from `config.provider` when constructing the context.
194    pub current_provider: &'a str,
195    /// Session-level background task handle. When `Some`, tools can fire-and-forget a
196    /// task that outlives the current turn (primarily for `spawn_agent {
197    /// run_in_background: true }`); `None` means the context does not support background
198    /// execution (e.g., nested sub-agent turns or tests), and tools should fall back to
199    /// synchronous execution.
200    ///
201    /// Uses an owned [`Arc`]-backed handle instead of a borrow: `Tool::execute` returns a
202    /// `'static` future, and a borrow cannot survive across await. Injected by the
203    /// top-level [`TurnRunner`](crate::session::TurnRunner) when constructing the
204    /// context; not injected for nested sub-agent turns (structurally prevents background
205    /// tasks from spawning themselves).
206    pub background: Option<crate::session::BackgroundTasks>,
207    /// Subagent event bridge: when `Some`, a tool can wrap internally spawned sub-turn
208    /// events as [`crate::event::AgentEvent::Subagent`] and forward them back to the
209    /// parent session's event stream for nested observability display. Currently only
210    /// used by `spawn_agent`. Injected by the turn runner in `session::turn` for each
211    /// tool according to its [`ToolCallId`] — **injected for both top-level and nested
212    /// sub-agent turns** (recursive bridging), with the mount point expressed by
213    /// [`SubagentBridge::parent_tool_call_id`].
214    pub subagent_bridge: Option<SubagentBridge>,
215    /// The active sandbox policy for this turn snapshot. `spawn_agent` uses it to pass
216    /// the parent's current real policy to child agents — after a `session/set_mode`
217    /// switch, newly created turns propagate the new policy through this field, so child
218    /// agents never see a stale process-level default. When `None`, `spawn_agent` falls
219    /// back to the policy captured at construction time (testing / uninjected scenarios).
220    /// Most tools ignore this field.
221    pub policy: Option<Arc<dyn crate::policy::SandboxPolicy>>,
222    /// Shared state for the `--goal` goal-driven loop. When `Some`, this session runs in
223    /// goal mode; the `goal_done` tool calls [`crate::session::GoalState::mark_reached`]
224    /// to set the flag, and the `goal-gate` hook uses it to decide whether to release or
225    /// extend a turn when it voluntarily stops. `None` means non-goal mode (the default);
226    /// the `goal_done` tool is not registered and this field is never read.
227    pub goal: Option<Arc<crate::session::GoalState>>,
228    /// How many more layers of subagent can be dispatched from the current layer. The
229    /// top-level turn starts at the configured initial limit; `spawn_agent` decrements it
230    /// by one when injecting a nested turn for a child agent. `0` means the child agent
231    /// cannot obtain the `spawn_agent` tool (depth exhausted, structurally preventing
232    /// further recursion) — replacing the old hard-coded "whitelist never contains
233    /// `spawn_agent`". This is a functional gate, unrelated to observability, so it is
234    /// independent of the optional [`Self::subagent_bridge`] and also takes effect in
235    /// test / no-bridge scenarios. Defaults to `0` (most conservative: no explicit
236    /// injection means no dispatch; the top-level turn must explicitly use
237    /// [`Self::with_subagent_depth`]).
238    pub subagent_depth: u32,
239    /// The current session's **fully assembled** tool pool — the `CompositeRegistry` that
240    /// already merged built-in tools with the per-session MCP tools. `spawn_agent` uses
241    /// this (rather than a static, MCP-free tool set captured at construction) to build a
242    /// child agent's tool subset, so a subagent profile may allow `mcp__*` tools. `None`
243    /// in legacy / test paths, where `spawn_agent` falls back to its captured static pool.
244    /// Injected by the [`TurnRunner`](crate::session::TurnRunner) when constructing the
245    /// context.
246    pub session_tools: Option<Arc<dyn crate::session::ToolRegistry>>,
247    /// The current turn's [`TurnConfig`](crate::session::TurnConfig). `spawn_agent` uses it
248    /// so a child agent **inherits** the parent's turn settings (compaction thresholds,
249    /// retry/concurrency limits, sampling incl. `reasoning_effort`, request-limit default)
250    /// instead of silently falling back to `TurnConfig::default()`. A profile may still
251    /// override individual fields. `None` in legacy / test paths, where `spawn_agent` uses
252    /// defaults. Injected by the [`TurnRunner`](crate::session::TurnRunner).
253    pub parent_turn_config: Option<Arc<crate::session::TurnConfig>>,
254}
255
256/// A handle for bridging sub-turn events (spawned internally by a tool) back into the
257/// parent session's event stream.
258///
259/// Holds the parent session's [`EventEmitter`] and the [`ToolCallId`] that initiated this
260/// tool invocation. `Clone` is cheap (internally `Arc` + small string).
261///
262/// ## Recursive bridging: each layer only prepends its own id
263///
264/// The full ancestor chain is not stored here — it is accumulated incrementally as events
265/// **bubble upward** through each layer's bridge. The bridge subscriber (e.g.,
266/// `spawn_agent`'s `bridge_task`) at each layer:
267/// - Receives a **leaf** event from the sub-turn → wraps it as
268///   `Subagent{ ancestor_path: [parent_tool_call_id], agent_type: <this layer's profile>,
269///   inner: leaf }`;
270/// - Receives an **already** `Subagent` (from a deeper layer, already carrying a partial
271///   chain) → **prepends** `parent_tool_call_id` to the head of its `ancestor_path`,
272///   leaving `inner` leaf and deeper `agent_type` unchanged.
273///
274/// Thus after passing through N layers of bridging, `ancestor_path` is exactly the
275/// complete id chain from the top layer down to the leaf. Each layer only needs to know
276/// its own hop — this lets frontend, backend, and arbitrary depths share the same logic.
277///
278/// The recursive **depth gate** is not here — it is functional and must always apply
279/// (including in non-observability / test scenarios), so it lives in the separate
280/// [`ToolContext::subagent_depth`] field rather than in this optional bridge.
281#[derive(Clone)]
282pub struct SubagentBridge {
283    /// Event bus of the parent session. Wrapped [`crate::event::AgentEvent::Subagent`]
284    /// events are emitted here.
285    pub parent_events: Arc<EventEmitter>,
286    /// The tool call ID that spawned this subagent (the corresponding tool span in the
287    /// parent trace). The bridge prepends this ID, serving as the mount point of this
288    /// subagent within the parent trace.
289    pub parent_tool_call_id: ToolCallId,
290}
291
292impl<'a> ToolContext<'a> {
293    /// Constructs a minimal `ToolContext`. The `#[non_exhaustive]` attribute prevents
294    /// external crates from constructing the struct directly with a literal — this
295    /// constructor is the only cross-crate entry point. When adding new fields, add
296    /// default values to the signature or provide a new constructor to avoid breaking
297    /// existing call sites.
298    pub fn new(
299        cwd: &'a Path,
300        cancel: CancellationToken,
301        fs: Arc<dyn FsBackend>,
302        shell: Arc<dyn ShellBackend>,
303        http: Arc<dyn HttpClient>,
304        current_model: &'a str,
305    ) -> Self {
306        Self {
307            cwd,
308            cancel,
309            fs,
310            shell,
311            http,
312            current_model,
313            current_provider: "",
314            background: None,
315            subagent_bridge: None,
316            policy: None,
317            goal: None,
318            subagent_depth: 0,
319            session_tools: None,
320            parent_turn_config: None,
321        }
322    }
323
324    /// Inject the current turn's [`TurnConfig`](crate::session::TurnConfig) so `spawn_agent`
325    /// can build a child config that inherits the parent's turn settings. If not called,
326    /// `spawn_agent` falls back to `TurnConfig::default()` for non-explicit fields.
327    #[must_use]
328    pub fn with_parent_turn_config(mut self, config: Arc<crate::session::TurnConfig>) -> Self {
329        self.parent_turn_config = Some(config);
330        self
331    }
332
333    /// Inject the current session's fully assembled tool pool (built-in + MCP composite).
334    /// `spawn_agent` uses it to build a child agent's tool subset so subagent profiles can
335    /// allow `mcp__*` tools. If not called, `session_tools` is `None` and `spawn_agent`
336    /// falls back to the static pool captured at construction.
337    #[must_use]
338    pub fn with_session_tools(mut self, tools: Arc<dyn crate::session::ToolRegistry>) -> Self {
339        self.session_tools = Some(tools);
340        self
341    }
342
343    /// Inject the provider vendor selected for the current turn, forming a selection pair
344    /// with `current_model`.
345    /// If not called, defaults to an empty string (`spawn_agent` falls back to picking
346    /// the first entry by bare model id).
347    #[must_use]
348    pub fn with_current_provider(mut self, vendor: &'a str) -> Self {
349        self.current_provider = vendor;
350        self
351    }
352
353    /// Inject the remaining subagent dispatch depth from this layer onward. The tool
354    /// driver for the top-level turn calls with the configured initial cap; `spawn_agent`
355    /// injects the decremented value for nested child-agent turns. If not called,
356    /// defaults to `0` (most conservative: no subagent dispatch allowed).
357    #[must_use]
358    pub fn with_subagent_depth(mut self, depth: u32) -> Self {
359        self.subagent_depth = depth;
360        self
361    }
362
363    /// Inject the active sandbox policy for this turn snapshot. The top-level turn's tool
364    /// driver uses this to pass the parent turn's policy to `spawn_agent`; if not called,
365    /// `policy` is `None` (child agent nesting / testing), and `spawn_agent` falls back
366    /// to the policy captured at construction time.
367    #[must_use]
368    pub fn with_policy(mut self, policy: Arc<dyn crate::policy::SandboxPolicy>) -> Self {
369        self.policy = Some(policy);
370        self
371    }
372
373    /// Inject a session-level background task handle. The top-level turn's tool driver
374    /// uses this to enable `run_in_background`; if not called, `background` is `None`
375    /// (the default for sub-agents / tests), and tools fall back to synchronous
376    /// execution.
377    #[must_use]
378    pub fn with_background(mut self, background: crate::session::BackgroundTasks) -> Self {
379        self.background = Some(background);
380        self
381    }
382
383    /// Inject shared state for the `--goal` goal-driven loop. The `goal_done` tool sets
384    /// `reached` based on this state; if not called, `goal` is `None` (non-goal mode, the
385    /// default).
386    #[must_use]
387    pub fn with_goal(mut self, goal: Arc<crate::session::GoalState>) -> Self {
388        self.goal = Some(goal);
389        self
390    }
391
392    /// Inject a subagent event bridge. The tool driver injects one per tool call in
393    /// `session::turn`, keyed by [`ToolCallId`], so that `spawn_agent` can nest child
394    /// turn events back into the parent trace.
395    #[must_use]
396    pub fn with_subagent_bridge(mut self, bridge: SubagentBridge) -> Self {
397        self.subagent_bridge = Some(bridge);
398        self
399    }
400}
401
402/// Tools callable by the agent.
403///
404/// Implementors are typically stateless (each invocation receives all dependencies via
405/// `args` + [`ToolContext`]); if you need to hold state such as connections or caches,
406/// place the state on `Self` and register an `Arc<Self>` with the main loop.
407pub trait Tool: Send + Sync {
408    /// Tool metadata. Returns a reference to avoid allocating on every call.
409    fn schema(&self) -> &ToolSchema;
410
411    /// Provides a safety-level hint to the sandbox policy without actually executing the
412    /// tool.
413    ///
414    /// `args` is the already-deserialized JSON value — the same tool's safety level may
415    /// vary by arguments (e.g., the `bash` tool escalates to [`SafetyClass::Destructive`]
416    /// when `command` contains `rm`). The implementation should be a **pure function**
417    /// and perform no IO.
418    fn safety_hint(&self, args: &serde_json::Value) -> SafetyClass;
419
420    /// Generates a "self-description" before execution, for display to the ACP client.
421    ///
422    /// The async signature and [`ToolContext`] injection allow implementations to perform
423    /// lightweight IO during the describe phase (typical example: `write_file` reads the
424    /// old content before requesting authorization, producing a precise old↔new diff for
425    /// the client—more reviewable than "entirely new content").
426    ///
427    /// Performance constraint: `describe` runs before every ACP `ToolCall` push.
428    /// Implementations should remain fast and graceful on failure (on IO failure, degrade
429    /// to returning basic fields; do not let `describe` itself throw—the signature
430    /// provides no error channel).
431    ///
432    /// See the field conventions on [`ToolCallDescription`] for which fields are filled
433    /// by whom.
434    fn describe<'a>(
435        &'a self,
436        args: &'a serde_json::Value,
437        ctx: ToolContext<'a>,
438    ) -> BoxFuture<'a, ToolCallDescription>;
439
440    /// Initiates a tool call and returns an event stream.
441    ///
442    /// See [`ToolEvent`] for the stream elements. The stream must end immediately after
443    /// the terminal event. Dropping the stream is treated as cancellation (equivalent to
444    /// `ctx.cancel.cancel()`).
445    fn execute(&self, args: serde_json::Value, ctx: ToolContext<'_>) -> ToolStream;
446}
447
448/// Tool execution error.
449///
450/// The granularity is intentionally coarse — finer-grained error types are carried by
451/// built-in tools themselves in the `Execution` source. Here we only distinguish the
452/// broad categories that the main loop needs to handle differently.
453#[non_exhaustive]
454#[derive(Debug, Error)]
455pub enum ToolError {
456    /// Canceled by the caller (triggered via [`ToolContext::cancel`]).
457    #[error("tool canceled")]
458    Canceled,
459
460    /// The tool arguments failed JSON parsing or schema validation. The main loop can
461    /// send this back to the LLM so the model can fix the parameters and retry.
462    #[error("invalid tool arguments: {0}")]
463    InvalidArgs(#[source] BoxError),
464
465    /// Runtime error (I/O failure, non-zero subprocess exit, network error, etc.).
466    #[error("tool execution failed: {0}")]
467    Execution(#[source] BoxError),
468}