Skip to main content

zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Before/after snapshot of a single file, carried to the TUI for diff rendering.
///
/// [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor) produce
/// this when a tool call modifies a tracked file; the TUI turns it into a side-by-side diff.
#[derive(Clone, Debug)]
pub struct DiffData {
    /// Path of the modified file; may be relative or absolute.
    pub file_path: String,
    /// Contents of the file as they were before the tool executed.
    pub old_content: String,
    /// Contents of the file once the tool had executed.
    pub new_content: String,
}
23
24/// Structured tool invocation from LLM.
25///
26/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
27/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`].
28///
29/// # Example
30///
31/// ```rust
32/// use zeph_tools::{ToolCall, ExecutionContext};
33/// use zeph_common::ToolName;
34///
35/// let call = ToolCall {
36///     tool_id: ToolName::new("bash"),
37///     params: {
38///         let mut m = serde_json::Map::new();
39///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
40///         m
41///     },
42///     caller_id: Some("user-42".to_owned()),
43///     context: Some(ExecutionContext::new().with_name("repo")),
44///     tool_call_id: String::new(),
45/// };
46/// assert_eq!(call.tool_id, "bash");
47/// ```
48#[derive(Debug, Clone)]
49pub struct ToolCall {
50    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
51    pub tool_id: ToolName,
52    /// JSON parameters for the tool call, deserialized into the tool's parameter struct.
53    pub params: serde_json::Map<String, serde_json::Value>,
54    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
55    /// `None` for system-initiated calls (scheduler, self-learning, internal).
56    pub caller_id: Option<String>,
57    /// Per-turn execution environment. `None` means use the executor default (process CWD
58    /// and inherited env), which is identical to the behaviour before this field existed.
59    pub context: Option<crate::ExecutionContext>,
60    /// Opaque tool call ID used to correlate [`ToolEvent::OutputChunk`] events with
61    /// their originating tool call in the TUI. Empty when not set by the agent loop.
62    pub tool_call_id: String,
63}
64
65/// Cumulative filter statistics for a single tool execution.
66///
67/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
68/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
69#[derive(Debug, Clone, Default)]
70pub struct FilterStats {
71    /// Raw character count before filtering.
72    pub raw_chars: usize,
73    /// Character count after filtering.
74    pub filtered_chars: usize,
75    /// Raw line count before filtering.
76    pub raw_lines: usize,
77    /// Line count after filtering.
78    pub filtered_lines: usize,
79    /// Worst-case confidence across all applied filters.
80    pub confidence: Option<crate::FilterConfidence>,
81    /// The shell command that produced this output, for display purposes.
82    pub command: Option<String>,
83    /// Zero-based line indices that were kept after filtering.
84    pub kept_lines: Vec<usize>,
85}
86
87impl FilterStats {
88    /// Returns the percentage of characters removed by filtering.
89    ///
90    /// Returns `0.0` when there was no raw output to filter.
91    #[must_use]
92    #[allow(clippy::cast_precision_loss)]
93    pub fn savings_pct(&self) -> f64 {
94        if self.raw_chars == 0 {
95            return 0.0;
96        }
97        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
98    }
99
100    /// Estimates the number of LLM tokens saved by filtering.
101    ///
102    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
103    /// not for billing or exact budget calculations.
104    #[must_use]
105    pub fn estimated_tokens_saved(&self) -> usize {
106        self.raw_chars.saturating_sub(self.filtered_chars) / 4
107    }
108
109    /// Formats a one-line filter summary for log messages and TUI status.
110    ///
111    /// # Example
112    ///
113    /// ```rust
114    /// use zeph_tools::FilterStats;
115    ///
116    /// let stats = FilterStats {
117    ///     raw_chars: 1000,
118    ///     filtered_chars: 400,
119    ///     raw_lines: 50,
120    ///     filtered_lines: 20,
121    ///     command: Some("cargo build".to_owned()),
122    ///     ..Default::default()
123    /// };
124    /// let summary = stats.format_inline("shell");
125    /// assert!(summary.contains("60.0% filtered"));
126    /// ```
127    #[must_use]
128    pub fn format_inline(&self, tool_name: &str) -> String {
129        let cmd_label = self
130            .command
131            .as_deref()
132            .map(|c| {
133                let trimmed = c.trim();
134                if trimmed.len() > 60 {
135                    format!(" `{}…`", &trimmed[..57])
136                } else {
137                    format!(" `{trimmed}`")
138                }
139            })
140            .unwrap_or_default();
141        format!(
142            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
143            self.raw_lines,
144            self.filtered_lines,
145            self.savings_pct()
146        )
147    }
148}
149
150/// Provenance of a tool execution result.
151///
152/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
153/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
154/// `None` means the source is unspecified (pass-through code, mocks, tests).
155#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
156#[serde(rename_all = "snake_case")]
157pub enum ClaimSource {
158    /// Local shell command execution.
159    Shell,
160    /// Local file system read/write.
161    FileSystem,
162    /// HTTP web scrape.
163    WebScrape,
164    /// MCP server tool response.
165    Mcp,
166    /// A2A agent message.
167    A2a,
168    /// Code search (LSP or semantic).
169    CodeSearch,
170    /// Agent diagnostics (internal).
171    Diagnostics,
172    /// Memory retrieval (semantic search).
173    Memory,
174}
175
176/// Structured result from tool execution.
177///
178/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
179/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
180///
181/// # Example
182///
183/// ```rust
184/// use zeph_tools::{ToolOutput, executor::ClaimSource};
185/// use zeph_common::ToolName;
186///
187/// let output = ToolOutput {
188///     tool_name: ToolName::new("shell"),
189///     summary: "hello\n".to_owned(),
190///     blocks_executed: 1,
191///     filter_stats: None,
192///     diff: None,
193///     streamed: false,
194///     terminal_id: None,
195///     locations: None,
196///     raw_response: None,
197///     claim_source: Some(ClaimSource::Shell),
198/// };
199/// assert_eq!(output.to_string(), "hello\n");
200/// ```
201#[derive(Debug, Clone)]
202pub struct ToolOutput {
203    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
204    pub tool_name: ToolName,
205    /// Human-readable result text injected into the LLM context.
206    pub summary: String,
207    /// Number of code blocks processed in this invocation.
208    pub blocks_executed: u32,
209    /// Output filter statistics when filtering was applied, `None` otherwise.
210    pub filter_stats: Option<FilterStats>,
211    /// File diff data for TUI display when the tool modified a tracked file.
212    pub diff: Option<DiffData>,
213    /// Whether this tool already streamed its output via `ToolEvent` channel.
214    pub streamed: bool,
215    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
216    pub terminal_id: Option<String>,
217    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
218    pub locations: Option<Vec<String>>,
219    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
220    pub raw_response: Option<serde_json::Value>,
221    /// Provenance of this tool result. Set by the executor at construction time.
222    /// `None` in pass-through wrappers, mocks, and tests.
223    pub claim_source: Option<ClaimSource>,
224}
225
226impl fmt::Display for ToolOutput {
227    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
228        f.write_str(&self.summary)
229    }
230}
231
/// Largest tool output that is injected into the LLM context without truncation.
///
/// Anything longer is cut into a head and a tail by [`truncate_tool_output`], so that both
/// the start and the end of a large command output are kept.
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;
237
238/// Truncate tool output that exceeds [`MAX_TOOL_OUTPUT_CHARS`] using a head+tail split.
239///
240/// Equivalent to `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`.
241///
242/// # Example
243///
244/// ```rust
245/// use zeph_tools::executor::truncate_tool_output;
246///
247/// let short = "hello world";
248/// assert_eq!(truncate_tool_output(short), short);
249/// ```
250#[must_use]
251pub fn truncate_tool_output(output: &str) -> String {
252    truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)
253}
254
/// Shortens tool output longer than `max_chars` by keeping its head and tail.
///
/// Input whose `len()` is at most `max_chars` is returned as-is. Otherwise roughly the
/// first and last `max_chars / 2` bytes are kept, with a truncation marker between them.
/// The head cut is moved back and the tail cut moved forward to the nearest UTF-8
/// character boundary, so a multi-byte character is never split.
///
/// Lengths are measured with `str::len` (bytes), including the count shown in the marker.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output_at;
///
/// let long = "a".repeat(200);
/// let truncated = truncate_tool_output_at(&long, 100);
/// assert!(truncated.contains("truncated"));
/// assert!(truncated.len() < long.len());
/// ```
#[must_use]
pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
    let total = output.len();
    if total <= max_chars {
        return output.to_owned();
    }

    let half = max_chars / 2;
    // Here `total > max_chars >= half`, so every probed index lies within `0..=total`.
    // Index 0 and index `total` are always boundaries, hence the fallbacks never trigger.
    let head_end = (0..=half)
        .rev()
        .find(|&idx| output.is_char_boundary(idx))
        .unwrap_or(0);
    let tail_start = ((total - half)..=total)
        .find(|&idx| output.is_char_boundary(idx))
        .unwrap_or(total);

    let (head, tail) = (&output[..head_end], &output[tail_start..]);
    // Bytes dropped between the kept head and the kept tail.
    let truncated = tail_start - head_end;

    format!(
        "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
    )
}
287
288/// Event emitted during tool execution for real-time UI updates.
289///
290/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
291/// Each event variant corresponds to a phase in the tool execution lifecycle.
292#[derive(Debug, Clone)]
293pub enum ToolEvent {
294    /// The tool has started. Displayed in the TUI as a spinner with the command text.
295    Started {
296        tool_name: ToolName,
297        command: String,
298        /// Active sandbox profile, if any. `None` when sandbox is disabled.
299        sandbox_profile: Option<String>,
300        /// Canonical absolute working directory the command will run in.
301        /// `None` for executors that do not resolve a per-turn CWD.
302        resolved_cwd: Option<String>,
303        /// Name of the resolved execution environment (from `[[execution.environments]]`),
304        /// or `None` when no named environment was selected.
305        execution_env: Option<String>,
306    },
307    /// A chunk of streaming output was produced (e.g. from a long-running command).
308    OutputChunk {
309        tool_name: ToolName,
310        command: String,
311        chunk: String,
312        /// Opaque tool call ID matching the corresponding [`ToolEvent::Started`] event.
313        /// Empty string when the executor does not have access to the call ID.
314        tool_call_id: String,
315    },
316    /// The tool finished. Contains the full output and optional filter/diff data.
317    Completed {
318        tool_name: ToolName,
319        command: String,
320        /// Full output text (possibly filtered and truncated).
321        output: String,
322        /// `true` when the tool exited successfully, `false` on error.
323        success: bool,
324        filter_stats: Option<FilterStats>,
325        diff: Option<DiffData>,
326        /// Set when this completion belongs to a background run. `None` for blocking runs.
327        run_id: Option<RunId>,
328    },
329    /// A transactional rollback was performed, restoring or deleting files.
330    Rollback {
331        tool_name: ToolName,
332        command: String,
333        /// Number of files restored to their pre-execution content.
334        restored_count: usize,
335        /// Number of files that did not exist before execution and were deleted.
336        deleted_count: usize,
337    },
338}
339
340/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
341///
342/// Capacity is 1024 slots. Streaming variants (`OutputChunk`, `Started`) use
343/// `try_send` and drop on full; terminal variants (`Completed`, `Rollback`) use
344/// `send().await` to guarantee delivery.
345///
346/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
347pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;
348
349/// Receiver half matching [`ToolEventTx`].
350pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;
351
/// Number of slots in the bounded tool-event channel.
pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
354
355/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
356///
357/// Transient errors may succeed on retry (network blips, race conditions).
358/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
359#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
360pub enum ErrorKind {
361    Transient,
362    Permanent,
363}
364
365impl std::fmt::Display for ErrorKind {
366    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367        match self {
368            Self::Transient => f.write_str("transient"),
369            Self::Permanent => f.write_str("permanent"),
370        }
371    }
372}
373
374/// Errors that can occur during tool execution.
375#[derive(Debug, thiserror::Error)]
376pub enum ToolError {
377    #[error("command blocked by policy: {command}")]
378    Blocked { command: String },
379
380    #[error("path not allowed by sandbox: {path}")]
381    SandboxViolation { path: String },
382
383    #[error("command requires confirmation: {command}")]
384    ConfirmationRequired { command: String },
385
386    #[error("command timed out after {timeout_secs}s")]
387    Timeout { timeout_secs: u64 },
388
389    #[error("operation cancelled")]
390    Cancelled,
391
392    #[error("invalid tool parameters: {message}")]
393    InvalidParams { message: String },
394
395    #[error("execution failed: {0}")]
396    Execution(#[from] std::io::Error),
397
398    /// HTTP or API error with status code for fine-grained classification.
399    ///
400    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
401    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
402    #[error("HTTP error {status}: {message}")]
403    Http { status: u16, message: String },
404
405    /// Shell execution error with explicit exit code and pre-classified category.
406    ///
407    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
408    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
409    /// Preserves the exit code for audit logging and the category for skill evolution.
410    #[error("shell error (exit {exit_code}): {message}")]
411    Shell {
412        exit_code: i32,
413        category: crate::error_taxonomy::ToolErrorCategory,
414        message: String,
415    },
416
417    #[error("snapshot failed: {reason}")]
418    SnapshotFailed { reason: String },
419
420    /// Tool call rejected because the tool id is outside the active capability scope.
421    ///
422    /// Emitted by `ScopedToolExecutor` before any tool side-effect runs.
423    /// The audit log records `error_category = "out_of_scope"`.
424    // LLM isolation: task_type is never shown in the error message (P2-OutOfScope).
425    #[error("tool call denied by policy")]
426    OutOfScope {
427        /// Fully-qualified tool id that was rejected.
428        tool_id: String,
429        /// Active task type at dispatch time, if any.
430        task_type: Option<String>,
431    },
432
433    /// Tool call blocked by `ShadowProbeExecutor` after the LLM safety probe returned Deny.
434    ///
435    /// Emitted before any tool side-effect runs. The probe evaluated the full trajectory
436    /// context and determined the call is unsafe. Reason is LLM-generated; shown to the
437    /// agent loop as the tool result so the model can adapt.
438    #[error("tool call denied by safety probe: {reason}")]
439    SafetyDenied {
440        /// Human-readable explanation from the LLM safety probe.
441        reason: String,
442    },
443}
444
445impl ToolError {
446    /// Fine-grained error classification using the 12-category taxonomy.
447    ///
448    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
449    /// backward compatibility and delegates to `category().error_kind()`.
450    #[must_use]
451    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
452        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
453        match self {
454            Self::Blocked { .. } | Self::SandboxViolation { .. } => {
455                ToolErrorCategory::PolicyBlocked
456            }
457            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
458            Self::Timeout { .. } => ToolErrorCategory::Timeout,
459            Self::Cancelled => ToolErrorCategory::Cancelled,
460            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
461            Self::Http { status, .. } => classify_http_status(*status),
462            Self::Execution(io_err) => classify_io_error(io_err),
463            Self::Shell { category, .. } => *category,
464            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
465            Self::OutOfScope { .. } | Self::SafetyDenied { .. } => ToolErrorCategory::PolicyBlocked,
466        }
467    }
468
469    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
470    ///
471    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
472    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
473    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
474    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
475    ///   I/O error kinds — retrying would waste time with no benefit.
476    #[must_use]
477    pub fn kind(&self) -> ErrorKind {
478        use crate::error_taxonomy::ToolErrorCategoryExt;
479        self.category().error_kind()
480    }
481}
482
483/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
484///
485/// # Errors
486///
487/// Returns `ToolError::InvalidParams` when deserialization fails.
488pub fn deserialize_params<T: serde::de::DeserializeOwned>(
489    params: &serde_json::Map<String, serde_json::Value>,
490) -> Result<T, ToolError> {
491    let obj = serde_json::Value::Object(params.clone());
492    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
493        message: e.to_string(),
494    })
495}
496
497/// Async trait for tool execution backends.
498///
499/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
500/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
501/// and [`FileExecutor`](crate::FileExecutor).
502///
503/// # Contract
504///
505/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
506///   return `Ok(None)` when the executor does not handle the given input — callers must not
507///   treat `None` as an error.
508/// - All methods must be `Send + Sync` and free of blocking I/O.
509/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
510///   protection) before executing any side-effectful operation.
511/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
512///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
513///   confirmation gates only — all other security controls remain active.
514///
515/// # Two Invocation Paths
516///
517/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
518/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
519///
520/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
521/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
522/// This is the preferred path for new code.
523///
524/// # Example
525///
526/// ```rust
527/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
528///
529/// #[derive(Debug)]
530/// struct EchoExecutor;
531///
532/// impl ToolExecutor for EchoExecutor {
533///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
534///         Ok(None) // not a fenced-block executor
535///     }
536///
537///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
538///         if call.tool_id != "echo" {
539///             return Ok(None);
540///         }
541///         let text = call.params.get("text")
542///             .and_then(|v| v.as_str())
543///             .unwrap_or("")
544///             .to_owned();
545///         Ok(Some(ToolOutput {
546///             tool_name: "echo".into(),
547///             summary: text,
548///             blocks_executed: 1,
549///             filter_stats: None,
550///             diff: None,
551///             streamed: false,
552///             terminal_id: None,
553///             locations: None,
554///             raw_response: None,
555///             claim_source: None,
556///         }))
557///     }
558/// }
559/// ```
560/// # TODO (G3 — deferred: Tower-style tool middleware stack)
561///
562/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
563/// are scattered across individual executor implementations. The planned approach is a
564/// composable middleware stack similar to Tower's `Service` trait:
565///
566/// ```text
567/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
568/// ```
569///
570/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
571/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
572/// dynamic dispatch overhead before collapsing D2.
573///
574/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
575///
576/// Having two parallel traits creates duplication and confusion. The blanket impl
577/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
578/// added to both traits. Use `trait_variant::make` or a single object-safe design.
579///
580/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
581pub trait ToolExecutor: Send + Sync {
582    /// Parse `response` for fenced tool blocks and execute them.
583    ///
584    /// Returns `Ok(None)` when no tool blocks are found in `response`.
585    ///
586    /// # Errors
587    ///
588    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
589    /// sandbox violation, network error, timeout, etc.).
590    fn execute(
591        &self,
592        response: &str,
593    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;
594
595    /// Execute bypassing confirmation checks (called after user approves).
596    ///
597    /// Security controls other than the confirmation gate remain active. Default
598    /// implementation delegates to [`execute`](ToolExecutor::execute).
599    ///
600    /// # Errors
601    ///
602    /// Returns [`ToolError`] on execution failure.
603    fn execute_confirmed(
604        &self,
605        response: &str,
606    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
607        self.execute(response)
608    }
609
610    /// Return the tool definitions this executor can handle.
611    ///
612    /// Used to populate the LLM's tool schema at context-assembly time.
613    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
614    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
615        vec![]
616    }
617
618    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
619    ///
620    /// # Errors
621    ///
622    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
623    fn execute_tool_call(
624        &self,
625        _call: &ToolCall,
626    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
627        std::future::ready(Ok(None))
628    }
629
630    /// Execute a structured tool call bypassing confirmation checks.
631    ///
632    /// Called after the user has explicitly approved the tool invocation.
633    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
634    ///
635    /// # Errors
636    ///
637    /// Returns [`ToolError`] on execution failure.
638    fn execute_tool_call_confirmed(
639        &self,
640        call: &ToolCall,
641    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
642        self.execute_tool_call(call)
643    }
644
645    /// Inject environment variables for the currently active skill. No-op by default.
646    ///
647    /// Called by the agent loop before each turn when the active skill specifies env vars.
648    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
649    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}
650
651    /// Set the effective trust level for the currently active skill. No-op by default.
652    ///
653    /// Trust level affects which operations are permitted (e.g. network access, file writes).
654    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}
655
656    /// Whether the executor can safely retry this tool call on a transient error.
657    ///
658    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
659    /// Shell commands and other non-idempotent operations must keep the default `false`
660    /// to prevent double-execution of side-effectful commands.
661    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
662        false
663    }
664
665    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
666    ///
667    /// Speculative execution requires the tool to be:
668    /// 1. Idempotent — repeated execution with the same args produces the same result.
669    /// 2. Side-effect-free or cheaply reversible.
670    /// 3. Not subject to user confirmation (`needs_confirmation` must be false at call time).
671    ///
672    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
673    /// properties. The engine additionally gates on trust level and confirmation status
674    /// regardless of this flag.
675    ///
676    /// # Examples
677    ///
678    /// ```rust
679    /// use zeph_tools::ToolExecutor;
680    ///
681    /// struct ReadOnlyExecutor;
682    /// impl ToolExecutor for ReadOnlyExecutor {
683    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
684    ///         Ok(None)
685    ///     }
686    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
687    ///         true // read-only, idempotent
688    ///     }
689    /// }
690    /// ```
691    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
692        false
693    }
694
695    /// Return `true` when `call` would require user confirmation before execution.
696    ///
697    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
698    /// Used by the speculative engine to gate dispatch without causing double side-effects.
699    ///
700    /// Default: `false`. Executors that enforce a confirmation policy (e.g. `TrustGateExecutor`)
701    /// must override this to reflect their actual policy without executing the tool.
702    fn requires_confirmation(&self, _call: &ToolCall) -> bool {
703        false
704    }
705}
706
707/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
708///
709/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
710/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
711/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
712///
713/// Implemented automatically for all `T: ToolExecutor + 'static` via the blanket impl below.
714/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
pub trait ErasedToolExecutor: Send + Sync {
    /// Type-erased form of [`ToolExecutor::execute`].
    ///
    /// The result is returned as a boxed `Send` future so the method can be called through
    /// `dyn ErasedToolExecutor`. The future borrows both `self` and `response` for `'a`.
    fn execute_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Type-erased form of [`ToolExecutor::execute_confirmed`].
    ///
    /// Same boxing and borrowing rules as [`execute_erased`](Self::execute_erased).
    fn execute_confirmed_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Type-erased form of [`ToolExecutor::tool_definitions`].
    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;

    /// Type-erased form of [`ToolExecutor::execute_tool_call`].
    ///
    /// The returned future borrows both `self` and `call` for `'a`.
    fn execute_tool_call_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Type-erased form of [`ToolExecutor::execute_tool_call_confirmed`].
    ///
    /// Default: forwards to [`execute_tool_call_erased`](Self::execute_tool_call_erased).
    fn execute_tool_call_confirmed_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        // Fallback for implementors that do not override this method: take the normal
        // enforcement path via `execute_tool_call_erased`.
        // The blanket impl for `T: ToolExecutor` overrides this method and forwards to
        // `ToolExecutor::execute_tool_call_confirmed`, which is how an executor that customises
        // the confirmed path (e.g. `TrustGateExecutor`, per the original note) is reached.
        self.execute_tool_call_erased(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Required method (no default); the blanket impl for `T: ToolExecutor` forwards to
    /// [`ToolExecutor::is_tool_retryable`].
    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;

    /// Whether a tool call can be safely dispatched speculatively.
    ///
    /// Default: `false`. Override to `true` in read-only executors.
    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `true` (confirmation required). Implementors that want to allow speculative
    /// dispatch must explicitly return `false`. The blanket impl for `T: ToolExecutor`
    /// delegates to [`ToolExecutor::requires_confirmation`].
    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
        true
    }
}
772
773impl<T: ToolExecutor> ErasedToolExecutor for T {
774    fn execute_erased<'a>(
775        &'a self,
776        response: &'a str,
777    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
778    {
779        Box::pin(self.execute(response))
780    }
781
782    fn execute_confirmed_erased<'a>(
783        &'a self,
784        response: &'a str,
785    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
786    {
787        Box::pin(self.execute_confirmed(response))
788    }
789
790    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
791        self.tool_definitions()
792    }
793
794    fn execute_tool_call_erased<'a>(
795        &'a self,
796        call: &'a ToolCall,
797    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
798    {
799        Box::pin(self.execute_tool_call(call))
800    }
801
802    fn execute_tool_call_confirmed_erased<'a>(
803        &'a self,
804        call: &'a ToolCall,
805    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
806    {
807        Box::pin(self.execute_tool_call_confirmed(call))
808    }
809
810    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
811        ToolExecutor::set_skill_env(self, env);
812    }
813
814    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
815        ToolExecutor::set_effective_trust(self, level);
816    }
817
818    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
819        ToolExecutor::is_tool_retryable(self, tool_id)
820    }
821
822    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
823        ToolExecutor::is_tool_speculatable(self, tool_id)
824    }
825
826    fn requires_confirmation_erased(&self, call: &ToolCall) -> bool {
827        ToolExecutor::requires_confirmation(self, call)
828    }
829}
830
/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
///
/// Enables dynamic composition of tool executors at runtime without static type chains.
///
/// The single public field is the shared, type-erased executor that every `ToolExecutor`
/// method on this wrapper forwards to.
pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
835
836impl ToolExecutor for DynExecutor {
837    fn execute(
838        &self,
839        response: &str,
840    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
841        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
842        let inner = std::sync::Arc::clone(&self.0);
843        let response = response.to_owned();
844        async move { inner.execute_erased(&response).await }
845    }
846
847    fn execute_confirmed(
848        &self,
849        response: &str,
850    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
851        let inner = std::sync::Arc::clone(&self.0);
852        let response = response.to_owned();
853        async move { inner.execute_confirmed_erased(&response).await }
854    }
855
856    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
857        self.0.tool_definitions_erased()
858    }
859
860    fn execute_tool_call(
861        &self,
862        call: &ToolCall,
863    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
864        let inner = std::sync::Arc::clone(&self.0);
865        let call = call.clone();
866        async move { inner.execute_tool_call_erased(&call).await }
867    }
868
869    fn execute_tool_call_confirmed(
870        &self,
871        call: &ToolCall,
872    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
873        let inner = std::sync::Arc::clone(&self.0);
874        let call = call.clone();
875        async move { inner.execute_tool_call_confirmed_erased(&call).await }
876    }
877
878    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
879        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
880    }
881
882    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
883        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
884    }
885
886    fn is_tool_retryable(&self, tool_id: &str) -> bool {
887        self.0.is_tool_retryable_erased(tool_id)
888    }
889
890    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
891        self.0.is_tool_speculatable_erased(tool_id)
892    }
893
894    fn requires_confirmation(&self, call: &ToolCall) -> bool {
895        self.0.requires_confirmation_erased(call)
896    }
897}
898
/// Collect the contents of every fenced code block tagged with `lang`.
///
/// A block opens at `` ```{lang} `` and closes at the next `` ``` ``. Everything between the
/// two (including any remainder of the opening line) is returned with surrounding whitespace
/// trimmed, in order of appearance. The opening tag must end at a word boundary, so searching
/// for `bash` does not match `` ```bashrc ``. Scanning stops at the first opening fence that
/// has no closing fence.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const CLOSING_FENCE: &str = "```";

    let opener = format!("```{lang}");
    let mut found = Vec::new();
    // `cursor` is always the not-yet-scanned tail of `text`.
    let mut cursor = text;

    loop {
        let Some(open_at) = cursor.find(&opener) else {
            break;
        };
        let body = &cursor[open_at + opener.len()..];

        // The tag only counts when it is not the prefix of a longer word: a following
        // alphanumeric, `_` or `-` means we hit something like "```bashrc".
        let continues_word = body
            .chars()
            .next()
            .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '-');
        if continues_word {
            // Skip past this false opener and keep looking.
            cursor = body;
            continue;
        }

        match body.find(CLOSING_FENCE) {
            Some(close_at) => {
                found.push(body[..close_at].trim());
                cursor = &body[close_at + CLOSING_FENCE.len()..];
            }
            // Unterminated block: nothing more can be extracted.
            None => break,
        }
    }

    found
}
935
936#[cfg(test)]
937mod tests {
938    use super::*;
939
940    #[test]
941    fn tool_output_display() {
942        let output = ToolOutput {
943            tool_name: ToolName::new("bash"),
944            summary: "$ echo hello\nhello".to_owned(),
945            blocks_executed: 1,
946            filter_stats: None,
947            diff: None,
948            streamed: false,
949            terminal_id: None,
950            locations: None,
951            raw_response: None,
952            claim_source: None,
953        };
954        assert_eq!(output.to_string(), "$ echo hello\nhello");
955    }
956
957    #[test]
958    fn tool_error_blocked_display() {
959        let err = ToolError::Blocked {
960            command: "rm -rf /".to_owned(),
961        };
962        assert_eq!(err.to_string(), "command blocked by policy: rm -rf /");
963    }
964
965    #[test]
966    fn tool_error_sandbox_violation_display() {
967        let err = ToolError::SandboxViolation {
968            path: "/etc/shadow".to_owned(),
969        };
970        assert_eq!(err.to_string(), "path not allowed by sandbox: /etc/shadow");
971    }
972
973    #[test]
974    fn tool_error_confirmation_required_display() {
975        let err = ToolError::ConfirmationRequired {
976            command: "rm -rf /tmp".to_owned(),
977        };
978        assert_eq!(
979            err.to_string(),
980            "command requires confirmation: rm -rf /tmp"
981        );
982    }
983
984    #[test]
985    fn tool_error_timeout_display() {
986        let err = ToolError::Timeout { timeout_secs: 30 };
987        assert_eq!(err.to_string(), "command timed out after 30s");
988    }
989
990    #[test]
991    fn tool_error_invalid_params_display() {
992        let err = ToolError::InvalidParams {
993            message: "missing field `command`".to_owned(),
994        };
995        assert_eq!(
996            err.to_string(),
997            "invalid tool parameters: missing field `command`"
998        );
999    }
1000
1001    #[test]
1002    fn deserialize_params_valid() {
1003        #[derive(Debug, serde::Deserialize, PartialEq)]
1004        struct P {
1005            name: String,
1006            count: u32,
1007        }
1008        let mut map = serde_json::Map::new();
1009        map.insert("name".to_owned(), serde_json::json!("test"));
1010        map.insert("count".to_owned(), serde_json::json!(42));
1011        let p: P = deserialize_params(&map).unwrap();
1012        assert_eq!(
1013            p,
1014            P {
1015                name: "test".to_owned(),
1016                count: 42
1017            }
1018        );
1019    }
1020
1021    #[test]
1022    fn deserialize_params_missing_required_field() {
1023        #[derive(Debug, serde::Deserialize)]
1024        #[allow(dead_code)]
1025        struct P {
1026            name: String,
1027        }
1028        let map = serde_json::Map::new();
1029        let err = deserialize_params::<P>(&map).unwrap_err();
1030        assert!(matches!(err, ToolError::InvalidParams { .. }));
1031    }
1032
1033    #[test]
1034    fn deserialize_params_wrong_type() {
1035        #[derive(Debug, serde::Deserialize)]
1036        #[allow(dead_code)]
1037        struct P {
1038            count: u32,
1039        }
1040        let mut map = serde_json::Map::new();
1041        map.insert("count".to_owned(), serde_json::json!("not a number"));
1042        let err = deserialize_params::<P>(&map).unwrap_err();
1043        assert!(matches!(err, ToolError::InvalidParams { .. }));
1044    }
1045
1046    #[test]
1047    fn deserialize_params_all_optional_empty() {
1048        #[derive(Debug, serde::Deserialize, PartialEq)]
1049        struct P {
1050            name: Option<String>,
1051        }
1052        let map = serde_json::Map::new();
1053        let p: P = deserialize_params(&map).unwrap();
1054        assert_eq!(p, P { name: None });
1055    }
1056
1057    #[test]
1058    fn deserialize_params_ignores_extra_fields() {
1059        #[derive(Debug, serde::Deserialize, PartialEq)]
1060        struct P {
1061            name: String,
1062        }
1063        let mut map = serde_json::Map::new();
1064        map.insert("name".to_owned(), serde_json::json!("test"));
1065        map.insert("extra".to_owned(), serde_json::json!(true));
1066        let p: P = deserialize_params(&map).unwrap();
1067        assert_eq!(
1068            p,
1069            P {
1070                name: "test".to_owned()
1071            }
1072        );
1073    }
1074
1075    #[test]
1076    fn tool_error_execution_display() {
1077        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash not found");
1078        let err = ToolError::Execution(io_err);
1079        assert!(err.to_string().starts_with("execution failed:"));
1080        assert!(err.to_string().contains("bash not found"));
1081    }
1082
1083    // ErrorKind classification tests
1084    #[test]
1085    fn error_kind_timeout_is_transient() {
1086        let err = ToolError::Timeout { timeout_secs: 30 };
1087        assert_eq!(err.kind(), ErrorKind::Transient);
1088    }
1089
1090    #[test]
1091    fn error_kind_blocked_is_permanent() {
1092        let err = ToolError::Blocked {
1093            command: "rm -rf /".to_owned(),
1094        };
1095        assert_eq!(err.kind(), ErrorKind::Permanent);
1096    }
1097
1098    #[test]
1099    fn error_kind_sandbox_violation_is_permanent() {
1100        let err = ToolError::SandboxViolation {
1101            path: "/etc/shadow".to_owned(),
1102        };
1103        assert_eq!(err.kind(), ErrorKind::Permanent);
1104    }
1105
1106    #[test]
1107    fn error_kind_cancelled_is_permanent() {
1108        assert_eq!(ToolError::Cancelled.kind(), ErrorKind::Permanent);
1109    }
1110
1111    #[test]
1112    fn error_kind_invalid_params_is_permanent() {
1113        let err = ToolError::InvalidParams {
1114            message: "bad arg".to_owned(),
1115        };
1116        assert_eq!(err.kind(), ErrorKind::Permanent);
1117    }
1118
1119    #[test]
1120    fn error_kind_confirmation_required_is_permanent() {
1121        let err = ToolError::ConfirmationRequired {
1122            command: "rm /tmp/x".to_owned(),
1123        };
1124        assert_eq!(err.kind(), ErrorKind::Permanent);
1125    }
1126
1127    #[test]
1128    fn error_kind_execution_timed_out_is_transient() {
1129        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1130        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1131    }
1132
1133    #[test]
1134    fn error_kind_execution_interrupted_is_transient() {
1135        let io_err = std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted");
1136        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1137    }
1138
1139    #[test]
1140    fn error_kind_execution_connection_reset_is_transient() {
1141        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
1142        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1143    }
1144
1145    #[test]
1146    fn error_kind_execution_broken_pipe_is_transient() {
1147        let io_err = std::io::Error::new(std::io::ErrorKind::BrokenPipe, "pipe broken");
1148        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1149    }
1150
1151    #[test]
1152    fn error_kind_execution_would_block_is_transient() {
1153        let io_err = std::io::Error::new(std::io::ErrorKind::WouldBlock, "would block");
1154        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1155    }
1156
1157    #[test]
1158    fn error_kind_execution_connection_aborted_is_transient() {
1159        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
1160        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1161    }
1162
1163    #[test]
1164    fn error_kind_execution_not_found_is_permanent() {
1165        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
1166        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1167    }
1168
1169    #[test]
1170    fn error_kind_execution_permission_denied_is_permanent() {
1171        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied");
1172        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1173    }
1174
1175    #[test]
1176    fn error_kind_execution_other_is_permanent() {
1177        let io_err = std::io::Error::other("some other error");
1178        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1179    }
1180
1181    #[test]
1182    fn error_kind_execution_already_exists_is_permanent() {
1183        let io_err = std::io::Error::new(std::io::ErrorKind::AlreadyExists, "exists");
1184        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1185    }
1186
1187    #[test]
1188    fn error_kind_display() {
1189        assert_eq!(ErrorKind::Transient.to_string(), "transient");
1190        assert_eq!(ErrorKind::Permanent.to_string(), "permanent");
1191    }
1192
1193    #[test]
1194    fn truncate_tool_output_short_passthrough() {
1195        let short = "hello world";
1196        assert_eq!(truncate_tool_output(short), short);
1197    }
1198
1199    #[test]
1200    fn truncate_tool_output_exact_limit() {
1201        let exact = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
1202        assert_eq!(truncate_tool_output(&exact), exact);
1203    }
1204
1205    #[test]
1206    fn truncate_tool_output_long_split() {
1207        let long = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
1208        let result = truncate_tool_output(&long);
1209        assert!(result.contains("truncated"));
1210        assert!(result.len() < long.len());
1211    }
1212
1213    #[test]
1214    fn truncate_tool_output_notice_contains_count() {
1215        let long = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
1216        let result = truncate_tool_output(&long);
1217        assert!(result.contains("truncated"));
1218        assert!(result.contains("chars"));
1219    }
1220
1221    #[derive(Debug)]
1222    struct DefaultExecutor;
1223    impl ToolExecutor for DefaultExecutor {
1224        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1225            Ok(None)
1226        }
1227    }
1228
1229    #[tokio::test]
1230    async fn execute_tool_call_default_returns_none() {
1231        let exec = DefaultExecutor;
1232        let call = ToolCall {
1233            tool_id: ToolName::new("anything"),
1234            params: serde_json::Map::new(),
1235            caller_id: None,
1236            context: None,
1237
1238            tool_call_id: String::new(),
1239        };
1240        let result = exec.execute_tool_call(&call).await.unwrap();
1241        assert!(result.is_none());
1242    }
1243
1244    #[test]
1245    fn filter_stats_savings_pct() {
1246        let fs = FilterStats {
1247            raw_chars: 1000,
1248            filtered_chars: 200,
1249            ..Default::default()
1250        };
1251        assert!((fs.savings_pct() - 80.0).abs() < 0.01);
1252    }
1253
1254    #[test]
1255    fn filter_stats_savings_pct_zero() {
1256        let fs = FilterStats::default();
1257        assert!((fs.savings_pct()).abs() < 0.01);
1258    }
1259
1260    #[test]
1261    fn filter_stats_estimated_tokens_saved() {
1262        let fs = FilterStats {
1263            raw_chars: 1000,
1264            filtered_chars: 200,
1265            ..Default::default()
1266        };
1267        assert_eq!(fs.estimated_tokens_saved(), 200); // (1000 - 200) / 4
1268    }
1269
1270    #[test]
1271    fn filter_stats_format_inline() {
1272        let fs = FilterStats {
1273            raw_chars: 1000,
1274            filtered_chars: 200,
1275            raw_lines: 342,
1276            filtered_lines: 28,
1277            ..Default::default()
1278        };
1279        let line = fs.format_inline("shell");
1280        assert_eq!(line, "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered");
1281    }
1282
1283    #[test]
1284    fn filter_stats_format_inline_zero() {
1285        let fs = FilterStats::default();
1286        let line = fs.format_inline("bash");
1287        assert_eq!(line, "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered");
1288    }
1289
1290    // DynExecutor tests
1291
1292    struct FixedExecutor {
1293        tool_id: &'static str,
1294        output: &'static str,
1295    }
1296
1297    impl ToolExecutor for FixedExecutor {
1298        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1299            Ok(Some(ToolOutput {
1300                tool_name: ToolName::new(self.tool_id),
1301                summary: self.output.to_owned(),
1302                blocks_executed: 1,
1303                filter_stats: None,
1304                diff: None,
1305                streamed: false,
1306                terminal_id: None,
1307                locations: None,
1308                raw_response: None,
1309                claim_source: None,
1310            }))
1311        }
1312
1313        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1314            vec![]
1315        }
1316
1317        async fn execute_tool_call(
1318            &self,
1319            _call: &ToolCall,
1320        ) -> Result<Option<ToolOutput>, ToolError> {
1321            Ok(Some(ToolOutput {
1322                tool_name: ToolName::new(self.tool_id),
1323                summary: self.output.to_owned(),
1324                blocks_executed: 1,
1325                filter_stats: None,
1326                diff: None,
1327                streamed: false,
1328                terminal_id: None,
1329                locations: None,
1330                raw_response: None,
1331                claim_source: None,
1332            }))
1333        }
1334    }
1335
1336    #[tokio::test]
1337    async fn dyn_executor_execute_delegates() {
1338        let inner = std::sync::Arc::new(FixedExecutor {
1339            tool_id: "bash",
1340            output: "hello",
1341        });
1342        let exec = DynExecutor(inner);
1343        let result = exec.execute("```bash\necho hello\n```").await.unwrap();
1344        assert!(result.is_some());
1345        assert_eq!(result.unwrap().summary, "hello");
1346    }
1347
1348    #[tokio::test]
1349    async fn dyn_executor_execute_confirmed_delegates() {
1350        let inner = std::sync::Arc::new(FixedExecutor {
1351            tool_id: "bash",
1352            output: "confirmed",
1353        });
1354        let exec = DynExecutor(inner);
1355        let result = exec.execute_confirmed("...").await.unwrap();
1356        assert!(result.is_some());
1357        assert_eq!(result.unwrap().summary, "confirmed");
1358    }
1359
1360    #[test]
1361    fn dyn_executor_tool_definitions_delegates() {
1362        let inner = std::sync::Arc::new(FixedExecutor {
1363            tool_id: "my_tool",
1364            output: "",
1365        });
1366        let exec = DynExecutor(inner);
1367        // FixedExecutor returns empty definitions; verify delegation occurs without panic.
1368        let defs = exec.tool_definitions();
1369        assert!(defs.is_empty());
1370    }
1371
1372    #[tokio::test]
1373    async fn dyn_executor_execute_tool_call_delegates() {
1374        let inner = std::sync::Arc::new(FixedExecutor {
1375            tool_id: "bash",
1376            output: "tool_call_result",
1377        });
1378        let exec = DynExecutor(inner);
1379        let call = ToolCall {
1380            tool_id: ToolName::new("bash"),
1381            params: serde_json::Map::new(),
1382            caller_id: None,
1383            context: None,
1384
1385            tool_call_id: String::new(),
1386        };
1387        let result = exec.execute_tool_call(&call).await.unwrap();
1388        assert!(result.is_some());
1389        assert_eq!(result.unwrap().summary, "tool_call_result");
1390    }
1391
1392    #[test]
1393    fn dyn_executor_set_effective_trust_delegates() {
1394        use std::sync::atomic::{AtomicU8, Ordering};
1395
1396        struct TrustCapture(AtomicU8);
1397        impl ToolExecutor for TrustCapture {
1398            async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1399                Ok(None)
1400            }
1401            fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
1402                // encode: Trusted=0, Verified=1, Quarantined=2, Blocked=3
1403                let v = match level {
1404                    crate::SkillTrustLevel::Trusted => 0u8,
1405                    crate::SkillTrustLevel::Verified => 1,
1406                    crate::SkillTrustLevel::Quarantined => 2,
1407                    crate::SkillTrustLevel::Blocked => 3,
1408                };
1409                self.0.store(v, Ordering::Relaxed);
1410            }
1411        }
1412
1413        let inner = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
1414        let exec =
1415            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1416        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Quarantined);
1417        assert_eq!(inner.0.load(Ordering::Relaxed), 2);
1418
1419        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Blocked);
1420        assert_eq!(inner.0.load(Ordering::Relaxed), 3);
1421    }
1422
1423    #[test]
1424    fn extract_fenced_blocks_no_prefix_match() {
1425        // ```bashrc must NOT match when searching for "bash"
1426        assert!(extract_fenced_blocks("```bashrc\nfoo\n```", "bash").is_empty());
1427        // exact match
1428        assert_eq!(
1429            extract_fenced_blocks("```bash\nfoo\n```", "bash"),
1430            vec!["foo"]
1431        );
1432        // trailing space is fine
1433        assert_eq!(
1434            extract_fenced_blocks("```bash \nfoo\n```", "bash"),
1435            vec!["foo"]
1436        );
1437    }
1438
1439    // ── ToolError::category() delegation tests ────────────────────────────────
1440
1441    #[test]
1442    fn tool_error_http_400_category_is_invalid_parameters() {
1443        use crate::error_taxonomy::ToolErrorCategory;
1444        let err = ToolError::Http {
1445            status: 400,
1446            message: "bad request".to_owned(),
1447        };
1448        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1449    }
1450
1451    #[test]
1452    fn tool_error_http_401_category_is_policy_blocked() {
1453        use crate::error_taxonomy::ToolErrorCategory;
1454        let err = ToolError::Http {
1455            status: 401,
1456            message: "unauthorized".to_owned(),
1457        };
1458        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1459    }
1460
1461    #[test]
1462    fn tool_error_http_403_category_is_policy_blocked() {
1463        use crate::error_taxonomy::ToolErrorCategory;
1464        let err = ToolError::Http {
1465            status: 403,
1466            message: "forbidden".to_owned(),
1467        };
1468        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1469    }
1470
1471    #[test]
1472    fn tool_error_http_404_category_is_permanent_failure() {
1473        use crate::error_taxonomy::ToolErrorCategory;
1474        let err = ToolError::Http {
1475            status: 404,
1476            message: "not found".to_owned(),
1477        };
1478        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1479    }
1480
1481    #[test]
1482    fn tool_error_http_429_category_is_rate_limited() {
1483        use crate::error_taxonomy::ToolErrorCategory;
1484        let err = ToolError::Http {
1485            status: 429,
1486            message: "too many requests".to_owned(),
1487        };
1488        assert_eq!(err.category(), ToolErrorCategory::RateLimited);
1489    }
1490
1491    #[test]
1492    fn tool_error_http_500_category_is_server_error() {
1493        use crate::error_taxonomy::ToolErrorCategory;
1494        let err = ToolError::Http {
1495            status: 500,
1496            message: "internal server error".to_owned(),
1497        };
1498        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1499    }
1500
1501    #[test]
1502    fn tool_error_http_502_category_is_server_error() {
1503        use crate::error_taxonomy::ToolErrorCategory;
1504        let err = ToolError::Http {
1505            status: 502,
1506            message: "bad gateway".to_owned(),
1507        };
1508        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1509    }
1510
1511    #[test]
1512    fn tool_error_http_503_category_is_server_error() {
1513        use crate::error_taxonomy::ToolErrorCategory;
1514        let err = ToolError::Http {
1515            status: 503,
1516            message: "service unavailable".to_owned(),
1517        };
1518        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1519    }
1520
1521    #[test]
1522    fn tool_error_http_503_is_transient_triggers_phase2_retry() {
1523        // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
1524        // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
1525        let err = ToolError::Http {
1526            status: 503,
1527            message: "service unavailable".to_owned(),
1528        };
1529        assert_eq!(
1530            err.kind(),
1531            ErrorKind::Transient,
1532            "HTTP 503 must be Transient so Phase 2 retry fires"
1533        );
1534    }
1535
1536    #[test]
1537    fn tool_error_blocked_category_is_policy_blocked() {
1538        use crate::error_taxonomy::ToolErrorCategory;
1539        let err = ToolError::Blocked {
1540            command: "rm -rf /".to_owned(),
1541        };
1542        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1543    }
1544
1545    #[test]
1546    fn tool_error_sandbox_violation_category_is_policy_blocked() {
1547        use crate::error_taxonomy::ToolErrorCategory;
1548        let err = ToolError::SandboxViolation {
1549            path: "/etc/shadow".to_owned(),
1550        };
1551        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1552    }
1553
1554    #[test]
1555    fn tool_error_confirmation_required_category() {
1556        use crate::error_taxonomy::ToolErrorCategory;
1557        let err = ToolError::ConfirmationRequired {
1558            command: "rm /tmp/x".to_owned(),
1559        };
1560        assert_eq!(err.category(), ToolErrorCategory::ConfirmationRequired);
1561    }
1562
1563    #[test]
1564    fn tool_error_timeout_category() {
1565        use crate::error_taxonomy::ToolErrorCategory;
1566        let err = ToolError::Timeout { timeout_secs: 30 };
1567        assert_eq!(err.category(), ToolErrorCategory::Timeout);
1568    }
1569
1570    #[test]
1571    fn tool_error_cancelled_category() {
1572        use crate::error_taxonomy::ToolErrorCategory;
1573        assert_eq!(
1574            ToolError::Cancelled.category(),
1575            ToolErrorCategory::Cancelled
1576        );
1577    }
1578
1579    #[test]
1580    fn tool_error_invalid_params_category() {
1581        use crate::error_taxonomy::ToolErrorCategory;
1582        let err = ToolError::InvalidParams {
1583            message: "missing field".to_owned(),
1584        };
1585        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1586    }
1587
1588    // B2 regression: Execution(NotFound) must NOT produce ToolNotFound.
1589    #[test]
1590    fn tool_error_execution_not_found_category_is_permanent_failure() {
1591        use crate::error_taxonomy::ToolErrorCategory;
1592        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash: not found");
1593        let err = ToolError::Execution(io_err);
1594        let cat = err.category();
1595        assert_ne!(
1596            cat,
1597            ToolErrorCategory::ToolNotFound,
1598            "Execution(NotFound) must NOT map to ToolNotFound"
1599        );
1600        assert_eq!(cat, ToolErrorCategory::PermanentFailure);
1601    }
1602
1603    #[test]
1604    fn tool_error_execution_timed_out_category_is_timeout() {
1605        use crate::error_taxonomy::ToolErrorCategory;
1606        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out");
1607        assert_eq!(
1608            ToolError::Execution(io_err).category(),
1609            ToolErrorCategory::Timeout
1610        );
1611    }
1612
1613    #[test]
1614    fn tool_error_execution_connection_refused_category_is_network_error() {
1615        use crate::error_taxonomy::ToolErrorCategory;
1616        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionRefused, "refused");
1617        assert_eq!(
1618            ToolError::Execution(io_err).category(),
1619            ToolErrorCategory::NetworkError
1620        );
1621    }
1622
1623    // B4 regression: Http/network/transient categories must NOT be quality failures.
1624    #[test]
1625    fn b4_tool_error_http_429_not_quality_failure() {
1626        let err = ToolError::Http {
1627            status: 429,
1628            message: "rate limited".to_owned(),
1629        };
1630        assert!(
1631            !err.category().is_quality_failure(),
1632            "RateLimited must not be a quality failure"
1633        );
1634    }
1635
1636    #[test]
1637    fn b4_tool_error_http_503_not_quality_failure() {
1638        let err = ToolError::Http {
1639            status: 503,
1640            message: "service unavailable".to_owned(),
1641        };
1642        assert!(
1643            !err.category().is_quality_failure(),
1644            "ServerError must not be a quality failure"
1645        );
1646    }
1647
1648    #[test]
1649    fn b4_tool_error_execution_timed_out_not_quality_failure() {
1650        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1651        assert!(
1652            !ToolError::Execution(io_err).category().is_quality_failure(),
1653            "Timeout must not be a quality failure"
1654        );
1655    }
1656
1657    // ── ToolError::Shell category tests ──────────────────────────────────────
1658
1659    #[test]
1660    fn tool_error_shell_exit126_is_policy_blocked() {
1661        use crate::error_taxonomy::ToolErrorCategory;
1662        let err = ToolError::Shell {
1663            exit_code: 126,
1664            category: ToolErrorCategory::PolicyBlocked,
1665            message: "permission denied".to_owned(),
1666        };
1667        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1668    }
1669
1670    #[test]
1671    fn tool_error_shell_exit127_is_permanent_failure() {
1672        use crate::error_taxonomy::ToolErrorCategory;
1673        let err = ToolError::Shell {
1674            exit_code: 127,
1675            category: ToolErrorCategory::PermanentFailure,
1676            message: "command not found".to_owned(),
1677        };
1678        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1679        assert!(!err.category().is_retryable());
1680    }
1681
1682    #[test]
1683    fn tool_error_shell_not_quality_failure() {
1684        use crate::error_taxonomy::ToolErrorCategory;
1685        let err = ToolError::Shell {
1686            exit_code: 127,
1687            category: ToolErrorCategory::PermanentFailure,
1688            message: "command not found".to_owned(),
1689        };
1690        // Shell exit errors are not attributable to LLM output quality.
1691        assert!(!err.category().is_quality_failure());
1692    }
1693
1694    // ── requires_confirmation / requires_confirmation_erased tests (#3644) ───
1695
1696    /// Stub implementing only `ToolExecutor` without overriding `requires_confirmation`.
1697    struct StubExecutor;
1698    impl ToolExecutor for StubExecutor {
1699        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1700            Ok(None)
1701        }
1702    }
1703
1704    /// Stub that always signals confirmation is required via `ToolExecutor::requires_confirmation`.
1705    struct ConfirmingExecutor;
1706    impl ToolExecutor for ConfirmingExecutor {
1707        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1708            Ok(None)
1709        }
1710        fn requires_confirmation(&self, _call: &ToolCall) -> bool {
1711            true
1712        }
1713    }
1714
1715    fn dummy_call() -> ToolCall {
1716        ToolCall {
1717            tool_id: ToolName::new("test"),
1718            params: serde_json::Map::new(),
1719            caller_id: None,
1720            context: None,
1721
1722            tool_call_id: String::new(),
1723        }
1724    }
1725
1726    #[test]
1727    fn requires_confirmation_default_is_false_on_tool_executor() {
1728        let exec = StubExecutor;
1729        assert!(
1730            !exec.requires_confirmation(&dummy_call()),
1731            "ToolExecutor default requires_confirmation must be false"
1732        );
1733    }
1734
1735    #[test]
1736    fn requires_confirmation_erased_delegates_to_tool_executor_default() {
1737        // blanket impl routes erased → ToolExecutor::requires_confirmation (= false)
1738        let exec = StubExecutor;
1739        assert!(
1740            !ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1741            "requires_confirmation_erased via blanket impl must return false for stub executor"
1742        );
1743    }
1744
1745    #[test]
1746    fn requires_confirmation_erased_delegates_override() {
1747        // ConfirmingExecutor overrides requires_confirmation → true;
1748        // blanket impl must propagate this.
1749        let exec = ConfirmingExecutor;
1750        assert!(
1751            ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1752            "requires_confirmation_erased must return true when ToolExecutor override returns true"
1753        );
1754    }
1755
1756    #[test]
1757    fn requires_confirmation_erased_default_on_erased_trait_is_true() {
1758        // ErasedToolExecutor's own default (trait method body) returns true.
1759        // We construct a DynExecutor wrapping ConfirmingExecutor and verify via the erased path.
1760        // (We cannot instantiate ErasedToolExecutor directly without a concrete type.)
1761        // Instead verify via a type that only implements ErasedToolExecutor manually:
1762        struct ManualErased;
1763        impl ErasedToolExecutor for ManualErased {
1764            fn execute_erased<'a>(
1765                &'a self,
1766                _response: &'a str,
1767            ) -> std::pin::Pin<
1768                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1769            > {
1770                Box::pin(std::future::ready(Ok(None)))
1771            }
1772            fn execute_confirmed_erased<'a>(
1773                &'a self,
1774                _response: &'a str,
1775            ) -> std::pin::Pin<
1776                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1777            > {
1778                Box::pin(std::future::ready(Ok(None)))
1779            }
1780            fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
1781                vec![]
1782            }
1783            fn execute_tool_call_erased<'a>(
1784                &'a self,
1785                _call: &'a ToolCall,
1786            ) -> std::pin::Pin<
1787                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1788            > {
1789                Box::pin(std::future::ready(Ok(None)))
1790            }
1791            fn is_tool_retryable_erased(&self, _tool_id: &str) -> bool {
1792                false
1793            }
1794            // requires_confirmation_erased NOT overridden → trait default returns true
1795        }
1796        let exec = ManualErased;
1797        assert!(
1798            exec.requires_confirmation_erased(&dummy_call()),
1799            "ErasedToolExecutor trait-level default for requires_confirmation_erased must be true"
1800        );
1801    }
1802
1803    // ── DynExecutor::requires_confirmation delegation tests (#3650) ──────────
1804
1805    #[test]
1806    fn dyn_executor_requires_confirmation_delegates() {
1807        let inner = std::sync::Arc::new(ConfirmingExecutor);
1808        let exec =
1809            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1810        assert!(
1811            ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1812            "DynExecutor must delegate requires_confirmation to inner executor"
1813        );
1814    }
1815
1816    #[test]
1817    fn dyn_executor_requires_confirmation_default_false() {
1818        let inner = std::sync::Arc::new(StubExecutor);
1819        let exec =
1820            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1821        assert!(
1822            !ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1823            "DynExecutor must return false when inner executor does not require confirmation"
1824        );
1825    }
1826}