Skip to main content

zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Data for rendering file diffs in the TUI.
///
/// Produced by [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor)
/// when a tool call modifies a tracked file. The TUI uses this to display a side-by-side diff.
///
/// The before/after content is carried as owned strings. Instances travel on
/// [`ToolOutput::diff`] and on the `diff` field of [`ToolEvent::Completed`].
#[derive(Debug, Clone)]
pub struct DiffData {
    /// Relative or absolute path to the file that was modified.
    pub file_path: String,
    /// File content before the tool executed.
    pub old_content: String,
    /// File content after the tool executed.
    pub new_content: String,
}
23
/// Structured tool invocation from LLM.
///
/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`].
///
/// The two optional fields (`caller_id`, `context`) use `None` for "not provided";
/// `tool_call_id` instead uses the empty string as its "not set" value.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolCall, ExecutionContext};
/// use zeph_common::ToolName;
///
/// let call = ToolCall {
///     tool_id: ToolName::new("bash"),
///     params: {
///         let mut m = serde_json::Map::new();
///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
///         m
///     },
///     caller_id: Some("user-42".to_owned()),
///     context: Some(ExecutionContext::new().with_name("repo")),
///     tool_call_id: String::new(),
/// };
/// assert_eq!(call.tool_id, "bash");
/// ```
#[derive(Debug, Clone)]
pub struct ToolCall {
    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
    pub tool_id: ToolName,
    /// JSON parameters for the tool call, deserialized into the tool's parameter struct
    /// (see [`deserialize_params`]).
    pub params: serde_json::Map<String, serde_json::Value>,
    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
    /// `None` for system-initiated calls (scheduler, self-learning, internal).
    pub caller_id: Option<String>,
    /// Per-turn execution environment. `None` means use the executor default (process CWD
    /// and inherited env), which is identical to the behaviour before this field existed.
    pub context: Option<crate::ExecutionContext>,
    /// Opaque tool call ID used to correlate [`ToolEvent::OutputChunk`] events with
    /// their originating tool call in the TUI. Empty when not set by the agent loop.
    pub tool_call_id: String,
}
64
/// Cumulative filter statistics for a single tool execution.
///
/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
///
/// The derived [`Default`] yields zeroed counters, `None` for both optional fields and an
/// empty `kept_lines`, which lets callers fill in only the fields they have via
/// `..Default::default()`.
#[derive(Debug, Clone, Default)]
pub struct FilterStats {
    /// Raw character count before filtering.
    pub raw_chars: usize,
    /// Character count after filtering.
    pub filtered_chars: usize,
    /// Raw line count before filtering.
    pub raw_lines: usize,
    /// Line count after filtering.
    pub filtered_lines: usize,
    /// Worst-case confidence across all applied filters.
    pub confidence: Option<crate::FilterConfidence>,
    /// The shell command that produced this output, for display purposes.
    pub command: Option<String>,
    /// Zero-based line indices that were kept after filtering.
    pub kept_lines: Vec<usize>,
}
86
87impl FilterStats {
88    /// Returns the percentage of characters removed by filtering.
89    ///
90    /// Returns `0.0` when there was no raw output to filter.
91    #[must_use]
92    #[allow(clippy::cast_precision_loss)]
93    pub fn savings_pct(&self) -> f64 {
94        if self.raw_chars == 0 {
95            return 0.0;
96        }
97        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
98    }
99
100    /// Estimates the number of LLM tokens saved by filtering.
101    ///
102    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
103    /// not for billing or exact budget calculations.
104    #[must_use]
105    pub fn estimated_tokens_saved(&self) -> usize {
106        self.raw_chars.saturating_sub(self.filtered_chars) / 4
107    }
108
109    /// Formats a one-line filter summary for log messages and TUI status.
110    ///
111    /// # Example
112    ///
113    /// ```rust
114    /// use zeph_tools::FilterStats;
115    ///
116    /// let stats = FilterStats {
117    ///     raw_chars: 1000,
118    ///     filtered_chars: 400,
119    ///     raw_lines: 50,
120    ///     filtered_lines: 20,
121    ///     command: Some("cargo build".to_owned()),
122    ///     ..Default::default()
123    /// };
124    /// let summary = stats.format_inline("shell");
125    /// assert!(summary.contains("60.0% filtered"));
126    /// ```
127    #[must_use]
128    pub fn format_inline(&self, tool_name: &str) -> String {
129        let cmd_label = self
130            .command
131            .as_deref()
132            .map(|c| {
133                let trimmed = c.trim();
134                if trimmed.len() > 60 {
135                    format!(" `{}…`", &trimmed[..57])
136                } else {
137                    format!(" `{trimmed}`")
138                }
139            })
140            .unwrap_or_default();
141        format!(
142            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
143            self.raw_lines,
144            self.filtered_lines,
145            self.savings_pct()
146        )
147    }
148}
149
/// Provenance of a tool execution result.
///
/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
/// The enum itself has no "unknown" variant: an unspecified source (pass-through code,
/// mocks, tests) is expressed as `None` in [`ToolOutput::claim_source`].
///
/// Variants are (de)serialized in `snake_case` (e.g. `WebScrape` ⇄ `"web_scrape"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClaimSource {
    /// Local shell command execution.
    Shell,
    /// Local file system read/write.
    FileSystem,
    /// HTTP web scrape.
    WebScrape,
    /// MCP server tool response.
    Mcp,
    /// A2A agent message.
    A2a,
    /// Code search (LSP or semantic).
    CodeSearch,
    /// Agent diagnostics (internal).
    Diagnostics,
    /// Memory retrieval (semantic search).
    Memory,
    /// Telegram moderation action (reaction deletion).
    Moderation,
}
177
/// Structured result from tool execution.
///
/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
///
/// The [`Display`](fmt::Display) implementation writes `summary` verbatim, so
/// `output.to_string()` equals `output.summary`.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolOutput, executor::ClaimSource};
/// use zeph_common::ToolName;
///
/// let output = ToolOutput {
///     tool_name: ToolName::new("shell"),
///     summary: "hello\n".to_owned(),
///     blocks_executed: 1,
///     filter_stats: None,
///     diff: None,
///     streamed: false,
///     terminal_id: None,
///     locations: None,
///     raw_response: None,
///     claim_source: Some(ClaimSource::Shell),
/// };
/// assert_eq!(output.to_string(), "hello\n");
/// ```
#[derive(Debug, Clone)]
pub struct ToolOutput {
    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
    pub tool_name: ToolName,
    /// Human-readable result text injected into the LLM context.
    pub summary: String,
    /// Number of code blocks processed in this invocation.
    pub blocks_executed: u32,
    /// Output filter statistics when filtering was applied, `None` otherwise.
    pub filter_stats: Option<FilterStats>,
    /// File diff data for TUI display when the tool modified a tracked file.
    pub diff: Option<DiffData>,
    /// Whether this tool already streamed its output via `ToolEvent` channel.
    pub streamed: bool,
    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
    pub terminal_id: Option<String>,
    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
    pub locations: Option<Vec<String>>,
    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
    pub raw_response: Option<serde_json::Value>,
    /// Provenance of this tool result. Set by the executor at construction time.
    /// `None` in pass-through wrappers, mocks, and tests.
    pub claim_source: Option<ClaimSource>,
}
227
228impl fmt::Display for ToolOutput {
229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230        f.write_str(&self.summary)
231    }
232}
233
/// Maximum size of tool output injected into the LLM context without truncation.
///
/// Output that exceeds this limit is split into a head and tail via [`truncate_tool_output`]
/// to keep both the beginning and end of large command outputs.
///
/// Despite the name, the limit is compared against the UTF-8 byte length of the output
/// (`str::len`), not the number of Unicode characters; the two only coincide for ASCII.
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;
239
/// Truncate tool output that exceeds [`MAX_TOOL_OUTPUT_CHARS`] using a head+tail split.
///
/// Equivalent to `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`: input whose
/// byte length is within the limit is returned as an unchanged copy.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output;
///
/// let short = "hello world";
/// assert_eq!(truncate_tool_output(short), short);
/// ```
#[must_use]
pub fn truncate_tool_output(output: &str) -> String {
    truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)
}
256
/// Shorten `output` to a head+tail excerpt when it is larger than `max_chars`.
///
/// Sizes are measured in UTF-8 bytes (`str::len`). Input of at most `max_chars` bytes is
/// returned as an unchanged copy. Otherwise up to `max_chars / 2` bytes are kept from each
/// end, with a marker line between them reporting how many bytes were dropped. The head
/// boundary is moved backwards and the tail boundary forwards until each lands on a valid
/// UTF-8 character boundary, so no character is ever split.
///
/// Note that the marker itself adds text, so the result can be longer than the input when
/// the input exceeds `max_chars` only slightly.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output_at;
///
/// let long = "a".repeat(200);
/// let truncated = truncate_tool_output_at(&long, 100);
/// assert!(truncated.contains("truncated"));
/// assert!(truncated.len() < long.len());
/// ```
#[must_use]
pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
    let total = output.len();
    if total <= max_chars {
        return output.to_owned();
    }

    let half = max_chars / 2;

    // Walk the head cut backwards to a char boundary (offset 0 always qualifies).
    let mut head_end = half;
    while !output.is_char_boundary(head_end) {
        head_end -= 1;
    }

    // Walk the tail cut forwards to a char boundary (offset `total` always qualifies).
    let mut tail_start = total - half;
    while !output.is_char_boundary(tail_start) {
        tail_start += 1;
    }

    let (head, tail) = (&output[..head_end], &output[tail_start..]);
    let truncated = total - head.len() - tail.len();

    format!(
        "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
    )
}
289
/// Event emitted during tool execution for real-time UI updates.
///
/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
/// Each event variant corresponds to a phase in the tool execution lifecycle.
#[derive(Debug, Clone)]
pub enum ToolEvent {
    /// The tool has started. Displayed in the TUI as a spinner with the command text.
    Started {
        /// Tool this event refers to.
        tool_name: ToolName,
        /// Command text shown next to the spinner.
        command: String,
        /// Active sandbox profile, if any. `None` when sandbox is disabled.
        sandbox_profile: Option<String>,
        /// Canonical absolute working directory the command will run in.
        /// `None` for executors that do not resolve a per-turn CWD.
        resolved_cwd: Option<String>,
        /// Name of the resolved execution environment (from `[[execution.environments]]`),
        /// or `None` when no named environment was selected.
        execution_env: Option<String>,
    },
    /// A chunk of streaming output was produced (e.g. from a long-running command).
    OutputChunk {
        /// Tool this event refers to.
        tool_name: ToolName,
        /// Command text of the invocation that produced the chunk.
        command: String,
        /// The newly produced piece of output.
        chunk: String,
        /// Opaque tool call ID of the originating call (see `ToolCall::tool_call_id`),
        /// used to correlate chunks with that call. Note that the `Started` variant
        /// carries no ID field of its own.
        /// Empty string when the executor does not have access to the call ID.
        tool_call_id: String,
    },
    /// The tool finished. Contains the full output and optional filter/diff data.
    Completed {
        /// Tool this event refers to.
        tool_name: ToolName,
        /// Command text of the invocation that finished.
        command: String,
        /// Full output text (possibly filtered and truncated).
        output: String,
        /// `true` when the tool exited successfully, `false` on error.
        success: bool,
        /// Output filter statistics, when available.
        filter_stats: Option<FilterStats>,
        /// Diff data, when available.
        diff: Option<DiffData>,
        /// Set when this completion belongs to a background run. `None` for blocking runs.
        run_id: Option<RunId>,
    },
    /// A transactional rollback was performed, restoring or deleting files.
    Rollback {
        /// Tool this event refers to.
        tool_name: ToolName,
        /// Command text of the invocation that was rolled back.
        command: String,
        /// Number of files restored to their pre-execution content.
        restored_count: usize,
        /// Number of files that did not exist before execution and were deleted.
        deleted_count: usize,
    },
}
341
/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
///
/// Capacity is [`TOOL_EVENT_CHANNEL_CAP`] slots. Streaming variants (`OutputChunk`,
/// `Started`) use `try_send` and drop on full; terminal variants (`Completed`, `Rollback`)
/// use `send().await` to guarantee delivery.
///
/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;

/// Receiver half matching [`ToolEventTx`].
pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;

/// Bounded capacity (number of queued [`ToolEvent`]s) for the tool-event channel.
pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
356
/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
///
/// Transient errors may succeed on retry (network blips, race conditions).
/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
///
/// Obtained from [`ToolError::kind`]. Its `Display` form is the lowercase variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ErrorKind {
    /// The operation may succeed if retried.
    Transient,
    /// Retrying will not help; the caller should abort.
    Permanent,
}
366
367impl std::fmt::Display for ErrorKind {
368    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
369        match self {
370            Self::Transient => f.write_str("transient"),
371            Self::Permanent => f.write_str("permanent"),
372        }
373    }
374}
375
/// Errors that can occur during tool execution.
///
/// Use [`ToolError::category`] for fine-grained classification or [`ToolError::kind`]
/// for the coarse transient/permanent split.
#[derive(Debug, thiserror::Error)]
pub enum ToolError {
    /// The command was rejected by policy. The message echoes `command`.
    #[error("command blocked by policy: {command}")]
    Blocked { command: String },

    /// Command was blocked and a safer alternative is available.
    ///
    /// Emitted by [`ShellExecutor`](crate::ShellExecutor) when `suggest_fix` returns a
    /// suggestion. The agent receives both the block reason and the alternative so it can
    /// self-correct without additional prompting.
    ///
    /// The `Display` message is identical to [`ToolError::Blocked`] and does not include
    /// the suggestion; consumers must read the `suggestion` field explicitly.
    #[error("command blocked by policy: {command}")]
    BlockedWithFix {
        command: String,
        suggestion: Option<crate::shell::SafeFixSuggestion>,
    },

    /// The given `path` is not allowed by the sandbox.
    #[error("path not allowed by sandbox: {path}")]
    SandboxViolation { path: String },

    /// The command needs confirmation before it may run.
    #[error("command requires confirmation: {command}")]
    ConfirmationRequired { command: String },

    /// The command did not finish within `timeout_secs` seconds.
    #[error("command timed out after {timeout_secs}s")]
    Timeout { timeout_secs: u64 },

    /// The operation was cancelled.
    #[error("operation cancelled")]
    Cancelled,

    /// The tool parameters were invalid; `message` describes the problem.
    /// Produced by [`deserialize_params`] when deserialization fails.
    #[error("invalid tool parameters: {message}")]
    InvalidParams { message: String },

    /// An I/O error occurred during execution. `#[from]` provides the
    /// `From<std::io::Error>` conversion, so `?` on an `io::Result` yields this variant.
    #[error("execution failed: {0}")]
    Execution(#[from] std::io::Error),

    /// HTTP or API error with status code for fine-grained classification.
    ///
    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
    #[error("HTTP error {status}: {message}")]
    Http { status: u16, message: String },

    /// Shell execution error with explicit exit code and pre-classified category.
    ///
    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
    /// Preserves the exit code for audit logging and the category for skill evolution.
    #[error("shell error (exit {exit_code}): {message}")]
    Shell {
        exit_code: i32,
        category: crate::error_taxonomy::ToolErrorCategory,
        message: String,
    },

    /// Taking a snapshot failed; `reason` describes why.
    #[error("snapshot failed: {reason}")]
    SnapshotFailed { reason: String },

    /// Tool call rejected because the tool id is outside the active capability scope.
    ///
    /// Emitted by `ScopedToolExecutor` before any tool side-effect runs.
    /// The audit log records `error_category = "out_of_scope"`.
    // LLM isolation: task_type is never shown in the error message (P2-OutOfScope).
    #[error("tool call denied by policy")]
    OutOfScope {
        /// Fully-qualified tool id that was rejected.
        tool_id: String,
        /// Active task type at dispatch time, if any.
        task_type: Option<String>,
    },

    /// Tool call blocked by `ShadowProbeExecutor` after the LLM safety probe returned Deny.
    ///
    /// Emitted before any tool side-effect runs. The probe evaluated the full trajectory
    /// context and determined the call is unsafe. Reason is LLM-generated; shown to the
    /// agent loop as the tool result so the model can adapt.
    #[error("tool call denied by safety probe: {reason}")]
    SafetyDenied {
        /// Human-readable explanation from the LLM safety probe.
        reason: String,
    },
}
457
458impl ToolError {
459    /// Fine-grained error classification using the 12-category taxonomy.
460    ///
461    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
462    /// backward compatibility and delegates to `category().error_kind()`.
463    #[must_use]
464    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
465        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
466        match self {
467            Self::Blocked { .. } | Self::BlockedWithFix { .. } | Self::SandboxViolation { .. } => {
468                ToolErrorCategory::PolicyBlocked
469            }
470            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
471            Self::Timeout { .. } => ToolErrorCategory::Timeout,
472            Self::Cancelled => ToolErrorCategory::Cancelled,
473            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
474            Self::Http { status, .. } => classify_http_status(*status),
475            Self::Execution(io_err) => classify_io_error(io_err),
476            Self::Shell { category, .. } => *category,
477            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
478            Self::OutOfScope { .. } | Self::SafetyDenied { .. } => ToolErrorCategory::PolicyBlocked,
479        }
480    }
481
482    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
483    ///
484    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
485    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
486    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
487    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
488    ///   I/O error kinds — retrying would waste time with no benefit.
489    #[must_use]
490    pub fn kind(&self) -> ErrorKind {
491        use crate::error_taxonomy::ToolErrorCategoryExt;
492        self.category().error_kind()
493    }
494}
495
496/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
497///
498/// # Errors
499///
500/// Returns `ToolError::InvalidParams` when deserialization fails.
501pub fn deserialize_params<T: serde::de::DeserializeOwned>(
502    params: &serde_json::Map<String, serde_json::Value>,
503) -> Result<T, ToolError> {
504    let obj = serde_json::Value::Object(params.clone());
505    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
506        message: e.to_string(),
507    })
508}
509
/// Async trait for tool execution backends.
///
/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
/// and [`FileExecutor`](crate::FileExecutor).
///
/// # Contract
///
/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
///   return `Ok(None)` when the executor does not handle the given input — callers must not
///   treat `None` as an error.
/// - Implementors must be `Send + Sync`, every returned future must be `Send`, and methods
///   must be free of blocking I/O.
/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
///   protection) before executing any side-effectful operation.
/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
///   confirmation gates only — all other security controls remain active.
///
/// # Two Invocation Paths
///
/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
///
/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
/// This is the preferred path for new code.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
///
/// #[derive(Debug)]
/// struct EchoExecutor;
///
/// impl ToolExecutor for EchoExecutor {
///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
///         Ok(None) // not a fenced-block executor
///     }
///
///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
///         if call.tool_id != "echo" {
///             return Ok(None);
///         }
///         let text = call.params.get("text")
///             .and_then(|v| v.as_str())
///             .unwrap_or("")
///             .to_owned();
///         Ok(Some(ToolOutput {
///             tool_name: "echo".into(),
///             summary: text,
///             blocks_executed: 1,
///             filter_stats: None,
///             diff: None,
///             streamed: false,
///             terminal_id: None,
///             locations: None,
///             raw_response: None,
///             claim_source: None,
///         }))
///     }
/// }
/// ```
///
/// # TODO (G3 — deferred: Tower-style tool middleware stack)
///
/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
/// are scattered across individual executor implementations. The planned approach is a
/// composable middleware stack similar to Tower's `Service` trait:
///
/// ```text
/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
/// ```
///
/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
/// dynamic dispatch overhead before collapsing D2.
///
/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
///
/// Having two parallel traits creates duplication and confusion. The blanket impl
/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
/// added to both traits. Use `trait_variant::make` or a single object-safe design.
///
/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
pub trait ToolExecutor: Send + Sync {
    /// Parse `response` for fenced tool blocks and execute them.
    ///
    /// This is the only method without a default implementation.
    ///
    /// Returns `Ok(None)` when no tool blocks are found in `response`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
    /// sandbox violation, network error, timeout, etc.).
    fn execute(
        &self,
        response: &str,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;

    /// Execute bypassing confirmation checks (called after user approves).
    ///
    /// Security controls other than the confirmation gate remain active. Default
    /// implementation delegates to [`execute`](ToolExecutor::execute).
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] on execution failure.
    fn execute_confirmed(
        &self,
        response: &str,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        self.execute(response)
    }

    /// Return the tool definitions this executor can handle.
    ///
    /// Used to populate the LLM's tool schema at context-assembly time.
    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
        vec![]
    }

    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
    ///
    /// The default implementation handles nothing: it resolves immediately to `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
    fn execute_tool_call(
        &self,
        _call: &ToolCall,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        std::future::ready(Ok(None))
    }

    /// Execute a structured tool call bypassing confirmation checks.
    ///
    /// Called after the user has explicitly approved the tool invocation.
    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] on execution failure.
    fn execute_tool_call_confirmed(
        &self,
        call: &ToolCall,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        self.execute_tool_call(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    ///
    /// Called by the agent loop before each turn when the active skill specifies env vars.
    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
    ///
    /// Takes `&self`, so implementations that store the value need interior mutability.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    ///
    /// Trust level affects which operations are permitted (e.g. network access, file writes).
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
    /// Shell commands and other non-idempotent operations must keep the default `false`
    /// to prevent double-execution of side-effectful commands.
    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
        false
    }

    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
    ///
    /// Speculative execution requires the tool to be:
    /// 1. Idempotent — repeated execution with the same args produces the same result.
    /// 2. Side-effect-free or cheaply reversible.
    /// 3. Not subject to user confirmation
    ///    ([`requires_confirmation`](ToolExecutor::requires_confirmation) must be `false`
    ///    at call time).
    ///
    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
    /// properties. The engine additionally gates on trust level and confirmation status
    /// regardless of this flag.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use zeph_tools::ToolExecutor;
    ///
    /// struct ReadOnlyExecutor;
    /// impl ToolExecutor for ReadOnlyExecutor {
    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
    ///         Ok(None)
    ///     }
    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
    ///         true // read-only, idempotent
    ///     }
    /// }
    /// ```
    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `false`. Executors that enforce a confirmation policy (e.g. `TrustGateExecutor`)
    /// must override this to reflect their actual policy without executing the tool.
    fn requires_confirmation(&self, _call: &ToolCall) -> bool {
        false
    }
}
719
/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
///
/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
///
/// Implemented automatically for all `T: ToolExecutor + 'static` via the blanket impl below.
/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
///
/// Each returned future borrows both `self` and its argument for the lifetime `'a`.
pub trait ErasedToolExecutor: Send + Sync {
    /// Boxed-future counterpart of [`ToolExecutor::execute`].
    fn execute_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_confirmed`].
    fn execute_confirmed_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Object-safe counterpart of [`ToolExecutor::tool_definitions`].
    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call`].
    fn execute_tool_call_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call_confirmed`].
    ///
    /// The default falls back to
    /// [`execute_tool_call_erased`](ErasedToolExecutor::execute_tool_call_erased), i.e. the
    /// normal (non-bypassing) enforcement path.
    fn execute_tool_call_confirmed_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        // TrustGateExecutor overrides ToolExecutor::execute_tool_call_confirmed; the blanket
        // impl for T: ToolExecutor routes this call through it via execute_tool_call_confirmed_erased.
        // Other implementors fall back to execute_tool_call_erased (normal enforcement path).
        self.execute_tool_call_erased(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Has no default here: every direct implementor must decide explicitly.
    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;

    /// Whether a tool call can be safely dispatched speculatively.
    ///
    /// Default: `false`. Override to `true` in read-only executors.
    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `true` (confirmation required). Implementors that want to allow speculative
    /// dispatch must explicitly return `false`. The blanket impl for `T: ToolExecutor`
    /// delegates to [`ToolExecutor::requires_confirmation`].
    ///
    /// Note the deliberate asymmetry: the default here is the conservative `true`, whereas
    /// [`ToolExecutor::requires_confirmation`] defaults to `false`.
    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
        true
    }
}
785
786impl<T: ToolExecutor> ErasedToolExecutor for T {
787    fn execute_erased<'a>(
788        &'a self,
789        response: &'a str,
790    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
791    {
792        Box::pin(self.execute(response))
793    }
794
795    fn execute_confirmed_erased<'a>(
796        &'a self,
797        response: &'a str,
798    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
799    {
800        Box::pin(self.execute_confirmed(response))
801    }
802
803    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
804        self.tool_definitions()
805    }
806
807    fn execute_tool_call_erased<'a>(
808        &'a self,
809        call: &'a ToolCall,
810    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
811    {
812        Box::pin(self.execute_tool_call(call))
813    }
814
815    fn execute_tool_call_confirmed_erased<'a>(
816        &'a self,
817        call: &'a ToolCall,
818    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
819    {
820        Box::pin(self.execute_tool_call_confirmed(call))
821    }
822
823    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
824        ToolExecutor::set_skill_env(self, env);
825    }
826
827    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
828        ToolExecutor::set_effective_trust(self, level);
829    }
830
831    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
832        ToolExecutor::is_tool_retryable(self, tool_id)
833    }
834
835    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
836        ToolExecutor::is_tool_speculatable(self, tool_id)
837    }
838
839    fn requires_confirmation_erased(&self, call: &ToolCall) -> bool {
840        ToolExecutor::requires_confirmation(self, call)
841    }
842}
843
844/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
845///
846/// Enables dynamic composition of tool executors at runtime without static type chains.
847pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
848
849impl ToolExecutor for DynExecutor {
850    fn execute(
851        &self,
852        response: &str,
853    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
854        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
855        let inner = std::sync::Arc::clone(&self.0);
856        let response = response.to_owned();
857        async move { inner.execute_erased(&response).await }
858    }
859
860    fn execute_confirmed(
861        &self,
862        response: &str,
863    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
864        let inner = std::sync::Arc::clone(&self.0);
865        let response = response.to_owned();
866        async move { inner.execute_confirmed_erased(&response).await }
867    }
868
869    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
870        self.0.tool_definitions_erased()
871    }
872
873    fn execute_tool_call(
874        &self,
875        call: &ToolCall,
876    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
877        let inner = std::sync::Arc::clone(&self.0);
878        let call = call.clone();
879        async move { inner.execute_tool_call_erased(&call).await }
880    }
881
882    fn execute_tool_call_confirmed(
883        &self,
884        call: &ToolCall,
885    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
886        let inner = std::sync::Arc::clone(&self.0);
887        let call = call.clone();
888        async move { inner.execute_tool_call_confirmed_erased(&call).await }
889    }
890
891    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
892        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
893    }
894
895    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
896        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
897    }
898
899    fn is_tool_retryable(&self, tool_id: &str) -> bool {
900        self.0.is_tool_retryable_erased(tool_id)
901    }
902
903    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
904        self.0.is_tool_speculatable_erased(tool_id)
905    }
906
907    fn requires_confirmation(&self, call: &ToolCall) -> bool {
908        self.0.requires_confirmation_erased(call)
909    }
910}
911
/// Collect the contents of every fenced code block tagged with `lang`.
///
/// A block opens at `` ```{lang} `` and closes at the next `` ``` ``; the text in between
/// is returned with surrounding whitespace trimmed, in order of appearance. The opening
/// marker only counts when it is followed by end-of-input or by a character that is not
/// alphanumeric, `_`, or `-`, so searching for `bash` skips a `` ```bashrc `` fence.
/// Scanning stops at the first accepted opening marker that has no closing fence.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const FENCE: &str = "```";

    let opener = format!("```{lang}");
    let mut found = Vec::new();
    // Byte offset into `text` from which the next opening marker is searched.
    let mut cursor = 0;

    while let Some(offset) = text[cursor..].find(opener.as_str()) {
        let body_start = cursor + offset + opener.len();
        let tail = &text[body_start..];

        // Reject markers that are only a prefix of a longer tag (e.g. "bash" in "bashrc").
        let continues_tag = match tail.chars().next() {
            Some(ch) => ch.is_alphanumeric() || ch == '_' || ch == '-',
            None => false,
        };
        if continues_tag {
            cursor = body_start;
            continue;
        }

        // An opening fence without a closing one ends the scan.
        let Some(close) = tail.find(FENCE) else {
            break;
        };
        found.push(tail[..close].trim());
        cursor = body_start + close + FENCE.len();
    }

    found
}
948
949#[cfg(test)]
950mod tests {
951    use super::*;
952
953    #[test]
954    fn tool_output_display() {
955        let output = ToolOutput {
956            tool_name: ToolName::new("bash"),
957            summary: "$ echo hello\nhello".to_owned(),
958            blocks_executed: 1,
959            filter_stats: None,
960            diff: None,
961            streamed: false,
962            terminal_id: None,
963            locations: None,
964            raw_response: None,
965            claim_source: None,
966        };
967        assert_eq!(output.to_string(), "$ echo hello\nhello");
968    }
969
970    #[test]
971    fn tool_error_blocked_display() {
972        let err = ToolError::Blocked {
973            command: "rm -rf /".to_owned(),
974        };
975        assert_eq!(err.to_string(), "command blocked by policy: rm -rf /");
976    }
977
978    #[test]
979    fn tool_error_sandbox_violation_display() {
980        let err = ToolError::SandboxViolation {
981            path: "/etc/shadow".to_owned(),
982        };
983        assert_eq!(err.to_string(), "path not allowed by sandbox: /etc/shadow");
984    }
985
986    #[test]
987    fn tool_error_confirmation_required_display() {
988        let err = ToolError::ConfirmationRequired {
989            command: "rm -rf /tmp".to_owned(),
990        };
991        assert_eq!(
992            err.to_string(),
993            "command requires confirmation: rm -rf /tmp"
994        );
995    }
996
997    #[test]
998    fn tool_error_timeout_display() {
999        let err = ToolError::Timeout { timeout_secs: 30 };
1000        assert_eq!(err.to_string(), "command timed out after 30s");
1001    }
1002
1003    #[test]
1004    fn tool_error_invalid_params_display() {
1005        let err = ToolError::InvalidParams {
1006            message: "missing field `command`".to_owned(),
1007        };
1008        assert_eq!(
1009            err.to_string(),
1010            "invalid tool parameters: missing field `command`"
1011        );
1012    }
1013
1014    #[test]
1015    fn deserialize_params_valid() {
1016        #[derive(Debug, serde::Deserialize, PartialEq)]
1017        struct P {
1018            name: String,
1019            count: u32,
1020        }
1021        let mut map = serde_json::Map::new();
1022        map.insert("name".to_owned(), serde_json::json!("test"));
1023        map.insert("count".to_owned(), serde_json::json!(42));
1024        let p: P = deserialize_params(&map).unwrap();
1025        assert_eq!(
1026            p,
1027            P {
1028                name: "test".to_owned(),
1029                count: 42
1030            }
1031        );
1032    }
1033
1034    #[test]
1035    fn deserialize_params_missing_required_field() {
1036        #[derive(Debug, serde::Deserialize)]
1037        #[allow(dead_code)]
1038        struct P {
1039            name: String,
1040        }
1041        let map = serde_json::Map::new();
1042        let err = deserialize_params::<P>(&map).unwrap_err();
1043        assert!(matches!(err, ToolError::InvalidParams { .. }));
1044    }
1045
1046    #[test]
1047    fn deserialize_params_wrong_type() {
1048        #[derive(Debug, serde::Deserialize)]
1049        #[allow(dead_code)]
1050        struct P {
1051            count: u32,
1052        }
1053        let mut map = serde_json::Map::new();
1054        map.insert("count".to_owned(), serde_json::json!("not a number"));
1055        let err = deserialize_params::<P>(&map).unwrap_err();
1056        assert!(matches!(err, ToolError::InvalidParams { .. }));
1057    }
1058
1059    #[test]
1060    fn deserialize_params_all_optional_empty() {
1061        #[derive(Debug, serde::Deserialize, PartialEq)]
1062        struct P {
1063            name: Option<String>,
1064        }
1065        let map = serde_json::Map::new();
1066        let p: P = deserialize_params(&map).unwrap();
1067        assert_eq!(p, P { name: None });
1068    }
1069
1070    #[test]
1071    fn deserialize_params_ignores_extra_fields() {
1072        #[derive(Debug, serde::Deserialize, PartialEq)]
1073        struct P {
1074            name: String,
1075        }
1076        let mut map = serde_json::Map::new();
1077        map.insert("name".to_owned(), serde_json::json!("test"));
1078        map.insert("extra".to_owned(), serde_json::json!(true));
1079        let p: P = deserialize_params(&map).unwrap();
1080        assert_eq!(
1081            p,
1082            P {
1083                name: "test".to_owned()
1084            }
1085        );
1086    }
1087
1088    #[test]
1089    fn tool_error_execution_display() {
1090        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash not found");
1091        let err = ToolError::Execution(io_err);
1092        assert!(err.to_string().starts_with("execution failed:"));
1093        assert!(err.to_string().contains("bash not found"));
1094    }
1095
1096    // ErrorKind classification tests
1097    #[test]
1098    fn error_kind_timeout_is_transient() {
1099        let err = ToolError::Timeout { timeout_secs: 30 };
1100        assert_eq!(err.kind(), ErrorKind::Transient);
1101    }
1102
1103    #[test]
1104    fn error_kind_blocked_is_permanent() {
1105        let err = ToolError::Blocked {
1106            command: "rm -rf /".to_owned(),
1107        };
1108        assert_eq!(err.kind(), ErrorKind::Permanent);
1109    }
1110
1111    #[test]
1112    fn error_kind_sandbox_violation_is_permanent() {
1113        let err = ToolError::SandboxViolation {
1114            path: "/etc/shadow".to_owned(),
1115        };
1116        assert_eq!(err.kind(), ErrorKind::Permanent);
1117    }
1118
1119    #[test]
1120    fn error_kind_cancelled_is_permanent() {
1121        assert_eq!(ToolError::Cancelled.kind(), ErrorKind::Permanent);
1122    }
1123
1124    #[test]
1125    fn error_kind_invalid_params_is_permanent() {
1126        let err = ToolError::InvalidParams {
1127            message: "bad arg".to_owned(),
1128        };
1129        assert_eq!(err.kind(), ErrorKind::Permanent);
1130    }
1131
1132    #[test]
1133    fn error_kind_confirmation_required_is_permanent() {
1134        let err = ToolError::ConfirmationRequired {
1135            command: "rm /tmp/x".to_owned(),
1136        };
1137        assert_eq!(err.kind(), ErrorKind::Permanent);
1138    }
1139
1140    #[test]
1141    fn error_kind_execution_timed_out_is_transient() {
1142        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1143        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1144    }
1145
1146    #[test]
1147    fn error_kind_execution_interrupted_is_transient() {
1148        let io_err = std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted");
1149        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1150    }
1151
1152    #[test]
1153    fn error_kind_execution_connection_reset_is_transient() {
1154        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
1155        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1156    }
1157
1158    #[test]
1159    fn error_kind_execution_broken_pipe_is_transient() {
1160        let io_err = std::io::Error::new(std::io::ErrorKind::BrokenPipe, "pipe broken");
1161        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1162    }
1163
1164    #[test]
1165    fn error_kind_execution_would_block_is_transient() {
1166        let io_err = std::io::Error::new(std::io::ErrorKind::WouldBlock, "would block");
1167        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1168    }
1169
1170    #[test]
1171    fn error_kind_execution_connection_aborted_is_transient() {
1172        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
1173        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1174    }
1175
1176    #[test]
1177    fn error_kind_execution_not_found_is_permanent() {
1178        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
1179        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1180    }
1181
1182    #[test]
1183    fn error_kind_execution_permission_denied_is_permanent() {
1184        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied");
1185        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1186    }
1187
1188    #[test]
1189    fn error_kind_execution_other_is_permanent() {
1190        let io_err = std::io::Error::other("some other error");
1191        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1192    }
1193
1194    #[test]
1195    fn error_kind_execution_already_exists_is_permanent() {
1196        let io_err = std::io::Error::new(std::io::ErrorKind::AlreadyExists, "exists");
1197        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1198    }
1199
1200    #[test]
1201    fn error_kind_display() {
1202        assert_eq!(ErrorKind::Transient.to_string(), "transient");
1203        assert_eq!(ErrorKind::Permanent.to_string(), "permanent");
1204    }
1205
1206    #[test]
1207    fn truncate_tool_output_short_passthrough() {
1208        let short = "hello world";
1209        assert_eq!(truncate_tool_output(short), short);
1210    }
1211
1212    #[test]
1213    fn truncate_tool_output_exact_limit() {
1214        let exact = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
1215        assert_eq!(truncate_tool_output(&exact), exact);
1216    }
1217
1218    #[test]
1219    fn truncate_tool_output_long_split() {
1220        let long = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
1221        let result = truncate_tool_output(&long);
1222        assert!(result.contains("truncated"));
1223        assert!(result.len() < long.len());
1224    }
1225
1226    #[test]
1227    fn truncate_tool_output_notice_contains_count() {
1228        let long = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
1229        let result = truncate_tool_output(&long);
1230        assert!(result.contains("truncated"));
1231        assert!(result.contains("chars"));
1232    }
1233
1234    #[derive(Debug)]
1235    struct DefaultExecutor;
1236    impl ToolExecutor for DefaultExecutor {
1237        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1238            Ok(None)
1239        }
1240    }
1241
1242    #[tokio::test]
1243    async fn execute_tool_call_default_returns_none() {
1244        let exec = DefaultExecutor;
1245        let call = ToolCall {
1246            tool_id: ToolName::new("anything"),
1247            params: serde_json::Map::new(),
1248            caller_id: None,
1249            context: None,
1250
1251            tool_call_id: String::new(),
1252        };
1253        let result = exec.execute_tool_call(&call).await.unwrap();
1254        assert!(result.is_none());
1255    }
1256
1257    #[test]
1258    fn filter_stats_savings_pct() {
1259        let fs = FilterStats {
1260            raw_chars: 1000,
1261            filtered_chars: 200,
1262            ..Default::default()
1263        };
1264        assert!((fs.savings_pct() - 80.0).abs() < 0.01);
1265    }
1266
1267    #[test]
1268    fn filter_stats_savings_pct_zero() {
1269        let fs = FilterStats::default();
1270        assert!((fs.savings_pct()).abs() < 0.01);
1271    }
1272
1273    #[test]
1274    fn filter_stats_estimated_tokens_saved() {
1275        let fs = FilterStats {
1276            raw_chars: 1000,
1277            filtered_chars: 200,
1278            ..Default::default()
1279        };
1280        assert_eq!(fs.estimated_tokens_saved(), 200); // (1000 - 200) / 4
1281    }
1282
1283    #[test]
1284    fn filter_stats_format_inline() {
1285        let fs = FilterStats {
1286            raw_chars: 1000,
1287            filtered_chars: 200,
1288            raw_lines: 342,
1289            filtered_lines: 28,
1290            ..Default::default()
1291        };
1292        let line = fs.format_inline("shell");
1293        assert_eq!(line, "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered");
1294    }
1295
1296    #[test]
1297    fn filter_stats_format_inline_zero() {
1298        let fs = FilterStats::default();
1299        let line = fs.format_inline("bash");
1300        assert_eq!(line, "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered");
1301    }
1302
1303    // DynExecutor tests
1304
1305    struct FixedExecutor {
1306        tool_id: &'static str,
1307        output: &'static str,
1308    }
1309
1310    impl ToolExecutor for FixedExecutor {
1311        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1312            Ok(Some(ToolOutput {
1313                tool_name: ToolName::new(self.tool_id),
1314                summary: self.output.to_owned(),
1315                blocks_executed: 1,
1316                filter_stats: None,
1317                diff: None,
1318                streamed: false,
1319                terminal_id: None,
1320                locations: None,
1321                raw_response: None,
1322                claim_source: None,
1323            }))
1324        }
1325
1326        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1327            vec![]
1328        }
1329
1330        async fn execute_tool_call(
1331            &self,
1332            _call: &ToolCall,
1333        ) -> Result<Option<ToolOutput>, ToolError> {
1334            Ok(Some(ToolOutput {
1335                tool_name: ToolName::new(self.tool_id),
1336                summary: self.output.to_owned(),
1337                blocks_executed: 1,
1338                filter_stats: None,
1339                diff: None,
1340                streamed: false,
1341                terminal_id: None,
1342                locations: None,
1343                raw_response: None,
1344                claim_source: None,
1345            }))
1346        }
1347    }
1348
1349    #[tokio::test]
1350    async fn dyn_executor_execute_delegates() {
1351        let inner = std::sync::Arc::new(FixedExecutor {
1352            tool_id: "bash",
1353            output: "hello",
1354        });
1355        let exec = DynExecutor(inner);
1356        let result = exec.execute("```bash\necho hello\n```").await.unwrap();
1357        assert!(result.is_some());
1358        assert_eq!(result.unwrap().summary, "hello");
1359    }
1360
1361    #[tokio::test]
1362    async fn dyn_executor_execute_confirmed_delegates() {
1363        let inner = std::sync::Arc::new(FixedExecutor {
1364            tool_id: "bash",
1365            output: "confirmed",
1366        });
1367        let exec = DynExecutor(inner);
1368        let result = exec.execute_confirmed("...").await.unwrap();
1369        assert!(result.is_some());
1370        assert_eq!(result.unwrap().summary, "confirmed");
1371    }
1372
1373    #[test]
1374    fn dyn_executor_tool_definitions_delegates() {
1375        let inner = std::sync::Arc::new(FixedExecutor {
1376            tool_id: "my_tool",
1377            output: "",
1378        });
1379        let exec = DynExecutor(inner);
1380        // FixedExecutor returns empty definitions; verify delegation occurs without panic.
1381        let defs = exec.tool_definitions();
1382        assert!(defs.is_empty());
1383    }
1384
1385    #[tokio::test]
1386    async fn dyn_executor_execute_tool_call_delegates() {
1387        let inner = std::sync::Arc::new(FixedExecutor {
1388            tool_id: "bash",
1389            output: "tool_call_result",
1390        });
1391        let exec = DynExecutor(inner);
1392        let call = ToolCall {
1393            tool_id: ToolName::new("bash"),
1394            params: serde_json::Map::new(),
1395            caller_id: None,
1396            context: None,
1397
1398            tool_call_id: String::new(),
1399        };
1400        let result = exec.execute_tool_call(&call).await.unwrap();
1401        assert!(result.is_some());
1402        assert_eq!(result.unwrap().summary, "tool_call_result");
1403    }
1404
1405    #[test]
1406    fn dyn_executor_set_effective_trust_delegates() {
1407        use std::sync::atomic::{AtomicU8, Ordering};
1408
1409        struct TrustCapture(AtomicU8);
1410        impl ToolExecutor for TrustCapture {
1411            async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1412                Ok(None)
1413            }
1414            fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
1415                // encode: Trusted=0, Verified=1, Quarantined=2, Blocked=3
1416                let v = match level {
1417                    crate::SkillTrustLevel::Trusted => 0u8,
1418                    crate::SkillTrustLevel::Verified => 1,
1419                    crate::SkillTrustLevel::Quarantined => 2,
1420                    crate::SkillTrustLevel::Blocked => 3,
1421                };
1422                self.0.store(v, Ordering::Relaxed);
1423            }
1424        }
1425
1426        let inner = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
1427        let exec =
1428            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1429        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Quarantined);
1430        assert_eq!(inner.0.load(Ordering::Relaxed), 2);
1431
1432        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Blocked);
1433        assert_eq!(inner.0.load(Ordering::Relaxed), 3);
1434    }
1435
1436    #[test]
1437    fn extract_fenced_blocks_no_prefix_match() {
1438        // ```bashrc must NOT match when searching for "bash"
1439        assert!(extract_fenced_blocks("```bashrc\nfoo\n```", "bash").is_empty());
1440        // exact match
1441        assert_eq!(
1442            extract_fenced_blocks("```bash\nfoo\n```", "bash"),
1443            vec!["foo"]
1444        );
1445        // trailing space is fine
1446        assert_eq!(
1447            extract_fenced_blocks("```bash \nfoo\n```", "bash"),
1448            vec!["foo"]
1449        );
1450    }
1451
1452    // ── ToolError::category() delegation tests ────────────────────────────────
1453
1454    #[test]
1455    fn tool_error_http_400_category_is_invalid_parameters() {
1456        use crate::error_taxonomy::ToolErrorCategory;
1457        let err = ToolError::Http {
1458            status: 400,
1459            message: "bad request".to_owned(),
1460        };
1461        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1462    }
1463
1464    #[test]
1465    fn tool_error_http_401_category_is_policy_blocked() {
1466        use crate::error_taxonomy::ToolErrorCategory;
1467        let err = ToolError::Http {
1468            status: 401,
1469            message: "unauthorized".to_owned(),
1470        };
1471        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1472    }
1473
1474    #[test]
1475    fn tool_error_http_403_category_is_policy_blocked() {
1476        use crate::error_taxonomy::ToolErrorCategory;
1477        let err = ToolError::Http {
1478            status: 403,
1479            message: "forbidden".to_owned(),
1480        };
1481        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1482    }
1483
1484    #[test]
1485    fn tool_error_http_404_category_is_permanent_failure() {
1486        use crate::error_taxonomy::ToolErrorCategory;
1487        let err = ToolError::Http {
1488            status: 404,
1489            message: "not found".to_owned(),
1490        };
1491        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1492    }
1493
1494    #[test]
1495    fn tool_error_http_429_category_is_rate_limited() {
1496        use crate::error_taxonomy::ToolErrorCategory;
1497        let err = ToolError::Http {
1498            status: 429,
1499            message: "too many requests".to_owned(),
1500        };
1501        assert_eq!(err.category(), ToolErrorCategory::RateLimited);
1502    }
1503
1504    #[test]
1505    fn tool_error_http_500_category_is_server_error() {
1506        use crate::error_taxonomy::ToolErrorCategory;
1507        let err = ToolError::Http {
1508            status: 500,
1509            message: "internal server error".to_owned(),
1510        };
1511        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1512    }
1513
1514    #[test]
1515    fn tool_error_http_502_category_is_server_error() {
1516        use crate::error_taxonomy::ToolErrorCategory;
1517        let err = ToolError::Http {
1518            status: 502,
1519            message: "bad gateway".to_owned(),
1520        };
1521        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1522    }
1523
1524    #[test]
1525    fn tool_error_http_503_category_is_server_error() {
1526        use crate::error_taxonomy::ToolErrorCategory;
1527        let err = ToolError::Http {
1528            status: 503,
1529            message: "service unavailable".to_owned(),
1530        };
1531        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1532    }
1533
1534    #[test]
1535    fn tool_error_http_503_is_transient_triggers_phase2_retry() {
1536        // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
1537        // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
1538        let err = ToolError::Http {
1539            status: 503,
1540            message: "service unavailable".to_owned(),
1541        };
1542        assert_eq!(
1543            err.kind(),
1544            ErrorKind::Transient,
1545            "HTTP 503 must be Transient so Phase 2 retry fires"
1546        );
1547    }
1548
1549    #[test]
1550    fn tool_error_blocked_category_is_policy_blocked() {
1551        use crate::error_taxonomy::ToolErrorCategory;
1552        let err = ToolError::Blocked {
1553            command: "rm -rf /".to_owned(),
1554        };
1555        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1556    }
1557
1558    #[test]
1559    fn tool_error_sandbox_violation_category_is_policy_blocked() {
1560        use crate::error_taxonomy::ToolErrorCategory;
1561        let err = ToolError::SandboxViolation {
1562            path: "/etc/shadow".to_owned(),
1563        };
1564        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1565    }
1566
1567    #[test]
1568    fn tool_error_confirmation_required_category() {
1569        use crate::error_taxonomy::ToolErrorCategory;
1570        let err = ToolError::ConfirmationRequired {
1571            command: "rm /tmp/x".to_owned(),
1572        };
1573        assert_eq!(err.category(), ToolErrorCategory::ConfirmationRequired);
1574    }
1575
1576    #[test]
1577    fn tool_error_timeout_category() {
1578        use crate::error_taxonomy::ToolErrorCategory;
1579        let err = ToolError::Timeout { timeout_secs: 30 };
1580        assert_eq!(err.category(), ToolErrorCategory::Timeout);
1581    }
1582
1583    #[test]
1584    fn tool_error_cancelled_category() {
1585        use crate::error_taxonomy::ToolErrorCategory;
1586        assert_eq!(
1587            ToolError::Cancelled.category(),
1588            ToolErrorCategory::Cancelled
1589        );
1590    }
1591
1592    #[test]
1593    fn tool_error_invalid_params_category() {
1594        use crate::error_taxonomy::ToolErrorCategory;
1595        let err = ToolError::InvalidParams {
1596            message: "missing field".to_owned(),
1597        };
1598        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1599    }
1600
1601    // B2 regression: Execution(NotFound) must NOT produce ToolNotFound.
1602    #[test]
1603    fn tool_error_execution_not_found_category_is_permanent_failure() {
1604        use crate::error_taxonomy::ToolErrorCategory;
1605        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash: not found");
1606        let err = ToolError::Execution(io_err);
1607        let cat = err.category();
1608        assert_ne!(
1609            cat,
1610            ToolErrorCategory::ToolNotFound,
1611            "Execution(NotFound) must NOT map to ToolNotFound"
1612        );
1613        assert_eq!(cat, ToolErrorCategory::PermanentFailure);
1614    }
1615
1616    #[test]
1617    fn tool_error_execution_timed_out_category_is_timeout() {
1618        use crate::error_taxonomy::ToolErrorCategory;
1619        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out");
1620        assert_eq!(
1621            ToolError::Execution(io_err).category(),
1622            ToolErrorCategory::Timeout
1623        );
1624    }
1625
1626    #[test]
1627    fn tool_error_execution_connection_refused_category_is_network_error() {
1628        use crate::error_taxonomy::ToolErrorCategory;
1629        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionRefused, "refused");
1630        assert_eq!(
1631            ToolError::Execution(io_err).category(),
1632            ToolErrorCategory::NetworkError
1633        );
1634    }
1635
1636    // B4 regression: Http/network/transient categories must NOT be quality failures.
1637    #[test]
1638    fn b4_tool_error_http_429_not_quality_failure() {
1639        let err = ToolError::Http {
1640            status: 429,
1641            message: "rate limited".to_owned(),
1642        };
1643        assert!(
1644            !err.category().is_quality_failure(),
1645            "RateLimited must not be a quality failure"
1646        );
1647    }
1648
1649    #[test]
1650    fn b4_tool_error_http_503_not_quality_failure() {
1651        let err = ToolError::Http {
1652            status: 503,
1653            message: "service unavailable".to_owned(),
1654        };
1655        assert!(
1656            !err.category().is_quality_failure(),
1657            "ServerError must not be a quality failure"
1658        );
1659    }
1660
1661    #[test]
1662    fn b4_tool_error_execution_timed_out_not_quality_failure() {
1663        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1664        assert!(
1665            !ToolError::Execution(io_err).category().is_quality_failure(),
1666            "Timeout must not be a quality failure"
1667        );
1668    }
1669
1670    // ── ToolError::Shell category tests ──────────────────────────────────────
1671
1672    #[test]
1673    fn tool_error_shell_exit126_is_policy_blocked() {
1674        use crate::error_taxonomy::ToolErrorCategory;
1675        let err = ToolError::Shell {
1676            exit_code: 126,
1677            category: ToolErrorCategory::PolicyBlocked,
1678            message: "permission denied".to_owned(),
1679        };
1680        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1681    }
1682
1683    #[test]
1684    fn tool_error_shell_exit127_is_permanent_failure() {
1685        use crate::error_taxonomy::ToolErrorCategory;
1686        let err = ToolError::Shell {
1687            exit_code: 127,
1688            category: ToolErrorCategory::PermanentFailure,
1689            message: "command not found".to_owned(),
1690        };
1691        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1692        assert!(!err.category().is_retryable());
1693    }
1694
1695    #[test]
1696    fn tool_error_shell_not_quality_failure() {
1697        use crate::error_taxonomy::ToolErrorCategory;
1698        let err = ToolError::Shell {
1699            exit_code: 127,
1700            category: ToolErrorCategory::PermanentFailure,
1701            message: "command not found".to_owned(),
1702        };
1703        // Shell exit errors are not attributable to LLM output quality.
1704        assert!(!err.category().is_quality_failure());
1705    }
1706
1707    // ── requires_confirmation / requires_confirmation_erased tests (#3644) ───
1708
1709    /// Stub implementing only `ToolExecutor` without overriding `requires_confirmation`.
1710    struct StubExecutor;
1711    impl ToolExecutor for StubExecutor {
1712        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1713            Ok(None)
1714        }
1715    }
1716
1717    /// Stub that always signals confirmation is required via `ToolExecutor::requires_confirmation`.
1718    struct ConfirmingExecutor;
1719    impl ToolExecutor for ConfirmingExecutor {
1720        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1721            Ok(None)
1722        }
1723        fn requires_confirmation(&self, _call: &ToolCall) -> bool {
1724            true
1725        }
1726    }
1727
1728    fn dummy_call() -> ToolCall {
1729        ToolCall {
1730            tool_id: ToolName::new("test"),
1731            params: serde_json::Map::new(),
1732            caller_id: None,
1733            context: None,
1734
1735            tool_call_id: String::new(),
1736        }
1737    }
1738
1739    #[test]
1740    fn requires_confirmation_default_is_false_on_tool_executor() {
1741        let exec = StubExecutor;
1742        assert!(
1743            !exec.requires_confirmation(&dummy_call()),
1744            "ToolExecutor default requires_confirmation must be false"
1745        );
1746    }
1747
1748    #[test]
1749    fn requires_confirmation_erased_delegates_to_tool_executor_default() {
1750        // blanket impl routes erased → ToolExecutor::requires_confirmation (= false)
1751        let exec = StubExecutor;
1752        assert!(
1753            !ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1754            "requires_confirmation_erased via blanket impl must return false for stub executor"
1755        );
1756    }
1757
1758    #[test]
1759    fn requires_confirmation_erased_delegates_override() {
1760        // ConfirmingExecutor overrides requires_confirmation → true;
1761        // blanket impl must propagate this.
1762        let exec = ConfirmingExecutor;
1763        assert!(
1764            ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1765            "requires_confirmation_erased must return true when ToolExecutor override returns true"
1766        );
1767    }
1768
1769    #[test]
1770    fn requires_confirmation_erased_default_on_erased_trait_is_true() {
1771        // ErasedToolExecutor's own default (trait method body) returns true.
1772        // We construct a DynExecutor wrapping ConfirmingExecutor and verify via the erased path.
1773        // (We cannot instantiate ErasedToolExecutor directly without a concrete type.)
1774        // Instead verify via a type that only implements ErasedToolExecutor manually:
1775        struct ManualErased;
1776        impl ErasedToolExecutor for ManualErased {
1777            fn execute_erased<'a>(
1778                &'a self,
1779                _response: &'a str,
1780            ) -> std::pin::Pin<
1781                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1782            > {
1783                Box::pin(std::future::ready(Ok(None)))
1784            }
1785            fn execute_confirmed_erased<'a>(
1786                &'a self,
1787                _response: &'a str,
1788            ) -> std::pin::Pin<
1789                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1790            > {
1791                Box::pin(std::future::ready(Ok(None)))
1792            }
1793            fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
1794                vec![]
1795            }
1796            fn execute_tool_call_erased<'a>(
1797                &'a self,
1798                _call: &'a ToolCall,
1799            ) -> std::pin::Pin<
1800                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1801            > {
1802                Box::pin(std::future::ready(Ok(None)))
1803            }
1804            fn is_tool_retryable_erased(&self, _tool_id: &str) -> bool {
1805                false
1806            }
1807            // requires_confirmation_erased NOT overridden → trait default returns true
1808        }
1809        let exec = ManualErased;
1810        assert!(
1811            exec.requires_confirmation_erased(&dummy_call()),
1812            "ErasedToolExecutor trait-level default for requires_confirmation_erased must be true"
1813        );
1814    }
1815
1816    // ── DynExecutor::requires_confirmation delegation tests (#3650) ──────────
1817
1818    #[test]
1819    fn dyn_executor_requires_confirmation_delegates() {
1820        let inner = std::sync::Arc::new(ConfirmingExecutor);
1821        let exec =
1822            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1823        assert!(
1824            ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1825            "DynExecutor must delegate requires_confirmation to inner executor"
1826        );
1827    }
1828
1829    #[test]
1830    fn dyn_executor_requires_confirmation_default_false() {
1831        let inner = std::sync::Arc::new(StubExecutor);
1832        let exec =
1833            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1834        assert!(
1835            !ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1836            "DynExecutor must return false when inner executor does not require confirmation"
1837        );
1838    }
1839}