zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Data for rendering file diffs in the TUI.
///
/// Produced by [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor)
/// when a tool call modifies a tracked file. The TUI uses this to display a side-by-side diff.
///
/// Carried as an optional payload on [`ToolOutput::diff`] and on
/// [`ToolEvent::Completed`].
#[derive(Debug, Clone)]
pub struct DiffData {
    /// Relative or absolute path to the file that was modified.
    pub file_path: String,
    /// File content before the tool executed.
    pub old_content: String,
    /// File content after the tool executed.
    pub new_content: String,
}
23
/// Structured tool invocation from LLM.
///
/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`], or to
/// [`ToolExecutor::execute_tool_call_confirmed`] once the user has approved the call.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolCall, ExecutionContext};
/// use zeph_common::ToolName;
///
/// let call = ToolCall {
///     tool_id: ToolName::new("bash"),
///     params: {
///         let mut m = serde_json::Map::new();
///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
///         m
///     },
///     caller_id: Some("user-42".to_owned()),
///     context: Some(ExecutionContext::new().with_name("repo")),
/// };
/// assert_eq!(call.tool_id, "bash");
/// ```
#[derive(Debug, Clone)]
pub struct ToolCall {
    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
    pub tool_id: ToolName,
    /// JSON parameters for the tool call, deserialized into the tool's parameter struct
    /// (see [`deserialize_params`]).
    pub params: serde_json::Map<String, serde_json::Value>,
    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
    /// `None` for system-initiated calls (scheduler, self-learning, internal).
    pub caller_id: Option<String>,
    /// Per-turn execution environment. `None` means use the executor default (process CWD
    /// and inherited env), which is identical to the behaviour before this field existed.
    pub context: Option<crate::ExecutionContext>,
}
60
/// Cumulative filter statistics for a single tool execution.
///
/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
///
/// The derived `Default` yields zero counters, `None` for the optional fields and an
/// empty `kept_lines`, which is convenient with struct-update syntax
/// (`FilterStats { raw_chars: 10, ..Default::default() }`).
#[derive(Debug, Clone, Default)]
pub struct FilterStats {
    /// Raw character count before filtering.
    pub raw_chars: usize,
    /// Character count after filtering.
    pub filtered_chars: usize,
    /// Raw line count before filtering.
    pub raw_lines: usize,
    /// Line count after filtering.
    pub filtered_lines: usize,
    /// Worst-case confidence across all applied filters.
    pub confidence: Option<crate::FilterConfidence>,
    /// The shell command that produced this output, for display purposes.
    /// Used by [`FilterStats::format_inline`] to label the summary line.
    pub command: Option<String>,
    /// Zero-based line indices that were kept after filtering.
    pub kept_lines: Vec<usize>,
}
82
83impl FilterStats {
84    /// Returns the percentage of characters removed by filtering.
85    ///
86    /// Returns `0.0` when there was no raw output to filter.
87    #[must_use]
88    #[allow(clippy::cast_precision_loss)]
89    pub fn savings_pct(&self) -> f64 {
90        if self.raw_chars == 0 {
91            return 0.0;
92        }
93        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
94    }
95
96    /// Estimates the number of LLM tokens saved by filtering.
97    ///
98    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
99    /// not for billing or exact budget calculations.
100    #[must_use]
101    pub fn estimated_tokens_saved(&self) -> usize {
102        self.raw_chars.saturating_sub(self.filtered_chars) / 4
103    }
104
105    /// Formats a one-line filter summary for log messages and TUI status.
106    ///
107    /// # Example
108    ///
109    /// ```rust
110    /// use zeph_tools::FilterStats;
111    ///
112    /// let stats = FilterStats {
113    ///     raw_chars: 1000,
114    ///     filtered_chars: 400,
115    ///     raw_lines: 50,
116    ///     filtered_lines: 20,
117    ///     command: Some("cargo build".to_owned()),
118    ///     ..Default::default()
119    /// };
120    /// let summary = stats.format_inline("shell");
121    /// assert!(summary.contains("60.0% filtered"));
122    /// ```
123    #[must_use]
124    pub fn format_inline(&self, tool_name: &str) -> String {
125        let cmd_label = self
126            .command
127            .as_deref()
128            .map(|c| {
129                let trimmed = c.trim();
130                if trimmed.len() > 60 {
131                    format!(" `{}…`", &trimmed[..57])
132                } else {
133                    format!(" `{trimmed}`")
134                }
135            })
136            .unwrap_or_default();
137        format!(
138            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
139            self.raw_lines,
140            self.filtered_lines,
141            self.savings_pct()
142        )
143    }
144}
145
/// Provenance of a tool execution result.
///
/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
/// It is stored as `Option<ClaimSource>` on [`ToolOutput::claim_source`]; there, `None`
/// means the source is unspecified (pass-through code, mocks, tests).
///
/// Variants are (de)serialized in `snake_case` (e.g. `FileSystem` ↔ `"file_system"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClaimSource {
    /// Local shell command execution.
    Shell,
    /// Local file system read/write.
    FileSystem,
    /// HTTP web scrape.
    WebScrape,
    /// MCP server tool response.
    Mcp,
    /// A2A agent message.
    A2a,
    /// Code search (LSP or semantic).
    CodeSearch,
    /// Agent diagnostics (internal).
    Diagnostics,
    /// Memory retrieval (semantic search).
    Memory,
}
171
/// Structured result from tool execution.
///
/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
/// The `Display` implementation writes `summary` verbatim.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolOutput, executor::ClaimSource};
/// use zeph_common::ToolName;
///
/// let output = ToolOutput {
///     tool_name: ToolName::new("shell"),
///     summary: "hello\n".to_owned(),
///     blocks_executed: 1,
///     filter_stats: None,
///     diff: None,
///     streamed: false,
///     terminal_id: None,
///     locations: None,
///     raw_response: None,
///     claim_source: Some(ClaimSource::Shell),
/// };
/// assert_eq!(output.to_string(), "hello\n");
/// ```
#[derive(Debug, Clone)]
pub struct ToolOutput {
    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
    pub tool_name: ToolName,
    /// Human-readable result text injected into the LLM context.
    pub summary: String,
    /// Number of code blocks processed in this invocation.
    pub blocks_executed: u32,
    /// Output filter statistics when filtering was applied, `None` otherwise.
    pub filter_stats: Option<FilterStats>,
    /// File diff data for TUI display when the tool modified a tracked file.
    pub diff: Option<DiffData>,
    /// Whether this tool already streamed its output via `ToolEvent` channel.
    pub streamed: bool,
    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
    pub terminal_id: Option<String>,
    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
    pub locations: Option<Vec<String>>,
    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
    pub raw_response: Option<serde_json::Value>,
    /// Provenance of this tool result. Set by the executor at construction time.
    /// `None` in pass-through wrappers, mocks, and tests.
    pub claim_source: Option<ClaimSource>,
}
221
222impl fmt::Display for ToolOutput {
223    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
224        f.write_str(&self.summary)
225    }
226}
227
/// Maximum characters of tool output injected into the LLM context without truncation.
///
/// Output that exceeds this limit is split into a head and tail via [`truncate_tool_output`]
/// to keep both the beginning and end of large command outputs.
///
/// Note: the limit is compared against the output's byte length (`str::len`), so text
/// with multi-byte characters reaches it with fewer than 30 000 visible characters.
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;
233
234/// Truncate tool output that exceeds [`MAX_TOOL_OUTPUT_CHARS`] using a head+tail split.
235///
236/// Equivalent to `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`.
237///
238/// # Example
239///
240/// ```rust
241/// use zeph_tools::executor::truncate_tool_output;
242///
243/// let short = "hello world";
244/// assert_eq!(truncate_tool_output(short), short);
245/// ```
246#[must_use]
247pub fn truncate_tool_output(output: &str) -> String {
248    truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)
249}
250
251/// Truncate tool output that exceeds `max_chars` using a head+tail split.
252///
253/// Preserves the first and last `max_chars / 2` characters and inserts a truncation
254/// marker in the middle. Both boundaries are snapped to valid UTF-8 character boundaries.
255///
256/// # Example
257///
258/// ```rust
259/// use zeph_tools::executor::truncate_tool_output_at;
260///
261/// let long = "a".repeat(200);
262/// let truncated = truncate_tool_output_at(&long, 100);
263/// assert!(truncated.contains("truncated"));
264/// assert!(truncated.len() < long.len());
265/// ```
266#[must_use]
267pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
268    if output.len() <= max_chars {
269        return output.to_string();
270    }
271
272    let half = max_chars / 2;
273    let head_end = output.floor_char_boundary(half);
274    let tail_start = output.ceil_char_boundary(output.len() - half);
275    let head = &output[..head_end];
276    let tail = &output[tail_start..];
277    let truncated = output.len() - head_end - (output.len() - tail_start);
278
279    format!(
280        "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
281    )
282}
283
/// Event emitted during tool execution for real-time UI updates.
///
/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
/// Each event variant corresponds to a phase in the tool execution lifecycle.
#[derive(Debug, Clone)]
pub enum ToolEvent {
    /// The tool has started. Displayed in the TUI as a spinner with the command text.
    Started {
        /// Name of the tool that started.
        tool_name: ToolName,
        /// Command text shown next to the spinner.
        command: String,
        /// Active sandbox profile, if any. `None` when sandbox is disabled.
        sandbox_profile: Option<String>,
        /// Canonical absolute working directory the command will run in.
        /// `None` for executors that do not resolve a per-turn CWD.
        resolved_cwd: Option<String>,
        /// Name of the resolved execution environment (from `[[execution.environments]]`),
        /// or `None` when no named environment was selected.
        execution_env: Option<String>,
    },
    /// A chunk of streaming output was produced (e.g. from a long-running command).
    OutputChunk {
        /// Name of the tool producing the output.
        tool_name: ToolName,
        /// Command text the chunk belongs to.
        command: String,
        /// The piece of output carried by this event.
        chunk: String,
    },
    /// The tool finished. Contains the full output and optional filter/diff data.
    Completed {
        /// Name of the tool that finished.
        tool_name: ToolName,
        /// Command text that was executed.
        command: String,
        /// Full output text (possibly filtered and truncated).
        output: String,
        /// `true` when the tool exited successfully, `false` on error.
        success: bool,
        /// Output filter statistics, when available.
        filter_stats: Option<FilterStats>,
        /// File diff data, when available.
        diff: Option<DiffData>,
        /// Set when this completion belongs to a background run. `None` for blocking runs.
        run_id: Option<RunId>,
    },
    /// A transactional rollback was performed, restoring or deleting files.
    Rollback {
        /// Name of the tool whose changes were rolled back.
        tool_name: ToolName,
        /// Command text associated with the rollback.
        command: String,
        /// Number of files restored to their pre-execution content.
        restored_count: usize,
        /// Number of files that did not exist before execution and were deleted.
        deleted_count: usize,
    },
}
332
/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
///
/// Capacity is 1024 slots (see [`TOOL_EVENT_CHANNEL_CAP`]). Streaming variants
/// (`OutputChunk`, `Started`) use `try_send` and drop on full; terminal variants
/// (`Completed`, `Rollback`) use `send().await` to guarantee delivery.
///
/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;

/// Receiver half matching [`ToolEventTx`].
pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;

/// Bounded capacity for the tool-event channel, in number of queued [`ToolEvent`]s.
pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
347
/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
///
/// Transient errors may succeed on retry (network blips, race conditions).
/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
///
/// The `Display` implementation renders the variants as `"transient"` and `"permanent"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ErrorKind {
    /// The operation may succeed if retried.
    Transient,
    /// Retrying will not help; the caller should abort.
    Permanent,
}
357
358impl std::fmt::Display for ErrorKind {
359    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
360        match self {
361            Self::Transient => f.write_str("transient"),
362            Self::Permanent => f.write_str("permanent"),
363        }
364    }
365}
366
/// Errors that can occur during tool execution.
///
/// Use [`ToolError::category`] for fine-grained classification and [`ToolError::kind`]
/// for the coarse transient/permanent split.
#[derive(Debug, thiserror::Error)]
pub enum ToolError {
    /// The command was blocked by policy. `command` is the rejected command text.
    #[error("command blocked by policy: {command}")]
    Blocked { command: String },

    /// The sandbox does not allow access to `path`.
    #[error("path not allowed by sandbox: {path}")]
    SandboxViolation { path: String },

    /// The command needs explicit confirmation before it may run.
    #[error("command requires confirmation: {command}")]
    ConfirmationRequired { command: String },

    /// The command did not finish within `timeout_secs` seconds.
    #[error("command timed out after {timeout_secs}s")]
    Timeout { timeout_secs: u64 },

    /// The operation was cancelled.
    #[error("operation cancelled")]
    Cancelled,

    /// The tool parameters were invalid; `message` explains why
    /// (see [`deserialize_params`]).
    #[error("invalid tool parameters: {message}")]
    InvalidParams { message: String },

    /// An I/O error occurred during execution. Converts from [`std::io::Error`]
    /// automatically, so `?` can be used on I/O results.
    #[error("execution failed: {0}")]
    Execution(#[from] std::io::Error),

    /// HTTP or API error with status code for fine-grained classification.
    ///
    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
    #[error("HTTP error {status}: {message}")]
    Http { status: u16, message: String },

    /// Shell execution error with explicit exit code and pre-classified category.
    ///
    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
    /// Preserves the exit code for audit logging and the category for skill evolution.
    #[error("shell error (exit {exit_code}): {message}")]
    Shell {
        /// Exit code reported for the shell command.
        exit_code: i32,
        /// Pre-classified category, returned as-is by [`ToolError::category`].
        category: crate::error_taxonomy::ToolErrorCategory,
        /// Human-readable description included in the error message.
        message: String,
    },

    /// A snapshot operation failed; `reason` describes why.
    #[error("snapshot failed: {reason}")]
    SnapshotFailed { reason: String },

    /// Tool call rejected because the tool id is outside the active capability scope.
    ///
    /// Emitted by `ScopedToolExecutor` before any tool side-effect runs.
    /// The audit log records `error_category = "out_of_scope"`.
    // LLM isolation: task_type is never shown in the error message (P2-OutOfScope).
    #[error("tool call denied by policy")]
    OutOfScope {
        /// Fully-qualified tool id that was rejected.
        tool_id: String,
        /// Active task type at dispatch time, if any.
        task_type: Option<String>,
    },
}
426
427impl ToolError {
428    /// Fine-grained error classification using the 12-category taxonomy.
429    ///
430    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
431    /// backward compatibility and delegates to `category().error_kind()`.
432    #[must_use]
433    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
434        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
435        match self {
436            Self::Blocked { .. } | Self::SandboxViolation { .. } => {
437                ToolErrorCategory::PolicyBlocked
438            }
439            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
440            Self::Timeout { .. } => ToolErrorCategory::Timeout,
441            Self::Cancelled => ToolErrorCategory::Cancelled,
442            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
443            Self::Http { status, .. } => classify_http_status(*status),
444            Self::Execution(io_err) => classify_io_error(io_err),
445            Self::Shell { category, .. } => *category,
446            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
447            Self::OutOfScope { .. } => ToolErrorCategory::PolicyBlocked,
448        }
449    }
450
451    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
452    ///
453    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
454    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
455    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
456    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
457    ///   I/O error kinds — retrying would waste time with no benefit.
458    #[must_use]
459    pub fn kind(&self) -> ErrorKind {
460        use crate::error_taxonomy::ToolErrorCategoryExt;
461        self.category().error_kind()
462    }
463}
464
465/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
466///
467/// # Errors
468///
469/// Returns `ToolError::InvalidParams` when deserialization fails.
470pub fn deserialize_params<T: serde::de::DeserializeOwned>(
471    params: &serde_json::Map<String, serde_json::Value>,
472) -> Result<T, ToolError> {
473    let obj = serde_json::Value::Object(params.clone());
474    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
475        message: e.to_string(),
476    })
477}
478
/// Async trait for tool execution backends.
///
/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
/// and [`FileExecutor`](crate::FileExecutor).
///
/// # Contract
///
/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
///   return `Ok(None)` when the executor does not handle the given input — callers must not
///   treat `None` as an error.
/// - Implementors must be `Send + Sync`, every returned future must be `Send`, and methods
///   must be free of blocking I/O.
/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
///   protection) before executing any side-effectful operation.
/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
///   confirmation gates only — all other security controls remain active.
///
/// # Two Invocation Paths
///
/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
///
/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
/// This is the preferred path for new code.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
///
/// #[derive(Debug)]
/// struct EchoExecutor;
///
/// impl ToolExecutor for EchoExecutor {
///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
///         Ok(None) // not a fenced-block executor
///     }
///
///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
///         if call.tool_id != "echo" {
///             return Ok(None);
///         }
///         let text = call.params.get("text")
///             .and_then(|v| v.as_str())
///             .unwrap_or("")
///             .to_owned();
///         Ok(Some(ToolOutput {
///             tool_name: "echo".into(),
///             summary: text,
///             blocks_executed: 1,
///             filter_stats: None,
///             diff: None,
///             streamed: false,
///             terminal_id: None,
///             locations: None,
///             raw_response: None,
///             claim_source: None,
///         }))
///     }
/// }
/// ```
///
/// # TODO (G3 — deferred: Tower-style tool middleware stack)
///
/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
/// are scattered across individual executor implementations. The planned approach is a
/// composable middleware stack similar to Tower's `Service` trait:
///
/// ```text
/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
/// ```
///
/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
/// dynamic dispatch overhead before collapsing D2.
///
/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
///
/// Having two parallel traits creates duplication and confusion. The blanket impl
/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
/// added to both traits. Use `trait_variant::make` or a single object-safe design.
///
/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
pub trait ToolExecutor: Send + Sync {
    /// Parse `response` for fenced tool blocks and execute them.
    ///
    /// This is the only method without a default implementation.
    ///
    /// Returns `Ok(None)` when no tool blocks are found in `response`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
    /// sandbox violation, network error, timeout, etc.).
    fn execute(
        &self,
        response: &str,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;

    /// Execute bypassing confirmation checks (called after user approves).
    ///
    /// Security controls other than the confirmation gate remain active. Default
    /// implementation delegates to [`execute`](ToolExecutor::execute).
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] on execution failure.
    fn execute_confirmed(
        &self,
        response: &str,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        self.execute(response)
    }

    /// Return the tool definitions this executor can handle.
    ///
    /// Used to populate the LLM's tool schema at context-assembly time.
    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
        vec![]
    }

    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
    ///
    /// The default implementation handles nothing and resolves immediately to `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
    fn execute_tool_call(
        &self,
        _call: &ToolCall,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        std::future::ready(Ok(None))
    }

    /// Execute a structured tool call bypassing confirmation checks.
    ///
    /// Called after the user has explicitly approved the tool invocation.
    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] on execution failure.
    fn execute_tool_call_confirmed(
        &self,
        call: &ToolCall,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        self.execute_tool_call(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    ///
    /// Called by the agent loop before each turn when the active skill specifies env vars.
    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    ///
    /// Trust level affects which operations are permitted (e.g. network access, file writes).
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
    /// Shell commands and other non-idempotent operations must keep the default `false`
    /// to prevent double-execution of side-effectful commands.
    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
        false
    }

    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
    ///
    /// Speculative execution requires the tool to be:
    /// 1. Idempotent — repeated execution with the same args produces the same result.
    /// 2. Side-effect-free or cheaply reversible.
    /// 3. Not subject to user confirmation
    ///    ([`requires_confirmation`](ToolExecutor::requires_confirmation) must be `false`
    ///    at call time).
    ///
    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
    /// properties. The engine additionally gates on trust level and confirmation status
    /// regardless of this flag.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use zeph_tools::ToolExecutor;
    ///
    /// struct ReadOnlyExecutor;
    /// impl ToolExecutor for ReadOnlyExecutor {
    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
    ///         Ok(None)
    ///     }
    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
    ///         true // read-only, idempotent
    ///     }
    /// }
    /// ```
    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `false`. Executors that enforce a confirmation policy (e.g. `TrustGateExecutor`)
    /// must override this to reflect their actual policy without executing the tool.
    fn requires_confirmation(&self, _call: &ToolCall) -> bool {
        false
    }
}
688
/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
///
/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
///
/// Implemented automatically for all `T: ToolExecutor` via the blanket impl below, which
/// forwards every method to its [`ToolExecutor`] counterpart.
/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
pub trait ErasedToolExecutor: Send + Sync {
    /// Boxed-future counterpart of [`ToolExecutor::execute`].
    fn execute_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_confirmed`].
    fn execute_confirmed_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Counterpart of [`ToolExecutor::tool_definitions`].
    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call`].
    fn execute_tool_call_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call_confirmed`].
    ///
    /// The default falls back to
    /// [`execute_tool_call_erased`](ErasedToolExecutor::execute_tool_call_erased), i.e. the
    /// normal (non-bypassing) path.
    fn execute_tool_call_confirmed_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        // This default only applies to types that implement `ErasedToolExecutor` by hand:
        // they fall back to `execute_tool_call_erased` (normal enforcement path).
        // The blanket impl for `T: ToolExecutor` overrides this method and forwards to
        // `ToolExecutor::execute_tool_call_confirmed`, so executors that override that
        // method (e.g. TrustGateExecutor) are still routed through their override.
        self.execute_tool_call_erased(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Unlike [`ToolExecutor::is_tool_retryable`], this has no default: manual implementors
    /// must answer explicitly.
    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;

    /// Whether a tool call can be safely dispatched speculatively.
    ///
    /// Default: `false`. Override to `true` in read-only executors.
    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `true` (confirmation required). Implementors that want to allow speculative
    /// dispatch must explicitly return `false`. The blanket impl for `T: ToolExecutor`
    /// delegates to [`ToolExecutor::requires_confirmation`] (whose own default is `false`).
    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
        true
    }
}
754
755impl<T: ToolExecutor> ErasedToolExecutor for T {
756    fn execute_erased<'a>(
757        &'a self,
758        response: &'a str,
759    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
760    {
761        Box::pin(self.execute(response))
762    }
763
764    fn execute_confirmed_erased<'a>(
765        &'a self,
766        response: &'a str,
767    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
768    {
769        Box::pin(self.execute_confirmed(response))
770    }
771
772    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
773        self.tool_definitions()
774    }
775
776    fn execute_tool_call_erased<'a>(
777        &'a self,
778        call: &'a ToolCall,
779    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
780    {
781        Box::pin(self.execute_tool_call(call))
782    }
783
784    fn execute_tool_call_confirmed_erased<'a>(
785        &'a self,
786        call: &'a ToolCall,
787    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
788    {
789        Box::pin(self.execute_tool_call_confirmed(call))
790    }
791
792    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
793        ToolExecutor::set_skill_env(self, env);
794    }
795
796    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
797        ToolExecutor::set_effective_trust(self, level);
798    }
799
800    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
801        ToolExecutor::is_tool_retryable(self, tool_id)
802    }
803
804    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
805        ToolExecutor::is_tool_speculatable(self, tool_id)
806    }
807
808    fn requires_confirmation_erased(&self, call: &ToolCall) -> bool {
809        ToolExecutor::requires_confirmation(self, call)
810    }
811}
812
813/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
814///
815/// Enables dynamic composition of tool executors at runtime without static type chains.
816pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
817
818impl ToolExecutor for DynExecutor {
819    fn execute(
820        &self,
821        response: &str,
822    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
823        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
824        let inner = std::sync::Arc::clone(&self.0);
825        let response = response.to_owned();
826        async move { inner.execute_erased(&response).await }
827    }
828
829    fn execute_confirmed(
830        &self,
831        response: &str,
832    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
833        let inner = std::sync::Arc::clone(&self.0);
834        let response = response.to_owned();
835        async move { inner.execute_confirmed_erased(&response).await }
836    }
837
838    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
839        self.0.tool_definitions_erased()
840    }
841
842    fn execute_tool_call(
843        &self,
844        call: &ToolCall,
845    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
846        let inner = std::sync::Arc::clone(&self.0);
847        let call = call.clone();
848        async move { inner.execute_tool_call_erased(&call).await }
849    }
850
851    fn execute_tool_call_confirmed(
852        &self,
853        call: &ToolCall,
854    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
855        let inner = std::sync::Arc::clone(&self.0);
856        let call = call.clone();
857        async move { inner.execute_tool_call_confirmed_erased(&call).await }
858    }
859
860    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
861        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
862    }
863
864    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
865        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
866    }
867
868    fn is_tool_retryable(&self, tool_id: &str) -> bool {
869        self.0.is_tool_retryable_erased(tool_id)
870    }
871
872    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
873        self.0.is_tool_speculatable_erased(tool_id)
874    }
875
876    fn requires_confirmation(&self, call: &ToolCall) -> bool {
877        self.0.requires_confirmation_erased(call)
878    }
879}
880
/// Collect the bodies of every fenced code block tagged with `lang`.
///
/// A block opens at `` ```{lang} `` and closes at the next `` ``` ``. The opening tag only
/// counts when the character right after it is not alphanumeric, `_` or `-` (or the text
/// ends there), so looking for `bash` skips a `` ```bashrc `` fence. Each body is returned
/// trimmed and borrowed from `text`, in order of appearance. Scanning stops at the first
/// accepted opening fence that has no closing fence.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const FENCE: &str = "```";

    let opener = format!("```{lang}");
    let mut found = Vec::new();
    // `remaining` is the unconsumed tail of `text`; `offset` is where the next search
    // starts inside it (non-zero only after a rejected opener).
    let mut remaining = text;
    let mut offset = 0;

    loop {
        let Some(hit) = remaining[offset..].find(opener.as_str()) else {
            break;
        };
        let body_start = offset + hit + opener.len();
        let tail = &remaining[body_start..];

        // Reject the opener when the tag continues as a longer word, e.g. "```bashrc"
        // while looking for "```bash".
        let tag_continues = tail
            .chars()
            .next()
            .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '-');
        if tag_continues {
            offset = body_start;
            continue;
        }

        let Some(close) = tail.find(FENCE) else {
            break;
        };
        found.push(tail[..close].trim());
        remaining = &tail[close + FENCE.len()..];
        offset = 0;
    }

    found
}
917
918#[cfg(test)]
919mod tests {
920    use super::*;
921
922    #[test]
923    fn tool_output_display() {
924        let output = ToolOutput {
925            tool_name: ToolName::new("bash"),
926            summary: "$ echo hello\nhello".to_owned(),
927            blocks_executed: 1,
928            filter_stats: None,
929            diff: None,
930            streamed: false,
931            terminal_id: None,
932            locations: None,
933            raw_response: None,
934            claim_source: None,
935        };
936        assert_eq!(output.to_string(), "$ echo hello\nhello");
937    }
938
939    #[test]
940    fn tool_error_blocked_display() {
941        let err = ToolError::Blocked {
942            command: "rm -rf /".to_owned(),
943        };
944        assert_eq!(err.to_string(), "command blocked by policy: rm -rf /");
945    }
946
947    #[test]
948    fn tool_error_sandbox_violation_display() {
949        let err = ToolError::SandboxViolation {
950            path: "/etc/shadow".to_owned(),
951        };
952        assert_eq!(err.to_string(), "path not allowed by sandbox: /etc/shadow");
953    }
954
955    #[test]
956    fn tool_error_confirmation_required_display() {
957        let err = ToolError::ConfirmationRequired {
958            command: "rm -rf /tmp".to_owned(),
959        };
960        assert_eq!(
961            err.to_string(),
962            "command requires confirmation: rm -rf /tmp"
963        );
964    }
965
966    #[test]
967    fn tool_error_timeout_display() {
968        let err = ToolError::Timeout { timeout_secs: 30 };
969        assert_eq!(err.to_string(), "command timed out after 30s");
970    }
971
972    #[test]
973    fn tool_error_invalid_params_display() {
974        let err = ToolError::InvalidParams {
975            message: "missing field `command`".to_owned(),
976        };
977        assert_eq!(
978            err.to_string(),
979            "invalid tool parameters: missing field `command`"
980        );
981    }
982
983    #[test]
984    fn deserialize_params_valid() {
985        #[derive(Debug, serde::Deserialize, PartialEq)]
986        struct P {
987            name: String,
988            count: u32,
989        }
990        let mut map = serde_json::Map::new();
991        map.insert("name".to_owned(), serde_json::json!("test"));
992        map.insert("count".to_owned(), serde_json::json!(42));
993        let p: P = deserialize_params(&map).unwrap();
994        assert_eq!(
995            p,
996            P {
997                name: "test".to_owned(),
998                count: 42
999            }
1000        );
1001    }
1002
1003    #[test]
1004    fn deserialize_params_missing_required_field() {
1005        #[derive(Debug, serde::Deserialize)]
1006        #[allow(dead_code)]
1007        struct P {
1008            name: String,
1009        }
1010        let map = serde_json::Map::new();
1011        let err = deserialize_params::<P>(&map).unwrap_err();
1012        assert!(matches!(err, ToolError::InvalidParams { .. }));
1013    }
1014
1015    #[test]
1016    fn deserialize_params_wrong_type() {
1017        #[derive(Debug, serde::Deserialize)]
1018        #[allow(dead_code)]
1019        struct P {
1020            count: u32,
1021        }
1022        let mut map = serde_json::Map::new();
1023        map.insert("count".to_owned(), serde_json::json!("not a number"));
1024        let err = deserialize_params::<P>(&map).unwrap_err();
1025        assert!(matches!(err, ToolError::InvalidParams { .. }));
1026    }
1027
1028    #[test]
1029    fn deserialize_params_all_optional_empty() {
1030        #[derive(Debug, serde::Deserialize, PartialEq)]
1031        struct P {
1032            name: Option<String>,
1033        }
1034        let map = serde_json::Map::new();
1035        let p: P = deserialize_params(&map).unwrap();
1036        assert_eq!(p, P { name: None });
1037    }
1038
1039    #[test]
1040    fn deserialize_params_ignores_extra_fields() {
1041        #[derive(Debug, serde::Deserialize, PartialEq)]
1042        struct P {
1043            name: String,
1044        }
1045        let mut map = serde_json::Map::new();
1046        map.insert("name".to_owned(), serde_json::json!("test"));
1047        map.insert("extra".to_owned(), serde_json::json!(true));
1048        let p: P = deserialize_params(&map).unwrap();
1049        assert_eq!(
1050            p,
1051            P {
1052                name: "test".to_owned()
1053            }
1054        );
1055    }
1056
1057    #[test]
1058    fn tool_error_execution_display() {
1059        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash not found");
1060        let err = ToolError::Execution(io_err);
1061        assert!(err.to_string().starts_with("execution failed:"));
1062        assert!(err.to_string().contains("bash not found"));
1063    }
1064
1065    // ErrorKind classification tests
1066    #[test]
1067    fn error_kind_timeout_is_transient() {
1068        let err = ToolError::Timeout { timeout_secs: 30 };
1069        assert_eq!(err.kind(), ErrorKind::Transient);
1070    }
1071
1072    #[test]
1073    fn error_kind_blocked_is_permanent() {
1074        let err = ToolError::Blocked {
1075            command: "rm -rf /".to_owned(),
1076        };
1077        assert_eq!(err.kind(), ErrorKind::Permanent);
1078    }
1079
1080    #[test]
1081    fn error_kind_sandbox_violation_is_permanent() {
1082        let err = ToolError::SandboxViolation {
1083            path: "/etc/shadow".to_owned(),
1084        };
1085        assert_eq!(err.kind(), ErrorKind::Permanent);
1086    }
1087
1088    #[test]
1089    fn error_kind_cancelled_is_permanent() {
1090        assert_eq!(ToolError::Cancelled.kind(), ErrorKind::Permanent);
1091    }
1092
1093    #[test]
1094    fn error_kind_invalid_params_is_permanent() {
1095        let err = ToolError::InvalidParams {
1096            message: "bad arg".to_owned(),
1097        };
1098        assert_eq!(err.kind(), ErrorKind::Permanent);
1099    }
1100
1101    #[test]
1102    fn error_kind_confirmation_required_is_permanent() {
1103        let err = ToolError::ConfirmationRequired {
1104            command: "rm /tmp/x".to_owned(),
1105        };
1106        assert_eq!(err.kind(), ErrorKind::Permanent);
1107    }
1108
1109    #[test]
1110    fn error_kind_execution_timed_out_is_transient() {
1111        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1112        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1113    }
1114
1115    #[test]
1116    fn error_kind_execution_interrupted_is_transient() {
1117        let io_err = std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted");
1118        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1119    }
1120
1121    #[test]
1122    fn error_kind_execution_connection_reset_is_transient() {
1123        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
1124        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1125    }
1126
1127    #[test]
1128    fn error_kind_execution_broken_pipe_is_transient() {
1129        let io_err = std::io::Error::new(std::io::ErrorKind::BrokenPipe, "pipe broken");
1130        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1131    }
1132
1133    #[test]
1134    fn error_kind_execution_would_block_is_transient() {
1135        let io_err = std::io::Error::new(std::io::ErrorKind::WouldBlock, "would block");
1136        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1137    }
1138
1139    #[test]
1140    fn error_kind_execution_connection_aborted_is_transient() {
1141        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
1142        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1143    }
1144
1145    #[test]
1146    fn error_kind_execution_not_found_is_permanent() {
1147        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
1148        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1149    }
1150
1151    #[test]
1152    fn error_kind_execution_permission_denied_is_permanent() {
1153        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied");
1154        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1155    }
1156
1157    #[test]
1158    fn error_kind_execution_other_is_permanent() {
1159        let io_err = std::io::Error::other("some other error");
1160        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1161    }
1162
1163    #[test]
1164    fn error_kind_execution_already_exists_is_permanent() {
1165        let io_err = std::io::Error::new(std::io::ErrorKind::AlreadyExists, "exists");
1166        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1167    }
1168
1169    #[test]
1170    fn error_kind_display() {
1171        assert_eq!(ErrorKind::Transient.to_string(), "transient");
1172        assert_eq!(ErrorKind::Permanent.to_string(), "permanent");
1173    }
1174
1175    #[test]
1176    fn truncate_tool_output_short_passthrough() {
1177        let short = "hello world";
1178        assert_eq!(truncate_tool_output(short), short);
1179    }
1180
1181    #[test]
1182    fn truncate_tool_output_exact_limit() {
1183        let exact = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
1184        assert_eq!(truncate_tool_output(&exact), exact);
1185    }
1186
1187    #[test]
1188    fn truncate_tool_output_long_split() {
1189        let long = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
1190        let result = truncate_tool_output(&long);
1191        assert!(result.contains("truncated"));
1192        assert!(result.len() < long.len());
1193    }
1194
1195    #[test]
1196    fn truncate_tool_output_notice_contains_count() {
1197        let long = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
1198        let result = truncate_tool_output(&long);
1199        assert!(result.contains("truncated"));
1200        assert!(result.contains("chars"));
1201    }
1202
1203    #[derive(Debug)]
1204    struct DefaultExecutor;
1205    impl ToolExecutor for DefaultExecutor {
1206        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1207            Ok(None)
1208        }
1209    }
1210
1211    #[tokio::test]
1212    async fn execute_tool_call_default_returns_none() {
1213        let exec = DefaultExecutor;
1214        let call = ToolCall {
1215            tool_id: ToolName::new("anything"),
1216            params: serde_json::Map::new(),
1217            caller_id: None,
1218            context: None,
1219        };
1220        let result = exec.execute_tool_call(&call).await.unwrap();
1221        assert!(result.is_none());
1222    }
1223
1224    #[test]
1225    fn filter_stats_savings_pct() {
1226        let fs = FilterStats {
1227            raw_chars: 1000,
1228            filtered_chars: 200,
1229            ..Default::default()
1230        };
1231        assert!((fs.savings_pct() - 80.0).abs() < 0.01);
1232    }
1233
1234    #[test]
1235    fn filter_stats_savings_pct_zero() {
1236        let fs = FilterStats::default();
1237        assert!((fs.savings_pct()).abs() < 0.01);
1238    }
1239
1240    #[test]
1241    fn filter_stats_estimated_tokens_saved() {
1242        let fs = FilterStats {
1243            raw_chars: 1000,
1244            filtered_chars: 200,
1245            ..Default::default()
1246        };
1247        assert_eq!(fs.estimated_tokens_saved(), 200); // (1000 - 200) / 4
1248    }
1249
1250    #[test]
1251    fn filter_stats_format_inline() {
1252        let fs = FilterStats {
1253            raw_chars: 1000,
1254            filtered_chars: 200,
1255            raw_lines: 342,
1256            filtered_lines: 28,
1257            ..Default::default()
1258        };
1259        let line = fs.format_inline("shell");
1260        assert_eq!(line, "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered");
1261    }
1262
1263    #[test]
1264    fn filter_stats_format_inline_zero() {
1265        let fs = FilterStats::default();
1266        let line = fs.format_inline("bash");
1267        assert_eq!(line, "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered");
1268    }
1269
1270    // DynExecutor tests
1271
1272    struct FixedExecutor {
1273        tool_id: &'static str,
1274        output: &'static str,
1275    }
1276
1277    impl ToolExecutor for FixedExecutor {
1278        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1279            Ok(Some(ToolOutput {
1280                tool_name: ToolName::new(self.tool_id),
1281                summary: self.output.to_owned(),
1282                blocks_executed: 1,
1283                filter_stats: None,
1284                diff: None,
1285                streamed: false,
1286                terminal_id: None,
1287                locations: None,
1288                raw_response: None,
1289                claim_source: None,
1290            }))
1291        }
1292
1293        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1294            vec![]
1295        }
1296
1297        async fn execute_tool_call(
1298            &self,
1299            _call: &ToolCall,
1300        ) -> Result<Option<ToolOutput>, ToolError> {
1301            Ok(Some(ToolOutput {
1302                tool_name: ToolName::new(self.tool_id),
1303                summary: self.output.to_owned(),
1304                blocks_executed: 1,
1305                filter_stats: None,
1306                diff: None,
1307                streamed: false,
1308                terminal_id: None,
1309                locations: None,
1310                raw_response: None,
1311                claim_source: None,
1312            }))
1313        }
1314    }
1315
1316    #[tokio::test]
1317    async fn dyn_executor_execute_delegates() {
1318        let inner = std::sync::Arc::new(FixedExecutor {
1319            tool_id: "bash",
1320            output: "hello",
1321        });
1322        let exec = DynExecutor(inner);
1323        let result = exec.execute("```bash\necho hello\n```").await.unwrap();
1324        assert!(result.is_some());
1325        assert_eq!(result.unwrap().summary, "hello");
1326    }
1327
1328    #[tokio::test]
1329    async fn dyn_executor_execute_confirmed_delegates() {
1330        let inner = std::sync::Arc::new(FixedExecutor {
1331            tool_id: "bash",
1332            output: "confirmed",
1333        });
1334        let exec = DynExecutor(inner);
1335        let result = exec.execute_confirmed("...").await.unwrap();
1336        assert!(result.is_some());
1337        assert_eq!(result.unwrap().summary, "confirmed");
1338    }
1339
1340    #[test]
1341    fn dyn_executor_tool_definitions_delegates() {
1342        let inner = std::sync::Arc::new(FixedExecutor {
1343            tool_id: "my_tool",
1344            output: "",
1345        });
1346        let exec = DynExecutor(inner);
1347        // FixedExecutor returns empty definitions; verify delegation occurs without panic.
1348        let defs = exec.tool_definitions();
1349        assert!(defs.is_empty());
1350    }
1351
1352    #[tokio::test]
1353    async fn dyn_executor_execute_tool_call_delegates() {
1354        let inner = std::sync::Arc::new(FixedExecutor {
1355            tool_id: "bash",
1356            output: "tool_call_result",
1357        });
1358        let exec = DynExecutor(inner);
1359        let call = ToolCall {
1360            tool_id: ToolName::new("bash"),
1361            params: serde_json::Map::new(),
1362            caller_id: None,
1363            context: None,
1364        };
1365        let result = exec.execute_tool_call(&call).await.unwrap();
1366        assert!(result.is_some());
1367        assert_eq!(result.unwrap().summary, "tool_call_result");
1368    }
1369
1370    #[test]
1371    fn dyn_executor_set_effective_trust_delegates() {
1372        use std::sync::atomic::{AtomicU8, Ordering};
1373
1374        struct TrustCapture(AtomicU8);
1375        impl ToolExecutor for TrustCapture {
1376            async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1377                Ok(None)
1378            }
1379            fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
1380                // encode: Trusted=0, Verified=1, Quarantined=2, Blocked=3
1381                let v = match level {
1382                    crate::SkillTrustLevel::Trusted => 0u8,
1383                    crate::SkillTrustLevel::Verified => 1,
1384                    crate::SkillTrustLevel::Quarantined => 2,
1385                    crate::SkillTrustLevel::Blocked => 3,
1386                };
1387                self.0.store(v, Ordering::Relaxed);
1388            }
1389        }
1390
1391        let inner = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
1392        let exec =
1393            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1394        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Quarantined);
1395        assert_eq!(inner.0.load(Ordering::Relaxed), 2);
1396
1397        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Blocked);
1398        assert_eq!(inner.0.load(Ordering::Relaxed), 3);
1399    }
1400
1401    #[test]
1402    fn extract_fenced_blocks_no_prefix_match() {
1403        // ```bashrc must NOT match when searching for "bash"
1404        assert!(extract_fenced_blocks("```bashrc\nfoo\n```", "bash").is_empty());
1405        // exact match
1406        assert_eq!(
1407            extract_fenced_blocks("```bash\nfoo\n```", "bash"),
1408            vec!["foo"]
1409        );
1410        // trailing space is fine
1411        assert_eq!(
1412            extract_fenced_blocks("```bash \nfoo\n```", "bash"),
1413            vec!["foo"]
1414        );
1415    }
1416
1417    // ── ToolError::category() delegation tests ────────────────────────────────
1418
1419    #[test]
1420    fn tool_error_http_400_category_is_invalid_parameters() {
1421        use crate::error_taxonomy::ToolErrorCategory;
1422        let err = ToolError::Http {
1423            status: 400,
1424            message: "bad request".to_owned(),
1425        };
1426        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1427    }
1428
1429    #[test]
1430    fn tool_error_http_401_category_is_policy_blocked() {
1431        use crate::error_taxonomy::ToolErrorCategory;
1432        let err = ToolError::Http {
1433            status: 401,
1434            message: "unauthorized".to_owned(),
1435        };
1436        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1437    }
1438
1439    #[test]
1440    fn tool_error_http_403_category_is_policy_blocked() {
1441        use crate::error_taxonomy::ToolErrorCategory;
1442        let err = ToolError::Http {
1443            status: 403,
1444            message: "forbidden".to_owned(),
1445        };
1446        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1447    }
1448
1449    #[test]
1450    fn tool_error_http_404_category_is_permanent_failure() {
1451        use crate::error_taxonomy::ToolErrorCategory;
1452        let err = ToolError::Http {
1453            status: 404,
1454            message: "not found".to_owned(),
1455        };
1456        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1457    }
1458
1459    #[test]
1460    fn tool_error_http_429_category_is_rate_limited() {
1461        use crate::error_taxonomy::ToolErrorCategory;
1462        let err = ToolError::Http {
1463            status: 429,
1464            message: "too many requests".to_owned(),
1465        };
1466        assert_eq!(err.category(), ToolErrorCategory::RateLimited);
1467    }
1468
1469    #[test]
1470    fn tool_error_http_500_category_is_server_error() {
1471        use crate::error_taxonomy::ToolErrorCategory;
1472        let err = ToolError::Http {
1473            status: 500,
1474            message: "internal server error".to_owned(),
1475        };
1476        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1477    }
1478
1479    #[test]
1480    fn tool_error_http_502_category_is_server_error() {
1481        use crate::error_taxonomy::ToolErrorCategory;
1482        let err = ToolError::Http {
1483            status: 502,
1484            message: "bad gateway".to_owned(),
1485        };
1486        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1487    }
1488
1489    #[test]
1490    fn tool_error_http_503_category_is_server_error() {
1491        use crate::error_taxonomy::ToolErrorCategory;
1492        let err = ToolError::Http {
1493            status: 503,
1494            message: "service unavailable".to_owned(),
1495        };
1496        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1497    }
1498
1499    #[test]
1500    fn tool_error_http_503_is_transient_triggers_phase2_retry() {
1501        // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
1502        // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
1503        let err = ToolError::Http {
1504            status: 503,
1505            message: "service unavailable".to_owned(),
1506        };
1507        assert_eq!(
1508            err.kind(),
1509            ErrorKind::Transient,
1510            "HTTP 503 must be Transient so Phase 2 retry fires"
1511        );
1512    }
1513
1514    #[test]
1515    fn tool_error_blocked_category_is_policy_blocked() {
1516        use crate::error_taxonomy::ToolErrorCategory;
1517        let err = ToolError::Blocked {
1518            command: "rm -rf /".to_owned(),
1519        };
1520        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1521    }
1522
1523    #[test]
1524    fn tool_error_sandbox_violation_category_is_policy_blocked() {
1525        use crate::error_taxonomy::ToolErrorCategory;
1526        let err = ToolError::SandboxViolation {
1527            path: "/etc/shadow".to_owned(),
1528        };
1529        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1530    }
1531
1532    #[test]
1533    fn tool_error_confirmation_required_category() {
1534        use crate::error_taxonomy::ToolErrorCategory;
1535        let err = ToolError::ConfirmationRequired {
1536            command: "rm /tmp/x".to_owned(),
1537        };
1538        assert_eq!(err.category(), ToolErrorCategory::ConfirmationRequired);
1539    }
1540
1541    #[test]
1542    fn tool_error_timeout_category() {
1543        use crate::error_taxonomy::ToolErrorCategory;
1544        let err = ToolError::Timeout { timeout_secs: 30 };
1545        assert_eq!(err.category(), ToolErrorCategory::Timeout);
1546    }
1547
1548    #[test]
1549    fn tool_error_cancelled_category() {
1550        use crate::error_taxonomy::ToolErrorCategory;
1551        assert_eq!(
1552            ToolError::Cancelled.category(),
1553            ToolErrorCategory::Cancelled
1554        );
1555    }
1556
1557    #[test]
1558    fn tool_error_invalid_params_category() {
1559        use crate::error_taxonomy::ToolErrorCategory;
1560        let err = ToolError::InvalidParams {
1561            message: "missing field".to_owned(),
1562        };
1563        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1564    }
1565
1566    // B2 regression: Execution(NotFound) must NOT produce ToolNotFound.
1567    #[test]
1568    fn tool_error_execution_not_found_category_is_permanent_failure() {
1569        use crate::error_taxonomy::ToolErrorCategory;
1570        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash: not found");
1571        let err = ToolError::Execution(io_err);
1572        let cat = err.category();
1573        assert_ne!(
1574            cat,
1575            ToolErrorCategory::ToolNotFound,
1576            "Execution(NotFound) must NOT map to ToolNotFound"
1577        );
1578        assert_eq!(cat, ToolErrorCategory::PermanentFailure);
1579    }
1580
1581    #[test]
1582    fn tool_error_execution_timed_out_category_is_timeout() {
1583        use crate::error_taxonomy::ToolErrorCategory;
1584        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out");
1585        assert_eq!(
1586            ToolError::Execution(io_err).category(),
1587            ToolErrorCategory::Timeout
1588        );
1589    }
1590
1591    #[test]
1592    fn tool_error_execution_connection_refused_category_is_network_error() {
1593        use crate::error_taxonomy::ToolErrorCategory;
1594        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionRefused, "refused");
1595        assert_eq!(
1596            ToolError::Execution(io_err).category(),
1597            ToolErrorCategory::NetworkError
1598        );
1599    }
1600
1601    // B4 regression: Http/network/transient categories must NOT be quality failures.
1602    #[test]
1603    fn b4_tool_error_http_429_not_quality_failure() {
1604        let err = ToolError::Http {
1605            status: 429,
1606            message: "rate limited".to_owned(),
1607        };
1608        assert!(
1609            !err.category().is_quality_failure(),
1610            "RateLimited must not be a quality failure"
1611        );
1612    }
1613
1614    #[test]
1615    fn b4_tool_error_http_503_not_quality_failure() {
1616        let err = ToolError::Http {
1617            status: 503,
1618            message: "service unavailable".to_owned(),
1619        };
1620        assert!(
1621            !err.category().is_quality_failure(),
1622            "ServerError must not be a quality failure"
1623        );
1624    }
1625
1626    #[test]
1627    fn b4_tool_error_execution_timed_out_not_quality_failure() {
1628        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1629        assert!(
1630            !ToolError::Execution(io_err).category().is_quality_failure(),
1631            "Timeout must not be a quality failure"
1632        );
1633    }
1634
1635    // ── ToolError::Shell category tests ──────────────────────────────────────
1636
1637    #[test]
1638    fn tool_error_shell_exit126_is_policy_blocked() {
1639        use crate::error_taxonomy::ToolErrorCategory;
1640        let err = ToolError::Shell {
1641            exit_code: 126,
1642            category: ToolErrorCategory::PolicyBlocked,
1643            message: "permission denied".to_owned(),
1644        };
1645        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1646    }
1647
1648    #[test]
1649    fn tool_error_shell_exit127_is_permanent_failure() {
1650        use crate::error_taxonomy::ToolErrorCategory;
1651        let err = ToolError::Shell {
1652            exit_code: 127,
1653            category: ToolErrorCategory::PermanentFailure,
1654            message: "command not found".to_owned(),
1655        };
1656        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1657        assert!(!err.category().is_retryable());
1658    }
1659
1660    #[test]
1661    fn tool_error_shell_not_quality_failure() {
1662        use crate::error_taxonomy::ToolErrorCategory;
1663        let err = ToolError::Shell {
1664            exit_code: 127,
1665            category: ToolErrorCategory::PermanentFailure,
1666            message: "command not found".to_owned(),
1667        };
1668        // Shell exit errors are not attributable to LLM output quality.
1669        assert!(!err.category().is_quality_failure());
1670    }
1671
1672    // ── requires_confirmation / requires_confirmation_erased tests (#3644) ───
1673
1674    /// Stub implementing only `ToolExecutor` without overriding `requires_confirmation`.
1675    struct StubExecutor;
1676    impl ToolExecutor for StubExecutor {
1677        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1678            Ok(None)
1679        }
1680    }
1681
1682    /// Stub that always signals confirmation is required via `ToolExecutor::requires_confirmation`.
1683    struct ConfirmingExecutor;
1684    impl ToolExecutor for ConfirmingExecutor {
1685        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1686            Ok(None)
1687        }
1688        fn requires_confirmation(&self, _call: &ToolCall) -> bool {
1689            true
1690        }
1691    }
1692
1693    fn dummy_call() -> ToolCall {
1694        ToolCall {
1695            tool_id: ToolName::new("test"),
1696            params: serde_json::Map::new(),
1697            caller_id: None,
1698            context: None,
1699        }
1700    }
1701
1702    #[test]
1703    fn requires_confirmation_default_is_false_on_tool_executor() {
1704        let exec = StubExecutor;
1705        assert!(
1706            !exec.requires_confirmation(&dummy_call()),
1707            "ToolExecutor default requires_confirmation must be false"
1708        );
1709    }
1710
1711    #[test]
1712    fn requires_confirmation_erased_delegates_to_tool_executor_default() {
1713        // blanket impl routes erased → ToolExecutor::requires_confirmation (= false)
1714        let exec = StubExecutor;
1715        assert!(
1716            !ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1717            "requires_confirmation_erased via blanket impl must return false for stub executor"
1718        );
1719    }
1720
1721    #[test]
1722    fn requires_confirmation_erased_delegates_override() {
1723        // ConfirmingExecutor overrides requires_confirmation → true;
1724        // blanket impl must propagate this.
1725        let exec = ConfirmingExecutor;
1726        assert!(
1727            ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1728            "requires_confirmation_erased must return true when ToolExecutor override returns true"
1729        );
1730    }
1731
1732    #[test]
1733    fn requires_confirmation_erased_default_on_erased_trait_is_true() {
1734        // ErasedToolExecutor's own default (trait method body) returns true.
1735        // We construct a DynExecutor wrapping ConfirmingExecutor and verify via the erased path.
1736        // (We cannot instantiate ErasedToolExecutor directly without a concrete type.)
1737        // Instead verify via a type that only implements ErasedToolExecutor manually:
1738        struct ManualErased;
1739        impl ErasedToolExecutor for ManualErased {
1740            fn execute_erased<'a>(
1741                &'a self,
1742                _response: &'a str,
1743            ) -> std::pin::Pin<
1744                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1745            > {
1746                Box::pin(std::future::ready(Ok(None)))
1747            }
1748            fn execute_confirmed_erased<'a>(
1749                &'a self,
1750                _response: &'a str,
1751            ) -> std::pin::Pin<
1752                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1753            > {
1754                Box::pin(std::future::ready(Ok(None)))
1755            }
1756            fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
1757                vec![]
1758            }
1759            fn execute_tool_call_erased<'a>(
1760                &'a self,
1761                _call: &'a ToolCall,
1762            ) -> std::pin::Pin<
1763                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1764            > {
1765                Box::pin(std::future::ready(Ok(None)))
1766            }
1767            fn is_tool_retryable_erased(&self, _tool_id: &str) -> bool {
1768                false
1769            }
1770            // requires_confirmation_erased NOT overridden → trait default returns true
1771        }
1772        let exec = ManualErased;
1773        assert!(
1774            exec.requires_confirmation_erased(&dummy_call()),
1775            "ErasedToolExecutor trait-level default for requires_confirmation_erased must be true"
1776        );
1777    }
1778
1779    // ── DynExecutor::requires_confirmation delegation tests (#3650) ──────────
1780
1781    #[test]
1782    fn dyn_executor_requires_confirmation_delegates() {
1783        let inner = std::sync::Arc::new(ConfirmingExecutor);
1784        let exec =
1785            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1786        assert!(
1787            ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1788            "DynExecutor must delegate requires_confirmation to inner executor"
1789        );
1790    }
1791
1792    #[test]
1793    fn dyn_executor_requires_confirmation_default_false() {
1794        let inner = std::sync::Arc::new(StubExecutor);
1795        let exec =
1796            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1797        assert!(
1798            !ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1799            "DynExecutor must return false when inner executor does not require confirmation"
1800        );
1801    }
1802}
zeph_tools/executor.rs

zeph_tools/
executor.rs