zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Before/after snapshot of one file, consumed by the TUI to render a diff.
///
/// [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor) produce
/// this when a tool call modifies a tracked file; the TUI shows it as a side-by-side diff.
#[derive(Debug, Clone)]
pub struct DiffData {
    /// Path of the modified file; may be relative or absolute.
    pub file_path: String,
    /// Contents of the file prior to the tool run.
    pub old_content: String,
    /// Contents of the file once the tool run finished.
    pub new_content: String,
}
23
24/// Structured tool invocation from LLM.
25///
26/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
27/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`].
28///
29/// # Example
30///
31/// ```rust
32/// use zeph_tools::{ToolCall, ExecutionContext};
33/// use zeph_common::ToolName;
34///
35/// let call = ToolCall {
36///     tool_id: ToolName::new("bash"),
37///     params: {
38///         let mut m = serde_json::Map::new();
39///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
40///         m
41///     },
42///     caller_id: Some("user-42".to_owned()),
43///     context: Some(ExecutionContext::new().with_name("repo")),
44///     tool_call_id: String::new(),
45/// };
46/// assert_eq!(call.tool_id, "bash");
47/// ```
48#[derive(Debug, Clone)]
49pub struct ToolCall {
50    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
51    pub tool_id: ToolName,
52    /// JSON parameters for the tool call, deserialized into the tool's parameter struct.
53    pub params: serde_json::Map<String, serde_json::Value>,
54    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
55    /// `None` for system-initiated calls (scheduler, self-learning, internal).
56    pub caller_id: Option<String>,
57    /// Per-turn execution environment. `None` means use the executor default (process CWD
58    /// and inherited env), which is identical to the behaviour before this field existed.
59    pub context: Option<crate::ExecutionContext>,
60    /// Opaque tool call ID used to correlate [`ToolEvent::OutputChunk`] events with
61    /// their originating tool call in the TUI. Empty when not set by the agent loop.
62    pub tool_call_id: String,
63}
64
65/// Cumulative filter statistics for a single tool execution.
66///
67/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
68/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
69#[derive(Debug, Clone, Default)]
70pub struct FilterStats {
71    /// Raw character count before filtering.
72    pub raw_chars: usize,
73    /// Character count after filtering.
74    pub filtered_chars: usize,
75    /// Raw line count before filtering.
76    pub raw_lines: usize,
77    /// Line count after filtering.
78    pub filtered_lines: usize,
79    /// Worst-case confidence across all applied filters.
80    pub confidence: Option<crate::FilterConfidence>,
81    /// The shell command that produced this output, for display purposes.
82    pub command: Option<String>,
83    /// Zero-based line indices that were kept after filtering.
84    pub kept_lines: Vec<usize>,
85}
86
87impl FilterStats {
88    /// Returns the percentage of characters removed by filtering.
89    ///
90    /// Returns `0.0` when there was no raw output to filter.
91    #[must_use]
92    #[allow(clippy::cast_precision_loss)]
93    pub fn savings_pct(&self) -> f64 {
94        if self.raw_chars == 0 {
95            return 0.0;
96        }
97        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
98    }
99
100    /// Estimates the number of LLM tokens saved by filtering.
101    ///
102    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
103    /// not for billing or exact budget calculations.
104    #[must_use]
105    pub fn estimated_tokens_saved(&self) -> usize {
106        self.raw_chars.saturating_sub(self.filtered_chars) / 4
107    }
108
109    /// Formats a one-line filter summary for log messages and TUI status.
110    ///
111    /// # Example
112    ///
113    /// ```rust
114    /// use zeph_tools::FilterStats;
115    ///
116    /// let stats = FilterStats {
117    ///     raw_chars: 1000,
118    ///     filtered_chars: 400,
119    ///     raw_lines: 50,
120    ///     filtered_lines: 20,
121    ///     command: Some("cargo build".to_owned()),
122    ///     ..Default::default()
123    /// };
124    /// let summary = stats.format_inline("shell");
125    /// assert!(summary.contains("60.0% filtered"));
126    /// ```
127    #[must_use]
128    pub fn format_inline(&self, tool_name: &str) -> String {
129        let cmd_label = self
130            .command
131            .as_deref()
132            .map(|c| {
133                let trimmed = c.trim();
134                if trimmed.len() > 60 {
135                    format!(" `{}…`", &trimmed[..57])
136                } else {
137                    format!(" `{trimmed}`")
138                }
139            })
140            .unwrap_or_default();
141        format!(
142            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
143            self.raw_lines,
144            self.filtered_lines,
145            self.savings_pct()
146        )
147    }
148}
149
150/// Provenance of a tool execution result.
151///
152/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
153/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
154/// `None` means the source is unspecified (pass-through code, mocks, tests).
155#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
156#[serde(rename_all = "snake_case")]
157pub enum ClaimSource {
158    /// Local shell command execution.
159    Shell,
160    /// Local file system read/write.
161    FileSystem,
162    /// HTTP web scrape.
163    WebScrape,
164    /// MCP server tool response.
165    Mcp,
166    /// A2A agent message.
167    A2a,
168    /// Code search (LSP or semantic).
169    CodeSearch,
170    /// Agent diagnostics (internal).
171    Diagnostics,
172    /// Memory retrieval (semantic search).
173    Memory,
174    /// Telegram moderation action (reaction deletion).
175    Moderation,
176}
177
178/// Structured result from tool execution.
179///
180/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
181/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
182///
183/// # Example
184///
185/// ```rust
186/// use zeph_tools::{ToolOutput, executor::ClaimSource};
187/// use zeph_common::ToolName;
188///
189/// let output = ToolOutput {
190///     tool_name: ToolName::new("shell"),
191///     summary: "hello\n".to_owned(),
192///     blocks_executed: 1,
193///     filter_stats: None,
194///     diff: None,
195///     streamed: false,
196///     terminal_id: None,
197///     locations: None,
198///     raw_response: None,
199///     claim_source: Some(ClaimSource::Shell),
200/// };
201/// assert_eq!(output.to_string(), "hello\n");
202/// ```
203#[derive(Debug, Clone)]
204pub struct ToolOutput {
205    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
206    pub tool_name: ToolName,
207    /// Human-readable result text injected into the LLM context.
208    pub summary: String,
209    /// Number of code blocks processed in this invocation.
210    pub blocks_executed: u32,
211    /// Output filter statistics when filtering was applied, `None` otherwise.
212    pub filter_stats: Option<FilterStats>,
213    /// File diff data for TUI display when the tool modified a tracked file.
214    pub diff: Option<DiffData>,
215    /// Whether this tool already streamed its output via `ToolEvent` channel.
216    pub streamed: bool,
217    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
218    pub terminal_id: Option<String>,
219    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
220    pub locations: Option<Vec<String>>,
221    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
222    pub raw_response: Option<serde_json::Value>,
223    /// Provenance of this tool result. Set by the executor at construction time.
224    /// `None` in pass-through wrappers, mocks, and tests.
225    pub claim_source: Option<ClaimSource>,
226}
227
228impl fmt::Display for ToolOutput {
229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230        f.write_str(&self.summary)
231    }
232}
233
/// Default size limit for tool output injected into the LLM context without truncation.
///
/// Longer output is cut into a head and a tail by [`truncate_tool_output`], so that both the
/// beginning and the end of a large command output survive. The limit is compared against
/// the string's byte length (`str::len`).
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;
239
240/// Truncate tool output that exceeds [`MAX_TOOL_OUTPUT_CHARS`] using a head+tail split.
241///
242/// Equivalent to `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`.
243///
244/// # Example
245///
246/// ```rust
247/// use zeph_tools::executor::truncate_tool_output;
248///
249/// let short = "hello world";
250/// assert_eq!(truncate_tool_output(short), short);
251/// ```
252#[must_use]
253pub fn truncate_tool_output(output: &str) -> String {
254    truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)
255}
256
/// Shortens `output` to a head and a tail when it is longer than `max_chars`.
///
/// Input whose byte length is at most `max_chars` is returned as-is. Otherwise roughly the
/// first and last `max_chars / 2` bytes are kept, joined by a truncation marker. The head
/// boundary is moved down and the tail boundary is moved up to the nearest UTF-8 character
/// boundary, so the result is always valid text. All sizes, including the count reported in
/// the marker, are byte counts.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output_at;
///
/// let long = "a".repeat(200);
/// let truncated = truncate_tool_output_at(&long, 100);
/// assert!(truncated.contains("truncated"));
/// assert!(truncated.len() < long.len());
/// ```
#[must_use]
pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
    let total = output.len();
    if total > max_chars {
        let half = max_chars / 2;

        // Largest character boundary at or below `half`. Index 0 is always a boundary and
        // `half < total` here, so the walk terminates.
        let mut head_end = half;
        while !output.is_char_boundary(head_end) {
            head_end -= 1;
        }

        // Smallest character boundary at or above `total - half`. `total` itself is always
        // a boundary, so the walk terminates.
        let mut tail_start = total - half;
        while !output.is_char_boundary(tail_start) {
            tail_start += 1;
        }

        let (head, _) = output.split_at(head_end);
        let (_, tail) = output.split_at(tail_start);
        // Everything between the two kept slices was dropped.
        let truncated = tail_start - head_end;

        format!(
            "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
        )
    } else {
        output.to_owned()
    }
}
289
290/// Event emitted during tool execution for real-time UI updates.
291///
292/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
293/// Each event variant corresponds to a phase in the tool execution lifecycle.
294#[derive(Debug, Clone)]
295pub enum ToolEvent {
296    /// The tool has started. Displayed in the TUI as a spinner with the command text.
297    Started {
298        tool_name: ToolName,
299        command: String,
300        /// Active sandbox profile, if any. `None` when sandbox is disabled.
301        sandbox_profile: Option<String>,
302        /// Canonical absolute working directory the command will run in.
303        /// `None` for executors that do not resolve a per-turn CWD.
304        resolved_cwd: Option<String>,
305        /// Name of the resolved execution environment (from `[[execution.environments]]`),
306        /// or `None` when no named environment was selected.
307        execution_env: Option<String>,
308    },
309    /// A chunk of streaming output was produced (e.g. from a long-running command).
310    OutputChunk {
311        tool_name: ToolName,
312        command: String,
313        chunk: String,
314        /// Opaque tool call ID matching the corresponding [`ToolEvent::Started`] event.
315        /// Empty string when the executor does not have access to the call ID.
316        tool_call_id: String,
317    },
318    /// The tool finished. Contains the full output and optional filter/diff data.
319    Completed {
320        tool_name: ToolName,
321        command: String,
322        /// Full output text (possibly filtered and truncated).
323        output: String,
324        /// `true` when the tool exited successfully, `false` on error.
325        success: bool,
326        filter_stats: Option<FilterStats>,
327        diff: Option<DiffData>,
328        /// Set when this completion belongs to a background run. `None` for blocking runs.
329        run_id: Option<RunId>,
330    },
331    /// A transactional rollback was performed, restoring or deleting files.
332    Rollback {
333        tool_name: ToolName,
334        command: String,
335        /// Number of files restored to their pre-execution content.
336        restored_count: usize,
337        /// Number of files that did not exist before execution and were deleted.
338        deleted_count: usize,
339    },
340}
341
342/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
343///
344/// Capacity is 1024 slots. Streaming variants (`OutputChunk`, `Started`) use
345/// `try_send` and drop on full; terminal variants (`Completed`, `Rollback`) use
346/// `send().await` to guarantee delivery.
347///
348/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
349pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;
350
351/// Receiver half matching [`ToolEventTx`].
352pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;
353
/// Number of slots in the bounded tool-event channel.
pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
356
357/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
358///
359/// Transient errors may succeed on retry (network blips, race conditions).
360/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
361#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
362pub enum ErrorKind {
363    Transient,
364    Permanent,
365}
366
367impl std::fmt::Display for ErrorKind {
368    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
369        match self {
370            Self::Transient => f.write_str("transient"),
371            Self::Permanent => f.write_str("permanent"),
372        }
373    }
374}
375
376/// Errors that can occur during tool execution.
377#[derive(Debug, thiserror::Error)]
378pub enum ToolError {
379    #[error("command blocked by policy: {command}")]
380    Blocked { command: String },
381
382    #[error("path not allowed by sandbox: {path}")]
383    SandboxViolation { path: String },
384
385    #[error("command requires confirmation: {command}")]
386    ConfirmationRequired { command: String },
387
388    #[error("command timed out after {timeout_secs}s")]
389    Timeout { timeout_secs: u64 },
390
391    #[error("operation cancelled")]
392    Cancelled,
393
394    #[error("invalid tool parameters: {message}")]
395    InvalidParams { message: String },
396
397    #[error("execution failed: {0}")]
398    Execution(#[from] std::io::Error),
399
400    /// HTTP or API error with status code for fine-grained classification.
401    ///
402    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
403    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
404    #[error("HTTP error {status}: {message}")]
405    Http { status: u16, message: String },
406
407    /// Shell execution error with explicit exit code and pre-classified category.
408    ///
409    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
410    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
411    /// Preserves the exit code for audit logging and the category for skill evolution.
412    #[error("shell error (exit {exit_code}): {message}")]
413    Shell {
414        exit_code: i32,
415        category: crate::error_taxonomy::ToolErrorCategory,
416        message: String,
417    },
418
419    #[error("snapshot failed: {reason}")]
420    SnapshotFailed { reason: String },
421
422    /// Tool call rejected because the tool id is outside the active capability scope.
423    ///
424    /// Emitted by `ScopedToolExecutor` before any tool side-effect runs.
425    /// The audit log records `error_category = "out_of_scope"`.
426    // LLM isolation: task_type is never shown in the error message (P2-OutOfScope).
427    #[error("tool call denied by policy")]
428    OutOfScope {
429        /// Fully-qualified tool id that was rejected.
430        tool_id: String,
431        /// Active task type at dispatch time, if any.
432        task_type: Option<String>,
433    },
434
435    /// Tool call blocked by `ShadowProbeExecutor` after the LLM safety probe returned Deny.
436    ///
437    /// Emitted before any tool side-effect runs. The probe evaluated the full trajectory
438    /// context and determined the call is unsafe. Reason is LLM-generated; shown to the
439    /// agent loop as the tool result so the model can adapt.
440    #[error("tool call denied by safety probe: {reason}")]
441    SafetyDenied {
442        /// Human-readable explanation from the LLM safety probe.
443        reason: String,
444    },
445}
446
447impl ToolError {
448    /// Fine-grained error classification using the 12-category taxonomy.
449    ///
450    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
451    /// backward compatibility and delegates to `category().error_kind()`.
452    #[must_use]
453    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
454        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
455        match self {
456            Self::Blocked { .. } | Self::SandboxViolation { .. } => {
457                ToolErrorCategory::PolicyBlocked
458            }
459            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
460            Self::Timeout { .. } => ToolErrorCategory::Timeout,
461            Self::Cancelled => ToolErrorCategory::Cancelled,
462            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
463            Self::Http { status, .. } => classify_http_status(*status),
464            Self::Execution(io_err) => classify_io_error(io_err),
465            Self::Shell { category, .. } => *category,
466            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
467            Self::OutOfScope { .. } | Self::SafetyDenied { .. } => ToolErrorCategory::PolicyBlocked,
468        }
469    }
470
471    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
472    ///
473    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
474    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
475    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
476    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
477    ///   I/O error kinds — retrying would waste time with no benefit.
478    #[must_use]
479    pub fn kind(&self) -> ErrorKind {
480        use crate::error_taxonomy::ToolErrorCategoryExt;
481        self.category().error_kind()
482    }
483}
484
485/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
486///
487/// # Errors
488///
489/// Returns `ToolError::InvalidParams` when deserialization fails.
490pub fn deserialize_params<T: serde::de::DeserializeOwned>(
491    params: &serde_json::Map<String, serde_json::Value>,
492) -> Result<T, ToolError> {
493    let obj = serde_json::Value::Object(params.clone());
494    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
495        message: e.to_string(),
496    })
497}
498
499/// Async trait for tool execution backends.
500///
501/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
502/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
503/// and [`FileExecutor`](crate::FileExecutor).
504///
505/// # Contract
506///
507/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
508///   return `Ok(None)` when the executor does not handle the given input — callers must not
509///   treat `None` as an error.
510/// - All methods must be `Send + Sync` and free of blocking I/O.
511/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
512///   protection) before executing any side-effectful operation.
513/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
514///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
515///   confirmation gates only — all other security controls remain active.
516///
517/// # Two Invocation Paths
518///
519/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
520/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
521///
522/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
523/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
524/// This is the preferred path for new code.
525///
526/// # Example
527///
528/// ```rust
529/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
530///
531/// #[derive(Debug)]
532/// struct EchoExecutor;
533///
534/// impl ToolExecutor for EchoExecutor {
535///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
536///         Ok(None) // not a fenced-block executor
537///     }
538///
539///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
540///         if call.tool_id != "echo" {
541///             return Ok(None);
542///         }
543///         let text = call.params.get("text")
544///             .and_then(|v| v.as_str())
545///             .unwrap_or("")
546///             .to_owned();
547///         Ok(Some(ToolOutput {
548///             tool_name: "echo".into(),
549///             summary: text,
550///             blocks_executed: 1,
551///             filter_stats: None,
552///             diff: None,
553///             streamed: false,
554///             terminal_id: None,
555///             locations: None,
556///             raw_response: None,
557///             claim_source: None,
558///         }))
559///     }
560/// }
561/// ```
562/// # TODO (G3 — deferred: Tower-style tool middleware stack)
563///
564/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
565/// are scattered across individual executor implementations. The planned approach is a
566/// composable middleware stack similar to Tower's `Service` trait:
567///
568/// ```text
569/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
570/// ```
571///
572/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
573/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
574/// dynamic dispatch overhead before collapsing D2.
575///
576/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
577///
578/// Having two parallel traits creates duplication and confusion. The blanket impl
579/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
580/// added to both traits. Use `trait_variant::make` or a single object-safe design.
581///
582/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
583pub trait ToolExecutor: Send + Sync {
584    /// Parse `response` for fenced tool blocks and execute them.
585    ///
586    /// Returns `Ok(None)` when no tool blocks are found in `response`.
587    ///
588    /// # Errors
589    ///
590    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
591    /// sandbox violation, network error, timeout, etc.).
592    fn execute(
593        &self,
594        response: &str,
595    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;
596
597    /// Execute bypassing confirmation checks (called after user approves).
598    ///
599    /// Security controls other than the confirmation gate remain active. Default
600    /// implementation delegates to [`execute`](ToolExecutor::execute).
601    ///
602    /// # Errors
603    ///
604    /// Returns [`ToolError`] on execution failure.
605    fn execute_confirmed(
606        &self,
607        response: &str,
608    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
609        self.execute(response)
610    }
611
612    /// Return the tool definitions this executor can handle.
613    ///
614    /// Used to populate the LLM's tool schema at context-assembly time.
615    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
616    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
617        vec![]
618    }
619
620    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
621    ///
622    /// # Errors
623    ///
624    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
625    fn execute_tool_call(
626        &self,
627        _call: &ToolCall,
628    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
629        std::future::ready(Ok(None))
630    }
631
632    /// Execute a structured tool call bypassing confirmation checks.
633    ///
634    /// Called after the user has explicitly approved the tool invocation.
635    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
636    ///
637    /// # Errors
638    ///
639    /// Returns [`ToolError`] on execution failure.
640    fn execute_tool_call_confirmed(
641        &self,
642        call: &ToolCall,
643    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
644        self.execute_tool_call(call)
645    }
646
647    /// Inject environment variables for the currently active skill. No-op by default.
648    ///
649    /// Called by the agent loop before each turn when the active skill specifies env vars.
650    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
651    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}
652
653    /// Set the effective trust level for the currently active skill. No-op by default.
654    ///
655    /// Trust level affects which operations are permitted (e.g. network access, file writes).
656    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}
657
658    /// Whether the executor can safely retry this tool call on a transient error.
659    ///
660    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
661    /// Shell commands and other non-idempotent operations must keep the default `false`
662    /// to prevent double-execution of side-effectful commands.
663    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
664        false
665    }
666
667    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
668    ///
669    /// Speculative execution requires the tool to be:
670    /// 1. Idempotent — repeated execution with the same args produces the same result.
671    /// 2. Side-effect-free or cheaply reversible.
672    /// 3. Not subject to user confirmation (`needs_confirmation` must be false at call time).
673    ///
674    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
675    /// properties. The engine additionally gates on trust level and confirmation status
676    /// regardless of this flag.
677    ///
678    /// # Examples
679    ///
680    /// ```rust
681    /// use zeph_tools::ToolExecutor;
682    ///
683    /// struct ReadOnlyExecutor;
684    /// impl ToolExecutor for ReadOnlyExecutor {
685    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
686    ///         Ok(None)
687    ///     }
688    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
689    ///         true // read-only, idempotent
690    ///     }
691    /// }
692    /// ```
693    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
694        false
695    }
696
697    /// Return `true` when `call` would require user confirmation before execution.
698    ///
699    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
700    /// Used by the speculative engine to gate dispatch without causing double side-effects.
701    ///
702    /// Default: `false`. Executors that enforce a confirmation policy (e.g. `TrustGateExecutor`)
703    /// must override this to reflect their actual policy without executing the tool.
704    fn requires_confirmation(&self, _call: &ToolCall) -> bool {
705        false
706    }
707}
708
709/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
710///
711/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
712/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
713/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
714///
715/// Implemented automatically for all `T: ToolExecutor + 'static` via the blanket impl below.
716/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
717pub trait ErasedToolExecutor: Send + Sync {
718    fn execute_erased<'a>(
719        &'a self,
720        response: &'a str,
721    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;
722
723    fn execute_confirmed_erased<'a>(
724        &'a self,
725        response: &'a str,
726    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;
727
728    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;
729
730    fn execute_tool_call_erased<'a>(
731        &'a self,
732        call: &'a ToolCall,
733    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;
734
735    fn execute_tool_call_confirmed_erased<'a>(
736        &'a self,
737        call: &'a ToolCall,
738    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
739    {
740        // TrustGateExecutor overrides ToolExecutor::execute_tool_call_confirmed; the blanket
741        // impl for T: ToolExecutor routes this call through it via execute_tool_call_confirmed_erased.
742        // Other implementors fall back to execute_tool_call_erased (normal enforcement path).
743        self.execute_tool_call_erased(call)
744    }
745
746    /// Inject environment variables for the currently active skill. No-op by default.
747    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}
748
749    /// Set the effective trust level for the currently active skill. No-op by default.
750    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}
751
752    /// Whether the executor can safely retry this tool call on a transient error.
753    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;
754
755    /// Whether a tool call can be safely dispatched speculatively.
756    ///
757    /// Default: `false`. Override to `true` in read-only executors.
758    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
759        false
760    }
761
762    /// Return `true` when `call` would require user confirmation before execution.
763    ///
764    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
765    /// Used by the speculative engine to gate dispatch without causing double side-effects.
766    ///
767    /// Default: `true` (confirmation required). Implementors that want to allow speculative
768    /// dispatch must explicitly return `false`. The blanket impl for `T: ToolExecutor`
769    /// delegates to [`ToolExecutor::requires_confirmation`].
770    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
771        true
772    }
773}
774
775impl<T: ToolExecutor> ErasedToolExecutor for T {
776    fn execute_erased<'a>(
777        &'a self,
778        response: &'a str,
779    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
780    {
781        Box::pin(self.execute(response))
782    }
783
784    fn execute_confirmed_erased<'a>(
785        &'a self,
786        response: &'a str,
787    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
788    {
789        Box::pin(self.execute_confirmed(response))
790    }
791
792    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
793        self.tool_definitions()
794    }
795
796    fn execute_tool_call_erased<'a>(
797        &'a self,
798        call: &'a ToolCall,
799    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
800    {
801        Box::pin(self.execute_tool_call(call))
802    }
803
804    fn execute_tool_call_confirmed_erased<'a>(
805        &'a self,
806        call: &'a ToolCall,
807    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
808    {
809        Box::pin(self.execute_tool_call_confirmed(call))
810    }
811
812    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
813        ToolExecutor::set_skill_env(self, env);
814    }
815
816    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
817        ToolExecutor::set_effective_trust(self, level);
818    }
819
820    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
821        ToolExecutor::is_tool_retryable(self, tool_id)
822    }
823
824    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
825        ToolExecutor::is_tool_speculatable(self, tool_id)
826    }
827
828    fn requires_confirmation_erased(&self, call: &ToolCall) -> bool {
829        ToolExecutor::requires_confirmation(self, call)
830    }
831}
832
833/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
834///
835/// Enables dynamic composition of tool executors at runtime without static type chains.
836pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
837
838impl ToolExecutor for DynExecutor {
839    fn execute(
840        &self,
841        response: &str,
842    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
843        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
844        let inner = std::sync::Arc::clone(&self.0);
845        let response = response.to_owned();
846        async move { inner.execute_erased(&response).await }
847    }
848
849    fn execute_confirmed(
850        &self,
851        response: &str,
852    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
853        let inner = std::sync::Arc::clone(&self.0);
854        let response = response.to_owned();
855        async move { inner.execute_confirmed_erased(&response).await }
856    }
857
858    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
859        self.0.tool_definitions_erased()
860    }
861
862    fn execute_tool_call(
863        &self,
864        call: &ToolCall,
865    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
866        let inner = std::sync::Arc::clone(&self.0);
867        let call = call.clone();
868        async move { inner.execute_tool_call_erased(&call).await }
869    }
870
871    fn execute_tool_call_confirmed(
872        &self,
873        call: &ToolCall,
874    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
875        let inner = std::sync::Arc::clone(&self.0);
876        let call = call.clone();
877        async move { inner.execute_tool_call_confirmed_erased(&call).await }
878    }
879
880    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
881        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
882    }
883
884    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
885        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
886    }
887
888    fn is_tool_retryable(&self, tool_id: &str) -> bool {
889        self.0.is_tool_retryable_erased(tool_id)
890    }
891
892    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
893        self.0.is_tool_speculatable_erased(tool_id)
894    }
895
896    fn requires_confirmation(&self, call: &ToolCall) -> bool {
897        self.0.requires_confirmation_erased(call)
898    }
899}
900
/// Collect the bodies of all fenced code blocks in `text` that are tagged with `lang`.
///
/// An opening fence is three backticks immediately followed by `lang`; the block runs up to
/// the next three backticks. Each returned slice borrows from `text` and has surrounding
/// whitespace trimmed. An opening fence with no closing fence ends the scan without
/// contributing a block.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const FENCE: &str = "```";

    let opener = format!("```{lang}");
    let mut found = Vec::new();
    // Unscanned tail of `text`; always begins just past the last opener or closing fence seen.
    let mut remaining = text;

    while let Some(pos) = remaining.find(&opener) {
        let body = &remaining[pos + opener.len()..];

        // The tag must end at a word boundary: if the next character continues an identifier
        // (alphanumeric, `_` or `-`) this is a longer tag, e.g. "```bashrc" when looking for
        // "```bash", so skip past it and keep searching.
        let glued_to_word = body
            .chars()
            .next()
            .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '-');
        if glued_to_word {
            remaining = body;
            continue;
        }

        let Some(close) = body.find(FENCE) else {
            break;
        };
        found.push(body[..close].trim());
        remaining = &body[close + FENCE.len()..];
    }

    found
}
937
938#[cfg(test)]
939mod tests {
940    use super::*;
941
942    #[test]
943    fn tool_output_display() {
944        let output = ToolOutput {
945            tool_name: ToolName::new("bash"),
946            summary: "$ echo hello\nhello".to_owned(),
947            blocks_executed: 1,
948            filter_stats: None,
949            diff: None,
950            streamed: false,
951            terminal_id: None,
952            locations: None,
953            raw_response: None,
954            claim_source: None,
955        };
956        assert_eq!(output.to_string(), "$ echo hello\nhello");
957    }
958
959    #[test]
960    fn tool_error_blocked_display() {
961        let err = ToolError::Blocked {
962            command: "rm -rf /".to_owned(),
963        };
964        assert_eq!(err.to_string(), "command blocked by policy: rm -rf /");
965    }
966
967    #[test]
968    fn tool_error_sandbox_violation_display() {
969        let err = ToolError::SandboxViolation {
970            path: "/etc/shadow".to_owned(),
971        };
972        assert_eq!(err.to_string(), "path not allowed by sandbox: /etc/shadow");
973    }
974
975    #[test]
976    fn tool_error_confirmation_required_display() {
977        let err = ToolError::ConfirmationRequired {
978            command: "rm -rf /tmp".to_owned(),
979        };
980        assert_eq!(
981            err.to_string(),
982            "command requires confirmation: rm -rf /tmp"
983        );
984    }
985
986    #[test]
987    fn tool_error_timeout_display() {
988        let err = ToolError::Timeout { timeout_secs: 30 };
989        assert_eq!(err.to_string(), "command timed out after 30s");
990    }
991
992    #[test]
993    fn tool_error_invalid_params_display() {
994        let err = ToolError::InvalidParams {
995            message: "missing field `command`".to_owned(),
996        };
997        assert_eq!(
998            err.to_string(),
999            "invalid tool parameters: missing field `command`"
1000        );
1001    }
1002
1003    #[test]
1004    fn deserialize_params_valid() {
1005        #[derive(Debug, serde::Deserialize, PartialEq)]
1006        struct P {
1007            name: String,
1008            count: u32,
1009        }
1010        let mut map = serde_json::Map::new();
1011        map.insert("name".to_owned(), serde_json::json!("test"));
1012        map.insert("count".to_owned(), serde_json::json!(42));
1013        let p: P = deserialize_params(&map).unwrap();
1014        assert_eq!(
1015            p,
1016            P {
1017                name: "test".to_owned(),
1018                count: 42
1019            }
1020        );
1021    }
1022
1023    #[test]
1024    fn deserialize_params_missing_required_field() {
1025        #[derive(Debug, serde::Deserialize)]
1026        #[allow(dead_code)]
1027        struct P {
1028            name: String,
1029        }
1030        let map = serde_json::Map::new();
1031        let err = deserialize_params::<P>(&map).unwrap_err();
1032        assert!(matches!(err, ToolError::InvalidParams { .. }));
1033    }
1034
1035    #[test]
1036    fn deserialize_params_wrong_type() {
1037        #[derive(Debug, serde::Deserialize)]
1038        #[allow(dead_code)]
1039        struct P {
1040            count: u32,
1041        }
1042        let mut map = serde_json::Map::new();
1043        map.insert("count".to_owned(), serde_json::json!("not a number"));
1044        let err = deserialize_params::<P>(&map).unwrap_err();
1045        assert!(matches!(err, ToolError::InvalidParams { .. }));
1046    }
1047
1048    #[test]
1049    fn deserialize_params_all_optional_empty() {
1050        #[derive(Debug, serde::Deserialize, PartialEq)]
1051        struct P {
1052            name: Option<String>,
1053        }
1054        let map = serde_json::Map::new();
1055        let p: P = deserialize_params(&map).unwrap();
1056        assert_eq!(p, P { name: None });
1057    }
1058
1059    #[test]
1060    fn deserialize_params_ignores_extra_fields() {
1061        #[derive(Debug, serde::Deserialize, PartialEq)]
1062        struct P {
1063            name: String,
1064        }
1065        let mut map = serde_json::Map::new();
1066        map.insert("name".to_owned(), serde_json::json!("test"));
1067        map.insert("extra".to_owned(), serde_json::json!(true));
1068        let p: P = deserialize_params(&map).unwrap();
1069        assert_eq!(
1070            p,
1071            P {
1072                name: "test".to_owned()
1073            }
1074        );
1075    }
1076
1077    #[test]
1078    fn tool_error_execution_display() {
1079        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash not found");
1080        let err = ToolError::Execution(io_err);
1081        assert!(err.to_string().starts_with("execution failed:"));
1082        assert!(err.to_string().contains("bash not found"));
1083    }
1084
1085    // ErrorKind classification tests
1086    #[test]
1087    fn error_kind_timeout_is_transient() {
1088        let err = ToolError::Timeout { timeout_secs: 30 };
1089        assert_eq!(err.kind(), ErrorKind::Transient);
1090    }
1091
1092    #[test]
1093    fn error_kind_blocked_is_permanent() {
1094        let err = ToolError::Blocked {
1095            command: "rm -rf /".to_owned(),
1096        };
1097        assert_eq!(err.kind(), ErrorKind::Permanent);
1098    }
1099
1100    #[test]
1101    fn error_kind_sandbox_violation_is_permanent() {
1102        let err = ToolError::SandboxViolation {
1103            path: "/etc/shadow".to_owned(),
1104        };
1105        assert_eq!(err.kind(), ErrorKind::Permanent);
1106    }
1107
1108    #[test]
1109    fn error_kind_cancelled_is_permanent() {
1110        assert_eq!(ToolError::Cancelled.kind(), ErrorKind::Permanent);
1111    }
1112
1113    #[test]
1114    fn error_kind_invalid_params_is_permanent() {
1115        let err = ToolError::InvalidParams {
1116            message: "bad arg".to_owned(),
1117        };
1118        assert_eq!(err.kind(), ErrorKind::Permanent);
1119    }
1120
1121    #[test]
1122    fn error_kind_confirmation_required_is_permanent() {
1123        let err = ToolError::ConfirmationRequired {
1124            command: "rm /tmp/x".to_owned(),
1125        };
1126        assert_eq!(err.kind(), ErrorKind::Permanent);
1127    }
1128
1129    #[test]
1130    fn error_kind_execution_timed_out_is_transient() {
1131        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1132        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1133    }
1134
1135    #[test]
1136    fn error_kind_execution_interrupted_is_transient() {
1137        let io_err = std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted");
1138        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1139    }
1140
1141    #[test]
1142    fn error_kind_execution_connection_reset_is_transient() {
1143        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
1144        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1145    }
1146
1147    #[test]
1148    fn error_kind_execution_broken_pipe_is_transient() {
1149        let io_err = std::io::Error::new(std::io::ErrorKind::BrokenPipe, "pipe broken");
1150        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1151    }
1152
1153    #[test]
1154    fn error_kind_execution_would_block_is_transient() {
1155        let io_err = std::io::Error::new(std::io::ErrorKind::WouldBlock, "would block");
1156        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1157    }
1158
1159    #[test]
1160    fn error_kind_execution_connection_aborted_is_transient() {
1161        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
1162        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1163    }
1164
1165    #[test]
1166    fn error_kind_execution_not_found_is_permanent() {
1167        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
1168        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1169    }
1170
1171    #[test]
1172    fn error_kind_execution_permission_denied_is_permanent() {
1173        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied");
1174        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1175    }
1176
1177    #[test]
1178    fn error_kind_execution_other_is_permanent() {
1179        let io_err = std::io::Error::other("some other error");
1180        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1181    }
1182
1183    #[test]
1184    fn error_kind_execution_already_exists_is_permanent() {
1185        let io_err = std::io::Error::new(std::io::ErrorKind::AlreadyExists, "exists");
1186        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1187    }
1188
1189    #[test]
1190    fn error_kind_display() {
1191        assert_eq!(ErrorKind::Transient.to_string(), "transient");
1192        assert_eq!(ErrorKind::Permanent.to_string(), "permanent");
1193    }
1194
1195    #[test]
1196    fn truncate_tool_output_short_passthrough() {
1197        let short = "hello world";
1198        assert_eq!(truncate_tool_output(short), short);
1199    }
1200
1201    #[test]
1202    fn truncate_tool_output_exact_limit() {
1203        let exact = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
1204        assert_eq!(truncate_tool_output(&exact), exact);
1205    }
1206
1207    #[test]
1208    fn truncate_tool_output_long_split() {
1209        let long = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
1210        let result = truncate_tool_output(&long);
1211        assert!(result.contains("truncated"));
1212        assert!(result.len() < long.len());
1213    }
1214
1215    #[test]
1216    fn truncate_tool_output_notice_contains_count() {
1217        let long = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
1218        let result = truncate_tool_output(&long);
1219        assert!(result.contains("truncated"));
1220        assert!(result.contains("chars"));
1221    }
1222
1223    #[derive(Debug)]
1224    struct DefaultExecutor;
1225    impl ToolExecutor for DefaultExecutor {
1226        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1227            Ok(None)
1228        }
1229    }
1230
1231    #[tokio::test]
1232    async fn execute_tool_call_default_returns_none() {
1233        let exec = DefaultExecutor;
1234        let call = ToolCall {
1235            tool_id: ToolName::new("anything"),
1236            params: serde_json::Map::new(),
1237            caller_id: None,
1238            context: None,
1239
1240            tool_call_id: String::new(),
1241        };
1242        let result = exec.execute_tool_call(&call).await.unwrap();
1243        assert!(result.is_none());
1244    }
1245
1246    #[test]
1247    fn filter_stats_savings_pct() {
1248        let fs = FilterStats {
1249            raw_chars: 1000,
1250            filtered_chars: 200,
1251            ..Default::default()
1252        };
1253        assert!((fs.savings_pct() - 80.0).abs() < 0.01);
1254    }
1255
1256    #[test]
1257    fn filter_stats_savings_pct_zero() {
1258        let fs = FilterStats::default();
1259        assert!((fs.savings_pct()).abs() < 0.01);
1260    }
1261
1262    #[test]
1263    fn filter_stats_estimated_tokens_saved() {
1264        let fs = FilterStats {
1265            raw_chars: 1000,
1266            filtered_chars: 200,
1267            ..Default::default()
1268        };
1269        assert_eq!(fs.estimated_tokens_saved(), 200); // (1000 - 200) / 4
1270    }
1271
1272    #[test]
1273    fn filter_stats_format_inline() {
1274        let fs = FilterStats {
1275            raw_chars: 1000,
1276            filtered_chars: 200,
1277            raw_lines: 342,
1278            filtered_lines: 28,
1279            ..Default::default()
1280        };
1281        let line = fs.format_inline("shell");
1282        assert_eq!(line, "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered");
1283    }
1284
1285    #[test]
1286    fn filter_stats_format_inline_zero() {
1287        let fs = FilterStats::default();
1288        let line = fs.format_inline("bash");
1289        assert_eq!(line, "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered");
1290    }
1291
1292    // DynExecutor tests
1293
1294    struct FixedExecutor {
1295        tool_id: &'static str,
1296        output: &'static str,
1297    }
1298
1299    impl ToolExecutor for FixedExecutor {
1300        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1301            Ok(Some(ToolOutput {
1302                tool_name: ToolName::new(self.tool_id),
1303                summary: self.output.to_owned(),
1304                blocks_executed: 1,
1305                filter_stats: None,
1306                diff: None,
1307                streamed: false,
1308                terminal_id: None,
1309                locations: None,
1310                raw_response: None,
1311                claim_source: None,
1312            }))
1313        }
1314
1315        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1316            vec![]
1317        }
1318
1319        async fn execute_tool_call(
1320            &self,
1321            _call: &ToolCall,
1322        ) -> Result<Option<ToolOutput>, ToolError> {
1323            Ok(Some(ToolOutput {
1324                tool_name: ToolName::new(self.tool_id),
1325                summary: self.output.to_owned(),
1326                blocks_executed: 1,
1327                filter_stats: None,
1328                diff: None,
1329                streamed: false,
1330                terminal_id: None,
1331                locations: None,
1332                raw_response: None,
1333                claim_source: None,
1334            }))
1335        }
1336    }
1337
1338    #[tokio::test]
1339    async fn dyn_executor_execute_delegates() {
1340        let inner = std::sync::Arc::new(FixedExecutor {
1341            tool_id: "bash",
1342            output: "hello",
1343        });
1344        let exec = DynExecutor(inner);
1345        let result = exec.execute("```bash\necho hello\n```").await.unwrap();
1346        assert!(result.is_some());
1347        assert_eq!(result.unwrap().summary, "hello");
1348    }
1349
1350    #[tokio::test]
1351    async fn dyn_executor_execute_confirmed_delegates() {
1352        let inner = std::sync::Arc::new(FixedExecutor {
1353            tool_id: "bash",
1354            output: "confirmed",
1355        });
1356        let exec = DynExecutor(inner);
1357        let result = exec.execute_confirmed("...").await.unwrap();
1358        assert!(result.is_some());
1359        assert_eq!(result.unwrap().summary, "confirmed");
1360    }
1361
1362    #[test]
1363    fn dyn_executor_tool_definitions_delegates() {
1364        let inner = std::sync::Arc::new(FixedExecutor {
1365            tool_id: "my_tool",
1366            output: "",
1367        });
1368        let exec = DynExecutor(inner);
1369        // FixedExecutor returns empty definitions; verify delegation occurs without panic.
1370        let defs = exec.tool_definitions();
1371        assert!(defs.is_empty());
1372    }
1373
1374    #[tokio::test]
1375    async fn dyn_executor_execute_tool_call_delegates() {
1376        let inner = std::sync::Arc::new(FixedExecutor {
1377            tool_id: "bash",
1378            output: "tool_call_result",
1379        });
1380        let exec = DynExecutor(inner);
1381        let call = ToolCall {
1382            tool_id: ToolName::new("bash"),
1383            params: serde_json::Map::new(),
1384            caller_id: None,
1385            context: None,
1386
1387            tool_call_id: String::new(),
1388        };
1389        let result = exec.execute_tool_call(&call).await.unwrap();
1390        assert!(result.is_some());
1391        assert_eq!(result.unwrap().summary, "tool_call_result");
1392    }
1393
1394    #[test]
1395    fn dyn_executor_set_effective_trust_delegates() {
1396        use std::sync::atomic::{AtomicU8, Ordering};
1397
1398        struct TrustCapture(AtomicU8);
1399        impl ToolExecutor for TrustCapture {
1400            async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1401                Ok(None)
1402            }
1403            fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
1404                // encode: Trusted=0, Verified=1, Quarantined=2, Blocked=3
1405                let v = match level {
1406                    crate::SkillTrustLevel::Trusted => 0u8,
1407                    crate::SkillTrustLevel::Verified => 1,
1408                    crate::SkillTrustLevel::Quarantined => 2,
1409                    crate::SkillTrustLevel::Blocked => 3,
1410                };
1411                self.0.store(v, Ordering::Relaxed);
1412            }
1413        }
1414
1415        let inner = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
1416        let exec =
1417            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1418        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Quarantined);
1419        assert_eq!(inner.0.load(Ordering::Relaxed), 2);
1420
1421        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Blocked);
1422        assert_eq!(inner.0.load(Ordering::Relaxed), 3);
1423    }
1424
1425    #[test]
1426    fn extract_fenced_blocks_no_prefix_match() {
1427        // ```bashrc must NOT match when searching for "bash"
1428        assert!(extract_fenced_blocks("```bashrc\nfoo\n```", "bash").is_empty());
1429        // exact match
1430        assert_eq!(
1431            extract_fenced_blocks("```bash\nfoo\n```", "bash"),
1432            vec!["foo"]
1433        );
1434        // trailing space is fine
1435        assert_eq!(
1436            extract_fenced_blocks("```bash \nfoo\n```", "bash"),
1437            vec!["foo"]
1438        );
1439    }
1440
1441    // ── ToolError::category() delegation tests ────────────────────────────────
1442
1443    #[test]
1444    fn tool_error_http_400_category_is_invalid_parameters() {
1445        use crate::error_taxonomy::ToolErrorCategory;
1446        let err = ToolError::Http {
1447            status: 400,
1448            message: "bad request".to_owned(),
1449        };
1450        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1451    }
1452
1453    #[test]
1454    fn tool_error_http_401_category_is_policy_blocked() {
1455        use crate::error_taxonomy::ToolErrorCategory;
1456        let err = ToolError::Http {
1457            status: 401,
1458            message: "unauthorized".to_owned(),
1459        };
1460        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1461    }
1462
1463    #[test]
1464    fn tool_error_http_403_category_is_policy_blocked() {
1465        use crate::error_taxonomy::ToolErrorCategory;
1466        let err = ToolError::Http {
1467            status: 403,
1468            message: "forbidden".to_owned(),
1469        };
1470        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1471    }
1472
1473    #[test]
1474    fn tool_error_http_404_category_is_permanent_failure() {
1475        use crate::error_taxonomy::ToolErrorCategory;
1476        let err = ToolError::Http {
1477            status: 404,
1478            message: "not found".to_owned(),
1479        };
1480        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1481    }
1482
1483    #[test]
1484    fn tool_error_http_429_category_is_rate_limited() {
1485        use crate::error_taxonomy::ToolErrorCategory;
1486        let err = ToolError::Http {
1487            status: 429,
1488            message: "too many requests".to_owned(),
1489        };
1490        assert_eq!(err.category(), ToolErrorCategory::RateLimited);
1491    }
1492
1493    #[test]
1494    fn tool_error_http_500_category_is_server_error() {
1495        use crate::error_taxonomy::ToolErrorCategory;
1496        let err = ToolError::Http {
1497            status: 500,
1498            message: "internal server error".to_owned(),
1499        };
1500        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1501    }
1502
1503    #[test]
1504    fn tool_error_http_502_category_is_server_error() {
1505        use crate::error_taxonomy::ToolErrorCategory;
1506        let err = ToolError::Http {
1507            status: 502,
1508            message: "bad gateway".to_owned(),
1509        };
1510        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1511    }
1512
1513    #[test]
1514    fn tool_error_http_503_category_is_server_error() {
1515        use crate::error_taxonomy::ToolErrorCategory;
1516        let err = ToolError::Http {
1517            status: 503,
1518            message: "service unavailable".to_owned(),
1519        };
1520        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1521    }
1522
1523    #[test]
1524    fn tool_error_http_503_is_transient_triggers_phase2_retry() {
1525        // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
1526        // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
1527        let err = ToolError::Http {
1528            status: 503,
1529            message: "service unavailable".to_owned(),
1530        };
1531        assert_eq!(
1532            err.kind(),
1533            ErrorKind::Transient,
1534            "HTTP 503 must be Transient so Phase 2 retry fires"
1535        );
1536    }
1537
1538    #[test]
1539    fn tool_error_blocked_category_is_policy_blocked() {
1540        use crate::error_taxonomy::ToolErrorCategory;
1541        let err = ToolError::Blocked {
1542            command: "rm -rf /".to_owned(),
1543        };
1544        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1545    }
1546
1547    #[test]
1548    fn tool_error_sandbox_violation_category_is_policy_blocked() {
1549        use crate::error_taxonomy::ToolErrorCategory;
1550        let err = ToolError::SandboxViolation {
1551            path: "/etc/shadow".to_owned(),
1552        };
1553        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1554    }
1555
1556    #[test]
1557    fn tool_error_confirmation_required_category() {
1558        use crate::error_taxonomy::ToolErrorCategory;
1559        let err = ToolError::ConfirmationRequired {
1560            command: "rm /tmp/x".to_owned(),
1561        };
1562        assert_eq!(err.category(), ToolErrorCategory::ConfirmationRequired);
1563    }
1564
1565    #[test]
1566    fn tool_error_timeout_category() {
1567        use crate::error_taxonomy::ToolErrorCategory;
1568        let err = ToolError::Timeout { timeout_secs: 30 };
1569        assert_eq!(err.category(), ToolErrorCategory::Timeout);
1570    }
1571
1572    #[test]
1573    fn tool_error_cancelled_category() {
1574        use crate::error_taxonomy::ToolErrorCategory;
1575        assert_eq!(
1576            ToolError::Cancelled.category(),
1577            ToolErrorCategory::Cancelled
1578        );
1579    }
1580
1581    #[test]
1582    fn tool_error_invalid_params_category() {
1583        use crate::error_taxonomy::ToolErrorCategory;
1584        let err = ToolError::InvalidParams {
1585            message: "missing field".to_owned(),
1586        };
1587        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1588    }
1589
1590    // B2 regression: Execution(NotFound) must NOT produce ToolNotFound.
1591    #[test]
1592    fn tool_error_execution_not_found_category_is_permanent_failure() {
1593        use crate::error_taxonomy::ToolErrorCategory;
1594        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash: not found");
1595        let err = ToolError::Execution(io_err);
1596        let cat = err.category();
1597        assert_ne!(
1598            cat,
1599            ToolErrorCategory::ToolNotFound,
1600            "Execution(NotFound) must NOT map to ToolNotFound"
1601        );
1602        assert_eq!(cat, ToolErrorCategory::PermanentFailure);
1603    }
1604
1605    #[test]
1606    fn tool_error_execution_timed_out_category_is_timeout() {
1607        use crate::error_taxonomy::ToolErrorCategory;
1608        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out");
1609        assert_eq!(
1610            ToolError::Execution(io_err).category(),
1611            ToolErrorCategory::Timeout
1612        );
1613    }
1614
1615    #[test]
1616    fn tool_error_execution_connection_refused_category_is_network_error() {
1617        use crate::error_taxonomy::ToolErrorCategory;
1618        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionRefused, "refused");
1619        assert_eq!(
1620            ToolError::Execution(io_err).category(),
1621            ToolErrorCategory::NetworkError
1622        );
1623    }
1624
1625    // B4 regression: Http/network/transient categories must NOT be quality failures.
1626    #[test]
1627    fn b4_tool_error_http_429_not_quality_failure() {
1628        let err = ToolError::Http {
1629            status: 429,
1630            message: "rate limited".to_owned(),
1631        };
1632        assert!(
1633            !err.category().is_quality_failure(),
1634            "RateLimited must not be a quality failure"
1635        );
1636    }
1637
1638    #[test]
1639    fn b4_tool_error_http_503_not_quality_failure() {
1640        let err = ToolError::Http {
1641            status: 503,
1642            message: "service unavailable".to_owned(),
1643        };
1644        assert!(
1645            !err.category().is_quality_failure(),
1646            "ServerError must not be a quality failure"
1647        );
1648    }
1649
1650    #[test]
1651    fn b4_tool_error_execution_timed_out_not_quality_failure() {
1652        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1653        assert!(
1654            !ToolError::Execution(io_err).category().is_quality_failure(),
1655            "Timeout must not be a quality failure"
1656        );
1657    }
1658
1659    // ── ToolError::Shell category tests ──────────────────────────────────────
1660
1661    #[test]
1662    fn tool_error_shell_exit126_is_policy_blocked() {
1663        use crate::error_taxonomy::ToolErrorCategory;
1664        let err = ToolError::Shell {
1665            exit_code: 126,
1666            category: ToolErrorCategory::PolicyBlocked,
1667            message: "permission denied".to_owned(),
1668        };
1669        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1670    }
1671
1672    #[test]
1673    fn tool_error_shell_exit127_is_permanent_failure() {
1674        use crate::error_taxonomy::ToolErrorCategory;
1675        let err = ToolError::Shell {
1676            exit_code: 127,
1677            category: ToolErrorCategory::PermanentFailure,
1678            message: "command not found".to_owned(),
1679        };
1680        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1681        assert!(!err.category().is_retryable());
1682    }
1683
1684    #[test]
1685    fn tool_error_shell_not_quality_failure() {
1686        use crate::error_taxonomy::ToolErrorCategory;
1687        let err = ToolError::Shell {
1688            exit_code: 127,
1689            category: ToolErrorCategory::PermanentFailure,
1690            message: "command not found".to_owned(),
1691        };
1692        // Shell exit errors are not attributable to LLM output quality.
1693        assert!(!err.category().is_quality_failure());
1694    }
1695
1696    // ── requires_confirmation / requires_confirmation_erased tests (#3644) ───
1697
1698    /// Stub implementing only `ToolExecutor` without overriding `requires_confirmation`.
1699    struct StubExecutor;
1700    impl ToolExecutor for StubExecutor {
1701        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1702            Ok(None)
1703        }
1704    }
1705
1706    /// Stub that always signals confirmation is required via `ToolExecutor::requires_confirmation`.
1707    struct ConfirmingExecutor;
1708    impl ToolExecutor for ConfirmingExecutor {
1709        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1710            Ok(None)
1711        }
1712        fn requires_confirmation(&self, _call: &ToolCall) -> bool {
1713            true
1714        }
1715    }
1716
1717    fn dummy_call() -> ToolCall {
1718        ToolCall {
1719            tool_id: ToolName::new("test"),
1720            params: serde_json::Map::new(),
1721            caller_id: None,
1722            context: None,
1723
1724            tool_call_id: String::new(),
1725        }
1726    }
1727
1728    #[test]
1729    fn requires_confirmation_default_is_false_on_tool_executor() {
1730        let exec = StubExecutor;
1731        assert!(
1732            !exec.requires_confirmation(&dummy_call()),
1733            "ToolExecutor default requires_confirmation must be false"
1734        );
1735    }
1736
1737    #[test]
1738    fn requires_confirmation_erased_delegates_to_tool_executor_default() {
1739        // blanket impl routes erased → ToolExecutor::requires_confirmation (= false)
1740        let exec = StubExecutor;
1741        assert!(
1742            !ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1743            "requires_confirmation_erased via blanket impl must return false for stub executor"
1744        );
1745    }
1746
1747    #[test]
1748    fn requires_confirmation_erased_delegates_override() {
1749        // ConfirmingExecutor overrides requires_confirmation → true;
1750        // blanket impl must propagate this.
1751        let exec = ConfirmingExecutor;
1752        assert!(
1753            ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1754            "requires_confirmation_erased must return true when ToolExecutor override returns true"
1755        );
1756    }
1757
1758    #[test]
1759    fn requires_confirmation_erased_default_on_erased_trait_is_true() {
1760        // ErasedToolExecutor's own default (trait method body) returns true.
1761        // We construct a DynExecutor wrapping ConfirmingExecutor and verify via the erased path.
1762        // (We cannot instantiate ErasedToolExecutor directly without a concrete type.)
1763        // Instead verify via a type that only implements ErasedToolExecutor manually:
1764        struct ManualErased;
1765        impl ErasedToolExecutor for ManualErased {
1766            fn execute_erased<'a>(
1767                &'a self,
1768                _response: &'a str,
1769            ) -> std::pin::Pin<
1770                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1771            > {
1772                Box::pin(std::future::ready(Ok(None)))
1773            }
1774            fn execute_confirmed_erased<'a>(
1775                &'a self,
1776                _response: &'a str,
1777            ) -> std::pin::Pin<
1778                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1779            > {
1780                Box::pin(std::future::ready(Ok(None)))
1781            }
1782            fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
1783                vec![]
1784            }
1785            fn execute_tool_call_erased<'a>(
1786                &'a self,
1787                _call: &'a ToolCall,
1788            ) -> std::pin::Pin<
1789                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1790            > {
1791                Box::pin(std::future::ready(Ok(None)))
1792            }
1793            fn is_tool_retryable_erased(&self, _tool_id: &str) -> bool {
1794                false
1795            }
1796            // requires_confirmation_erased NOT overridden → trait default returns true
1797        }
1798        let exec = ManualErased;
1799        assert!(
1800            exec.requires_confirmation_erased(&dummy_call()),
1801            "ErasedToolExecutor trait-level default for requires_confirmation_erased must be true"
1802        );
1803    }
1804
1805    // ── DynExecutor::requires_confirmation delegation tests (#3650) ──────────
1806
1807    #[test]
1808    fn dyn_executor_requires_confirmation_delegates() {
1809        let inner = std::sync::Arc::new(ConfirmingExecutor);
1810        let exec =
1811            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1812        assert!(
1813            ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1814            "DynExecutor must delegate requires_confirmation to inner executor"
1815        );
1816    }
1817
1818    #[test]
1819    fn dyn_executor_requires_confirmation_default_false() {
1820        let inner = std::sync::Arc::new(StubExecutor);
1821        let exec =
1822            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1823        assert!(
1824            !ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1825            "DynExecutor must return false when inner executor does not require confirmation"
1826        );
1827    }
1828}
zeph_tools/executor.rs

zeph_tools/
executor.rs