zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Data for rendering file diffs in the TUI.
///
/// Produced by [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor)
/// when a tool call modifies a tracked file. The TUI uses this to display a side-by-side diff.
///
/// Both snapshots are stored as owned `String`s, so cloning a `DiffData` copies the complete
/// before and after content.
#[derive(Debug, Clone)]
pub struct DiffData {
    /// Relative or absolute path to the file that was modified.
    pub file_path: String,
    /// File content before the tool executed.
    pub old_content: String,
    /// File content after the tool executed.
    pub new_content: String,
}
23
/// Structured tool invocation from LLM.
///
/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`].
///
/// Use [`deserialize_params`] to convert [`ToolCall::params`] into a typed parameter struct.
///
/// # Example
///
/// ```rust
/// use zeph_tools::ToolCall;
/// use zeph_common::ToolName;
///
/// let call = ToolCall {
///     tool_id: ToolName::new("bash"),
///     params: {
///         let mut m = serde_json::Map::new();
///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
///         m
///     },
///     caller_id: Some("user-42".to_owned()),
/// };
/// assert_eq!(call.tool_id, "bash");
/// ```
#[derive(Debug, Clone)]
pub struct ToolCall {
    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
    pub tool_id: ToolName,
    /// JSON parameters for the tool call, deserialized into the tool's parameter struct.
    ///
    /// Always a JSON object (string keys mapped to arbitrary JSON values).
    pub params: serde_json::Map<String, serde_json::Value>,
    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
    /// `None` for system-initiated calls (scheduler, self-learning, internal).
    pub caller_id: Option<String>,
}
56
/// Cumulative filter statistics for a single tool execution.
///
/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
///
/// The derived `Default` yields zeroed counters, no confidence, no command, and an empty
/// `kept_lines` list. [`FilterStats::savings_pct`] and
/// [`FilterStats::estimated_tokens_saved`] are computed from `raw_chars` and
/// `filtered_chars`; [`FilterStats::format_inline`] additionally reads the line counts and
/// `command`.
#[derive(Debug, Clone, Default)]
pub struct FilterStats {
    /// Raw character count before filtering.
    pub raw_chars: usize,
    /// Character count after filtering.
    pub filtered_chars: usize,
    /// Raw line count before filtering.
    pub raw_lines: usize,
    /// Line count after filtering.
    pub filtered_lines: usize,
    /// Worst-case confidence across all applied filters.
    pub confidence: Option<crate::FilterConfidence>,
    /// The shell command that produced this output, for display purposes.
    pub command: Option<String>,
    /// Zero-based line indices that were kept after filtering.
    pub kept_lines: Vec<usize>,
}
78
79impl FilterStats {
80    /// Returns the percentage of characters removed by filtering.
81    ///
82    /// Returns `0.0` when there was no raw output to filter.
83    #[must_use]
84    #[allow(clippy::cast_precision_loss)]
85    pub fn savings_pct(&self) -> f64 {
86        if self.raw_chars == 0 {
87            return 0.0;
88        }
89        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
90    }
91
92    /// Estimates the number of LLM tokens saved by filtering.
93    ///
94    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
95    /// not for billing or exact budget calculations.
96    #[must_use]
97    pub fn estimated_tokens_saved(&self) -> usize {
98        self.raw_chars.saturating_sub(self.filtered_chars) / 4
99    }
100
101    /// Formats a one-line filter summary for log messages and TUI status.
102    ///
103    /// # Example
104    ///
105    /// ```rust
106    /// use zeph_tools::FilterStats;
107    ///
108    /// let stats = FilterStats {
109    ///     raw_chars: 1000,
110    ///     filtered_chars: 400,
111    ///     raw_lines: 50,
112    ///     filtered_lines: 20,
113    ///     command: Some("cargo build".to_owned()),
114    ///     ..Default::default()
115    /// };
116    /// let summary = stats.format_inline("shell");
117    /// assert!(summary.contains("60.0% filtered"));
118    /// ```
119    #[must_use]
120    pub fn format_inline(&self, tool_name: &str) -> String {
121        let cmd_label = self
122            .command
123            .as_deref()
124            .map(|c| {
125                let trimmed = c.trim();
126                if trimmed.len() > 60 {
127                    format!(" `{}…`", &trimmed[..57])
128                } else {
129                    format!(" `{trimmed}`")
130                }
131            })
132            .unwrap_or_default();
133        format!(
134            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
135            self.raw_lines,
136            self.filtered_lines,
137            self.savings_pct()
138        )
139    }
140}
141
/// Provenance of a tool execution result.
///
/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
/// A `None` in [`ToolOutput::claim_source`] means the source is unspecified
/// (pass-through code, mocks, tests).
///
/// Serialized with serde using `snake_case` variant names (for example `FileSystem`
/// becomes `"file_system"` and `WebScrape` becomes `"web_scrape"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClaimSource {
    /// Local shell command execution.
    Shell,
    /// Local file system read/write.
    FileSystem,
    /// HTTP web scrape.
    WebScrape,
    /// MCP server tool response.
    Mcp,
    /// A2A agent message.
    A2a,
    /// Code search (LSP or semantic).
    CodeSearch,
    /// Agent diagnostics (internal).
    Diagnostics,
    /// Memory retrieval (semantic search).
    Memory,
}
167
/// Structured result from tool execution.
///
/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
///
/// The [`Display`](fmt::Display) implementation writes `summary` verbatim, so
/// `output.to_string()` equals `output.summary`.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolOutput, executor::ClaimSource};
/// use zeph_common::ToolName;
///
/// let output = ToolOutput {
///     tool_name: ToolName::new("shell"),
///     summary: "hello\n".to_owned(),
///     blocks_executed: 1,
///     filter_stats: None,
///     diff: None,
///     streamed: false,
///     terminal_id: None,
///     locations: None,
///     raw_response: None,
///     claim_source: Some(ClaimSource::Shell),
/// };
/// assert_eq!(output.to_string(), "hello\n");
/// ```
#[derive(Debug, Clone)]
pub struct ToolOutput {
    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
    pub tool_name: ToolName,
    /// Human-readable result text injected into the LLM context.
    pub summary: String,
    /// Number of code blocks processed in this invocation.
    pub blocks_executed: u32,
    /// Output filter statistics when filtering was applied, `None` otherwise.
    pub filter_stats: Option<FilterStats>,
    /// File diff data for TUI display when the tool modified a tracked file.
    pub diff: Option<DiffData>,
    /// Whether this tool already streamed its output via `ToolEvent` channel.
    pub streamed: bool,
    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
    pub terminal_id: Option<String>,
    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
    pub locations: Option<Vec<String>>,
    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
    pub raw_response: Option<serde_json::Value>,
    /// Provenance of this tool result. Set by the executor at construction time.
    /// `None` in pass-through wrappers, mocks, and tests.
    pub claim_source: Option<ClaimSource>,
}
217
218impl fmt::Display for ToolOutput {
219    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
220        f.write_str(&self.summary)
221    }
222}
223
/// Maximum size of tool output injected into the LLM context without truncation.
///
/// Output that exceeds this limit is split into a head and tail via [`truncate_tool_output`]
/// to keep both the beginning and end of large command outputs.
///
/// Despite the name, the limit is compared against the output's byte length
/// (`str::len`), so text containing multi-byte characters reaches it with fewer
/// characters than the number suggests.
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;
229
/// Truncate tool output that exceeds [`MAX_TOOL_OUTPUT_CHARS`] using a head+tail split.
///
/// Equivalent to `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`; see
/// [`truncate_tool_output_at`] for the exact splitting rules. Output at or below the limit
/// is returned as an unchanged copy.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output;
///
/// let short = "hello world";
/// assert_eq!(truncate_tool_output(short), short);
/// ```
#[must_use]
pub fn truncate_tool_output(output: &str) -> String {
    truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)
}
246
247/// Truncate tool output that exceeds `max_chars` using a head+tail split.
248///
249/// Preserves the first and last `max_chars / 2` characters and inserts a truncation
250/// marker in the middle. Both boundaries are snapped to valid UTF-8 character boundaries.
251///
252/// # Example
253///
254/// ```rust
255/// use zeph_tools::executor::truncate_tool_output_at;
256///
257/// let long = "a".repeat(200);
258/// let truncated = truncate_tool_output_at(&long, 100);
259/// assert!(truncated.contains("truncated"));
260/// assert!(truncated.len() < long.len());
261/// ```
262#[must_use]
263pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
264    if output.len() <= max_chars {
265        return output.to_string();
266    }
267
268    let half = max_chars / 2;
269    let head_end = output.floor_char_boundary(half);
270    let tail_start = output.ceil_char_boundary(output.len() - half);
271    let head = &output[..head_end];
272    let tail = &output[tail_start..];
273    let truncated = output.len() - head_end - (output.len() - tail_start);
274
275    format!(
276        "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
277    )
278}
279
/// Event emitted during tool execution for real-time UI updates.
///
/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
/// Each event variant corresponds to a phase in the tool execution lifecycle.
/// Every variant carries the `tool_name` and `command` of the invocation it describes.
#[derive(Debug, Clone)]
pub enum ToolEvent {
    /// The tool has started. Displayed in the TUI as a spinner with the command text.
    Started {
        /// Name of the tool that started.
        tool_name: ToolName,
        /// Command text shown alongside the spinner.
        command: String,
        /// Active sandbox profile, if any. `None` when sandbox is disabled.
        sandbox_profile: Option<String>,
    },
    /// A chunk of streaming output was produced (e.g. from a long-running command).
    OutputChunk {
        /// Name of the tool producing the output.
        tool_name: ToolName,
        /// Command text associated with this event.
        command: String,
        /// The newly produced piece of output.
        chunk: String,
    },
    /// The tool finished. Contains the full output and optional filter/diff data.
    Completed {
        /// Name of the tool that finished.
        tool_name: ToolName,
        /// Command text associated with this event.
        command: String,
        /// Full output text (possibly filtered and truncated).
        output: String,
        /// `true` when the tool exited successfully, `false` on error.
        success: bool,
        /// Output filter statistics, when available.
        filter_stats: Option<FilterStats>,
        /// File diff data, when available.
        diff: Option<DiffData>,
        /// Set when this completion belongs to a background run. `None` for blocking runs.
        run_id: Option<RunId>,
    },
    /// A transactional rollback was performed, restoring or deleting files.
    Rollback {
        /// Name of the tool whose changes were rolled back.
        tool_name: ToolName,
        /// Command text associated with this event.
        command: String,
        /// Number of files restored to their pre-execution content.
        restored_count: usize,
        /// Number of files that did not exist before execution and were deleted.
        deleted_count: usize,
    },
}
322
/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
///
/// Capacity is [`TOOL_EVENT_CHANNEL_CAP`] (1024) slots. Streaming variants (`OutputChunk`,
/// `Started`) use `try_send` and drop on full; terminal variants (`Completed`, `Rollback`)
/// use `send().await` to guarantee delivery.
///
/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;

/// Receiver half matching [`ToolEventTx`].
pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;

/// Bounded capacity for the tool-event channel, in number of queued [`ToolEvent`]s.
pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
337
/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
///
/// Transient errors may succeed on retry (network blips, race conditions).
/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
///
/// The `Display` implementation renders the variants as `transient` and `permanent`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ErrorKind {
    /// The operation may succeed if retried.
    Transient,
    /// Retrying will not help; the caller should abort.
    Permanent,
}
347
348impl std::fmt::Display for ErrorKind {
349    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
350        match self {
351            Self::Transient => f.write_str("transient"),
352            Self::Permanent => f.write_str("permanent"),
353        }
354    }
355}
356
/// Errors that can occur during tool execution.
///
/// Use [`ToolError::category`] for the fine-grained taxonomy classification and
/// [`ToolError::kind`] for the coarse transient/permanent split.
#[derive(Debug, thiserror::Error)]
pub enum ToolError {
    /// The command was rejected by policy. Classified as `PolicyBlocked`.
    #[error("command blocked by policy: {command}")]
    Blocked { command: String },

    /// A path is not allowed by the sandbox. Classified as `PolicyBlocked`.
    #[error("path not allowed by sandbox: {path}")]
    SandboxViolation { path: String },

    /// The command needs confirmation before it may run. Classified as
    /// `ConfirmationRequired`.
    #[error("command requires confirmation: {command}")]
    ConfirmationRequired { command: String },

    /// The command did not finish within `timeout_secs` seconds. Classified as `Timeout`.
    #[error("command timed out after {timeout_secs}s")]
    Timeout { timeout_secs: u64 },

    /// The operation was cancelled. Classified as `Cancelled`.
    #[error("operation cancelled")]
    Cancelled,

    /// The tool parameters were invalid; [`deserialize_params`] returns this when
    /// deserialization fails. Classified as `InvalidParameters`.
    #[error("invalid tool parameters: {message}")]
    InvalidParams { message: String },

    /// An I/O error raised during execution. Converts from [`std::io::Error`] via `From`
    /// (so `?` works) and is classified by `classify_io_error`.
    #[error("execution failed: {0}")]
    Execution(#[from] std::io::Error),

    /// HTTP or API error with status code for fine-grained classification.
    ///
    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
    #[error("HTTP error {status}: {message}")]
    Http { status: u16, message: String },

    /// Shell execution error with explicit exit code and pre-classified category.
    ///
    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
    /// Preserves the exit code for audit logging and the category for skill evolution.
    #[error("shell error (exit {exit_code}): {message}")]
    Shell {
        exit_code: i32,
        category: crate::error_taxonomy::ToolErrorCategory,
        message: String,
    },

    /// Taking a snapshot failed. Classified as `PermanentFailure`.
    #[error("snapshot failed: {reason}")]
    SnapshotFailed { reason: String },
}
403
404impl ToolError {
405    /// Fine-grained error classification using the 12-category taxonomy.
406    ///
407    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
408    /// backward compatibility and delegates to `category().error_kind()`.
409    #[must_use]
410    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
411        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
412        match self {
413            Self::Blocked { .. } | Self::SandboxViolation { .. } => {
414                ToolErrorCategory::PolicyBlocked
415            }
416            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
417            Self::Timeout { .. } => ToolErrorCategory::Timeout,
418            Self::Cancelled => ToolErrorCategory::Cancelled,
419            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
420            Self::Http { status, .. } => classify_http_status(*status),
421            Self::Execution(io_err) => classify_io_error(io_err),
422            Self::Shell { category, .. } => *category,
423            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
424        }
425    }
426
427    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
428    ///
429    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
430    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
431    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
432    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
433    ///   I/O error kinds — retrying would waste time with no benefit.
434    #[must_use]
435    pub fn kind(&self) -> ErrorKind {
436        use crate::error_taxonomy::ToolErrorCategoryExt;
437        self.category().error_kind()
438    }
439}
440
441/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
442///
443/// # Errors
444///
445/// Returns `ToolError::InvalidParams` when deserialization fails.
446pub fn deserialize_params<T: serde::de::DeserializeOwned>(
447    params: &serde_json::Map<String, serde_json::Value>,
448) -> Result<T, ToolError> {
449    let obj = serde_json::Value::Object(params.clone());
450    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
451        message: e.to_string(),
452    })
453}
454
/// Async trait for tool execution backends.
///
/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
/// and [`FileExecutor`](crate::FileExecutor).
///
/// Only [`execute`](ToolExecutor::execute) is required; every other method has a default
/// implementation.
///
/// # Contract
///
/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
///   return `Ok(None)` when the executor does not handle the given input — callers must not
///   treat `None` as an error.
/// - All methods must be `Send + Sync` and free of blocking I/O.
/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
///   protection) before executing any side-effectful operation.
/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
///   confirmation gates only — all other security controls remain active.
///
/// # Two Invocation Paths
///
/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
///
/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
/// This is the preferred path for new code.
///
/// # Example
///
/// ```rust
/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
///
/// #[derive(Debug)]
/// struct EchoExecutor;
///
/// impl ToolExecutor for EchoExecutor {
///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
///         Ok(None) // not a fenced-block executor
///     }
///
///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
///         if call.tool_id != "echo" {
///             return Ok(None);
///         }
///         let text = call.params.get("text")
///             .and_then(|v| v.as_str())
///             .unwrap_or("")
///             .to_owned();
///         Ok(Some(ToolOutput {
///             tool_name: "echo".into(),
///             summary: text,
///             blocks_executed: 1,
///             filter_stats: None,
///             diff: None,
///             streamed: false,
///             terminal_id: None,
///             locations: None,
///             raw_response: None,
///             claim_source: None,
///         }))
///     }
/// }
/// ```
///
/// # TODO (G3 — deferred: Tower-style tool middleware stack)
///
/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
/// are scattered across individual executor implementations. The planned approach is a
/// composable middleware stack similar to Tower's `Service` trait:
///
/// ```text
/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
/// ```
///
/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
/// dynamic dispatch overhead before collapsing D2.
///
/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
///
/// Having two parallel traits creates duplication and confusion. The blanket impl
/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
/// added to both traits. Use `trait_variant::make` or a single object-safe design.
///
/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
pub trait ToolExecutor: Send + Sync {
    /// Parse `response` for fenced tool blocks and execute them.
    ///
    /// Returns `Ok(None)` when no tool blocks are found in `response`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
    /// sandbox violation, network error, timeout, etc.).
    fn execute(
        &self,
        response: &str,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;

    /// Execute bypassing confirmation checks (called after user approves).
    ///
    /// Security controls other than the confirmation gate remain active. Default
    /// implementation delegates to [`execute`](ToolExecutor::execute).
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] on execution failure.
    fn execute_confirmed(
        &self,
        response: &str,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        self.execute(response)
    }

    /// Return the tool definitions this executor can handle.
    ///
    /// Used to populate the LLM's tool schema at context-assembly time.
    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
        vec![]
    }

    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
    ///
    /// The default implementation handles no tool IDs and resolves immediately to
    /// `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
    fn execute_tool_call(
        &self,
        _call: &ToolCall,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        std::future::ready(Ok(None))
    }

    /// Execute a structured tool call bypassing confirmation checks.
    ///
    /// Called after the user has explicitly approved the tool invocation.
    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
    ///
    /// # Errors
    ///
    /// Returns [`ToolError`] on execution failure.
    fn execute_tool_call_confirmed(
        &self,
        call: &ToolCall,
    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
        self.execute_tool_call(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    ///
    /// Called by the agent loop before each turn when the active skill specifies env vars.
    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    ///
    /// Trust level affects which operations are permitted (e.g. network access, file writes).
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
    /// Shell commands and other non-idempotent operations must keep the default `false`
    /// to prevent double-execution of side-effectful commands.
    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
        false
    }

    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
    ///
    /// Speculative execution requires the tool to be:
    /// 1. Idempotent — repeated execution with the same args produces the same result.
    /// 2. Side-effect-free or cheaply reversible.
    /// 3. Not subject to user confirmation (`needs_confirmation` must be false at call time).
    ///
    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
    /// properties. The engine additionally gates on trust level and confirmation status
    /// regardless of this flag.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use zeph_tools::ToolExecutor;
    ///
    /// struct ReadOnlyExecutor;
    /// impl ToolExecutor for ReadOnlyExecutor {
    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
    ///         Ok(None)
    ///     }
    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
    ///         true // read-only, idempotent
    ///     }
    /// }
    /// ```
    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
        false
    }
}
653
/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
///
/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
///
/// Implemented automatically for every `T: ToolExecutor` via the blanket impl below
/// (storing a value as `Arc<dyn ErasedToolExecutor>` additionally requires `T: 'static`).
/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
pub trait ErasedToolExecutor: Send + Sync {
    /// Boxed-future counterpart of [`ToolExecutor::execute`].
    ///
    /// The returned future borrows both `self` and `response` for `'a`.
    fn execute_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_confirmed`].
    fn execute_confirmed_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Counterpart of [`ToolExecutor::tool_definitions`].
    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call`].
    fn execute_tool_call_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call_confirmed`].
    ///
    /// The default forwards to
    /// [`execute_tool_call_erased`](ErasedToolExecutor::execute_tool_call_erased).
    fn execute_tool_call_confirmed_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        // This default only applies to types that implement `ErasedToolExecutor` by hand:
        // they fall back to `execute_tool_call_erased` (the normal enforcement path).
        // The blanket impl for `T: ToolExecutor` overrides this method and forwards to
        // `ToolExecutor::execute_tool_call_confirmed`, so executors that override that
        // method (e.g. TrustGateExecutor) are still reached through the erased interface.
        self.execute_tool_call_erased(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;

    /// Whether a tool call can be safely dispatched speculatively.
    ///
    /// Default: `false`. Override to `true` in read-only executors.
    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `false` (no confirmation required). Override in executors that enforce a
    /// confirmation policy (e.g. `TrustGateExecutor`).
    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
        false
    }
}
718
/// Blanket adapter: every [`ToolExecutor`] is also an [`ErasedToolExecutor`].
///
/// Async methods box the future returned by the corresponding `ToolExecutor` method;
/// synchronous methods forward directly. `requires_confirmation_erased` is not overridden
/// here, so types covered by this impl report the trait default (`false`).
impl<T: ToolExecutor> ErasedToolExecutor for T {
    /// Boxes the future returned by [`ToolExecutor::execute`].
    fn execute_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        Box::pin(self.execute(response))
    }

    /// Boxes the future returned by [`ToolExecutor::execute_confirmed`].
    fn execute_confirmed_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        Box::pin(self.execute_confirmed(response))
    }

    /// Forwards to [`ToolExecutor::tool_definitions`].
    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
        self.tool_definitions()
    }

    /// Boxes the future returned by [`ToolExecutor::execute_tool_call`].
    fn execute_tool_call_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        Box::pin(self.execute_tool_call(call))
    }

    /// Boxes the future returned by [`ToolExecutor::execute_tool_call_confirmed`],
    /// replacing the trait default that would route through `execute_tool_call_erased`.
    fn execute_tool_call_confirmed_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        Box::pin(self.execute_tool_call_confirmed(call))
    }

    // Both traits declare `set_skill_env` / `set_effective_trust`, so the calls below are
    // written with an explicit `ToolExecutor::` path to select the intended method.

    /// Forwards to [`ToolExecutor::set_skill_env`].
    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
        ToolExecutor::set_skill_env(self, env);
    }

    /// Forwards to [`ToolExecutor::set_effective_trust`].
    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
        ToolExecutor::set_effective_trust(self, level);
    }

    /// Forwards to [`ToolExecutor::is_tool_retryable`].
    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
        ToolExecutor::is_tool_retryable(self, tool_id)
    }

    /// Forwards to [`ToolExecutor::is_tool_speculatable`].
    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
        ToolExecutor::is_tool_speculatable(self, tool_id)
    }
}
772
/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
///
/// Enables dynamic composition of tool executors at runtime without static type chains.
///
/// The wrapped `Arc` is the public tuple field `0`, so a value can be built directly as
/// `DynExecutor(arc)`.
pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
777
778impl ToolExecutor for DynExecutor {
779    fn execute(
780        &self,
781        response: &str,
782    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
783        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
784        let inner = std::sync::Arc::clone(&self.0);
785        let response = response.to_owned();
786        async move { inner.execute_erased(&response).await }
787    }
788
789    fn execute_confirmed(
790        &self,
791        response: &str,
792    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
793        let inner = std::sync::Arc::clone(&self.0);
794        let response = response.to_owned();
795        async move { inner.execute_confirmed_erased(&response).await }
796    }
797
798    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
799        self.0.tool_definitions_erased()
800    }
801
802    fn execute_tool_call(
803        &self,
804        call: &ToolCall,
805    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
806        let inner = std::sync::Arc::clone(&self.0);
807        let call = call.clone();
808        async move { inner.execute_tool_call_erased(&call).await }
809    }
810
811    fn execute_tool_call_confirmed(
812        &self,
813        call: &ToolCall,
814    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
815        let inner = std::sync::Arc::clone(&self.0);
816        let call = call.clone();
817        async move { inner.execute_tool_call_confirmed_erased(&call).await }
818    }
819
820    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
821        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
822    }
823
824    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
825        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
826    }
827
828    fn is_tool_retryable(&self, tool_id: &str) -> bool {
829        self.0.is_tool_retryable_erased(tool_id)
830    }
831
832    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
833        self.0.is_tool_speculatable_erased(tool_id)
834    }
835}
836
/// Collects the contents of every fenced code block in `text` whose opening fence is
/// `` ``` `` immediately followed by `lang`.
///
/// A block's content is the text between the end of the opening marker and the next
/// `` ``` ``, with surrounding whitespace trimmed. The returned slices borrow from `text`
/// and appear in document order.
///
/// An opening marker only counts when the character right after it is not alphanumeric,
/// `_`, or `-` (end of input also counts), so searching for `bash` does not match a
/// `` ```bashrc `` fence. Scanning stops at the first accepted opening marker that has no
/// closing fence.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const FENCE: &str = "```";

    let opener = [FENCE, lang].concat();
    let mut found = Vec::new();
    // `tail` is the part of `text` that has not been scanned yet.
    let mut tail = text;

    while let Some(pos) = tail.find(opener.as_str()) {
        let body = &tail[pos + opener.len()..];

        // The marker is only a prefix of a longer language tag: skip past it and retry.
        let continues_word = body
            .chars()
            .next()
            .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '-');
        if continues_word {
            tail = body;
            continue;
        }

        let Some(close) = body.find(FENCE) else {
            break;
        };
        found.push(body[..close].trim());
        tail = &body[close + FENCE.len()..];
    }

    found
}
873
#[cfg(test)]
mod tests {
    use super::*;

    use crate::error_taxonomy::ToolErrorCategory;

    // ── Shared fixtures ───────────────────────────────────────────────────────

    /// Builds a [`ToolOutput`] for `tool` with `summary`; every remaining field takes the
    /// fixed value shared by all fixtures in this module.
    fn sample_output(tool: &'static str, summary: &str) -> ToolOutput {
        ToolOutput {
            tool_name: ToolName::new(tool),
            summary: summary.to_owned(),
            blocks_executed: 1,
            filter_stats: None,
            diff: None,
            streamed: false,
            terminal_id: None,
            locations: None,
            raw_response: None,
            claim_source: None,
        }
    }

    /// Wraps a new `std::io::Error` of `kind` in [`ToolError::Execution`].
    fn exec_error(kind: std::io::ErrorKind, detail: &'static str) -> ToolError {
        ToolError::Execution(std::io::Error::new(kind, detail))
    }

    /// Collects `(key, value)` pairs into a JSON parameter map.
    fn param_map<const N: usize>(
        entries: [(&str, serde_json::Value); N],
    ) -> serde_json::Map<String, serde_json::Value> {
        entries
            .into_iter()
            .map(|(key, value)| (key.to_owned(), value))
            .collect()
    }

    // ── Display tests ─────────────────────────────────────────────────────────

    #[test]
    fn tool_output_display() {
        let rendered = sample_output("bash", "$ echo hello\nhello").to_string();
        assert_eq!(rendered, "$ echo hello\nhello");
    }

    #[test]
    fn tool_error_blocked_display() {
        let command = String::from("rm -rf /");
        let rendered = ToolError::Blocked { command }.to_string();
        assert_eq!(rendered, "command blocked by policy: rm -rf /");
    }

    #[test]
    fn tool_error_sandbox_violation_display() {
        let path = String::from("/etc/shadow");
        let rendered = ToolError::SandboxViolation { path }.to_string();
        assert_eq!(rendered, "path not allowed by sandbox: /etc/shadow");
    }

    #[test]
    fn tool_error_confirmation_required_display() {
        let command = String::from("rm -rf /tmp");
        let rendered = ToolError::ConfirmationRequired { command }.to_string();
        assert_eq!(rendered, "command requires confirmation: rm -rf /tmp");
    }

    #[test]
    fn tool_error_timeout_display() {
        let rendered = ToolError::Timeout { timeout_secs: 30 }.to_string();
        assert_eq!(rendered, "command timed out after 30s");
    }

    #[test]
    fn tool_error_invalid_params_display() {
        let message = String::from("missing field `command`");
        let rendered = ToolError::InvalidParams { message }.to_string();
        assert_eq!(rendered, "invalid tool parameters: missing field `command`");
    }

    // ── deserialize_params tests ──────────────────────────────────────────────

    #[test]
    fn deserialize_params_valid() {
        #[derive(Debug, serde::Deserialize, PartialEq)]
        struct P {
            name: String,
            count: u32,
        }
        let input = param_map([
            ("name", serde_json::json!("test")),
            ("count", serde_json::json!(42)),
        ]);
        let parsed: P = deserialize_params(&input).unwrap();
        let expected = P {
            name: "test".to_owned(),
            count: 42,
        };
        assert_eq!(parsed, expected);
    }

    #[test]
    fn deserialize_params_missing_required_field() {
        #[derive(Debug, serde::Deserialize)]
        #[allow(dead_code)]
        struct P {
            name: String,
        }
        let outcome = deserialize_params::<P>(&param_map([]));
        assert!(matches!(
            outcome.unwrap_err(),
            ToolError::InvalidParams { .. }
        ));
    }

    #[test]
    fn deserialize_params_wrong_type() {
        #[derive(Debug, serde::Deserialize)]
        #[allow(dead_code)]
        struct P {
            count: u32,
        }
        let input = param_map([("count", serde_json::json!("not a number"))]);
        let outcome = deserialize_params::<P>(&input);
        assert!(matches!(
            outcome.unwrap_err(),
            ToolError::InvalidParams { .. }
        ));
    }

    #[test]
    fn deserialize_params_all_optional_empty() {
        #[derive(Debug, serde::Deserialize, PartialEq)]
        struct P {
            name: Option<String>,
        }
        let parsed: P = deserialize_params(&param_map([])).unwrap();
        assert_eq!(parsed, P { name: None });
    }

    #[test]
    fn deserialize_params_ignores_extra_fields() {
        #[derive(Debug, serde::Deserialize, PartialEq)]
        struct P {
            name: String,
        }
        let input = param_map([
            ("name", serde_json::json!("test")),
            ("extra", serde_json::json!(true)),
        ]);
        let parsed: P = deserialize_params(&input).unwrap();
        let expected = P {
            name: "test".to_owned(),
        };
        assert_eq!(parsed, expected);
    }

    #[test]
    fn tool_error_execution_display() {
        let rendered = exec_error(std::io::ErrorKind::NotFound, "bash not found").to_string();
        assert!(rendered.starts_with("execution failed:"));
        assert!(rendered.contains("bash not found"));
    }

    // ── ErrorKind classification tests ────────────────────────────────────────

    #[test]
    fn error_kind_timeout_is_transient() {
        let kind = ToolError::Timeout { timeout_secs: 30 }.kind();
        assert_eq!(kind, ErrorKind::Transient);
    }

    #[test]
    fn error_kind_blocked_is_permanent() {
        let command = String::from("rm -rf /");
        let kind = ToolError::Blocked { command }.kind();
        assert_eq!(kind, ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_sandbox_violation_is_permanent() {
        let path = String::from("/etc/shadow");
        let kind = ToolError::SandboxViolation { path }.kind();
        assert_eq!(kind, ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_cancelled_is_permanent() {
        let kind = ToolError::Cancelled.kind();
        assert_eq!(kind, ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_invalid_params_is_permanent() {
        let message = String::from("bad arg");
        let kind = ToolError::InvalidParams { message }.kind();
        assert_eq!(kind, ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_confirmation_required_is_permanent() {
        let command = String::from("rm /tmp/x");
        let kind = ToolError::ConfirmationRequired { command }.kind();
        assert_eq!(kind, ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_execution_timed_out_is_transient() {
        let err = exec_error(std::io::ErrorKind::TimedOut, "timeout");
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn error_kind_execution_interrupted_is_transient() {
        let err = exec_error(std::io::ErrorKind::Interrupted, "interrupted");
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn error_kind_execution_connection_reset_is_transient() {
        let err = exec_error(std::io::ErrorKind::ConnectionReset, "reset");
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn error_kind_execution_broken_pipe_is_transient() {
        let err = exec_error(std::io::ErrorKind::BrokenPipe, "pipe broken");
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn error_kind_execution_would_block_is_transient() {
        let err = exec_error(std::io::ErrorKind::WouldBlock, "would block");
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn error_kind_execution_connection_aborted_is_transient() {
        let err = exec_error(std::io::ErrorKind::ConnectionAborted, "aborted");
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn error_kind_execution_not_found_is_permanent() {
        let err = exec_error(std::io::ErrorKind::NotFound, "not found");
        assert_eq!(err.kind(), ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_execution_permission_denied_is_permanent() {
        let err = exec_error(std::io::ErrorKind::PermissionDenied, "denied");
        assert_eq!(err.kind(), ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_execution_other_is_permanent() {
        let err = ToolError::Execution(std::io::Error::other("some other error"));
        assert_eq!(err.kind(), ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_execution_already_exists_is_permanent() {
        let err = exec_error(std::io::ErrorKind::AlreadyExists, "exists");
        assert_eq!(err.kind(), ErrorKind::Permanent);
    }

    #[test]
    fn error_kind_display() {
        let transient = ErrorKind::Transient.to_string();
        let permanent = ErrorKind::Permanent.to_string();
        assert_eq!(transient, "transient");
        assert_eq!(permanent, "permanent");
    }

    // ── truncate_tool_output tests ────────────────────────────────────────────

    #[test]
    fn truncate_tool_output_short_passthrough() {
        let input = "hello world";
        assert_eq!(truncate_tool_output(input), input);
    }

    #[test]
    fn truncate_tool_output_exact_limit() {
        let input = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
        assert_eq!(truncate_tool_output(&input), input);
    }

    #[test]
    fn truncate_tool_output_long_split() {
        let input = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
        let shortened = truncate_tool_output(&input);
        assert!(shortened.contains("truncated"));
        assert!(shortened.len() < input.len());
    }

    #[test]
    fn truncate_tool_output_notice_contains_count() {
        let input = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
        let shortened = truncate_tool_output(&input);
        assert!(shortened.contains("truncated"));
        assert!(shortened.contains("chars"));
    }

    // ── Default trait method tests ────────────────────────────────────────────

    /// Executor that implements only `execute`, leaving every other trait method at
    /// its default.
    #[derive(Debug)]
    struct DefaultExecutor;

    impl ToolExecutor for DefaultExecutor {
        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
            Ok(None)
        }
    }

    #[tokio::test]
    async fn execute_tool_call_default_returns_none() {
        let call = ToolCall {
            tool_id: ToolName::new("anything"),
            params: serde_json::Map::new(),
            caller_id: None,
        };
        let outcome = DefaultExecutor.execute_tool_call(&call).await.unwrap();
        assert!(outcome.is_none());
    }

    // ── FilterStats tests ─────────────────────────────────────────────────────

    #[test]
    fn filter_stats_savings_pct() {
        let stats = FilterStats {
            raw_chars: 1000,
            filtered_chars: 200,
            ..Default::default()
        };
        let delta = stats.savings_pct() - 80.0;
        assert!(delta.abs() < 0.01);
    }

    #[test]
    fn filter_stats_savings_pct_zero() {
        let stats = FilterStats::default();
        assert!((stats.savings_pct()).abs() < 0.01);
    }

    #[test]
    fn filter_stats_estimated_tokens_saved() {
        let stats = FilterStats {
            raw_chars: 1000,
            filtered_chars: 200,
            ..Default::default()
        };
        // (1000 - 200) / 4
        assert_eq!(stats.estimated_tokens_saved(), 200);
    }

    #[test]
    fn filter_stats_format_inline() {
        let stats = FilterStats {
            raw_chars: 1000,
            filtered_chars: 200,
            raw_lines: 342,
            filtered_lines: 28,
            ..Default::default()
        };
        assert_eq!(
            stats.format_inline("shell"),
            "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered"
        );
    }

    #[test]
    fn filter_stats_format_inline_zero() {
        let stats = FilterStats::default();
        assert_eq!(
            stats.format_inline("bash"),
            "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered"
        );
    }

    // ── DynExecutor tests ─────────────────────────────────────────────────────

    /// Executor that answers both `execute` and `execute_tool_call` with the same
    /// canned output and advertises no tool definitions.
    struct FixedExecutor {
        tool_id: &'static str,
        output: &'static str,
    }

    impl FixedExecutor {
        /// The canned output returned by both execution entry points.
        fn canned(&self) -> ToolOutput {
            sample_output(self.tool_id, self.output)
        }
    }

    impl ToolExecutor for FixedExecutor {
        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
            Ok(Some(self.canned()))
        }

        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
            Vec::new()
        }

        async fn execute_tool_call(
            &self,
            _call: &ToolCall,
        ) -> Result<Option<ToolOutput>, ToolError> {
            Ok(Some(self.canned()))
        }
    }

    /// Wraps a [`FixedExecutor`] in a [`DynExecutor`].
    fn dyn_fixed(tool_id: &'static str, output: &'static str) -> DynExecutor {
        DynExecutor(std::sync::Arc::new(FixedExecutor { tool_id, output }))
    }

    #[tokio::test]
    async fn dyn_executor_execute_delegates() {
        let exec = dyn_fixed("bash", "hello");
        let outcome = exec.execute("```bash\necho hello\n```").await.unwrap();
        let summary = outcome.map(|out| out.summary);
        assert_eq!(summary.as_deref(), Some("hello"));
    }

    #[tokio::test]
    async fn dyn_executor_execute_confirmed_delegates() {
        let exec = dyn_fixed("bash", "confirmed");
        let outcome = exec.execute_confirmed("...").await.unwrap();
        let summary = outcome.map(|out| out.summary);
        assert_eq!(summary.as_deref(), Some("confirmed"));
    }

    #[test]
    fn dyn_executor_tool_definitions_delegates() {
        let exec = dyn_fixed("my_tool", "");
        // FixedExecutor returns empty definitions; verify delegation occurs without panic.
        assert!(exec.tool_definitions().is_empty());
    }

    #[tokio::test]
    async fn dyn_executor_execute_tool_call_delegates() {
        let exec = dyn_fixed("bash", "tool_call_result");
        let call = ToolCall {
            tool_id: ToolName::new("bash"),
            params: serde_json::Map::new(),
            caller_id: None,
        };
        let outcome = exec.execute_tool_call(&call).await.unwrap();
        let summary = outcome.map(|out| out.summary);
        assert_eq!(summary.as_deref(), Some("tool_call_result"));
    }

    #[test]
    fn dyn_executor_set_effective_trust_delegates() {
        use std::sync::atomic::{AtomicU8, Ordering};

        /// Records the most recent trust level as a small integer.
        struct TrustCapture(AtomicU8);

        impl ToolExecutor for TrustCapture {
            async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
                Ok(None)
            }

            fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
                // encode: Trusted=0, Verified=1, Quarantined=2, Blocked=3
                let encoded: u8 = match level {
                    crate::SkillTrustLevel::Trusted => 0,
                    crate::SkillTrustLevel::Verified => 1,
                    crate::SkillTrustLevel::Quarantined => 2,
                    crate::SkillTrustLevel::Blocked => 3,
                };
                self.0.store(encoded, Ordering::Relaxed);
            }
        }

        let capture = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
        let erased: std::sync::Arc<dyn ErasedToolExecutor> = capture.clone();
        let exec = DynExecutor(erased);

        let cases = [
            (crate::SkillTrustLevel::Quarantined, 2u8),
            (crate::SkillTrustLevel::Blocked, 3u8),
        ];
        for (level, expected) in cases {
            ToolExecutor::set_effective_trust(&exec, level);
            assert_eq!(capture.0.load(Ordering::Relaxed), expected);
        }
    }

    #[test]
    fn extract_fenced_blocks_no_prefix_match() {
        // ```bashrc must NOT match when searching for "bash"
        let prefix_only = extract_fenced_blocks("```bashrc\nfoo\n```", "bash");
        assert!(prefix_only.is_empty());

        // exact match
        let exact = extract_fenced_blocks("```bash\nfoo\n```", "bash");
        assert_eq!(exact, vec!["foo"]);

        // trailing space is fine
        let trailing_space = extract_fenced_blocks("```bash \nfoo\n```", "bash");
        assert_eq!(trailing_space, vec!["foo"]);
    }

    // ── ToolError::category() delegation tests ────────────────────────────────

    #[test]
    fn tool_error_http_400_category_is_invalid_parameters() {
        let category = ToolError::Http {
            status: 400,
            message: String::from("bad request"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::InvalidParameters);
    }

    #[test]
    fn tool_error_http_401_category_is_policy_blocked() {
        let category = ToolError::Http {
            status: 401,
            message: String::from("unauthorized"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::PolicyBlocked);
    }

    #[test]
    fn tool_error_http_403_category_is_policy_blocked() {
        let category = ToolError::Http {
            status: 403,
            message: String::from("forbidden"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::PolicyBlocked);
    }

    #[test]
    fn tool_error_http_404_category_is_permanent_failure() {
        let category = ToolError::Http {
            status: 404,
            message: String::from("not found"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::PermanentFailure);
    }

    #[test]
    fn tool_error_http_429_category_is_rate_limited() {
        let category = ToolError::Http {
            status: 429,
            message: String::from("too many requests"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::RateLimited);
    }

    #[test]
    fn tool_error_http_500_category_is_server_error() {
        let category = ToolError::Http {
            status: 500,
            message: String::from("internal server error"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::ServerError);
    }

    #[test]
    fn tool_error_http_502_category_is_server_error() {
        let category = ToolError::Http {
            status: 502,
            message: String::from("bad gateway"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::ServerError);
    }

    #[test]
    fn tool_error_http_503_category_is_server_error() {
        let category = ToolError::Http {
            status: 503,
            message: String::from("service unavailable"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::ServerError);
    }

    #[test]
    fn tool_error_http_503_is_transient_triggers_phase2_retry() {
        // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
        // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
        let kind = ToolError::Http {
            status: 503,
            message: String::from("service unavailable"),
        }
        .kind();
        assert_eq!(
            kind,
            ErrorKind::Transient,
            "HTTP 503 must be Transient so Phase 2 retry fires"
        );
    }

    #[test]
    fn tool_error_blocked_category_is_policy_blocked() {
        let command = String::from("rm -rf /");
        let category = ToolError::Blocked { command }.category();
        assert_eq!(category, ToolErrorCategory::PolicyBlocked);
    }

    #[test]
    fn tool_error_sandbox_violation_category_is_policy_blocked() {
        let path = String::from("/etc/shadow");
        let category = ToolError::SandboxViolation { path }.category();
        assert_eq!(category, ToolErrorCategory::PolicyBlocked);
    }

    #[test]
    fn tool_error_confirmation_required_category() {
        let command = String::from("rm /tmp/x");
        let category = ToolError::ConfirmationRequired { command }.category();
        assert_eq!(category, ToolErrorCategory::ConfirmationRequired);
    }

    #[test]
    fn tool_error_timeout_category() {
        let category = ToolError::Timeout { timeout_secs: 30 }.category();
        assert_eq!(category, ToolErrorCategory::Timeout);
    }

    #[test]
    fn tool_error_cancelled_category() {
        let category = ToolError::Cancelled.category();
        assert_eq!(category, ToolErrorCategory::Cancelled);
    }

    #[test]
    fn tool_error_invalid_params_category() {
        let message = String::from("missing field");
        let category = ToolError::InvalidParams { message }.category();
        assert_eq!(category, ToolErrorCategory::InvalidParameters);
    }

    // B2 regression: Execution(NotFound) must NOT produce ToolNotFound.
    #[test]
    fn tool_error_execution_not_found_category_is_permanent_failure() {
        let category = exec_error(std::io::ErrorKind::NotFound, "bash: not found").category();
        assert_ne!(
            category,
            ToolErrorCategory::ToolNotFound,
            "Execution(NotFound) must NOT map to ToolNotFound"
        );
        assert_eq!(category, ToolErrorCategory::PermanentFailure);
    }

    #[test]
    fn tool_error_execution_timed_out_category_is_timeout() {
        let category = exec_error(std::io::ErrorKind::TimedOut, "timed out").category();
        assert_eq!(category, ToolErrorCategory::Timeout);
    }

    #[test]
    fn tool_error_execution_connection_refused_category_is_network_error() {
        let category = exec_error(std::io::ErrorKind::ConnectionRefused, "refused").category();
        assert_eq!(category, ToolErrorCategory::NetworkError);
    }

    // B4 regression: Http/network/transient categories must NOT be quality failures.
    #[test]
    fn b4_tool_error_http_429_not_quality_failure() {
        let category = ToolError::Http {
            status: 429,
            message: String::from("rate limited"),
        }
        .category();
        assert!(
            !category.is_quality_failure(),
            "RateLimited must not be a quality failure"
        );
    }

    #[test]
    fn b4_tool_error_http_503_not_quality_failure() {
        let category = ToolError::Http {
            status: 503,
            message: String::from("service unavailable"),
        }
        .category();
        assert!(
            !category.is_quality_failure(),
            "ServerError must not be a quality failure"
        );
    }

    #[test]
    fn b4_tool_error_execution_timed_out_not_quality_failure() {
        let category = exec_error(std::io::ErrorKind::TimedOut, "timeout").category();
        assert!(
            !category.is_quality_failure(),
            "Timeout must not be a quality failure"
        );
    }

    // ── ToolError::Shell category tests ──────────────────────────────────────

    #[test]
    fn tool_error_shell_exit126_is_policy_blocked() {
        let category = ToolError::Shell {
            exit_code: 126,
            category: ToolErrorCategory::PolicyBlocked,
            message: String::from("permission denied"),
        }
        .category();
        assert_eq!(category, ToolErrorCategory::PolicyBlocked);
    }

    #[test]
    fn tool_error_shell_exit127_is_permanent_failure() {
        let err = ToolError::Shell {
            exit_code: 127,
            category: ToolErrorCategory::PermanentFailure,
            message: String::from("command not found"),
        };
        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
        assert!(!err.category().is_retryable());
    }

    #[test]
    fn tool_error_shell_not_quality_failure() {
        let category = ToolError::Shell {
            exit_code: 127,
            category: ToolErrorCategory::PermanentFailure,
            message: String::from("command not found"),
        }
        .category();
        // Shell exit errors are not attributable to LLM output quality.
        assert!(!category.is_quality_failure());
    }
}
zeph_tools/executor.rs

zeph_tools/
executor.rs