Skip to main content

zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Before/after snapshot of a single file, used to render a diff in the TUI.
///
/// [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor)
/// produce this when a tool call modifies a tracked file; the TUI turns it into a
/// side-by-side diff.
#[derive(Debug, Clone)]
pub struct DiffData {
    /// Path of the modified file; may be relative or absolute.
    pub file_path: String,
    /// Contents of the file as they were before the tool ran.
    pub old_content: String,
    /// Contents of the file once the tool had finished.
    pub new_content: String,
}
23
24/// Structured tool invocation from LLM.
25///
26/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
27/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`].
28///
29/// # Example
30///
31/// ```rust
32/// use zeph_tools::{ToolCall, ExecutionContext};
33/// use zeph_common::ToolName;
34///
35/// let call = ToolCall {
36///     tool_id: ToolName::new("bash"),
37///     params: {
38///         let mut m = serde_json::Map::new();
39///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
40///         m
41///     },
42///     caller_id: Some("user-42".to_owned()),
43///     context: Some(ExecutionContext::new().with_name("repo")),
44///     tool_call_id: String::new(),
45///     skill_name: None,
46/// };
47/// assert_eq!(call.tool_id, "bash");
48/// ```
49#[derive(Debug, Clone, Default)]
50pub struct ToolCall {
51    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
52    pub tool_id: ToolName,
53    /// JSON parameters for the tool call, deserialized into the tool's parameter struct.
54    pub params: serde_json::Map<String, serde_json::Value>,
55    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
56    /// `None` for system-initiated calls (scheduler, self-learning, internal).
57    pub caller_id: Option<String>,
58    /// Per-turn execution environment. `None` means use the executor default (process CWD
59    /// and inherited env), which is identical to the behaviour before this field existed.
60    pub context: Option<crate::ExecutionContext>,
61    /// Opaque tool call ID used to correlate [`ToolEvent::OutputChunk`] events with
62    /// their originating tool call in the TUI. Empty when not set by the agent loop.
63    pub tool_call_id: String,
64    /// Names of skills active in the turn that issued this tool call (turn-level attribution).
65    ///
66    /// This is a best-effort, turn-scoped field: it lists the skills injected into the
67    /// system prompt for the current turn, not the specific skill that caused this individual
68    /// call (the LLM does not report per-call causation). `None` for system-initiated or
69    /// internal tool calls that execute outside the skill-augmented agent loop.
70    pub skill_name: Option<Vec<String>>,
71}
72
73/// Cumulative filter statistics for a single tool execution.
74///
75/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
76/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
77#[derive(Debug, Clone, Default)]
78pub struct FilterStats {
79    /// Raw character count before filtering.
80    pub raw_chars: usize,
81    /// Character count after filtering.
82    pub filtered_chars: usize,
83    /// Raw line count before filtering.
84    pub raw_lines: usize,
85    /// Line count after filtering.
86    pub filtered_lines: usize,
87    /// Worst-case confidence across all applied filters.
88    pub confidence: Option<crate::FilterConfidence>,
89    /// The shell command that produced this output, for display purposes.
90    pub command: Option<String>,
91    /// Zero-based line indices that were kept after filtering.
92    pub kept_lines: Vec<usize>,
93}
94
95impl FilterStats {
96    /// Returns the percentage of characters removed by filtering.
97    ///
98    /// Returns `0.0` when there was no raw output to filter.
99    #[must_use]
100    #[allow(clippy::cast_precision_loss)]
101    pub fn savings_pct(&self) -> f64 {
102        if self.raw_chars == 0 {
103            return 0.0;
104        }
105        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
106    }
107
108    /// Estimates the number of LLM tokens saved by filtering.
109    ///
110    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
111    /// not for billing or exact budget calculations.
112    #[must_use]
113    pub fn estimated_tokens_saved(&self) -> usize {
114        self.raw_chars.saturating_sub(self.filtered_chars) / 4
115    }
116
117    /// Formats a one-line filter summary for log messages and TUI status.
118    ///
119    /// # Example
120    ///
121    /// ```rust
122    /// use zeph_tools::FilterStats;
123    ///
124    /// let stats = FilterStats {
125    ///     raw_chars: 1000,
126    ///     filtered_chars: 400,
127    ///     raw_lines: 50,
128    ///     filtered_lines: 20,
129    ///     command: Some("cargo build".to_owned()),
130    ///     ..Default::default()
131    /// };
132    /// let summary = stats.format_inline("shell");
133    /// assert!(summary.contains("60.0% filtered"));
134    /// ```
135    #[must_use]
136    pub fn format_inline(&self, tool_name: &str) -> String {
137        let cmd_label = self
138            .command
139            .as_deref()
140            .map(|c| {
141                let trimmed = c.trim();
142                if trimmed.len() > 60 {
143                    format!(" `{}…`", &trimmed[..57])
144                } else {
145                    format!(" `{trimmed}`")
146                }
147            })
148            .unwrap_or_default();
149        format!(
150            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
151            self.raw_lines,
152            self.filtered_lines,
153            self.savings_pct()
154        )
155    }
156}
157
158/// Provenance of a tool execution result.
159///
160/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
161/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
162/// `None` means the source is unspecified (pass-through code, mocks, tests).
163#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
164#[serde(rename_all = "snake_case")]
165#[non_exhaustive]
166pub enum ClaimSource {
167    /// Local shell command execution.
168    Shell,
169    /// Local file system read/write.
170    FileSystem,
171    /// HTTP web scrape.
172    WebScrape,
173    /// MCP server tool response.
174    Mcp,
175    /// A2A agent message.
176    A2a,
177    /// Code search (LSP or semantic).
178    CodeSearch,
179    /// Agent diagnostics (internal).
180    Diagnostics,
181    /// Memory retrieval (semantic search).
182    Memory,
183    /// Telegram moderation action (reaction deletion).
184    Moderation,
185}
186
187/// Structured result from tool execution.
188///
189/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
190/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
191///
192/// # Example
193///
194/// ```rust
195/// use zeph_tools::{ToolOutput, executor::ClaimSource};
196/// use zeph_common::ToolName;
197///
198/// let output = ToolOutput {
199///     tool_name: ToolName::new("shell"),
200///     summary: "hello\n".to_owned(),
201///     blocks_executed: 1,
202///     filter_stats: None,
203///     diff: None,
204///     streamed: false,
205///     terminal_id: None,
206///     locations: None,
207///     raw_response: None,
208///     claim_source: Some(ClaimSource::Shell),
209/// };
210/// assert_eq!(output.to_string(), "hello\n");
211/// ```
212#[derive(Debug, Clone)]
213pub struct ToolOutput {
214    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
215    pub tool_name: ToolName,
216    /// Human-readable result text injected into the LLM context.
217    pub summary: String,
218    /// Number of code blocks processed in this invocation.
219    pub blocks_executed: u32,
220    /// Output filter statistics when filtering was applied, `None` otherwise.
221    pub filter_stats: Option<FilterStats>,
222    /// File diff data for TUI display when the tool modified a tracked file.
223    pub diff: Option<DiffData>,
224    /// Whether this tool already streamed its output via `ToolEvent` channel.
225    pub streamed: bool,
226    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
227    pub terminal_id: Option<String>,
228    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
229    pub locations: Option<Vec<String>>,
230    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
231    pub raw_response: Option<serde_json::Value>,
232    /// Provenance of this tool result. Set by the executor at construction time.
233    /// `None` in pass-through wrappers, mocks, and tests.
234    pub claim_source: Option<ClaimSource>,
235}
236
237impl fmt::Display for ToolOutput {
238    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239        f.write_str(&self.summary)
240    }
241}
242
/// Largest tool output that is injected into the LLM context untouched.
///
/// Anything longer is cut down by [`truncate_tool_output`] to a head and a tail, so that
/// both the start and the end of a large command output survive.
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;
248
249/// Truncate tool output that exceeds [`MAX_TOOL_OUTPUT_CHARS`] using a head+tail split.
250///
251/// Equivalent to `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`.
252///
253/// # Example
254///
255/// ```rust
256/// use zeph_tools::executor::truncate_tool_output;
257///
258/// let short = "hello world";
259/// assert_eq!(truncate_tool_output(short), short);
260/// ```
261#[must_use]
262pub fn truncate_tool_output(output: &str) -> String {
263    truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)
264}
265
266/// Truncate tool output that exceeds `max_chars` using a head+tail split.
267///
268/// Preserves the first and last `max_chars / 2` characters and inserts a truncation
269/// marker in the middle. Both boundaries are snapped to valid UTF-8 character boundaries.
270///
271/// # Example
272///
273/// ```rust
274/// use zeph_tools::executor::truncate_tool_output_at;
275///
276/// let long = "a".repeat(200);
277/// let truncated = truncate_tool_output_at(&long, 100);
278/// assert!(truncated.contains("truncated"));
279/// assert!(truncated.len() < long.len());
280/// ```
281#[must_use]
282pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
283    if output.len() <= max_chars {
284        return output.to_string();
285    }
286
287    let half = max_chars / 2;
288    let head_end = output.floor_char_boundary(half);
289    let tail_start = output.ceil_char_boundary(output.len() - half);
290    let head = &output[..head_end];
291    let tail = &output[tail_start..];
292    let truncated = output.len() - head_end - (output.len() - tail_start);
293
294    format!(
295        "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
296    )
297}
298
299/// Event emitted during tool execution for real-time UI updates.
300///
301/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
302/// Each event variant corresponds to a phase in the tool execution lifecycle.
303#[derive(Debug, Clone)]
304#[non_exhaustive]
305pub enum ToolEvent {
306    /// The tool has started. Displayed in the TUI as a spinner with the command text.
307    Started {
308        tool_name: ToolName,
309        command: String,
310        /// Active sandbox profile, if any. `None` when sandbox is disabled.
311        sandbox_profile: Option<String>,
312        /// Canonical absolute working directory the command will run in.
313        /// `None` for executors that do not resolve a per-turn CWD.
314        resolved_cwd: Option<String>,
315        /// Name of the resolved execution environment (from `[[execution.environments]]`),
316        /// or `None` when no named environment was selected.
317        execution_env: Option<String>,
318    },
319    /// A chunk of streaming output was produced (e.g. from a long-running command).
320    OutputChunk {
321        tool_name: ToolName,
322        command: String,
323        chunk: String,
324        /// Opaque tool call ID matching the corresponding [`ToolEvent::Started`] event.
325        /// Empty string when the executor does not have access to the call ID.
326        tool_call_id: String,
327        /// Skills active in the turn that triggered this tool call (turn-level attribution).
328        skill_name: Option<Vec<String>>,
329    },
330    /// The tool finished. Contains the full output and optional filter/diff data.
331    Completed {
332        tool_name: ToolName,
333        command: String,
334        /// Full output text (possibly filtered and truncated).
335        output: String,
336        /// `true` when the tool exited successfully, `false` on error.
337        success: bool,
338        filter_stats: Option<FilterStats>,
339        diff: Option<DiffData>,
340        /// Set when this completion belongs to a background run. `None` for blocking runs.
341        run_id: Option<RunId>,
342    },
343    /// A transactional rollback was performed, restoring or deleting files.
344    Rollback {
345        tool_name: ToolName,
346        command: String,
347        /// Number of files restored to their pre-execution content.
348        restored_count: usize,
349        /// Number of files that did not exist before execution and were deleted.
350        deleted_count: usize,
351    },
352}
353
354/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
355///
356/// Capacity is 1024 slots. Streaming variants (`OutputChunk`, `Started`) use
357/// `try_send` and drop on full; terminal variants (`Completed`, `Rollback`) use
358/// `send().await` to guarantee delivery.
359///
360/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
361pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;
362
363/// Receiver half matching [`ToolEventTx`].
364pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;
365
/// Number of slots in the bounded tool-event channel.
pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
368
369/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
370///
371/// Transient errors may succeed on retry (network blips, race conditions).
372/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
373#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
374#[non_exhaustive]
375pub enum ErrorKind {
376    Transient,
377    Permanent,
378}
379
380impl std::fmt::Display for ErrorKind {
381    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
382        match self {
383            Self::Transient => f.write_str("transient"),
384            Self::Permanent => f.write_str("permanent"),
385        }
386    }
387}
388
389#[non_exhaustive]
390/// Errors that can occur during tool execution.
391#[derive(Debug, thiserror::Error)]
392pub enum ToolError {
393    #[error("command blocked by policy: {command}")]
394    Blocked { command: String },
395
396    /// Command was blocked and a safer alternative is available.
397    ///
398    /// Emitted by [`ShellExecutor`](crate::ShellExecutor) when `suggest_fix` returns a
399    /// suggestion. The agent receives both the block reason and the alternative so it can
400    /// self-correct without additional prompting.
401    #[error("command blocked by policy: {command}")]
402    BlockedWithFix {
403        command: String,
404        suggestion: Option<crate::shell::SafeFixSuggestion>,
405    },
406
407    #[error("path not allowed by sandbox: {path}")]
408    SandboxViolation { path: String },
409
410    #[error("command requires confirmation: {command}")]
411    ConfirmationRequired { command: String },
412
413    #[error("command timed out after {timeout_secs}s")]
414    Timeout { timeout_secs: u64 },
415
416    #[error("operation cancelled")]
417    Cancelled,
418
419    #[error("invalid tool parameters: {message}")]
420    InvalidParams { message: String },
421
422    #[error("execution failed: {0}")]
423    Execution(#[from] std::io::Error),
424
425    /// HTTP or API error with status code for fine-grained classification.
426    ///
427    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
428    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
429    #[error("HTTP error {status}: {message}")]
430    Http { status: u16, message: String },
431
432    /// Shell execution error with explicit exit code and pre-classified category.
433    ///
434    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
435    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
436    /// Preserves the exit code for audit logging and the category for skill evolution.
437    #[error("shell error (exit {exit_code}): {message}")]
438    Shell {
439        exit_code: i32,
440        category: crate::error_taxonomy::ToolErrorCategory,
441        message: String,
442    },
443
444    #[error("snapshot failed: {reason}")]
445    SnapshotFailed { reason: String },
446
447    /// Tool call rejected because the tool id is outside the active capability scope.
448    ///
449    /// Emitted by `ScopedToolExecutor` before any tool side-effect runs.
450    /// The audit log records `error_category = "out_of_scope"`.
451    // LLM isolation: task_type is never shown in the error message (P2-OutOfScope).
452    #[error("tool call denied by policy")]
453    OutOfScope {
454        /// Fully-qualified tool id that was rejected.
455        tool_id: String,
456        /// Active task type at dispatch time, if any.
457        task_type: Option<String>,
458    },
459
460    /// Tool call blocked by `ShadowProbeExecutor` after the LLM safety probe returned Deny.
461    ///
462    /// Emitted before any tool side-effect runs. The probe evaluated the full trajectory
463    /// context and determined the call is unsafe. Reason is LLM-generated; shown to the
464    /// agent loop as the tool result so the model can adapt.
465    #[error("tool call denied by safety probe: {reason}")]
466    SafetyDenied {
467        /// Human-readable explanation from the LLM safety probe.
468        reason: String,
469    },
470
471    /// Tool call blocked by the MAGE `TrajectoryRiskAccumulator` (spec 004-16).
472    ///
473    /// Cumulative session risk exceeded `risk_threshold`. The agent loop receives the
474    /// score and the top contributing signals so it can explain the denial to the user.
475    #[error("tool call blocked: trajectory risk {score:.3} exceeds threshold")]
476    TrajectoryRiskExceeded {
477        /// Current `trajectory_risk` value at the time of the block.
478        score: f64,
479        /// Human-readable labels for the top contributing signals (up to 3).
480        top_signals: Vec<String>,
481    },
482}
483
484impl ToolError {
485    /// Fine-grained error classification using the 12-category taxonomy.
486    ///
487    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
488    /// backward compatibility and delegates to `category().error_kind()`.
489    #[must_use]
490    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
491        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
492        match self {
493            Self::Blocked { .. } | Self::BlockedWithFix { .. } | Self::SandboxViolation { .. } => {
494                ToolErrorCategory::PolicyBlocked
495            }
496            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
497            Self::Timeout { .. } => ToolErrorCategory::Timeout,
498            Self::Cancelled => ToolErrorCategory::Cancelled,
499            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
500            Self::Http { status, .. } => classify_http_status(*status),
501            Self::Execution(io_err) => classify_io_error(io_err),
502            Self::Shell { category, .. } => *category,
503            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
504            Self::OutOfScope { .. }
505            | Self::SafetyDenied { .. }
506            | Self::TrajectoryRiskExceeded { .. } => ToolErrorCategory::PolicyBlocked,
507        }
508    }
509
510    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
511    ///
512    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
513    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
514    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
515    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
516    ///   I/O error kinds — retrying would waste time with no benefit.
517    #[must_use]
518    pub fn kind(&self) -> ErrorKind {
519        use crate::error_taxonomy::ToolErrorCategoryExt;
520        self.category().error_kind()
521    }
522}
523
524/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
525///
526/// # Errors
527///
528/// Returns `ToolError::InvalidParams` when deserialization fails.
529pub fn deserialize_params<T: serde::de::DeserializeOwned>(
530    params: &serde_json::Map<String, serde_json::Value>,
531) -> Result<T, ToolError> {
532    let obj = serde_json::Value::Object(params.clone());
533    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
534        message: e.to_string(),
535    })
536}
537
538/// Async trait for tool execution backends.
539///
540/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
541/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
542/// and [`FileExecutor`](crate::FileExecutor).
543///
544/// # Contract
545///
546/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
547///   return `Ok(None)` when the executor does not handle the given input — callers must not
548///   treat `None` as an error.
549/// - All methods must be `Send + Sync` and free of blocking I/O.
550/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
551///   protection) before executing any side-effectful operation.
552/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
553///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
554///   confirmation gates only — all other security controls remain active.
555///
556/// # Two Invocation Paths
557///
558/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
559/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
560///
561/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
562/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
563/// This is the preferred path for new code.
564///
565/// # Example
566///
567/// ```rust
568/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
569///
570/// #[derive(Debug)]
571/// struct EchoExecutor;
572///
573/// impl ToolExecutor for EchoExecutor {
574///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
575///         Ok(None) // not a fenced-block executor
576///     }
577///
578///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
579///         if call.tool_id != "echo" {
580///             return Ok(None);
581///         }
582///         let text = call.params.get("text")
583///             .and_then(|v| v.as_str())
584///             .unwrap_or("")
585///             .to_owned();
586///         Ok(Some(ToolOutput {
587///             tool_name: "echo".into(),
588///             summary: text,
589///             blocks_executed: 1,
590///             filter_stats: None,
591///             diff: None,
592///             streamed: false,
593///             terminal_id: None,
594///             locations: None,
595///             raw_response: None,
596///             claim_source: None,
597///         }))
598///     }
599/// }
600/// ```
601/// # TODO (G3 — deferred: Tower-style tool middleware stack)
602///
603/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
604/// are scattered across individual executor implementations. The planned approach is a
605/// composable middleware stack similar to Tower's `Service` trait:
606///
607/// ```text
608/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
609/// ```
610///
611/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
612/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
613/// dynamic dispatch overhead before collapsing D2.
614///
615/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
616///
617/// Having two parallel traits creates duplication and confusion. The blanket impl
618/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
619/// added to both traits. Use `trait_variant::make` or a single object-safe design.
620///
621/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
622pub trait ToolExecutor: Send + Sync {
623    /// Parse `response` for fenced tool blocks and execute them.
624    ///
625    /// Returns `Ok(None)` when no tool blocks are found in `response`.
626    ///
627    /// # Errors
628    ///
629    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
630    /// sandbox violation, network error, timeout, etc.).
631    fn execute(
632        &self,
633        response: &str,
634    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;
635
636    /// Execute bypassing confirmation checks (called after user approves).
637    ///
638    /// Security controls other than the confirmation gate remain active. Default
639    /// implementation delegates to [`execute`](ToolExecutor::execute).
640    ///
641    /// # Errors
642    ///
643    /// Returns [`ToolError`] on execution failure.
644    fn execute_confirmed(
645        &self,
646        response: &str,
647    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
648        self.execute(response)
649    }
650
651    /// Return the tool definitions this executor can handle.
652    ///
653    /// Used to populate the LLM's tool schema at context-assembly time.
654    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
655    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
656        vec![]
657    }
658
659    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
660    ///
661    /// # Errors
662    ///
663    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
664    fn execute_tool_call(
665        &self,
666        _call: &ToolCall,
667    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
668        std::future::ready(Ok(None))
669    }
670
671    /// Execute a structured tool call bypassing confirmation checks.
672    ///
673    /// Called after the user has explicitly approved the tool invocation.
674    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
675    ///
676    /// # Errors
677    ///
678    /// Returns [`ToolError`] on execution failure.
679    fn execute_tool_call_confirmed(
680        &self,
681        call: &ToolCall,
682    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
683        self.execute_tool_call(call)
684    }
685
686    /// Inject environment variables for the currently active skill. No-op by default.
687    ///
688    /// Called by the agent loop before each turn when the active skill specifies env vars.
689    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
690    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}
691
692    /// Set the effective trust level for the currently active skill. No-op by default.
693    ///
694    /// Trust level affects which operations are permitted (e.g. network access, file writes).
695    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}
696
697    /// Whether the executor can safely retry this tool call on a transient error.
698    ///
699    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
700    /// Shell commands and other non-idempotent operations must keep the default `false`
701    /// to prevent double-execution of side-effectful commands.
702    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
703        false
704    }
705
706    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
707    ///
708    /// Speculative execution requires the tool to be:
709    /// 1. Idempotent — repeated execution with the same args produces the same result.
710    /// 2. Side-effect-free or cheaply reversible.
711    /// 3. Not subject to user confirmation (`needs_confirmation` must be false at call time).
712    ///
713    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
714    /// properties. The engine additionally gates on trust level and confirmation status
715    /// regardless of this flag.
716    ///
717    /// # Examples
718    ///
719    /// ```rust
720    /// use zeph_tools::ToolExecutor;
721    ///
722    /// struct ReadOnlyExecutor;
723    /// impl ToolExecutor for ReadOnlyExecutor {
724    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
725    ///         Ok(None)
726    ///     }
727    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
728    ///         true // read-only, idempotent
729    ///     }
730    /// }
731    /// ```
732    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
733        false
734    }
735
736    /// Return `true` when `call` would require user confirmation before execution.
737    ///
738    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
739    /// Used by the speculative engine to gate dispatch without causing double side-effects.
740    ///
741    /// Default: `false`. Executors that enforce a confirmation policy (e.g. `TrustGateExecutor`)
742    /// must override this to reflect their actual policy without executing the tool.
743    fn requires_confirmation(&self, _call: &ToolCall) -> bool {
744        false
745    }
746}
747
/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
///
/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
///
/// Implemented automatically for all `T: ToolExecutor + 'static` via the blanket impl below.
/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
pub trait ErasedToolExecutor: Send + Sync {
    /// Boxed-future counterpart of [`ToolExecutor::execute`].
    ///
    /// The returned future is `Send` and may borrow `self` and `response` for `'a`.
    fn execute_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_confirmed`].
    ///
    /// The returned future is `Send` and may borrow `self` and `response` for `'a`.
    fn execute_confirmed_erased<'a>(
        &'a self,
        response: &'a str,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Counterpart of [`ToolExecutor::tool_definitions`]; returns an owned list of definitions.
    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call`].
    ///
    /// The returned future is `Send` and may borrow `self` and `call` for `'a`.
    fn execute_tool_call_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    /// Boxed-future counterpart of [`ToolExecutor::execute_tool_call_confirmed`].
    ///
    /// Default: forwards to [`Self::execute_tool_call_erased`], i.e. the ordinary
    /// (non-confirmed) path.
    fn execute_tool_call_confirmed_erased<'a>(
        &'a self,
        call: &'a ToolCall,
    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
    {
        // TrustGateExecutor overrides ToolExecutor::execute_tool_call_confirmed; the blanket
        // impl for T: ToolExecutor routes this call through it via execute_tool_call_confirmed_erased.
        // Other implementors fall back to execute_tool_call_erased (normal enforcement path).
        self.execute_tool_call_erased(call)
    }

    /// Inject environment variables for the currently active skill. No-op by default.
    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}

    /// Set the effective trust level for the currently active skill. No-op by default.
    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}

    /// Whether the executor can safely retry this tool call on a transient error.
    ///
    /// Required method: unlike the other policy queries on this trait it has no default.
    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;

    /// Whether a tool call can be safely dispatched speculatively.
    ///
    /// Default: `false`. Override to `true` in read-only executors.
    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
        false
    }

    /// Return `true` when `call` would require user confirmation before execution.
    ///
    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
    /// Used by the speculative engine to gate dispatch without causing double side-effects.
    ///
    /// Default: `true` (confirmation required). Implementors that want to allow speculative
    /// dispatch must explicitly return `false`. The blanket impl for `T: ToolExecutor`
    /// delegates to [`ToolExecutor::requires_confirmation`].
    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
        true
    }
}
813
814impl<T: ToolExecutor> ErasedToolExecutor for T {
815    fn execute_erased<'a>(
816        &'a self,
817        response: &'a str,
818    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
819    {
820        Box::pin(self.execute(response))
821    }
822
823    fn execute_confirmed_erased<'a>(
824        &'a self,
825        response: &'a str,
826    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
827    {
828        Box::pin(self.execute_confirmed(response))
829    }
830
831    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
832        self.tool_definitions()
833    }
834
835    fn execute_tool_call_erased<'a>(
836        &'a self,
837        call: &'a ToolCall,
838    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
839    {
840        Box::pin(self.execute_tool_call(call))
841    }
842
843    fn execute_tool_call_confirmed_erased<'a>(
844        &'a self,
845        call: &'a ToolCall,
846    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
847    {
848        Box::pin(self.execute_tool_call_confirmed(call))
849    }
850
851    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
852        ToolExecutor::set_skill_env(self, env);
853    }
854
855    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
856        ToolExecutor::set_effective_trust(self, level);
857    }
858
859    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
860        ToolExecutor::is_tool_retryable(self, tool_id)
861    }
862
863    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
864        ToolExecutor::is_tool_speculatable(self, tool_id)
865    }
866
867    fn requires_confirmation_erased(&self, call: &ToolCall) -> bool {
868        ToolExecutor::requires_confirmation(self, call)
869    }
870}
871
/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
///
/// Enables dynamic composition of tool executors at runtime without static type chains.
///
/// The wrapped handle is the public tuple field `0`; the [`ToolExecutor`] impl for this type
/// forwards every method to it.
pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
876
877impl ToolExecutor for DynExecutor {
878    fn execute(
879        &self,
880        response: &str,
881    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
882        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
883        let inner = std::sync::Arc::clone(&self.0);
884        let response = response.to_owned();
885        async move { inner.execute_erased(&response).await }
886    }
887
888    fn execute_confirmed(
889        &self,
890        response: &str,
891    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
892        let inner = std::sync::Arc::clone(&self.0);
893        let response = response.to_owned();
894        async move { inner.execute_confirmed_erased(&response).await }
895    }
896
897    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
898        self.0.tool_definitions_erased()
899    }
900
901    fn execute_tool_call(
902        &self,
903        call: &ToolCall,
904    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
905        let inner = std::sync::Arc::clone(&self.0);
906        let call = call.clone();
907        async move { inner.execute_tool_call_erased(&call).await }
908    }
909
910    fn execute_tool_call_confirmed(
911        &self,
912        call: &ToolCall,
913    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
914        let inner = std::sync::Arc::clone(&self.0);
915        let call = call.clone();
916        async move { inner.execute_tool_call_confirmed_erased(&call).await }
917    }
918
919    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
920        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
921    }
922
923    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
924        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
925    }
926
927    fn is_tool_retryable(&self, tool_id: &str) -> bool {
928        self.0.is_tool_retryable_erased(tool_id)
929    }
930
931    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
932        self.0.is_tool_speculatable_erased(tool_id)
933    }
934
935    fn requires_confirmation(&self, call: &ToolCall) -> bool {
936        self.0.requires_confirmation_erased(call)
937    }
938}
939
/// Collect the bodies of fenced code blocks tagged with `lang`.
///
/// Scans `text` for an opening `` ```{lang} `` fence followed by a closing `` ``` `` and
/// returns everything in between, trimmed of surrounding whitespace, in order of appearance.
///
/// An opening fence only counts when the tag ends right there: the character after it must
/// be absent or something other than an alphanumeric, `_` or `-`, so a search for `bash`
/// does not pick up `` ```bashrc ``. Scanning stops at the first accepted opening fence that
/// has no closing fence after it.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const FENCE: &str = "```";

    let opener = [FENCE, lang].concat();
    let mut found = Vec::new();
    // Absolute byte offset into `text` from which the next opening fence is searched.
    let mut cursor = 0;

    loop {
        let Some(offset) = text[cursor..].find(opener.as_str()) else {
            break;
        };
        let body_start = cursor + offset + opener.len();
        let tail = &text[body_start..];

        // The tag keeps going (e.g. "bashrc" while looking for "bash"): not our fence, so
        // resume scanning just past this false match.
        let tag_continues = matches!(
            tail.chars().next(),
            Some(c) if c.is_alphanumeric() || c == '_' || c == '-'
        );
        if tag_continues {
            cursor = body_start;
            continue;
        }

        let Some(close) = tail.find(FENCE) else {
            break;
        };
        found.push(tail[..close].trim());
        cursor = body_start + close + FENCE.len();
    }

    found
}
976
977#[cfg(test)]
978mod tests {
979    use super::*;
980
#[test]
fn tool_output_display() {
    // `Display` for `ToolOutput` is expected to render exactly the summary text.
    let expected = "$ echo hello\nhello";
    let rendered = ToolOutput {
        tool_name: ToolName::new("bash"),
        summary: expected.to_owned(),
        blocks_executed: 1,
        filter_stats: None,
        diff: None,
        streamed: false,
        terminal_id: None,
        locations: None,
        raw_response: None,
        claim_source: None,
    }
    .to_string();
    assert_eq!(rendered, expected);
}
997
#[test]
fn tool_error_blocked_display() {
    // The blocked command is echoed back after the policy prefix.
    let command = String::from("rm -rf /");
    let rendered = ToolError::Blocked { command }.to_string();
    assert_eq!(rendered, "command blocked by policy: rm -rf /");
}
1005
#[test]
fn tool_error_sandbox_violation_display() {
    // The offending path is echoed back after the sandbox prefix.
    let path = String::from("/etc/shadow");
    let rendered = ToolError::SandboxViolation { path }.to_string();
    assert_eq!(rendered, "path not allowed by sandbox: /etc/shadow");
}
1013
#[test]
fn tool_error_confirmation_required_display() {
    // The pending command is appended to the confirmation message.
    let command = String::from("rm -rf /tmp");
    let rendered = ToolError::ConfirmationRequired { command }.to_string();
    assert_eq!(rendered, "command requires confirmation: rm -rf /tmp");
}
1024
#[test]
fn tool_error_timeout_display() {
    // The timeout is rendered in seconds with an `s` suffix.
    let rendered = ToolError::Timeout { timeout_secs: 30 }.to_string();
    assert_eq!(rendered, "command timed out after 30s");
}
1030
#[test]
fn tool_error_invalid_params_display() {
    // The underlying message is appended verbatim, backticks included.
    let message = String::from("missing field `command`");
    let rendered = ToolError::InvalidParams { message }.to_string();
    assert_eq!(rendered, "invalid tool parameters: missing field `command`");
}
1041
#[test]
fn deserialize_params_valid() {
    #[derive(Debug, serde::Deserialize, PartialEq)]
    struct P {
        name: String,
        count: u32,
    }

    // A map carrying both fields with the right JSON types must deserialize cleanly.
    let mut params = serde_json::Map::new();
    params.insert(String::from("name"), serde_json::json!("test"));
    params.insert(String::from("count"), serde_json::json!(42));

    let expected = P {
        name: String::from("test"),
        count: 42,
    };
    let parsed = deserialize_params::<P>(&params).unwrap();
    assert_eq!(parsed, expected);
}
1061
#[test]
fn deserialize_params_missing_required_field() {
    #[derive(Debug, serde::Deserialize)]
    #[allow(dead_code)]
    struct P {
        name: String,
    }

    // An empty map lacks the mandatory `name` key, so deserialization must be rejected.
    let rejection = deserialize_params::<P>(&serde_json::Map::new()).unwrap_err();
    assert!(matches!(rejection, ToolError::InvalidParams { .. }));
}
1073
#[test]
fn deserialize_params_wrong_type() {
    #[derive(Debug, serde::Deserialize)]
    #[allow(dead_code)]
    struct P {
        count: u32,
    }

    // A JSON string where a `u32` is expected must be rejected as invalid parameters.
    let mut params = serde_json::Map::new();
    params.insert(String::from("count"), serde_json::json!("not a number"));

    let rejection = deserialize_params::<P>(&params).unwrap_err();
    assert!(matches!(rejection, ToolError::InvalidParams { .. }));
}
1086
#[test]
fn deserialize_params_all_optional_empty() {
    #[derive(Debug, serde::Deserialize, PartialEq)]
    struct P {
        name: Option<String>,
    }

    // With only optional fields, an empty map deserializes to all-`None`.
    let parsed = deserialize_params::<P>(&serde_json::Map::new()).unwrap();
    assert_eq!(parsed, P { name: None });
}
1097
#[test]
fn deserialize_params_ignores_extra_fields() {
    #[derive(Debug, serde::Deserialize, PartialEq)]
    struct P {
        name: String,
    }

    // Keys that the target struct does not declare must not cause a failure.
    let mut params = serde_json::Map::new();
    params.insert(String::from("name"), serde_json::json!("test"));
    params.insert(String::from("extra"), serde_json::json!(true));

    let expected = P {
        name: String::from("test"),
    };
    let parsed = deserialize_params::<P>(&params).unwrap();
    assert_eq!(parsed, expected);
}
1115
#[test]
fn tool_error_execution_display() {
    // The rendered text carries a fixed prefix and the wrapped I/O error's message.
    let cause = std::io::Error::new(std::io::ErrorKind::NotFound, "bash not found");
    let rendered = ToolError::Execution(cause).to_string();
    assert!(rendered.starts_with("execution failed:"));
    assert!(rendered.contains("bash not found"));
}
1123
1124    // ErrorKind classification tests
#[test]
fn error_kind_timeout_is_transient() {
    // A tool timeout is classified as transient.
    let classified = ToolError::Timeout { timeout_secs: 30 }.kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1130
#[test]
fn error_kind_blocked_is_permanent() {
    // A policy block is classified as permanent.
    let command = String::from("rm -rf /");
    let classified = ToolError::Blocked { command }.kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1138
#[test]
fn error_kind_sandbox_violation_is_permanent() {
    // A sandbox violation is classified as permanent.
    let path = String::from("/etc/shadow");
    let classified = ToolError::SandboxViolation { path }.kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1146
#[test]
fn error_kind_cancelled_is_permanent() {
    // Cancellation is classified as permanent.
    let classified = ToolError::Cancelled.kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1151
#[test]
fn error_kind_invalid_params_is_permanent() {
    // Invalid parameters are classified as permanent.
    let message = String::from("bad arg");
    let classified = ToolError::InvalidParams { message }.kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1159
#[test]
fn error_kind_confirmation_required_is_permanent() {
    // A pending confirmation is classified as permanent.
    let command = String::from("rm /tmp/x");
    let classified = ToolError::ConfirmationRequired { command }.kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1167
#[test]
fn error_kind_execution_timed_out_is_transient() {
    // An execution failure wrapping `TimedOut` is classified as transient.
    let cause = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1173
#[test]
fn error_kind_execution_interrupted_is_transient() {
    // An execution failure wrapping `Interrupted` is classified as transient.
    let cause = std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1179
#[test]
fn error_kind_execution_connection_reset_is_transient() {
    // An execution failure wrapping `ConnectionReset` is classified as transient.
    let cause = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1185
#[test]
fn error_kind_execution_broken_pipe_is_transient() {
    // An execution failure wrapping `BrokenPipe` is classified as transient.
    let cause = std::io::Error::new(std::io::ErrorKind::BrokenPipe, "pipe broken");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1191
#[test]
fn error_kind_execution_would_block_is_transient() {
    // An execution failure wrapping `WouldBlock` is classified as transient.
    let cause = std::io::Error::new(std::io::ErrorKind::WouldBlock, "would block");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1197
#[test]
fn error_kind_execution_connection_aborted_is_transient() {
    // An execution failure wrapping `ConnectionAborted` is classified as transient.
    let cause = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Transient);
}
1203
#[test]
fn error_kind_execution_not_found_is_permanent() {
    // An execution failure wrapping `NotFound` is classified as permanent.
    let cause = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1209
#[test]
fn error_kind_execution_permission_denied_is_permanent() {
    // An execution failure wrapping `PermissionDenied` is classified as permanent.
    let cause = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1215
#[test]
fn error_kind_execution_other_is_permanent() {
    // An execution failure built with `io::Error::other` is classified as permanent.
    let cause = std::io::Error::other("some other error");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1221
#[test]
fn error_kind_execution_already_exists_is_permanent() {
    // An execution failure wrapping `AlreadyExists` is classified as permanent.
    let cause = std::io::Error::new(std::io::ErrorKind::AlreadyExists, "exists");
    let classified = ToolError::Execution(cause).kind();
    assert_eq!(classified, ErrorKind::Permanent);
}
1227
#[test]
fn error_kind_display() {
    // Each kind renders as its lowercase name.
    let expectations = [
        (ErrorKind::Transient, "transient"),
        (ErrorKind::Permanent, "permanent"),
    ];
    for (kind, label) in expectations {
        assert_eq!(kind.to_string(), label);
    }
}
1233
#[test]
fn truncate_tool_output_short_passthrough() {
    // Input well below the limit comes back unchanged.
    let input = "hello world";
    let output = truncate_tool_output(input);
    assert_eq!(output, input);
}
1239
#[test]
fn truncate_tool_output_exact_limit() {
    // Input of exactly `MAX_TOOL_OUTPUT_CHARS` characters comes back unchanged.
    let at_limit = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
    let output = truncate_tool_output(&at_limit);
    assert_eq!(output, at_limit);
}
1245
#[test]
fn truncate_tool_output_long_split() {
    // Input over the limit is shortened and carries a truncation notice.
    let oversized = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
    let shortened = truncate_tool_output(&oversized);
    assert!(shortened.contains("truncated"));
    assert!(shortened.len() < oversized.len());
}
1253
#[test]
fn truncate_tool_output_notice_contains_count() {
    // The truncation notice mentions both "truncated" and the unit "chars".
    let oversized = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
    let shortened = truncate_tool_output(&oversized);
    for needle in ["truncated", "chars"] {
        assert!(shortened.contains(needle));
    }
}
1261
/// Minimal executor that implements only `execute`, so every other [`ToolExecutor`]
/// method reached through it uses the trait's default implementation.
#[derive(Debug)]
struct DefaultExecutor;
impl ToolExecutor for DefaultExecutor {
    // Always reports that nothing was executed.
    async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
        Ok(None)
    }
}
1269
1270    #[tokio::test]
1271    async fn execute_tool_call_default_returns_none() {
1272        let exec = DefaultExecutor;
1273        let call = ToolCall {
1274            tool_id: ToolName::new("anything"),
1275            params: serde_json::Map::new(),
1276            caller_id: None,
1277            context: None,
1278
1279            tool_call_id: String::new(),
1280            skill_name: None,
1281        };
1282        let result = exec.execute_tool_call(&call).await.unwrap();
1283        assert!(result.is_none());
1284    }
1285
#[test]
fn filter_stats_savings_pct() {
    // 1000 raw characters reduced to 200 is an 80 % saving.
    let stats = FilterStats {
        raw_chars: 1000,
        filtered_chars: 200,
        ..Default::default()
    };
    let deviation = (stats.savings_pct() - 80.0).abs();
    assert!(deviation < 0.01);
}
1295
#[test]
fn filter_stats_savings_pct_zero() {
    // Default stats must report (approximately) zero savings.
    let savings = FilterStats::default().savings_pct();
    assert!(savings.abs() < 0.01);
}
1301
#[test]
fn filter_stats_estimated_tokens_saved() {
    let stats = FilterStats {
        raw_chars: 1000,
        filtered_chars: 200,
        ..Default::default()
    };
    // (1000 - 200) / 4
    let saved = stats.estimated_tokens_saved();
    assert_eq!(saved, 200);
}
1311
#[test]
fn filter_stats_format_inline() {
    // The inline summary shows the label, both line counts and the percentage filtered.
    let rendered = FilterStats {
        raw_chars: 1000,
        filtered_chars: 200,
        raw_lines: 342,
        filtered_lines: 28,
        ..Default::default()
    }
    .format_inline("shell");
    assert_eq!(
        rendered,
        "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered"
    );
}
1324
#[test]
fn filter_stats_format_inline_zero() {
    // Default stats render with zero counts and a 0.0 % figure.
    let rendered = FilterStats::default().format_inline("bash");
    assert_eq!(rendered, "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered");
}
1331
1332    // DynExecutor tests
1333
1334    struct FixedExecutor {
1335        tool_id: &'static str,
1336        output: &'static str,
1337    }
1338
1339    impl ToolExecutor for FixedExecutor {
1340        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1341            Ok(Some(ToolOutput {
1342                tool_name: ToolName::new(self.tool_id),
1343                summary: self.output.to_owned(),
1344                blocks_executed: 1,
1345                filter_stats: None,
1346                diff: None,
1347                streamed: false,
1348                terminal_id: None,
1349                locations: None,
1350                raw_response: None,
1351                claim_source: None,
1352            }))
1353        }
1354
1355        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1356            vec![]
1357        }
1358
1359        async fn execute_tool_call(
1360            &self,
1361            _call: &ToolCall,
1362        ) -> Result<Option<ToolOutput>, ToolError> {
1363            Ok(Some(ToolOutput {
1364                tool_name: ToolName::new(self.tool_id),
1365                summary: self.output.to_owned(),
1366                blocks_executed: 1,
1367                filter_stats: None,
1368                diff: None,
1369                streamed: false,
1370                terminal_id: None,
1371                locations: None,
1372                raw_response: None,
1373                claim_source: None,
1374            }))
1375        }
1376    }
1377
1378    #[tokio::test]
1379    async fn dyn_executor_execute_delegates() {
1380        let inner = std::sync::Arc::new(FixedExecutor {
1381            tool_id: "bash",
1382            output: "hello",
1383        });
1384        let exec = DynExecutor(inner);
1385        let result = exec.execute("```bash\necho hello\n```").await.unwrap();
1386        assert!(result.is_some());
1387        assert_eq!(result.unwrap().summary, "hello");
1388    }
1389
1390    #[tokio::test]
1391    async fn dyn_executor_execute_confirmed_delegates() {
1392        let inner = std::sync::Arc::new(FixedExecutor {
1393            tool_id: "bash",
1394            output: "confirmed",
1395        });
1396        let exec = DynExecutor(inner);
1397        let result = exec.execute_confirmed("...").await.unwrap();
1398        assert!(result.is_some());
1399        assert_eq!(result.unwrap().summary, "confirmed");
1400    }
1401
#[test]
fn dyn_executor_tool_definitions_delegates() {
    let wrapped: std::sync::Arc<dyn ErasedToolExecutor> = std::sync::Arc::new(FixedExecutor {
        tool_id: "my_tool",
        output: "",
    });
    // `FixedExecutor` reports no definitions; the wrapper must hand that through
    // without panicking.
    let definitions = DynExecutor(wrapped).tool_definitions();
    assert!(definitions.is_empty());
}
1413
1414    #[tokio::test]
1415    async fn dyn_executor_execute_tool_call_delegates() {
1416        let inner = std::sync::Arc::new(FixedExecutor {
1417            tool_id: "bash",
1418            output: "tool_call_result",
1419        });
1420        let exec = DynExecutor(inner);
1421        let call = ToolCall {
1422            tool_id: ToolName::new("bash"),
1423            params: serde_json::Map::new(),
1424            caller_id: None,
1425            context: None,
1426
1427            tool_call_id: String::new(),
1428            skill_name: None,
1429        };
1430        let result = exec.execute_tool_call(&call).await.unwrap();
1431        assert!(result.is_some());
1432        assert_eq!(result.unwrap().summary, "tool_call_result");
1433    }
1434
#[test]
fn dyn_executor_set_effective_trust_delegates() {
    use std::sync::atomic::{AtomicU8, Ordering};

    /// Records the most recently applied trust level as a small integer code.
    struct TrustCapture(AtomicU8);
    impl ToolExecutor for TrustCapture {
        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
            Ok(None)
        }
        fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
            // encode: Trusted=0, Verified=1, Quarantined=2, anything else (Blocked)=3
            let code = match level {
                crate::SkillTrustLevel::Trusted => 0u8,
                crate::SkillTrustLevel::Verified => 1,
                crate::SkillTrustLevel::Quarantined => 2,
                _ => 3,
            };
            self.0.store(code, Ordering::Relaxed);
        }
    }

    let recorder = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
    let exec =
        DynExecutor(std::sync::Arc::clone(&recorder) as std::sync::Arc<dyn ErasedToolExecutor>);

    // Each level set through the wrapper must reach the wrapped executor.
    let expectations = [
        (crate::SkillTrustLevel::Quarantined, 2),
        (crate::SkillTrustLevel::Blocked, 3),
    ];
    for (level, code) in expectations {
        ToolExecutor::set_effective_trust(&exec, level);
        assert_eq!(recorder.0.load(Ordering::Relaxed), code);
    }
}
1465
#[test]
fn extract_fenced_blocks_no_prefix_match() {
    // (input, blocks expected when searching for "bash")
    let cases: [(&str, &[&str]); 3] = [
        // ```bashrc must NOT match when searching for "bash"
        ("```bashrc\nfoo\n```", &[]),
        // exact match
        ("```bash\nfoo\n```", &["foo"]),
        // trailing space is fine
        ("```bash \nfoo\n```", &["foo"]),
    ];
    for (input, expected) in cases {
        assert_eq!(extract_fenced_blocks(input, "bash"), expected);
    }
}
1481
1482    // ── ToolError::category() delegation tests ────────────────────────────────
1483
#[test]
fn tool_error_http_400_category_is_invalid_parameters() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 400 is categorised as invalid parameters.
    let category = ToolError::Http {
        status: 400,
        message: String::from("bad request"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::InvalidParameters);
}
1493
#[test]
fn tool_error_http_401_category_is_policy_blocked() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 401 is categorised as policy-blocked.
    let category = ToolError::Http {
        status: 401,
        message: String::from("unauthorized"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::PolicyBlocked);
}
1503
#[test]
fn tool_error_http_403_category_is_policy_blocked() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 403 is categorised as policy-blocked.
    let category = ToolError::Http {
        status: 403,
        message: String::from("forbidden"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::PolicyBlocked);
}
1513
#[test]
fn tool_error_http_404_category_is_permanent_failure() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 404 is categorised as a permanent failure.
    let category = ToolError::Http {
        status: 404,
        message: String::from("not found"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::PermanentFailure);
}
1523
#[test]
fn tool_error_http_429_category_is_rate_limited() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 429 is categorised as rate-limited.
    let category = ToolError::Http {
        status: 429,
        message: String::from("too many requests"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::RateLimited);
}
1533
#[test]
fn tool_error_http_500_category_is_server_error() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 500 is categorised as a server error.
    let category = ToolError::Http {
        status: 500,
        message: String::from("internal server error"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::ServerError);
}
1543
#[test]
fn tool_error_http_502_category_is_server_error() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 502 is categorised as a server error.
    let category = ToolError::Http {
        status: 502,
        message: String::from("bad gateway"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::ServerError);
}
1553
#[test]
fn tool_error_http_503_category_is_server_error() {
    use crate::error_taxonomy::ToolErrorCategory;
    // HTTP 503 is categorised as a server error.
    let category = ToolError::Http {
        status: 503,
        message: String::from("service unavailable"),
    }
    .category();
    assert_eq!(category, ToolErrorCategory::ServerError);
}
1563
#[test]
fn tool_error_http_503_is_transient_triggers_phase2_retry() {
    // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
    // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
    let classified = ToolError::Http {
        status: 503,
        message: String::from("service unavailable"),
    }
    .kind();
    assert_eq!(
        classified,
        ErrorKind::Transient,
        "HTTP 503 must be Transient so Phase 2 retry fires"
    );
}
1578
#[test]
fn tool_error_blocked_category_is_policy_blocked() {
    use crate::error_taxonomy::ToolErrorCategory;
    // A blocked command is categorised as policy-blocked.
    let command = String::from("rm -rf /");
    let category = ToolError::Blocked { command }.category();
    assert_eq!(category, ToolErrorCategory::PolicyBlocked);
}
1587
#[test]
fn tool_error_sandbox_violation_category_is_policy_blocked() {
    use crate::error_taxonomy::ToolErrorCategory;
    // A sandbox violation is categorised as policy-blocked.
    let path = String::from("/etc/shadow");
    let category = ToolError::SandboxViolation { path }.category();
    assert_eq!(category, ToolErrorCategory::PolicyBlocked);
}
1596
#[test]
fn tool_error_confirmation_required_category() {
    use crate::error_taxonomy::ToolErrorCategory;
    // A pending confirmation maps to the category of the same name.
    let command = String::from("rm /tmp/x");
    let category = ToolError::ConfirmationRequired { command }.category();
    assert_eq!(category, ToolErrorCategory::ConfirmationRequired);
}
1605
#[test]
fn tool_error_timeout_category() {
    use crate::error_taxonomy::ToolErrorCategory;
    // A tool timeout maps to the timeout category.
    let category = ToolError::Timeout { timeout_secs: 30 }.category();
    assert_eq!(category, ToolErrorCategory::Timeout);
}
1612
#[test]
fn tool_error_cancelled_category() {
    use crate::error_taxonomy::ToolErrorCategory;
    // Cancellation maps to the category of the same name.
    let category = ToolError::Cancelled.category();
    assert_eq!(category, ToolErrorCategory::Cancelled);
}
1621
/// `ToolError::InvalidParams` must be categorised as `InvalidParameters`.
#[test]
fn tool_error_invalid_params_category() {
    use crate::error_taxonomy::ToolErrorCategory;
    let bad_params = ToolError::InvalidParams {
        message: String::from("missing field"),
    };
    let category = bad_params.category();
    assert_eq!(category, ToolErrorCategory::InvalidParameters);
}
1630
/// B2 regression: an `Execution` error wrapping `io::ErrorKind::NotFound` must be a
/// `PermanentFailure`, and must NOT be reported as `ToolNotFound`.
#[test]
fn tool_error_execution_not_found_category_is_permanent_failure() {
    use crate::error_taxonomy::ToolErrorCategory;
    use std::io;

    let missing_binary = io::Error::new(io::ErrorKind::NotFound, "bash: not found");
    let category = ToolError::Execution(missing_binary).category();
    assert_ne!(
        category,
        ToolErrorCategory::ToolNotFound,
        "Execution(NotFound) must NOT map to ToolNotFound"
    );
    assert_eq!(category, ToolErrorCategory::PermanentFailure);
}
1645
/// An `Execution` error wrapping `io::ErrorKind::TimedOut` must be categorised as `Timeout`.
#[test]
fn tool_error_execution_timed_out_category_is_timeout() {
    use crate::error_taxonomy::ToolErrorCategory;
    use std::io;

    let timed_out = io::Error::new(io::ErrorKind::TimedOut, "timed out");
    let category = ToolError::Execution(timed_out).category();
    assert_eq!(category, ToolErrorCategory::Timeout);
}
1655
/// An `Execution` error wrapping `io::ErrorKind::ConnectionRefused` must be categorised as
/// `NetworkError`.
#[test]
fn tool_error_execution_connection_refused_category_is_network_error() {
    use crate::error_taxonomy::ToolErrorCategory;
    use std::io;

    let refused = io::Error::new(io::ErrorKind::ConnectionRefused, "refused");
    let category = ToolError::Execution(refused).category();
    assert_eq!(category, ToolErrorCategory::NetworkError);
}
1665
// B4 regression: Http/network/transient categories must NOT be quality failures.

/// The category of an HTTP 429 error must not be flagged as a quality failure.
#[test]
fn b4_tool_error_http_429_not_quality_failure() {
    let rate_limited = ToolError::Http {
        status: 429,
        message: String::from("rate limited"),
    };
    let is_quality_failure = rate_limited.category().is_quality_failure();
    assert!(
        !is_quality_failure,
        "RateLimited must not be a quality failure"
    );
}
1678
/// The category of an HTTP 503 error must not be flagged as a quality failure.
#[test]
fn b4_tool_error_http_503_not_quality_failure() {
    let unavailable = ToolError::Http {
        status: 503,
        message: String::from("service unavailable"),
    };
    let is_quality_failure = unavailable.category().is_quality_failure();
    assert!(
        !is_quality_failure,
        "ServerError must not be a quality failure"
    );
}
1690
/// The category of an `Execution` error wrapping `io::ErrorKind::TimedOut` must not be
/// flagged as a quality failure.
#[test]
fn b4_tool_error_execution_timed_out_not_quality_failure() {
    use std::io;

    let timed_out = ToolError::Execution(io::Error::new(io::ErrorKind::TimedOut, "timeout"));
    let is_quality_failure = timed_out.category().is_quality_failure();
    assert!(!is_quality_failure, "Timeout must not be a quality failure");
}
1699
1700    // ── ToolError::Shell category tests ──────────────────────────────────────
1701
/// A `Shell` error built with the `PolicyBlocked` category (exit code 126) must report that
/// same category from `category()`.
#[test]
fn tool_error_shell_exit126_is_policy_blocked() {
    use crate::error_taxonomy::ToolErrorCategory;
    let denied = ToolError::Shell {
        exit_code: 126,
        category: ToolErrorCategory::PolicyBlocked,
        message: String::from("permission denied"),
    };
    let category = denied.category();
    assert_eq!(category, ToolErrorCategory::PolicyBlocked);
}
1712
/// A `Shell` error built with the `PermanentFailure` category (exit code 127) must report
/// that category, and that category must not be retryable.
#[test]
fn tool_error_shell_exit127_is_permanent_failure() {
    use crate::error_taxonomy::ToolErrorCategory;
    let not_found = ToolError::Shell {
        exit_code: 127,
        category: ToolErrorCategory::PermanentFailure,
        message: String::from("command not found"),
    };
    assert_eq!(not_found.category(), ToolErrorCategory::PermanentFailure);
    let retryable = not_found.category().is_retryable();
    assert!(!retryable);
}
1724
/// Shell exit errors are not attributable to LLM output quality, so their category must
/// not be flagged as a quality failure.
#[test]
fn tool_error_shell_not_quality_failure() {
    use crate::error_taxonomy::ToolErrorCategory;
    let not_found = ToolError::Shell {
        exit_code: 127,
        category: ToolErrorCategory::PermanentFailure,
        message: String::from("command not found"),
    };
    let is_quality_failure = not_found.category().is_quality_failure();
    assert!(!is_quality_failure);
}
1736
1737    // ── requires_confirmation / requires_confirmation_erased tests (#3644) ───
1738
1739    /// Stub implementing only `ToolExecutor` without overriding `requires_confirmation`.
1740    struct StubExecutor;
1741    impl ToolExecutor for StubExecutor {
1742        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1743            Ok(None)
1744        }
1745    }
1746
1747    /// Stub that always signals confirmation is required via `ToolExecutor::requires_confirmation`.
1748    struct ConfirmingExecutor;
1749    impl ToolExecutor for ConfirmingExecutor {
1750        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1751            Ok(None)
1752        }
1753        fn requires_confirmation(&self, _call: &ToolCall) -> bool {
1754            true
1755        }
1756    }
1757
1758    fn dummy_call() -> ToolCall {
1759        ToolCall {
1760            tool_id: ToolName::new("test"),
1761            params: serde_json::Map::new(),
1762            caller_id: None,
1763            context: None,
1764
1765            tool_call_id: String::new(),
1766            skill_name: None,
1767        }
1768    }
1769
/// An executor that does not override `requires_confirmation` must report `false`.
#[test]
fn requires_confirmation_default_is_false_on_tool_executor() {
    let call = dummy_call();
    let needs_confirmation = ToolExecutor::requires_confirmation(&StubExecutor, &call);
    assert!(
        !needs_confirmation,
        "ToolExecutor default requires_confirmation must be false"
    );
}
1778
/// For a `ToolExecutor` implementor, the erased entry point is expected to route through the
/// blanket impl to `ToolExecutor::requires_confirmation`, which is `false` for the stub.
#[test]
fn requires_confirmation_erased_delegates_to_tool_executor_default() {
    let call = dummy_call();
    let needs_confirmation =
        <StubExecutor as ErasedToolExecutor>::requires_confirmation_erased(&StubExecutor, &call);
    assert!(
        !needs_confirmation,
        "requires_confirmation_erased via blanket impl must return false for stub executor"
    );
}
1788
/// `ConfirmingExecutor` overrides `requires_confirmation` to return `true`; the erased
/// entry point must propagate that answer.
#[test]
fn requires_confirmation_erased_delegates_override() {
    let call = dummy_call();
    let needs_confirmation = <ConfirmingExecutor as ErasedToolExecutor>::requires_confirmation_erased(
        &ConfirmingExecutor,
        &call,
    );
    assert!(
        needs_confirmation,
        "requires_confirmation_erased must return true when ToolExecutor override returns true"
    );
}
1799
/// The trait-level default body of `ErasedToolExecutor::requires_confirmation_erased`
/// must answer `true`.
#[test]
fn requires_confirmation_erased_default_on_erased_trait_is_true() {
    /// Boxed future type returned by the erased execute methods implemented below.
    type ErasedFuture<'a> =
        std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;

    // `ManualErased` implements `ErasedToolExecutor` by hand and has no `ToolExecutor` impl,
    // so calling `requires_confirmation_erased` on it exercises the trait's own default body.
    struct ManualErased;

    impl ErasedToolExecutor for ManualErased {
        fn execute_erased<'a>(&'a self, _response: &'a str) -> ErasedFuture<'a> {
            Box::pin(std::future::ready(Ok(None)))
        }

        fn execute_confirmed_erased<'a>(&'a self, _response: &'a str) -> ErasedFuture<'a> {
            Box::pin(std::future::ready(Ok(None)))
        }

        fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
            Vec::new()
        }

        fn execute_tool_call_erased<'a>(&'a self, _call: &'a ToolCall) -> ErasedFuture<'a> {
            Box::pin(std::future::ready(Ok(None)))
        }

        fn is_tool_retryable_erased(&self, _tool_id: &str) -> bool {
            false
        }

        // `requires_confirmation_erased` is deliberately left out: the default is under test.
    }

    let call = dummy_call();
    let needs_confirmation =
        ErasedToolExecutor::requires_confirmation_erased(&ManualErased, &call);
    assert!(
        needs_confirmation,
        "ErasedToolExecutor trait-level default for requires_confirmation_erased must be true"
    );
}
1846
1847    // ── DynExecutor::requires_confirmation delegation tests (#3650) ──────────
1848
/// A `DynExecutor` wrapping `ConfirmingExecutor` must report that confirmation is required.
#[test]
fn dyn_executor_requires_confirmation_delegates() {
    use std::sync::Arc;

    let inner = Arc::new(ConfirmingExecutor);
    let erased = Arc::clone(&inner) as Arc<dyn ErasedToolExecutor>;
    let exec = DynExecutor(erased);
    let call = dummy_call();
    assert!(
        ToolExecutor::requires_confirmation(&exec, &call),
        "DynExecutor must delegate requires_confirmation to inner executor"
    );
}
1859
/// A `DynExecutor` wrapping `StubExecutor` must report that no confirmation is required.
#[test]
fn dyn_executor_requires_confirmation_default_false() {
    use std::sync::Arc;

    let inner = Arc::new(StubExecutor);
    let erased = Arc::clone(&inner) as Arc<dyn ErasedToolExecutor>;
    let exec = DynExecutor(erased);
    let call = dummy_call();
    assert!(
        !ToolExecutor::requires_confirmation(&exec, &call),
        "DynExecutor must return false when inner executor does not require confirmation"
    );
}
1870}