zeph_tools/
executor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use zeph_common::ToolName;
7
8use crate::shell::background::RunId;
9
/// Before/after snapshot of one file, used by the TUI to render a diff.
///
/// [`ShellExecutor`](crate::ShellExecutor) and [`FileExecutor`](crate::FileExecutor) produce
/// this value when a tool call modifies a tracked file; the TUI shows it as a side-by-side
/// diff.
#[derive(Debug, Clone)]
pub struct DiffData {
    /// Path of the modified file; may be relative or absolute.
    pub file_path: String,
    /// Contents of the file before the tool ran.
    pub old_content: String,
    /// Contents of the file after the tool ran.
    pub new_content: String,
}
23
24/// Structured tool invocation from LLM.
25///
26/// Produced by the agent loop when the LLM emits a structured tool call (as opposed to
27/// a legacy fenced code block). Dispatched to [`ToolExecutor::execute_tool_call`].
28///
29/// # Example
30///
31/// ```rust
32/// use zeph_tools::{ToolCall, ExecutionContext};
33/// use zeph_common::ToolName;
34///
35/// let call = ToolCall {
36///     tool_id: ToolName::new("bash"),
37///     params: {
38///         let mut m = serde_json::Map::new();
39///         m.insert("command".to_owned(), serde_json::Value::String("echo hello".to_owned()));
40///         m
41///     },
42///     caller_id: Some("user-42".to_owned()),
43///     context: Some(ExecutionContext::new().with_name("repo")),
44///     tool_call_id: String::new(),
45/// };
46/// assert_eq!(call.tool_id, "bash");
47/// ```
48#[derive(Debug, Clone)]
49pub struct ToolCall {
50    /// The tool identifier, matching a value from [`ToolExecutor::tool_definitions`].
51    pub tool_id: ToolName,
52    /// JSON parameters for the tool call, deserialized into the tool's parameter struct.
53    pub params: serde_json::Map<String, serde_json::Value>,
54    /// Opaque caller identifier propagated from the channel (user ID, session ID, etc.).
55    /// `None` for system-initiated calls (scheduler, self-learning, internal).
56    pub caller_id: Option<String>,
57    /// Per-turn execution environment. `None` means use the executor default (process CWD
58    /// and inherited env), which is identical to the behaviour before this field existed.
59    pub context: Option<crate::ExecutionContext>,
60    /// Opaque tool call ID used to correlate [`ToolEvent::OutputChunk`] events with
61    /// their originating tool call in the TUI. Empty when not set by the agent loop.
62    pub tool_call_id: String,
63}
64
65/// Cumulative filter statistics for a single tool execution.
66///
67/// Populated by [`ShellExecutor`](crate::ShellExecutor) when output filters are configured.
68/// Displayed in the TUI to show how much output was compacted before being sent to the LLM.
69#[derive(Debug, Clone, Default)]
70pub struct FilterStats {
71    /// Raw character count before filtering.
72    pub raw_chars: usize,
73    /// Character count after filtering.
74    pub filtered_chars: usize,
75    /// Raw line count before filtering.
76    pub raw_lines: usize,
77    /// Line count after filtering.
78    pub filtered_lines: usize,
79    /// Worst-case confidence across all applied filters.
80    pub confidence: Option<crate::FilterConfidence>,
81    /// The shell command that produced this output, for display purposes.
82    pub command: Option<String>,
83    /// Zero-based line indices that were kept after filtering.
84    pub kept_lines: Vec<usize>,
85}
86
87impl FilterStats {
88    /// Returns the percentage of characters removed by filtering.
89    ///
90    /// Returns `0.0` when there was no raw output to filter.
91    #[must_use]
92    #[allow(clippy::cast_precision_loss)]
93    pub fn savings_pct(&self) -> f64 {
94        if self.raw_chars == 0 {
95            return 0.0;
96        }
97        (1.0 - self.filtered_chars as f64 / self.raw_chars as f64) * 100.0
98    }
99
100    /// Estimates the number of LLM tokens saved by filtering.
101    ///
102    /// Uses the 4-chars-per-token approximation. Suitable for logging and metrics,
103    /// not for billing or exact budget calculations.
104    #[must_use]
105    pub fn estimated_tokens_saved(&self) -> usize {
106        self.raw_chars.saturating_sub(self.filtered_chars) / 4
107    }
108
109    /// Formats a one-line filter summary for log messages and TUI status.
110    ///
111    /// # Example
112    ///
113    /// ```rust
114    /// use zeph_tools::FilterStats;
115    ///
116    /// let stats = FilterStats {
117    ///     raw_chars: 1000,
118    ///     filtered_chars: 400,
119    ///     raw_lines: 50,
120    ///     filtered_lines: 20,
121    ///     command: Some("cargo build".to_owned()),
122    ///     ..Default::default()
123    /// };
124    /// let summary = stats.format_inline("shell");
125    /// assert!(summary.contains("60.0% filtered"));
126    /// ```
127    #[must_use]
128    pub fn format_inline(&self, tool_name: &str) -> String {
129        let cmd_label = self
130            .command
131            .as_deref()
132            .map(|c| {
133                let trimmed = c.trim();
134                if trimmed.len() > 60 {
135                    format!(" `{}…`", &trimmed[..57])
136                } else {
137                    format!(" `{trimmed}`")
138                }
139            })
140            .unwrap_or_default();
141        format!(
142            "[{tool_name}]{cmd_label} {} lines \u{2192} {} lines, {:.1}% filtered",
143            self.raw_lines,
144            self.filtered_lines,
145            self.savings_pct()
146        )
147    }
148}
149
150/// Provenance of a tool execution result.
151///
152/// Set by each executor at `ToolOutput` construction time. Used by the sanitizer bridge
153/// in `zeph-core` to select the appropriate `ContentSourceKind` and trust level.
154/// `None` means the source is unspecified (pass-through code, mocks, tests).
155#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
156#[serde(rename_all = "snake_case")]
157#[non_exhaustive]
158pub enum ClaimSource {
159    /// Local shell command execution.
160    Shell,
161    /// Local file system read/write.
162    FileSystem,
163    /// HTTP web scrape.
164    WebScrape,
165    /// MCP server tool response.
166    Mcp,
167    /// A2A agent message.
168    A2a,
169    /// Code search (LSP or semantic).
170    CodeSearch,
171    /// Agent diagnostics (internal).
172    Diagnostics,
173    /// Memory retrieval (semantic search).
174    Memory,
175    /// Telegram moderation action (reaction deletion).
176    Moderation,
177}
178
179/// Structured result from tool execution.
180///
181/// Returned by every [`ToolExecutor`] implementation on success. The agent loop uses
182/// [`ToolOutput::summary`] as the tool result text injected into the LLM context.
183///
184/// # Example
185///
186/// ```rust
187/// use zeph_tools::{ToolOutput, executor::ClaimSource};
188/// use zeph_common::ToolName;
189///
190/// let output = ToolOutput {
191///     tool_name: ToolName::new("shell"),
192///     summary: "hello\n".to_owned(),
193///     blocks_executed: 1,
194///     filter_stats: None,
195///     diff: None,
196///     streamed: false,
197///     terminal_id: None,
198///     locations: None,
199///     raw_response: None,
200///     claim_source: Some(ClaimSource::Shell),
201/// };
202/// assert_eq!(output.to_string(), "hello\n");
203/// ```
204#[derive(Debug, Clone)]
205pub struct ToolOutput {
206    /// Name of the tool that produced this output (e.g. `"shell"`, `"web-scrape"`).
207    pub tool_name: ToolName,
208    /// Human-readable result text injected into the LLM context.
209    pub summary: String,
210    /// Number of code blocks processed in this invocation.
211    pub blocks_executed: u32,
212    /// Output filter statistics when filtering was applied, `None` otherwise.
213    pub filter_stats: Option<FilterStats>,
214    /// File diff data for TUI display when the tool modified a tracked file.
215    pub diff: Option<DiffData>,
216    /// Whether this tool already streamed its output via `ToolEvent` channel.
217    pub streamed: bool,
218    /// Terminal ID when the tool was executed via IDE terminal (ACP terminal/* protocol).
219    pub terminal_id: Option<String>,
220    /// File paths touched by this tool call, for IDE follow-along (e.g. `ToolCallLocation`).
221    pub locations: Option<Vec<String>>,
222    /// Structured tool response payload for ACP intermediate `tool_call_update` notifications.
223    pub raw_response: Option<serde_json::Value>,
224    /// Provenance of this tool result. Set by the executor at construction time.
225    /// `None` in pass-through wrappers, mocks, and tests.
226    pub claim_source: Option<ClaimSource>,
227}
228
229impl fmt::Display for ToolOutput {
230    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
231        f.write_str(&self.summary)
232    }
233}
234
/// Largest tool output that is injected into the LLM context without truncation.
///
/// Anything longer is split into a head and a tail by [`truncate_tool_output`], so that both
/// the beginning and the end of a large command output survive.
pub const MAX_TOOL_OUTPUT_CHARS: usize = 30_000;

/// Applies head+tail truncation with the default limit [`MAX_TOOL_OUTPUT_CHARS`].
///
/// Shorthand for `truncate_tool_output_at(output, MAX_TOOL_OUTPUT_CHARS)`.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output;
///
/// let short = "hello world";
/// assert_eq!(truncate_tool_output(short), short);
/// ```
#[must_use]
pub fn truncate_tool_output(output: &str) -> String {
    let limit = MAX_TOOL_OUTPUT_CHARS;
    truncate_tool_output_at(output, limit)
}

/// Applies head+tail truncation to output longer than `max_chars`.
///
/// Lengths are measured with `str::len`, i.e. in bytes. Output within the limit is returned
/// unchanged. Otherwise roughly the first and the last `max_chars / 2` bytes are kept, with
/// a truncation marker between them. Both cut points are moved onto UTF-8 character
/// boundaries (the head cut backwards, the tail cut forwards), so neither part ever exceeds
/// `max_chars / 2` bytes.
///
/// # Example
///
/// ```rust
/// use zeph_tools::executor::truncate_tool_output_at;
///
/// let long = "a".repeat(200);
/// let truncated = truncate_tool_output_at(&long, 100);
/// assert!(truncated.contains("truncated"));
/// assert!(truncated.len() < long.len());
/// ```
#[must_use]
pub fn truncate_tool_output_at(output: &str, max_chars: usize) -> String {
    let total = output.len();
    if total <= max_chars {
        return output.to_owned();
    }

    let half = max_chars / 2;

    // Head cut: walk backwards to the nearest boundary at or before `half`.
    // `half < total` holds here and index 0 is always a boundary, so this terminates.
    let mut head_end = half;
    while !output.is_char_boundary(head_end) {
        head_end -= 1;
    }

    // Tail cut: walk forwards to the nearest boundary at or after `total - half`.
    // `total` itself is always a boundary, so this terminates.
    let mut tail_start = total - half;
    while !output.is_char_boundary(tail_start) {
        tail_start += 1;
    }

    let (head, tail) = (&output[..head_end], &output[tail_start..]);
    // Everything between the two cuts is dropped.
    let truncated = tail_start - head_end;

    format!(
        "{head}\n\n... [truncated {truncated} chars, showing first and last ~{half} chars] ...\n\n{tail}"
    )
}
290
291/// Event emitted during tool execution for real-time UI updates.
292///
293/// Sent over the [`ToolEventTx`] channel to the TUI or channel adapter.
294/// Each event variant corresponds to a phase in the tool execution lifecycle.
295#[derive(Debug, Clone)]
296#[non_exhaustive]
297pub enum ToolEvent {
298    /// The tool has started. Displayed in the TUI as a spinner with the command text.
299    Started {
300        tool_name: ToolName,
301        command: String,
302        /// Active sandbox profile, if any. `None` when sandbox is disabled.
303        sandbox_profile: Option<String>,
304        /// Canonical absolute working directory the command will run in.
305        /// `None` for executors that do not resolve a per-turn CWD.
306        resolved_cwd: Option<String>,
307        /// Name of the resolved execution environment (from `[[execution.environments]]`),
308        /// or `None` when no named environment was selected.
309        execution_env: Option<String>,
310    },
311    /// A chunk of streaming output was produced (e.g. from a long-running command).
312    OutputChunk {
313        tool_name: ToolName,
314        command: String,
315        chunk: String,
316        /// Opaque tool call ID matching the corresponding [`ToolEvent::Started`] event.
317        /// Empty string when the executor does not have access to the call ID.
318        tool_call_id: String,
319    },
320    /// The tool finished. Contains the full output and optional filter/diff data.
321    Completed {
322        tool_name: ToolName,
323        command: String,
324        /// Full output text (possibly filtered and truncated).
325        output: String,
326        /// `true` when the tool exited successfully, `false` on error.
327        success: bool,
328        filter_stats: Option<FilterStats>,
329        diff: Option<DiffData>,
330        /// Set when this completion belongs to a background run. `None` for blocking runs.
331        run_id: Option<RunId>,
332    },
333    /// A transactional rollback was performed, restoring or deleting files.
334    Rollback {
335        tool_name: ToolName,
336        command: String,
337        /// Number of files restored to their pre-execution content.
338        restored_count: usize,
339        /// Number of files that did not exist before execution and were deleted.
340        deleted_count: usize,
341    },
342}
343
344/// Sender half of the bounded channel used to stream [`ToolEvent`]s to the UI.
345///
346/// Capacity is 1024 slots. Streaming variants (`OutputChunk`, `Started`) use
347/// `try_send` and drop on full; terminal variants (`Completed`, `Rollback`) use
348/// `send().await` to guarantee delivery.
349///
350/// Created via [`tokio::sync::mpsc::channel`] with capacity `TOOL_EVENT_CHANNEL_CAP`.
351pub type ToolEventTx = tokio::sync::mpsc::Sender<ToolEvent>;
352
353/// Receiver half matching [`ToolEventTx`].
354pub type ToolEventRx = tokio::sync::mpsc::Receiver<ToolEvent>;
355
356/// Bounded capacity for the tool-event channel.
357pub const TOOL_EVENT_CHANNEL_CAP: usize = 1024;
358
359/// Classifies a tool error as transient (retryable) or permanent (abort immediately).
360///
361/// Transient errors may succeed on retry (network blips, race conditions).
362/// Permanent errors will not succeed regardless of retries (policy, bad args, not found).
363#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
364#[non_exhaustive]
365pub enum ErrorKind {
366    Transient,
367    Permanent,
368}
369
370impl std::fmt::Display for ErrorKind {
371    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
372        match self {
373            Self::Transient => f.write_str("transient"),
374            Self::Permanent => f.write_str("permanent"),
375        }
376    }
377}
378
379/// Errors that can occur during tool execution.
380#[derive(Debug, thiserror::Error)]
381pub enum ToolError {
382    #[error("command blocked by policy: {command}")]
383    Blocked { command: String },
384
385    /// Command was blocked and a safer alternative is available.
386    ///
387    /// Emitted by [`ShellExecutor`](crate::ShellExecutor) when `suggest_fix` returns a
388    /// suggestion. The agent receives both the block reason and the alternative so it can
389    /// self-correct without additional prompting.
390    #[error("command blocked by policy: {command}")]
391    BlockedWithFix {
392        command: String,
393        suggestion: Option<crate::shell::SafeFixSuggestion>,
394    },
395
396    #[error("path not allowed by sandbox: {path}")]
397    SandboxViolation { path: String },
398
399    #[error("command requires confirmation: {command}")]
400    ConfirmationRequired { command: String },
401
402    #[error("command timed out after {timeout_secs}s")]
403    Timeout { timeout_secs: u64 },
404
405    #[error("operation cancelled")]
406    Cancelled,
407
408    #[error("invalid tool parameters: {message}")]
409    InvalidParams { message: String },
410
411    #[error("execution failed: {0}")]
412    Execution(#[from] std::io::Error),
413
414    /// HTTP or API error with status code for fine-grained classification.
415    ///
416    /// Used by `WebScrapeExecutor` and other HTTP-based tools to preserve the status
417    /// code for taxonomy classification. Scope: HTTP tools only (MCP uses a separate path).
418    #[error("HTTP error {status}: {message}")]
419    Http { status: u16, message: String },
420
421    /// Shell execution error with explicit exit code and pre-classified category.
422    ///
423    /// Used by `ShellExecutor` when the exit code or stderr content maps to a known
424    /// taxonomy category (e.g., exit 126 → `PolicyBlocked`, exit 127 → `PermanentFailure`).
425    /// Preserves the exit code for audit logging and the category for skill evolution.
426    #[error("shell error (exit {exit_code}): {message}")]
427    Shell {
428        exit_code: i32,
429        category: crate::error_taxonomy::ToolErrorCategory,
430        message: String,
431    },
432
433    #[error("snapshot failed: {reason}")]
434    SnapshotFailed { reason: String },
435
436    /// Tool call rejected because the tool id is outside the active capability scope.
437    ///
438    /// Emitted by `ScopedToolExecutor` before any tool side-effect runs.
439    /// The audit log records `error_category = "out_of_scope"`.
440    // LLM isolation: task_type is never shown in the error message (P2-OutOfScope).
441    #[error("tool call denied by policy")]
442    OutOfScope {
443        /// Fully-qualified tool id that was rejected.
444        tool_id: String,
445        /// Active task type at dispatch time, if any.
446        task_type: Option<String>,
447    },
448
449    /// Tool call blocked by `ShadowProbeExecutor` after the LLM safety probe returned Deny.
450    ///
451    /// Emitted before any tool side-effect runs. The probe evaluated the full trajectory
452    /// context and determined the call is unsafe. Reason is LLM-generated; shown to the
453    /// agent loop as the tool result so the model can adapt.
454    #[error("tool call denied by safety probe: {reason}")]
455    SafetyDenied {
456        /// Human-readable explanation from the LLM safety probe.
457        reason: String,
458    },
459
460    /// Tool call blocked by the MAGE `TrajectoryRiskAccumulator` (spec 004-16).
461    ///
462    /// Cumulative session risk exceeded `risk_threshold`. The agent loop receives the
463    /// score and the top contributing signals so it can explain the denial to the user.
464    #[error("tool call blocked: trajectory risk {score:.3} exceeds threshold")]
465    TrajectoryRiskExceeded {
466        /// Current `trajectory_risk` value at the time of the block.
467        score: f64,
468        /// Human-readable labels for the top contributing signals (up to 3).
469        top_signals: Vec<String>,
470    },
471}
472
473impl ToolError {
474    /// Fine-grained error classification using the 12-category taxonomy.
475    ///
476    /// Prefer `category()` over `kind()` for new code. `kind()` is preserved for
477    /// backward compatibility and delegates to `category().error_kind()`.
478    #[must_use]
479    pub fn category(&self) -> crate::error_taxonomy::ToolErrorCategory {
480        use crate::error_taxonomy::{ToolErrorCategory, classify_http_status, classify_io_error};
481        match self {
482            Self::Blocked { .. } | Self::BlockedWithFix { .. } | Self::SandboxViolation { .. } => {
483                ToolErrorCategory::PolicyBlocked
484            }
485            Self::ConfirmationRequired { .. } => ToolErrorCategory::ConfirmationRequired,
486            Self::Timeout { .. } => ToolErrorCategory::Timeout,
487            Self::Cancelled => ToolErrorCategory::Cancelled,
488            Self::InvalidParams { .. } => ToolErrorCategory::InvalidParameters,
489            Self::Http { status, .. } => classify_http_status(*status),
490            Self::Execution(io_err) => classify_io_error(io_err),
491            Self::Shell { category, .. } => *category,
492            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
493            Self::OutOfScope { .. }
494            | Self::SafetyDenied { .. }
495            | Self::TrajectoryRiskExceeded { .. } => ToolErrorCategory::PolicyBlocked,
496        }
497    }
498
499    /// Coarse classification for backward compatibility. Delegates to `category().error_kind()`.
500    ///
501    /// For `Execution(io::Error)`, the classification inspects `io::Error::kind()`:
502    /// - Transient: `TimedOut`, `WouldBlock`, `Interrupted`, `ConnectionReset`,
503    ///   `ConnectionAborted`, `BrokenPipe` — these may succeed on retry.
504    /// - Permanent: `NotFound`, `PermissionDenied`, `AlreadyExists`, and all other
505    ///   I/O error kinds — retrying would waste time with no benefit.
506    #[must_use]
507    pub fn kind(&self) -> ErrorKind {
508        use crate::error_taxonomy::ToolErrorCategoryExt;
509        self.category().error_kind()
510    }
511}
512
513/// Deserialize tool call params from a `serde_json::Map<String, Value>` into a typed struct.
514///
515/// # Errors
516///
517/// Returns `ToolError::InvalidParams` when deserialization fails.
518pub fn deserialize_params<T: serde::de::DeserializeOwned>(
519    params: &serde_json::Map<String, serde_json::Value>,
520) -> Result<T, ToolError> {
521    let obj = serde_json::Value::Object(params.clone());
522    serde_json::from_value(obj).map_err(|e| ToolError::InvalidParams {
523        message: e.to_string(),
524    })
525}
526
527/// Async trait for tool execution backends.
528///
529/// Implementations include [`ShellExecutor`](crate::ShellExecutor),
530/// [`WebScrapeExecutor`](crate::WebScrapeExecutor), [`CompositeExecutor`](crate::CompositeExecutor),
531/// and [`FileExecutor`](crate::FileExecutor).
532///
533/// # Contract
534///
535/// - [`execute`](ToolExecutor::execute) and [`execute_tool_call`](ToolExecutor::execute_tool_call)
536///   return `Ok(None)` when the executor does not handle the given input — callers must not
537///   treat `None` as an error.
538/// - All methods must be `Send + Sync` and free of blocking I/O.
539/// - Implementations must enforce their own security controls (blocklists, sandboxes, SSRF
540///   protection) before executing any side-effectful operation.
541/// - [`execute_confirmed`](ToolExecutor::execute_confirmed) and
542///   [`execute_tool_call_confirmed`](ToolExecutor::execute_tool_call_confirmed) bypass
543///   confirmation gates only — all other security controls remain active.
544///
545/// # Two Invocation Paths
546///
547/// **Legacy fenced blocks**: The agent loop passes the raw LLM response string to [`execute`](ToolExecutor::execute).
548/// The executor parses ` ```bash ` or ` ```scrape ` blocks and executes each one.
549///
550/// **Structured tool calls**: The agent loop constructs a [`ToolCall`] from the LLM's
551/// JSON tool-use response and dispatches it via [`execute_tool_call`](ToolExecutor::execute_tool_call).
552/// This is the preferred path for new code.
553///
554/// # Example
555///
556/// ```rust
557/// use zeph_tools::{ToolExecutor, ToolCall, ToolOutput, ToolError, executor::ClaimSource};
558///
559/// #[derive(Debug)]
560/// struct EchoExecutor;
561///
562/// impl ToolExecutor for EchoExecutor {
563///     async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
564///         Ok(None) // not a fenced-block executor
565///     }
566///
567///     async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
568///         if call.tool_id != "echo" {
569///             return Ok(None);
570///         }
571///         let text = call.params.get("text")
572///             .and_then(|v| v.as_str())
573///             .unwrap_or("")
574///             .to_owned();
575///         Ok(Some(ToolOutput {
576///             tool_name: "echo".into(),
577///             summary: text,
578///             blocks_executed: 1,
579///             filter_stats: None,
580///             diff: None,
581///             streamed: false,
582///             terminal_id: None,
583///             locations: None,
584///             raw_response: None,
585///             claim_source: None,
586///         }))
587///     }
588/// }
589/// ```
590/// # TODO (G3 — deferred: Tower-style tool middleware stack)
591///
592/// Currently, cross-cutting concerns (audit logging, rate limiting, sandboxing, guardrails)
593/// are scattered across individual executor implementations. The planned approach is a
594/// composable middleware stack similar to Tower's `Service` trait:
595///
596/// ```text
597/// AuditLayer::new(RateLimitLayer::new(SandboxLayer::new(ShellExecutor::new())))
598/// ```
599///
600/// **Blocked by:** requires D2 (consolidating `ToolExecutor` + `ErasedToolExecutor` into one
601/// object-safe trait). See critic review §S3 for the tradeoff between RPIT fast-path and
602/// dynamic dispatch overhead before collapsing D2.
603///
604/// # TODO (D2 — deferred: consolidate `ToolExecutor` and `ErasedToolExecutor`)
605///
606/// Having two parallel traits creates duplication and confusion. The blanket impl
607/// `impl<T: ToolExecutor> ErasedToolExecutor for T` works but every new method must be
608/// added to both traits. Use `trait_variant::make` or a single object-safe design.
609///
610/// **Blocked by:** need to benchmark the RPIT fast-path before removing it. See critic §S3.
611pub trait ToolExecutor: Send + Sync {
612    /// Parse `response` for fenced tool blocks and execute them.
613    ///
614    /// Returns `Ok(None)` when no tool blocks are found in `response`.
615    ///
616    /// # Errors
617    ///
618    /// Returns [`ToolError`] when a block is found but execution fails (blocked command,
619    /// sandbox violation, network error, timeout, etc.).
620    fn execute(
621        &self,
622        response: &str,
623    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send;
624
625    /// Execute bypassing confirmation checks (called after user approves).
626    ///
627    /// Security controls other than the confirmation gate remain active. Default
628    /// implementation delegates to [`execute`](ToolExecutor::execute).
629    ///
630    /// # Errors
631    ///
632    /// Returns [`ToolError`] on execution failure.
633    fn execute_confirmed(
634        &self,
635        response: &str,
636    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
637        self.execute(response)
638    }
639
640    /// Return the tool definitions this executor can handle.
641    ///
642    /// Used to populate the LLM's tool schema at context-assembly time.
643    /// Returns an empty `Vec` by default (for executors that only handle fenced blocks).
644    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
645        vec![]
646    }
647
648    /// Execute a structured tool call. Returns `Ok(None)` if `call.tool_id` is not handled.
649    ///
650    /// # Errors
651    ///
652    /// Returns [`ToolError`] when the tool ID is handled but execution fails.
653    fn execute_tool_call(
654        &self,
655        _call: &ToolCall,
656    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
657        std::future::ready(Ok(None))
658    }
659
660    /// Execute a structured tool call bypassing confirmation checks.
661    ///
662    /// Called after the user has explicitly approved the tool invocation.
663    /// Default implementation delegates to [`execute_tool_call`](ToolExecutor::execute_tool_call).
664    ///
665    /// # Errors
666    ///
667    /// Returns [`ToolError`] on execution failure.
668    fn execute_tool_call_confirmed(
669        &self,
670        call: &ToolCall,
671    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
672        self.execute_tool_call(call)
673    }
674
675    /// Inject environment variables for the currently active skill. No-op by default.
676    ///
677    /// Called by the agent loop before each turn when the active skill specifies env vars.
678    /// Implementations that ignore this (e.g. `WebScrapeExecutor`) may leave the default.
679    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}
680
681    /// Set the effective trust level for the currently active skill. No-op by default.
682    ///
683    /// Trust level affects which operations are permitted (e.g. network access, file writes).
684    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}
685
686    /// Whether the executor can safely retry this tool call on a transient error.
687    ///
688    /// Only idempotent operations (e.g. read-only HTTP GET) should return `true`.
689    /// Shell commands and other non-idempotent operations must keep the default `false`
690    /// to prevent double-execution of side-effectful commands.
691    fn is_tool_retryable(&self, _tool_id: &str) -> bool {
692        false
693    }
694
695    /// Whether a tool call can be safely dispatched speculatively (before the LLM finishes).
696    ///
697    /// Speculative execution requires the tool to be:
698    /// 1. Idempotent — repeated execution with the same args produces the same result.
699    /// 2. Side-effect-free or cheaply reversible.
700    /// 3. Not subject to user confirmation (`needs_confirmation` must be false at call time).
701    ///
702    /// Default: `false` (safe). Override to `true` only for tools that satisfy all three
703    /// properties. The engine additionally gates on trust level and confirmation status
704    /// regardless of this flag.
705    ///
706    /// # Examples
707    ///
708    /// ```rust
709    /// use zeph_tools::ToolExecutor;
710    ///
711    /// struct ReadOnlyExecutor;
712    /// impl ToolExecutor for ReadOnlyExecutor {
713    ///     async fn execute(&self, _: &str) -> Result<Option<zeph_tools::ToolOutput>, zeph_tools::ToolError> {
714    ///         Ok(None)
715    ///     }
716    ///     fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
717    ///         true // read-only, idempotent
718    ///     }
719    /// }
720    /// ```
721    fn is_tool_speculatable(&self, _tool_id: &str) -> bool {
722        false
723    }
724
725    /// Return `true` when `call` would require user confirmation before execution.
726    ///
727    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
728    /// Used by the speculative engine to gate dispatch without causing double side-effects.
729    ///
730    /// Default: `false`. Executors that enforce a confirmation policy (e.g. `TrustGateExecutor`)
731    /// must override this to reflect their actual policy without executing the tool.
732    fn requires_confirmation(&self, _call: &ToolCall) -> bool {
733        false
734    }
735}
736
737/// Object-safe erased version of [`ToolExecutor`] using boxed futures.
738///
739/// Because [`ToolExecutor`] uses `impl Future` return types, it is not object-safe and
740/// cannot be used as `dyn ToolExecutor`. This trait provides the same interface with
741/// `Pin<Box<dyn Future>>` returns, enabling dynamic dispatch.
742///
743/// Implemented automatically for all `T: ToolExecutor + 'static` via the blanket impl below.
744/// Use [`DynExecutor`] or `Box<dyn ErasedToolExecutor>` when runtime polymorphism is needed.
745pub trait ErasedToolExecutor: Send + Sync {
746    fn execute_erased<'a>(
747        &'a self,
748        response: &'a str,
749    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;
750
751    fn execute_confirmed_erased<'a>(
752        &'a self,
753        response: &'a str,
754    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;
755
756    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef>;
757
758    fn execute_tool_call_erased<'a>(
759        &'a self,
760        call: &'a ToolCall,
761    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>;
762
763    fn execute_tool_call_confirmed_erased<'a>(
764        &'a self,
765        call: &'a ToolCall,
766    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
767    {
768        // TrustGateExecutor overrides ToolExecutor::execute_tool_call_confirmed; the blanket
769        // impl for T: ToolExecutor routes this call through it via execute_tool_call_confirmed_erased.
770        // Other implementors fall back to execute_tool_call_erased (normal enforcement path).
771        self.execute_tool_call_erased(call)
772    }
773
774    /// Inject environment variables for the currently active skill. No-op by default.
775    fn set_skill_env(&self, _env: Option<std::collections::HashMap<String, String>>) {}
776
777    /// Set the effective trust level for the currently active skill. No-op by default.
778    fn set_effective_trust(&self, _level: crate::SkillTrustLevel) {}
779
780    /// Whether the executor can safely retry this tool call on a transient error.
781    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool;
782
783    /// Whether a tool call can be safely dispatched speculatively.
784    ///
785    /// Default: `false`. Override to `true` in read-only executors.
786    fn is_tool_speculatable_erased(&self, _tool_id: &str) -> bool {
787        false
788    }
789
790    /// Return `true` when `call` would require user confirmation before execution.
791    ///
792    /// This is a pure metadata/policy query — implementations must **not** execute the tool.
793    /// Used by the speculative engine to gate dispatch without causing double side-effects.
794    ///
795    /// Default: `true` (confirmation required). Implementors that want to allow speculative
796    /// dispatch must explicitly return `false`. The blanket impl for `T: ToolExecutor`
797    /// delegates to [`ToolExecutor::requires_confirmation`].
798    fn requires_confirmation_erased(&self, _call: &ToolCall) -> bool {
799        true
800    }
801}
802
803impl<T: ToolExecutor> ErasedToolExecutor for T {
804    fn execute_erased<'a>(
805        &'a self,
806        response: &'a str,
807    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
808    {
809        Box::pin(self.execute(response))
810    }
811
812    fn execute_confirmed_erased<'a>(
813        &'a self,
814        response: &'a str,
815    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
816    {
817        Box::pin(self.execute_confirmed(response))
818    }
819
820    fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
821        self.tool_definitions()
822    }
823
824    fn execute_tool_call_erased<'a>(
825        &'a self,
826        call: &'a ToolCall,
827    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
828    {
829        Box::pin(self.execute_tool_call(call))
830    }
831
832    fn execute_tool_call_confirmed_erased<'a>(
833        &'a self,
834        call: &'a ToolCall,
835    ) -> std::pin::Pin<Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>>
836    {
837        Box::pin(self.execute_tool_call_confirmed(call))
838    }
839
840    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
841        ToolExecutor::set_skill_env(self, env);
842    }
843
844    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
845        ToolExecutor::set_effective_trust(self, level);
846    }
847
848    fn is_tool_retryable_erased(&self, tool_id: &str) -> bool {
849        ToolExecutor::is_tool_retryable(self, tool_id)
850    }
851
852    fn is_tool_speculatable_erased(&self, tool_id: &str) -> bool {
853        ToolExecutor::is_tool_speculatable(self, tool_id)
854    }
855
856    fn requires_confirmation_erased(&self, call: &ToolCall) -> bool {
857        ToolExecutor::requires_confirmation(self, call)
858    }
859}
860
/// Wraps `Arc<dyn ErasedToolExecutor>` so it can be used as a concrete `ToolExecutor`.
///
/// Enables dynamic composition of tool executors at runtime without static type chains.
///
/// Its [`ToolExecutor`] impl delegates to the wrapped executor's `*_erased` methods. The
/// single public field is the shared handle to that executor.
pub struct DynExecutor(pub std::sync::Arc<dyn ErasedToolExecutor>);
865
866impl ToolExecutor for DynExecutor {
867    fn execute(
868        &self,
869        response: &str,
870    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
871        // Clone data to satisfy the 'static-ish bound: erased futures must not borrow self.
872        let inner = std::sync::Arc::clone(&self.0);
873        let response = response.to_owned();
874        async move { inner.execute_erased(&response).await }
875    }
876
877    fn execute_confirmed(
878        &self,
879        response: &str,
880    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
881        let inner = std::sync::Arc::clone(&self.0);
882        let response = response.to_owned();
883        async move { inner.execute_confirmed_erased(&response).await }
884    }
885
886    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
887        self.0.tool_definitions_erased()
888    }
889
890    fn execute_tool_call(
891        &self,
892        call: &ToolCall,
893    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
894        let inner = std::sync::Arc::clone(&self.0);
895        let call = call.clone();
896        async move { inner.execute_tool_call_erased(&call).await }
897    }
898
899    fn execute_tool_call_confirmed(
900        &self,
901        call: &ToolCall,
902    ) -> impl Future<Output = Result<Option<ToolOutput>, ToolError>> + Send {
903        let inner = std::sync::Arc::clone(&self.0);
904        let call = call.clone();
905        async move { inner.execute_tool_call_confirmed_erased(&call).await }
906    }
907
908    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
909        ErasedToolExecutor::set_skill_env(self.0.as_ref(), env);
910    }
911
912    fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
913        ErasedToolExecutor::set_effective_trust(self.0.as_ref(), level);
914    }
915
916    fn is_tool_retryable(&self, tool_id: &str) -> bool {
917        self.0.is_tool_retryable_erased(tool_id)
918    }
919
920    fn is_tool_speculatable(&self, tool_id: &str) -> bool {
921        self.0.is_tool_speculatable_erased(tool_id)
922    }
923
924    fn requires_confirmation(&self, call: &ToolCall) -> bool {
925        self.0.requires_confirmation_erased(call)
926    }
927}
928
/// Collect the contents of every fenced code block tagged with `lang`.
///
/// Scans `text` for an opening `` ```{lang} `` fence followed by a closing `` ``` `` and
/// returns the text in between, trimmed, in order of appearance. An opening fence with
/// no closing fence ends the scan.
///
/// The language tag must end at a word boundary: an opener followed directly by an
/// alphanumeric character, `_`, or `-` is skipped, so searching for `bash` does not pick
/// up a `` ```bashrc `` block.
#[must_use]
pub fn extract_fenced_blocks<'a>(text: &'a str, lang: &str) -> Vec<&'a str> {
    const CLOSING_FENCE: &str = "```";
    let opener = format!("```{lang}");
    let mut found = Vec::new();
    // Byte offset into `text` where the next search for an opener begins.
    let mut cursor = 0;

    while let Some(offset) = text[cursor..].find(&opener) {
        let body_start = cursor + offset + opener.len();
        let tail = &text[body_start..];

        // A longer tag that merely shares the prefix: resume right after the partial match.
        if tail.starts_with(|c: char| c.is_alphanumeric() || c == '_' || c == '-') {
            cursor = body_start;
            continue;
        }

        let Some(close) = tail.find(CLOSING_FENCE) else {
            break;
        };
        found.push(tail[..close].trim());
        cursor = body_start + close + CLOSING_FENCE.len();
    }

    found
}
965
966#[cfg(test)]
967mod tests {
968    use super::*;
969
970    #[test]
971    fn tool_output_display() {
972        let output = ToolOutput {
973            tool_name: ToolName::new("bash"),
974            summary: "$ echo hello\nhello".to_owned(),
975            blocks_executed: 1,
976            filter_stats: None,
977            diff: None,
978            streamed: false,
979            terminal_id: None,
980            locations: None,
981            raw_response: None,
982            claim_source: None,
983        };
984        assert_eq!(output.to_string(), "$ echo hello\nhello");
985    }
986
987    #[test]
988    fn tool_error_blocked_display() {
989        let err = ToolError::Blocked {
990            command: "rm -rf /".to_owned(),
991        };
992        assert_eq!(err.to_string(), "command blocked by policy: rm -rf /");
993    }
994
995    #[test]
996    fn tool_error_sandbox_violation_display() {
997        let err = ToolError::SandboxViolation {
998            path: "/etc/shadow".to_owned(),
999        };
1000        assert_eq!(err.to_string(), "path not allowed by sandbox: /etc/shadow");
1001    }
1002
1003    #[test]
1004    fn tool_error_confirmation_required_display() {
1005        let err = ToolError::ConfirmationRequired {
1006            command: "rm -rf /tmp".to_owned(),
1007        };
1008        assert_eq!(
1009            err.to_string(),
1010            "command requires confirmation: rm -rf /tmp"
1011        );
1012    }
1013
1014    #[test]
1015    fn tool_error_timeout_display() {
1016        let err = ToolError::Timeout { timeout_secs: 30 };
1017        assert_eq!(err.to_string(), "command timed out after 30s");
1018    }
1019
1020    #[test]
1021    fn tool_error_invalid_params_display() {
1022        let err = ToolError::InvalidParams {
1023            message: "missing field `command`".to_owned(),
1024        };
1025        assert_eq!(
1026            err.to_string(),
1027            "invalid tool parameters: missing field `command`"
1028        );
1029    }
1030
1031    #[test]
1032    fn deserialize_params_valid() {
1033        #[derive(Debug, serde::Deserialize, PartialEq)]
1034        struct P {
1035            name: String,
1036            count: u32,
1037        }
1038        let mut map = serde_json::Map::new();
1039        map.insert("name".to_owned(), serde_json::json!("test"));
1040        map.insert("count".to_owned(), serde_json::json!(42));
1041        let p: P = deserialize_params(&map).unwrap();
1042        assert_eq!(
1043            p,
1044            P {
1045                name: "test".to_owned(),
1046                count: 42
1047            }
1048        );
1049    }
1050
1051    #[test]
1052    fn deserialize_params_missing_required_field() {
1053        #[derive(Debug, serde::Deserialize)]
1054        #[allow(dead_code)]
1055        struct P {
1056            name: String,
1057        }
1058        let map = serde_json::Map::new();
1059        let err = deserialize_params::<P>(&map).unwrap_err();
1060        assert!(matches!(err, ToolError::InvalidParams { .. }));
1061    }
1062
1063    #[test]
1064    fn deserialize_params_wrong_type() {
1065        #[derive(Debug, serde::Deserialize)]
1066        #[allow(dead_code)]
1067        struct P {
1068            count: u32,
1069        }
1070        let mut map = serde_json::Map::new();
1071        map.insert("count".to_owned(), serde_json::json!("not a number"));
1072        let err = deserialize_params::<P>(&map).unwrap_err();
1073        assert!(matches!(err, ToolError::InvalidParams { .. }));
1074    }
1075
1076    #[test]
1077    fn deserialize_params_all_optional_empty() {
1078        #[derive(Debug, serde::Deserialize, PartialEq)]
1079        struct P {
1080            name: Option<String>,
1081        }
1082        let map = serde_json::Map::new();
1083        let p: P = deserialize_params(&map).unwrap();
1084        assert_eq!(p, P { name: None });
1085    }
1086
1087    #[test]
1088    fn deserialize_params_ignores_extra_fields() {
1089        #[derive(Debug, serde::Deserialize, PartialEq)]
1090        struct P {
1091            name: String,
1092        }
1093        let mut map = serde_json::Map::new();
1094        map.insert("name".to_owned(), serde_json::json!("test"));
1095        map.insert("extra".to_owned(), serde_json::json!(true));
1096        let p: P = deserialize_params(&map).unwrap();
1097        assert_eq!(
1098            p,
1099            P {
1100                name: "test".to_owned()
1101            }
1102        );
1103    }
1104
1105    #[test]
1106    fn tool_error_execution_display() {
1107        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash not found");
1108        let err = ToolError::Execution(io_err);
1109        assert!(err.to_string().starts_with("execution failed:"));
1110        assert!(err.to_string().contains("bash not found"));
1111    }
1112
1113    // ErrorKind classification tests
1114    #[test]
1115    fn error_kind_timeout_is_transient() {
1116        let err = ToolError::Timeout { timeout_secs: 30 };
1117        assert_eq!(err.kind(), ErrorKind::Transient);
1118    }
1119
1120    #[test]
1121    fn error_kind_blocked_is_permanent() {
1122        let err = ToolError::Blocked {
1123            command: "rm -rf /".to_owned(),
1124        };
1125        assert_eq!(err.kind(), ErrorKind::Permanent);
1126    }
1127
1128    #[test]
1129    fn error_kind_sandbox_violation_is_permanent() {
1130        let err = ToolError::SandboxViolation {
1131            path: "/etc/shadow".to_owned(),
1132        };
1133        assert_eq!(err.kind(), ErrorKind::Permanent);
1134    }
1135
1136    #[test]
1137    fn error_kind_cancelled_is_permanent() {
1138        assert_eq!(ToolError::Cancelled.kind(), ErrorKind::Permanent);
1139    }
1140
1141    #[test]
1142    fn error_kind_invalid_params_is_permanent() {
1143        let err = ToolError::InvalidParams {
1144            message: "bad arg".to_owned(),
1145        };
1146        assert_eq!(err.kind(), ErrorKind::Permanent);
1147    }
1148
1149    #[test]
1150    fn error_kind_confirmation_required_is_permanent() {
1151        let err = ToolError::ConfirmationRequired {
1152            command: "rm /tmp/x".to_owned(),
1153        };
1154        assert_eq!(err.kind(), ErrorKind::Permanent);
1155    }
1156
1157    #[test]
1158    fn error_kind_execution_timed_out_is_transient() {
1159        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1160        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1161    }
1162
1163    #[test]
1164    fn error_kind_execution_interrupted_is_transient() {
1165        let io_err = std::io::Error::new(std::io::ErrorKind::Interrupted, "interrupted");
1166        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1167    }
1168
1169    #[test]
1170    fn error_kind_execution_connection_reset_is_transient() {
1171        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "reset");
1172        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1173    }
1174
1175    #[test]
1176    fn error_kind_execution_broken_pipe_is_transient() {
1177        let io_err = std::io::Error::new(std::io::ErrorKind::BrokenPipe, "pipe broken");
1178        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1179    }
1180
1181    #[test]
1182    fn error_kind_execution_would_block_is_transient() {
1183        let io_err = std::io::Error::new(std::io::ErrorKind::WouldBlock, "would block");
1184        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1185    }
1186
1187    #[test]
1188    fn error_kind_execution_connection_aborted_is_transient() {
1189        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionAborted, "aborted");
1190        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Transient);
1191    }
1192
1193    #[test]
1194    fn error_kind_execution_not_found_is_permanent() {
1195        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "not found");
1196        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1197    }
1198
1199    #[test]
1200    fn error_kind_execution_permission_denied_is_permanent() {
1201        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied");
1202        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1203    }
1204
1205    #[test]
1206    fn error_kind_execution_other_is_permanent() {
1207        let io_err = std::io::Error::other("some other error");
1208        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1209    }
1210
1211    #[test]
1212    fn error_kind_execution_already_exists_is_permanent() {
1213        let io_err = std::io::Error::new(std::io::ErrorKind::AlreadyExists, "exists");
1214        assert_eq!(ToolError::Execution(io_err).kind(), ErrorKind::Permanent);
1215    }
1216
1217    #[test]
1218    fn error_kind_display() {
1219        assert_eq!(ErrorKind::Transient.to_string(), "transient");
1220        assert_eq!(ErrorKind::Permanent.to_string(), "permanent");
1221    }
1222
1223    #[test]
1224    fn truncate_tool_output_short_passthrough() {
1225        let short = "hello world";
1226        assert_eq!(truncate_tool_output(short), short);
1227    }
1228
1229    #[test]
1230    fn truncate_tool_output_exact_limit() {
1231        let exact = "a".repeat(MAX_TOOL_OUTPUT_CHARS);
1232        assert_eq!(truncate_tool_output(&exact), exact);
1233    }
1234
1235    #[test]
1236    fn truncate_tool_output_long_split() {
1237        let long = "x".repeat(MAX_TOOL_OUTPUT_CHARS + 1000);
1238        let result = truncate_tool_output(&long);
1239        assert!(result.contains("truncated"));
1240        assert!(result.len() < long.len());
1241    }
1242
1243    #[test]
1244    fn truncate_tool_output_notice_contains_count() {
1245        let long = "y".repeat(MAX_TOOL_OUTPUT_CHARS + 2000);
1246        let result = truncate_tool_output(&long);
1247        assert!(result.contains("truncated"));
1248        assert!(result.contains("chars"));
1249    }
1250
1251    #[derive(Debug)]
1252    struct DefaultExecutor;
1253    impl ToolExecutor for DefaultExecutor {
1254        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1255            Ok(None)
1256        }
1257    }
1258
1259    #[tokio::test]
1260    async fn execute_tool_call_default_returns_none() {
1261        let exec = DefaultExecutor;
1262        let call = ToolCall {
1263            tool_id: ToolName::new("anything"),
1264            params: serde_json::Map::new(),
1265            caller_id: None,
1266            context: None,
1267
1268            tool_call_id: String::new(),
1269        };
1270        let result = exec.execute_tool_call(&call).await.unwrap();
1271        assert!(result.is_none());
1272    }
1273
1274    #[test]
1275    fn filter_stats_savings_pct() {
1276        let fs = FilterStats {
1277            raw_chars: 1000,
1278            filtered_chars: 200,
1279            ..Default::default()
1280        };
1281        assert!((fs.savings_pct() - 80.0).abs() < 0.01);
1282    }
1283
1284    #[test]
1285    fn filter_stats_savings_pct_zero() {
1286        let fs = FilterStats::default();
1287        assert!((fs.savings_pct()).abs() < 0.01);
1288    }
1289
1290    #[test]
1291    fn filter_stats_estimated_tokens_saved() {
1292        let fs = FilterStats {
1293            raw_chars: 1000,
1294            filtered_chars: 200,
1295            ..Default::default()
1296        };
1297        assert_eq!(fs.estimated_tokens_saved(), 200); // (1000 - 200) / 4
1298    }
1299
1300    #[test]
1301    fn filter_stats_format_inline() {
1302        let fs = FilterStats {
1303            raw_chars: 1000,
1304            filtered_chars: 200,
1305            raw_lines: 342,
1306            filtered_lines: 28,
1307            ..Default::default()
1308        };
1309        let line = fs.format_inline("shell");
1310        assert_eq!(line, "[shell] 342 lines \u{2192} 28 lines, 80.0% filtered");
1311    }
1312
1313    #[test]
1314    fn filter_stats_format_inline_zero() {
1315        let fs = FilterStats::default();
1316        let line = fs.format_inline("bash");
1317        assert_eq!(line, "[bash] 0 lines \u{2192} 0 lines, 0.0% filtered");
1318    }
1319
1320    // DynExecutor tests
1321
    // Test executor that always reports the same canned output.
    struct FixedExecutor {
        // Tool name placed in the `tool_name` of every output it produces.
        tool_id: &'static str,
        // Text placed in the `summary` of every output it produces.
        output: &'static str,
    }
1326
1327    impl ToolExecutor for FixedExecutor {
1328        async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
1329            Ok(Some(ToolOutput {
1330                tool_name: ToolName::new(self.tool_id),
1331                summary: self.output.to_owned(),
1332                blocks_executed: 1,
1333                filter_stats: None,
1334                diff: None,
1335                streamed: false,
1336                terminal_id: None,
1337                locations: None,
1338                raw_response: None,
1339                claim_source: None,
1340            }))
1341        }
1342
1343        fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1344            vec![]
1345        }
1346
1347        async fn execute_tool_call(
1348            &self,
1349            _call: &ToolCall,
1350        ) -> Result<Option<ToolOutput>, ToolError> {
1351            Ok(Some(ToolOutput {
1352                tool_name: ToolName::new(self.tool_id),
1353                summary: self.output.to_owned(),
1354                blocks_executed: 1,
1355                filter_stats: None,
1356                diff: None,
1357                streamed: false,
1358                terminal_id: None,
1359                locations: None,
1360                raw_response: None,
1361                claim_source: None,
1362            }))
1363        }
1364    }
1365
1366    #[tokio::test]
1367    async fn dyn_executor_execute_delegates() {
1368        let inner = std::sync::Arc::new(FixedExecutor {
1369            tool_id: "bash",
1370            output: "hello",
1371        });
1372        let exec = DynExecutor(inner);
1373        let result = exec.execute("```bash\necho hello\n```").await.unwrap();
1374        assert!(result.is_some());
1375        assert_eq!(result.unwrap().summary, "hello");
1376    }
1377
1378    #[tokio::test]
1379    async fn dyn_executor_execute_confirmed_delegates() {
1380        let inner = std::sync::Arc::new(FixedExecutor {
1381            tool_id: "bash",
1382            output: "confirmed",
1383        });
1384        let exec = DynExecutor(inner);
1385        let result = exec.execute_confirmed("...").await.unwrap();
1386        assert!(result.is_some());
1387        assert_eq!(result.unwrap().summary, "confirmed");
1388    }
1389
1390    #[test]
1391    fn dyn_executor_tool_definitions_delegates() {
1392        let inner = std::sync::Arc::new(FixedExecutor {
1393            tool_id: "my_tool",
1394            output: "",
1395        });
1396        let exec = DynExecutor(inner);
1397        // FixedExecutor returns empty definitions; verify delegation occurs without panic.
1398        let defs = exec.tool_definitions();
1399        assert!(defs.is_empty());
1400    }
1401
1402    #[tokio::test]
1403    async fn dyn_executor_execute_tool_call_delegates() {
1404        let inner = std::sync::Arc::new(FixedExecutor {
1405            tool_id: "bash",
1406            output: "tool_call_result",
1407        });
1408        let exec = DynExecutor(inner);
1409        let call = ToolCall {
1410            tool_id: ToolName::new("bash"),
1411            params: serde_json::Map::new(),
1412            caller_id: None,
1413            context: None,
1414
1415            tool_call_id: String::new(),
1416        };
1417        let result = exec.execute_tool_call(&call).await.unwrap();
1418        assert!(result.is_some());
1419        assert_eq!(result.unwrap().summary, "tool_call_result");
1420    }
1421
1422    #[test]
1423    fn dyn_executor_set_effective_trust_delegates() {
1424        use std::sync::atomic::{AtomicU8, Ordering};
1425
1426        struct TrustCapture(AtomicU8);
1427        impl ToolExecutor for TrustCapture {
1428            async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1429                Ok(None)
1430            }
1431            fn set_effective_trust(&self, level: crate::SkillTrustLevel) {
1432                // encode: Trusted=0, Verified=1, Quarantined=2, Blocked=3
1433                let v = match level {
1434                    crate::SkillTrustLevel::Trusted => 0u8,
1435                    crate::SkillTrustLevel::Verified => 1,
1436                    crate::SkillTrustLevel::Quarantined => 2,
1437                    _ => 3,
1438                };
1439                self.0.store(v, Ordering::Relaxed);
1440            }
1441        }
1442
1443        let inner = std::sync::Arc::new(TrustCapture(AtomicU8::new(0)));
1444        let exec =
1445            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1446        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Quarantined);
1447        assert_eq!(inner.0.load(Ordering::Relaxed), 2);
1448
1449        ToolExecutor::set_effective_trust(&exec, crate::SkillTrustLevel::Blocked);
1450        assert_eq!(inner.0.load(Ordering::Relaxed), 3);
1451    }
1452
1453    #[test]
1454    fn extract_fenced_blocks_no_prefix_match() {
1455        // ```bashrc must NOT match when searching for "bash"
1456        assert!(extract_fenced_blocks("```bashrc\nfoo\n```", "bash").is_empty());
1457        // exact match
1458        assert_eq!(
1459            extract_fenced_blocks("```bash\nfoo\n```", "bash"),
1460            vec!["foo"]
1461        );
1462        // trailing space is fine
1463        assert_eq!(
1464            extract_fenced_blocks("```bash \nfoo\n```", "bash"),
1465            vec!["foo"]
1466        );
1467    }
1468
1469    // ── ToolError::category() delegation tests ────────────────────────────────
1470
1471    #[test]
1472    fn tool_error_http_400_category_is_invalid_parameters() {
1473        use crate::error_taxonomy::ToolErrorCategory;
1474        let err = ToolError::Http {
1475            status: 400,
1476            message: "bad request".to_owned(),
1477        };
1478        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1479    }
1480
1481    #[test]
1482    fn tool_error_http_401_category_is_policy_blocked() {
1483        use crate::error_taxonomy::ToolErrorCategory;
1484        let err = ToolError::Http {
1485            status: 401,
1486            message: "unauthorized".to_owned(),
1487        };
1488        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1489    }
1490
1491    #[test]
1492    fn tool_error_http_403_category_is_policy_blocked() {
1493        use crate::error_taxonomy::ToolErrorCategory;
1494        let err = ToolError::Http {
1495            status: 403,
1496            message: "forbidden".to_owned(),
1497        };
1498        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1499    }
1500
1501    #[test]
1502    fn tool_error_http_404_category_is_permanent_failure() {
1503        use crate::error_taxonomy::ToolErrorCategory;
1504        let err = ToolError::Http {
1505            status: 404,
1506            message: "not found".to_owned(),
1507        };
1508        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1509    }
1510
1511    #[test]
1512    fn tool_error_http_429_category_is_rate_limited() {
1513        use crate::error_taxonomy::ToolErrorCategory;
1514        let err = ToolError::Http {
1515            status: 429,
1516            message: "too many requests".to_owned(),
1517        };
1518        assert_eq!(err.category(), ToolErrorCategory::RateLimited);
1519    }
1520
1521    #[test]
1522    fn tool_error_http_500_category_is_server_error() {
1523        use crate::error_taxonomy::ToolErrorCategory;
1524        let err = ToolError::Http {
1525            status: 500,
1526            message: "internal server error".to_owned(),
1527        };
1528        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1529    }
1530
1531    #[test]
1532    fn tool_error_http_502_category_is_server_error() {
1533        use crate::error_taxonomy::ToolErrorCategory;
1534        let err = ToolError::Http {
1535            status: 502,
1536            message: "bad gateway".to_owned(),
1537        };
1538        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1539    }
1540
1541    #[test]
1542    fn tool_error_http_503_category_is_server_error() {
1543        use crate::error_taxonomy::ToolErrorCategory;
1544        let err = ToolError::Http {
1545            status: 503,
1546            message: "service unavailable".to_owned(),
1547        };
1548        assert_eq!(err.category(), ToolErrorCategory::ServerError);
1549    }
1550
1551    #[test]
1552    fn tool_error_http_503_is_transient_triggers_phase2_retry() {
1553        // Phase 2 retry fires when err.kind() == ErrorKind::Transient.
1554        // Verify the full chain: Http{503} -> ServerError -> is_retryable() -> Transient.
1555        let err = ToolError::Http {
1556            status: 503,
1557            message: "service unavailable".to_owned(),
1558        };
1559        assert_eq!(
1560            err.kind(),
1561            ErrorKind::Transient,
1562            "HTTP 503 must be Transient so Phase 2 retry fires"
1563        );
1564    }
1565
1566    #[test]
1567    fn tool_error_blocked_category_is_policy_blocked() {
1568        use crate::error_taxonomy::ToolErrorCategory;
1569        let err = ToolError::Blocked {
1570            command: "rm -rf /".to_owned(),
1571        };
1572        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1573    }
1574
1575    #[test]
1576    fn tool_error_sandbox_violation_category_is_policy_blocked() {
1577        use crate::error_taxonomy::ToolErrorCategory;
1578        let err = ToolError::SandboxViolation {
1579            path: "/etc/shadow".to_owned(),
1580        };
1581        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1582    }
1583
1584    #[test]
1585    fn tool_error_confirmation_required_category() {
1586        use crate::error_taxonomy::ToolErrorCategory;
1587        let err = ToolError::ConfirmationRequired {
1588            command: "rm /tmp/x".to_owned(),
1589        };
1590        assert_eq!(err.category(), ToolErrorCategory::ConfirmationRequired);
1591    }
1592
1593    #[test]
1594    fn tool_error_timeout_category() {
1595        use crate::error_taxonomy::ToolErrorCategory;
1596        let err = ToolError::Timeout { timeout_secs: 30 };
1597        assert_eq!(err.category(), ToolErrorCategory::Timeout);
1598    }
1599
1600    #[test]
1601    fn tool_error_cancelled_category() {
1602        use crate::error_taxonomy::ToolErrorCategory;
1603        assert_eq!(
1604            ToolError::Cancelled.category(),
1605            ToolErrorCategory::Cancelled
1606        );
1607    }
1608
1609    #[test]
1610    fn tool_error_invalid_params_category() {
1611        use crate::error_taxonomy::ToolErrorCategory;
1612        let err = ToolError::InvalidParams {
1613            message: "missing field".to_owned(),
1614        };
1615        assert_eq!(err.category(), ToolErrorCategory::InvalidParameters);
1616    }
1617
1618    // B2 regression: Execution(NotFound) must NOT produce ToolNotFound.
1619    #[test]
1620    fn tool_error_execution_not_found_category_is_permanent_failure() {
1621        use crate::error_taxonomy::ToolErrorCategory;
1622        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "bash: not found");
1623        let err = ToolError::Execution(io_err);
1624        let cat = err.category();
1625        assert_ne!(
1626            cat,
1627            ToolErrorCategory::ToolNotFound,
1628            "Execution(NotFound) must NOT map to ToolNotFound"
1629        );
1630        assert_eq!(cat, ToolErrorCategory::PermanentFailure);
1631    }
1632
1633    #[test]
1634    fn tool_error_execution_timed_out_category_is_timeout() {
1635        use crate::error_taxonomy::ToolErrorCategory;
1636        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out");
1637        assert_eq!(
1638            ToolError::Execution(io_err).category(),
1639            ToolErrorCategory::Timeout
1640        );
1641    }
1642
1643    #[test]
1644    fn tool_error_execution_connection_refused_category_is_network_error() {
1645        use crate::error_taxonomy::ToolErrorCategory;
1646        let io_err = std::io::Error::new(std::io::ErrorKind::ConnectionRefused, "refused");
1647        assert_eq!(
1648            ToolError::Execution(io_err).category(),
1649            ToolErrorCategory::NetworkError
1650        );
1651    }
1652
1653    // B4 regression: Http/network/transient categories must NOT be quality failures.
1654    #[test]
1655    fn b4_tool_error_http_429_not_quality_failure() {
1656        let err = ToolError::Http {
1657            status: 429,
1658            message: "rate limited".to_owned(),
1659        };
1660        assert!(
1661            !err.category().is_quality_failure(),
1662            "RateLimited must not be a quality failure"
1663        );
1664    }
1665
1666    #[test]
1667    fn b4_tool_error_http_503_not_quality_failure() {
1668        let err = ToolError::Http {
1669            status: 503,
1670            message: "service unavailable".to_owned(),
1671        };
1672        assert!(
1673            !err.category().is_quality_failure(),
1674            "ServerError must not be a quality failure"
1675        );
1676    }
1677
1678    #[test]
1679    fn b4_tool_error_execution_timed_out_not_quality_failure() {
1680        let io_err = std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout");
1681        assert!(
1682            !ToolError::Execution(io_err).category().is_quality_failure(),
1683            "Timeout must not be a quality failure"
1684        );
1685    }
1686
1687    // ── ToolError::Shell category tests ──────────────────────────────────────
1688
1689    #[test]
1690    fn tool_error_shell_exit126_is_policy_blocked() {
1691        use crate::error_taxonomy::ToolErrorCategory;
1692        let err = ToolError::Shell {
1693            exit_code: 126,
1694            category: ToolErrorCategory::PolicyBlocked,
1695            message: "permission denied".to_owned(),
1696        };
1697        assert_eq!(err.category(), ToolErrorCategory::PolicyBlocked);
1698    }
1699
1700    #[test]
1701    fn tool_error_shell_exit127_is_permanent_failure() {
1702        use crate::error_taxonomy::ToolErrorCategory;
1703        let err = ToolError::Shell {
1704            exit_code: 127,
1705            category: ToolErrorCategory::PermanentFailure,
1706            message: "command not found".to_owned(),
1707        };
1708        assert_eq!(err.category(), ToolErrorCategory::PermanentFailure);
1709        assert!(!err.category().is_retryable());
1710    }
1711
1712    #[test]
1713    fn tool_error_shell_not_quality_failure() {
1714        use crate::error_taxonomy::ToolErrorCategory;
1715        let err = ToolError::Shell {
1716            exit_code: 127,
1717            category: ToolErrorCategory::PermanentFailure,
1718            message: "command not found".to_owned(),
1719        };
1720        // Shell exit errors are not attributable to LLM output quality.
1721        assert!(!err.category().is_quality_failure());
1722    }
1723
1724    // ── requires_confirmation / requires_confirmation_erased tests (#3644) ───
1725
1726    /// Stub implementing only `ToolExecutor` without overriding `requires_confirmation`.
1727    struct StubExecutor;
1728    impl ToolExecutor for StubExecutor {
1729        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1730            Ok(None)
1731        }
1732    }
1733
1734    /// Stub that always signals confirmation is required via `ToolExecutor::requires_confirmation`.
1735    struct ConfirmingExecutor;
1736    impl ToolExecutor for ConfirmingExecutor {
1737        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
1738            Ok(None)
1739        }
1740        fn requires_confirmation(&self, _call: &ToolCall) -> bool {
1741            true
1742        }
1743    }
1744
1745    fn dummy_call() -> ToolCall {
1746        ToolCall {
1747            tool_id: ToolName::new("test"),
1748            params: serde_json::Map::new(),
1749            caller_id: None,
1750            context: None,
1751
1752            tool_call_id: String::new(),
1753        }
1754    }
1755
1756    #[test]
1757    fn requires_confirmation_default_is_false_on_tool_executor() {
1758        let exec = StubExecutor;
1759        assert!(
1760            !exec.requires_confirmation(&dummy_call()),
1761            "ToolExecutor default requires_confirmation must be false"
1762        );
1763    }
1764
1765    #[test]
1766    fn requires_confirmation_erased_delegates_to_tool_executor_default() {
1767        // blanket impl routes erased → ToolExecutor::requires_confirmation (= false)
1768        let exec = StubExecutor;
1769        assert!(
1770            !ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1771            "requires_confirmation_erased via blanket impl must return false for stub executor"
1772        );
1773    }
1774
1775    #[test]
1776    fn requires_confirmation_erased_delegates_override() {
1777        // ConfirmingExecutor overrides requires_confirmation → true;
1778        // blanket impl must propagate this.
1779        let exec = ConfirmingExecutor;
1780        assert!(
1781            ErasedToolExecutor::requires_confirmation_erased(&exec, &dummy_call()),
1782            "requires_confirmation_erased must return true when ToolExecutor override returns true"
1783        );
1784    }
1785
1786    #[test]
1787    fn requires_confirmation_erased_default_on_erased_trait_is_true() {
1788        // ErasedToolExecutor's own default (trait method body) returns true.
1789        // We construct a DynExecutor wrapping ConfirmingExecutor and verify via the erased path.
1790        // (We cannot instantiate ErasedToolExecutor directly without a concrete type.)
1791        // Instead verify via a type that only implements ErasedToolExecutor manually:
1792        struct ManualErased;
1793        impl ErasedToolExecutor for ManualErased {
1794            fn execute_erased<'a>(
1795                &'a self,
1796                _response: &'a str,
1797            ) -> std::pin::Pin<
1798                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1799            > {
1800                Box::pin(std::future::ready(Ok(None)))
1801            }
1802            fn execute_confirmed_erased<'a>(
1803                &'a self,
1804                _response: &'a str,
1805            ) -> std::pin::Pin<
1806                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1807            > {
1808                Box::pin(std::future::ready(Ok(None)))
1809            }
1810            fn tool_definitions_erased(&self) -> Vec<crate::registry::ToolDef> {
1811                vec![]
1812            }
1813            fn execute_tool_call_erased<'a>(
1814                &'a self,
1815                _call: &'a ToolCall,
1816            ) -> std::pin::Pin<
1817                Box<dyn Future<Output = Result<Option<ToolOutput>, ToolError>> + Send + 'a>,
1818            > {
1819                Box::pin(std::future::ready(Ok(None)))
1820            }
1821            fn is_tool_retryable_erased(&self, _tool_id: &str) -> bool {
1822                false
1823            }
1824            // requires_confirmation_erased NOT overridden → trait default returns true
1825        }
1826        let exec = ManualErased;
1827        assert!(
1828            exec.requires_confirmation_erased(&dummy_call()),
1829            "ErasedToolExecutor trait-level default for requires_confirmation_erased must be true"
1830        );
1831    }
1832
1833    // ── DynExecutor::requires_confirmation delegation tests (#3650) ──────────
1834
1835    #[test]
1836    fn dyn_executor_requires_confirmation_delegates() {
1837        let inner = std::sync::Arc::new(ConfirmingExecutor);
1838        let exec =
1839            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1840        assert!(
1841            ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1842            "DynExecutor must delegate requires_confirmation to inner executor"
1843        );
1844    }
1845
1846    #[test]
1847    fn dyn_executor_requires_confirmation_default_false() {
1848        let inner = std::sync::Arc::new(StubExecutor);
1849        let exec =
1850            DynExecutor(std::sync::Arc::clone(&inner) as std::sync::Arc<dyn ErasedToolExecutor>);
1851        assert!(
1852            !ToolExecutor::requires_confirmation(&exec, &dummy_call()),
1853            "DynExecutor must return false when inner executor does not require confirmation"
1854        );
1855    }
1856}
zeph_tools/executor.rs

zeph_tools/
executor.rs