zeph_tools/shell/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Shell executor that parses and runs bash blocks from LLM responses.
5//!
6//! [`ShellExecutor`] is the primary tool backend for Zeph. It handles both legacy
7//! fenced bash blocks and structured `bash` tool calls. Security controls enforced
8//! before every command:
9//!
10//! - **Blocklist** — commands matching any entry in `blocked_commands` (or the built-in
11//!   [`DEFAULT_BLOCKED_COMMANDS`]) are rejected with [`ToolError::Blocked`].
12//! - **Subshell metacharacters** — `$(`, `` ` ``, `<(`, and `>(` are always blocked
13//!   because nested evaluation cannot be safely analysed statically.
14//! - **Path sandbox** — the working directory and any file arguments must reside under
15//!   the configured `allowed_paths`.
16//! - **Confirmation gate** — commands matching `confirm_patterns` are held for user
17//!   approval before execution (bypassed by `execute_confirmed`).
18//! - **Environment blocklist** — variables in `env_blocklist` are stripped from the
19//!   subprocess environment before launch.
20//! - **Transactional rollback** — when enabled, file snapshots are taken before execution
21//!   and restored on failure or on non-zero exit codes in `auto_rollback_exit_codes`.
22
23use std::collections::HashMap;
24use std::path::PathBuf;
25use std::sync::Arc;
26use std::sync::atomic::AtomicBool;
27use std::time::{Duration, Instant};
28
29use tokio::process::Command;
30use tokio_util::sync::CancellationToken;
31
32use schemars::JsonSchema;
33use serde::Deserialize;
34
35use arc_swap::ArcSwap;
36use parking_lot::{Mutex, RwLock};
37
38use zeph_common::ToolName;
39
40use crate::audit::{AuditEntry, AuditLogger, AuditResult, chrono_now};
41use crate::config::ShellConfig;
42use crate::execution_context::ExecutionContext;
43use crate::executor::{
44    ClaimSource, FilterStats, ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput,
45};
46use crate::filter::{OutputFilterRegistry, sanitize_output};
47use crate::permissions::{PermissionAction, PermissionPolicy};
48use crate::sandbox::{Sandbox, SandboxPolicy};
49
50pub mod background;
51pub use background::BackgroundRunSnapshot;
52use background::{BackgroundCompletion, BackgroundHandle, RunId};
53
54pub mod deobfuscate;
55pub use deobfuscate::deobfuscate as deobfuscate_command;
56
57pub mod safe_fix;
58pub use safe_fix::SafeFixSuggestion;
59
60mod transaction;
61use transaction::{TransactionSnapshot, affected_paths, build_scope_matchers, is_write_command};
62
63use crate::risk_chain::RiskChainAccumulator;
64
/// Built-in blocked command patterns; re-exported publicly as
/// [`DEFAULT_BLOCKED_COMMANDS`].
///
/// `compute_blocked_commands` starts from this list and drops every entry that
/// also appears (case-insensitively) in `ShellConfig::allowed_commands`.
// NOTE(review): `"nc "` carries a trailing space, presumably so the pattern does
// not match longer command names starting with `nc` — confirm against the
// matcher (`tokens_match_pattern`), which is not visible here.
const DEFAULT_BLOCKED: &[&str] = &[
    "rm -rf /", "sudo", "mkfs", "dd if=", "curl", "wget", "nc ", "ncat", "netcat", "shutdown",
    "reboot", "halt",
];
69
/// Returns `true` if `cmd` is an `rm` invocation with both recursive and force flags
/// that targets `.git/worktrees`, regardless of flag ordering or bundling style.
///
/// Blocks variants like `-rf`, `-fr`, `-rfd`, `-rfv`, `--recursive --force`, and the
/// abbreviated long forms GNU `getopt_long` accepts (`--recurs`, `--forc`, `--r --f`).
/// A plain `rm -r .git/worktrees` (no force) is intentionally allowed.
///
/// The check is deliberately conservative: matching is case-insensitive and any
/// flag-looking token counts, even one that follows a `--` end-of-options marker.
///
/// # Examples
///
/// ```
/// use zeph_tools::shell::is_blocked_rm_worktrees;
/// assert!(is_blocked_rm_worktrees("rm -rf .git/worktrees"));
/// assert!(is_blocked_rm_worktrees("rm -fr .git/worktrees"));
/// assert!(is_blocked_rm_worktrees("rm -rfd .git/worktrees"));
/// assert!(is_blocked_rm_worktrees("rm --recursive --force .git/worktrees"));
/// assert!(is_blocked_rm_worktrees("rm --recurs --forc .git/worktrees"));
/// assert!(!is_blocked_rm_worktrees("rm -r .git/worktrees")); // no force
/// assert!(!is_blocked_rm_worktrees("rm -rf /tmp/other")); // no worktrees path
/// ```
#[must_use]
pub fn is_blocked_rm_worktrees(cmd: &str) -> bool {
    // Lowercasing folds `-R` into `-r`, so only lowercase flags are tested below.
    let lower = cmd.to_lowercase();
    let mut tokens = lower.split_whitespace();

    // First token must be `rm` (or path-qualified, e.g. `/usr/bin/rm`).
    let Some(binary) = tokens.next() else {
        return false;
    };
    if binary.rsplit('/').next() != Some("rm") {
        return false;
    }

    if !lower.contains(".git/worktrees") {
        return false;
    }

    let mut has_recursive = false;
    let mut has_force = false;

    for token in tokens {
        if let Some(long) = token.strip_prefix("--") {
            // A bare `--` has an empty name; without this guard it would be a
            // prefix of every option and match both flags.
            if long.is_empty() {
                continue;
            }
            // GNU getopt_long accepts any unambiguous prefix of a long option,
            // so `--recurs` and `--forc` must be treated like the full names.
            has_recursive |= "recursive".starts_with(long);
            has_force |= "force".starts_with(long);
        } else if let Some(bundle) = token.strip_prefix('-') {
            // Short flag bundle like `-rfd` or `-fr`.
            has_recursive |= bundle.contains('r');
            has_force |= bundle.contains('f');
        }
    }

    has_recursive && has_force
}
125
/// Graceful period between SIGTERM and SIGKILL during process escalation.
///
/// Fixed at 250 milliseconds. The constant only exists on Unix targets
/// (`#[cfg(unix)]`); despite the `_MS` suffix its type is [`Duration`].
#[cfg(unix)]
const GRACEFUL_TERM_MS: Duration = Duration::from_millis(250);
129
/// The default list of blocked command patterns used by [`ShellExecutor`].
///
/// Includes highly destructive commands (`rm -rf /`, `mkfs`, `dd if=`), privilege
/// escalation (`sudo`), network egress tools (`curl`, `wget`, `nc`, `ncat`, `netcat`),
/// and power-state commands (`shutdown`, `reboot`, `halt`).
///
/// An entry is dropped from the effective blocklist when it is listed in
/// `ShellConfig::allowed_commands`. Network tools are additionally re-added whenever
/// [`ShellConfig::allow_network`] is `false`, so re-enabling one requires both
/// `allow_network = true` and a matching `allowed_commands` entry.
///
/// `rm` commands targeting `.git/worktrees` with recursive+force flags are blocked
/// semantically via [`is_blocked_rm_worktrees`] regardless of flag ordering or bundling,
/// so they do not appear as literal entries in this list.
///
/// Exposed so other executors (e.g. `AcpShellExecutor`) can reuse the same
/// blocklist without duplicating it.
pub const DEFAULT_BLOCKED_COMMANDS: &[&str] = DEFAULT_BLOCKED;
143
/// Shell interpreters that may execute arbitrary code via `-c` or positional args.
///
/// [`effective_shell_command`] compares the basename of a binary against this
/// list and, on a match, returns the argument that follows `-c` so the caller can
/// run that script through [`check_blocklist`] rather than judging the
/// interpreter name alone.
pub const SHELL_INTERPRETERS: &[&str] =
    &["bash", "sh", "zsh", "fish", "dash", "ksh", "csh", "tcsh"];
151
/// Subshell metacharacters that could embed a blocked command inside a benign wrapper.
/// Commands containing these sequences are rejected outright because safe static
/// analysis of nested shell evaluation is not feasible.
///
/// Covers command substitution (`$(` and the backtick) and process substitution
/// (`<(` and `>(`). [`check_blocklist`] tests them in this order and reports the
/// first one found as the matched pattern.
const SUBSHELL_METACHARS: &[&str] = &["$(", "`", "<(", ">("];
156
157/// Check if `command` matches any pattern in `blocklist`.
158///
159/// Returns the matched pattern string if the command is blocked, `None` otherwise.
160/// The check is case-insensitive and handles common shell escape sequences.
161///
162/// Commands containing subshell metacharacters (`$(` or `` ` ``) are always
163/// blocked because nested evaluation cannot be safely analysed statically.
164#[must_use]
165pub fn check_blocklist(command: &str, blocklist: &[String]) -> Option<String> {
166    let lower = command.to_lowercase();
167    // Reject commands that embed subshell constructs to prevent blocklist bypass.
168    for meta in SUBSHELL_METACHARS {
169        if lower.contains(meta) {
170            return Some((*meta).to_owned());
171        }
172    }
173    let cleaned = strip_shell_escapes(&lower);
174    let commands = tokenize_commands(&cleaned);
175    for cmd_tokens in &commands {
176        let joined = cmd_tokens.join(" ");
177        if is_blocked_rm_worktrees(&joined) {
178            return Some("rm --recursive --force .git/worktrees".to_owned());
179        }
180    }
181    for blocked in blocklist {
182        for cmd_tokens in &commands {
183            if tokens_match_pattern(cmd_tokens, blocked) {
184                return Some(blocked.clone());
185            }
186        }
187    }
188    None
189}
190
191/// Build the effective command string for blocklist evaluation when the binary is a
192/// shell interpreter (bash, sh, zsh, etc.) and args contains a `-c` script.
193///
194/// Returns `None` if the args do not follow the `-c <script>` pattern.
195#[must_use]
196pub fn effective_shell_command<'a>(binary: &str, args: &'a [String]) -> Option<&'a str> {
197    let base = binary.rsplit('/').next().unwrap_or(binary);
198    if !SHELL_INTERPRETERS.contains(&base) {
199        return None;
200    }
201    // Find "-c" and return the next element as the script to check.
202    let pos = args.iter().position(|a| a == "-c")?;
203    args.get(pos + 1).map(String::as_str)
204}
205
/// Network egress tools that `compute_blocked_commands` force-adds to the
/// effective blocklist whenever `ShellConfig::allow_network` is `false`.
///
/// Every entry also appears in `DEFAULT_BLOCKED`.
const NETWORK_COMMANDS: &[&str] = &["curl", "wget", "nc ", "ncat", "netcat"];
207
/// Effective command-restriction policy held inside a `ShellExecutor`.
///
/// Swapped atomically on hot-reload via [`ShellPolicyHandle`].
#[derive(Debug)]
pub(crate) struct ShellPolicy {
    /// Effective blocklist as produced by `compute_blocked_commands`: sorted and
    /// de-duplicated, with entries taken from the config lowercased.
    pub(crate) blocked_commands: Vec<String>,
}
215
216/// Clonable handle for live policy rebuilds on hot-reload.
217///
218/// Obtained from [`ShellExecutor::policy_handle`] at construction time and stored
219/// on the agent. Call [`ShellPolicyHandle::rebuild`] to atomically replace the
220/// effective `blocked_commands` list without recreating the executor. Reads on
221/// the dispatch path are lock-free via `ArcSwap::load_full`.
222#[derive(Clone, Debug)]
223pub struct ShellPolicyHandle {
224    inner: Arc<ArcSwap<ShellPolicy>>,
225}
226
227impl ShellPolicyHandle {
228    /// Atomically install a new effective blocklist derived from `config`.
229    ///
230    /// # Rebuild contract
231    ///
232    /// `config` must be the **already-overlay-merged** `ShellConfig` (i.e. the
233    /// value produced by `load_config_with_overlay`). Plugin contributions are
234    /// already present in `config.blocked_commands` at this point; this method
235    /// does NOT re-apply overlays.
236    pub fn rebuild(&self, config: &crate::config::ShellConfig) {
237        let policy = Arc::new(ShellPolicy {
238            blocked_commands: compute_blocked_commands(config),
239        });
240        self.inner.store(policy);
241    }
242
243    /// Snapshot of the current effective blocklist.
244    #[must_use]
245    pub fn snapshot_blocked(&self) -> Vec<String> {
246        self.inner.load().blocked_commands.clone()
247    }
248}
249
250/// Compute the effective blocklist from an already-overlay-merged `ShellConfig`.
251///
252/// Invariant: identical to the logic in `ShellExecutor::new`.
253pub(crate) fn compute_blocked_commands(config: &crate::config::ShellConfig) -> Vec<String> {
254    let allowed: Vec<String> = config
255        .allowed_commands
256        .iter()
257        .map(|s| s.to_lowercase())
258        .collect();
259    let mut blocked: Vec<String> = DEFAULT_BLOCKED
260        .iter()
261        .filter(|s| !allowed.contains(&s.to_lowercase()))
262        .map(|s| (*s).to_owned())
263        .collect();
264    blocked.extend(config.blocked_commands.iter().map(|s| s.to_lowercase()));
265    if !config.allow_network {
266        for cmd in NETWORK_COMMANDS {
267            let lower = cmd.to_lowercase();
268            if !blocked.contains(&lower) {
269                blocked.push(lower);
270            }
271        }
272    }
273    blocked.sort();
274    blocked.dedup();
275    blocked
276}
277
// Deserialized argument payload for a `bash` invocation.
//
// NOTE: plain `//` comments are used for these notes on purpose. `schemars`
// copies `///` doc comments into the generated JSON schema descriptions, so the
// existing field docs below are part of what schema consumers see and must
// not be reworded casually.
#[derive(Deserialize, JsonSchema)]
pub(crate) struct BashParams {
    /// The bash command to execute.
    command: String,
    /// When `true`, spawn the command in the background and return immediately.
    ///
    /// The agent receives a `run_id` in the synchronous tool result. When the
    /// command finishes, a synthetic user-role message is injected at the start
    /// of the next turn carrying the exit code and output.
    // `#[serde(default)]` makes the field optional on input; omitted means `false`.
    #[serde(default)]
    background: bool,
}
290
/// Bash block extraction and execution via `tokio::process::Command`.
///
/// Parses ` ```bash ` fenced blocks from LLM responses (legacy path) and handles
/// structured `bash` tool calls (modern path). Use [`ShellExecutor::new`] with a
/// [`ShellConfig`] and chain optional builder methods to attach audit logging,
/// event streaming, permission policies, and cancellation.
///
/// # Example
///
/// ```rust,no_run
/// use zeph_tools::{ShellExecutor, ToolExecutor, ShellConfig};
///
/// # async fn example() {
/// let executor = ShellExecutor::new(&ShellConfig::default());
///
/// // Execute a fenced bash block.
/// let response = "```bash\npwd\n```";
/// if let Ok(Some(output)) = executor.execute(response).await {
///     println!("{}", output.summary);
/// }
/// # }
/// ```
#[derive(Debug)]
pub struct ShellExecutor {
    /// Timeout handed to each foreground command run (`config.timeout`, in seconds).
    timeout: Duration,
    /// Live command policy; shared with every [`ShellPolicyHandle`] so it can be
    /// swapped on hot-reload.
    policy: Arc<ArcSwap<ShellPolicy>>,
    /// Copied from `config.confirm_patterns`.
    confirm_patterns: Vec<String>,
    /// Copied from `config.env_blocklist`.
    env_blocklist: Vec<String>,
    /// Set by `with_audit`; `None` until then.
    audit_logger: Option<Arc<AuditLogger>>,
    /// Set by `with_tool_event_tx`; also decides whether outputs are flagged as streamed.
    tool_event_tx: Option<ToolEventTx>,
    /// Set by `with_permissions`; `None` until then.
    permission_policy: Option<PermissionPolicy>,
    /// Set by `with_output_filters`; `None` until then.
    output_filter_registry: Option<OutputFilterRegistry>,
    /// Set by `with_cancel_token`; consulted after a run exits with code 130.
    cancel_token: Option<CancellationToken>,
    /// Environment variables for the active skill, replaced via `set_skill_env`.
    skill_env: RwLock<Option<std::collections::HashMap<String, String>>>,
    /// Copied from `config.transactional`.
    transactional: bool,
    /// Copied from `config.auto_rollback`.
    auto_rollback: bool,
    /// Copied from `config.auto_rollback_exit_codes`.
    auto_rollback_exit_codes: Vec<i32>,
    /// Copied from `config.snapshot_required`.
    snapshot_required: bool,
    /// Copied from `config.max_snapshot_bytes`.
    max_snapshot_bytes: u64,
    /// Glob matchers built once from `config.transaction_scope`.
    transaction_scope_matchers: Vec<globset::GlobMatcher>,
    /// Sandbox backend set by `with_sandbox`; only used together with `sandbox_policy`.
    sandbox: Option<Arc<dyn Sandbox>>,
    /// Sandbox policy set by `with_sandbox`.
    sandbox_policy: Option<SandboxPolicy>,
    /// Registry of in-flight background runs. Bounded by `max_background_runs`.
    background_runs: Arc<Mutex<HashMap<RunId, BackgroundHandle>>>,
    /// Maximum number of concurrent background runs.
    max_background_runs: usize,
    /// Timeout applied to each background run.
    background_timeout: Duration,
    /// Set to `true` during shutdown to prevent new background spawns.
    shutting_down: Arc<AtomicBool>,
    /// Dedicated sender used to forward [`BackgroundCompletion`]s to the agent
    /// (bypasses the UI-facing [`ToolEventTx`] channel). `None` when the agent
    /// has not wired a background completion receiver.
    background_completion_tx: Option<tokio::sync::mpsc::Sender<BackgroundCompletion>>,
    /// Named execution environment registry built from `[execution]` config.
    /// Keys are case-sensitive environment names; values are trusted `ExecutionContext`s.
    environments: Arc<HashMap<String, ExecutionContext>>,
    /// Pre-canonicalized `allowed_paths`. Built once at construction to avoid TOCTOU
    /// between the canonicalize call and the prefix check at `resolve_context` time.
    allowed_paths_canonical: Vec<PathBuf>,
    /// Optional default environment name (from `[execution] default_env`).
    default_env: Option<String>,
    /// Optional per-turn risk chain accumulator for multi-step attack detection.
    risk_chain: Option<Arc<RiskChainAccumulator>>,
    /// Cumulative score threshold above which the risk chain blocks execution.
    /// Defaults to `0.7` when `config.risk_chain_threshold` is unset.
    risk_chain_threshold: f32,
}
358
/// Fully resolved execution context for a single shell invocation.
///
/// Produced by [`ShellExecutor::resolve_context`] and passed to the inner execute
/// functions. The canonical `cwd` is what `cmd.current_dir` receives — identical to
/// the path that was validated against `allowed_paths`.
#[derive(Debug)]
pub(crate) struct ResolvedContext {
    /// Canonical absolute working directory (follows all symlinks).
    /// Also reported as `resolved_cwd` in [`ToolEvent::Started`].
    pub(crate) cwd: PathBuf,
    /// Final merged environment (post-blocklist filter).
    pub(crate) env: HashMap<String, String>,
    /// Resolved environment name, for logs and audit entries.
    /// Also reported as `execution_env` in [`ToolEvent::Started`].
    pub(crate) name: Option<String>,
    /// Whether the context originated from a trusted source (operator TOML).
    /// Reserved for future audit log enrichment.
    // Not read anywhere yet, hence the lint suppression below.
    #[allow(dead_code)]
    pub(crate) trusted: bool,
}
377
378impl ShellExecutor {
379    /// Create a new `ShellExecutor` from configuration.
380    ///
381    /// Merges the built-in [`DEFAULT_BLOCKED_COMMANDS`] with any additional blocked
382    /// commands from `config`, then subtracts any explicitly allowed commands.
383    /// No subprocess is spawned at construction time.
384    #[must_use]
385    pub fn new(config: &ShellConfig) -> Self {
386        let policy = Arc::new(ArcSwap::from_pointee(ShellPolicy {
387            blocked_commands: compute_blocked_commands(config),
388        }));
389
390        let allowed_paths: Vec<PathBuf> = if config.allowed_paths.is_empty() {
391            vec![std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))]
392        } else {
393            config.allowed_paths.iter().map(PathBuf::from).collect()
394        };
395        let allowed_paths_canonical: Vec<PathBuf> = allowed_paths
396            .iter()
397            .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()))
398            .collect();
399
400        Self {
401            timeout: Duration::from_secs(config.timeout),
402            policy,
403            confirm_patterns: config.confirm_patterns.clone(),
404            env_blocklist: config.env_blocklist.clone(),
405            audit_logger: None,
406            tool_event_tx: None,
407            permission_policy: None,
408            output_filter_registry: None,
409            cancel_token: None,
410            skill_env: RwLock::new(None),
411            transactional: config.transactional,
412            auto_rollback: config.auto_rollback,
413            auto_rollback_exit_codes: config.auto_rollback_exit_codes.clone(),
414            snapshot_required: config.snapshot_required,
415            max_snapshot_bytes: config.max_snapshot_bytes,
416            transaction_scope_matchers: build_scope_matchers(&config.transaction_scope),
417            sandbox: None,
418            sandbox_policy: None,
419            background_runs: Arc::new(Mutex::new(HashMap::new())),
420            max_background_runs: config.max_background_runs,
421            background_timeout: Duration::from_secs(config.background_timeout_secs),
422            shutting_down: Arc::new(AtomicBool::new(false)),
423            background_completion_tx: None,
424            environments: Arc::new(HashMap::new()),
425            allowed_paths_canonical,
426            default_env: None,
427            risk_chain: None,
428            risk_chain_threshold: config.risk_chain_threshold.unwrap_or(0.7),
429        }
430    }
431
432    /// Attach an OS-level sandbox backend and a pre-snapshotted policy.
433    ///
434    /// The policy is snapshotted at construction and never re-resolved per call (no TOCTOU).
435    /// If a different policy is needed, create a new `ShellExecutor` via the builder chain.
436    #[must_use]
437    pub fn with_sandbox(mut self, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
438        self.sandbox = Some(sandbox);
439        self.sandbox_policy = Some(policy);
440        self
441    }
442
443    /// Attach a per-turn risk chain accumulator for multi-step attack detection.
444    ///
445    /// When set, each command is recorded into the accumulator. If the cumulative
446    /// risk score exceeds `threshold`, the command is blocked before execution.
447    #[must_use]
448    pub fn with_risk_chain(mut self, accumulator: Arc<RiskChainAccumulator>) -> Self {
449        self.risk_chain = Some(accumulator);
450        self
451    }
452
453    /// Build the environment registry from `[execution]` config and wire it in one step.
454    ///
455    /// Convenience wrapper for agent startup. Converts [`zeph_config::ExecutionConfig`]
456    /// entries into trusted [`ExecutionContext`] instances and passes them to
457    /// [`Self::with_environments`].
458    ///
459    /// # Errors
460    ///
461    /// Returns an error string when any registry entry's `cwd` cannot be canonicalized
462    /// or escapes `allowed_paths`.
463    pub fn with_execution_config(
464        self,
465        config: &zeph_config::ExecutionConfig,
466    ) -> Result<Self, String> {
467        let registry: HashMap<String, ExecutionContext> = config
468            .environments
469            .iter()
470            .map(|e| {
471                let ctx = ExecutionContext::trusted_from_parts(
472                    Some(e.name.clone()),
473                    Some(std::path::PathBuf::from(&e.cwd)),
474                    e.env.clone(),
475                );
476                (e.name.clone(), ctx)
477            })
478            .collect();
479        self.with_environments(registry, config.default_env.clone())
480    }
481
482    /// Wire the named execution environment registry from `[execution]` config.
483    ///
484    /// Builds trusted [`ExecutionContext`] instances from the operator-authored TOML
485    /// entries and canonicalizes their `cwd` paths at construction time.
486    ///
487    /// # Errors
488    ///
489    /// Returns an error string (surfaced at agent startup) when a registry entry's
490    /// `cwd` path does not exist, cannot be canonicalized, or escapes `allowed_paths`.
491    pub fn with_environments(
492        mut self,
493        environments: HashMap<String, ExecutionContext>,
494        default_env: Option<String>,
495    ) -> Result<Self, String> {
496        // Validate that all registered cwds exist and are under allowed_paths.
497        for (name, ctx) in &environments {
498            if let Some(cwd) = ctx.cwd() {
499                let canonical = cwd.canonicalize().map_err(|e| {
500                    format!(
501                        "execution environment '{name}': cwd '{}' cannot be canonicalized: {e}",
502                        cwd.display()
503                    )
504                })?;
505                if !self
506                    .allowed_paths_canonical
507                    .iter()
508                    .any(|p| canonical.starts_with(p))
509                {
510                    return Err(format!(
511                        "execution environment '{name}': cwd '{}' is outside allowed_paths",
512                        cwd.display()
513                    ));
514                }
515            }
516        }
517        self.environments = Arc::new(environments);
518        self.default_env = default_env;
519        Ok(self)
520    }
521
522    /// Set environment variables to inject when executing the active skill's bash blocks.
523    pub fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
524        *self.skill_env.write() = env;
525    }
526
527    /// Attach an audit logger. Each shell invocation will emit an [`AuditEntry`].
528    #[must_use]
529    pub fn with_audit(mut self, logger: Arc<AuditLogger>) -> Self {
530        self.audit_logger = Some(logger);
531        self
532    }
533
534    /// Attach a tool-event sender for streaming output to the TUI or channel adapter.
535    ///
536    /// When set, [`ToolEvent::Started`], [`ToolEvent::OutputChunk`], and
537    /// [`ToolEvent::Completed`] events are sent on `tx` during execution.
538    #[must_use]
539    pub fn with_tool_event_tx(mut self, tx: ToolEventTx) -> Self {
540        self.tool_event_tx = Some(tx);
541        self
542    }
543
544    /// Attach a dedicated sender for routing [`BackgroundCompletion`] payloads to the agent.
545    ///
546    /// This channel is separate from [`ToolEventTx`] (which goes to the TUI). The agent holds
547    /// the receiver end and drains it at the start of each turn to inject deferred completions
548    /// into the message history as a single merged user-role block.
549    #[must_use]
550    pub fn with_background_completion_tx(
551        mut self,
552        tx: tokio::sync::mpsc::Sender<BackgroundCompletion>,
553    ) -> Self {
554        self.background_completion_tx = Some(tx);
555        self
556    }
557
558    /// Attach a permission policy for confirmation-gate enforcement.
559    ///
560    /// Commands matching the policy's rules may require user approval before
561    /// execution proceeds.
562    #[must_use]
563    pub fn with_permissions(mut self, policy: PermissionPolicy) -> Self {
564        self.permission_policy = Some(policy);
565        self
566    }
567
568    /// Attach a cancellation token. When the token is cancelled, the running subprocess
569    /// is killed and the executor returns [`ToolError::Cancelled`].
570    #[must_use]
571    pub fn with_cancel_token(mut self, token: CancellationToken) -> Self {
572        self.cancel_token = Some(token);
573        self
574    }
575
576    /// Attach an output filter registry. Filters are applied to stdout+stderr before
577    /// the summary is stored in [`ToolOutput`] and sent to the LLM.
578    #[must_use]
579    pub fn with_output_filters(mut self, registry: OutputFilterRegistry) -> Self {
580        self.output_filter_registry = Some(registry);
581        self
582    }
583
584    /// Snapshot all in-flight background runs.
585    ///
586    /// Acquires the lock once, maps each [`BackgroundHandle`] to a
587    /// [`BackgroundRunSnapshot`], then drops the guard before returning.
588    /// Safe to call from any thread.
589    #[must_use]
590    pub fn background_runs_snapshot(&self) -> Vec<background::BackgroundRunSnapshot> {
591        let runs = self.background_runs.lock();
592        runs.iter()
593            .map(|(id, h)| {
594                #[allow(clippy::cast_possible_truncation)]
595                let elapsed_ms = h.elapsed().as_millis() as u64;
596                background::BackgroundRunSnapshot {
597                    run_id: id.to_string(),
598                    command: h.command.clone(),
599                    elapsed_ms,
600                }
601            })
602            .collect()
603    }
604
605    /// Return a clonable handle for live policy rebuilds on hot-reload.
606    ///
607    /// Clone the handle out at construction time and store it on the agent.
608    /// Calling [`ShellPolicyHandle::rebuild`] atomically swaps the effective
609    /// `blocked_commands` without recreating the executor.
610    #[must_use]
611    pub fn policy_handle(&self) -> ShellPolicyHandle {
612        ShellPolicyHandle {
613            inner: Arc::clone(&self.policy),
614        }
615    }
616
617    /// Execute a bash block bypassing the confirmation check (called after user confirms).
618    ///
619    /// # Errors
620    ///
621    /// Returns `ToolError` on blocked commands, sandbox violations, or execution failures.
622    #[cfg_attr(
623        feature = "profiling",
624        tracing::instrument(name = "tool.shell", skip_all, fields(exit_code = tracing::field::Empty, duration_ms = tracing::field::Empty))
625    )]
626    pub async fn execute_confirmed(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
627        self.execute_inner(response, true).await
628    }
629
630    async fn execute_inner(
631        &self,
632        response: &str,
633        skip_confirm: bool,
634    ) -> Result<Option<ToolOutput>, ToolError> {
635        let blocks = extract_bash_blocks(response);
636        if blocks.is_empty() {
637            return Ok(None);
638        }
639
640        // Resolve with no call-site context so legacy path gets the same CWD/env
641        // treatment as the structured-tool-call path (default_env, skill_env, blocklist).
642        let resolved = self.resolve_context(None)?;
643
644        let mut outputs = Vec::with_capacity(blocks.len());
645        let mut cumulative_filter_stats: Option<FilterStats> = None;
646        let mut last_envelope: Option<ShellOutputEnvelope> = None;
647        #[allow(clippy::cast_possible_truncation)]
648        let blocks_executed = blocks.len() as u32;
649
650        for block in &blocks {
651            let (output_line, per_block_stats, envelope) =
652                self.execute_block(block, skip_confirm, &resolved).await?;
653            if let Some(fs) = per_block_stats {
654                let stats = cumulative_filter_stats.get_or_insert_with(FilterStats::default);
655                stats.raw_chars += fs.raw_chars;
656                stats.filtered_chars += fs.filtered_chars;
657                stats.raw_lines += fs.raw_lines;
658                stats.filtered_lines += fs.filtered_lines;
659                stats.confidence = Some(match (stats.confidence, fs.confidence) {
660                    (Some(prev), Some(cur)) => crate::filter::worse_confidence(prev, cur),
661                    (Some(prev), None) => prev,
662                    (None, Some(cur)) => cur,
663                    (None, None) => unreachable!(),
664                });
665                if stats.command.is_none() {
666                    stats.command = fs.command;
667                }
668                if stats.kept_lines.is_empty() && !fs.kept_lines.is_empty() {
669                    stats.kept_lines = fs.kept_lines;
670                }
671            }
672            last_envelope = Some(envelope);
673            outputs.push(output_line);
674        }
675
676        let raw_response = last_envelope
677            .as_ref()
678            .and_then(|e| serde_json::to_value(e).ok());
679
680        Ok(Some(ToolOutput {
681            tool_name: ToolName::new("bash"),
682            summary: outputs.join("\n\n"),
683            blocks_executed,
684            filter_stats: cumulative_filter_stats,
685            diff: None,
686            streamed: self.tool_event_tx.is_some(),
687            terminal_id: None,
688            locations: None,
689            raw_response,
690            claim_source: Some(ClaimSource::Shell),
691        }))
692    }
693
    /// Run a single bash `block` end to end and return its rendered output line,
    /// optional filter statistics, and the execution envelope.
    ///
    /// Steps, in order: permission check (honouring `skip_confirm`), sandbox
    /// validation against `resolved.cwd`, snapshot capture, `Started` event,
    /// execution, cancellation check, optional rollback, error classification +
    /// audit, output filtering, `Completed` event, audit log entry.
    ///
    /// # Errors
    ///
    /// Propagates errors from the permission check, sandbox validation and
    /// snapshot capture; returns [`ToolError::Cancelled`] when the run exits with
    /// code 130 while the cancel token is cancelled; returns whatever
    /// `classify_and_audit` reports for the finished command.
    async fn execute_block(
        &self,
        block: &str,
        skip_confirm: bool,
        resolved: &ResolvedContext,
    ) -> Result<(String, Option<FilterStats>, ShellOutputEnvelope), ToolError> {
        // Gate checks run before any snapshot or process work.
        self.check_permissions(block, skip_confirm).await?;
        self.validate_sandbox_with_cwd(block, &resolved.cwd)?;

        // `snapshot_warning`, when present, is prepended to the rendered output below.
        let (snapshot, snapshot_warning) = self.capture_snapshot_for(block)?;

        if let Some(ref tx) = self.tool_event_tx {
            // The profile is reported using its `Debug` rendering.
            let sandbox_profile = self
                .sandbox_policy
                .as_ref()
                .map(|p| format!("{:?}", p.profile));
            // Non-terminal streaming event: use try_send (drop on full).
            let _ = tx.try_send(ToolEvent::Started {
                tool_name: ToolName::new("bash"),
                command: block.to_owned(),
                sandbox_profile,
                resolved_cwd: Some(resolved.cwd.display().to_string()),
                execution_env: resolved.name.clone(),
            });
        }

        let start = Instant::now();
        // The sandbox is only passed on when both the backend and its policy are set.
        let sandbox_pair = self
            .sandbox
            .as_ref()
            .zip(self.sandbox_policy.as_ref())
            .map(|(sb, pol)| (sb.as_ref(), pol));
        let (mut envelope, out) = execute_bash_with_context(
            block,
            self.timeout,
            self.tool_event_tx.as_ref(),
            "",
            self.cancel_token.as_ref(),
            resolved,
            sandbox_pair,
        )
        .await;
        let exit_code = envelope.exit_code;
        // Exit code 130 (conventionally 128 + SIGINT) counts as a cancellation only
        // when our own token was cancelled.
        // NOTE(review): this early return skips the rollback and audit steps below —
        // confirm that dropping `snapshot` here is the intended cleanup.
        if exit_code == 130
            && self
                .cancel_token
                .as_ref()
                .is_some_and(CancellationToken::is_cancelled)
        {
            return Err(ToolError::Cancelled);
        }
        #[allow(clippy::cast_possible_truncation)]
        let duration_ms = start.elapsed().as_millis() as u64;

        // Whether a rollback actually happens is decided inside `maybe_rollback`.
        if let Some(snap) = snapshot {
            self.maybe_rollback(snap, block, exit_code, duration_ms)
                .await;
        }

        // A classified failure is reported as an unsuccessful completion and returned.
        if let Some(err) = self
            .classify_and_audit(block, &out, exit_code, duration_ms)
            .await
        {
            self.emit_completed(block, &out, false, None, None).await;
            return Err(err);
        }

        let (filtered, per_block_stats) = self.apply_output_filter(block, &out, exit_code);

        // The completion event carries the unfiltered output; success means the
        // output has no `[error]` marker.
        self.emit_completed(
            block,
            &out,
            !out.contains("[error]"),
            per_block_stats.clone(),
            None,
        )
        .await;

        // Mark truncated if output was shortened during filtering.
        // NOTE(review): this overwrites any `truncated` value already set on the
        // envelope by the execution step — confirm that is intended.
        envelope.truncated = filtered.len() < out.len();

        // The audit result is stricter than the completion flag above: a `[stderr]`
        // marker also counts as an error here.
        let audit_result = if out.contains("[error]") || out.contains("[stderr]") {
            AuditResult::Error {
                message: out.clone(),
            }
        } else {
            AuditResult::Success
        };
        self.log_audit_with_context(
            block,
            audit_result,
            duration_ms,
            None,
            Some(exit_code),
            envelope.truncated,
            resolved,
        )
        .await;

        // Rendered as a shell transcript: optional warning, `$ <command>`, filtered output.
        let output_line = match snapshot_warning {
            Some(warn) => format!("{warn}\n$ {block}\n{filtered}"),
            None => format!("$ {block}\n{filtered}"),
        };
        Ok((output_line, per_block_stats, envelope))
    }
799
800    /// Execute `command` using a pre-resolved [`ResolvedContext`] (from `resolve_context`).
801    ///
802    /// This is the structured-tool-call path — it uses the resolved CWD and env directly
803    /// instead of re-reading process state on every call.
804    #[tracing::instrument(name = "tool.shell.execute_block", skip(self, resolved), level = "info",
805        fields(cwd = %resolved.cwd.display(), env_name = resolved.name.as_deref().unwrap_or("")))]
806    async fn execute_block_with_context(
807        &self,
808        command: &str,
809        skip_confirm: bool,
810        resolved: &ResolvedContext,
811        tool_call_id: &str,
812    ) -> Result<Option<ToolOutput>, ToolError> {
813        self.check_permissions(command, skip_confirm).await?;
814        self.validate_sandbox_with_cwd(command, &resolved.cwd)?;
815
816        let (snapshot, snapshot_warning) = self.capture_snapshot_for(command)?;
817
818        if let Some(ref tx) = self.tool_event_tx {
819            let sandbox_profile = self
820                .sandbox_policy
821                .as_ref()
822                .map(|p| format!("{:?}", p.profile));
823            let _ = tx.try_send(ToolEvent::Started {
824                tool_name: ToolName::new("bash"),
825                command: command.to_owned(),
826                sandbox_profile,
827                resolved_cwd: Some(resolved.cwd.display().to_string()),
828                execution_env: resolved.name.clone(),
829            });
830        }
831
832        let start = Instant::now();
833        let sandbox_pair = self
834            .sandbox
835            .as_ref()
836            .zip(self.sandbox_policy.as_ref())
837            .map(|(sb, pol)| (sb.as_ref(), pol));
838        let (mut envelope, out) = execute_bash_with_context(
839            command,
840            self.timeout,
841            self.tool_event_tx.as_ref(),
842            tool_call_id,
843            self.cancel_token.as_ref(),
844            resolved,
845            sandbox_pair,
846        )
847        .await;
848        let exit_code = envelope.exit_code;
849        if exit_code == 130
850            && self
851                .cancel_token
852                .as_ref()
853                .is_some_and(CancellationToken::is_cancelled)
854        {
855            return Err(ToolError::Cancelled);
856        }
857        #[allow(clippy::cast_possible_truncation)]
858        let duration_ms = start.elapsed().as_millis() as u64;
859
860        if let Some(snap) = snapshot {
861            self.maybe_rollback(snap, command, exit_code, duration_ms)
862                .await;
863        }
864
865        if let Some(err) = self
866            .classify_and_audit(command, &out, exit_code, duration_ms)
867            .await
868        {
869            self.emit_completed(command, &out, false, None, None).await;
870            return Err(err);
871        }
872
873        let (filtered, per_block_stats) = self.apply_output_filter(command, &out, exit_code);
874
875        self.emit_completed(
876            command,
877            &out,
878            !out.contains("[error]"),
879            per_block_stats.clone(),
880            None,
881        )
882        .await;
883
884        envelope.truncated = filtered.len() < out.len();
885
886        let audit_result = if out.contains("[error]") || out.contains("[stderr]") {
887            AuditResult::Error {
888                message: out.clone(),
889            }
890        } else {
891            AuditResult::Success
892        };
893        self.log_audit_with_context(
894            command,
895            audit_result,
896            duration_ms,
897            None,
898            Some(exit_code),
899            envelope.truncated,
900            resolved,
901        )
902        .await;
903
904        let output_line = match snapshot_warning {
905            Some(warn) => format!("{warn}\n$ {command}\n{filtered}"),
906            None => format!("$ {command}\n{filtered}"),
907        };
908        Ok(Some(ToolOutput {
909            tool_name: ToolName::new("bash"),
910            summary: output_line,
911            blocks_executed: 1,
912            filter_stats: per_block_stats,
913            diff: None,
914            streamed: false,
915            terminal_id: None,
916            locations: None,
917            raw_response: None,
918            claim_source: Some(ClaimSource::Shell),
919        }))
920    }
921
922    fn capture_snapshot_for(
923        &self,
924        block: &str,
925    ) -> Result<(Option<TransactionSnapshot>, Option<String>), ToolError> {
926        if !self.transactional || !is_write_command(block) {
927            return Ok((None, None));
928        }
929        let paths = affected_paths(block, &self.transaction_scope_matchers);
930        if paths.is_empty() {
931            return Ok((None, None));
932        }
933        match TransactionSnapshot::capture(&paths, self.max_snapshot_bytes) {
934            Ok(snap) => {
935                tracing::debug!(
936                    files = snap.file_count(),
937                    bytes = snap.total_bytes(),
938                    "transaction snapshot captured"
939                );
940                Ok((Some(snap), None))
941            }
942            Err(e) if self.snapshot_required => Err(ToolError::SnapshotFailed {
943                reason: e.to_string(),
944            }),
945            Err(e) => {
946                tracing::warn!(err = %e, "transaction snapshot failed, proceeding without rollback");
947                Ok((
948                    None,
949                    Some(format!("[warn] snapshot failed: {e}; rollback unavailable")),
950                ))
951            }
952        }
953    }
954
955    async fn maybe_rollback(
956        &self,
957        snap: TransactionSnapshot,
958        block: &str,
959        exit_code: i32,
960        duration_ms: u64,
961    ) {
962        let should_rollback = self.auto_rollback
963            && if self.auto_rollback_exit_codes.is_empty() {
964                exit_code >= 2
965            } else {
966                self.auto_rollback_exit_codes.contains(&exit_code)
967            };
968        if !should_rollback {
969            // Snapshot dropped here; TempDir auto-cleans.
970            return;
971        }
972        match snap.rollback() {
973            Ok(report) => {
974                tracing::info!(
975                    restored = report.restored_count,
976                    deleted = report.deleted_count,
977                    "transaction rollback completed"
978                );
979                self.log_audit(
980                    block,
981                    AuditResult::Rollback {
982                        restored: report.restored_count,
983                        deleted: report.deleted_count,
984                    },
985                    duration_ms,
986                    None,
987                    Some(exit_code),
988                    false,
989                )
990                .await;
991                if let Some(ref tx) = self.tool_event_tx {
992                    // Terminal event: must deliver. Use send().await.
993                    let _ = tx
994                        .send(ToolEvent::Rollback {
995                            tool_name: ToolName::new("bash"),
996                            command: block.to_owned(),
997                            restored_count: report.restored_count,
998                            deleted_count: report.deleted_count,
999                        })
1000                        .await;
1001                }
1002            }
1003            Err(e) => {
1004                tracing::error!(err = %e, "transaction rollback failed");
1005            }
1006        }
1007    }
1008
1009    async fn classify_and_audit(
1010        &self,
1011        block: &str,
1012        out: &str,
1013        exit_code: i32,
1014        duration_ms: u64,
1015    ) -> Option<ToolError> {
1016        if out.contains("[error] command timed out") {
1017            self.log_audit(
1018                block,
1019                AuditResult::Timeout,
1020                duration_ms,
1021                None,
1022                Some(exit_code),
1023                false,
1024            )
1025            .await;
1026            return Some(ToolError::Timeout {
1027                timeout_secs: self.timeout.as_secs(),
1028            });
1029        }
1030
1031        if let Some(category) = classify_shell_exit(exit_code, out) {
1032            return Some(ToolError::Shell {
1033                exit_code,
1034                category,
1035                message: out.lines().take(3).collect::<Vec<_>>().join("; "),
1036            });
1037        }
1038
1039        None
1040    }
1041
1042    fn apply_output_filter(
1043        &self,
1044        block: &str,
1045        out: &str,
1046        exit_code: i32,
1047    ) -> (String, Option<FilterStats>) {
1048        let sanitized = sanitize_output(out);
1049        if let Some(ref registry) = self.output_filter_registry {
1050            match registry.apply(block, &sanitized, exit_code) {
1051                Some(fr) => {
1052                    tracing::debug!(
1053                        command = block,
1054                        raw = fr.raw_chars,
1055                        filtered = fr.filtered_chars,
1056                        savings_pct = fr.savings_pct(),
1057                        "output filter applied"
1058                    );
1059                    let stats = FilterStats {
1060                        raw_chars: fr.raw_chars,
1061                        filtered_chars: fr.filtered_chars,
1062                        raw_lines: fr.raw_lines,
1063                        filtered_lines: fr.filtered_lines,
1064                        confidence: Some(fr.confidence),
1065                        command: Some(block.to_owned()),
1066                        kept_lines: fr.kept_lines.clone(),
1067                    };
1068                    (fr.output, Some(stats))
1069                }
1070                None => (sanitized, None),
1071            }
1072        } else {
1073            (sanitized, None)
1074        }
1075    }
1076
1077    async fn emit_completed(
1078        &self,
1079        command: &str,
1080        output: &str,
1081        success: bool,
1082        filter_stats: Option<FilterStats>,
1083        run_id: Option<RunId>,
1084    ) {
1085        if let Some(ref tx) = self.tool_event_tx {
1086            // Terminal event: must deliver. Use send().await (never dropped).
1087            let _ = tx
1088                .send(ToolEvent::Completed {
1089                    tool_name: ToolName::new("bash"),
1090                    command: command.to_owned(),
1091                    output: output.to_owned(),
1092                    success,
1093                    filter_stats,
1094                    diff: None,
1095                    run_id,
1096                })
1097                .await;
1098        }
1099    }
1100
1101    /// Check blocklist, permission policy, and confirmation requirements for `block`.
1102    #[allow(clippy::too_many_lines)]
1103    async fn check_permissions(&self, block: &str, skip_confirm: bool) -> Result<(), ToolError> {
1104        // Deobfuscate before any policy check to prevent bypass via encoding tricks.
1105        let normalized = deobfuscate::deobfuscate(block);
1106        let effective = normalized.as_str();
1107
1108        // Always check the blocklist first — it is a hard security boundary
1109        // that must not be bypassed by the PermissionPolicy layer.
1110        // Check both the original block (handles subshell metachar detection) and the
1111        // normalized form (handles hex/octal bypass). First match wins.
1112        let blocked_cmd = self
1113            .find_blocked_command(block)
1114            .or_else(|| self.find_blocked_command(effective));
1115        if let Some(blocked) = blocked_cmd {
1116            let fix = safe_fix::suggest_fix(effective);
1117            let err = if let Some(suggestion) = fix {
1118                let reason = format!("{blocked} — suggestion: {}", suggestion.alternative);
1119                self.log_audit(
1120                    block,
1121                    AuditResult::Blocked {
1122                        reason: format!("blocked command: {reason}"),
1123                    },
1124                    0,
1125                    None,
1126                    None,
1127                    false,
1128                )
1129                .await;
1130                ToolError::BlockedWithFix {
1131                    command: blocked,
1132                    suggestion: Some(suggestion),
1133                }
1134            } else {
1135                self.log_audit(
1136                    block,
1137                    AuditResult::Blocked {
1138                        reason: format!("blocked command: {blocked}"),
1139                    },
1140                    0,
1141                    None,
1142                    None,
1143                    false,
1144                )
1145                .await;
1146                ToolError::Blocked { command: blocked }
1147            };
1148            return Err(err);
1149        }
1150
1151        if let Some(ref policy) = self.permission_policy {
1152            match policy.check("bash", effective) {
1153                PermissionAction::Deny => {
1154                    let err = match safe_fix::suggest_fix(effective) {
1155                        Some(suggestion) => ToolError::BlockedWithFix {
1156                            command: effective.to_owned(),
1157                            suggestion: Some(suggestion),
1158                        },
1159                        None => ToolError::Blocked {
1160                            command: effective.to_owned(),
1161                        },
1162                    };
1163                    self.log_audit(
1164                        block,
1165                        AuditResult::Blocked {
1166                            reason: "denied by permission policy".to_owned(),
1167                        },
1168                        0,
1169                        None,
1170                        None,
1171                        false,
1172                    )
1173                    .await;
1174                    return Err(err);
1175                }
1176                PermissionAction::Ask if !skip_confirm => {
1177                    return Err(ToolError::ConfirmationRequired {
1178                        command: effective.to_owned(),
1179                    });
1180                }
1181                _ => {}
1182            }
1183        } else if !skip_confirm {
1184            // Check original block first (catches subshell metacharacters like `` ` ``),
1185            // then normalized form (catches obfuscated confirmation-required patterns).
1186            let confirm_pattern = self
1187                .find_confirm_command(block)
1188                .or_else(|| self.find_confirm_command(effective));
1189            if let Some(pattern) = confirm_pattern {
1190                return Err(ToolError::ConfirmationRequired {
1191                    command: pattern.to_owned(),
1192                });
1193            }
1194        }
1195
1196        // Risk chain check — record the call and block if threshold exceeded.
1197        if let Some(ref chain) = self.risk_chain {
1198            let verdict = chain.record("bash", effective, self.risk_chain_threshold);
1199            if verdict.should_block {
1200                let chain_name = verdict
1201                    .chain_pattern
1202                    .unwrap_or_else(|| "unknown".to_owned());
1203                tracing::warn!(
1204                    chain = chain_name,
1205                    score = verdict.cumulative_score,
1206                    "risk chain threshold exceeded"
1207                );
1208                return Err(ToolError::Blocked {
1209                    command: format!(
1210                        "risk chain blocked: {} (score {:.2})",
1211                        chain_name, verdict.cumulative_score
1212                    ),
1213                });
1214            }
1215        }
1216
1217        Ok(())
1218    }
1219
1220    /// Resolve the effective `(cwd, env, name, trusted)` for a single tool call.
1221    ///
1222    /// Implements the 6-step merge defined in the per-turn env spec:
1223    /// 1. Base = inherited process env.
1224    /// 2. Filter `env_blocklist`.
1225    /// 3. Apply `skill_env` overrides.
1226    /// 4. If `ctx` or `default_env` points to a named registry entry, apply its overrides.
1227    /// 5. Apply call-site `ctx.env_overrides`.
1228    /// 6. If context is untrusted, re-apply `env_blocklist` to strip any re-introduced keys.
1229    ///
1230    /// CWD precedence (highest wins): call-site `ctx.cwd` → named registry `cwd` → `default_env`
1231    /// registry `cwd` → `std::env::current_dir()`.
1232    #[tracing::instrument(name = "tools.shell.resolve_context", skip(self, ctx), level = "info")]
1233    pub(crate) fn resolve_context(
1234        &self,
1235        ctx: Option<&ExecutionContext>,
1236    ) -> Result<ResolvedContext, ToolError> {
1237        // Step 1: base env = process env.
1238        let mut env: HashMap<String, String> = std::env::vars().collect();
1239
1240        // Step 2: filter env_blocklist (prefix match, consistent with build_bash_command).
1241        env.retain(|k, _| {
1242            !self
1243                .env_blocklist
1244                .iter()
1245                .any(|prefix| k.starts_with(prefix.as_str()))
1246        });
1247
1248        // Step 3: apply skill_env.
1249        if let Some(skill) = self.skill_env.read().as_ref() {
1250            for (k, v) in skill {
1251                env.insert(k.clone(), v.clone());
1252            }
1253        }
1254
1255        // Determine the resolved name, cwd_override, and trusted flag.
1256        let mut resolved_name: Option<String> = None;
1257        let mut cwd_override: Option<PathBuf> = None;
1258        let mut trusted = false;
1259
1260        // Resolve via default_env registry entry (lowest priority named layer).
1261        if let Some(default_name) = &self.default_env
1262            && let Some(default_ctx) = self.environments.get(default_name.as_str())
1263        {
1264            resolved_name.get_or_insert_with(|| default_name.clone());
1265            if cwd_override.is_none() {
1266                cwd_override = default_ctx.cwd().map(ToOwned::to_owned);
1267            }
1268            trusted = default_ctx.is_trusted();
1269            for (k, v) in default_ctx.env_overrides() {
1270                env.insert(k.clone(), v.clone());
1271            }
1272        }
1273
1274        // Step 4: if call-site ctx names a registry entry, apply its overrides.
1275        if let Some(ctx) = ctx {
1276            if let Some(name) = ctx.name() {
1277                if let Some(reg_ctx) = self.environments.get(name) {
1278                    resolved_name = Some(name.to_owned());
1279                    if let Some(cwd) = reg_ctx.cwd() {
1280                        cwd_override = Some(cwd.to_owned());
1281                    }
1282                    trusted = reg_ctx.is_trusted();
1283                    for (k, v) in reg_ctx.env_overrides() {
1284                        env.insert(k.clone(), v.clone());
1285                    }
1286                } else {
1287                    return Err(ToolError::Execution(std::io::Error::other(format!(
1288                        "unknown execution environment '{name}'"
1289                    ))));
1290                }
1291            }
1292
1293            // Step 5: apply call-site cwd and env overrides (highest priority).
1294            if let Some(cwd) = ctx.cwd() {
1295                cwd_override = Some(cwd.to_owned());
1296            }
1297            if !ctx.is_trusted() {
1298                trusted = false;
1299            }
1300            for (k, v) in ctx.env_overrides() {
1301                env.insert(k.clone(), v.clone());
1302            }
1303        }
1304
1305        // Step 6: re-apply blocklist for untrusted contexts (prefix match).
1306        if !trusted {
1307            env.retain(|k, _| {
1308                !self
1309                    .env_blocklist
1310                    .iter()
1311                    .any(|prefix| k.starts_with(prefix.as_str()))
1312            });
1313        }
1314
1315        // Resolve final CWD: override (canonicalized) or process CWD.
1316        let cwd = if let Some(raw) = cwd_override {
1317            // Make relative paths absolute before canonicalize so they resolve
1318            // correctly regardless of the process working directory.
1319            let raw = if raw.is_absolute() {
1320                raw
1321            } else {
1322                std::env::current_dir()
1323                    .unwrap_or_else(|_| PathBuf::from("."))
1324                    .join(raw)
1325            };
1326            let canonical = raw
1327                .canonicalize()
1328                .map_err(|_| ToolError::SandboxViolation {
1329                    path: raw.display().to_string(),
1330                })?;
1331            // Validate against allowed_paths.
1332            if !self
1333                .allowed_paths_canonical
1334                .iter()
1335                .any(|p| canonical.starts_with(p))
1336            {
1337                return Err(ToolError::SandboxViolation {
1338                    path: canonical.display().to_string(),
1339                });
1340            }
1341            canonical
1342        } else {
1343            std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
1344        };
1345
1346        Ok(ResolvedContext {
1347            cwd,
1348            env,
1349            name: resolved_name,
1350            trusted,
1351        })
1352    }
1353
1354    fn validate_sandbox_with_cwd(
1355        &self,
1356        code: &str,
1357        cwd: &std::path::Path,
1358    ) -> Result<(), ToolError> {
1359        for token in extract_paths(code) {
1360            if has_traversal(&token) {
1361                return Err(ToolError::SandboxViolation { path: token });
1362            }
1363
1364            if self.allowed_paths_canonical.is_empty() {
1365                continue;
1366            }
1367
1368            let path = if token.starts_with('/') {
1369                PathBuf::from(&token)
1370            } else {
1371                cwd.join(&token)
1372            };
1373            // For existing paths, canonicalize to resolve symlinks before the prefix
1374            // check — `std::path::absolute` does NOT collapse `..` or follow symlinks.
1375            // For non-existent paths, canonicalize the nearest existing ancestor and
1376            // reattach the suffix: this rejects `allowed/../../etc/shadow` while
1377            // allowing references to not-yet-created files within allowed dirs.
1378            let canonical = if let Ok(c) = path.canonicalize() {
1379                c
1380            } else {
1381                // Collect path components so we can walk up from the full path.
1382                let components: Vec<_> = path.components().collect();
1383                let mut base_len = components.len();
1384                let canonical_base = loop {
1385                    if base_len == 0 {
1386                        break PathBuf::new();
1387                    }
1388                    let candidate: PathBuf = components[..base_len].iter().collect();
1389                    if let Ok(c) = candidate.canonicalize() {
1390                        break c;
1391                    }
1392                    base_len -= 1;
1393                };
1394                // Reattach the non-existent suffix (components after base_len).
1395                components[base_len..]
1396                    .iter()
1397                    .fold(canonical_base, |acc, c| acc.join(c))
1398            };
1399            if !self
1400                .allowed_paths_canonical
1401                .iter()
1402                .any(|allowed| canonical.starts_with(allowed))
1403            {
1404                return Err(ToolError::SandboxViolation {
1405                    path: canonical.display().to_string(),
1406                });
1407            }
1408        }
1409        Ok(())
1410    }
1411
1412    fn validate_sandbox(&self, code: &str) -> Result<(), ToolError> {
1413        let cwd = std::env::current_dir().unwrap_or_default();
1414        self.validate_sandbox_with_cwd(code, &cwd)
1415    }
1416
1417    /// Scan `code` for commands that match the configured blocklist.
1418    ///
1419    /// The function normalizes input via [`strip_shell_escapes`] (decoding `$'\xNN'`,
1420    /// `$'\NNN'`, backslash escapes, and quote-splitting) and then splits on shell
1421    /// metacharacters (`||`, `&&`, `;`, `|`, `\n`) via [`tokenize_commands`].  Each
1422    /// resulting token sequence is tested against every entry in `blocked_commands`
1423    /// through [`tokens_match_pattern`], which handles transparent prefixes (`env`,
1424    /// `command`, `exec`, etc.), absolute paths, and dot-suffixed variants.
1425    ///
1426    /// # Known limitations
1427    ///
1428    /// The following constructs are **not** detected by this function:
1429    ///
1430    /// - **Here-strings** `<<<` with a shell interpreter: the outer command is the
1431    ///   shell (`bash`, `sh`), which is not blocked by default; the payload string is
1432    ///   opaque to this filter.
1433    ///   Example: `bash <<< 'sudo rm -rf /'` — inner payload is not parsed.
1434    ///
1435    /// - **`eval` and `bash -c` / `sh -c`**: the string argument is not parsed; any
1436    ///   blocked command embedded as a string argument passes through undetected.
1437    ///   Example: `eval 'sudo rm -rf /'`.
1438    ///
1439    /// - **Variable expansion**: `strip_shell_escapes` does not resolve variable
1440    ///   references, so `cmd=sudo; $cmd rm` bypasses the blocklist.
1441    ///
1442    /// `$(...)`, backtick, `<(...)`, and `>(...)` substitutions are detected by
1443    /// [`extract_subshell_contents`], which extracts the inner command string and
1444    /// checks it against the blocklist separately.  The default `confirm_patterns`
1445    /// in [`ShellConfig`] additionally include `"$("`, `` "`" ``, `"<("`, `">("`,
1446    /// `"<<<"`, and `"eval "`, so those constructs also trigger a confirmation
1447    /// request via [`find_confirm_command`] before execution.
1448    ///
1449    /// For high-security deployments, complement this filter with OS-level sandboxing
1450    /// (Linux namespaces, seccomp, or similar) to enforce hard execution boundaries.
1451    /// Scan `code` for commands that match the configured blocklist.
1452    ///
1453    /// Returns an owned `String` because the backing `Vec<String>` lives inside an
1454    /// `ArcSwap` that may be replaced between calls — borrowing from the snapshot
1455    /// guard would be unsound after the guard drops.
1456    fn find_blocked_command(&self, code: &str) -> Option<String> {
1457        let snapshot = self.policy.load_full();
1458        let cleaned = strip_shell_escapes(&code.to_lowercase());
1459        let commands = tokenize_commands(&cleaned);
1460        for cmd_tokens in &commands {
1461            let joined = cmd_tokens.join(" ");
1462            if is_blocked_rm_worktrees(&joined) {
1463                return Some("rm --recursive --force .git/worktrees".to_owned());
1464            }
1465        }
1466        for blocked in &snapshot.blocked_commands {
1467            for cmd_tokens in &commands {
1468                if tokens_match_pattern(cmd_tokens, blocked) {
1469                    return Some(blocked.clone());
1470                }
1471            }
1472        }
1473        // Also check commands embedded inside subshell constructs.
1474        for inner in extract_subshell_contents(&cleaned) {
1475            let inner_commands = tokenize_commands(&inner);
1476            for cmd_tokens in &inner_commands {
1477                let joined = cmd_tokens.join(" ");
1478                if is_blocked_rm_worktrees(&joined) {
1479                    return Some("rm --recursive --force .git/worktrees".to_owned());
1480                }
1481            }
1482            for blocked in &snapshot.blocked_commands {
1483                for cmd_tokens in &inner_commands {
1484                    if tokens_match_pattern(cmd_tokens, blocked) {
1485                        return Some(blocked.clone());
1486                    }
1487                }
1488            }
1489        }
1490        None
1491    }
1492
1493    fn find_confirm_command(&self, code: &str) -> Option<&str> {
1494        let normalized = code.to_lowercase();
1495        for pattern in &self.confirm_patterns {
1496            if normalized.contains(pattern.as_str()) {
1497                return Some(pattern.as_str());
1498            }
1499        }
1500        None
1501    }
1502
1503    async fn log_audit(
1504        &self,
1505        command: &str,
1506        result: AuditResult,
1507        duration_ms: u64,
1508        error: Option<&ToolError>,
1509        exit_code: Option<i32>,
1510        truncated: bool,
1511    ) {
1512        if let Some(ref logger) = self.audit_logger {
1513            let (error_category, error_domain, error_phase) =
1514                error.map_or((None, None, None), |e| {
1515                    let cat = e.category();
1516                    (
1517                        Some(cat.label().to_owned()),
1518                        Some(cat.domain().label().to_owned()),
1519                        Some(cat.phase().label().to_owned()),
1520                    )
1521                });
1522            let entry = AuditEntry {
1523                timestamp: chrono_now(),
1524                tool: "shell".into(),
1525                command: command.into(),
1526                result,
1527                duration_ms,
1528                error_category,
1529                error_domain,
1530                error_phase,
1531                claim_source: Some(ClaimSource::Shell),
1532                mcp_server_id: None,
1533                injection_flagged: false,
1534                embedding_anomalous: false,
1535                cross_boundary_mcp_to_acp: false,
1536                adversarial_policy_decision: None,
1537                exit_code,
1538                truncated,
1539                caller_id: None,
1540                policy_match: None,
1541                correlation_id: None,
1542                vigil_risk: None,
1543                execution_env: None,
1544                resolved_cwd: None,
1545                scope_at_definition: None,
1546                scope_at_dispatch: None,
1547            };
1548            logger.log(&entry).await;
1549        }
1550    }
1551
1552    #[allow(clippy::too_many_arguments)]
1553    async fn log_audit_with_context(
1554        &self,
1555        command: &str,
1556        result: AuditResult,
1557        duration_ms: u64,
1558        error: Option<&ToolError>,
1559        exit_code: Option<i32>,
1560        truncated: bool,
1561        resolved: &ResolvedContext,
1562    ) {
1563        if let Some(ref logger) = self.audit_logger {
1564            let (error_category, error_domain, error_phase) =
1565                error.map_or((None, None, None), |e| {
1566                    let cat = e.category();
1567                    (
1568                        Some(cat.label().to_owned()),
1569                        Some(cat.domain().label().to_owned()),
1570                        Some(cat.phase().label().to_owned()),
1571                    )
1572                });
1573            let entry = AuditEntry {
1574                timestamp: chrono_now(),
1575                tool: "shell".into(),
1576                command: command.into(),
1577                result,
1578                duration_ms,
1579                error_category,
1580                error_domain,
1581                error_phase,
1582                claim_source: Some(ClaimSource::Shell),
1583                mcp_server_id: None,
1584                injection_flagged: false,
1585                embedding_anomalous: false,
1586                cross_boundary_mcp_to_acp: false,
1587                adversarial_policy_decision: None,
1588                exit_code,
1589                truncated,
1590                caller_id: None,
1591                policy_match: None,
1592                correlation_id: None,
1593                vigil_risk: None,
1594                execution_env: resolved.name.clone(),
1595                resolved_cwd: Some(resolved.cwd.display().to_string()),
1596                scope_at_definition: None,
1597                scope_at_dispatch: None,
1598            };
1599            logger.log(&entry).await;
1600        }
1601    }
1602}
1603
1604impl ToolExecutor for std::sync::Arc<ShellExecutor> {
1605    async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
1606        self.as_ref().execute(response).await
1607    }
1608
1609    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1610        self.as_ref().tool_definitions()
1611    }
1612
1613    async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
1614        self.as_ref().execute_tool_call(call).await
1615    }
1616
1617    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
1618        self.as_ref().set_skill_env(env);
1619    }
1620}
1621
1622impl ToolExecutor for ShellExecutor {
1623    async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
1624        self.execute_inner(response, false).await
1625    }
1626
1627    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
1628        use crate::registry::{InvocationHint, ToolDef};
1629        vec![ToolDef {
1630            id: "bash".into(),
1631            description: "Execute a shell command and return stdout/stderr.\n\nParameters: command (string, required) - shell command to run\nReturns: stdout and stderr combined, prefixed with exit code\nErrors: Blocked if command matches security policy; Timeout after configured seconds; SandboxViolation if path outside allowed dirs\nExample: {\"command\": \"ls -la /tmp\"}".into(),
1632            schema: schemars::schema_for!(BashParams),
1633            invocation: InvocationHint::FencedBlock("bash"),
1634            output_schema: None,
1635        }]
1636    }
1637
1638    #[tracing::instrument(name = "tool.shell.execute_tool_call", skip(self, call), level = "info",
1639        fields(tool_id = %call.tool_id, env = call.context.as_ref().and_then(|c| c.name()).unwrap_or("")))]
1640    async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
1641        if call.tool_id != "bash" {
1642            return Ok(None);
1643        }
1644        let params: BashParams = crate::executor::deserialize_params(&call.params)?;
1645        if params.command.is_empty() {
1646            return Ok(None);
1647        }
1648        let command = &params.command;
1649
1650        // Resolve per-turn execution context — done before the background branch so that
1651        // background tasks also receive the correct env and CWD (spec §6).
1652        let resolved = self.resolve_context(call.context.as_ref())?;
1653
1654        if params.background {
1655            let run_id = self
1656                .spawn_background_with_context(command, &resolved)
1657                .await?;
1658            let id_short = &run_id.to_string()[..8];
1659            return Ok(Some(ToolOutput {
1660                tool_name: ToolName::new("bash"),
1661                summary: format!(
1662                    "[background] started run_id={run_id} — command: {command}\n\
1663                     The command is running in the background. When it completes, \
1664                     results will appear at the start of the next turn (run_id_short={id_short})."
1665                ),
1666                blocks_executed: 1,
1667                filter_stats: None,
1668                diff: None,
1669                streamed: true,
1670                terminal_id: None,
1671                locations: None,
1672                raw_response: None,
1673                claim_source: Some(ClaimSource::Shell),
1674            }));
1675        }
1676
1677        self.execute_block_with_context(command, false, &resolved, &call.tool_call_id)
1678            .await
1679    }
1680
1681    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
1682        ShellExecutor::set_skill_env(self, env);
1683    }
1684}
1685
1686impl ShellExecutor {
1687    /// Spawn `command` as a background shell process and return its [`RunId`].
1688    ///
1689    /// All security checks (blocklist, sandbox, permissions) are performed synchronously
1690    /// before spawning. When the cap (`max_background_runs`) is already reached, this
1691    /// returns [`ToolError::Blocked`] immediately without spawning.
1692    ///
1693    /// On completion the spawned task emits a
1694    /// `ToolEvent::Completed { run_id: Some(..), .. }` via `tool_event_tx`.
1695    ///
1696    /// # Errors
1697    ///
1698    /// Returns [`ToolError::Blocked`] when the background run cap is reached or the command
1699    /// is blocked by policy. Returns other [`ToolError`] variants on sandbox/permission
1700    /// failures.
1701    pub async fn spawn_background(&self, command: &str) -> Result<RunId, ToolError> {
1702        use std::sync::atomic::Ordering;
1703
1704        // Reject new spawns while shutting down.
1705        if self.shutting_down.load(Ordering::Acquire) {
1706            return Err(ToolError::Blocked {
1707                command: command.to_owned(),
1708            });
1709        }
1710
1711        // Enforce security checks — same as blocking mode.
1712        self.check_permissions(command, false).await?;
1713        self.validate_sandbox(command)?;
1714
1715        // Check cap under lock, then register the handle and spawn.
1716        let run_id = RunId::new();
1717        let mut runs = self.background_runs.lock();
1718        if runs.len() >= self.max_background_runs {
1719            return Err(ToolError::Blocked {
1720                command: format!(
1721                    "background run cap reached (max_background_runs={})",
1722                    self.max_background_runs
1723                ),
1724            });
1725        }
1726        let abort = CancellationToken::new();
1727        runs.insert(
1728            run_id,
1729            BackgroundHandle {
1730                command: command.to_owned(),
1731                started_at: std::time::Instant::now(),
1732                abort: abort.clone(),
1733                child_pid: None,
1734            },
1735        );
1736        drop(runs);
1737
1738        let tool_event_tx = self.tool_event_tx.clone();
1739        let background_completion_tx = self.background_completion_tx.clone();
1740        let background_runs = Arc::clone(&self.background_runs);
1741        let timeout = self.background_timeout;
1742        let env_blocklist = self.env_blocklist.clone();
1743        let skill_env_snapshot: Option<std::collections::HashMap<String, String>> =
1744            self.skill_env.read().clone();
1745        let command_owned = command.to_owned();
1746
1747        tokio::spawn(run_background_task(
1748            run_id,
1749            command_owned,
1750            timeout,
1751            abort,
1752            background_runs,
1753            tool_event_tx,
1754            background_completion_tx,
1755            skill_env_snapshot,
1756            env_blocklist,
1757        ));
1758
1759        Ok(run_id)
1760    }
1761
1762    /// Spawn `command` as a background process using an already-resolved [`ResolvedContext`].
1763    ///
1764    /// Like [`spawn_background`](Self::spawn_background) but uses the pre-resolved env and CWD
1765    /// instead of reading `skill_env`/process-env at spawn time.
1766    ///
1767    /// # Errors
1768    ///
1769    /// Same as [`spawn_background`](Self::spawn_background).
1770    async fn spawn_background_with_context(
1771        &self,
1772        command: &str,
1773        resolved: &ResolvedContext,
1774    ) -> Result<RunId, ToolError> {
1775        use std::sync::atomic::Ordering;
1776
1777        if self.shutting_down.load(Ordering::Acquire) {
1778            return Err(ToolError::Blocked {
1779                command: command.to_owned(),
1780            });
1781        }
1782
1783        self.check_permissions(command, false).await?;
1784        self.validate_sandbox_with_cwd(command, &resolved.cwd)?;
1785
1786        let run_id = RunId::new();
1787        let mut runs = self.background_runs.lock();
1788        if runs.len() >= self.max_background_runs {
1789            return Err(ToolError::Blocked {
1790                command: format!(
1791                    "background run cap reached (max_background_runs={})",
1792                    self.max_background_runs
1793                ),
1794            });
1795        }
1796        let abort = CancellationToken::new();
1797        runs.insert(
1798            run_id,
1799            BackgroundHandle {
1800                command: command.to_owned(),
1801                started_at: std::time::Instant::now(),
1802                abort: abort.clone(),
1803                child_pid: None,
1804            },
1805        );
1806        drop(runs);
1807
1808        let tool_event_tx = self.tool_event_tx.clone();
1809        let background_completion_tx = self.background_completion_tx.clone();
1810        let background_runs = Arc::clone(&self.background_runs);
1811        let timeout = self.background_timeout;
1812        let env = resolved.env.clone();
1813        let cwd = resolved.cwd.clone();
1814        let command_owned = command.to_owned();
1815
1816        tokio::spawn(run_background_task_with_env(
1817            run_id,
1818            command_owned,
1819            timeout,
1820            abort,
1821            background_runs,
1822            tool_event_tx,
1823            background_completion_tx,
1824            env,
1825            cwd,
1826        ));
1827
1828        Ok(run_id)
1829    }
1830
1831    /// Cancel all in-flight background runs.
1832    ///
1833    /// Called during agent shutdown. On Unix, issues SIGTERM/SIGKILL escalation
1834    /// against each captured process ID before cancelling the token. Each cancelled
1835    /// run emits a `ToolEvent::Completed { success: false }` event.
1836    pub async fn shutdown(&self) {
1837        use std::sync::atomic::Ordering;
1838
1839        self.shutting_down.store(true, Ordering::Release);
1840
1841        let handles: Vec<(RunId, String, CancellationToken, Option<u32>)> = {
1842            let runs = self.background_runs.lock();
1843            runs.iter()
1844                .map(|(id, h)| (*id, h.command.clone(), h.abort.clone(), h.child_pid))
1845                .collect()
1846        };
1847
1848        if handles.is_empty() {
1849            return;
1850        }
1851
1852        tracing::info!(
1853            count = handles.len(),
1854            "cancelling background shell runs for shutdown"
1855        );
1856
1857        for (run_id, command, abort, pid_opt) in &handles {
1858            abort.cancel();
1859
1860            #[cfg(unix)]
1861            if let Some(pid) = pid_opt {
1862                send_signal_with_escalation(*pid).await;
1863            }
1864            #[cfg(not(unix))]
1865            let _ = pid_opt;
1866
1867            if let Some(ref tx) = self.tool_event_tx {
1868                let _ = tx
1869                    .send(ToolEvent::Completed {
1870                        tool_name: ToolName::new("bash"),
1871                        command: command.clone(),
1872                        output: "[terminated by shutdown]".to_owned(),
1873                        success: false,
1874                        filter_stats: None,
1875                        diff: None,
1876                        run_id: Some(*run_id),
1877                    })
1878                    .await;
1879            }
1880        }
1881
1882        self.background_runs.lock().clear();
1883    }
1884}
1885
1886/// Drive a background shell run from spawn to completion.
1887///
1888/// This function is the body of the [`tokio::spawn`] task created by
1889/// [`ShellExecutor::spawn_background`]. It is extracted into a named async fn so
1890/// the spawner stays within the 100-line limit enforced by `clippy::too_many_lines`.
1891///
1892/// The child process is spawned here (not in the caller) so its PID can be written
1893/// back into the [`BackgroundHandle`] registry before the stream loop starts. This
1894/// makes the SIGTERM/SIGKILL escalation path in [`ShellExecutor::shutdown`] reachable.
1895#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
1896async fn run_background_task(
1897    run_id: RunId,
1898    command: String,
1899    timeout: Duration,
1900    abort: CancellationToken,
1901    background_runs: Arc<Mutex<HashMap<RunId, BackgroundHandle>>>,
1902    tool_event_tx: Option<ToolEventTx>,
1903    background_completion_tx: Option<tokio::sync::mpsc::Sender<BackgroundCompletion>>,
1904    skill_env_snapshot: Option<std::collections::HashMap<String, String>>,
1905    env_blocklist: Vec<String>,
1906) {
1907    use std::process::Stdio;
1908
1909    let started_at = std::time::Instant::now();
1910
1911    // Build and spawn the child directly so we can capture its PID and write it
1912    // back into the registry before entering the stream loop. Calling execute_bash
1913    // would hide the child handle and leave child_pid = None, making the
1914    // SIGTERM/SIGKILL escalation path in shutdown() unreachable.
1915    let mut cmd = build_bash_command(&command, skill_env_snapshot.as_ref(), &env_blocklist);
1916    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
1917
1918    let mut child = match cmd.spawn() {
1919        Ok(c) => c,
1920        Err(ref e) => {
1921            let (_, out) = spawn_error_envelope(e);
1922            background_runs.lock().remove(&run_id);
1923            emit_completed(tool_event_tx.as_ref(), &command, out.clone(), false, run_id).await;
1924            if let Some(ref tx) = background_completion_tx {
1925                let _ = tx
1926                    .send(BackgroundCompletion {
1927                        run_id,
1928                        exit_code: 1,
1929                        output: out,
1930                        success: false,
1931                        elapsed_ms: 0,
1932                        command,
1933                    })
1934                    .await;
1935            }
1936            return;
1937        }
1938    };
1939
1940    // Write PID back so shutdown() can reach the SIGTERM/SIGKILL escalation path.
1941    if let Some(pid) = child.id()
1942        && let Some(handle) = background_runs.lock().get_mut(&run_id)
1943    {
1944        handle.child_pid = Some(pid);
1945    }
1946
1947    // stdout/stderr are guaranteed piped — set above before spawn.
1948    let stdout = child.stdout.take().expect("stdout piped");
1949    let stderr = child.stderr.take().expect("stderr piped");
1950    let mut line_rx = spawn_output_readers(stdout, stderr);
1951
1952    let mut combined = String::new();
1953    let mut stdout_buf = String::new();
1954    let mut stderr_buf = String::new();
1955    let deadline = tokio::time::Instant::now() + timeout;
1956    let timeout_secs = timeout.as_secs();
1957
1958    let (_, out) = match run_bash_stream(
1959        &command,
1960        deadline,
1961        Some(&abort),
1962        tool_event_tx.as_ref(),
1963        "",
1964        &mut line_rx,
1965        &mut combined,
1966        &mut stdout_buf,
1967        &mut stderr_buf,
1968        &mut child,
1969    )
1970    .await
1971    {
1972        BashLoopOutcome::TimedOut => (
1973            ShellOutputEnvelope {
1974                stdout: stdout_buf,
1975                stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
1976                exit_code: 1,
1977                truncated: false,
1978            },
1979            format!("[error] command timed out after {timeout_secs}s"),
1980        ),
1981        BashLoopOutcome::Cancelled => (
1982            ShellOutputEnvelope {
1983                stdout: stdout_buf,
1984                stderr: format!("{stderr_buf}operation aborted"),
1985                exit_code: 130,
1986                truncated: false,
1987            },
1988            "[cancelled] operation aborted".to_string(),
1989        ),
1990        BashLoopOutcome::StreamClosed => {
1991            finalize_envelope(&mut child, combined, stdout_buf, stderr_buf).await
1992        }
1993    };
1994
1995    #[allow(clippy::cast_possible_truncation)]
1996    let elapsed_ms = started_at.elapsed().as_millis() as u64;
1997    let success = !out.contains("[error]");
1998    let exit_code = i32::from(!success);
1999    let truncated = crate::executor::truncate_tool_output_at(&out, 4096);
2000
2001    background_runs.lock().remove(&run_id);
2002    emit_completed(
2003        tool_event_tx.as_ref(),
2004        &command,
2005        truncated.clone(),
2006        success,
2007        run_id,
2008    )
2009    .await;
2010
2011    if let Some(ref tx) = background_completion_tx {
2012        let completion = BackgroundCompletion {
2013            run_id,
2014            exit_code,
2015            output: truncated,
2016            success,
2017            elapsed_ms,
2018            command,
2019        };
2020        if tx.send(completion).await.is_err() {
2021            tracing::warn!(
2022                run_id = %run_id,
2023                "background completion channel closed; agent may have shut down"
2024            );
2025        }
2026    }
2027
2028    tracing::debug!(run_id = %run_id, exit_code, elapsed_ms, "background shell run completed");
2029}
2030
2031/// Like [`run_background_task`] but uses a pre-resolved `env` and `cwd` from
2032/// `resolve_context` instead of reading `skill_env`/process-env at spawn time.
2033#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
2034async fn run_background_task_with_env(
2035    run_id: RunId,
2036    command: String,
2037    timeout: Duration,
2038    abort: CancellationToken,
2039    background_runs: Arc<Mutex<HashMap<RunId, BackgroundHandle>>>,
2040    tool_event_tx: Option<ToolEventTx>,
2041    background_completion_tx: Option<tokio::sync::mpsc::Sender<BackgroundCompletion>>,
2042    env: HashMap<String, String>,
2043    cwd: PathBuf,
2044) {
2045    use std::process::Stdio;
2046
2047    let started_at = std::time::Instant::now();
2048
2049    let mut cmd = build_bash_command_with_context(&command, &env, &cwd);
2050    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
2051
2052    let mut child = match cmd.spawn() {
2053        Ok(c) => c,
2054        Err(ref e) => {
2055            let (_, out) = spawn_error_envelope(e);
2056            background_runs.lock().remove(&run_id);
2057            emit_completed(tool_event_tx.as_ref(), &command, out.clone(), false, run_id).await;
2058            if let Some(ref tx) = background_completion_tx {
2059                let _ = tx
2060                    .send(BackgroundCompletion {
2061                        run_id,
2062                        exit_code: 1,
2063                        output: out,
2064                        success: false,
2065                        elapsed_ms: 0,
2066                        command,
2067                    })
2068                    .await;
2069            }
2070            return;
2071        }
2072    };
2073
2074    if let Some(pid) = child.id()
2075        && let Some(handle) = background_runs.lock().get_mut(&run_id)
2076    {
2077        handle.child_pid = Some(pid);
2078    }
2079
2080    let stdout = child.stdout.take().expect("stdout piped");
2081    let stderr = child.stderr.take().expect("stderr piped");
2082    let mut line_rx = spawn_output_readers(stdout, stderr);
2083
2084    let mut combined = String::new();
2085    let mut stdout_buf = String::new();
2086    let mut stderr_buf = String::new();
2087    let deadline = tokio::time::Instant::now() + timeout;
2088    let timeout_secs = timeout.as_secs();
2089
2090    let (_, out) = match run_bash_stream(
2091        &command,
2092        deadline,
2093        Some(&abort),
2094        tool_event_tx.as_ref(),
2095        "",
2096        &mut line_rx,
2097        &mut combined,
2098        &mut stdout_buf,
2099        &mut stderr_buf,
2100        &mut child,
2101    )
2102    .await
2103    {
2104        BashLoopOutcome::TimedOut => (
2105            ShellOutputEnvelope {
2106                stdout: stdout_buf,
2107                stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
2108                exit_code: 1,
2109                truncated: false,
2110            },
2111            format!("[error] command timed out after {timeout_secs}s"),
2112        ),
2113        BashLoopOutcome::Cancelled => (
2114            ShellOutputEnvelope {
2115                stdout: stdout_buf,
2116                stderr: stderr_buf,
2117                exit_code: 130,
2118                truncated: false,
2119            },
2120            "[cancelled] operation aborted".to_string(),
2121        ),
2122        BashLoopOutcome::StreamClosed => {
2123            finalize_envelope(&mut child, combined, stdout_buf, stderr_buf).await
2124        }
2125    };
2126
2127    #[allow(clippy::cast_possible_truncation)]
2128    let elapsed_ms = started_at.elapsed().as_millis() as u64;
2129    let success = !out.contains("[error]");
2130    let exit_code = i32::from(!success);
2131    let truncated = crate::executor::truncate_tool_output_at(&out, 4096);
2132
2133    background_runs.lock().remove(&run_id);
2134    emit_completed(
2135        tool_event_tx.as_ref(),
2136        &command,
2137        truncated.clone(),
2138        success,
2139        run_id,
2140    )
2141    .await;
2142
2143    if let Some(ref tx) = background_completion_tx {
2144        let completion = BackgroundCompletion {
2145            run_id,
2146            exit_code,
2147            output: truncated,
2148            success,
2149            elapsed_ms,
2150            command,
2151        };
2152        if tx.send(completion).await.is_err() {
2153            tracing::warn!(
2154                run_id = %run_id,
2155                "background completion channel closed; agent may have shut down"
2156            );
2157        }
2158    }
2159
2160    tracing::debug!(run_id = %run_id, exit_code, elapsed_ms, "background shell run (with context) completed");
2161}
2162
2163/// Emit a `ToolEvent::Completed` to `tool_event_tx` if it is set.
2164async fn emit_completed(
2165    tool_event_tx: Option<&ToolEventTx>,
2166    command: &str,
2167    output: String,
2168    success: bool,
2169    run_id: RunId,
2170) {
2171    if let Some(tx) = tool_event_tx {
2172        let _ = tx
2173            .send(ToolEvent::Completed {
2174                tool_name: ToolName::new("bash"),
2175                command: command.to_owned(),
2176                output,
2177                success,
2178                filter_stats: None,
2179                diff: None,
2180                run_id: Some(run_id),
2181            })
2182            .await;
2183    }
2184}
2185
/// Strip shell escape sequences that could bypass command detection.
///
/// Handles: backslash insertion (`su\do` -> `sudo`), `$'\xNN'` hex and `$'\NNN'` octal
/// escapes, adjacent quoted segments (`"su""do"` -> `sudo`), backslash-newline continuations.
///
/// The input is walked by `char`, not by byte, so non-ASCII text passes through
/// unchanged instead of being split into one Latin-1 character per UTF-8 byte.
///
/// A `$'...'` segment is only rewritten when it is terminated and contained at least
/// one hex or octal escape; otherwise it is handled like ordinary text (the `$` is
/// kept and the quotes are stripped by the quoted-segment rule).
pub(crate) fn strip_shell_escapes(input: &str) -> String {
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut out = String::with_capacity(input.len());
    let mut i = 0;

    while i < len {
        // $'...' ANSI-C quoting: decode \xNN hex and \NNN octal escapes
        if chars[i] == '$' && i + 1 < len && chars[i + 1] == '\'' {
            let mut j = i + 2; // points after $'
            let mut decoded = String::new();
            let mut valid = false;
            while j < len && chars[j] != '\'' {
                if chars[j] == '\\' && j + 1 < len {
                    let next = chars[j + 1];
                    if next == 'x' && j + 3 < len {
                        // \xNN hex escape
                        let hi = chars[j + 2].to_digit(16);
                        let lo = chars[j + 3].to_digit(16);
                        if let (Some(h), Some(l)) = (hi, lo) {
                            #[allow(clippy::cast_possible_truncation)]
                            let byte = ((h << 4) | l) as u8;
                            decoded.push(byte as char);
                            j += 4;
                            valid = true;
                            continue;
                        }
                    } else if let Some(first) = next.to_digit(10) {
                        // \NNN escape: up to three ASCII digits, folded base-8
                        let mut val = first;
                        let mut consumed = 2; // `\N` so far
                        if let Some(second) = chars.get(j + 2).and_then(|c| c.to_digit(10)) {
                            val = val * 8 + second;
                            consumed = 3;
                            if let Some(third) = chars.get(j + 3).and_then(|c| c.to_digit(10)) {
                                val = val * 8 + third;
                                consumed = 4;
                            }
                        }
                        #[allow(clippy::cast_possible_truncation)]
                        decoded.push((val & 0xFF) as u8 as char);
                        j += consumed;
                        valid = true;
                        continue;
                    }
                    // other \X escape: emit X literally
                    decoded.push(next);
                    j += 2;
                } else {
                    decoded.push(chars[j]);
                    j += 1;
                }
            }
            if j < len && chars[j] == '\'' && valid {
                out.push_str(&decoded);
                i = j + 1;
                continue;
            }
            // not a decodable $'...' sequence — fall through to handle as regular chars
        }

        // Backslash followed by anything: drop the backslash. A following newline is a
        // line continuation and is dropped too; any other character is kept
        // (e.g. su\do -> sudo). A lone trailing backslash falls through and is kept.
        if chars[i] == '\\' && i + 1 < len {
            let escaped = chars[i + 1];
            if escaped != '\n' {
                out.push(escaped);
            }
            i += 2;
            continue;
        }

        // quoted segment stripping: collapse adjacent quoted segments
        if chars[i] == '"' || chars[i] == '\'' {
            let quote = chars[i];
            i += 1;
            while i < len && chars[i] != quote {
                out.push(chars[i]);
                i += 1;
            }
            if i < len {
                i += 1; // skip closing quote
            }
            continue;
        }

        out.push(chars[i]);
        i += 1;
    }
    out
}
2277
/// Extract inner command strings from subshell constructs in `s`.
///
/// Recognises:
/// - Backtick: `` `cmd` `` → `cmd`
/// - Dollar-paren: `$(cmd)` → `cmd`
/// - Process substitution (lt): `<(cmd)` → `cmd`
/// - Process substitution (gt): `>(cmd)` → `cmd`
///
/// Depth counting handles nested parentheses correctly; a nested construct is
/// returned as part of its enclosing construct's text, not as a separate entry.
///
/// An opener with no matching terminator yields no entry of its own, and scanning
/// resumes immediately after it. This keeps a stray `` ` `` or `$(` from hiding
/// well-formed substitutions that appear later in the input.
pub(crate) fn extract_subshell_contents(s: &str) -> Vec<String> {
    let chars: Vec<char> = s.chars().collect();
    let len = chars.len();
    let mut results = Vec::new();
    let mut i = 0;

    while i < len {
        // Backtick substitution: `...`
        if chars[i] == '`' {
            let start = i + 1;
            match chars[start..].iter().position(|&c| c == '`') {
                Some(offset) => {
                    let end = start + offset;
                    results.push(chars[start..end].iter().collect());
                    i = end + 1;
                }
                // Unterminated: keep scanning right after the stray backtick.
                None => i = start,
            }
            continue;
        }

        // $(...), <(...), >(...)
        let is_paren_subshell =
            matches!(chars[i], '$' | '<' | '>') && i + 1 < len && chars[i + 1] == '(';

        if is_paren_subshell {
            let start = i + 2;
            let mut depth: usize = 1;
            let mut close = None;
            for (offset, &c) in chars[start..].iter().enumerate() {
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(start + offset);
                            break;
                        }
                    }
                    _ => {}
                }
            }
            match close {
                Some(end) => {
                    results.push(chars[start..end].iter().collect());
                    i = end + 1;
                }
                // Unbalanced: keep scanning inside the unterminated construct.
                None => i = start,
            }
            continue;
        }

        i += 1;
    }

    results
}
2340
/// Break normalized shell code into sub-commands and whitespace-split each one.
///
/// Sub-command boundaries are `|`, `||`, `&&`, `;` and newline. Each returned inner
/// vector holds the tokens of one sub-command; segments without any token are dropped.
pub(crate) fn tokenize_commands(normalized: &str) -> Vec<Vec<String>> {
    let mut commands = Vec::new();
    // `&&` is the only separator not covered by the single-character split below;
    // `||` simply yields an empty segment between its two pipes, which is discarded.
    for chunk in normalized.split("&&") {
        for segment in chunk.split([';', '|', '\n']) {
            let tokens: Vec<String> = segment.split_whitespace().map(String::from).collect();
            if !tokens.is_empty() {
                commands.push(tokens);
            }
        }
    }
    commands
}
2356
/// Transparent prefix commands that invoke the next argument as a command.
/// Skipped when determining the "real" command name being invoked.
///
/// `tokens_match_pattern` compares these entries against the basename of each
/// leading token, so a path-qualified wrapper such as `/usr/bin/env` is skipped
/// just like a bare `env`.
const TRANSPARENT_PREFIXES: &[&str] = &["env", "command", "exec", "nice", "nohup", "time", "xargs"];
2360
/// Strip leading directory components from `tok`, returning the text after the last `/`.
///
/// A token without any `/` is returned unchanged; a token that ends in `/` yields
/// an empty string.
fn cmd_basename(tok: &str) -> &str {
    match tok.rfind('/') {
        Some(slash) => &tok[slash + 1..],
        None => tok,
    }
}
2365
2366/// Check if the first tokens of a sub-command match a blocked pattern.
2367/// Handles:
2368/// - Transparent prefix commands (`env sudo rm` -> checks `sudo`)
2369/// - Absolute paths (`/usr/bin/sudo rm` -> basename `sudo` is checked)
2370/// - Dot-suffixed variants (`mkfs` matches `mkfs.ext4`)
2371/// - Multi-word patterns (`rm -rf /` joined prefix check)
2372pub(crate) fn tokens_match_pattern(tokens: &[String], pattern: &str) -> bool {
2373    if tokens.is_empty() || pattern.is_empty() {
2374        return false;
2375    }
2376    let pattern = pattern.trim();
2377    let pattern_tokens: Vec<&str> = pattern.split_whitespace().collect();
2378    if pattern_tokens.is_empty() {
2379        return false;
2380    }
2381
2382    // Skip transparent prefix tokens to reach the real command
2383    let start = tokens
2384        .iter()
2385        .position(|t| !TRANSPARENT_PREFIXES.contains(&cmd_basename(t)))
2386        .unwrap_or(0);
2387    let effective = &tokens[start..];
2388    if effective.is_empty() {
2389        return false;
2390    }
2391
2392    if pattern_tokens.len() == 1 {
2393        let pat = pattern_tokens[0];
2394        let base = cmd_basename(&effective[0]);
2395        // Exact match OR dot-suffixed variant (e.g. "mkfs" matches "mkfs.ext4")
2396        base == pat || base.starts_with(&format!("{pat}."))
2397    } else {
2398        // Multi-word: join first N tokens (using basename for first) and check prefix
2399        let n = pattern_tokens.len().min(effective.len());
2400        let mut parts: Vec<&str> = vec![cmd_basename(&effective[0])];
2401        parts.extend(effective[1..n].iter().map(String::as_str));
2402        let joined = parts.join(" ");
2403        if joined.starts_with(pattern) {
2404            return true;
2405        }
2406        if effective.len() > n {
2407            let mut parts2: Vec<&str> = vec![cmd_basename(&effective[0])];
2408            parts2.extend(effective[1..=n].iter().map(String::as_str));
2409            parts2.join(" ").starts_with(pattern)
2410        } else {
2411            false
2412        }
2413    }
2414}
2415
2416fn extract_paths(code: &str) -> Vec<String> {
2417    let mut result = Vec::new();
2418
2419    // Tokenize respecting single/double quotes
2420    let mut tokens: Vec<String> = Vec::new();
2421    let mut current = String::new();
2422    let mut chars = code.chars().peekable();
2423    while let Some(c) = chars.next() {
2424        match c {
2425            '"' | '\'' => {
2426                let quote = c;
2427                while let Some(&nc) = chars.peek() {
2428                    if nc == quote {
2429                        chars.next();
2430                        break;
2431                    }
2432                    current.push(chars.next().unwrap());
2433                }
2434            }
2435            c if c.is_whitespace() || matches!(c, ';' | '|' | '&') => {
2436                if !current.is_empty() {
2437                    tokens.push(std::mem::take(&mut current));
2438                }
2439            }
2440            _ => current.push(c),
2441        }
2442    }
2443    if !current.is_empty() {
2444        tokens.push(current);
2445    }
2446
2447    for token in tokens {
2448        let trimmed = token.trim_end_matches([';', '&', '|']).to_owned();
2449        if trimmed.is_empty() {
2450            continue;
2451        }
2452        if trimmed.starts_with('/')
2453            || trimmed.starts_with("./")
2454            || trimmed.starts_with("../")
2455            || trimmed == ".."
2456            || (trimmed.starts_with('.') && trimmed.contains('/'))
2457            || is_relative_path_token(&trimmed)
2458        {
2459            result.push(trimmed);
2460        }
2461    }
2462    result
2463}
2464
/// Decide whether `token` looks like a bare relative path such as `src/main.rs`.
///
/// The token must contain a `/` and begin with an ASCII letter, digit or `_`,
/// which rules out absolute paths, dot-prefixed paths, flags and the empty string.
///
/// Also rejected:
/// - URLs (anything containing `://`)
/// - Shell variable assignments (`KEY=value`, where everything before the first
///   `=` consists only of ASCII letters, digits and `_`)
fn is_relative_path_token(token: &str) -> bool {
    let is_ident_char = |c: char| c.is_ascii_alphanumeric() || c == '_';

    let Some(first) = token.chars().next() else {
        return false;
    };
    if !is_ident_char(first) || !token.contains('/') || token.contains("://") {
        return false;
    }

    // `KEY=some/value` is an assignment, not a path argument.
    match token.split_once('=') {
        Some((key, _value)) => !key.chars().all(is_ident_char),
        None => true,
    }
}
2493
2494/// Classify shell exit codes and stderr patterns into `ToolErrorCategory`.
2495///
2496/// Returns `Some(category)` only for well-known failure modes that benefit from
2497/// structured feedback (exit 126/127, recognisable stderr patterns). All other
2498/// non-zero exits are left as `Ok` output so they surface verbatim to the LLM.
2499fn classify_shell_exit(
2500    exit_code: i32,
2501    output: &str,
2502) -> Option<crate::error_taxonomy::ToolErrorCategory> {
2503    use crate::error_taxonomy::ToolErrorCategory;
2504    match exit_code {
2505        // exit 126: command found but not executable (OS-level permission/policy)
2506        126 => Some(ToolErrorCategory::PolicyBlocked),
2507        // exit 127: command not found in PATH
2508        127 => Some(ToolErrorCategory::PermanentFailure),
2509        _ => {
2510            let lower = output.to_lowercase();
2511            if lower.contains("permission denied") {
2512                Some(ToolErrorCategory::PolicyBlocked)
2513            } else if lower.contains("no such file or directory") {
2514                Some(ToolErrorCategory::PermanentFailure)
2515            } else {
2516                None
2517            }
2518        }
2519    }
2520}
2521
/// Report whether any `/`-separated component of `path` is exactly `..`.
fn has_traversal(path: &str) -> bool {
    // A `..` component is either the whole path, its first component, its last
    // component, or enclosed by slashes on both sides.
    path == ".." || path.starts_with("../") || path.ends_with("/..") || path.contains("/../")
}
2525
/// Collect the fenced blocks tagged `bash` found in `text`.
///
/// Thin wrapper around `crate::executor::extract_fenced_blocks` with the language
/// tag fixed to `"bash"`; the exact fence-matching rules live in that helper.
/// The returned slices borrow from `text`.
fn extract_bash_blocks(text: &str) -> Vec<&str> {
    crate::executor::extract_fenced_blocks(text, "bash")
}
2529
2530/// Send SIGTERM to a process, wait [`GRACEFUL_TERM_MS`], then send SIGKILL.
2531///
2532/// `pkill -KILL -P <pid>` is issued before the final SIGKILL to reap any
2533/// child processes that bash may have spawned. Note: `pkill -P` sends SIGKILL
2534/// to the *children* of `pid`, not to `pid` itself.
2535///
2536/// **ESRCH on SIGKILL is safe and expected.** If the process exited voluntarily
2537/// during the grace period, the OS returns `ESRCH` ("no such process") for the
2538/// SIGKILL call; this is silently swallowed and not treated as an error.
2539///
2540/// **PID reuse caveat.** If bash exits during the 250 ms window and the OS
2541/// recycles its PID before `kill(SIGKILL)` is issued, the SIGKILL could
2542/// theoretically reach an unrelated process. In practice the 250 ms window is
2543/// too short for PID recycling under normal load, so this is treated as an
2544/// acceptable trade-off for MVP.
2545#[cfg(unix)]
2546async fn send_signal_with_escalation(pid: u32) {
2547    use nix::errno::Errno;
2548    use nix::sys::signal::{Signal, kill};
2549    use nix::unistd::Pid;
2550
2551    let Ok(pid_i32) = i32::try_from(pid) else {
2552        return;
2553    };
2554    let target = Pid::from_raw(pid_i32);
2555
2556    if let Err(e) = kill(target, Signal::SIGTERM)
2557        && e != Errno::ESRCH
2558    {
2559        tracing::debug!(pid, err = %e, "SIGTERM failed");
2560    }
2561    tokio::time::sleep(GRACEFUL_TERM_MS).await;
2562    // Kill children of pid (not pid itself); ESRCH if none exist is harmless.
2563    let _ = Command::new("pkill")
2564        .args(["-KILL", "-P", &pid.to_string()])
2565        .status()
2566        .await;
2567    if let Err(e) = kill(target, Signal::SIGKILL)
2568        && e != Errno::ESRCH
2569    {
2570        tracing::debug!(pid, err = %e, "SIGKILL failed");
2571    }
2572}
2573
2574/// Kill a child process and its descendants.
2575///
2576/// On Unix, sends SIGTERM first, waits [`GRACEFUL_TERM_MS`], reaps descendants,
2577/// then sends SIGKILL. Always finishes with [`tokio::process::Child::kill`] to
2578/// ensure the `Child` reaper sees the dead process.
2579async fn kill_process_tree(child: &mut tokio::process::Child) {
2580    #[cfg(unix)]
2581    if let Some(pid) = child.id() {
2582        send_signal_with_escalation(pid).await;
2583    }
2584    let _ = child.kill().await;
2585}
2586
/// Structured output from a shell command execution.
///
/// Produced by the internal `execute_bash` function and included in the final
/// [`ToolOutput`] and [`AuditEntry`] for the invocation.
///
/// The envelope keeps stdout and stderr separate; the interleaved text shown to
/// the LLM is returned alongside it as a plain `String` by the execution helpers.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ShellOutputEnvelope {
    /// Captured standard output, possibly truncated.
    pub stdout: String,
    /// Captured standard error, possibly truncated.
    ///
    /// For timeouts, cancellations, spawn failures and sandbox setup failures the
    /// execution helpers store their own diagnostic message here.
    pub stderr: String,
    /// Process exit code. `0` indicates success by convention.
    ///
    /// The execution helpers in this module report `1` for spawn failures, sandbox
    /// setup failures, timeouts and whenever no exit code can be obtained from the
    /// child, and `130` for cancelled commands.
    pub exit_code: i32,
    /// `true` when the combined output exceeded the configured max and was truncated.
    ///
    /// NOTE(review): the helpers that build envelopes in this part of the module
    /// always set this to `false`; presumably truncation is applied by a later
    /// stage — confirm against the caller.
    pub truncated: bool,
}
2602
2603// Used only in cfg(test) blocks; dead_code analysis does not see test imports.
2604#[allow(dead_code, clippy::too_many_arguments)]
2605async fn execute_bash(
2606    code: &str,
2607    timeout: Duration,
2608    event_tx: Option<&ToolEventTx>,
2609    cancel_token: Option<&CancellationToken>,
2610    extra_env: Option<&std::collections::HashMap<String, String>>,
2611    env_blocklist: &[String],
2612    sandbox: Option<(&dyn Sandbox, &SandboxPolicy)>,
2613    tool_call_id: &str,
2614) -> (ShellOutputEnvelope, String) {
2615    use std::process::Stdio;
2616
2617    let timeout_secs = timeout.as_secs();
2618    let mut cmd = build_bash_command(code, extra_env, env_blocklist);
2619
2620    if let Err(envelope_err) = apply_sandbox(&mut cmd, sandbox) {
2621        return envelope_err;
2622    }
2623
2624    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
2625
2626    let mut child = match cmd.spawn() {
2627        Ok(c) => c,
2628        Err(ref e) => return spawn_error_envelope(e),
2629    };
2630
2631    let stdout = child.stdout.take().expect("stdout piped");
2632    let stderr = child.stderr.take().expect("stderr piped");
2633    let mut line_rx = spawn_output_readers(stdout, stderr);
2634
2635    let mut combined = String::new();
2636    let mut stdout_buf = String::new();
2637    let mut stderr_buf = String::new();
2638    let deadline = tokio::time::Instant::now() + timeout;
2639
2640    match run_bash_stream(
2641        code,
2642        deadline,
2643        cancel_token,
2644        event_tx,
2645        tool_call_id,
2646        &mut line_rx,
2647        &mut combined,
2648        &mut stdout_buf,
2649        &mut stderr_buf,
2650        &mut child,
2651    )
2652    .await
2653    {
2654        BashLoopOutcome::TimedOut => {
2655            let msg = format!("[error] command timed out after {timeout_secs}s");
2656            (
2657                ShellOutputEnvelope {
2658                    stdout: stdout_buf,
2659                    stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
2660                    exit_code: 1,
2661                    truncated: false,
2662                },
2663                msg,
2664            )
2665        }
2666        BashLoopOutcome::Cancelled => (
2667            ShellOutputEnvelope {
2668                stdout: stdout_buf,
2669                stderr: format!("{stderr_buf}operation aborted"),
2670                exit_code: 130,
2671                truncated: false,
2672            },
2673            "[cancelled] operation aborted".to_string(),
2674        ),
2675        BashLoopOutcome::StreamClosed => {
2676            finalize_envelope(&mut child, combined, stdout_buf, stderr_buf).await
2677        }
2678    }
2679}
2680
2681fn build_bash_command(
2682    code: &str,
2683    extra_env: Option<&std::collections::HashMap<String, String>>,
2684    env_blocklist: &[String],
2685) -> Command {
2686    let mut cmd = Command::new("bash");
2687    cmd.arg("-c").arg(code);
2688    for (key, _) in std::env::vars() {
2689        if env_blocklist
2690            .iter()
2691            .any(|prefix| key.starts_with(prefix.as_str()))
2692        {
2693            cmd.env_remove(&key);
2694        }
2695    }
2696    if let Some(env) = extra_env {
2697        cmd.envs(env);
2698    }
2699    cmd
2700}
2701
2702/// Build a `Command` using a pre-resolved env map and explicit cwd.
2703///
2704/// Clears the process env and applies only `resolved_env` — no blocklist re-apply needed
2705/// because the caller (`resolve_context`) has already done that.
2706fn build_bash_command_with_context(
2707    code: &str,
2708    resolved_env: &HashMap<String, String>,
2709    cwd: &std::path::Path,
2710) -> Command {
2711    let mut cmd = Command::new("bash");
2712    cmd.arg("-c").arg(code);
2713    cmd.env_clear();
2714    cmd.envs(resolved_env);
2715    cmd.current_dir(cwd);
2716    cmd
2717}
2718
2719/// Execute `code` using a pre-resolved [`ResolvedContext`].
2720///
2721/// Unlike [`execute_bash`], this function receives the *final merged env* from
2722/// `resolve_context` and sets `current_dir` to the resolved CWD.
2723async fn execute_bash_with_context(
2724    code: &str,
2725    timeout: Duration,
2726    event_tx: Option<&ToolEventTx>,
2727    tool_call_id: &str,
2728    cancel_token: Option<&CancellationToken>,
2729    resolved: &ResolvedContext,
2730    sandbox: Option<(&dyn Sandbox, &SandboxPolicy)>,
2731) -> (ShellOutputEnvelope, String) {
2732    use std::process::Stdio;
2733
2734    let timeout_secs = timeout.as_secs();
2735    let mut cmd = build_bash_command_with_context(code, &resolved.env, &resolved.cwd);
2736
2737    if let Err(envelope_err) = apply_sandbox(&mut cmd, sandbox) {
2738        return envelope_err;
2739    }
2740
2741    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
2742
2743    let mut child = match cmd.spawn() {
2744        Ok(c) => c,
2745        Err(ref e) => return spawn_error_envelope(e),
2746    };
2747
2748    let stdout = child.stdout.take().expect("stdout piped");
2749    let stderr = child.stderr.take().expect("stderr piped");
2750    let mut line_rx = spawn_output_readers(stdout, stderr);
2751
2752    let mut combined = String::new();
2753    let mut stdout_buf = String::new();
2754    let mut stderr_buf = String::new();
2755    let deadline = tokio::time::Instant::now() + timeout;
2756
2757    match run_bash_stream(
2758        code,
2759        deadline,
2760        cancel_token,
2761        event_tx,
2762        tool_call_id,
2763        &mut line_rx,
2764        &mut combined,
2765        &mut stdout_buf,
2766        &mut stderr_buf,
2767        &mut child,
2768    )
2769    .await
2770    {
2771        BashLoopOutcome::TimedOut => {
2772            let msg = format!("[error] command timed out after {timeout_secs}s");
2773            (
2774                ShellOutputEnvelope {
2775                    stdout: stdout_buf,
2776                    stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
2777                    exit_code: 1,
2778                    truncated: false,
2779                },
2780                msg,
2781            )
2782        }
2783        BashLoopOutcome::Cancelled => (
2784            ShellOutputEnvelope {
2785                stdout: stdout_buf,
2786                stderr: format!("{stderr_buf}operation aborted"),
2787                exit_code: 130,
2788                truncated: false,
2789            },
2790            "[cancelled] operation aborted".to_string(),
2791        ),
2792        BashLoopOutcome::StreamClosed => {
2793            finalize_envelope(&mut child, combined, stdout_buf, stderr_buf).await
2794        }
2795    }
2796}
2797
2798fn apply_sandbox(
2799    cmd: &mut Command,
2800    sandbox: Option<(&dyn Sandbox, &SandboxPolicy)>,
2801) -> Result<(), (ShellOutputEnvelope, String)> {
2802    // Apply OS sandbox before setting stdio so the rewritten program is sandboxed.
2803    if let Some((sb, policy)) = sandbox
2804        && let Err(err) = sb.wrap(cmd, policy)
2805    {
2806        let msg = format!("[error] sandbox setup failed: {err}");
2807        return Err((
2808            ShellOutputEnvelope {
2809                stdout: String::new(),
2810                stderr: msg.clone(),
2811                exit_code: 1,
2812                truncated: false,
2813            },
2814            msg,
2815        ));
2816    }
2817    Ok(())
2818}
2819
2820fn spawn_error_envelope(e: &std::io::Error) -> (ShellOutputEnvelope, String) {
2821    let msg = format!("[error] {e}");
2822    (
2823        ShellOutputEnvelope {
2824            stdout: String::new(),
2825            stderr: msg.clone(),
2826            exit_code: 1,
2827            truncated: false,
2828        },
2829        msg,
2830    )
2831}
2832
2833// Channel carries (is_stderr, line) so we can accumulate separate buffers
2834// while still building a combined interleaved string for streaming and LLM context.
2835fn spawn_output_readers(
2836    stdout: tokio::process::ChildStdout,
2837    stderr: tokio::process::ChildStderr,
2838) -> tokio::sync::mpsc::Receiver<(bool, String)> {
2839    use tokio::io::{AsyncBufReadExt, BufReader};
2840
2841    let (line_tx, line_rx) = tokio::sync::mpsc::channel::<(bool, String)>(64);
2842
2843    let stdout_tx = line_tx.clone();
2844    tokio::spawn(async move {
2845        let mut reader = BufReader::new(stdout);
2846        let mut buf = String::new();
2847        while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
2848            let _ = stdout_tx.send((false, buf.clone())).await;
2849            buf.clear();
2850        }
2851    });
2852
2853    tokio::spawn(async move {
2854        let mut reader = BufReader::new(stderr);
2855        let mut buf = String::new();
2856        while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
2857            let _ = line_tx.send((true, buf.clone())).await;
2858            buf.clear();
2859        }
2860    });
2861
2862    line_rx
2863}
2864
/// Terminal condition of the streaming select loop (`run_bash_stream`).
///
/// `kill_process_tree` is called inside `run_bash_stream` before it returns
/// `TimedOut` or `Cancelled`, so the caller's envelope helpers can stay
/// side-effect-free.
enum BashLoopOutcome {
    /// The output line channel closed without the deadline elapsing or a
    /// cancellation being observed; the caller collects the exit status.
    StreamClosed,
    /// The deadline elapsed; the process tree has already been killed.
    TimedOut,
    /// The cancellation token fired; the process tree has already been killed.
    Cancelled,
}
2874
2875#[allow(clippy::too_many_arguments)]
2876async fn run_bash_stream(
2877    code: &str,
2878    deadline: tokio::time::Instant,
2879    cancel_token: Option<&CancellationToken>,
2880    event_tx: Option<&ToolEventTx>,
2881    tool_call_id: &str,
2882    line_rx: &mut tokio::sync::mpsc::Receiver<(bool, String)>,
2883    combined: &mut String,
2884    stdout_buf: &mut String,
2885    stderr_buf: &mut String,
2886    child: &mut tokio::process::Child,
2887) -> BashLoopOutcome {
2888    loop {
2889        tokio::select! {
2890            line = line_rx.recv() => {
2891                match line {
2892                    Some((is_stderr, chunk)) => {
2893                        let interleaved = if is_stderr {
2894                            format!("[stderr] {chunk}")
2895                        } else {
2896                            chunk.clone()
2897                        };
2898                        if let Some(tx) = event_tx {
2899                            // Non-terminal streaming event: use try_send (drop on full).
2900                            let _ = tx.try_send(ToolEvent::OutputChunk {
2901                                tool_name: ToolName::new("bash"),
2902                                command: code.to_owned(),
2903                                chunk: interleaved.clone(),
2904                                tool_call_id: tool_call_id.to_owned(),
2905                            });
2906                        }
2907                        combined.push_str(&interleaved);
2908                        if is_stderr {
2909                            stderr_buf.push_str(&chunk);
2910                        } else {
2911                            stdout_buf.push_str(&chunk);
2912                        }
2913                    }
2914                    None => return BashLoopOutcome::StreamClosed,
2915                }
2916            }
2917            () = tokio::time::sleep_until(deadline) => {
2918                kill_process_tree(child).await;
2919                return BashLoopOutcome::TimedOut;
2920            }
2921            () = async {
2922                match cancel_token {
2923                    Some(t) => t.cancelled().await,
2924                    None => std::future::pending().await,
2925                }
2926            } => {
2927                kill_process_tree(child).await;
2928                return BashLoopOutcome::Cancelled;
2929            }
2930        }
2931    }
2932}
2933
2934async fn finalize_envelope(
2935    child: &mut tokio::process::Child,
2936    combined: String,
2937    stdout_buf: String,
2938    stderr_buf: String,
2939) -> (ShellOutputEnvelope, String) {
2940    let status = child.wait().await;
2941    let exit_code = status.ok().and_then(|s| s.code()).unwrap_or(1);
2942
2943    if combined.is_empty() {
2944        (
2945            ShellOutputEnvelope {
2946                stdout: String::new(),
2947                stderr: String::new(),
2948                exit_code,
2949                truncated: false,
2950            },
2951            "(no output)".to_string(),
2952        )
2953    } else {
2954        (
2955            ShellOutputEnvelope {
2956                stdout: stdout_buf.trim_end().to_owned(),
2957                stderr: stderr_buf.trim_end().to_owned(),
2958                exit_code,
2959                truncated: false,
2960            },
2961            combined,
2962        )
2963    }
2964}
2965
2966#[cfg(test)]
2967mod tests;
zeph_tools/shell/mod.rs

zeph_tools/shell/
mod.rs