Skip to main content

zeph_tools/shell/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Shell executor that parses and runs bash blocks from LLM responses.
5//!
6//! [`ShellExecutor`] is the primary tool backend for Zeph. It handles both legacy
7//! fenced bash blocks and structured `bash` tool calls. Security controls enforced
8//! before every command:
9//!
10//! - **Blocklist** — commands matching any entry in `blocked_commands` (or the built-in
11//!   [`DEFAULT_BLOCKED_COMMANDS`]) are rejected with [`ToolError::Blocked`].
12//! - **Subshell metacharacters** — `$(`, `` ` ``, `<(`, and `>(` are always blocked
13//!   because nested evaluation cannot be safely analysed statically.
14//! - **Path sandbox** — the working directory and any file arguments must reside under
15//!   the configured `allowed_paths`.
16//! - **Confirmation gate** — commands matching `confirm_patterns` are held for user
17//!   approval before execution (bypassed by `execute_confirmed`).
18//! - **Environment blocklist** — variables in `env_blocklist` are stripped from the
19//!   subprocess environment before launch.
20//! - **Transactional rollback** — when enabled, file snapshots are taken before execution
21//!   and restored on failure or on non-zero exit codes in `auto_rollback_exit_codes`.
22
23use std::path::PathBuf;
24use std::time::{Duration, Instant};
25
26use tokio::process::Command;
27use tokio_util::sync::CancellationToken;
28
29use schemars::JsonSchema;
30use serde::Deserialize;
31
32use std::sync::Arc;
33
34use arc_swap::ArcSwap;
35use parking_lot::RwLock;
36
37use zeph_common::ToolName;
38
39use crate::audit::{AuditEntry, AuditLogger, AuditResult, chrono_now};
40use crate::config::ShellConfig;
41use crate::executor::{
42    ClaimSource, FilterStats, ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput,
43};
44use crate::filter::{OutputFilterRegistry, sanitize_output};
45use crate::permissions::{PermissionAction, PermissionPolicy};
46use crate::sandbox::{Sandbox, SandboxPolicy};
47
48mod transaction;
49use transaction::{TransactionSnapshot, affected_paths, build_scope_matchers, is_write_command};
50
/// Built-in blocklist patterns, grouped by the kind of risk they guard against.
///
/// Note that `"nc "` carries a trailing space on purpose; the entry is kept
/// byte-for-byte so pattern matching behaves exactly as before.
const DEFAULT_BLOCKED: &[&str] = &[
    // Destructive filesystem / device operations.
    "rm -rf /",
    // Privilege escalation.
    "sudo",
    // More destructive filesystem / device operations.
    "mkfs",
    "dd if=",
    // Network egress tools.
    "curl",
    "wget",
    "nc ",
    "ncat",
    "netcat",
    // Host power state.
    "shutdown",
    "reboot",
    "halt",
];

/// Public view of the built-in blocklist applied by [`ShellExecutor`].
///
/// The list covers highly destructive commands (`rm -rf /`, `mkfs`, `dd if=`),
/// privilege escalation (`sudo`), network egress tools (`curl`, `wget`, `nc`,
/// `ncat`, `netcat`) and host power commands (`shutdown`, `reboot`, `halt`).
///
/// The *effective* list used at runtime is derived from this one by
/// `compute_blocked_commands`, which removes entries named in
/// `ShellConfig::allowed_commands`, appends `ShellConfig::blocked_commands`,
/// and force-adds the network tools whenever `ShellConfig::allow_network` is
/// `false`.
///
/// It is exported so that other executors (e.g. `AcpShellExecutor`) can share
/// the same defaults instead of keeping their own copy.
pub const DEFAULT_BLOCKED_COMMANDS: &[&str] = DEFAULT_BLOCKED;
65
/// Names of shell interpreters that can run arbitrary code handed to them via
/// `-c` or positional arguments.
///
/// [`effective_shell_command`] compares the final path component of a binary
/// against this list; on a match it returns the `-c <script>` argument so the
/// caller can run the blocklist check on the script rather than on the
/// interpreter name.
pub const SHELL_INTERPRETERS: &[&str] = &[
    "bash",
    "sh",
    "zsh",
    "fish",
    "dash",
    "ksh",
    "csh",
    "tcsh",
];
73
/// Character sequences that open a nested shell evaluation: command
/// substitution (`$(` and the backtick) and process substitution (`<(`, `>(`).
///
/// A blocked command could hide inside such a construct behind a harmless
/// outer command, and nested evaluation cannot be analysed statically in a
/// reliable way, so [`check_blocklist`] rejects any command containing one of
/// these sequences.
const SUBSHELL_METACHARS: &[&str] = &[
    "$(",
    "`",
    "<(",
    ">(",
];
78
79/// Check if `command` matches any pattern in `blocklist`.
80///
81/// Returns the matched pattern string if the command is blocked, `None` otherwise.
82/// The check is case-insensitive and handles common shell escape sequences.
83///
84/// Commands containing subshell metacharacters (`$(` or `` ` ``) are always
85/// blocked because nested evaluation cannot be safely analysed statically.
86#[must_use]
87pub fn check_blocklist(command: &str, blocklist: &[String]) -> Option<String> {
88    let lower = command.to_lowercase();
89    // Reject commands that embed subshell constructs to prevent blocklist bypass.
90    for meta in SUBSHELL_METACHARS {
91        if lower.contains(meta) {
92            return Some((*meta).to_owned());
93        }
94    }
95    let cleaned = strip_shell_escapes(&lower);
96    let commands = tokenize_commands(&cleaned);
97    for blocked in blocklist {
98        for cmd_tokens in &commands {
99            if tokens_match_pattern(cmd_tokens, blocked) {
100                return Some(blocked.clone());
101            }
102        }
103    }
104    None
105}
106
107/// Build the effective command string for blocklist evaluation when the binary is a
108/// shell interpreter (bash, sh, zsh, etc.) and args contains a `-c` script.
109///
110/// Returns `None` if the args do not follow the `-c <script>` pattern.
111#[must_use]
112pub fn effective_shell_command<'a>(binary: &str, args: &'a [String]) -> Option<&'a str> {
113    let base = binary.rsplit('/').next().unwrap_or(binary);
114    if !SHELL_INTERPRETERS.contains(&base) {
115        return None;
116    }
117    // Find "-c" and return the next element as the script to check.
118    let pos = args.iter().position(|a| a == "-c")?;
119    args.get(pos + 1).map(String::as_str)
120}
121
/// Network egress tools that `compute_blocked_commands` adds to the effective
/// blocklist whenever `ShellConfig::allow_network` is `false`.
///
/// `"nc "` keeps its trailing space, matching the entry in `DEFAULT_BLOCKED`.
const NETWORK_COMMANDS: &[&str] = &[
    "curl",
    "wget",
    "nc ",
    "ncat",
    "netcat",
];
123
/// Snapshot of the command restrictions a `ShellExecutor` currently enforces.
///
/// A policy value is never edited in place: [`ShellPolicyHandle::rebuild`]
/// builds a new one and swaps it in atomically on hot-reload.
#[derive(Debug)]
pub(crate) struct ShellPolicy {
    /// Effective blocklist patterns, as produced by `compute_blocked_commands`
    /// (sorted and de-duplicated).
    pub(crate) blocked_commands: Vec<String>,
}
131
132/// Clonable handle for live policy rebuilds on hot-reload.
133///
134/// Obtained from [`ShellExecutor::policy_handle`] at construction time and stored
135/// on the agent. Call [`ShellPolicyHandle::rebuild`] to atomically replace the
136/// effective `blocked_commands` list without recreating the executor. Reads on
137/// the dispatch path are lock-free via `ArcSwap::load_full`.
138#[derive(Clone, Debug)]
139pub struct ShellPolicyHandle {
140    inner: Arc<ArcSwap<ShellPolicy>>,
141}
142
143impl ShellPolicyHandle {
144    /// Atomically install a new effective blocklist derived from `config`.
145    ///
146    /// # Rebuild contract
147    ///
148    /// `config` must be the **already-overlay-merged** `ShellConfig` (i.e. the
149    /// value produced by `load_config_with_overlay`). Plugin contributions are
150    /// already present in `config.blocked_commands` at this point; this method
151    /// does NOT re-apply overlays.
152    pub fn rebuild(&self, config: &crate::config::ShellConfig) {
153        let policy = Arc::new(ShellPolicy {
154            blocked_commands: compute_blocked_commands(config),
155        });
156        self.inner.store(policy);
157    }
158
159    /// Snapshot of the current effective blocklist.
160    #[must_use]
161    pub fn snapshot_blocked(&self) -> Vec<String> {
162        self.inner.load().blocked_commands.clone()
163    }
164}
165
166/// Compute the effective blocklist from an already-overlay-merged `ShellConfig`.
167///
168/// Invariant: identical to the logic in `ShellExecutor::new`.
169pub(crate) fn compute_blocked_commands(config: &crate::config::ShellConfig) -> Vec<String> {
170    let allowed: Vec<String> = config
171        .allowed_commands
172        .iter()
173        .map(|s| s.to_lowercase())
174        .collect();
175    let mut blocked: Vec<String> = DEFAULT_BLOCKED
176        .iter()
177        .filter(|s| !allowed.contains(&s.to_lowercase()))
178        .map(|s| (*s).to_owned())
179        .collect();
180    blocked.extend(config.blocked_commands.iter().map(|s| s.to_lowercase()));
181    if !config.allow_network {
182        for cmd in NETWORK_COMMANDS {
183            let lower = cmd.to_lowercase();
184            if !blocked.contains(&lower) {
185                blocked.push(lower);
186            }
187        }
188    }
189    blocked.sort();
190    blocked.dedup();
191    blocked
192}
193
// Parameters deserialized for the `bash` tool (presumably the argument object
// of a structured tool call — confirm against the dispatch code).
//
// NOTE(review): plain `//` comments are used here on purpose. `JsonSchema`
// derives commonly surface `///` doc text as schema descriptions, so adding or
// rewording doc comments on this type may change the generated schema —
// verify before converting these to `///`.
#[derive(Deserialize, JsonSchema)]
pub(crate) struct BashParams {
    /// The bash command to execute
    command: String,
}
199
200/// Bash block extraction and execution via `tokio::process::Command`.
201///
202/// Parses ` ```bash ` fenced blocks from LLM responses (legacy path) and handles
203/// structured `bash` tool calls (modern path). Use [`ShellExecutor::new`] with a
204/// [`ShellConfig`] and chain optional builder methods to attach audit logging,
205/// event streaming, permission policies, and cancellation.
206///
207/// # Example
208///
209/// ```rust,no_run
210/// use zeph_tools::{ShellExecutor, ToolExecutor, config::ShellConfig};
211///
212/// # async fn example() {
213/// let executor = ShellExecutor::new(&ShellConfig::default());
214///
215/// // Execute a fenced bash block.
216/// let response = "```bash\npwd\n```";
217/// if let Ok(Some(output)) = executor.execute(response).await {
218///     println!("{}", output.summary);
219/// }
220/// # }
221/// ```
222#[derive(Debug)]
223pub struct ShellExecutor {
224    timeout: Duration,
225    policy: Arc<ArcSwap<ShellPolicy>>,
226    allowed_paths: Vec<PathBuf>,
227    confirm_patterns: Vec<String>,
228    env_blocklist: Vec<String>,
229    audit_logger: Option<Arc<AuditLogger>>,
230    tool_event_tx: Option<ToolEventTx>,
231    permission_policy: Option<PermissionPolicy>,
232    output_filter_registry: Option<OutputFilterRegistry>,
233    cancel_token: Option<CancellationToken>,
234    skill_env: RwLock<Option<std::collections::HashMap<String, String>>>,
235    transactional: bool,
236    auto_rollback: bool,
237    auto_rollback_exit_codes: Vec<i32>,
238    snapshot_required: bool,
239    max_snapshot_bytes: u64,
240    transaction_scope_matchers: Vec<globset::GlobMatcher>,
241    sandbox: Option<Arc<dyn Sandbox>>,
242    sandbox_policy: Option<SandboxPolicy>,
243}
244
245impl ShellExecutor {
246    /// Create a new `ShellExecutor` from configuration.
247    ///
248    /// Merges the built-in [`DEFAULT_BLOCKED_COMMANDS`] with any additional blocked
249    /// commands from `config`, then subtracts any explicitly allowed commands.
250    /// No subprocess is spawned at construction time.
251    #[must_use]
252    pub fn new(config: &ShellConfig) -> Self {
253        let policy = Arc::new(ArcSwap::from_pointee(ShellPolicy {
254            blocked_commands: compute_blocked_commands(config),
255        }));
256
257        let allowed_paths = if config.allowed_paths.is_empty() {
258            vec![std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))]
259        } else {
260            config.allowed_paths.iter().map(PathBuf::from).collect()
261        };
262
263        Self {
264            timeout: Duration::from_secs(config.timeout),
265            policy,
266            allowed_paths,
267            confirm_patterns: config.confirm_patterns.clone(),
268            env_blocklist: config.env_blocklist.clone(),
269            audit_logger: None,
270            tool_event_tx: None,
271            permission_policy: None,
272            output_filter_registry: None,
273            cancel_token: None,
274            skill_env: RwLock::new(None),
275            transactional: config.transactional,
276            auto_rollback: config.auto_rollback,
277            auto_rollback_exit_codes: config.auto_rollback_exit_codes.clone(),
278            snapshot_required: config.snapshot_required,
279            max_snapshot_bytes: config.max_snapshot_bytes,
280            transaction_scope_matchers: build_scope_matchers(&config.transaction_scope),
281            sandbox: None,
282            sandbox_policy: None,
283        }
284    }
285
286    /// Attach an OS-level sandbox backend and a pre-snapshotted policy.
287    ///
288    /// The policy is snapshotted at construction and never re-resolved per call (no TOCTOU).
289    /// If a different policy is needed, create a new `ShellExecutor` via the builder chain.
290    #[must_use]
291    pub fn with_sandbox(mut self, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
292        self.sandbox = Some(sandbox);
293        self.sandbox_policy = Some(policy);
294        self
295    }
296
297    /// Set environment variables to inject when executing the active skill's bash blocks.
298    pub fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
299        *self.skill_env.write() = env;
300    }
301
302    /// Attach an audit logger. Each shell invocation will emit an [`AuditEntry`].
303    #[must_use]
304    pub fn with_audit(mut self, logger: Arc<AuditLogger>) -> Self {
305        self.audit_logger = Some(logger);
306        self
307    }
308
309    /// Attach a tool-event sender for streaming output to the TUI or channel adapter.
310    ///
311    /// When set, [`ToolEvent::Started`], [`ToolEvent::OutputChunk`], and
312    /// [`ToolEvent::Completed`] events are sent on `tx` during execution.
313    #[must_use]
314    pub fn with_tool_event_tx(mut self, tx: ToolEventTx) -> Self {
315        self.tool_event_tx = Some(tx);
316        self
317    }
318
319    /// Attach a permission policy for confirmation-gate enforcement.
320    ///
321    /// Commands matching the policy's rules may require user approval before
322    /// execution proceeds.
323    #[must_use]
324    pub fn with_permissions(mut self, policy: PermissionPolicy) -> Self {
325        self.permission_policy = Some(policy);
326        self
327    }
328
329    /// Attach a cancellation token. When the token is cancelled, the running subprocess
330    /// is killed and the executor returns [`ToolError::Cancelled`].
331    #[must_use]
332    pub fn with_cancel_token(mut self, token: CancellationToken) -> Self {
333        self.cancel_token = Some(token);
334        self
335    }
336
337    /// Attach an output filter registry. Filters are applied to stdout+stderr before
338    /// the summary is stored in [`ToolOutput`] and sent to the LLM.
339    #[must_use]
340    pub fn with_output_filters(mut self, registry: OutputFilterRegistry) -> Self {
341        self.output_filter_registry = Some(registry);
342        self
343    }
344
345    /// Return a clonable handle for live policy rebuilds on hot-reload.
346    ///
347    /// Clone the handle out at construction time and store it on the agent.
348    /// Calling [`ShellPolicyHandle::rebuild`] atomically swaps the effective
349    /// `blocked_commands` without recreating the executor.
350    #[must_use]
351    pub fn policy_handle(&self) -> ShellPolicyHandle {
352        ShellPolicyHandle {
353            inner: Arc::clone(&self.policy),
354        }
355    }
356
357    /// Execute a bash block bypassing the confirmation check (called after user confirms).
358    ///
359    /// # Errors
360    ///
361    /// Returns `ToolError` on blocked commands, sandbox violations, or execution failures.
362    #[cfg_attr(
363        feature = "profiling",
364        tracing::instrument(name = "tool.shell", skip_all, fields(exit_code = tracing::field::Empty, duration_ms = tracing::field::Empty))
365    )]
366    pub async fn execute_confirmed(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
367        self.execute_inner(response, true).await
368    }
369
370    async fn execute_inner(
371        &self,
372        response: &str,
373        skip_confirm: bool,
374    ) -> Result<Option<ToolOutput>, ToolError> {
375        let blocks = extract_bash_blocks(response);
376        if blocks.is_empty() {
377            return Ok(None);
378        }
379
380        let mut outputs = Vec::with_capacity(blocks.len());
381        let mut cumulative_filter_stats: Option<FilterStats> = None;
382        let mut last_envelope: Option<ShellOutputEnvelope> = None;
383        #[allow(clippy::cast_possible_truncation)]
384        let blocks_executed = blocks.len() as u32;
385
386        for block in &blocks {
387            let (output_line, per_block_stats, envelope) =
388                self.execute_block(block, skip_confirm).await?;
389            if let Some(fs) = per_block_stats {
390                let stats = cumulative_filter_stats.get_or_insert_with(FilterStats::default);
391                stats.raw_chars += fs.raw_chars;
392                stats.filtered_chars += fs.filtered_chars;
393                stats.raw_lines += fs.raw_lines;
394                stats.filtered_lines += fs.filtered_lines;
395                stats.confidence = Some(match (stats.confidence, fs.confidence) {
396                    (Some(prev), Some(cur)) => crate::filter::worse_confidence(prev, cur),
397                    (Some(prev), None) => prev,
398                    (None, Some(cur)) => cur,
399                    (None, None) => unreachable!(),
400                });
401                if stats.command.is_none() {
402                    stats.command = fs.command;
403                }
404                if stats.kept_lines.is_empty() && !fs.kept_lines.is_empty() {
405                    stats.kept_lines = fs.kept_lines;
406                }
407            }
408            last_envelope = Some(envelope);
409            outputs.push(output_line);
410        }
411
412        let raw_response = last_envelope
413            .as_ref()
414            .and_then(|e| serde_json::to_value(e).ok());
415
416        Ok(Some(ToolOutput {
417            tool_name: ToolName::new("bash"),
418            summary: outputs.join("\n\n"),
419            blocks_executed,
420            filter_stats: cumulative_filter_stats,
421            diff: None,
422            streamed: self.tool_event_tx.is_some(),
423            terminal_id: None,
424            locations: None,
425            raw_response,
426            claim_source: Some(ClaimSource::Shell),
427        }))
428    }
429
    /// Run a single bash block end to end and return its formatted output.
    ///
    /// Steps, in order:
    ///
    /// 1. permission checks (blocklist, permission policy / confirmation gate)
    ///    and path sandbox validation;
    /// 2. optional transactional snapshot of the files a write command may touch;
    /// 3. `ToolEvent::Started`, then the actual command execution;
    /// 4. cancellation detection, optional auto-rollback of the snapshot;
    /// 5. timeout and shell-error classification (both return `Err`);
    /// 6. output sanitizing and filtering, `ToolEvent::Completed`, audit logging.
    ///
    /// On success the tuple holds the `"$ <block>\n<output>"` line (prefixed
    /// with a snapshot warning if one occurred), the filter statistics for
    /// this block if a filter matched, and the output envelope.
    ///
    /// # Errors
    ///
    /// Returns the error from the permission or sandbox checks,
    /// `ToolError::SnapshotFailed` when a required snapshot cannot be taken,
    /// `ToolError::Cancelled`, `ToolError::Timeout`, or `ToolError::Shell`.
    #[allow(clippy::too_many_lines)]
    async fn execute_block(
        &self,
        block: &str,
        skip_confirm: bool,
    ) -> Result<(String, Option<FilterStats>, ShellOutputEnvelope), ToolError> {
        self.check_permissions(block, skip_confirm).await?;
        self.validate_sandbox(block)?;

        // Take a transactional snapshot before executing write commands.
        let mut snapshot_warning: Option<String> = None;
        let snapshot = if self.transactional && is_write_command(block) {
            let paths = affected_paths(block, &self.transaction_scope_matchers);
            if paths.is_empty() {
                None
            } else {
                match TransactionSnapshot::capture(&paths, self.max_snapshot_bytes) {
                    Ok(snap) => {
                        tracing::debug!(
                            files = snap.file_count(),
                            bytes = snap.total_bytes(),
                            "transaction snapshot captured"
                        );
                        Some(snap)
                    }
                    // Strict mode: refuse to run the command without a snapshot.
                    Err(e) if self.snapshot_required => {
                        return Err(ToolError::SnapshotFailed {
                            reason: e.to_string(),
                        });
                    }
                    // Lenient mode: run anyway and surface a warning in the output line.
                    Err(e) => {
                        tracing::warn!(err = %e, "transaction snapshot failed, proceeding without rollback");
                        snapshot_warning =
                            Some(format!("[warn] snapshot failed: {e}; rollback unavailable"));
                        None
                    }
                }
            }
        } else {
            None
        };

        // Send failures are ignored here (result discarded).
        if let Some(ref tx) = self.tool_event_tx {
            let sandbox_profile = self
                .sandbox_policy
                .as_ref()
                .map(|p| format!("{:?}", p.profile));
            let _ = tx.send(ToolEvent::Started {
                tool_name: ToolName::new("bash"),
                command: block.to_owned(),
                sandbox_profile,
            });
        }

        let start = Instant::now();
        // Clone the skill environment so the lock is not held across the await below.
        let skill_env_snapshot: Option<std::collections::HashMap<String, String>> =
            self.skill_env.read().clone();
        // Sandboxing is only passed on when both the backend and its policy are set.
        let sandbox_pair = self
            .sandbox
            .as_ref()
            .zip(self.sandbox_policy.as_ref())
            .map(|(sb, pol)| (sb.as_ref(), pol));
        let (mut envelope, out) = execute_bash(
            block,
            self.timeout,
            self.tool_event_tx.as_ref(),
            self.cancel_token.as_ref(),
            skill_env_snapshot.as_ref(),
            &self.env_blocklist,
            sandbox_pair,
        )
        .await;
        let exit_code = envelope.exit_code;
        // Exit code 130 combined with a cancelled token is reported as a cancellation.
        // NOTE(review): this returns before rollback, audit logging and the
        // Completed event — confirm that is intended for cancelled commands.
        if exit_code == 130
            && self
                .cancel_token
                .as_ref()
                .is_some_and(CancellationToken::is_cancelled)
        {
            return Err(ToolError::Cancelled);
        }
        #[allow(clippy::cast_possible_truncation)]
        let duration_ms = start.elapsed().as_millis() as u64;

        // Perform auto-rollback if configured and the exit code qualifies.
        if let Some(snap) = snapshot {
            // With no explicit exit-code list, any exit code >= 2 qualifies.
            let should_rollback = self.auto_rollback
                && if self.auto_rollback_exit_codes.is_empty() {
                    exit_code >= 2
                } else {
                    self.auto_rollback_exit_codes.contains(&exit_code)
                };
            if should_rollback {
                match snap.rollback() {
                    Ok(report) => {
                        tracing::info!(
                            restored = report.restored_count,
                            deleted = report.deleted_count,
                            "transaction rollback completed"
                        );
                        self.log_audit(
                            block,
                            AuditResult::Rollback {
                                restored: report.restored_count,
                                deleted: report.deleted_count,
                            },
                            duration_ms,
                            None,
                            Some(exit_code),
                            false,
                        )
                        .await;
                        if let Some(ref tx) = self.tool_event_tx {
                            let _ = tx.send(ToolEvent::Rollback {
                                tool_name: ToolName::new("bash"),
                                command: block.to_owned(),
                                restored_count: report.restored_count,
                                deleted_count: report.deleted_count,
                            });
                        }
                    }
                    // A failed rollback is only logged; execution continues.
                    Err(e) => {
                        tracing::error!(err = %e, "transaction rollback failed");
                    }
                }
            }
            // On success (no rollback): snapshot dropped here; TempDir auto-cleans.
        }

        // The audit outcome is derived from marker strings in the combined output.
        let is_timeout = out.contains("[error] command timed out");
        let audit_result = if is_timeout {
            AuditResult::Timeout
        } else if out.contains("[error]") || out.contains("[stderr]") {
            AuditResult::Error {
                message: out.clone(),
            }
        } else {
            AuditResult::Success
        };
        if is_timeout {
            self.log_audit(
                block,
                audit_result,
                duration_ms,
                None,
                Some(exit_code),
                false,
            )
            .await;
            self.emit_completed(block, &out, false, None);
            return Err(ToolError::Timeout {
                timeout_secs: self.timeout.as_secs(),
            });
        }

        // NOTE(review): unlike the timeout and success paths, this early return
        // does not call `log_audit` — confirm whether classified shell errors
        // are meant to be absent from the audit log.
        if let Some(category) = classify_shell_exit(exit_code, &out) {
            self.emit_completed(block, &out, false, None);
            return Err(ToolError::Shell {
                exit_code,
                category,
                // Only the first three output lines are carried in the error message.
                message: out.lines().take(3).collect::<Vec<_>>().join("; "),
            });
        }

        let sanitized = sanitize_output(&out);
        let mut per_block_stats: Option<FilterStats> = None;
        // Apply the output filters, if any; fall back to the sanitized text
        // when no registry is set or the registry returns nothing for this command.
        let filtered = if let Some(ref registry) = self.output_filter_registry {
            match registry.apply(block, &sanitized, exit_code) {
                Some(fr) => {
                    tracing::debug!(
                        command = block,
                        raw = fr.raw_chars,
                        filtered = fr.filtered_chars,
                        savings_pct = fr.savings_pct(),
                        "output filter applied"
                    );
                    per_block_stats = Some(FilterStats {
                        raw_chars: fr.raw_chars,
                        filtered_chars: fr.filtered_chars,
                        raw_lines: fr.raw_lines,
                        filtered_lines: fr.filtered_lines,
                        confidence: Some(fr.confidence),
                        command: Some(block.to_owned()),
                        kept_lines: fr.kept_lines.clone(),
                    });
                    fr.output
                }
                None => sanitized,
            }
        } else {
            sanitized
        };

        // The Completed event carries the raw output, not the filtered text.
        self.emit_completed(
            block,
            &out,
            !out.contains("[error]"),
            per_block_stats.clone(),
        );

        // Mark truncated if output was shortened during filtering.
        // NOTE(review): the comparison is against the raw `out`, so any
        // shortening done by `sanitize_output` also sets the flag, and whatever
        // value `execute_bash` stored in `truncated` is overwritten — verify.
        envelope.truncated = filtered.len() < out.len();

        self.log_audit(
            block,
            audit_result,
            duration_ms,
            None,
            Some(exit_code),
            envelope.truncated,
        )
        .await;

        let output_line = if let Some(warn) = snapshot_warning {
            format!("{warn}\n$ {block}\n{filtered}")
        } else {
            format!("$ {block}\n{filtered}")
        };
        Ok((output_line, per_block_stats, envelope))
    }
650
651    fn emit_completed(
652        &self,
653        command: &str,
654        output: &str,
655        success: bool,
656        filter_stats: Option<FilterStats>,
657    ) {
658        if let Some(ref tx) = self.tool_event_tx {
659            let _ = tx.send(ToolEvent::Completed {
660                tool_name: ToolName::new("bash"),
661                command: command.to_owned(),
662                output: output.to_owned(),
663                success,
664                filter_stats,
665                diff: None,
666            });
667        }
668    }
669
670    /// Check blocklist, permission policy, and confirmation requirements for `block`.
671    async fn check_permissions(&self, block: &str, skip_confirm: bool) -> Result<(), ToolError> {
672        // Always check the blocklist first — it is a hard security boundary
673        // that must not be bypassed by the PermissionPolicy layer.
674        if let Some(blocked) = self.find_blocked_command(block) {
675            let err = ToolError::Blocked {
676                command: blocked.clone(),
677            };
678            self.log_audit(
679                block,
680                AuditResult::Blocked {
681                    reason: format!("blocked command: {blocked}"),
682                },
683                0,
684                Some(&err),
685                None,
686                false,
687            )
688            .await;
689            return Err(err);
690        }
691
692        if let Some(ref policy) = self.permission_policy {
693            match policy.check("bash", block) {
694                PermissionAction::Deny => {
695                    let err = ToolError::Blocked {
696                        command: block.to_owned(),
697                    };
698                    self.log_audit(
699                        block,
700                        AuditResult::Blocked {
701                            reason: "denied by permission policy".to_owned(),
702                        },
703                        0,
704                        Some(&err),
705                        None,
706                        false,
707                    )
708                    .await;
709                    return Err(err);
710                }
711                PermissionAction::Ask if !skip_confirm => {
712                    return Err(ToolError::ConfirmationRequired {
713                        command: block.to_owned(),
714                    });
715                }
716                _ => {}
717            }
718        } else if !skip_confirm && let Some(pattern) = self.find_confirm_command(block) {
719            return Err(ToolError::ConfirmationRequired {
720                command: pattern.to_owned(),
721            });
722        }
723
724        Ok(())
725    }
726
727    fn validate_sandbox(&self, code: &str) -> Result<(), ToolError> {
728        let cwd = std::env::current_dir().unwrap_or_default();
729
730        for token in extract_paths(code) {
731            if has_traversal(&token) {
732                return Err(ToolError::SandboxViolation { path: token });
733            }
734
735            let path = if token.starts_with('/') {
736                PathBuf::from(&token)
737            } else {
738                cwd.join(&token)
739            };
740            let canonical = path
741                .canonicalize()
742                .or_else(|_| std::path::absolute(&path))
743                .unwrap_or(path);
744            if !self
745                .allowed_paths
746                .iter()
747                .any(|allowed| canonical.starts_with(allowed))
748            {
749                return Err(ToolError::SandboxViolation {
750                    path: canonical.display().to_string(),
751                });
752            }
753        }
754        Ok(())
755    }
756
757    /// Scan `code` for commands that match the configured blocklist.
758    ///
759    /// The function normalizes input via [`strip_shell_escapes`] (decoding `$'\xNN'`,
760    /// `$'\NNN'`, backslash escapes, and quote-splitting) and then splits on shell
761    /// metacharacters (`||`, `&&`, `;`, `|`, `\n`) via [`tokenize_commands`].  Each
762    /// resulting token sequence is tested against every entry in `blocked_commands`
763    /// through [`tokens_match_pattern`], which handles transparent prefixes (`env`,
764    /// `command`, `exec`, etc.), absolute paths, and dot-suffixed variants.
765    ///
766    /// # Known limitations
767    ///
768    /// The following constructs are **not** detected by this function:
769    ///
770    /// - **Here-strings** `<<<` with a shell interpreter: the outer command is the
771    ///   shell (`bash`, `sh`), which is not blocked by default; the payload string is
772    ///   opaque to this filter.
773    ///   Example: `bash <<< 'sudo rm -rf /'` — inner payload is not parsed.
774    ///
775    /// - **`eval` and `bash -c` / `sh -c`**: the string argument is not parsed; any
776    ///   blocked command embedded as a string argument passes through undetected.
777    ///   Example: `eval 'sudo rm -rf /'`.
778    ///
779    /// - **Variable expansion**: `strip_shell_escapes` does not resolve variable
780    ///   references, so `cmd=sudo; $cmd rm` bypasses the blocklist.
781    ///
782    /// `$(...)`, backtick, `<(...)`, and `>(...)` substitutions are detected by
783    /// [`extract_subshell_contents`], which extracts the inner command string and
784    /// checks it against the blocklist separately.  The default `confirm_patterns`
785    /// in [`ShellConfig`] additionally include `"$("`, `` "`" ``, `"<("`, `">("`,
786    /// `"<<<"`, and `"eval "`, so those constructs also trigger a confirmation
787    /// request via [`find_confirm_command`] before execution.
788    ///
789    /// For high-security deployments, complement this filter with OS-level sandboxing
790    /// (Linux namespaces, seccomp, or similar) to enforce hard execution boundaries.
791    /// Scan `code` for commands that match the configured blocklist.
792    ///
793    /// Returns an owned `String` because the backing `Vec<String>` lives inside an
794    /// `ArcSwap` that may be replaced between calls — borrowing from the snapshot
795    /// guard would be unsound after the guard drops.
796    fn find_blocked_command(&self, code: &str) -> Option<String> {
797        let snapshot = self.policy.load_full();
798        let cleaned = strip_shell_escapes(&code.to_lowercase());
799        let commands = tokenize_commands(&cleaned);
800        for blocked in &snapshot.blocked_commands {
801            for cmd_tokens in &commands {
802                if tokens_match_pattern(cmd_tokens, blocked) {
803                    return Some(blocked.clone());
804                }
805            }
806        }
807        // Also check commands embedded inside subshell constructs.
808        for inner in extract_subshell_contents(&cleaned) {
809            let inner_commands = tokenize_commands(&inner);
810            for blocked in &snapshot.blocked_commands {
811                for cmd_tokens in &inner_commands {
812                    if tokens_match_pattern(cmd_tokens, blocked) {
813                        return Some(blocked.clone());
814                    }
815                }
816            }
817        }
818        None
819    }
820
821    fn find_confirm_command(&self, code: &str) -> Option<&str> {
822        let normalized = code.to_lowercase();
823        for pattern in &self.confirm_patterns {
824            if normalized.contains(pattern.as_str()) {
825                return Some(pattern.as_str());
826            }
827        }
828        None
829    }
830
831    async fn log_audit(
832        &self,
833        command: &str,
834        result: AuditResult,
835        duration_ms: u64,
836        error: Option<&ToolError>,
837        exit_code: Option<i32>,
838        truncated: bool,
839    ) {
840        if let Some(ref logger) = self.audit_logger {
841            let (error_category, error_domain, error_phase) =
842                error.map_or((None, None, None), |e| {
843                    let cat = e.category();
844                    (
845                        Some(cat.label().to_owned()),
846                        Some(cat.domain().label().to_owned()),
847                        Some(cat.phase().label().to_owned()),
848                    )
849                });
850            let entry = AuditEntry {
851                timestamp: chrono_now(),
852                tool: "shell".into(),
853                command: command.into(),
854                result,
855                duration_ms,
856                error_category,
857                error_domain,
858                error_phase,
859                claim_source: Some(ClaimSource::Shell),
860                mcp_server_id: None,
861                injection_flagged: false,
862                embedding_anomalous: false,
863                cross_boundary_mcp_to_acp: false,
864                adversarial_policy_decision: None,
865                exit_code,
866                truncated,
867                caller_id: None,
868                policy_match: None,
869                correlation_id: None,
870                vigil_risk: None,
871            };
872            logger.log(&entry).await;
873        }
874    }
875}
876
877impl ToolExecutor for ShellExecutor {
878    async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
879        self.execute_inner(response, false).await
880    }
881
882    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
883        use crate::registry::{InvocationHint, ToolDef};
884        vec![ToolDef {
885            id: "bash".into(),
886            description: "Execute a shell command and return stdout/stderr.\n\nParameters: command (string, required) - shell command to run\nReturns: stdout and stderr combined, prefixed with exit code\nErrors: Blocked if command matches security policy; Timeout after configured seconds; SandboxViolation if path outside allowed dirs\nExample: {\"command\": \"ls -la /tmp\"}".into(),
887            schema: schemars::schema_for!(BashParams),
888            invocation: InvocationHint::FencedBlock("bash"),
889            output_schema: None,
890        }]
891    }
892
893    async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
894        if call.tool_id != "bash" {
895            return Ok(None);
896        }
897        let params: BashParams = crate::executor::deserialize_params(&call.params)?;
898        if params.command.is_empty() {
899            return Ok(None);
900        }
901        let command = &params.command;
902        // Wrap as a fenced block so execute_inner can extract and run it
903        let synthetic = format!("```bash\n{command}\n```");
904        self.execute_inner(&synthetic, false).await
905    }
906
907    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
908        ShellExecutor::set_skill_env(self, env);
909    }
910}
911
/// Strip shell escape sequences that could bypass command detection.
///
/// Handles:
/// - backslash insertion (`su\do` -> `sudo`);
/// - backslash-newline continuations (both bytes are removed);
/// - `$'...'` ANSI-C quoting containing `\xNN` hex and/or `\NNN` octal escapes
///   (octal digits are `0`-`7`, up to three of them; the value is reduced to one byte);
/// - adjacent quoted segments (`"su""do"` -> `sudo`); an unterminated quote is
///   dropped and the rest of the input is kept.
///
/// The scan works on bytes, but every special character is ASCII, so multi-byte
/// UTF-8 text passes through unchanged.  Bytes produced by decoded escapes are
/// interpreted as UTF-8 together with their neighbours; invalid sequences become
/// U+FFFD.
///
/// # Known limitation
///
/// A `$'...'` sequence is only decoded when it contains at least one hex or octal
/// escape.  Otherwise the `$` is emitted literally and the quotes are stripped, so
/// `$'sudo'` normalises to `$sudo`, not `sudo`.
pub(crate) fn strip_shell_escapes(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        // $'...' ANSI-C quoting: replaced by its decoded payload when decodable.
        if bytes[i] == b'$' && bytes.get(i + 1) == Some(&b'\'') {
            if let Some((decoded, resume_at)) = decode_ansi_c_quote(bytes, i + 2) {
                out.extend_from_slice(&decoded);
                i = resume_at;
                continue;
            }
            // Not decodable: fall through and treat `$` and `'` as regular bytes.
        }
        match bytes[i] {
            // Backslash followed by anything: drop the backslash.  A newline after it
            // is a line continuation and is dropped too; any other byte is kept.
            b'\\' if i + 1 < bytes.len() => {
                if bytes[i + 1] != b'\n' {
                    out.push(bytes[i + 1]);
                }
                i += 2;
            }
            // Quoted segment: keep the content verbatim, drop the quotes so that
            // adjacent segments collapse into one word.
            quote @ (b'"' | b'\'') => {
                i += 1;
                while i < bytes.len() && bytes[i] != quote {
                    out.push(bytes[i]);
                    i += 1;
                }
                if i < bytes.len() {
                    i += 1; // skip closing quote
                }
            }
            other => {
                out.push(other);
                i += 1;
            }
        }
    }
    // Only ASCII bytes were removed from valid UTF-8 input, so this is lossless
    // unless a decoded escape introduced an invalid sequence.
    String::from_utf8_lossy(&out).into_owned()
}

/// Decode the body of a `$'...'` sequence whose first content byte is at `start`.
///
/// Returns the decoded bytes and the index just past the closing quote, or `None`
/// when the quote is unterminated or the body contains no hex/octal escape.
fn decode_ansi_c_quote(bytes: &[u8], start: usize) -> Option<(Vec<u8>, usize)> {
    let hex_digit = |b: u8| char::from(b).to_digit(16).and_then(|d| u8::try_from(d).ok());
    let is_octal = |b: u8| (b'0'..=b'7').contains(&b);

    let mut decoded = Vec::new();
    let mut saw_numeric_escape = false;
    let mut j = start;
    while j < bytes.len() && bytes[j] != b'\'' {
        if bytes[j] != b'\\' || j + 1 >= bytes.len() {
            decoded.push(bytes[j]);
            j += 1;
            continue;
        }
        let next = bytes[j + 1];
        if next == b'x' && j + 3 < bytes.len() {
            // \xNN hex escape (exactly two hex digits)
            if let (Some(hi), Some(lo)) = (hex_digit(bytes[j + 2]), hex_digit(bytes[j + 3])) {
                decoded.push((hi << 4) | lo);
                j += 4;
                saw_numeric_escape = true;
                continue;
            }
        } else if is_octal(next) {
            // \NNN octal escape: one to three octal digits, value taken modulo 256.
            let mut value = next - b'0';
            let mut consumed = 2; // `\` and the first digit
            while consumed < 4 {
                match bytes.get(j + consumed) {
                    Some(&digit) if is_octal(digit) => {
                        value = value.wrapping_mul(8).wrapping_add(digit - b'0');
                        consumed += 1;
                    }
                    _ => break,
                }
            }
            decoded.push(value);
            j += consumed;
            saw_numeric_escape = true;
            continue;
        }
        // Any other \X escape: emit X literally.
        decoded.push(next);
        j += 2;
    }
    // The loop stops either at the closing quote or at end of input.
    (j < bytes.len() && saw_numeric_escape).then(|| (decoded, j + 1))
}
1003
/// Extract inner command strings from subshell constructs in `s`.
///
/// Recognises:
/// - Backtick: `` `cmd` `` → `cmd`
/// - Dollar-paren: `$(cmd)` → `cmd`
/// - Process substitution (lt): `<(cmd)` → `cmd`
/// - Process substitution (gt): `>(cmd)` → `cmd`
///
/// Depth counting handles nested parentheses correctly.
///
/// Constructs nested inside an extracted body are extracted as well, so
/// `$(echo $(id))` yields both `echo $(id)` and `id`.  The result lists the
/// top-level bodies in order of appearance, followed by the bodies found inside
/// them.
///
/// An unterminated opener (a lone backtick, or `$(` without its closing `)`)
/// yields nothing itself, and scanning resumes right after it so that later,
/// well-formed constructs are still reported.
pub(crate) fn extract_subshell_contents(s: &str) -> Vec<String> {
    let mut results: Vec<String> = Vec::new();
    let top_level: Vec<char> = s.chars().collect();
    scan_subshell_level(&top_level, &mut results);

    // `results` doubles as a work queue: each extracted body is scanned once for
    // constructs nested inside it.  Every body is strictly shorter than the text
    // it came from, so the loop terminates.
    let mut next = 0;
    while next < results.len() {
        let body: Vec<char> = results[next].chars().collect();
        scan_subshell_level(&body, &mut results);
        next += 1;
    }
    results
}

/// Append to `out` the body of every terminated subshell construct found at the
/// outermost level of `chars`; the interior of a matched construct is skipped.
fn scan_subshell_level(chars: &[char], out: &mut Vec<String>) {
    let mut i = 0;
    while i < chars.len() {
        let (start, close) = if chars[i] == '`' {
            // Backtick substitution: `...`
            let start = i + 1;
            let close = chars[start..]
                .iter()
                .position(|&c| c == '`')
                .map(|offset| start + offset);
            (start, close)
        } else if matches!(chars[i], '$' | '<' | '>') && chars.get(i + 1) == Some(&'(') {
            // $(...), <(...), >(...)
            let start = i + 2;
            (start, matching_close_paren(chars, start))
        } else {
            i += 1;
            continue;
        };

        match close {
            Some(end) => {
                out.push(chars[start..end].iter().collect());
                i = end + 1;
            }
            // Unterminated: keep scanning after the opener instead of giving up.
            None => i = start,
        }
    }
}

/// Index of the `)` that balances an opening parenthesis whose body starts at
/// `start`, or `None` when the parenthesis is never closed.
fn matching_close_paren(chars: &[char], start: usize) -> Option<usize> {
    let mut depth: usize = 1;
    for (offset, &c) in chars[start..].iter().enumerate() {
        match c {
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(start + offset);
                }
            }
            _ => {}
        }
    }
    None
}
1066
/// Split normalized shell code into sub-commands on `|`, `||`, `&`, `&&`, `;`, `\n`.
///
/// Returns the list of sub-commands, each as a `Vec<String>` of whitespace-separated
/// tokens; segments without any token are dropped.
///
/// The single `&` (background operator) is a separator too, so the command that
/// follows it (`sleep 1 & sudo rm`) is checked on its own.  Splitting on the single
/// characters also covers the two-character operators, because the empty segment
/// between the two characters is discarded.
///
/// The split is purely textual: a redirection such as `2>&1` yields an extra
/// segment containing just `1`.
pub(crate) fn tokenize_commands(normalized: &str) -> Vec<Vec<String>> {
    normalized
        .split([';', '|', '&', '\n'])
        .map(|segment| {
            segment
                .split_whitespace()
                .map(str::to_owned)
                .collect::<Vec<String>>()
        })
        .filter(|tokens| !tokens.is_empty())
        .collect()
}
1082
/// Transparent prefix commands that invoke the next argument as a command.
///
/// [`tokens_match_pattern`] skips the leading run of tokens whose basename appears
/// in this list when determining the "real" command name being invoked.
const TRANSPARENT_PREFIXES: &[&str] = &["env", "command", "exec", "nice", "nohup", "time", "xargs"];
1086
/// Return the part of `tok` after its last `/`, or all of `tok` when it contains
/// no `/`.  A token ending in `/` therefore has an empty basename.
fn cmd_basename(tok: &str) -> &str {
    match tok.rfind('/') {
        // `/` is a single byte, so `slash + 1` is always a valid boundary.
        Some(slash) => &tok[slash + 1..],
        None => tok,
    }
}
1091
1092/// Check if the first tokens of a sub-command match a blocked pattern.
1093/// Handles:
1094/// - Transparent prefix commands (`env sudo rm` -> checks `sudo`)
1095/// - Absolute paths (`/usr/bin/sudo rm` -> basename `sudo` is checked)
1096/// - Dot-suffixed variants (`mkfs` matches `mkfs.ext4`)
1097/// - Multi-word patterns (`rm -rf /` joined prefix check)
1098pub(crate) fn tokens_match_pattern(tokens: &[String], pattern: &str) -> bool {
1099    if tokens.is_empty() || pattern.is_empty() {
1100        return false;
1101    }
1102    let pattern = pattern.trim();
1103    let pattern_tokens: Vec<&str> = pattern.split_whitespace().collect();
1104    if pattern_tokens.is_empty() {
1105        return false;
1106    }
1107
1108    // Skip transparent prefix tokens to reach the real command
1109    let start = tokens
1110        .iter()
1111        .position(|t| !TRANSPARENT_PREFIXES.contains(&cmd_basename(t)))
1112        .unwrap_or(0);
1113    let effective = &tokens[start..];
1114    if effective.is_empty() {
1115        return false;
1116    }
1117
1118    if pattern_tokens.len() == 1 {
1119        let pat = pattern_tokens[0];
1120        let base = cmd_basename(&effective[0]);
1121        // Exact match OR dot-suffixed variant (e.g. "mkfs" matches "mkfs.ext4")
1122        base == pat || base.starts_with(&format!("{pat}."))
1123    } else {
1124        // Multi-word: join first N tokens (using basename for first) and check prefix
1125        let n = pattern_tokens.len().min(effective.len());
1126        let mut parts: Vec<&str> = vec![cmd_basename(&effective[0])];
1127        parts.extend(effective[1..n].iter().map(String::as_str));
1128        let joined = parts.join(" ");
1129        if joined.starts_with(pattern) {
1130            return true;
1131        }
1132        if effective.len() > n {
1133            let mut parts2: Vec<&str> = vec![cmd_basename(&effective[0])];
1134            parts2.extend(effective[1..=n].iter().map(String::as_str));
1135            parts2.join(" ").starts_with(pattern)
1136        } else {
1137            false
1138        }
1139    }
1140}
1141
1142fn extract_paths(code: &str) -> Vec<String> {
1143    let mut result = Vec::new();
1144
1145    // Tokenize respecting single/double quotes
1146    let mut tokens: Vec<String> = Vec::new();
1147    let mut current = String::new();
1148    let mut chars = code.chars().peekable();
1149    while let Some(c) = chars.next() {
1150        match c {
1151            '"' | '\'' => {
1152                let quote = c;
1153                while let Some(&nc) = chars.peek() {
1154                    if nc == quote {
1155                        chars.next();
1156                        break;
1157                    }
1158                    current.push(chars.next().unwrap());
1159                }
1160            }
1161            c if c.is_whitespace() || matches!(c, ';' | '|' | '&') => {
1162                if !current.is_empty() {
1163                    tokens.push(std::mem::take(&mut current));
1164                }
1165            }
1166            _ => current.push(c),
1167        }
1168    }
1169    if !current.is_empty() {
1170        tokens.push(current);
1171    }
1172
1173    for token in tokens {
1174        let trimmed = token.trim_end_matches([';', '&', '|']).to_owned();
1175        if trimmed.is_empty() {
1176            continue;
1177        }
1178        if trimmed.starts_with('/')
1179            || trimmed.starts_with("./")
1180            || trimmed.starts_with("../")
1181            || trimmed == ".."
1182            || (trimmed.starts_with('.') && trimmed.contains('/'))
1183            || is_relative_path_token(&trimmed)
1184        {
1185            result.push(trimmed);
1186        }
1187    }
1188    result
1189}
1190
/// Returns `true` if `token` looks like a relative path of the form `word/more`:
/// it contains a `/` and begins with an ASCII letter, digit, or `_` (so tokens
/// starting with `/` or `.` are never accepted here).
///
/// Excluded:
/// - URL schemes (anything containing `://`)
/// - Shell variable assignments (`KEY=value`, where everything before the first
///   `=` consists of ASCII alphanumerics and `_`)
fn is_relative_path_token(token: &str) -> bool {
    let Some(first) = token.chars().next() else {
        return false;
    };
    // An identifier-like first character also rules out absolute (`/…`) and
    // dot-relative (`.…`) tokens.
    if !(first.is_ascii_alphanumeric() || first == '_') {
        return false;
    }
    if !token.contains('/') || token.contains("://") {
        return false;
    }
    let is_assignment = token.split_once('=').is_some_and(|(key, _)| {
        key.chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_')
    });
    !is_assignment
}
1219
1220/// Classify shell exit codes and stderr patterns into `ToolErrorCategory`.
1221///
1222/// Returns `Some(category)` only for well-known failure modes that benefit from
1223/// structured feedback (exit 126/127, recognisable stderr patterns). All other
1224/// non-zero exits are left as `Ok` output so they surface verbatim to the LLM.
1225fn classify_shell_exit(
1226    exit_code: i32,
1227    output: &str,
1228) -> Option<crate::error_taxonomy::ToolErrorCategory> {
1229    use crate::error_taxonomy::ToolErrorCategory;
1230    match exit_code {
1231        // exit 126: command found but not executable (OS-level permission/policy)
1232        126 => Some(ToolErrorCategory::PolicyBlocked),
1233        // exit 127: command not found in PATH
1234        127 => Some(ToolErrorCategory::PermanentFailure),
1235        _ => {
1236            let lower = output.to_lowercase();
1237            if lower.contains("permission denied") {
1238                Some(ToolErrorCategory::PolicyBlocked)
1239            } else if lower.contains("no such file or directory") {
1240                Some(ToolErrorCategory::PermanentFailure)
1241            } else {
1242                None
1243            }
1244        }
1245    }
1246}
1247
/// Returns `true` when any `/`-separated segment of `path` is exactly `..`.
fn has_traversal(path: &str) -> bool {
    // A `..` segment is either the whole path, its first segment, its last
    // segment, or enclosed by slashes.
    path == ".." || path.starts_with("../") || path.ends_with("/..") || path.contains("/../")
}
1251
/// Thin wrapper that calls `crate::executor::extract_fenced_blocks` with `"bash"`
/// as its second argument and returns the resulting slices of `text` unchanged.
fn extract_bash_blocks(text: &str) -> Vec<&str> {
    crate::executor::extract_fenced_blocks(text, "bash")
}
1255
1256/// Kill a child process and its descendants.
1257/// On unix, sends SIGKILL to child processes via `pkill -KILL -P <pid>` before
1258/// killing the parent, preventing zombie subprocesses.
1259async fn kill_process_tree(child: &mut tokio::process::Child) {
1260    #[cfg(unix)]
1261    if let Some(pid) = child.id() {
1262        let _ = Command::new("pkill")
1263            .args(["-KILL", "-P", &pid.to_string()])
1264            .status()
1265            .await;
1266    }
1267    let _ = child.kill().await;
1268}
1269
/// Structured output from a shell command execution.
///
/// Produced by the internal `execute_bash` function and included in the final
/// [`ToolOutput`] and [`AuditEntry`] for the invocation.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ShellOutputEnvelope {
    /// Captured standard output, possibly truncated.
    ///
    /// `execute_bash` strips trailing whitespace when the command runs to completion.
    pub stdout: String,
    /// Captured standard error, possibly truncated.
    ///
    /// `execute_bash` also uses this field for its own diagnostics: the spawn or
    /// sandbox setup error, or a timeout / abort notice appended to what was captured.
    pub stderr: String,
    /// Process exit code. `0` indicates success by convention.
    ///
    /// `execute_bash` reports `1` for setup failures, timeouts, and when the real
    /// exit code is unavailable, and `130` when the operation is cancelled.
    pub exit_code: i32,
    /// `true` when the combined output exceeded the configured max and was truncated.
    ///
    /// `execute_bash` itself always sets this to `false`.
    pub truncated: bool,
}
1285
1286#[allow(clippy::too_many_lines)]
1287async fn execute_bash(
1288    code: &str,
1289    timeout: Duration,
1290    event_tx: Option<&ToolEventTx>,
1291    cancel_token: Option<&CancellationToken>,
1292    extra_env: Option<&std::collections::HashMap<String, String>>,
1293    env_blocklist: &[String],
1294    sandbox: Option<(&dyn Sandbox, &SandboxPolicy)>,
1295) -> (ShellOutputEnvelope, String) {
1296    use std::process::Stdio;
1297    use tokio::io::{AsyncBufReadExt, BufReader};
1298
1299    let timeout_secs = timeout.as_secs();
1300
1301    let mut cmd = Command::new("bash");
1302    cmd.arg("-c").arg(code);
1303
1304    for (key, _) in std::env::vars() {
1305        if env_blocklist
1306            .iter()
1307            .any(|prefix| key.starts_with(prefix.as_str()))
1308        {
1309            cmd.env_remove(&key);
1310        }
1311    }
1312
1313    if let Some(env) = extra_env {
1314        cmd.envs(env);
1315    }
1316
1317    // Apply OS sandbox before setting stdio so the rewritten program is sandboxed.
1318    if let Some((sb, policy)) = sandbox
1319        && let Err(err) = sb.wrap(&mut cmd, policy)
1320    {
1321        let msg = format!("[error] sandbox setup failed: {err}");
1322        return (
1323            ShellOutputEnvelope {
1324                stdout: String::new(),
1325                stderr: msg.clone(),
1326                exit_code: 1,
1327                truncated: false,
1328            },
1329            msg,
1330        );
1331    }
1332
1333    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
1334
1335    let child_result = cmd.spawn();
1336
1337    let mut child = match child_result {
1338        Ok(c) => c,
1339        Err(e) => {
1340            let msg = format!("[error] {e}");
1341            return (
1342                ShellOutputEnvelope {
1343                    stdout: String::new(),
1344                    stderr: msg.clone(),
1345                    exit_code: 1,
1346                    truncated: false,
1347                },
1348                msg,
1349            );
1350        }
1351    };
1352
1353    let stdout = child.stdout.take().expect("stdout piped");
1354    let stderr = child.stderr.take().expect("stderr piped");
1355
1356    // Channel carries (is_stderr, line) so we can accumulate separate buffers
1357    // while still building a combined interleaved string for streaming and LLM context.
1358    let (line_tx, mut line_rx) = tokio::sync::mpsc::channel::<(bool, String)>(64);
1359
1360    let stdout_tx = line_tx.clone();
1361    tokio::spawn(async move {
1362        let mut reader = BufReader::new(stdout);
1363        let mut buf = String::new();
1364        while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1365            let _ = stdout_tx.send((false, buf.clone())).await;
1366            buf.clear();
1367        }
1368    });
1369
1370    tokio::spawn(async move {
1371        let mut reader = BufReader::new(stderr);
1372        let mut buf = String::new();
1373        while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1374            let _ = line_tx.send((true, buf.clone())).await;
1375            buf.clear();
1376        }
1377    });
1378
1379    let mut combined = String::new();
1380    let mut stdout_buf = String::new();
1381    let mut stderr_buf = String::new();
1382    let deadline = tokio::time::Instant::now() + timeout;
1383
1384    loop {
1385        tokio::select! {
1386            line = line_rx.recv() => {
1387                match line {
1388                    Some((is_stderr, chunk)) => {
1389                        let interleaved = if is_stderr {
1390                            format!("[stderr] {chunk}")
1391                        } else {
1392                            chunk.clone()
1393                        };
1394                        if let Some(tx) = event_tx {
1395                            let _ = tx.send(ToolEvent::OutputChunk {
1396                                tool_name: ToolName::new("bash"),
1397                                command: code.to_owned(),
1398                                chunk: interleaved.clone(),
1399                            });
1400                        }
1401                        combined.push_str(&interleaved);
1402                        if is_stderr {
1403                            stderr_buf.push_str(&chunk);
1404                        } else {
1405                            stdout_buf.push_str(&chunk);
1406                        }
1407                    }
1408                    None => break,
1409                }
1410            }
1411            () = tokio::time::sleep_until(deadline) => {
1412                kill_process_tree(&mut child).await;
1413                let msg = format!("[error] command timed out after {timeout_secs}s");
1414                return (
1415                    ShellOutputEnvelope {
1416                        stdout: stdout_buf,
1417                        stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
1418                        exit_code: 1,
1419                        truncated: false,
1420                    },
1421                    msg,
1422                );
1423            }
1424            () = async {
1425                match cancel_token {
1426                    Some(t) => t.cancelled().await,
1427                    None => std::future::pending().await,
1428                }
1429            } => {
1430                kill_process_tree(&mut child).await;
1431                return (
1432                    ShellOutputEnvelope {
1433                        stdout: stdout_buf,
1434                        stderr: format!("{stderr_buf}operation aborted"),
1435                        exit_code: 130,
1436                        truncated: false,
1437                    },
1438                    "[cancelled] operation aborted".to_string(),
1439                );
1440            }
1441        }
1442    }
1443
1444    let status = child.wait().await;
1445    let exit_code = status.ok().and_then(|s| s.code()).unwrap_or(1);
1446
1447    let (envelope, combined) = if combined.is_empty() {
1448        (
1449            ShellOutputEnvelope {
1450                stdout: String::new(),
1451                stderr: String::new(),
1452                exit_code,
1453                truncated: false,
1454            },
1455            "(no output)".to_string(),
1456        )
1457    } else {
1458        (
1459            ShellOutputEnvelope {
1460                stdout: stdout_buf.trim_end().to_owned(),
1461                stderr: stderr_buf.trim_end().to_owned(),
1462                exit_code,
1463                truncated: false,
1464            },
1465            combined,
1466        )
1467    };
1468    (envelope, combined)
1469}
1470
1471#[cfg(test)]
1472mod tests;