zeph_tools/shell/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Shell executor that parses and runs bash blocks from LLM responses.
5//!
6//! [`ShellExecutor`] is the primary tool backend for Zeph. It handles both legacy
7//! fenced bash blocks and structured `bash` tool calls. Security controls enforced
8//! before every command:
9//!
10//! - **Blocklist** — commands matching any entry in `blocked_commands` (or the built-in
11//!   [`DEFAULT_BLOCKED_COMMANDS`]) are rejected with [`ToolError::Blocked`].
12//! - **Subshell metacharacters** — `$(`, `` ` ``, `<(`, and `>(` are always blocked
13//!   because nested evaluation cannot be safely analysed statically.
14//! - **Path sandbox** — the working directory and any file arguments must reside under
15//!   the configured `allowed_paths`.
16//! - **Confirmation gate** — commands matching `confirm_patterns` are held for user
17//!   approval before execution (bypassed by `execute_confirmed`).
18//! - **Environment blocklist** — variables in `env_blocklist` are stripped from the
19//!   subprocess environment before launch.
//! - **Transactional rollback** — when enabled, file snapshots are taken before write
//!   commands; with `auto_rollback` set they are restored when the exit code is listed in
//!   `auto_rollback_exit_codes` (or is `>= 2` when that list is empty).
22
23use std::collections::HashMap;
24use std::path::PathBuf;
25use std::sync::Arc;
26use std::sync::atomic::AtomicBool;
27use std::time::{Duration, Instant};
28
29use tokio::process::Command;
30use tokio_util::sync::CancellationToken;
31
32use schemars::JsonSchema;
33use serde::Deserialize;
34
35use arc_swap::ArcSwap;
36use parking_lot::{Mutex, RwLock};
37
38use zeph_common::ToolName;
39
40use crate::audit::{AuditEntry, AuditLogger, AuditResult, chrono_now};
41use crate::config::ShellConfig;
42use crate::executor::{
43    ClaimSource, FilterStats, ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput,
44};
45use crate::filter::{OutputFilterRegistry, sanitize_output};
46use crate::permissions::{PermissionAction, PermissionPolicy};
47use crate::sandbox::{Sandbox, SandboxPolicy};
48
49pub mod background;
50use background::{BackgroundCompletion, BackgroundHandle, RunId};
51
52mod transaction;
53use transaction::{TransactionSnapshot, affected_paths, build_scope_matchers, is_write_command};
54
// Built-in command patterns that are blocked unless configuration allows them.
const DEFAULT_BLOCKED: &[&str] = &[
    "rm -rf /",
    "sudo",
    "mkfs",
    "dd if=",
    "curl",
    "wget",
    "nc ",
    "ncat",
    "netcat",
    "shutdown",
    "reboot",
    "halt",
];

/// Built-in blocked command patterns, re-exported for other executors.
///
/// Covers destructive commands (`rm -rf /`, `mkfs`, `dd if=`), privilege
/// escalation (`sudo`), network egress tools (`curl`, `wget`, `nc`, `ncat`,
/// `netcat`) and power-state commands (`shutdown`, `reboot`, `halt`).
///
/// Exposed so other executors (e.g. `AcpShellExecutor`) can reuse the same
/// list instead of duplicating it.
pub const DEFAULT_BLOCKED_COMMANDS: &[&str] = DEFAULT_BLOCKED;
69
/// Names of shell binaries that can run an arbitrary script passed on the command line.
///
/// [`effective_shell_command`] compares the final path component of a binary
/// against this list; on a match it extracts the script argument so that the
/// script, rather than the interpreter name, can be checked against the blocklist.
pub const SHELL_INTERPRETERS: &[&str] = &[
    "bash", "sh", "zsh", "fish", "dash", "ksh", "csh", "tcsh",
];
77
/// Character sequences that start nested shell evaluation: command substitution
/// (`$(` and the backtick) and process substitution (`<(` and `>(`).
///
/// A command containing any of them is rejected outright, because a blocked
/// command could hide inside the nested part where it cannot be analysed statically.
const SUBSHELL_METACHARS: &[&str] = &[
    "$(",
    "`",
    "<(",
    ">(",
];
82
83/// Check if `command` matches any pattern in `blocklist`.
84///
85/// Returns the matched pattern string if the command is blocked, `None` otherwise.
86/// The check is case-insensitive and handles common shell escape sequences.
87///
88/// Commands containing subshell metacharacters (`$(` or `` ` ``) are always
89/// blocked because nested evaluation cannot be safely analysed statically.
90#[must_use]
91pub fn check_blocklist(command: &str, blocklist: &[String]) -> Option<String> {
92    let lower = command.to_lowercase();
93    // Reject commands that embed subshell constructs to prevent blocklist bypass.
94    for meta in SUBSHELL_METACHARS {
95        if lower.contains(meta) {
96            return Some((*meta).to_owned());
97        }
98    }
99    let cleaned = strip_shell_escapes(&lower);
100    let commands = tokenize_commands(&cleaned);
101    for blocked in blocklist {
102        for cmd_tokens in &commands {
103            if tokens_match_pattern(cmd_tokens, blocked) {
104                return Some(blocked.clone());
105            }
106        }
107    }
108    None
109}
110
111/// Build the effective command string for blocklist evaluation when the binary is a
112/// shell interpreter (bash, sh, zsh, etc.) and args contains a `-c` script.
113///
114/// Returns `None` if the args do not follow the `-c <script>` pattern.
115#[must_use]
116pub fn effective_shell_command<'a>(binary: &str, args: &'a [String]) -> Option<&'a str> {
117    let base = binary.rsplit('/').next().unwrap_or(binary);
118    if !SHELL_INTERPRETERS.contains(&base) {
119        return None;
120    }
121    // Find "-c" and return the next element as the script to check.
122    let pos = args.iter().position(|a| a == "-c")?;
123    args.get(pos + 1).map(String::as_str)
124}
125
// Network egress tools. `compute_blocked_commands` adds these to the effective
// blocklist whenever `ShellConfig::allow_network` is `false`.
const NETWORK_COMMANDS: &[&str] = &[
    "curl",
    "wget",
    "nc ",
    "ncat",
    "netcat",
];
127
/// Effective command-restriction policy held inside a `ShellExecutor`.
///
/// The executor keeps this value behind an `ArcSwap`; [`ShellPolicyHandle::rebuild`]
/// replaces the whole value with a single atomic store on hot-reload.
#[derive(Debug)]
pub(crate) struct ShellPolicy {
    /// Blocked command patterns, as produced by [`compute_blocked_commands`].
    pub(crate) blocked_commands: Vec<String>,
}
135
136/// Clonable handle for live policy rebuilds on hot-reload.
137///
138/// Obtained from [`ShellExecutor::policy_handle`] at construction time and stored
139/// on the agent. Call [`ShellPolicyHandle::rebuild`] to atomically replace the
140/// effective `blocked_commands` list without recreating the executor. Reads on
141/// the dispatch path are lock-free via `ArcSwap::load_full`.
142#[derive(Clone, Debug)]
143pub struct ShellPolicyHandle {
144    inner: Arc<ArcSwap<ShellPolicy>>,
145}
146
147impl ShellPolicyHandle {
148    /// Atomically install a new effective blocklist derived from `config`.
149    ///
150    /// # Rebuild contract
151    ///
152    /// `config` must be the **already-overlay-merged** `ShellConfig` (i.e. the
153    /// value produced by `load_config_with_overlay`). Plugin contributions are
154    /// already present in `config.blocked_commands` at this point; this method
155    /// does NOT re-apply overlays.
156    pub fn rebuild(&self, config: &crate::config::ShellConfig) {
157        let policy = Arc::new(ShellPolicy {
158            blocked_commands: compute_blocked_commands(config),
159        });
160        self.inner.store(policy);
161    }
162
163    /// Snapshot of the current effective blocklist.
164    #[must_use]
165    pub fn snapshot_blocked(&self) -> Vec<String> {
166        self.inner.load().blocked_commands.clone()
167    }
168}
169
170/// Compute the effective blocklist from an already-overlay-merged `ShellConfig`.
171///
172/// Invariant: identical to the logic in `ShellExecutor::new`.
173pub(crate) fn compute_blocked_commands(config: &crate::config::ShellConfig) -> Vec<String> {
174    let allowed: Vec<String> = config
175        .allowed_commands
176        .iter()
177        .map(|s| s.to_lowercase())
178        .collect();
179    let mut blocked: Vec<String> = DEFAULT_BLOCKED
180        .iter()
181        .filter(|s| !allowed.contains(&s.to_lowercase()))
182        .map(|s| (*s).to_owned())
183        .collect();
184    blocked.extend(config.blocked_commands.iter().map(|s| s.to_lowercase()));
185    if !config.allow_network {
186        for cmd in NETWORK_COMMANDS {
187            let lower = cmd.to_lowercase();
188            if !blocked.contains(&lower) {
189                blocked.push(lower);
190            }
191        }
192    }
193    blocked.sort();
194    blocked.dedup();
195    blocked
196}
197
// Deserialized parameter payload; presumably the arguments of the structured
// `bash` tool call — confirm against the call site.
//
// NOTE(review): the `///` field comments below are left verbatim on purpose. With
// `#[derive(JsonSchema)]` they presumably become the field descriptions of the
// generated schema, so rewording them would change runtime output — confirm
// before editing them.
#[derive(Deserialize, JsonSchema)]
pub(crate) struct BashParams {
    /// The bash command to execute.
    command: String,
    /// When `true`, spawn the command in the background and return immediately.
    ///
    /// The agent receives a `run_id` in the synchronous tool result. When the
    /// command finishes, a synthetic user-role message is injected at the start
    /// of the next turn carrying the exit code and output.
    #[serde(default)]
    background: bool,
}
210
211/// Bash block extraction and execution via `tokio::process::Command`.
212///
213/// Parses ` ```bash ` fenced blocks from LLM responses (legacy path) and handles
214/// structured `bash` tool calls (modern path). Use [`ShellExecutor::new`] with a
215/// [`ShellConfig`] and chain optional builder methods to attach audit logging,
216/// event streaming, permission policies, and cancellation.
217///
218/// # Example
219///
220/// ```rust,no_run
221/// use zeph_tools::{ShellExecutor, ToolExecutor, config::ShellConfig};
222///
223/// # async fn example() {
224/// let executor = ShellExecutor::new(&ShellConfig::default());
225///
226/// // Execute a fenced bash block.
227/// let response = "```bash\npwd\n```";
228/// if let Ok(Some(output)) = executor.execute(response).await {
229///     println!("{}", output.summary);
230/// }
231/// # }
232/// ```
233#[derive(Debug)]
234pub struct ShellExecutor {
235    timeout: Duration,
236    policy: Arc<ArcSwap<ShellPolicy>>,
237    allowed_paths: Vec<PathBuf>,
238    confirm_patterns: Vec<String>,
239    env_blocklist: Vec<String>,
240    audit_logger: Option<Arc<AuditLogger>>,
241    tool_event_tx: Option<ToolEventTx>,
242    permission_policy: Option<PermissionPolicy>,
243    output_filter_registry: Option<OutputFilterRegistry>,
244    cancel_token: Option<CancellationToken>,
245    skill_env: RwLock<Option<std::collections::HashMap<String, String>>>,
246    transactional: bool,
247    auto_rollback: bool,
248    auto_rollback_exit_codes: Vec<i32>,
249    snapshot_required: bool,
250    max_snapshot_bytes: u64,
251    transaction_scope_matchers: Vec<globset::GlobMatcher>,
252    sandbox: Option<Arc<dyn Sandbox>>,
253    sandbox_policy: Option<SandboxPolicy>,
254    /// Registry of in-flight background runs. Bounded by `max_background_runs`.
255    background_runs: Arc<Mutex<HashMap<RunId, BackgroundHandle>>>,
256    /// Maximum number of concurrent background runs.
257    max_background_runs: usize,
258    /// Timeout applied to each background run.
259    background_timeout: Duration,
260    /// Set to `true` during shutdown to prevent new background spawns.
261    shutting_down: Arc<AtomicBool>,
262    /// Dedicated sender used to forward [`BackgroundCompletion`]s to the agent
263    /// (bypasses the UI-facing [`ToolEventTx`] channel). `None` when the agent
264    /// has not wired a background completion receiver.
265    background_completion_tx: Option<tokio::sync::mpsc::Sender<BackgroundCompletion>>,
266}
267
268impl ShellExecutor {
269    /// Create a new `ShellExecutor` from configuration.
270    ///
271    /// Merges the built-in [`DEFAULT_BLOCKED_COMMANDS`] with any additional blocked
272    /// commands from `config`, then subtracts any explicitly allowed commands.
273    /// No subprocess is spawned at construction time.
274    #[must_use]
275    pub fn new(config: &ShellConfig) -> Self {
276        let policy = Arc::new(ArcSwap::from_pointee(ShellPolicy {
277            blocked_commands: compute_blocked_commands(config),
278        }));
279
280        let allowed_paths = if config.allowed_paths.is_empty() {
281            vec![std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))]
282        } else {
283            config.allowed_paths.iter().map(PathBuf::from).collect()
284        };
285
286        Self {
287            timeout: Duration::from_secs(config.timeout),
288            policy,
289            allowed_paths,
290            confirm_patterns: config.confirm_patterns.clone(),
291            env_blocklist: config.env_blocklist.clone(),
292            audit_logger: None,
293            tool_event_tx: None,
294            permission_policy: None,
295            output_filter_registry: None,
296            cancel_token: None,
297            skill_env: RwLock::new(None),
298            transactional: config.transactional,
299            auto_rollback: config.auto_rollback,
300            auto_rollback_exit_codes: config.auto_rollback_exit_codes.clone(),
301            snapshot_required: config.snapshot_required,
302            max_snapshot_bytes: config.max_snapshot_bytes,
303            transaction_scope_matchers: build_scope_matchers(&config.transaction_scope),
304            sandbox: None,
305            sandbox_policy: None,
306            background_runs: Arc::new(Mutex::new(HashMap::new())),
307            max_background_runs: config.max_background_runs,
308            background_timeout: Duration::from_secs(config.background_timeout_secs),
309            shutting_down: Arc::new(AtomicBool::new(false)),
310            background_completion_tx: None,
311        }
312    }
313
314    /// Attach an OS-level sandbox backend and a pre-snapshotted policy.
315    ///
316    /// The policy is snapshotted at construction and never re-resolved per call (no TOCTOU).
317    /// If a different policy is needed, create a new `ShellExecutor` via the builder chain.
318    #[must_use]
319    pub fn with_sandbox(mut self, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
320        self.sandbox = Some(sandbox);
321        self.sandbox_policy = Some(policy);
322        self
323    }
324
325    /// Set environment variables to inject when executing the active skill's bash blocks.
326    pub fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
327        *self.skill_env.write() = env;
328    }
329
330    /// Attach an audit logger. Each shell invocation will emit an [`AuditEntry`].
331    #[must_use]
332    pub fn with_audit(mut self, logger: Arc<AuditLogger>) -> Self {
333        self.audit_logger = Some(logger);
334        self
335    }
336
337    /// Attach a tool-event sender for streaming output to the TUI or channel adapter.
338    ///
339    /// When set, [`ToolEvent::Started`], [`ToolEvent::OutputChunk`], and
340    /// [`ToolEvent::Completed`] events are sent on `tx` during execution.
341    #[must_use]
342    pub fn with_tool_event_tx(mut self, tx: ToolEventTx) -> Self {
343        self.tool_event_tx = Some(tx);
344        self
345    }
346
347    /// Attach a dedicated sender for routing [`BackgroundCompletion`] payloads to the agent.
348    ///
349    /// This channel is separate from [`ToolEventTx`] (which goes to the TUI). The agent holds
350    /// the receiver end and drains it at the start of each turn to inject deferred completions
351    /// into the message history as a single merged user-role block.
352    #[must_use]
353    pub fn with_background_completion_tx(
354        mut self,
355        tx: tokio::sync::mpsc::Sender<BackgroundCompletion>,
356    ) -> Self {
357        self.background_completion_tx = Some(tx);
358        self
359    }
360
361    /// Attach a permission policy for confirmation-gate enforcement.
362    ///
363    /// Commands matching the policy's rules may require user approval before
364    /// execution proceeds.
365    #[must_use]
366    pub fn with_permissions(mut self, policy: PermissionPolicy) -> Self {
367        self.permission_policy = Some(policy);
368        self
369    }
370
371    /// Attach a cancellation token. When the token is cancelled, the running subprocess
372    /// is killed and the executor returns [`ToolError::Cancelled`].
373    #[must_use]
374    pub fn with_cancel_token(mut self, token: CancellationToken) -> Self {
375        self.cancel_token = Some(token);
376        self
377    }
378
379    /// Attach an output filter registry. Filters are applied to stdout+stderr before
380    /// the summary is stored in [`ToolOutput`] and sent to the LLM.
381    #[must_use]
382    pub fn with_output_filters(mut self, registry: OutputFilterRegistry) -> Self {
383        self.output_filter_registry = Some(registry);
384        self
385    }
386
387    /// Return a clonable handle for live policy rebuilds on hot-reload.
388    ///
389    /// Clone the handle out at construction time and store it on the agent.
390    /// Calling [`ShellPolicyHandle::rebuild`] atomically swaps the effective
391    /// `blocked_commands` without recreating the executor.
392    #[must_use]
393    pub fn policy_handle(&self) -> ShellPolicyHandle {
394        ShellPolicyHandle {
395            inner: Arc::clone(&self.policy),
396        }
397    }
398
399    /// Execute a bash block bypassing the confirmation check (called after user confirms).
400    ///
401    /// # Errors
402    ///
403    /// Returns `ToolError` on blocked commands, sandbox violations, or execution failures.
404    #[cfg_attr(
405        feature = "profiling",
406        tracing::instrument(name = "tool.shell", skip_all, fields(exit_code = tracing::field::Empty, duration_ms = tracing::field::Empty))
407    )]
408    pub async fn execute_confirmed(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
409        self.execute_inner(response, true).await
410    }
411
412    async fn execute_inner(
413        &self,
414        response: &str,
415        skip_confirm: bool,
416    ) -> Result<Option<ToolOutput>, ToolError> {
417        let blocks = extract_bash_blocks(response);
418        if blocks.is_empty() {
419            return Ok(None);
420        }
421
422        let mut outputs = Vec::with_capacity(blocks.len());
423        let mut cumulative_filter_stats: Option<FilterStats> = None;
424        let mut last_envelope: Option<ShellOutputEnvelope> = None;
425        #[allow(clippy::cast_possible_truncation)]
426        let blocks_executed = blocks.len() as u32;
427
428        for block in &blocks {
429            let (output_line, per_block_stats, envelope) =
430                self.execute_block(block, skip_confirm).await?;
431            if let Some(fs) = per_block_stats {
432                let stats = cumulative_filter_stats.get_or_insert_with(FilterStats::default);
433                stats.raw_chars += fs.raw_chars;
434                stats.filtered_chars += fs.filtered_chars;
435                stats.raw_lines += fs.raw_lines;
436                stats.filtered_lines += fs.filtered_lines;
437                stats.confidence = Some(match (stats.confidence, fs.confidence) {
438                    (Some(prev), Some(cur)) => crate::filter::worse_confidence(prev, cur),
439                    (Some(prev), None) => prev,
440                    (None, Some(cur)) => cur,
441                    (None, None) => unreachable!(),
442                });
443                if stats.command.is_none() {
444                    stats.command = fs.command;
445                }
446                if stats.kept_lines.is_empty() && !fs.kept_lines.is_empty() {
447                    stats.kept_lines = fs.kept_lines;
448                }
449            }
450            last_envelope = Some(envelope);
451            outputs.push(output_line);
452        }
453
454        let raw_response = last_envelope
455            .as_ref()
456            .and_then(|e| serde_json::to_value(e).ok());
457
458        Ok(Some(ToolOutput {
459            tool_name: ToolName::new("bash"),
460            summary: outputs.join("\n\n"),
461            blocks_executed,
462            filter_stats: cumulative_filter_stats,
463            diff: None,
464            streamed: self.tool_event_tx.is_some(),
465            terminal_id: None,
466            locations: None,
467            raw_response,
468            claim_source: Some(ClaimSource::Shell),
469        }))
470    }
471
472    #[allow(clippy::too_many_lines)]
473    async fn execute_block(
474        &self,
475        block: &str,
476        skip_confirm: bool,
477    ) -> Result<(String, Option<FilterStats>, ShellOutputEnvelope), ToolError> {
478        self.check_permissions(block, skip_confirm).await?;
479        self.validate_sandbox(block)?;
480
481        // Take a transactional snapshot before executing write commands.
482        let mut snapshot_warning: Option<String> = None;
483        let snapshot = if self.transactional && is_write_command(block) {
484            let paths = affected_paths(block, &self.transaction_scope_matchers);
485            if paths.is_empty() {
486                None
487            } else {
488                match TransactionSnapshot::capture(&paths, self.max_snapshot_bytes) {
489                    Ok(snap) => {
490                        tracing::debug!(
491                            files = snap.file_count(),
492                            bytes = snap.total_bytes(),
493                            "transaction snapshot captured"
494                        );
495                        Some(snap)
496                    }
497                    Err(e) if self.snapshot_required => {
498                        return Err(ToolError::SnapshotFailed {
499                            reason: e.to_string(),
500                        });
501                    }
502                    Err(e) => {
503                        tracing::warn!(err = %e, "transaction snapshot failed, proceeding without rollback");
504                        snapshot_warning =
505                            Some(format!("[warn] snapshot failed: {e}; rollback unavailable"));
506                        None
507                    }
508                }
509            }
510        } else {
511            None
512        };
513
514        if let Some(ref tx) = self.tool_event_tx {
515            let sandbox_profile = self
516                .sandbox_policy
517                .as_ref()
518                .map(|p| format!("{:?}", p.profile));
519            // Non-terminal streaming event: use try_send (drop on full).
520            let _ = tx.try_send(ToolEvent::Started {
521                tool_name: ToolName::new("bash"),
522                command: block.to_owned(),
523                sandbox_profile,
524            });
525        }
526
527        let start = Instant::now();
528        let skill_env_snapshot: Option<std::collections::HashMap<String, String>> =
529            self.skill_env.read().clone();
530        let sandbox_pair = self
531            .sandbox
532            .as_ref()
533            .zip(self.sandbox_policy.as_ref())
534            .map(|(sb, pol)| (sb.as_ref(), pol));
535        let (mut envelope, out) = execute_bash(
536            block,
537            self.timeout,
538            self.tool_event_tx.as_ref(),
539            self.cancel_token.as_ref(),
540            skill_env_snapshot.as_ref(),
541            &self.env_blocklist,
542            sandbox_pair,
543        )
544        .await;
545        let exit_code = envelope.exit_code;
546        if exit_code == 130
547            && self
548                .cancel_token
549                .as_ref()
550                .is_some_and(CancellationToken::is_cancelled)
551        {
552            return Err(ToolError::Cancelled);
553        }
554        #[allow(clippy::cast_possible_truncation)]
555        let duration_ms = start.elapsed().as_millis() as u64;
556
557        // Perform auto-rollback if configured and the exit code qualifies.
558        if let Some(snap) = snapshot {
559            let should_rollback = self.auto_rollback
560                && if self.auto_rollback_exit_codes.is_empty() {
561                    exit_code >= 2
562                } else {
563                    self.auto_rollback_exit_codes.contains(&exit_code)
564                };
565            if should_rollback {
566                match snap.rollback() {
567                    Ok(report) => {
568                        tracing::info!(
569                            restored = report.restored_count,
570                            deleted = report.deleted_count,
571                            "transaction rollback completed"
572                        );
573                        self.log_audit(
574                            block,
575                            AuditResult::Rollback {
576                                restored: report.restored_count,
577                                deleted: report.deleted_count,
578                            },
579                            duration_ms,
580                            None,
581                            Some(exit_code),
582                            false,
583                        )
584                        .await;
585                        if let Some(ref tx) = self.tool_event_tx {
586                            // Terminal event: must deliver. Use send().await.
587                            let _ = tx
588                                .send(ToolEvent::Rollback {
589                                    tool_name: ToolName::new("bash"),
590                                    command: block.to_owned(),
591                                    restored_count: report.restored_count,
592                                    deleted_count: report.deleted_count,
593                                })
594                                .await;
595                        }
596                    }
597                    Err(e) => {
598                        tracing::error!(err = %e, "transaction rollback failed");
599                    }
600                }
601            }
602            // On success (no rollback): snapshot dropped here; TempDir auto-cleans.
603        }
604
605        let is_timeout = out.contains("[error] command timed out");
606        let audit_result = if is_timeout {
607            AuditResult::Timeout
608        } else if out.contains("[error]") || out.contains("[stderr]") {
609            AuditResult::Error {
610                message: out.clone(),
611            }
612        } else {
613            AuditResult::Success
614        };
615        if is_timeout {
616            self.log_audit(
617                block,
618                audit_result,
619                duration_ms,
620                None,
621                Some(exit_code),
622                false,
623            )
624            .await;
625            self.emit_completed(block, &out, false, None, None).await;
626            return Err(ToolError::Timeout {
627                timeout_secs: self.timeout.as_secs(),
628            });
629        }
630
631        if let Some(category) = classify_shell_exit(exit_code, &out) {
632            self.emit_completed(block, &out, false, None, None).await;
633            return Err(ToolError::Shell {
634                exit_code,
635                category,
636                message: out.lines().take(3).collect::<Vec<_>>().join("; "),
637            });
638        }
639
640        let sanitized = sanitize_output(&out);
641        let mut per_block_stats: Option<FilterStats> = None;
642        let filtered = if let Some(ref registry) = self.output_filter_registry {
643            match registry.apply(block, &sanitized, exit_code) {
644                Some(fr) => {
645                    tracing::debug!(
646                        command = block,
647                        raw = fr.raw_chars,
648                        filtered = fr.filtered_chars,
649                        savings_pct = fr.savings_pct(),
650                        "output filter applied"
651                    );
652                    per_block_stats = Some(FilterStats {
653                        raw_chars: fr.raw_chars,
654                        filtered_chars: fr.filtered_chars,
655                        raw_lines: fr.raw_lines,
656                        filtered_lines: fr.filtered_lines,
657                        confidence: Some(fr.confidence),
658                        command: Some(block.to_owned()),
659                        kept_lines: fr.kept_lines.clone(),
660                    });
661                    fr.output
662                }
663                None => sanitized,
664            }
665        } else {
666            sanitized
667        };
668
669        self.emit_completed(
670            block,
671            &out,
672            !out.contains("[error]"),
673            per_block_stats.clone(),
674            None,
675        )
676        .await;
677
678        // Mark truncated if output was shortened during filtering.
679        envelope.truncated = filtered.len() < out.len();
680
681        self.log_audit(
682            block,
683            audit_result,
684            duration_ms,
685            None,
686            Some(exit_code),
687            envelope.truncated,
688        )
689        .await;
690
691        let output_line = if let Some(warn) = snapshot_warning {
692            format!("{warn}\n$ {block}\n{filtered}")
693        } else {
694            format!("$ {block}\n{filtered}")
695        };
696        Ok((output_line, per_block_stats, envelope))
697    }
698
699    async fn emit_completed(
700        &self,
701        command: &str,
702        output: &str,
703        success: bool,
704        filter_stats: Option<FilterStats>,
705        run_id: Option<RunId>,
706    ) {
707        if let Some(ref tx) = self.tool_event_tx {
708            // Terminal event: must deliver. Use send().await (never dropped).
709            let _ = tx
710                .send(ToolEvent::Completed {
711                    tool_name: ToolName::new("bash"),
712                    command: command.to_owned(),
713                    output: output.to_owned(),
714                    success,
715                    filter_stats,
716                    diff: None,
717                    run_id,
718                })
719                .await;
720        }
721    }
722
723    /// Check blocklist, permission policy, and confirmation requirements for `block`.
724    async fn check_permissions(&self, block: &str, skip_confirm: bool) -> Result<(), ToolError> {
725        // Always check the blocklist first — it is a hard security boundary
726        // that must not be bypassed by the PermissionPolicy layer.
727        if let Some(blocked) = self.find_blocked_command(block) {
728            let err = ToolError::Blocked {
729                command: blocked.clone(),
730            };
731            self.log_audit(
732                block,
733                AuditResult::Blocked {
734                    reason: format!("blocked command: {blocked}"),
735                },
736                0,
737                Some(&err),
738                None,
739                false,
740            )
741            .await;
742            return Err(err);
743        }
744
745        if let Some(ref policy) = self.permission_policy {
746            match policy.check("bash", block) {
747                PermissionAction::Deny => {
748                    let err = ToolError::Blocked {
749                        command: block.to_owned(),
750                    };
751                    self.log_audit(
752                        block,
753                        AuditResult::Blocked {
754                            reason: "denied by permission policy".to_owned(),
755                        },
756                        0,
757                        Some(&err),
758                        None,
759                        false,
760                    )
761                    .await;
762                    return Err(err);
763                }
764                PermissionAction::Ask if !skip_confirm => {
765                    return Err(ToolError::ConfirmationRequired {
766                        command: block.to_owned(),
767                    });
768                }
769                _ => {}
770            }
771        } else if !skip_confirm && let Some(pattern) = self.find_confirm_command(block) {
772            return Err(ToolError::ConfirmationRequired {
773                command: pattern.to_owned(),
774            });
775        }
776
777        Ok(())
778    }
779
780    fn validate_sandbox(&self, code: &str) -> Result<(), ToolError> {
781        let cwd = std::env::current_dir().unwrap_or_default();
782
783        for token in extract_paths(code) {
784            if has_traversal(&token) {
785                return Err(ToolError::SandboxViolation { path: token });
786            }
787
788            let path = if token.starts_with('/') {
789                PathBuf::from(&token)
790            } else {
791                cwd.join(&token)
792            };
793            let canonical = path
794                .canonicalize()
795                .or_else(|_| std::path::absolute(&path))
796                .unwrap_or(path);
797            if !self
798                .allowed_paths
799                .iter()
800                .any(|allowed| canonical.starts_with(allowed))
801            {
802                return Err(ToolError::SandboxViolation {
803                    path: canonical.display().to_string(),
804                });
805            }
806        }
807        Ok(())
808    }
809
810    /// Scan `code` for commands that match the configured blocklist.
811    ///
812    /// The function normalizes input via [`strip_shell_escapes`] (decoding `$'\xNN'`,
813    /// `$'\NNN'`, backslash escapes, and quote-splitting) and then splits on shell
814    /// metacharacters (`||`, `&&`, `;`, `|`, `\n`) via [`tokenize_commands`].  Each
815    /// resulting token sequence is tested against every entry in `blocked_commands`
816    /// through [`tokens_match_pattern`], which handles transparent prefixes (`env`,
817    /// `command`, `exec`, etc.), absolute paths, and dot-suffixed variants.
818    ///
819    /// # Known limitations
820    ///
821    /// The following constructs are **not** detected by this function:
822    ///
823    /// - **Here-strings** `<<<` with a shell interpreter: the outer command is the
824    ///   shell (`bash`, `sh`), which is not blocked by default; the payload string is
825    ///   opaque to this filter.
826    ///   Example: `bash <<< 'sudo rm -rf /'` — inner payload is not parsed.
827    ///
828    /// - **`eval` and `bash -c` / `sh -c`**: the string argument is not parsed; any
829    ///   blocked command embedded as a string argument passes through undetected.
830    ///   Example: `eval 'sudo rm -rf /'`.
831    ///
832    /// - **Variable expansion**: `strip_shell_escapes` does not resolve variable
833    ///   references, so `cmd=sudo; $cmd rm` bypasses the blocklist.
834    ///
835    /// `$(...)`, backtick, `<(...)`, and `>(...)` substitutions are detected by
836    /// [`extract_subshell_contents`], which extracts the inner command string and
837    /// checks it against the blocklist separately.  The default `confirm_patterns`
838    /// in [`ShellConfig`] additionally include `"$("`, `` "`" ``, `"<("`, `">("`,
839    /// `"<<<"`, and `"eval "`, so those constructs also trigger a confirmation
840    /// request via [`find_confirm_command`] before execution.
841    ///
842    /// For high-security deployments, complement this filter with OS-level sandboxing
843    /// (Linux namespaces, seccomp, or similar) to enforce hard execution boundaries.
844    /// Scan `code` for commands that match the configured blocklist.
845    ///
846    /// Returns an owned `String` because the backing `Vec<String>` lives inside an
847    /// `ArcSwap` that may be replaced between calls — borrowing from the snapshot
848    /// guard would be unsound after the guard drops.
849    fn find_blocked_command(&self, code: &str) -> Option<String> {
850        let snapshot = self.policy.load_full();
851        let cleaned = strip_shell_escapes(&code.to_lowercase());
852        let commands = tokenize_commands(&cleaned);
853        for blocked in &snapshot.blocked_commands {
854            for cmd_tokens in &commands {
855                if tokens_match_pattern(cmd_tokens, blocked) {
856                    return Some(blocked.clone());
857                }
858            }
859        }
860        // Also check commands embedded inside subshell constructs.
861        for inner in extract_subshell_contents(&cleaned) {
862            let inner_commands = tokenize_commands(&inner);
863            for blocked in &snapshot.blocked_commands {
864                for cmd_tokens in &inner_commands {
865                    if tokens_match_pattern(cmd_tokens, blocked) {
866                        return Some(blocked.clone());
867                    }
868                }
869            }
870        }
871        None
872    }
873
874    fn find_confirm_command(&self, code: &str) -> Option<&str> {
875        let normalized = code.to_lowercase();
876        for pattern in &self.confirm_patterns {
877            if normalized.contains(pattern.as_str()) {
878                return Some(pattern.as_str());
879            }
880        }
881        None
882    }
883
884    async fn log_audit(
885        &self,
886        command: &str,
887        result: AuditResult,
888        duration_ms: u64,
889        error: Option<&ToolError>,
890        exit_code: Option<i32>,
891        truncated: bool,
892    ) {
893        if let Some(ref logger) = self.audit_logger {
894            let (error_category, error_domain, error_phase) =
895                error.map_or((None, None, None), |e| {
896                    let cat = e.category();
897                    (
898                        Some(cat.label().to_owned()),
899                        Some(cat.domain().label().to_owned()),
900                        Some(cat.phase().label().to_owned()),
901                    )
902                });
903            let entry = AuditEntry {
904                timestamp: chrono_now(),
905                tool: "shell".into(),
906                command: command.into(),
907                result,
908                duration_ms,
909                error_category,
910                error_domain,
911                error_phase,
912                claim_source: Some(ClaimSource::Shell),
913                mcp_server_id: None,
914                injection_flagged: false,
915                embedding_anomalous: false,
916                cross_boundary_mcp_to_acp: false,
917                adversarial_policy_decision: None,
918                exit_code,
919                truncated,
920                caller_id: None,
921                policy_match: None,
922                correlation_id: None,
923                vigil_risk: None,
924            };
925            logger.log(&entry).await;
926        }
927    }
928}
929
930impl ToolExecutor for ShellExecutor {
931    async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
932        self.execute_inner(response, false).await
933    }
934
935    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
936        use crate::registry::{InvocationHint, ToolDef};
937        vec![ToolDef {
938            id: "bash".into(),
939            description: "Execute a shell command and return stdout/stderr.\n\nParameters: command (string, required) - shell command to run\nReturns: stdout and stderr combined, prefixed with exit code\nErrors: Blocked if command matches security policy; Timeout after configured seconds; SandboxViolation if path outside allowed dirs\nExample: {\"command\": \"ls -la /tmp\"}".into(),
940            schema: schemars::schema_for!(BashParams),
941            invocation: InvocationHint::FencedBlock("bash"),
942            output_schema: None,
943        }]
944    }
945
946    async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
947        if call.tool_id != "bash" {
948            return Ok(None);
949        }
950        let params: BashParams = crate::executor::deserialize_params(&call.params)?;
951        if params.command.is_empty() {
952            return Ok(None);
953        }
954        let command = &params.command;
955
956        if params.background {
957            let run_id = self.spawn_background(command).await?;
958            let id_short = &run_id.to_string()[..8];
959            return Ok(Some(ToolOutput {
960                tool_name: ToolName::new("bash"),
961                summary: format!(
962                    "[background] started run_id={run_id} — command: {command}\n\
963                     The command is running in the background. When it completes, \
964                     results will appear at the start of the next turn (run_id_short={id_short})."
965                ),
966                blocks_executed: 1,
967                filter_stats: None,
968                diff: None,
969                streamed: true,
970                terminal_id: None,
971                locations: None,
972                raw_response: None,
973                claim_source: Some(ClaimSource::Shell),
974            }));
975        }
976
977        // Wrap as a fenced block so execute_inner can extract and run it.
978        let synthetic = format!("```bash\n{command}\n```");
979        self.execute_inner(&synthetic, false).await
980    }
981
982    fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
983        ShellExecutor::set_skill_env(self, env);
984    }
985}
986
987impl ShellExecutor {
988    /// Spawn `command` as a background shell process and return its [`RunId`].
989    ///
990    /// All security checks (blocklist, sandbox, permissions) are performed synchronously
991    /// before spawning. When the cap (`max_background_runs`) is already reached, this
992    /// returns [`ToolError::Blocked`] immediately without spawning.
993    ///
994    /// On completion the spawned task emits a
995    /// `ToolEvent::Completed { run_id: Some(..), .. }` via `tool_event_tx`.
996    ///
997    /// # Errors
998    ///
999    /// Returns [`ToolError::Blocked`] when the background run cap is reached or the command
1000    /// is blocked by policy. Returns other [`ToolError`] variants on sandbox/permission
1001    /// failures.
1002    pub async fn spawn_background(&self, command: &str) -> Result<RunId, ToolError> {
1003        use std::sync::atomic::Ordering;
1004
1005        // Reject new spawns while shutting down.
1006        if self.shutting_down.load(Ordering::Acquire) {
1007            return Err(ToolError::Blocked {
1008                command: command.to_owned(),
1009            });
1010        }
1011
1012        // Enforce security checks — same as blocking mode.
1013        self.check_permissions(command, false).await?;
1014        self.validate_sandbox(command)?;
1015
1016        // Check cap under lock.
1017        let run_id = RunId::new();
1018        {
1019            let mut runs = self.background_runs.lock();
1020            if runs.len() >= self.max_background_runs {
1021                return Err(ToolError::Blocked {
1022                    command: format!(
1023                        "background run cap reached (max_background_runs={})",
1024                        self.max_background_runs
1025                    ),
1026                });
1027            }
1028            let abort = CancellationToken::new();
1029            runs.insert(
1030                run_id,
1031                BackgroundHandle {
1032                    command: command.to_owned(),
1033                    started_at: std::time::Instant::now(),
1034                    abort: abort.clone(),
1035                    child_pid: None,
1036                },
1037            );
1038            drop(runs);
1039
1040            let tool_event_tx = self.tool_event_tx.clone();
1041            let background_completion_tx = self.background_completion_tx.clone();
1042            let background_runs = Arc::clone(&self.background_runs);
1043            let timeout = self.background_timeout;
1044            let env_blocklist = self.env_blocklist.clone();
1045            let skill_env_snapshot: Option<std::collections::HashMap<String, String>> =
1046                self.skill_env.read().clone();
1047            let command_owned = command.to_owned();
1048
1049            tokio::spawn(async move {
1050                let started_at = std::time::Instant::now();
1051                let (_, out) = execute_bash(
1052                    &command_owned,
1053                    timeout,
1054                    tool_event_tx.as_ref(),
1055                    Some(&abort),
1056                    skill_env_snapshot.as_ref(),
1057                    &env_blocklist,
1058                    None,
1059                )
1060                .await;
1061
1062                #[allow(clippy::cast_possible_truncation)]
1063                let elapsed_ms = started_at.elapsed().as_millis() as u64;
1064                let success = !out.contains("[error]");
1065                let exit_code = i32::from(!success);
1066                let truncated = crate::executor::truncate_tool_output_at(&out, 4096);
1067
1068                // Remove from registry.
1069                background_runs.lock().remove(&run_id);
1070
1071                // Deliver terminal event to the TUI/channel adapter.
1072                if let Some(ref tx) = tool_event_tx {
1073                    let _ = tx
1074                        .send(ToolEvent::Completed {
1075                            tool_name: ToolName::new("bash"),
1076                            command: command_owned.clone(),
1077                            output: truncated.clone(),
1078                            success,
1079                            filter_stats: None,
1080                            diff: None,
1081                            run_id: Some(run_id),
1082                        })
1083                        .await;
1084                }
1085
1086                // Deliver completion to the agent for injection into the next turn.
1087                if let Some(ref tx) = background_completion_tx {
1088                    let completion = BackgroundCompletion {
1089                        run_id,
1090                        exit_code,
1091                        output: truncated,
1092                        success,
1093                        elapsed_ms,
1094                        command: command_owned,
1095                    };
1096                    if tx.send(completion).await.is_err() {
1097                        tracing::warn!(
1098                            run_id = %run_id,
1099                            "background completion channel closed; agent may have shut down"
1100                        );
1101                    }
1102                }
1103
1104                tracing::debug!(
1105                    run_id = %run_id,
1106                    exit_code,
1107                    elapsed_ms,
1108                    "background shell run completed"
1109                );
1110            });
1111        }
1112
1113        Ok(run_id)
1114    }
1115
1116    /// Cancel all in-flight background runs.
1117    ///
1118    /// Called during agent shutdown. Each cancelled run emits a
1119    /// `ToolEvent::Completed { success: false }` event. Cancellation is cooperative:
1120    /// the spawned tasks detect the token and exit on the next check point.
1121    ///
1122    /// # Note
1123    ///
1124    /// SIGTERM/SIGKILL escalation is deferred to a future enhancement
1125    /// (requires a safe OS-signal abstraction). The `CancellationToken` is
1126    /// sufficient for the process-local case.
1127    // TODO(review): add SIGTERM+SIGKILL escalation via a safe signal wrapper (e.g. nix crate).
1128    pub async fn shutdown(&self) {
1129        use std::sync::atomic::Ordering;
1130
1131        self.shutting_down.store(true, Ordering::Release);
1132
1133        let handles: Vec<(RunId, String, CancellationToken)> = {
1134            let runs = self.background_runs.lock();
1135            runs.iter()
1136                .map(|(id, h)| (*id, h.command.clone(), h.abort.clone()))
1137                .collect()
1138        };
1139
1140        if handles.is_empty() {
1141            return;
1142        }
1143
1144        tracing::info!(
1145            count = handles.len(),
1146            "cancelling background shell runs for shutdown"
1147        );
1148
1149        for (run_id, command, abort) in &handles {
1150            abort.cancel();
1151
1152            if let Some(ref tx) = self.tool_event_tx {
1153                let _ = tx
1154                    .send(ToolEvent::Completed {
1155                        tool_name: ToolName::new("bash"),
1156                        command: command.clone(),
1157                        output: "[terminated by shutdown]".to_owned(),
1158                        success: false,
1159                        filter_stats: None,
1160                        diff: None,
1161                        run_id: Some(*run_id),
1162                    })
1163                    .await;
1164            }
1165        }
1166
1167        self.background_runs.lock().clear();
1168    }
1169}
1170
/// Strip shell escape sequences that could bypass command detection.
///
/// Handles:
/// - backslash insertion (`su\do` -> `sudo`) and backslash-newline continuations;
/// - `$'...'` ANSI-C quoting, with or without escapes (`$'sudo'`, `$'\x73udo'` and
///   `$'\163udo'` all become `sudo`); `\xNN` needs two hex digits, `\NNN` takes one to
///   three **octal** digits, any other `\X` is emitted as `X`;
/// - adjacent quoted segments (`"su""do"` -> `sudo`).
///
/// The scan works on `char`s, so non-ASCII text passes through unchanged. An
/// unterminated `$'...` is not decoded: the `$` is kept and the rest is treated as an
/// ordinary (unterminated) single-quoted segment. `$"..."` locale quoting is not handled.
pub(crate) fn strip_shell_escapes(input: &str) -> String {
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut out = String::with_capacity(input.len());
    let mut i = 0;
    while i < len {
        let c = chars[i];

        // $'...' ANSI-C quoting: replace the whole construct by its decoded body.
        let ansi_c = if c == '$' && chars.get(i + 1) == Some(&'\'') {
            decode_ansi_c_quoted(&chars, i + 2)
        } else {
            None
        };
        if let Some((decoded, resume_at)) = ansi_c {
            out.push_str(&decoded);
            i = resume_at;
            continue;
        }

        // Backslash followed by something: a backslash-newline continuation vanishes
        // entirely; otherwise only the backslash is dropped (su\do -> sudo).
        // A trailing lone backslash falls through and is kept.
        if c == '\\' && i + 1 < len {
            let escaped = chars[i + 1];
            if escaped != '\n' {
                out.push(escaped);
            }
            i += 2;
            continue;
        }

        // Quoted segment: keep the content, drop the quotes, so adjacent segments merge.
        if c == '"' || c == '\'' {
            i += 1;
            while i < len && chars[i] != c {
                out.push(chars[i]);
                i += 1;
            }
            if i < len {
                i += 1; // skip closing quote
            }
            continue;
        }

        out.push(c);
        i += 1;
    }
    out
}

/// Decode the body of a `$'...'` string; `start` indexes the first char after `$'`.
///
/// Returns the decoded text and the index just past the closing quote, or `None` when
/// the closing quote is missing.
fn decode_ansi_c_quoted(chars: &[char], start: usize) -> Option<(String, usize)> {
    let mut decoded = String::new();
    let mut j = start;
    while j < chars.len() {
        let c = chars[j];
        if c == '\'' {
            return Some((decoded, j + 1));
        }
        if c == '\\' && j + 1 < chars.len() {
            if let Some((ch, consumed)) = decode_numeric_escape(chars, j) {
                decoded.push(ch);
                j += consumed;
            } else {
                // Any other \X escape (including \'): emit X literally.
                decoded.push(chars[j + 1]);
                j += 2;
            }
            continue;
        }
        decoded.push(c);
        j += 1;
    }
    None
}

/// Decode a `\xNN` (hex) or `\N`/`\NN`/`\NNN` (octal) escape whose backslash is `chars[at]`.
///
/// Returns the decoded character and the number of chars consumed (backslash included),
/// or `None` when the escape is not numeric.
fn decode_numeric_escape(chars: &[char], at: usize) -> Option<(char, usize)> {
    let digit = |offset: usize, radix: u32| chars.get(at + offset).and_then(|c| c.to_digit(radix));

    if chars.get(at + 1) == Some(&'x') {
        let value = (digit(2, 16)? << 4) | digit(3, 16)?;
        return Some((char::from_u32(value)?, 4));
    }

    let mut value = digit(1, 8)?;
    let mut consumed = 2; // the backslash and the first octal digit
    while consumed < 4 {
        match digit(consumed, 8) {
            Some(next) => {
                value = value * 8 + next;
                consumed += 1;
            }
            None => break,
        }
    }
    // Values above 0o377 wrap to a single byte.
    Some((char::from_u32(value & 0xFF)?, consumed))
}
1262
/// Extract inner command strings from subshell constructs in `s`.
///
/// Recognises:
/// - Backtick: `` `cmd` `` → `cmd`
/// - Dollar-paren: `$(cmd)` → `cmd`
/// - Process substitution (lt): `<(cmd)` → `cmd`
/// - Process substitution (gt): `>(cmd)` → `cmd`
///
/// Depth counting keeps nested parentheses inside the enclosing construct. Nested
/// substitutions are reported too: every extracted string is scanned again, so
/// `$(echo $(id))` yields both `echo $(id)` and `id`. Outer constructs come before the
/// constructs nested inside them. Unterminated constructs yield nothing.
pub(crate) fn extract_subshell_contents(s: &str) -> Vec<String> {
    let mut results = Vec::new();
    collect_subshells(s, &mut results);

    // Work-list pass over everything found so far. It terminates because each extracted
    // string is strictly shorter than the string it was extracted from.
    let mut next = 0;
    while next < results.len() {
        let inner = results[next].clone();
        collect_subshells(&inner, &mut results);
        next += 1;
    }

    results
}

/// Append the contents of every outermost subshell construct found in `s` to `results`.
fn collect_subshells(s: &str, results: &mut Vec<String>) {
    let chars: Vec<char> = s.chars().collect();
    let len = chars.len();
    let mut i = 0;

    while i < len {
        // Backtick substitution: `...`
        if chars[i] == '`' {
            let start = i + 1;
            match chars[start..].iter().position(|&c| c == '`') {
                Some(offset) => {
                    let end = start + offset;
                    results.push(chars[start..end].iter().collect());
                    i = end + 1;
                }
                // Unterminated: nothing further can be extracted.
                None => break,
            }
            continue;
        }

        // $(...), <(...), >(...)
        let opens_paren_subshell =
            matches!(chars[i], '$' | '<' | '>') && chars.get(i + 1) == Some(&'(');
        if opens_paren_subshell {
            let start = i + 2;
            let mut depth: usize = 1;
            let mut j = start;
            while j < len {
                match chars[j] {
                    '(' => depth += 1,
                    ')' => depth -= 1,
                    _ => {}
                }
                if depth == 0 {
                    break;
                }
                j += 1;
            }
            if depth == 0 {
                results.push(chars[start..j].iter().collect());
            }
            i = j + 1;
            continue;
        }

        i += 1;
    }
}
1325
/// Split normalized shell code into sub-commands on `|`, `||`, `&`, `&&`, `;`, `\n`.
///
/// Returns the list of sub-commands, each as a `Vec<String>` of whitespace-separated
/// tokens; empty segments are dropped. Splitting on the single characters also covers
/// the two-character operators, because the empty segment between the two characters is
/// discarded.
///
/// The background operator `&` is a separator as well, so `sleep 1 & sudo rm` yields
/// `sudo rm` as its own sub-command. As a side effect, redirections such as `2>&1` are
/// split into extra, harmless segments.
pub(crate) fn tokenize_commands(normalized: &str) -> Vec<Vec<String>> {
    normalized
        .split([';', '|', '&', '\n'])
        .map(|segment| {
            segment
                .split_whitespace()
                .map(str::to_owned)
                .collect::<Vec<String>>()
        })
        .filter(|tokens| !tokens.is_empty())
        .collect()
}
1341
/// Transparent prefix commands that invoke the next argument as a command.
/// Skipped when determining the "real" command name being invoked.
const TRANSPARENT_PREFIXES: &[&str] = &["env", "command", "exec", "nice", "nohup", "time", "xargs"];

/// Return the basename of a token (last path component after '/').
fn cmd_basename(tok: &str) -> &str {
    tok.rsplit('/').next().unwrap_or(tok)
}

/// Returns `true` when `tok` has the shape of a shell variable assignment (`NAME=value`).
fn is_assignment_token(tok: &str) -> bool {
    match tok.split_once('=') {
        Some((name, _)) => {
            let mut name_chars = name.chars();
            name_chars
                .next()
                .is_some_and(|c| c.is_ascii_alphabetic() || c == '_')
                && name_chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        None => false,
    }
}

/// Index of the first token that is neither a transparent prefix, nor a leading
/// `NAME=value` assignment, nor an option (`-x`) following a transparent prefix.
/// `None` when every token is skippable.
fn real_command_index(tokens: &[String]) -> Option<usize> {
    let mut after_prefix = false;
    tokens.iter().position(|tok| {
        if TRANSPARENT_PREFIXES.contains(&cmd_basename(tok)) {
            after_prefix = true;
            false
        } else {
            !(is_assignment_token(tok) || (after_prefix && tok.starts_with('-')))
        }
    })
}

/// Test the command starting at `effective[0]` against the whitespace-split pattern.
fn command_matches_pattern(effective: &[String], pattern_tokens: &[&str]) -> bool {
    let base = match effective.first() {
        Some(first) => cmd_basename(first),
        None => return false,
    };

    if let [single] = pattern_tokens {
        // Exact match OR dot-suffixed variant (e.g. "mkfs" matches "mkfs.ext4").
        return base
            .strip_prefix(*single)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('.'));
    }

    // Multi-word: a command with fewer tokens than the pattern can never match.
    if effective.len() < pattern_tokens.len() {
        return false;
    }
    // Join as many tokens as the pattern has (basename for the first) and prefix-check
    // against the pattern with its whitespace normalized to single spaces.
    let mut head: Vec<&str> = vec![base];
    head.extend(
        effective[1..pattern_tokens.len()]
            .iter()
            .map(String::as_str),
    );
    head.join(" ").starts_with(&pattern_tokens.join(" "))
}

/// Check if the first tokens of a sub-command match a blocked pattern.
///
/// Handles:
/// - Transparent prefix commands (`env sudo rm` -> checks `sudo`)
/// - Assignments and prefix options before the command (`FOO=1 sudo`, `env -i sudo`,
///   `env FOO=1 sudo` -> checks `sudo`)
/// - Absolute paths (`/usr/bin/sudo rm` -> basename `sudo` is checked)
/// - Dot-suffixed variants (`mkfs` matches `mkfs.ext4`)
/// - Multi-word patterns (`rm -rf /` joined prefix check; whitespace inside the pattern
///   is normalized, so `rm   -rf /` behaves like `rm -rf /`)
///
/// Two candidate command positions are tried: the one after transparent prefixes only,
/// and the one that additionally skips assignments and prefix options. The result is
/// `true` when either matches, so everything matched by prefix skipping alone still
/// matches.
///
/// Returns `false` for empty `tokens` or a blank `pattern`.
pub(crate) fn tokens_match_pattern(tokens: &[String], pattern: &str) -> bool {
    let pattern_tokens: Vec<&str> = pattern.split_whitespace().collect();
    if tokens.is_empty() || pattern_tokens.is_empty() {
        return false;
    }

    // Skip transparent prefix tokens to reach the real command.
    let after_prefixes = tokens
        .iter()
        .position(|t| !TRANSPARENT_PREFIXES.contains(&cmd_basename(t)))
        .unwrap_or(0);
    if command_matches_pattern(&tokens[after_prefixes..], &pattern_tokens) {
        return true;
    }

    real_command_index(tokens).is_some_and(|start| {
        start != after_prefixes && command_matches_pattern(&tokens[start..], &pattern_tokens)
    })
}
1400
1401fn extract_paths(code: &str) -> Vec<String> {
1402    let mut result = Vec::new();
1403
1404    // Tokenize respecting single/double quotes
1405    let mut tokens: Vec<String> = Vec::new();
1406    let mut current = String::new();
1407    let mut chars = code.chars().peekable();
1408    while let Some(c) = chars.next() {
1409        match c {
1410            '"' | '\'' => {
1411                let quote = c;
1412                while let Some(&nc) = chars.peek() {
1413                    if nc == quote {
1414                        chars.next();
1415                        break;
1416                    }
1417                    current.push(chars.next().unwrap());
1418                }
1419            }
1420            c if c.is_whitespace() || matches!(c, ';' | '|' | '&') => {
1421                if !current.is_empty() {
1422                    tokens.push(std::mem::take(&mut current));
1423                }
1424            }
1425            _ => current.push(c),
1426        }
1427    }
1428    if !current.is_empty() {
1429        tokens.push(current);
1430    }
1431
1432    for token in tokens {
1433        let trimmed = token.trim_end_matches([';', '&', '|']).to_owned();
1434        if trimmed.is_empty() {
1435            continue;
1436        }
1437        if trimmed.starts_with('/')
1438            || trimmed.starts_with("./")
1439            || trimmed.starts_with("../")
1440            || trimmed == ".."
1441            || (trimmed.starts_with('.') && trimmed.contains('/'))
1442            || is_relative_path_token(&trimmed)
1443        {
1444            result.push(trimmed);
1445        }
1446    }
1447    result
1448}
1449
/// Decide whether `token` has the shape of a relative path like `word/more`.
///
/// A token qualifies when it contains `/`, does not start with `/` or `.`, and its first
/// character is an ASCII letter, digit, or `_`.
///
/// Never treated as paths:
/// - URLs (anything containing `://`)
/// - Shell variable assignments (`KEY=value`, where `KEY` consists only of ASCII
///   letters, digits, and `_`)
fn is_relative_path_token(token: &str) -> bool {
    fn is_ident_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '_'
    }

    let has_slash = token.contains('/');
    let anchored = token.starts_with('/') || token.starts_with('.');
    if !has_slash || anchored || token.contains("://") {
        return false;
    }

    // `IDENTIFIER=...` is an assignment, not a path.
    let is_assignment = token
        .split_once('=')
        .is_some_and(|(key, _)| key.chars().all(is_ident_char));
    if is_assignment {
        return false;
    }

    token.chars().next().is_some_and(is_ident_char)
}
1478
1479/// Classify shell exit codes and stderr patterns into `ToolErrorCategory`.
1480///
1481/// Returns `Some(category)` only for well-known failure modes that benefit from
1482/// structured feedback (exit 126/127, recognisable stderr patterns). All other
1483/// non-zero exits are left as `Ok` output so they surface verbatim to the LLM.
1484fn classify_shell_exit(
1485    exit_code: i32,
1486    output: &str,
1487) -> Option<crate::error_taxonomy::ToolErrorCategory> {
1488    use crate::error_taxonomy::ToolErrorCategory;
1489    match exit_code {
1490        // exit 126: command found but not executable (OS-level permission/policy)
1491        126 => Some(ToolErrorCategory::PolicyBlocked),
1492        // exit 127: command not found in PATH
1493        127 => Some(ToolErrorCategory::PermanentFailure),
1494        _ => {
1495            let lower = output.to_lowercase();
1496            if lower.contains("permission denied") {
1497                Some(ToolErrorCategory::PolicyBlocked)
1498            } else if lower.contains("no such file or directory") {
1499                Some(ToolErrorCategory::PermanentFailure)
1500            } else {
1501                None
1502            }
1503        }
1504    }
1505}
1506
/// Returns `true` when any `/`-separated segment of `path` is exactly `..`.
fn has_traversal(path: &str) -> bool {
    for segment in path.split('/') {
        if segment == ".." {
            return true;
        }
    }
    false
}
1510
1511fn extract_bash_blocks(text: &str) -> Vec<&str> {
1512    crate::executor::extract_fenced_blocks(text, "bash")
1513}
1514
1515/// Kill a child process and its descendants.
1516/// On unix, sends SIGKILL to child processes via `pkill -KILL -P <pid>` before
1517/// killing the parent, preventing zombie subprocesses.
1518async fn kill_process_tree(child: &mut tokio::process::Child) {
1519    #[cfg(unix)]
1520    if let Some(pid) = child.id() {
1521        let _ = Command::new("pkill")
1522            .args(["-KILL", "-P", &pid.to_string()])
1523            .status()
1524            .await;
1525    }
1526    let _ = child.kill().await;
1527}
1528
/// Structured output from a shell command execution.
///
/// Produced by the internal `execute_bash` function and included in the final
/// [`ToolOutput`] and [`AuditEntry`] for the invocation.
///
/// The type derives `Serialize` and `Deserialize`, so it can be converted to and from
/// any serde-supported format.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ShellOutputEnvelope {
    /// Captured standard output, possibly truncated.
    pub stdout: String,
    /// Captured standard error, possibly truncated. When sandbox setup fails before the
    /// process is spawned, `execute_bash` stores its
    /// `[error] sandbox setup failed: …` message here.
    pub stderr: String,
    /// Process exit code. `0` indicates success by convention; `execute_bash` reports `1`
    /// when sandbox setup fails.
    pub exit_code: i32,
    /// `true` when the combined output exceeded the configured max and was truncated.
    pub truncated: bool,
}
1544
1545#[allow(clippy::too_many_lines)]
1546async fn execute_bash(
1547    code: &str,
1548    timeout: Duration,
1549    event_tx: Option<&ToolEventTx>,
1550    cancel_token: Option<&CancellationToken>,
1551    extra_env: Option<&std::collections::HashMap<String, String>>,
1552    env_blocklist: &[String],
1553    sandbox: Option<(&dyn Sandbox, &SandboxPolicy)>,
1554) -> (ShellOutputEnvelope, String) {
1555    use std::process::Stdio;
1556    use tokio::io::{AsyncBufReadExt, BufReader};
1557
1558    let timeout_secs = timeout.as_secs();
1559
1560    let mut cmd = Command::new("bash");
1561    cmd.arg("-c").arg(code);
1562
1563    for (key, _) in std::env::vars() {
1564        if env_blocklist
1565            .iter()
1566            .any(|prefix| key.starts_with(prefix.as_str()))
1567        {
1568            cmd.env_remove(&key);
1569        }
1570    }
1571
1572    if let Some(env) = extra_env {
1573        cmd.envs(env);
1574    }
1575
1576    // Apply OS sandbox before setting stdio so the rewritten program is sandboxed.
1577    if let Some((sb, policy)) = sandbox
1578        && let Err(err) = sb.wrap(&mut cmd, policy)
1579    {
1580        let msg = format!("[error] sandbox setup failed: {err}");
1581        return (
1582            ShellOutputEnvelope {
1583                stdout: String::new(),
1584                stderr: msg.clone(),
1585                exit_code: 1,
1586                truncated: false,
1587            },
1588            msg,
1589        );
1590    }
1591
1592    cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
1593
1594    let child_result = cmd.spawn();
1595
1596    let mut child = match child_result {
1597        Ok(c) => c,
1598        Err(e) => {
1599            let msg = format!("[error] {e}");
1600            return (
1601                ShellOutputEnvelope {
1602                    stdout: String::new(),
1603                    stderr: msg.clone(),
1604                    exit_code: 1,
1605                    truncated: false,
1606                },
1607                msg,
1608            );
1609        }
1610    };
1611
1612    let stdout = child.stdout.take().expect("stdout piped");
1613    let stderr = child.stderr.take().expect("stderr piped");
1614
1615    // Channel carries (is_stderr, line) so we can accumulate separate buffers
1616    // while still building a combined interleaved string for streaming and LLM context.
1617    let (line_tx, mut line_rx) = tokio::sync::mpsc::channel::<(bool, String)>(64);
1618
1619    let stdout_tx = line_tx.clone();
1620    tokio::spawn(async move {
1621        let mut reader = BufReader::new(stdout);
1622        let mut buf = String::new();
1623        while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1624            let _ = stdout_tx.send((false, buf.clone())).await;
1625            buf.clear();
1626        }
1627    });
1628
1629    tokio::spawn(async move {
1630        let mut reader = BufReader::new(stderr);
1631        let mut buf = String::new();
1632        while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1633            let _ = line_tx.send((true, buf.clone())).await;
1634            buf.clear();
1635        }
1636    });
1637
1638    let mut combined = String::new();
1639    let mut stdout_buf = String::new();
1640    let mut stderr_buf = String::new();
1641    let deadline = tokio::time::Instant::now() + timeout;
1642
1643    loop {
1644        tokio::select! {
1645            line = line_rx.recv() => {
1646                match line {
1647                    Some((is_stderr, chunk)) => {
1648                        let interleaved = if is_stderr {
1649                            format!("[stderr] {chunk}")
1650                        } else {
1651                            chunk.clone()
1652                        };
1653                        if let Some(tx) = event_tx {
1654                            // Non-terminal streaming event: use try_send (drop on full).
1655                            let _ = tx.try_send(ToolEvent::OutputChunk {
1656                                tool_name: ToolName::new("bash"),
1657                                command: code.to_owned(),
1658                                chunk: interleaved.clone(),
1659                            });
1660                        }
1661                        combined.push_str(&interleaved);
1662                        if is_stderr {
1663                            stderr_buf.push_str(&chunk);
1664                        } else {
1665                            stdout_buf.push_str(&chunk);
1666                        }
1667                    }
1668                    None => break,
1669                }
1670            }
1671            () = tokio::time::sleep_until(deadline) => {
1672                kill_process_tree(&mut child).await;
1673                let msg = format!("[error] command timed out after {timeout_secs}s");
1674                return (
1675                    ShellOutputEnvelope {
1676                        stdout: stdout_buf,
1677                        stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
1678                        exit_code: 1,
1679                        truncated: false,
1680                    },
1681                    msg,
1682                );
1683            }
1684            () = async {
1685                match cancel_token {
1686                    Some(t) => t.cancelled().await,
1687                    None => std::future::pending().await,
1688                }
1689            } => {
1690                kill_process_tree(&mut child).await;
1691                return (
1692                    ShellOutputEnvelope {
1693                        stdout: stdout_buf,
1694                        stderr: format!("{stderr_buf}operation aborted"),
1695                        exit_code: 130,
1696                        truncated: false,
1697                    },
1698                    "[cancelled] operation aborted".to_string(),
1699                );
1700            }
1701        }
1702    }
1703
1704    let status = child.wait().await;
1705    let exit_code = status.ok().and_then(|s| s.code()).unwrap_or(1);
1706
1707    let (envelope, combined) = if combined.is_empty() {
1708        (
1709            ShellOutputEnvelope {
1710                stdout: String::new(),
1711                stderr: String::new(),
1712                exit_code,
1713                truncated: false,
1714            },
1715            "(no output)".to_string(),
1716        )
1717    } else {
1718        (
1719            ShellOutputEnvelope {
1720                stdout: stdout_buf.trim_end().to_owned(),
1721                stderr: stderr_buf.trim_end().to_owned(),
1722                exit_code,
1723                truncated: false,
1724            },
1725            combined,
1726        )
1727    };
1728    (envelope, combined)
1729}
1730
1731#[cfg(test)]
1732mod tests;
zeph_tools/shell/mod.rs

zeph_tools/shell/
mod.rs