1use std::path::PathBuf;
24use std::time::{Duration, Instant};
25
26use tokio::process::Command;
27use tokio_util::sync::CancellationToken;
28
29use schemars::JsonSchema;
30use serde::Deserialize;
31
32use std::sync::Arc;
33
34use arc_swap::ArcSwap;
35use parking_lot::RwLock;
36
37use zeph_common::ToolName;
38
39use crate::audit::{AuditEntry, AuditLogger, AuditResult, chrono_now};
40use crate::config::ShellConfig;
41use crate::executor::{
42 ClaimSource, FilterStats, ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput,
43};
44use crate::filter::{OutputFilterRegistry, sanitize_output};
45use crate::permissions::{PermissionAction, PermissionPolicy};
46use crate::sandbox::{Sandbox, SandboxPolicy};
47
48mod transaction;
49use transaction::{TransactionSnapshot, affected_paths, build_scope_matchers, is_write_command};
50
/// Built-in blocklist patterns. `compute_blocked_commands` starts from this
/// list, minus any entry the configuration explicitly allows.
const DEFAULT_BLOCKED: &[&str] = &[
    "rm -rf /", "sudo", "mkfs", "dd if=", "curl", "wget", "nc ", "ncat", "netcat", "shutdown",
    "reboot", "halt",
];

/// Public alias of the built-in blocklist patterns.
pub const DEFAULT_BLOCKED_COMMANDS: &[&str] = DEFAULT_BLOCKED;

/// Binary base names that `effective_shell_command` treats as shell
/// interpreters.
pub const SHELL_INTERPRETERS: &[&str] =
    &["bash", "sh", "zsh", "fish", "dash", "ksh", "csh", "tcsh"];

/// Substrings that open a command or process substitution. `check_blocklist`
/// rejects any command containing one of them.
const SUBSHELL_METACHARS: &[&str] = &["$(", "`", "<(", ">("];
78
79#[must_use]
87pub fn check_blocklist(command: &str, blocklist: &[String]) -> Option<String> {
88 let lower = command.to_lowercase();
89 for meta in SUBSHELL_METACHARS {
91 if lower.contains(meta) {
92 return Some((*meta).to_owned());
93 }
94 }
95 let cleaned = strip_shell_escapes(&lower);
96 let commands = tokenize_commands(&cleaned);
97 for blocked in blocklist {
98 for cmd_tokens in &commands {
99 if tokens_match_pattern(cmd_tokens, blocked) {
100 return Some(blocked.clone());
101 }
102 }
103 }
104 None
105}
106
107#[must_use]
112pub fn effective_shell_command<'a>(binary: &str, args: &'a [String]) -> Option<&'a str> {
113 let base = binary.rsplit('/').next().unwrap_or(binary);
114 if !SHELL_INTERPRETERS.contains(&base) {
115 return None;
116 }
117 let pos = args.iter().position(|a| a == "-c")?;
119 args.get(pos + 1).map(String::as_str)
120}
121
/// Patterns that `compute_blocked_commands` adds to the blocklist when the
/// configuration has `allow_network` disabled.
const NETWORK_COMMANDS: &[&str] = &["curl", "wget", "nc ", "ncat", "netcat"];
123
/// Snapshot of the effective command blocklist.
///
/// Instances are stored behind an `ArcSwap` and replaced wholesale by
/// `ShellPolicyHandle::rebuild`.
#[derive(Debug)]
pub(crate) struct ShellPolicy {
    /// Effective blocklist patterns, as produced by `compute_blocked_commands`.
    pub(crate) blocked_commands: Vec<String>,
}
131
/// Cloneable handle to an executor's shared [`ShellPolicy`].
///
/// Obtained from `ShellExecutor::policy_handle`; it shares the same
/// `Arc<ArcSwap<_>>` as the executor, so a `rebuild` is visible to it.
#[derive(Clone, Debug)]
pub struct ShellPolicyHandle {
    /// Shared, atomically swappable policy.
    inner: Arc<ArcSwap<ShellPolicy>>,
}
142
143impl ShellPolicyHandle {
144 pub fn rebuild(&self, config: &crate::config::ShellConfig) {
153 let policy = Arc::new(ShellPolicy {
154 blocked_commands: compute_blocked_commands(config),
155 });
156 self.inner.store(policy);
157 }
158
159 #[must_use]
161 pub fn snapshot_blocked(&self) -> Vec<String> {
162 self.inner.load().blocked_commands.clone()
163 }
164}
165
166pub(crate) fn compute_blocked_commands(config: &crate::config::ShellConfig) -> Vec<String> {
170 let allowed: Vec<String> = config
171 .allowed_commands
172 .iter()
173 .map(|s| s.to_lowercase())
174 .collect();
175 let mut blocked: Vec<String> = DEFAULT_BLOCKED
176 .iter()
177 .filter(|s| !allowed.contains(&s.to_lowercase()))
178 .map(|s| (*s).to_owned())
179 .collect();
180 blocked.extend(config.blocked_commands.iter().map(|s| s.to_lowercase()));
181 if !config.allow_network {
182 for cmd in NETWORK_COMMANDS {
183 let lower = cmd.to_lowercase();
184 if !blocked.contains(&lower) {
185 blocked.push(lower);
186 }
187 }
188 }
189 blocked.sort();
190 blocked.dedup();
191 blocked
192}
193
// Parameters of the `bash` tool call, deserialized in `execute_tool_call`.
// Plain `//` comments are used on purpose: `///` doc comments on a
// `JsonSchema` type would become part of the generated schema.
#[derive(Deserialize, JsonSchema)]
pub(crate) struct BashParams {
    // Shell command to run; an empty command makes `execute_tool_call` return `Ok(None)`.
    command: String,
}
199
/// Executes fenced `bash` blocks with blocklist, permission, path-sandbox,
/// optional transaction-snapshot and audit handling.
///
/// Built with [`ShellExecutor::new`] and customised through the `with_*`
/// builder methods.
#[derive(Debug)]
pub struct ShellExecutor {
    /// Per-block execution timeout (from `config.timeout`, in seconds).
    timeout: Duration,
    /// Shared, hot-swappable command blocklist.
    policy: Arc<ArcSwap<ShellPolicy>>,
    /// Roots a referenced path must fall under (see `validate_sandbox`).
    allowed_paths: Vec<PathBuf>,
    /// Substrings that require confirmation when no permission policy is set.
    confirm_patterns: Vec<String>,
    /// Environment-variable name prefixes removed from the child environment.
    env_blocklist: Vec<String>,
    /// Optional audit sink.
    audit_logger: Option<Arc<AuditLogger>>,
    /// Optional channel for `ToolEvent`s (started / output / completed / rollback).
    tool_event_tx: Option<ToolEventTx>,
    /// Optional permission policy; when set it replaces `confirm_patterns`.
    permission_policy: Option<PermissionPolicy>,
    /// Optional output filters applied to sanitized command output.
    output_filter_registry: Option<OutputFilterRegistry>,
    /// Optional token that aborts a running command.
    cancel_token: Option<CancellationToken>,
    /// Extra environment variables injected into the child process.
    skill_env: RwLock<Option<std::collections::HashMap<String, String>>>,
    /// Whether write commands are snapshotted before execution.
    transactional: bool,
    /// Whether a captured snapshot is rolled back on failing exit codes.
    auto_rollback: bool,
    /// Exit codes that trigger rollback; when empty, any exit code >= 2 does.
    auto_rollback_exit_codes: Vec<i32>,
    /// Whether a failed snapshot aborts the command instead of only warning.
    snapshot_required: bool,
    /// Size limit passed to `TransactionSnapshot::capture`.
    max_snapshot_bytes: u64,
    /// Glob matchers passed to `affected_paths` when selecting snapshot paths.
    transaction_scope_matchers: Vec<globset::GlobMatcher>,
    /// Optional sandbox used to wrap the spawned command.
    sandbox: Option<Arc<dyn Sandbox>>,
    /// Policy handed to `sandbox`; both must be set for sandboxing to apply.
    sandbox_policy: Option<SandboxPolicy>,
}
244
245impl ShellExecutor {
246 #[must_use]
252 pub fn new(config: &ShellConfig) -> Self {
253 let policy = Arc::new(ArcSwap::from_pointee(ShellPolicy {
254 blocked_commands: compute_blocked_commands(config),
255 }));
256
257 let allowed_paths = if config.allowed_paths.is_empty() {
258 vec![std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))]
259 } else {
260 config.allowed_paths.iter().map(PathBuf::from).collect()
261 };
262
263 Self {
264 timeout: Duration::from_secs(config.timeout),
265 policy,
266 allowed_paths,
267 confirm_patterns: config.confirm_patterns.clone(),
268 env_blocklist: config.env_blocklist.clone(),
269 audit_logger: None,
270 tool_event_tx: None,
271 permission_policy: None,
272 output_filter_registry: None,
273 cancel_token: None,
274 skill_env: RwLock::new(None),
275 transactional: config.transactional,
276 auto_rollback: config.auto_rollback,
277 auto_rollback_exit_codes: config.auto_rollback_exit_codes.clone(),
278 snapshot_required: config.snapshot_required,
279 max_snapshot_bytes: config.max_snapshot_bytes,
280 transaction_scope_matchers: build_scope_matchers(&config.transaction_scope),
281 sandbox: None,
282 sandbox_policy: None,
283 }
284 }
285
    /// Attaches a sandbox and its policy; spawned commands are wrapped with
    /// them. Returns the executor for chaining.
    #[must_use]
    pub fn with_sandbox(mut self, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
        self.sandbox = Some(sandbox);
        self.sandbox_policy = Some(policy);
        self
    }
296
    /// Replaces the extra environment variables injected into subsequently
    /// spawned commands; `None` clears them.
    pub fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
        *self.skill_env.write() = env;
    }
301
    /// Attaches an audit logger. Returns the executor for chaining.
    #[must_use]
    pub fn with_audit(mut self, logger: Arc<AuditLogger>) -> Self {
        self.audit_logger = Some(logger);
        self
    }
308
    /// Attaches the channel on which `ToolEvent`s are emitted. When set,
    /// produced `ToolOutput`s are marked as `streamed`.
    #[must_use]
    pub fn with_tool_event_tx(mut self, tx: ToolEventTx) -> Self {
        self.tool_event_tx = Some(tx);
        self
    }
318
    /// Attaches a permission policy. When present it is consulted instead of
    /// the configured confirm patterns (see `check_permissions`).
    #[must_use]
    pub fn with_permissions(mut self, policy: PermissionPolicy) -> Self {
        self.permission_policy = Some(policy);
        self
    }
328
    /// Attaches a cancellation token that aborts a running command.
    #[must_use]
    pub fn with_cancel_token(mut self, token: CancellationToken) -> Self {
        self.cancel_token = Some(token);
        self
    }
336
    /// Attaches the output filter registry applied to sanitized command output.
    #[must_use]
    pub fn with_output_filters(mut self, registry: OutputFilterRegistry) -> Self {
        self.output_filter_registry = Some(registry);
        self
    }
344
    /// Returns a handle sharing this executor's blocklist policy, so the
    /// policy can be rebuilt while the executor is in use.
    #[must_use]
    pub fn policy_handle(&self) -> ShellPolicyHandle {
        ShellPolicyHandle {
            inner: Arc::clone(&self.policy),
        }
    }
356
    /// Executes the bash blocks in `response` with the confirmation step
    /// skipped (`skip_confirm = true`); blocklist and deny checks still apply.
    ///
    /// With the `profiling` feature enabled the call is wrapped in a
    /// `tool.shell` tracing span.
    #[cfg_attr(
        feature = "profiling",
        tracing::instrument(name = "tool.shell", skip_all, fields(exit_code = tracing::field::Empty, duration_ms = tracing::field::Empty))
    )]
    pub async fn execute_confirmed(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
        self.execute_inner(response, true).await
    }
369
370 async fn execute_inner(
371 &self,
372 response: &str,
373 skip_confirm: bool,
374 ) -> Result<Option<ToolOutput>, ToolError> {
375 let blocks = extract_bash_blocks(response);
376 if blocks.is_empty() {
377 return Ok(None);
378 }
379
380 let mut outputs = Vec::with_capacity(blocks.len());
381 let mut cumulative_filter_stats: Option<FilterStats> = None;
382 let mut last_envelope: Option<ShellOutputEnvelope> = None;
383 #[allow(clippy::cast_possible_truncation)]
384 let blocks_executed = blocks.len() as u32;
385
386 for block in &blocks {
387 let (output_line, per_block_stats, envelope) =
388 self.execute_block(block, skip_confirm).await?;
389 if let Some(fs) = per_block_stats {
390 let stats = cumulative_filter_stats.get_or_insert_with(FilterStats::default);
391 stats.raw_chars += fs.raw_chars;
392 stats.filtered_chars += fs.filtered_chars;
393 stats.raw_lines += fs.raw_lines;
394 stats.filtered_lines += fs.filtered_lines;
395 stats.confidence = Some(match (stats.confidence, fs.confidence) {
396 (Some(prev), Some(cur)) => crate::filter::worse_confidence(prev, cur),
397 (Some(prev), None) => prev,
398 (None, Some(cur)) => cur,
399 (None, None) => unreachable!(),
400 });
401 if stats.command.is_none() {
402 stats.command = fs.command;
403 }
404 if stats.kept_lines.is_empty() && !fs.kept_lines.is_empty() {
405 stats.kept_lines = fs.kept_lines;
406 }
407 }
408 last_envelope = Some(envelope);
409 outputs.push(output_line);
410 }
411
412 let raw_response = last_envelope
413 .as_ref()
414 .and_then(|e| serde_json::to_value(e).ok());
415
416 Ok(Some(ToolOutput {
417 tool_name: ToolName::new("bash"),
418 summary: outputs.join("\n\n"),
419 blocks_executed,
420 filter_stats: cumulative_filter_stats,
421 diff: None,
422 streamed: self.tool_event_tx.is_some(),
423 terminal_id: None,
424 locations: None,
425 raw_response,
426 claim_source: Some(ClaimSource::Shell),
427 }))
428 }
429
430 #[allow(clippy::too_many_lines)]
431 async fn execute_block(
432 &self,
433 block: &str,
434 skip_confirm: bool,
435 ) -> Result<(String, Option<FilterStats>, ShellOutputEnvelope), ToolError> {
436 self.check_permissions(block, skip_confirm).await?;
437 self.validate_sandbox(block)?;
438
439 let mut snapshot_warning: Option<String> = None;
441 let snapshot = if self.transactional && is_write_command(block) {
442 let paths = affected_paths(block, &self.transaction_scope_matchers);
443 if paths.is_empty() {
444 None
445 } else {
446 match TransactionSnapshot::capture(&paths, self.max_snapshot_bytes) {
447 Ok(snap) => {
448 tracing::debug!(
449 files = snap.file_count(),
450 bytes = snap.total_bytes(),
451 "transaction snapshot captured"
452 );
453 Some(snap)
454 }
455 Err(e) if self.snapshot_required => {
456 return Err(ToolError::SnapshotFailed {
457 reason: e.to_string(),
458 });
459 }
460 Err(e) => {
461 tracing::warn!(err = %e, "transaction snapshot failed, proceeding without rollback");
462 snapshot_warning =
463 Some(format!("[warn] snapshot failed: {e}; rollback unavailable"));
464 None
465 }
466 }
467 }
468 } else {
469 None
470 };
471
472 if let Some(ref tx) = self.tool_event_tx {
473 let sandbox_profile = self
474 .sandbox_policy
475 .as_ref()
476 .map(|p| format!("{:?}", p.profile));
477 let _ = tx.send(ToolEvent::Started {
478 tool_name: ToolName::new("bash"),
479 command: block.to_owned(),
480 sandbox_profile,
481 });
482 }
483
484 let start = Instant::now();
485 let skill_env_snapshot: Option<std::collections::HashMap<String, String>> =
486 self.skill_env.read().clone();
487 let sandbox_pair = self
488 .sandbox
489 .as_ref()
490 .zip(self.sandbox_policy.as_ref())
491 .map(|(sb, pol)| (sb.as_ref(), pol));
492 let (mut envelope, out) = execute_bash(
493 block,
494 self.timeout,
495 self.tool_event_tx.as_ref(),
496 self.cancel_token.as_ref(),
497 skill_env_snapshot.as_ref(),
498 &self.env_blocklist,
499 sandbox_pair,
500 )
501 .await;
502 let exit_code = envelope.exit_code;
503 if exit_code == 130
504 && self
505 .cancel_token
506 .as_ref()
507 .is_some_and(CancellationToken::is_cancelled)
508 {
509 return Err(ToolError::Cancelled);
510 }
511 #[allow(clippy::cast_possible_truncation)]
512 let duration_ms = start.elapsed().as_millis() as u64;
513
514 if let Some(snap) = snapshot {
516 let should_rollback = self.auto_rollback
517 && if self.auto_rollback_exit_codes.is_empty() {
518 exit_code >= 2
519 } else {
520 self.auto_rollback_exit_codes.contains(&exit_code)
521 };
522 if should_rollback {
523 match snap.rollback() {
524 Ok(report) => {
525 tracing::info!(
526 restored = report.restored_count,
527 deleted = report.deleted_count,
528 "transaction rollback completed"
529 );
530 self.log_audit(
531 block,
532 AuditResult::Rollback {
533 restored: report.restored_count,
534 deleted: report.deleted_count,
535 },
536 duration_ms,
537 None,
538 Some(exit_code),
539 false,
540 )
541 .await;
542 if let Some(ref tx) = self.tool_event_tx {
543 let _ = tx.send(ToolEvent::Rollback {
544 tool_name: ToolName::new("bash"),
545 command: block.to_owned(),
546 restored_count: report.restored_count,
547 deleted_count: report.deleted_count,
548 });
549 }
550 }
551 Err(e) => {
552 tracing::error!(err = %e, "transaction rollback failed");
553 }
554 }
555 }
556 }
558
559 let is_timeout = out.contains("[error] command timed out");
560 let audit_result = if is_timeout {
561 AuditResult::Timeout
562 } else if out.contains("[error]") || out.contains("[stderr]") {
563 AuditResult::Error {
564 message: out.clone(),
565 }
566 } else {
567 AuditResult::Success
568 };
569 if is_timeout {
570 self.log_audit(
571 block,
572 audit_result,
573 duration_ms,
574 None,
575 Some(exit_code),
576 false,
577 )
578 .await;
579 self.emit_completed(block, &out, false, None);
580 return Err(ToolError::Timeout {
581 timeout_secs: self.timeout.as_secs(),
582 });
583 }
584
585 if let Some(category) = classify_shell_exit(exit_code, &out) {
586 self.emit_completed(block, &out, false, None);
587 return Err(ToolError::Shell {
588 exit_code,
589 category,
590 message: out.lines().take(3).collect::<Vec<_>>().join("; "),
591 });
592 }
593
594 let sanitized = sanitize_output(&out);
595 let mut per_block_stats: Option<FilterStats> = None;
596 let filtered = if let Some(ref registry) = self.output_filter_registry {
597 match registry.apply(block, &sanitized, exit_code) {
598 Some(fr) => {
599 tracing::debug!(
600 command = block,
601 raw = fr.raw_chars,
602 filtered = fr.filtered_chars,
603 savings_pct = fr.savings_pct(),
604 "output filter applied"
605 );
606 per_block_stats = Some(FilterStats {
607 raw_chars: fr.raw_chars,
608 filtered_chars: fr.filtered_chars,
609 raw_lines: fr.raw_lines,
610 filtered_lines: fr.filtered_lines,
611 confidence: Some(fr.confidence),
612 command: Some(block.to_owned()),
613 kept_lines: fr.kept_lines.clone(),
614 });
615 fr.output
616 }
617 None => sanitized,
618 }
619 } else {
620 sanitized
621 };
622
623 self.emit_completed(
624 block,
625 &out,
626 !out.contains("[error]"),
627 per_block_stats.clone(),
628 );
629
630 envelope.truncated = filtered.len() < out.len();
632
633 self.log_audit(
634 block,
635 audit_result,
636 duration_ms,
637 None,
638 Some(exit_code),
639 envelope.truncated,
640 )
641 .await;
642
643 let output_line = if let Some(warn) = snapshot_warning {
644 format!("{warn}\n$ {block}\n{filtered}")
645 } else {
646 format!("$ {block}\n{filtered}")
647 };
648 Ok((output_line, per_block_stats, envelope))
649 }
650
651 fn emit_completed(
652 &self,
653 command: &str,
654 output: &str,
655 success: bool,
656 filter_stats: Option<FilterStats>,
657 ) {
658 if let Some(ref tx) = self.tool_event_tx {
659 let _ = tx.send(ToolEvent::Completed {
660 tool_name: ToolName::new("bash"),
661 command: command.to_owned(),
662 output: output.to_owned(),
663 success,
664 filter_stats,
665 diff: None,
666 });
667 }
668 }
669
670 async fn check_permissions(&self, block: &str, skip_confirm: bool) -> Result<(), ToolError> {
672 if let Some(blocked) = self.find_blocked_command(block) {
675 let err = ToolError::Blocked {
676 command: blocked.clone(),
677 };
678 self.log_audit(
679 block,
680 AuditResult::Blocked {
681 reason: format!("blocked command: {blocked}"),
682 },
683 0,
684 Some(&err),
685 None,
686 false,
687 )
688 .await;
689 return Err(err);
690 }
691
692 if let Some(ref policy) = self.permission_policy {
693 match policy.check("bash", block) {
694 PermissionAction::Deny => {
695 let err = ToolError::Blocked {
696 command: block.to_owned(),
697 };
698 self.log_audit(
699 block,
700 AuditResult::Blocked {
701 reason: "denied by permission policy".to_owned(),
702 },
703 0,
704 Some(&err),
705 None,
706 false,
707 )
708 .await;
709 return Err(err);
710 }
711 PermissionAction::Ask if !skip_confirm => {
712 return Err(ToolError::ConfirmationRequired {
713 command: block.to_owned(),
714 });
715 }
716 _ => {}
717 }
718 } else if !skip_confirm && let Some(pattern) = self.find_confirm_command(block) {
719 return Err(ToolError::ConfirmationRequired {
720 command: pattern.to_owned(),
721 });
722 }
723
724 Ok(())
725 }
726
727 fn validate_sandbox(&self, code: &str) -> Result<(), ToolError> {
728 let cwd = std::env::current_dir().unwrap_or_default();
729
730 for token in extract_paths(code) {
731 if has_traversal(&token) {
732 return Err(ToolError::SandboxViolation { path: token });
733 }
734
735 let path = if token.starts_with('/') {
736 PathBuf::from(&token)
737 } else {
738 cwd.join(&token)
739 };
740 let canonical = path
741 .canonicalize()
742 .or_else(|_| std::path::absolute(&path))
743 .unwrap_or(path);
744 if !self
745 .allowed_paths
746 .iter()
747 .any(|allowed| canonical.starts_with(allowed))
748 {
749 return Err(ToolError::SandboxViolation {
750 path: canonical.display().to_string(),
751 });
752 }
753 }
754 Ok(())
755 }
756
757 fn find_blocked_command(&self, code: &str) -> Option<String> {
797 let snapshot = self.policy.load_full();
798 let cleaned = strip_shell_escapes(&code.to_lowercase());
799 let commands = tokenize_commands(&cleaned);
800 for blocked in &snapshot.blocked_commands {
801 for cmd_tokens in &commands {
802 if tokens_match_pattern(cmd_tokens, blocked) {
803 return Some(blocked.clone());
804 }
805 }
806 }
807 for inner in extract_subshell_contents(&cleaned) {
809 let inner_commands = tokenize_commands(&inner);
810 for blocked in &snapshot.blocked_commands {
811 for cmd_tokens in &inner_commands {
812 if tokens_match_pattern(cmd_tokens, blocked) {
813 return Some(blocked.clone());
814 }
815 }
816 }
817 }
818 None
819 }
820
821 fn find_confirm_command(&self, code: &str) -> Option<&str> {
822 let normalized = code.to_lowercase();
823 for pattern in &self.confirm_patterns {
824 if normalized.contains(pattern.as_str()) {
825 return Some(pattern.as_str());
826 }
827 }
828 None
829 }
830
831 async fn log_audit(
832 &self,
833 command: &str,
834 result: AuditResult,
835 duration_ms: u64,
836 error: Option<&ToolError>,
837 exit_code: Option<i32>,
838 truncated: bool,
839 ) {
840 if let Some(ref logger) = self.audit_logger {
841 let (error_category, error_domain, error_phase) =
842 error.map_or((None, None, None), |e| {
843 let cat = e.category();
844 (
845 Some(cat.label().to_owned()),
846 Some(cat.domain().label().to_owned()),
847 Some(cat.phase().label().to_owned()),
848 )
849 });
850 let entry = AuditEntry {
851 timestamp: chrono_now(),
852 tool: "shell".into(),
853 command: command.into(),
854 result,
855 duration_ms,
856 error_category,
857 error_domain,
858 error_phase,
859 claim_source: Some(ClaimSource::Shell),
860 mcp_server_id: None,
861 injection_flagged: false,
862 embedding_anomalous: false,
863 cross_boundary_mcp_to_acp: false,
864 adversarial_policy_decision: None,
865 exit_code,
866 truncated,
867 caller_id: None,
868 policy_match: None,
869 correlation_id: None,
870 vigil_risk: None,
871 };
872 logger.log(&entry).await;
873 }
874 }
875}
876
877impl ToolExecutor for ShellExecutor {
878 async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
879 self.execute_inner(response, false).await
880 }
881
882 fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
883 use crate::registry::{InvocationHint, ToolDef};
884 vec![ToolDef {
885 id: "bash".into(),
886 description: "Execute a shell command and return stdout/stderr.\n\nParameters: command (string, required) - shell command to run\nReturns: stdout and stderr combined, prefixed with exit code\nErrors: Blocked if command matches security policy; Timeout after configured seconds; SandboxViolation if path outside allowed dirs\nExample: {\"command\": \"ls -la /tmp\"}".into(),
887 schema: schemars::schema_for!(BashParams),
888 invocation: InvocationHint::FencedBlock("bash"),
889 output_schema: None,
890 }]
891 }
892
893 async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
894 if call.tool_id != "bash" {
895 return Ok(None);
896 }
897 let params: BashParams = crate::executor::deserialize_params(&call.params)?;
898 if params.command.is_empty() {
899 return Ok(None);
900 }
901 let command = ¶ms.command;
902 let synthetic = format!("```bash\n{command}\n```");
904 self.execute_inner(&synthetic, false).await
905 }
906
907 fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
908 ShellExecutor::set_skill_env(self, env);
909 }
910}
911
/// Removes shell quoting and escaping from `input` so that obfuscated
/// command names can be matched literally.
///
/// Handled constructs:
/// - ANSI-C quoting `$'...'`: decoded when it is terminated and contains at
///   least one `\xHH` or numeric (`\N`, `\NN`, `\NNN`) escape; other
///   backslash pairs inside it yield the escaped character;
/// - backslash-newline (line continuation): removed;
/// - backslash followed by any other character: the character is kept;
/// - single and double quotes: the quote characters are dropped and their
///   content is copied verbatim (an unterminated quote runs to the end).
///
/// The input is processed per `char`, so non-ASCII text passes through
/// unchanged.
pub(crate) fn strip_shell_escapes(input: &str) -> String {
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut out = String::with_capacity(input.len());
    let mut i = 0;

    while i < len {
        // ANSI-C quoting: $'...'
        if chars[i] == '$' && i + 1 < len && chars[i + 1] == '\'' {
            let mut j = i + 2;
            let mut decoded = String::new();
            // Only rewrite the span if it actually contained a hex/numeric escape.
            let mut valid = false;
            while j < len && chars[j] != '\'' {
                if chars[j] == '\\' && j + 1 < len {
                    let next = chars[j + 1];
                    if next == 'x' && j + 3 < len {
                        let hi = chars[j + 2].to_digit(16);
                        let lo = chars[j + 3].to_digit(16);
                        if let (Some(h), Some(l)) = (hi, lo) {
                            #[allow(clippy::cast_possible_truncation)]
                            let byte = ((h << 4) | l) as u8;
                            decoded.push(char::from(byte));
                            j += 4;
                            valid = true;
                            continue;
                        }
                    } else if let Some(first) = next.to_digit(10) {
                        // Up to three digits, accumulated base 8.
                        let mut val = first;
                        let mut consumed = 2;
                        while consumed < 4 && j + consumed < len {
                            let Some(digit) = chars[j + consumed].to_digit(10) else {
                                break;
                            };
                            val = val * 8 + digit;
                            consumed += 1;
                        }
                        #[allow(clippy::cast_possible_truncation)]
                        decoded.push(char::from((val & 0xFF) as u8));
                        j += consumed;
                        valid = true;
                        continue;
                    }
                    // Any other escape: keep the escaped character.
                    decoded.push(next);
                    j += 2;
                } else {
                    decoded.push(chars[j]);
                    j += 1;
                }
            }
            if j < len && chars[j] == '\'' && valid {
                out.push_str(&decoded);
                i = j + 1;
                continue;
            }
            // Not a decodable ANSI-C span: fall through and treat `$` literally.
        }

        if chars[i] == '\\' && i + 1 < len {
            // Line continuation is dropped; any other escaped char is kept.
            if chars[i + 1] != '\n' {
                out.push(chars[i + 1]);
            }
            i += 2;
            continue;
        }

        if chars[i] == '"' || chars[i] == '\'' {
            let quote = chars[i];
            i += 1;
            while i < len && chars[i] != quote {
                out.push(chars[i]);
                i += 1;
            }
            if i < len {
                // Skip the closing quote.
                i += 1;
            }
            continue;
        }

        out.push(chars[i]);
        i += 1;
    }
    out
}
1003
/// Pushes `content` onto `results`, followed by any substitutions nested
/// inside it.
fn push_with_nested(results: &mut Vec<String>, content: &[char]) {
    let inner: String = content.iter().collect();
    let nested = extract_subshell_contents(&inner);
    results.push(inner);
    results.extend(nested);
}

/// Extracts the contents of command and process substitutions in `s`.
///
/// Recognised forms are backtick spans and parenthesised spans introduced by
/// `$(`, `<(` or `>(` (parentheses are balanced). Unterminated spans are
/// ignored. Each extracted span is followed in the result by the
/// substitutions nested inside it, so `$(a $(b))` yields `["a $(b)", "b"]`
/// and inner commands cannot hide behind an outer substitution.
pub(crate) fn extract_subshell_contents(s: &str) -> Vec<String> {
    let chars: Vec<char> = s.chars().collect();
    let len = chars.len();
    let mut results = Vec::new();
    let mut i = 0;

    while i < len {
        if chars[i] == '`' {
            let start = i + 1;
            // Index of the closing backtick, or `len` when unterminated.
            let end = chars[start..]
                .iter()
                .position(|&c| c == '`')
                .map_or(len, |offset| start + offset);
            if end < len {
                push_with_nested(&mut results, &chars[start..end]);
            }
            i = end + 1;
            continue;
        }

        let opens_subshell =
            matches!(chars[i], '$' | '<' | '>') && chars.get(i + 1) == Some(&'(');
        if opens_subshell {
            let start = i + 2;
            let mut depth: usize = 1;
            let mut j = start;
            while j < len {
                match chars[j] {
                    '(' => depth += 1,
                    ')' => depth -= 1,
                    _ => {}
                }
                if depth == 0 {
                    break;
                }
                j += 1;
            }
            if depth == 0 {
                push_with_nested(&mut results, &chars[start..j]);
            }
            i = j + 1;
            continue;
        }

        i += 1;
    }

    results
}
1066
/// Splits a normalised command line into individual commands, each as a list
/// of whitespace-separated tokens.
///
/// Commands are separated by `;`, `|`, `||`, `&&`, a single `&` (background
/// operator) and newlines. Segments without any token are dropped.
///
/// Treating `&` as a separator also splits redirections such as `2>&1`; the
/// resulting fragments are harmless extra "commands".
pub(crate) fn tokenize_commands(normalized: &str) -> Vec<Vec<String>> {
    // `||` and `&&` need no special casing: splitting on the single
    // characters yields an empty segment in between, which is filtered out.
    normalized
        .split([';', '|', '&', '\n'])
        .map(|segment| {
            segment
                .split_whitespace()
                .map(str::to_owned)
                .collect::<Vec<String>>()
        })
        .filter(|tokens| !tokens.is_empty())
        .collect()
}
1082
/// Wrapper commands that merely launch another command; they are skipped
/// when locating the effective command of a token list.
const TRANSPARENT_PREFIXES: &[&str] = &["env", "command", "exec", "nice", "nohup", "time", "xargs"];

/// Returns the final `/`-separated component of `tok`.
fn cmd_basename(tok: &str) -> &str {
    tok.rsplit('/').next().unwrap_or(tok)
}

/// Returns `true` for a shell variable assignment such as `FOO=bar`: a
/// non-empty identifier (`[A-Za-z_][A-Za-z0-9_]*`) followed by `=`.
fn is_env_assignment(tok: &str) -> bool {
    let Some((name, _value)) = tok.split_once('=') else {
        return false;
    };
    let mut chars = name.chars();
    chars
        .next()
        .is_some_and(|c| c.is_ascii_alphabetic() || c == '_')
        && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
}

/// Reports whether the command in `tokens` matches the blocklist `pattern`.
///
/// Leading wrapper commands (`TRANSPARENT_PREFIXES`) and leading variable
/// assignments (`FOO=bar cmd`, `env FOO=bar cmd`) are skipped to find the
/// effective command; if nothing else remains, the whole token list is used.
/// The command name is compared by its basename.
///
/// - Single-word pattern: matches when the command name equals the pattern
///   or starts with the pattern followed by `.` (e.g. `mkfs.ext4`).
/// - Multi-word pattern: whitespace in the pattern is normalised to single
///   spaces, then the command's first tokens, joined by single spaces, must
///   start with the pattern.
///
/// Empty `tokens` or a blank `pattern` never match.
pub(crate) fn tokens_match_pattern(tokens: &[String], pattern: &str) -> bool {
    let pattern_tokens: Vec<&str> = pattern.split_whitespace().collect();
    if tokens.is_empty() || pattern_tokens.is_empty() {
        return false;
    }

    let start = tokens
        .iter()
        .position(|t| !TRANSPARENT_PREFIXES.contains(&cmd_basename(t)) && !is_env_assignment(t))
        .unwrap_or(0);
    let effective = &tokens[start..];
    let Some(first) = effective.first() else {
        return false;
    };
    let base = cmd_basename(first);

    if let [pat] = pattern_tokens.as_slice() {
        return base
            .strip_prefix(pat)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('.'));
    }

    // Compare against the pattern with normalised spacing so that patterns
    // written with tabs or repeated spaces still match.
    let wanted = pattern_tokens.join(" ");
    let n = pattern_tokens.len().min(effective.len());
    let mut parts: Vec<&str> = Vec::with_capacity(n);
    parts.push(base);
    parts.extend(effective[1..n].iter().map(String::as_str));
    parts.join(" ").starts_with(&wanted)
}
1141
1142fn extract_paths(code: &str) -> Vec<String> {
1143 let mut result = Vec::new();
1144
1145 let mut tokens: Vec<String> = Vec::new();
1147 let mut current = String::new();
1148 let mut chars = code.chars().peekable();
1149 while let Some(c) = chars.next() {
1150 match c {
1151 '"' | '\'' => {
1152 let quote = c;
1153 while let Some(&nc) = chars.peek() {
1154 if nc == quote {
1155 chars.next();
1156 break;
1157 }
1158 current.push(chars.next().unwrap());
1159 }
1160 }
1161 c if c.is_whitespace() || matches!(c, ';' | '|' | '&') => {
1162 if !current.is_empty() {
1163 tokens.push(std::mem::take(&mut current));
1164 }
1165 }
1166 _ => current.push(c),
1167 }
1168 }
1169 if !current.is_empty() {
1170 tokens.push(current);
1171 }
1172
1173 for token in tokens {
1174 let trimmed = token.trim_end_matches([';', '&', '|']).to_owned();
1175 if trimmed.is_empty() {
1176 continue;
1177 }
1178 if trimmed.starts_with('/')
1179 || trimmed.starts_with("./")
1180 || trimmed.starts_with("../")
1181 || trimmed == ".."
1182 || (trimmed.starts_with('.') && trimmed.contains('/'))
1183 || is_relative_path_token(&trimmed)
1184 {
1185 result.push(trimmed);
1186 }
1187 }
1188 result
1189}
1190
/// Heuristically decides whether `token` is a relative path such as
/// `src/main.rs`.
///
/// A token qualifies when it contains `/`, does not start with `/` or `.`,
/// is not a URL (no `://`), is not a `KEY=value` assignment whose key
/// consists only of ASCII alphanumerics/underscores, and starts with an
/// ASCII alphanumeric character or `_`.
fn is_relative_path_token(token: &str) -> bool {
    let Some(first) = token.chars().next() else {
        return false;
    };
    if first == '/' || first == '.' || !token.contains('/') {
        return false;
    }
    if token.contains("://") {
        return false;
    }
    let is_assignment = token
        .split_once('=')
        .is_some_and(|(key, _)| key.chars().all(|c| c.is_ascii_alphanumeric() || c == '_'));
    if is_assignment {
        return false;
    }
    first.is_ascii_alphanumeric() || first == '_'
}
1219
1220fn classify_shell_exit(
1226 exit_code: i32,
1227 output: &str,
1228) -> Option<crate::error_taxonomy::ToolErrorCategory> {
1229 use crate::error_taxonomy::ToolErrorCategory;
1230 match exit_code {
1231 126 => Some(ToolErrorCategory::PolicyBlocked),
1233 127 => Some(ToolErrorCategory::PermanentFailure),
1235 _ => {
1236 let lower = output.to_lowercase();
1237 if lower.contains("permission denied") {
1238 Some(ToolErrorCategory::PolicyBlocked)
1239 } else if lower.contains("no such file or directory") {
1240 Some(ToolErrorCategory::PermanentFailure)
1241 } else {
1242 None
1243 }
1244 }
1245 }
1246}
1247
/// Returns `true` when any `/`-separated segment of `path` is exactly `..`.
fn has_traversal(path: &str) -> bool {
    for segment in path.split('/') {
        if segment == ".." {
            return true;
        }
    }
    false
}
1251
/// Returns the fenced code blocks tagged `bash` found in `text`, by
/// delegating to `crate::executor::extract_fenced_blocks`.
fn extract_bash_blocks(text: &str) -> Vec<&str> {
    crate::executor::extract_fenced_blocks(text, "bash")
}
1255
1256async fn kill_process_tree(child: &mut tokio::process::Child) {
1260 #[cfg(unix)]
1261 if let Some(pid) = child.id() {
1262 let _ = Command::new("pkill")
1263 .args(["-KILL", "-P", &pid.to_string()])
1264 .status()
1265 .await;
1266 }
1267 let _ = child.kill().await;
1268}
1269
/// Structured result of one executed shell command, serialized into
/// `ToolOutput::raw_response`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ShellOutputEnvelope {
    /// Captured standard output.
    pub stdout: String,
    /// Captured standard error (plus a note on timeout / cancellation).
    pub stderr: String,
    /// Process exit code; `1` is used when none is available, `130` on cancellation.
    pub exit_code: i32,
    /// Set by the executor when the returned output is shorter than the raw output.
    pub truncated: bool,
}
1285
1286#[allow(clippy::too_many_lines)]
1287async fn execute_bash(
1288 code: &str,
1289 timeout: Duration,
1290 event_tx: Option<&ToolEventTx>,
1291 cancel_token: Option<&CancellationToken>,
1292 extra_env: Option<&std::collections::HashMap<String, String>>,
1293 env_blocklist: &[String],
1294 sandbox: Option<(&dyn Sandbox, &SandboxPolicy)>,
1295) -> (ShellOutputEnvelope, String) {
1296 use std::process::Stdio;
1297 use tokio::io::{AsyncBufReadExt, BufReader};
1298
1299 let timeout_secs = timeout.as_secs();
1300
1301 let mut cmd = Command::new("bash");
1302 cmd.arg("-c").arg(code);
1303
1304 for (key, _) in std::env::vars() {
1305 if env_blocklist
1306 .iter()
1307 .any(|prefix| key.starts_with(prefix.as_str()))
1308 {
1309 cmd.env_remove(&key);
1310 }
1311 }
1312
1313 if let Some(env) = extra_env {
1314 cmd.envs(env);
1315 }
1316
1317 if let Some((sb, policy)) = sandbox
1319 && let Err(err) = sb.wrap(&mut cmd, policy)
1320 {
1321 let msg = format!("[error] sandbox setup failed: {err}");
1322 return (
1323 ShellOutputEnvelope {
1324 stdout: String::new(),
1325 stderr: msg.clone(),
1326 exit_code: 1,
1327 truncated: false,
1328 },
1329 msg,
1330 );
1331 }
1332
1333 cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
1334
1335 let child_result = cmd.spawn();
1336
1337 let mut child = match child_result {
1338 Ok(c) => c,
1339 Err(e) => {
1340 let msg = format!("[error] {e}");
1341 return (
1342 ShellOutputEnvelope {
1343 stdout: String::new(),
1344 stderr: msg.clone(),
1345 exit_code: 1,
1346 truncated: false,
1347 },
1348 msg,
1349 );
1350 }
1351 };
1352
1353 let stdout = child.stdout.take().expect("stdout piped");
1354 let stderr = child.stderr.take().expect("stderr piped");
1355
1356 let (line_tx, mut line_rx) = tokio::sync::mpsc::channel::<(bool, String)>(64);
1359
1360 let stdout_tx = line_tx.clone();
1361 tokio::spawn(async move {
1362 let mut reader = BufReader::new(stdout);
1363 let mut buf = String::new();
1364 while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1365 let _ = stdout_tx.send((false, buf.clone())).await;
1366 buf.clear();
1367 }
1368 });
1369
1370 tokio::spawn(async move {
1371 let mut reader = BufReader::new(stderr);
1372 let mut buf = String::new();
1373 while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1374 let _ = line_tx.send((true, buf.clone())).await;
1375 buf.clear();
1376 }
1377 });
1378
1379 let mut combined = String::new();
1380 let mut stdout_buf = String::new();
1381 let mut stderr_buf = String::new();
1382 let deadline = tokio::time::Instant::now() + timeout;
1383
1384 loop {
1385 tokio::select! {
1386 line = line_rx.recv() => {
1387 match line {
1388 Some((is_stderr, chunk)) => {
1389 let interleaved = if is_stderr {
1390 format!("[stderr] {chunk}")
1391 } else {
1392 chunk.clone()
1393 };
1394 if let Some(tx) = event_tx {
1395 let _ = tx.send(ToolEvent::OutputChunk {
1396 tool_name: ToolName::new("bash"),
1397 command: code.to_owned(),
1398 chunk: interleaved.clone(),
1399 });
1400 }
1401 combined.push_str(&interleaved);
1402 if is_stderr {
1403 stderr_buf.push_str(&chunk);
1404 } else {
1405 stdout_buf.push_str(&chunk);
1406 }
1407 }
1408 None => break,
1409 }
1410 }
1411 () = tokio::time::sleep_until(deadline) => {
1412 kill_process_tree(&mut child).await;
1413 let msg = format!("[error] command timed out after {timeout_secs}s");
1414 return (
1415 ShellOutputEnvelope {
1416 stdout: stdout_buf,
1417 stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
1418 exit_code: 1,
1419 truncated: false,
1420 },
1421 msg,
1422 );
1423 }
1424 () = async {
1425 match cancel_token {
1426 Some(t) => t.cancelled().await,
1427 None => std::future::pending().await,
1428 }
1429 } => {
1430 kill_process_tree(&mut child).await;
1431 return (
1432 ShellOutputEnvelope {
1433 stdout: stdout_buf,
1434 stderr: format!("{stderr_buf}operation aborted"),
1435 exit_code: 130,
1436 truncated: false,
1437 },
1438 "[cancelled] operation aborted".to_string(),
1439 );
1440 }
1441 }
1442 }
1443
1444 let status = child.wait().await;
1445 let exit_code = status.ok().and_then(|s| s.code()).unwrap_or(1);
1446
1447 let (envelope, combined) = if combined.is_empty() {
1448 (
1449 ShellOutputEnvelope {
1450 stdout: String::new(),
1451 stderr: String::new(),
1452 exit_code,
1453 truncated: false,
1454 },
1455 "(no output)".to_string(),
1456 )
1457 } else {
1458 (
1459 ShellOutputEnvelope {
1460 stdout: stdout_buf.trim_end().to_owned(),
1461 stderr: stderr_buf.trim_end().to_owned(),
1462 exit_code,
1463 truncated: false,
1464 },
1465 combined,
1466 )
1467 };
1468 (envelope, combined)
1469}
1470
1471#[cfg(test)]
1472mod tests;