1use std::path::PathBuf;
24use std::time::{Duration, Instant};
25
26use tokio::process::Command;
27use tokio_util::sync::CancellationToken;
28
29use schemars::JsonSchema;
30use serde::Deserialize;
31
32use std::sync::Arc;
33
34use parking_lot::RwLock;
35
36use zeph_common::ToolName;
37
38use crate::audit::{AuditEntry, AuditLogger, AuditResult, chrono_now};
39use crate::config::ShellConfig;
40use crate::executor::{
41 ClaimSource, FilterStats, ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput,
42};
43use crate::filter::{OutputFilterRegistry, sanitize_output};
44use crate::permissions::{PermissionAction, PermissionPolicy};
45
46mod transaction;
47use transaction::{TransactionSnapshot, affected_paths, build_scope_matchers, is_write_command};
48
/// Command patterns that are blocked unless the configuration explicitly allows them.
///
/// [`ShellExecutor::new`] seeds its blocklist from this table, dropping any entry that
/// also appears (case-insensitively) in the configured `allowed_commands`.
/// Patterns are trimmed before matching, so the trailing space in `"nc "` is not significant
/// to [`tokens_match_pattern`].
const DEFAULT_BLOCKED: &[&str] = &[
    "rm -rf /", "sudo", "mkfs", "dd if=", "curl", "wget", "nc ", "ncat", "netcat", "shutdown",
    "reboot", "halt",
];
53
/// Public alias of the built-in blocklist ([`DEFAULT_BLOCKED`]).
pub const DEFAULT_BLOCKED_COMMANDS: &[&str] = DEFAULT_BLOCKED;
63
/// Base names of the binaries that [`effective_shell_command`] treats as shell interpreters.
pub const SHELL_INTERPRETERS: &[&str] =
    &["bash", "sh", "zsh", "fish", "dash", "ksh", "csh", "tcsh"];
71
/// Substrings that [`check_blocklist`] rejects outright: command substitution (`$(`,
/// backtick) and process substitution (`<(`, `>(`).
const SUBSHELL_METACHARS: &[&str] = &["$(", "`", "<(", ">("];
76
77#[must_use]
85pub fn check_blocklist(command: &str, blocklist: &[String]) -> Option<String> {
86 let lower = command.to_lowercase();
87 for meta in SUBSHELL_METACHARS {
89 if lower.contains(meta) {
90 return Some((*meta).to_owned());
91 }
92 }
93 let cleaned = strip_shell_escapes(&lower);
94 let commands = tokenize_commands(&cleaned);
95 for blocked in blocklist {
96 for cmd_tokens in &commands {
97 if tokens_match_pattern(cmd_tokens, blocked) {
98 return Some(blocked.clone());
99 }
100 }
101 }
102 None
103}
104
105#[must_use]
110pub fn effective_shell_command<'a>(binary: &str, args: &'a [String]) -> Option<&'a str> {
111 let base = binary.rsplit('/').next().unwrap_or(binary);
112 if !SHELL_INTERPRETERS.contains(&base) {
113 return None;
114 }
115 let pos = args.iter().position(|a| a == "-c")?;
117 args.get(pos + 1).map(String::as_str)
118}
119
/// Network-capable commands that [`ShellExecutor::new`] adds to the blocklist whenever the
/// configuration has `allow_network` disabled.
const NETWORK_COMMANDS: &[&str] = &["curl", "wget", "nc ", "ncat", "netcat"];
121
// Parameters of the `bash` tool call; the tool's JSON schema is generated from this type.
// Plain `//` comments are used on purpose: `///` doc comments on a `JsonSchema` type are
// emitted into the generated schema as descriptions.
#[derive(Deserialize, JsonSchema)]
pub(crate) struct BashParams {
    // Shell command to run; an empty string makes `execute_tool_call` return `Ok(None)`.
    command: String,
}
127
/// Executes `bash` blocks with blocklist, permission, sandbox and optional transaction checks.
#[derive(Debug)]
pub struct ShellExecutor {
    /// Maximum run time of a single command before it is killed.
    timeout: Duration,
    /// Lowercased patterns that cause a command to be rejected, sorted and de-duplicated.
    blocked_commands: Vec<String>,
    /// Path prefixes that commands may reference; defaults to the current directory.
    allowed_paths: Vec<PathBuf>,
    /// Substrings that require user confirmation when no permission policy is installed.
    confirm_patterns: Vec<String>,
    /// Prefixes of environment variable names that are removed from the child environment.
    env_blocklist: Vec<String>,
    /// Optional sink for audit entries.
    audit_logger: Option<Arc<AuditLogger>>,
    /// Optional channel for `Started` / `OutputChunk` / `Completed` / `Rollback` events.
    tool_event_tx: Option<ToolEventTx>,
    /// Optional policy consulted instead of `confirm_patterns`.
    permission_policy: Option<PermissionPolicy>,
    /// Optional registry of output filters applied to sanitized command output.
    output_filter_registry: Option<OutputFilterRegistry>,
    /// Optional token that aborts a running command when cancelled.
    cancel_token: Option<CancellationToken>,
    /// Extra environment variables injected into the child; replaced via `set_skill_env`.
    skill_env: RwLock<Option<std::collections::HashMap<String, String>>>,
    /// When set, write commands are preceded by a filesystem snapshot.
    transactional: bool,
    /// When set, a snapshot is rolled back after a failing exit code.
    auto_rollback: bool,
    /// Exit codes that trigger a rollback; when empty, any exit code `>= 2` does.
    auto_rollback_exit_codes: Vec<i32>,
    /// When set, a failed snapshot aborts the command instead of only producing a warning.
    snapshot_required: bool,
    /// Size limit handed to `TransactionSnapshot::capture`.
    max_snapshot_bytes: u64,
    /// Glob matchers handed to `affected_paths` to scope which paths are snapshotted.
    transaction_scope_matchers: Vec<globset::GlobMatcher>,
}
170
171impl ShellExecutor {
172 #[must_use]
178 pub fn new(config: &ShellConfig) -> Self {
179 let allowed: Vec<String> = config
180 .allowed_commands
181 .iter()
182 .map(|s| s.to_lowercase())
183 .collect();
184
185 let mut blocked: Vec<String> = DEFAULT_BLOCKED
186 .iter()
187 .filter(|s| !allowed.contains(&s.to_lowercase()))
188 .map(|s| (*s).to_owned())
189 .collect();
190 blocked.extend(config.blocked_commands.iter().map(|s| s.to_lowercase()));
191
192 if !config.allow_network {
193 for cmd in NETWORK_COMMANDS {
194 let lower = cmd.to_lowercase();
195 if !blocked.contains(&lower) {
196 blocked.push(lower);
197 }
198 }
199 }
200
201 blocked.sort();
202 blocked.dedup();
203
204 let allowed_paths = if config.allowed_paths.is_empty() {
205 vec![std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))]
206 } else {
207 config.allowed_paths.iter().map(PathBuf::from).collect()
208 };
209
210 Self {
211 timeout: Duration::from_secs(config.timeout),
212 blocked_commands: blocked,
213 allowed_paths,
214 confirm_patterns: config.confirm_patterns.clone(),
215 env_blocklist: config.env_blocklist.clone(),
216 audit_logger: None,
217 tool_event_tx: None,
218 permission_policy: None,
219 output_filter_registry: None,
220 cancel_token: None,
221 skill_env: RwLock::new(None),
222 transactional: config.transactional,
223 auto_rollback: config.auto_rollback,
224 auto_rollback_exit_codes: config.auto_rollback_exit_codes.clone(),
225 snapshot_required: config.snapshot_required,
226 max_snapshot_bytes: config.max_snapshot_bytes,
227 transaction_scope_matchers: build_scope_matchers(&config.transaction_scope),
228 }
229 }
230
231 pub fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
233 *self.skill_env.write() = env;
234 }
235
236 #[must_use]
238 pub fn with_audit(mut self, logger: Arc<AuditLogger>) -> Self {
239 self.audit_logger = Some(logger);
240 self
241 }
242
243 #[must_use]
248 pub fn with_tool_event_tx(mut self, tx: ToolEventTx) -> Self {
249 self.tool_event_tx = Some(tx);
250 self
251 }
252
253 #[must_use]
258 pub fn with_permissions(mut self, policy: PermissionPolicy) -> Self {
259 self.permission_policy = Some(policy);
260 self
261 }
262
263 #[must_use]
266 pub fn with_cancel_token(mut self, token: CancellationToken) -> Self {
267 self.cancel_token = Some(token);
268 self
269 }
270
271 #[must_use]
274 pub fn with_output_filters(mut self, registry: OutputFilterRegistry) -> Self {
275 self.output_filter_registry = Some(registry);
276 self
277 }
278
279 #[cfg_attr(
285 feature = "profiling",
286 tracing::instrument(name = "tool.shell", skip_all, fields(exit_code = tracing::field::Empty, duration_ms = tracing::field::Empty))
287 )]
288 pub async fn execute_confirmed(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
289 self.execute_inner(response, true).await
290 }
291
292 async fn execute_inner(
293 &self,
294 response: &str,
295 skip_confirm: bool,
296 ) -> Result<Option<ToolOutput>, ToolError> {
297 let blocks = extract_bash_blocks(response);
298 if blocks.is_empty() {
299 return Ok(None);
300 }
301
302 let mut outputs = Vec::with_capacity(blocks.len());
303 let mut cumulative_filter_stats: Option<FilterStats> = None;
304 let mut last_envelope: Option<ShellOutputEnvelope> = None;
305 #[allow(clippy::cast_possible_truncation)]
306 let blocks_executed = blocks.len() as u32;
307
308 for block in &blocks {
309 let (output_line, per_block_stats, envelope) =
310 self.execute_block(block, skip_confirm).await?;
311 if let Some(fs) = per_block_stats {
312 let stats = cumulative_filter_stats.get_or_insert_with(FilterStats::default);
313 stats.raw_chars += fs.raw_chars;
314 stats.filtered_chars += fs.filtered_chars;
315 stats.raw_lines += fs.raw_lines;
316 stats.filtered_lines += fs.filtered_lines;
317 stats.confidence = Some(match (stats.confidence, fs.confidence) {
318 (Some(prev), Some(cur)) => crate::filter::worse_confidence(prev, cur),
319 (Some(prev), None) => prev,
320 (None, Some(cur)) => cur,
321 (None, None) => unreachable!(),
322 });
323 if stats.command.is_none() {
324 stats.command = fs.command;
325 }
326 if stats.kept_lines.is_empty() && !fs.kept_lines.is_empty() {
327 stats.kept_lines = fs.kept_lines;
328 }
329 }
330 last_envelope = Some(envelope);
331 outputs.push(output_line);
332 }
333
334 let raw_response = last_envelope
335 .as_ref()
336 .and_then(|e| serde_json::to_value(e).ok());
337
338 Ok(Some(ToolOutput {
339 tool_name: ToolName::new("bash"),
340 summary: outputs.join("\n\n"),
341 blocks_executed,
342 filter_stats: cumulative_filter_stats,
343 diff: None,
344 streamed: self.tool_event_tx.is_some(),
345 terminal_id: None,
346 locations: None,
347 raw_response,
348 claim_source: Some(ClaimSource::Shell),
349 }))
350 }
351
352 #[allow(clippy::too_many_lines)]
353 async fn execute_block(
354 &self,
355 block: &str,
356 skip_confirm: bool,
357 ) -> Result<(String, Option<FilterStats>, ShellOutputEnvelope), ToolError> {
358 self.check_permissions(block, skip_confirm).await?;
359 self.validate_sandbox(block)?;
360
361 let mut snapshot_warning: Option<String> = None;
363 let snapshot = if self.transactional && is_write_command(block) {
364 let paths = affected_paths(block, &self.transaction_scope_matchers);
365 if paths.is_empty() {
366 None
367 } else {
368 match TransactionSnapshot::capture(&paths, self.max_snapshot_bytes) {
369 Ok(snap) => {
370 tracing::debug!(
371 files = snap.file_count(),
372 bytes = snap.total_bytes(),
373 "transaction snapshot captured"
374 );
375 Some(snap)
376 }
377 Err(e) if self.snapshot_required => {
378 return Err(ToolError::SnapshotFailed {
379 reason: e.to_string(),
380 });
381 }
382 Err(e) => {
383 tracing::warn!(err = %e, "transaction snapshot failed, proceeding without rollback");
384 snapshot_warning =
385 Some(format!("[warn] snapshot failed: {e}; rollback unavailable"));
386 None
387 }
388 }
389 }
390 } else {
391 None
392 };
393
394 if let Some(ref tx) = self.tool_event_tx {
395 let _ = tx.send(ToolEvent::Started {
396 tool_name: ToolName::new("bash"),
397 command: block.to_owned(),
398 });
399 }
400
401 let start = Instant::now();
402 let skill_env_snapshot: Option<std::collections::HashMap<String, String>> =
403 self.skill_env.read().clone();
404 let (mut envelope, out) = execute_bash(
405 block,
406 self.timeout,
407 self.tool_event_tx.as_ref(),
408 self.cancel_token.as_ref(),
409 skill_env_snapshot.as_ref(),
410 &self.env_blocklist,
411 )
412 .await;
413 let exit_code = envelope.exit_code;
414 if exit_code == 130
415 && self
416 .cancel_token
417 .as_ref()
418 .is_some_and(CancellationToken::is_cancelled)
419 {
420 return Err(ToolError::Cancelled);
421 }
422 #[allow(clippy::cast_possible_truncation)]
423 let duration_ms = start.elapsed().as_millis() as u64;
424
425 if let Some(snap) = snapshot {
427 let should_rollback = self.auto_rollback
428 && if self.auto_rollback_exit_codes.is_empty() {
429 exit_code >= 2
430 } else {
431 self.auto_rollback_exit_codes.contains(&exit_code)
432 };
433 if should_rollback {
434 match snap.rollback() {
435 Ok(report) => {
436 tracing::info!(
437 restored = report.restored_count,
438 deleted = report.deleted_count,
439 "transaction rollback completed"
440 );
441 self.log_audit(
442 block,
443 AuditResult::Rollback {
444 restored: report.restored_count,
445 deleted: report.deleted_count,
446 },
447 duration_ms,
448 None,
449 Some(exit_code),
450 false,
451 )
452 .await;
453 if let Some(ref tx) = self.tool_event_tx {
454 let _ = tx.send(ToolEvent::Rollback {
455 tool_name: ToolName::new("bash"),
456 command: block.to_owned(),
457 restored_count: report.restored_count,
458 deleted_count: report.deleted_count,
459 });
460 }
461 }
462 Err(e) => {
463 tracing::error!(err = %e, "transaction rollback failed");
464 }
465 }
466 }
467 }
469
470 let is_timeout = out.contains("[error] command timed out");
471 let audit_result = if is_timeout {
472 AuditResult::Timeout
473 } else if out.contains("[error]") || out.contains("[stderr]") {
474 AuditResult::Error {
475 message: out.clone(),
476 }
477 } else {
478 AuditResult::Success
479 };
480 if is_timeout {
481 self.log_audit(
482 block,
483 audit_result,
484 duration_ms,
485 None,
486 Some(exit_code),
487 false,
488 )
489 .await;
490 self.emit_completed(block, &out, false, None);
491 return Err(ToolError::Timeout {
492 timeout_secs: self.timeout.as_secs(),
493 });
494 }
495
496 if let Some(category) = classify_shell_exit(exit_code, &out) {
497 self.emit_completed(block, &out, false, None);
498 return Err(ToolError::Shell {
499 exit_code,
500 category,
501 message: out.lines().take(3).collect::<Vec<_>>().join("; "),
502 });
503 }
504
505 let sanitized = sanitize_output(&out);
506 let mut per_block_stats: Option<FilterStats> = None;
507 let filtered = if let Some(ref registry) = self.output_filter_registry {
508 match registry.apply(block, &sanitized, exit_code) {
509 Some(fr) => {
510 tracing::debug!(
511 command = block,
512 raw = fr.raw_chars,
513 filtered = fr.filtered_chars,
514 savings_pct = fr.savings_pct(),
515 "output filter applied"
516 );
517 per_block_stats = Some(FilterStats {
518 raw_chars: fr.raw_chars,
519 filtered_chars: fr.filtered_chars,
520 raw_lines: fr.raw_lines,
521 filtered_lines: fr.filtered_lines,
522 confidence: Some(fr.confidence),
523 command: Some(block.to_owned()),
524 kept_lines: fr.kept_lines.clone(),
525 });
526 fr.output
527 }
528 None => sanitized,
529 }
530 } else {
531 sanitized
532 };
533
534 self.emit_completed(
535 block,
536 &out,
537 !out.contains("[error]"),
538 per_block_stats.clone(),
539 );
540
541 envelope.truncated = filtered.len() < out.len();
543
544 self.log_audit(
545 block,
546 audit_result,
547 duration_ms,
548 None,
549 Some(exit_code),
550 envelope.truncated,
551 )
552 .await;
553
554 let output_line = if let Some(warn) = snapshot_warning {
555 format!("{warn}\n$ {block}\n{filtered}")
556 } else {
557 format!("$ {block}\n{filtered}")
558 };
559 Ok((output_line, per_block_stats, envelope))
560 }
561
562 fn emit_completed(
563 &self,
564 command: &str,
565 output: &str,
566 success: bool,
567 filter_stats: Option<FilterStats>,
568 ) {
569 if let Some(ref tx) = self.tool_event_tx {
570 let _ = tx.send(ToolEvent::Completed {
571 tool_name: ToolName::new("bash"),
572 command: command.to_owned(),
573 output: output.to_owned(),
574 success,
575 filter_stats,
576 diff: None,
577 });
578 }
579 }
580
581 async fn check_permissions(&self, block: &str, skip_confirm: bool) -> Result<(), ToolError> {
583 if let Some(blocked) = self.find_blocked_command(block) {
586 let err = ToolError::Blocked {
587 command: blocked.to_owned(),
588 };
589 self.log_audit(
590 block,
591 AuditResult::Blocked {
592 reason: format!("blocked command: {blocked}"),
593 },
594 0,
595 Some(&err),
596 None,
597 false,
598 )
599 .await;
600 return Err(err);
601 }
602
603 if let Some(ref policy) = self.permission_policy {
604 match policy.check("bash", block) {
605 PermissionAction::Deny => {
606 let err = ToolError::Blocked {
607 command: block.to_owned(),
608 };
609 self.log_audit(
610 block,
611 AuditResult::Blocked {
612 reason: "denied by permission policy".to_owned(),
613 },
614 0,
615 Some(&err),
616 None,
617 false,
618 )
619 .await;
620 return Err(err);
621 }
622 PermissionAction::Ask if !skip_confirm => {
623 return Err(ToolError::ConfirmationRequired {
624 command: block.to_owned(),
625 });
626 }
627 _ => {}
628 }
629 } else if !skip_confirm && let Some(pattern) = self.find_confirm_command(block) {
630 return Err(ToolError::ConfirmationRequired {
631 command: pattern.to_owned(),
632 });
633 }
634
635 Ok(())
636 }
637
638 fn validate_sandbox(&self, code: &str) -> Result<(), ToolError> {
639 let cwd = std::env::current_dir().unwrap_or_default();
640
641 for token in extract_paths(code) {
642 if has_traversal(&token) {
643 return Err(ToolError::SandboxViolation { path: token });
644 }
645
646 let path = if token.starts_with('/') {
647 PathBuf::from(&token)
648 } else {
649 cwd.join(&token)
650 };
651 let canonical = path
652 .canonicalize()
653 .or_else(|_| std::path::absolute(&path))
654 .unwrap_or(path);
655 if !self
656 .allowed_paths
657 .iter()
658 .any(|allowed| canonical.starts_with(allowed))
659 {
660 return Err(ToolError::SandboxViolation {
661 path: canonical.display().to_string(),
662 });
663 }
664 }
665 Ok(())
666 }
667
668 fn find_blocked_command(&self, code: &str) -> Option<&str> {
703 let cleaned = strip_shell_escapes(&code.to_lowercase());
704 let commands = tokenize_commands(&cleaned);
705 for blocked in &self.blocked_commands {
706 for cmd_tokens in &commands {
707 if tokens_match_pattern(cmd_tokens, blocked) {
708 return Some(blocked.as_str());
709 }
710 }
711 }
712 for inner in extract_subshell_contents(&cleaned) {
714 let inner_commands = tokenize_commands(&inner);
715 for blocked in &self.blocked_commands {
716 for cmd_tokens in &inner_commands {
717 if tokens_match_pattern(cmd_tokens, blocked) {
718 return Some(blocked.as_str());
719 }
720 }
721 }
722 }
723 None
724 }
725
726 fn find_confirm_command(&self, code: &str) -> Option<&str> {
727 let normalized = code.to_lowercase();
728 for pattern in &self.confirm_patterns {
729 if normalized.contains(pattern.as_str()) {
730 return Some(pattern.as_str());
731 }
732 }
733 None
734 }
735
736 async fn log_audit(
737 &self,
738 command: &str,
739 result: AuditResult,
740 duration_ms: u64,
741 error: Option<&ToolError>,
742 exit_code: Option<i32>,
743 truncated: bool,
744 ) {
745 if let Some(ref logger) = self.audit_logger {
746 let (error_category, error_domain, error_phase) =
747 error.map_or((None, None, None), |e| {
748 let cat = e.category();
749 (
750 Some(cat.label().to_owned()),
751 Some(cat.domain().label().to_owned()),
752 Some(cat.phase().label().to_owned()),
753 )
754 });
755 let entry = AuditEntry {
756 timestamp: chrono_now(),
757 tool: "shell".into(),
758 command: command.into(),
759 result,
760 duration_ms,
761 error_category,
762 error_domain,
763 error_phase,
764 claim_source: Some(ClaimSource::Shell),
765 mcp_server_id: None,
766 injection_flagged: false,
767 embedding_anomalous: false,
768 cross_boundary_mcp_to_acp: false,
769 adversarial_policy_decision: None,
770 exit_code,
771 truncated,
772 caller_id: None,
773 policy_match: None,
774 };
775 logger.log(&entry).await;
776 }
777 }
778}
779
780impl ToolExecutor for ShellExecutor {
781 async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
782 self.execute_inner(response, false).await
783 }
784
785 fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
786 use crate::registry::{InvocationHint, ToolDef};
787 vec![ToolDef {
788 id: "bash".into(),
789 description: "Execute a shell command and return stdout/stderr.\n\nParameters: command (string, required) - shell command to run\nReturns: stdout and stderr combined, prefixed with exit code\nErrors: Blocked if command matches security policy; Timeout after configured seconds; SandboxViolation if path outside allowed dirs\nExample: {\"command\": \"ls -la /tmp\"}".into(),
790 schema: schemars::schema_for!(BashParams),
791 invocation: InvocationHint::FencedBlock("bash"),
792 }]
793 }
794
795 async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
796 if call.tool_id != "bash" {
797 return Ok(None);
798 }
799 let params: BashParams = crate::executor::deserialize_params(&call.params)?;
800 if params.command.is_empty() {
801 return Ok(None);
802 }
803 let command = ¶ms.command;
804 let synthetic = format!("```bash\n{command}\n```");
806 self.execute_inner(&synthetic, false).await
807 }
808
809 fn set_skill_env(&self, env: Option<std::collections::HashMap<String, String>>) {
810 ShellExecutor::set_skill_env(self, env);
811 }
812}
813
/// Removes shell quoting and escaping so that obfuscated command names can be matched.
///
/// Handled forms:
/// * `$'...'` ANSI-C quoting containing at least one `\xHH` or octal escape: decoded.
/// * backslash-newline (line continuation): removed.
/// * backslash followed by any other character: the backslash is dropped.
/// * single- and double-quoted strings: the quotes are dropped, the contents kept.
///
/// Everything else is copied unchanged. The scan works on bytes, which is safe because
/// every marker it looks for is ASCII and ASCII bytes never occur inside a multi-byte UTF-8
/// sequence. The output is assembled as bytes and decoded once at the end, so non-ASCII
/// input such as `café` survives intact. Decoded escape bytes that do not form valid UTF-8
/// are replaced with U+FFFD.
pub(crate) fn strip_shell_escapes(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'$' && bytes.get(i + 1) == Some(&b'\'') {
            if let Some((decoded, next)) = decode_ansi_c_quote(bytes, i + 2) {
                out.extend_from_slice(&decoded);
                i = next;
                continue;
            }
            // Not a decodable `$'...'`: fall through and treat `$` as a literal; the
            // following quote is then handled by the plain-quote branch below.
        }
        if bytes[i] == b'\\' {
            match bytes.get(i + 1) {
                // Line continuation: drop both bytes.
                Some(b'\n') => {
                    i += 2;
                    continue;
                }
                // Escaped character: keep it, drop the backslash.
                Some(&escaped) => {
                    out.push(escaped);
                    i += 2;
                    continue;
                }
                // Trailing backslash: kept as a literal below.
                None => {}
            }
        }
        if bytes[i] == b'"' || bytes[i] == b'\'' {
            let quote = bytes[i];
            i += 1;
            while i < bytes.len() && bytes[i] != quote {
                out.push(bytes[i]);
                i += 1;
            }
            if i < bytes.len() {
                // Skip the closing quote.
                i += 1;
            }
            continue;
        }
        out.push(bytes[i]);
        i += 1;
    }
    String::from_utf8_lossy(&out).into_owned()
}

/// Decodes the body of a `$'...'` string starting at `start` (just past the opening quote).
///
/// Returns the decoded bytes and the index just past the closing quote. Returns `None`
/// when the string is unterminated or contains no `\xHH` / octal escape at all.
fn decode_ansi_c_quote(bytes: &[u8], start: usize) -> Option<(Vec<u8>, usize)> {
    let mut decoded = Vec::new();
    let mut saw_numeric_escape = false;
    let mut j = start;
    while j < bytes.len() && bytes[j] != b'\'' {
        if bytes[j] == b'\\' && j + 1 < bytes.len() {
            let next = bytes[j + 1];
            if next == b'x' && j + 3 < bytes.len() {
                let hi = (bytes[j + 2] as char).to_digit(16);
                let lo = (bytes[j + 3] as char).to_digit(16);
                if let (Some(h), Some(l)) = (hi, lo) {
                    // Two hex digits always fit in a byte.
                    #[allow(clippy::cast_possible_truncation)]
                    decoded.push(((h << 4) | l) as u8);
                    j += 4;
                    saw_numeric_escape = true;
                    continue;
                }
            } else if next.is_ascii_digit() {
                // Up to three digits, accumulated base 8; the value is masked to one byte.
                let mut val = u32::from(next - b'0');
                let mut len = 2;
                if j + 2 < bytes.len() && bytes[j + 2].is_ascii_digit() {
                    val = val * 8 + u32::from(bytes[j + 2] - b'0');
                    len = 3;
                    if j + 3 < bytes.len() && bytes[j + 3].is_ascii_digit() {
                        val = val * 8 + u32::from(bytes[j + 3] - b'0');
                        len = 4;
                    }
                }
                #[allow(clippy::cast_possible_truncation)]
                decoded.push((val & 0xFF) as u8);
                j += len;
                saw_numeric_escape = true;
                continue;
            }
            // Any other escape (including a malformed `\x`): keep the escaped byte.
            decoded.push(next);
            j += 2;
        } else {
            decoded.push(bytes[j]);
            j += 1;
        }
    }
    // The loop stops either at the closing quote or at the end of input.
    (saw_numeric_escape && j < bytes.len()).then(|| (decoded, j + 1))
}
905
/// Collects the text inside every top-level command or process substitution in `s`.
///
/// Recognised forms are backtick pairs and `$(...)`, `<(...)`, `>(...)`; parentheses are
/// balanced, so nested parentheses stay inside the enclosing result. Nested substitutions
/// are not reported separately. Scanning stops at an unterminated substitution, which
/// yields nothing.
pub(crate) fn extract_subshell_contents(s: &str) -> Vec<String> {
    let chars: Vec<char> = s.chars().collect();
    let mut found = Vec::new();
    let mut pos = 0;

    while let Some(&ch) = chars.get(pos) {
        if ch == '`' {
            let body_start = pos + 1;
            let closing = chars[body_start..]
                .iter()
                .position(|&c| c == '`')
                .map(|offset| body_start + offset);
            let Some(end) = closing else {
                // Unterminated backtick: nothing further can be extracted.
                break;
            };
            found.push(chars[body_start..end].iter().collect());
            pos = end + 1;
            continue;
        }

        let opens_substitution =
            matches!(ch, '$' | '<' | '>') && chars.get(pos + 1) == Some(&'(');
        if !opens_substitution {
            pos += 1;
            continue;
        }

        let body_start = pos + 2;
        let mut depth: usize = 1;
        let mut closing = None;
        for (idx, &c) in chars.iter().enumerate().skip(body_start) {
            match c {
                '(' => depth += 1,
                ')' => {
                    depth -= 1;
                    if depth == 0 {
                        closing = Some(idx);
                        break;
                    }
                }
                _ => {}
            }
        }
        let Some(end) = closing else {
            // Unbalanced parentheses: nothing further can be extracted.
            break;
        };
        found.push(chars[body_start..end].iter().collect());
        pos = end + 1;
    }

    found
}
968
/// Splits a normalized command line into individual commands, each as a list of words.
///
/// Commands are separated by `;`, `|`, `||`, `&&`, newlines and a lone `&` (background
/// operator). Treating `&` as a separator matters for blocklist checks: in
/// `sleep 1 & curl x` the `curl` invocation is a command of its own. Words are split on
/// whitespace and empty commands are dropped.
///
/// Redirections such as `2>&1` are split at the `&` as well; the stray fragment is harmless
/// for matching purposes.
pub(crate) fn tokenize_commands(normalized: &str) -> Vec<Vec<String>> {
    // Splitting on the single characters also covers `||` and `&&`: the empty segment
    // between the two characters is filtered out below.
    normalized
        .split([';', '|', '&', '\n'])
        .map(|segment| {
            segment
                .split_whitespace()
                .map(str::to_owned)
                .collect::<Vec<String>>()
        })
        .filter(|tokens| !tokens.is_empty())
        .collect()
}
984
/// Wrapper commands that [`tokens_match_pattern`] skips when locating the command word.
const TRANSPARENT_PREFIXES: &[&str] = &["env", "command", "exec", "nice", "nohup", "time", "xargs"];
988
/// Returns the part of `tok` after its last `/`, or all of `tok` when it contains none.
fn cmd_basename(tok: &str) -> &str {
    match tok.rfind('/') {
        Some(slash) => &tok[slash + 1..],
        None => tok,
    }
}
993
994pub(crate) fn tokens_match_pattern(tokens: &[String], pattern: &str) -> bool {
1001 if tokens.is_empty() || pattern.is_empty() {
1002 return false;
1003 }
1004 let pattern = pattern.trim();
1005 let pattern_tokens: Vec<&str> = pattern.split_whitespace().collect();
1006 if pattern_tokens.is_empty() {
1007 return false;
1008 }
1009
1010 let start = tokens
1012 .iter()
1013 .position(|t| !TRANSPARENT_PREFIXES.contains(&cmd_basename(t)))
1014 .unwrap_or(0);
1015 let effective = &tokens[start..];
1016 if effective.is_empty() {
1017 return false;
1018 }
1019
1020 if pattern_tokens.len() == 1 {
1021 let pat = pattern_tokens[0];
1022 let base = cmd_basename(&effective[0]);
1023 base == pat || base.starts_with(&format!("{pat}."))
1025 } else {
1026 let n = pattern_tokens.len().min(effective.len());
1028 let mut parts: Vec<&str> = vec![cmd_basename(&effective[0])];
1029 parts.extend(effective[1..n].iter().map(String::as_str));
1030 let joined = parts.join(" ");
1031 if joined.starts_with(pattern) {
1032 return true;
1033 }
1034 if effective.len() > n {
1035 let mut parts2: Vec<&str> = vec![cmd_basename(&effective[0])];
1036 parts2.extend(effective[1..=n].iter().map(String::as_str));
1037 parts2.join(" ").starts_with(pattern)
1038 } else {
1039 false
1040 }
1041 }
1042}
1043
1044fn extract_paths(code: &str) -> Vec<String> {
1045 let mut result = Vec::new();
1046
1047 let mut tokens: Vec<String> = Vec::new();
1049 let mut current = String::new();
1050 let mut chars = code.chars().peekable();
1051 while let Some(c) = chars.next() {
1052 match c {
1053 '"' | '\'' => {
1054 let quote = c;
1055 while let Some(&nc) = chars.peek() {
1056 if nc == quote {
1057 chars.next();
1058 break;
1059 }
1060 current.push(chars.next().unwrap());
1061 }
1062 }
1063 c if c.is_whitespace() || matches!(c, ';' | '|' | '&') => {
1064 if !current.is_empty() {
1065 tokens.push(std::mem::take(&mut current));
1066 }
1067 }
1068 _ => current.push(c),
1069 }
1070 }
1071 if !current.is_empty() {
1072 tokens.push(current);
1073 }
1074
1075 for token in tokens {
1076 let trimmed = token.trim_end_matches([';', '&', '|']).to_owned();
1077 if trimmed.is_empty() {
1078 continue;
1079 }
1080 if trimmed.starts_with('/')
1081 || trimmed.starts_with("./")
1082 || trimmed.starts_with("../")
1083 || trimmed == ".."
1084 || (trimmed.starts_with('.') && trimmed.contains('/'))
1085 || is_relative_path_token(&trimmed)
1086 {
1087 result.push(trimmed);
1088 }
1089 }
1090 result
1091}
1092
/// Tells whether `token` looks like a bare relative path such as `src/main.rs`.
///
/// A token qualifies when it contains a `/`, does not start with `/` or `.`, is not a URL
/// (`://`), is not a `NAME=value` assignment with an alphanumeric/underscore name, and
/// begins with an ASCII letter, digit or underscore.
fn is_relative_path_token(token: &str) -> bool {
    let has_separator = token.contains('/');
    let anchored = token.starts_with('/') || token.starts_with('.');
    if !has_separator || anchored || token.contains("://") {
        return false;
    }

    let is_word_char = |c: char| c.is_ascii_alphanumeric() || c == '_';

    // `NAME=some/value` is an assignment, not a path.
    let looks_like_assignment = token
        .split_once('=')
        .is_some_and(|(key, _)| key.chars().all(is_word_char));
    if looks_like_assignment {
        return false;
    }

    matches!(token.chars().next(), Some(first) if is_word_char(first))
}
1121
1122fn classify_shell_exit(
1128 exit_code: i32,
1129 output: &str,
1130) -> Option<crate::error_taxonomy::ToolErrorCategory> {
1131 use crate::error_taxonomy::ToolErrorCategory;
1132 match exit_code {
1133 126 => Some(ToolErrorCategory::PolicyBlocked),
1135 127 => Some(ToolErrorCategory::PermanentFailure),
1137 _ => {
1138 let lower = output.to_lowercase();
1139 if lower.contains("permission denied") {
1140 Some(ToolErrorCategory::PolicyBlocked)
1141 } else if lower.contains("no such file or directory") {
1142 Some(ToolErrorCategory::PermanentFailure)
1143 } else {
1144 None
1145 }
1146 }
1147 }
1148}
1149
/// Tells whether any `/`-separated segment of `path` is exactly `..`.
fn has_traversal(path: &str) -> bool {
    // A `..` segment is either the whole path, a prefix, a suffix, or enclosed by slashes.
    path == ".." || path.starts_with("../") || path.ends_with("/..") || path.contains("/../")
}
1153
1154fn extract_bash_blocks(text: &str) -> Vec<&str> {
1155 crate::executor::extract_fenced_blocks(text, "bash")
1156}
1157
1158async fn kill_process_tree(child: &mut tokio::process::Child) {
1162 #[cfg(unix)]
1163 if let Some(pid) = child.id() {
1164 let _ = Command::new("pkill")
1165 .args(["-KILL", "-P", &pid.to_string()])
1166 .status()
1167 .await;
1168 }
1169 let _ = child.kill().await;
1170}
1171
/// Structured result of one shell invocation.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ShellOutputEnvelope {
    /// Captured standard output.
    pub stdout: String,
    /// Captured standard error; on timeout or cancellation a short notice is appended.
    pub stderr: String,
    /// Exit code of the command. `execute_bash` reports `1` when the process could not be
    /// spawned, timed out, or ended without an exit code, and `130` when it was cancelled.
    pub exit_code: i32,
    /// Set by the executor when the filtered output is shorter than the raw output.
    pub truncated: bool,
}
1187
1188#[allow(clippy::too_many_lines)]
1189async fn execute_bash(
1190 code: &str,
1191 timeout: Duration,
1192 event_tx: Option<&ToolEventTx>,
1193 cancel_token: Option<&CancellationToken>,
1194 extra_env: Option<&std::collections::HashMap<String, String>>,
1195 env_blocklist: &[String],
1196) -> (ShellOutputEnvelope, String) {
1197 use std::process::Stdio;
1198 use tokio::io::{AsyncBufReadExt, BufReader};
1199
1200 let timeout_secs = timeout.as_secs();
1201
1202 let mut cmd = Command::new("bash");
1203 cmd.arg("-c")
1204 .arg(code)
1205 .stdout(Stdio::piped())
1206 .stderr(Stdio::piped());
1207
1208 for (key, _) in std::env::vars() {
1209 if env_blocklist
1210 .iter()
1211 .any(|prefix| key.starts_with(prefix.as_str()))
1212 {
1213 cmd.env_remove(&key);
1214 }
1215 }
1216
1217 if let Some(env) = extra_env {
1218 cmd.envs(env);
1219 }
1220 let child_result = cmd.spawn();
1221
1222 let mut child = match child_result {
1223 Ok(c) => c,
1224 Err(e) => {
1225 let msg = format!("[error] {e}");
1226 return (
1227 ShellOutputEnvelope {
1228 stdout: String::new(),
1229 stderr: msg.clone(),
1230 exit_code: 1,
1231 truncated: false,
1232 },
1233 msg,
1234 );
1235 }
1236 };
1237
1238 let stdout = child.stdout.take().expect("stdout piped");
1239 let stderr = child.stderr.take().expect("stderr piped");
1240
1241 let (line_tx, mut line_rx) = tokio::sync::mpsc::channel::<(bool, String)>(64);
1244
1245 let stdout_tx = line_tx.clone();
1246 tokio::spawn(async move {
1247 let mut reader = BufReader::new(stdout);
1248 let mut buf = String::new();
1249 while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1250 let _ = stdout_tx.send((false, buf.clone())).await;
1251 buf.clear();
1252 }
1253 });
1254
1255 tokio::spawn(async move {
1256 let mut reader = BufReader::new(stderr);
1257 let mut buf = String::new();
1258 while reader.read_line(&mut buf).await.unwrap_or(0) > 0 {
1259 let _ = line_tx.send((true, buf.clone())).await;
1260 buf.clear();
1261 }
1262 });
1263
1264 let mut combined = String::new();
1265 let mut stdout_buf = String::new();
1266 let mut stderr_buf = String::new();
1267 let deadline = tokio::time::Instant::now() + timeout;
1268
1269 loop {
1270 tokio::select! {
1271 line = line_rx.recv() => {
1272 match line {
1273 Some((is_stderr, chunk)) => {
1274 let interleaved = if is_stderr {
1275 format!("[stderr] {chunk}")
1276 } else {
1277 chunk.clone()
1278 };
1279 if let Some(tx) = event_tx {
1280 let _ = tx.send(ToolEvent::OutputChunk {
1281 tool_name: ToolName::new("bash"),
1282 command: code.to_owned(),
1283 chunk: interleaved.clone(),
1284 });
1285 }
1286 combined.push_str(&interleaved);
1287 if is_stderr {
1288 stderr_buf.push_str(&chunk);
1289 } else {
1290 stdout_buf.push_str(&chunk);
1291 }
1292 }
1293 None => break,
1294 }
1295 }
1296 () = tokio::time::sleep_until(deadline) => {
1297 kill_process_tree(&mut child).await;
1298 let msg = format!("[error] command timed out after {timeout_secs}s");
1299 return (
1300 ShellOutputEnvelope {
1301 stdout: stdout_buf,
1302 stderr: format!("{stderr_buf}command timed out after {timeout_secs}s"),
1303 exit_code: 1,
1304 truncated: false,
1305 },
1306 msg,
1307 );
1308 }
1309 () = async {
1310 match cancel_token {
1311 Some(t) => t.cancelled().await,
1312 None => std::future::pending().await,
1313 }
1314 } => {
1315 kill_process_tree(&mut child).await;
1316 return (
1317 ShellOutputEnvelope {
1318 stdout: stdout_buf,
1319 stderr: format!("{stderr_buf}operation aborted"),
1320 exit_code: 130,
1321 truncated: false,
1322 },
1323 "[cancelled] operation aborted".to_string(),
1324 );
1325 }
1326 }
1327 }
1328
1329 let status = child.wait().await;
1330 let exit_code = status.ok().and_then(|s| s.code()).unwrap_or(1);
1331
1332 let (envelope, combined) = if combined.is_empty() {
1333 (
1334 ShellOutputEnvelope {
1335 stdout: String::new(),
1336 stderr: String::new(),
1337 exit_code,
1338 truncated: false,
1339 },
1340 "(no output)".to_string(),
1341 )
1342 } else {
1343 (
1344 ShellOutputEnvelope {
1345 stdout: stdout_buf.trim_end().to_owned(),
1346 stderr: stderr_buf.trim_end().to_owned(),
1347 exit_code,
1348 truncated: false,
1349 },
1350 combined,
1351 )
1352 };
1353 (envelope, combined)
1354}
1355
1356#[cfg(test)]
1357mod tests;