Skip to main content

brainwires_tool_runtime/
sandbox_executor.rs

1//! Sandboxed tool executor decorator.
2//!
3//! Wraps any [`ToolExecutor`] and intercepts calls to known-dangerous tool
4//! names (`bash` / `execute_command` / `code_exec` / `execute_code`), running
5//! them inside a [`brainwires_sandbox::Sandbox`] instead of on the host. All
6//! other tool calls pass through unchanged.
7//!
8//! Sandbox errors (timeout, policy violation, docker failures) are always
9//! returned as [`ToolResult::error`] so the agent loop treats them as
10//! ordinary tool results rather than hard errors that abort the run.
11
12use std::collections::BTreeMap;
13use std::path::PathBuf;
14use std::sync::Arc;
15use std::time::Duration;
16
17use anyhow::Result;
18use async_trait::async_trait;
19use tracing::debug;
20
21use brainwires_core::{Tool, ToolContext, ToolResult, ToolUse};
22use brainwires_sandbox::{ExecSpec, Sandbox, SandboxError, SandboxPolicy};
23
24use crate::executor::ToolExecutor;
25
/// Last-resort working directory for sandboxed commands.
///
/// Used only when the policy has no `workspace_mount` and the
/// [`ToolContext::working_directory`] string is empty.
const DEFAULT_WORKDIR: &str = "/workspace";
29
30/// Decorator that routes dangerous tool calls (`bash`, `execute_command`,
31/// `code_exec`, `execute_code`) through a [`Sandbox`] and forwards everything
32/// else to `inner`.
33pub struct SandboxedToolExecutor<E: ToolExecutor> {
34    inner: E,
35    sandbox: Arc<dyn Sandbox>,
36    policy: SandboxPolicy,
37    default_timeout: Duration,
38}
39
40impl<E: ToolExecutor> SandboxedToolExecutor<E> {
41    /// Wrap `inner` so dangerous calls are routed through `sandbox` under
42    /// `policy`. Defaults to a 5-minute wall-clock timeout per sandboxed call.
43    pub fn new(inner: E, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
44        Self {
45            inner,
46            sandbox,
47            policy,
48            default_timeout: Duration::from_secs(300),
49        }
50    }
51
52    /// Override the per-call wall-clock timeout used for sandboxed commands.
53    pub fn with_timeout(mut self, timeout: Duration) -> Self {
54        self.default_timeout = timeout;
55        self
56    }
57
58    /// Borrow the wrapped executor.
59    pub fn inner(&self) -> &E {
60        &self.inner
61    }
62
63    /// Borrow the active sandbox policy.
64    pub fn policy(&self) -> &SandboxPolicy {
65        &self.policy
66    }
67
68    /// Resolve the workdir to use inside the sandbox.
69    ///
70    /// Preference order:
71    /// 1. `policy.workspace_mount` (keeps the sandbox pinned inside a known
72    ///    mount so the process can't land on a host path that doesn't exist
73    ///    inside the container).
74    /// 2. `ToolContext::working_directory` (the agent's cwd).
75    /// 3. `/workspace` (final fallback).
76    fn workdir_for(&self, context: &ToolContext) -> PathBuf {
77        if let Some(ref mount) = self.policy.workspace_mount {
78            return mount.clone();
79        }
80        if !context.working_directory.is_empty() {
81            return PathBuf::from(&context.working_directory);
82        }
83        PathBuf::from(DEFAULT_WORKDIR)
84    }
85
86    async fn run_in_sandbox(
87        &self,
88        tool_use_id: &str,
89        tool_name: &str,
90        cmd: Vec<String>,
91        workdir: PathBuf,
92    ) -> ToolResult {
93        // Host env is intentionally NOT inherited — any secret leakage here
94        // would defeat the isolation the caller is paying for.
95        let spec = ExecSpec {
96            cmd,
97            env: BTreeMap::new(),
98            workdir,
99            stdin: None,
100            mounts: vec![],
101            timeout: self.default_timeout,
102        };
103
104        let handle = match self.sandbox.spawn(spec).await {
105            Ok(h) => h,
106            Err(e) => return sandbox_error_to_result(tool_use_id, e, self.default_timeout),
107        };
108
109        match self.sandbox.wait(handle).await {
110            Ok(output) => {
111                debug!(
112                    tool = tool_name,
113                    exit_code = output.exit_code,
114                    wall_time_ms = output.wall_time.as_millis() as u64,
115                    "sandboxed tool call completed"
116                );
117                let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
118                if output.exit_code == 0 {
119                    ToolResult::success(tool_use_id.to_string(), stdout)
120                } else {
121                    let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
122                    ToolResult::error(
123                        tool_use_id.to_string(),
124                        format!("exit {}: {}", output.exit_code, stderr),
125                    )
126                }
127            }
128            Err(e) => sandbox_error_to_result(tool_use_id, e, self.default_timeout),
129        }
130    }
131
132    async fn run_bash(&self, tool_use: &ToolUse, context: &ToolContext) -> ToolResult {
133        let Some(command) = tool_use.input.get("command").and_then(|v| v.as_str()) else {
134            return ToolResult::error(
135                tool_use.id.clone(),
136                "sandbox: missing or non-string 'command' parameter".to_string(),
137            );
138        };
139        let cmd = vec!["/bin/sh".to_string(), "-c".to_string(), command.to_string()];
140        self.run_in_sandbox(&tool_use.id, &tool_use.name, cmd, self.workdir_for(context))
141            .await
142    }
143
144    async fn run_code_exec(&self, tool_use: &ToolUse, context: &ToolContext) -> ToolResult {
145        let Some(language) = tool_use.input.get("language").and_then(|v| v.as_str()) else {
146            return ToolResult::error(
147                tool_use.id.clone(),
148                "sandbox: missing or non-string 'language' parameter".to_string(),
149            );
150        };
151        let Some(code) = tool_use.input.get("code").and_then(|v| v.as_str()) else {
152            return ToolResult::error(
153                tool_use.id.clone(),
154                "sandbox: missing or non-string 'code' parameter".to_string(),
155            );
156        };
157
158        let lang = language.to_lowercase();
159        let cmd = match lang.as_str() {
160            "python" | "python3" => {
161                vec!["python3".to_string(), "-c".to_string(), code.to_string()]
162            }
163            "node" | "javascript" | "js" => {
164                vec!["node".to_string(), "-e".to_string(), code.to_string()]
165            }
166            "bash" | "sh" | "shell" => {
167                vec!["/bin/sh".to_string(), "-c".to_string(), code.to_string()]
168            }
169            other => {
170                return ToolResult::error(
171                    tool_use.id.clone(),
172                    format!("sandbox does not yet support language '{other}'"),
173                );
174            }
175        };
176
177        self.run_in_sandbox(&tool_use.id, &tool_use.name, cmd, self.workdir_for(context))
178            .await
179    }
180}
181
182#[async_trait]
183impl<E: ToolExecutor> ToolExecutor for SandboxedToolExecutor<E> {
184    async fn execute(&self, tool_use: &ToolUse, context: &ToolContext) -> Result<ToolResult> {
185        match tool_use.name.as_str() {
186            "bash" | "execute_command" => Ok(self.run_bash(tool_use, context).await),
187            "code_exec" | "execute_code" => Ok(self.run_code_exec(tool_use, context).await),
188            _ => self.inner.execute(tool_use, context).await,
189        }
190    }
191
192    fn available_tools(&self) -> Vec<Tool> {
193        self.inner.available_tools()
194    }
195}
196
197fn sandbox_error_to_result(tool_use_id: &str, err: SandboxError, timeout: Duration) -> ToolResult {
198    let msg = match err {
199        SandboxError::Timeout => format!("sandboxed command timed out after {:?}", timeout),
200        SandboxError::PolicyViolation(reason) => format!("policy violation: {reason}"),
201        other => format!("sandbox error: {other}"),
202    };
203    ToolResult::error(tool_use_id.to_string(), msg)
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use serde_json::json;
    use std::sync::Mutex;
    use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};

    use brainwires_core::{ToolContext, ToolUse};
    use brainwires_sandbox::{ExecHandle, ExecOutput, ExecSpec, Sandbox, SandboxRuntime};

    /// Scripted [`Sandbox`]: records every spec passed to `spawn` and returns
    /// a canned output (or a timeout error) from `wait`.
    struct MockSandbox {
        exit_code: i32,
        stdout: Vec<u8>,
        stderr: Vec<u8>,
        should_timeout: AtomicBool,
        spawned_specs: Mutex<Vec<ExecSpec>>,
    }

    impl MockSandbox {
        /// Mock whose `wait` succeeds with the given exit code and streams.
        fn new(exit_code: i32, stdout: &[u8], stderr: &[u8]) -> Arc<Self> {
            Arc::new(Self {
                exit_code,
                stdout: stdout.to_vec(),
                stderr: stderr.to_vec(),
                should_timeout: AtomicBool::new(false),
                spawned_specs: Mutex::new(Vec::new()),
            })
        }

        /// Mock whose `wait` always fails with [`SandboxError::Timeout`].
        fn timing_out() -> Arc<Self> {
            Arc::new(Self {
                exit_code: 0,
                stdout: Vec::new(),
                stderr: Vec::new(),
                should_timeout: AtomicBool::new(true),
                spawned_specs: Mutex::new(Vec::new()),
            })
        }

        /// Snapshot of every spec that has been passed to `spawn` so far.
        fn specs(&self) -> Vec<ExecSpec> {
            self.spawned_specs.lock().unwrap().clone()
        }
    }

    #[async_trait]
    impl Sandbox for MockSandbox {
        async fn spawn(&self, spec: ExecSpec) -> brainwires_sandbox::Result<ExecHandle> {
            self.spawned_specs.lock().unwrap().push(spec);
            Ok(ExecHandle::new())
        }

        async fn wait(&self, _handle: ExecHandle) -> brainwires_sandbox::Result<ExecOutput> {
            if self.should_timeout.load(Ordering::SeqCst) {
                return Err(SandboxError::Timeout);
            }
            Ok(ExecOutput {
                exit_code: self.exit_code,
                stdout: self.stdout.clone(),
                stderr: self.stderr.clone(),
                wall_time: Duration::from_millis(1),
            })
        }

        async fn shutdown(&self) -> brainwires_sandbox::Result<()> {
            Ok(())
        }

        fn runtime(&self) -> SandboxRuntime {
            SandboxRuntime::Host
        }
    }

    /// Inner executor that counts how often it is invoked and always
    /// succeeds with the content `"inner-executed"`.
    struct CountingInner {
        calls: AtomicUsize,
    }

    impl CountingInner {
        fn new() -> Self {
            Self {
                calls: AtomicUsize::new(0),
            }
        }

        fn call_count(&self) -> usize {
            self.calls.load(Ordering::SeqCst)
        }
    }

    #[async_trait]
    impl ToolExecutor for CountingInner {
        async fn execute(&self, tool_use: &ToolUse, _ctx: &ToolContext) -> Result<ToolResult> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            Ok(ToolResult::success(
                tool_use.id.clone(),
                "inner-executed".to_string(),
            ))
        }

        fn available_tools(&self) -> Vec<Tool> {
            Vec::new()
        }
    }

    /// Tool context with `/tmp` as the working directory.
    fn ctx() -> ToolContext {
        ToolContext {
            working_directory: "/tmp".to_string(),
            ..Default::default()
        }
    }

    /// Executor under test: a [`CountingInner`] wrapped around `sandbox`
    /// with the default policy.
    fn executor_for(sandbox: &Arc<MockSandbox>) -> SandboxedToolExecutor<CountingInner> {
        SandboxedToolExecutor::new(
            CountingInner::new(),
            sandbox.clone() as Arc<dyn Sandbox>,
            SandboxPolicy::default(),
        )
    }

    /// Shorthand for building a [`ToolUse`] from string slices.
    fn tool_use(id: &str, name: &str, input: serde_json::Value) -> ToolUse {
        ToolUse {
            id: id.to_string(),
            name: name.to_string(),
            input,
        }
    }

    #[tokio::test]
    async fn bash_is_routed_through_sandbox_and_inner_is_not_called() {
        let sandbox = MockSandbox::new(0, b"hello from sandbox\n", b"");
        let exec = executor_for(&sandbox);

        let call = tool_use("t-1", "bash", json!({ "command": "echo hello" }));
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(!result.is_error, "unexpected error: {}", result.content);
        assert!(result.content.contains("hello from sandbox"));
        assert_eq!(
            exec.inner().call_count(),
            0,
            "inner executor must not be called for bash"
        );

        let specs = sandbox.specs();
        assert_eq!(specs.len(), 1);
        assert_eq!(
            specs[0].cmd,
            vec![
                "/bin/sh".to_string(),
                "-c".to_string(),
                "echo hello".to_string()
            ]
        );
        assert!(specs[0].env.is_empty(), "host env must not leak");
    }

    #[tokio::test]
    async fn non_dangerous_tool_delegates_to_inner_executor() {
        let sandbox = MockSandbox::new(0, b"should not appear", b"");
        let exec = executor_for(&sandbox);

        let call = tool_use("t-2", "read_file", json!({ "path": "/etc/hosts" }));
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(!result.is_error);
        assert_eq!(result.content, "inner-executed");
        assert_eq!(exec.inner().call_count(), 1);
        assert!(
            sandbox.specs().is_empty(),
            "sandbox must not be used for pass-through tools"
        );
    }

    #[tokio::test]
    async fn non_zero_exit_becomes_error_result_with_exit_code() {
        let sandbox = MockSandbox::new(42, b"", b"boom");
        let exec = executor_for(&sandbox);

        let call = tool_use("t-3", "execute_command", json!({ "command": "false" }));
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(result.is_error);
        assert!(
            result.content.contains("exit 42"),
            "content was: {}",
            result.content
        );
        assert!(result.content.contains("boom"));
    }

    #[tokio::test]
    async fn timeout_becomes_error_result_containing_timed_out() {
        let sandbox = MockSandbox::timing_out();
        let exec = executor_for(&sandbox).with_timeout(Duration::from_millis(5));

        let call = tool_use("t-4", "bash", json!({ "command": "sleep 999" }));
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(result.is_error);
        assert!(
            result.content.contains("timed out"),
            "content was: {}",
            result.content
        );
        assert!(
            result.content.contains("5ms"),
            "configured timeout should be reported, content was: {}",
            result.content
        );
    }

    #[tokio::test]
    async fn code_exec_maps_language_to_interpreter_case_insensitively() {
        let sandbox = MockSandbox::new(0, b"1\n", b"");
        let exec = executor_for(&sandbox);

        let call = tool_use(
            "t-5",
            "code_exec",
            json!({ "language": "Python", "code": "print(1)" }),
        );
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(!result.is_error, "unexpected error: {}", result.content);
        assert_eq!(
            exec.inner().call_count(),
            0,
            "inner executor must not be called for code_exec"
        );

        let specs = sandbox.specs();
        assert_eq!(specs.len(), 1);
        assert_eq!(
            specs[0].cmd,
            vec![
                "python3".to_string(),
                "-c".to_string(),
                "print(1)".to_string()
            ]
        );
        assert!(specs[0].env.is_empty(), "host env must not leak");
    }

    #[tokio::test]
    async fn unsupported_language_is_rejected_without_spawning() {
        let sandbox = MockSandbox::new(0, b"", b"");
        let exec = executor_for(&sandbox);

        let call = tool_use(
            "t-6",
            "execute_code",
            json!({ "language": "ruby", "code": "puts 1" }),
        );
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(result.is_error);
        assert!(
            result.content.contains("ruby"),
            "content was: {}",
            result.content
        );
        assert!(sandbox.specs().is_empty(), "nothing must be spawned");
        assert_eq!(exec.inner().call_count(), 0);
    }

    #[tokio::test]
    async fn missing_command_parameter_is_rejected_without_spawning() {
        let sandbox = MockSandbox::new(0, b"", b"");
        let exec = executor_for(&sandbox);

        let call = tool_use("t-7", "bash", json!({}));
        let result = exec.execute(&call, &ctx()).await.expect("execute");

        assert!(result.is_error);
        assert!(
            result.content.contains("'command'"),
            "content was: {}",
            result.content
        );
        assert!(sandbox.specs().is_empty(), "nothing must be spawned");
        assert_eq!(exec.inner().call_count(), 0);
    }
}