1use std::collections::BTreeMap;
13use std::path::PathBuf;
14use std::sync::Arc;
15use std::time::Duration;
16
17use anyhow::Result;
18use async_trait::async_trait;
19use tracing::debug;
20
21use brainwires_core::{Tool, ToolContext, ToolResult, ToolUse};
22use brainwires_sandbox::{ExecSpec, Sandbox, SandboxError, SandboxPolicy};
23
24use crate::executor::ToolExecutor;
25
/// Working directory handed to the sandbox when the policy defines no
/// workspace mount and the tool context's working directory is empty.
const DEFAULT_WORKDIR: &str = "/workspace";
29
30pub struct SandboxedToolExecutor<E: ToolExecutor> {
34 inner: E,
35 sandbox: Arc<dyn Sandbox>,
36 policy: SandboxPolicy,
37 default_timeout: Duration,
38}
39
40impl<E: ToolExecutor> SandboxedToolExecutor<E> {
41 pub fn new(inner: E, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
44 Self {
45 inner,
46 sandbox,
47 policy,
48 default_timeout: Duration::from_secs(300),
49 }
50 }
51
52 pub fn with_timeout(mut self, timeout: Duration) -> Self {
54 self.default_timeout = timeout;
55 self
56 }
57
58 pub fn inner(&self) -> &E {
60 &self.inner
61 }
62
63 pub fn policy(&self) -> &SandboxPolicy {
65 &self.policy
66 }
67
68 fn workdir_for(&self, context: &ToolContext) -> PathBuf {
77 if let Some(ref mount) = self.policy.workspace_mount {
78 return mount.clone();
79 }
80 if !context.working_directory.is_empty() {
81 return PathBuf::from(&context.working_directory);
82 }
83 PathBuf::from(DEFAULT_WORKDIR)
84 }
85
86 async fn run_in_sandbox(
87 &self,
88 tool_use_id: &str,
89 tool_name: &str,
90 cmd: Vec<String>,
91 workdir: PathBuf,
92 ) -> ToolResult {
93 let spec = ExecSpec {
96 cmd,
97 env: BTreeMap::new(),
98 workdir,
99 stdin: None,
100 mounts: vec![],
101 timeout: self.default_timeout,
102 };
103
104 let handle = match self.sandbox.spawn(spec).await {
105 Ok(h) => h,
106 Err(e) => return sandbox_error_to_result(tool_use_id, e, self.default_timeout),
107 };
108
109 match self.sandbox.wait(handle).await {
110 Ok(output) => {
111 debug!(
112 tool = tool_name,
113 exit_code = output.exit_code,
114 wall_time_ms = output.wall_time.as_millis() as u64,
115 "sandboxed tool call completed"
116 );
117 let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
118 if output.exit_code == 0 {
119 ToolResult::success(tool_use_id.to_string(), stdout)
120 } else {
121 let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
122 ToolResult::error(
123 tool_use_id.to_string(),
124 format!("exit {}: {}", output.exit_code, stderr),
125 )
126 }
127 }
128 Err(e) => sandbox_error_to_result(tool_use_id, e, self.default_timeout),
129 }
130 }
131
132 async fn run_bash(&self, tool_use: &ToolUse, context: &ToolContext) -> ToolResult {
133 let Some(command) = tool_use.input.get("command").and_then(|v| v.as_str()) else {
134 return ToolResult::error(
135 tool_use.id.clone(),
136 "sandbox: missing or non-string 'command' parameter".to_string(),
137 );
138 };
139 let cmd = vec!["/bin/sh".to_string(), "-c".to_string(), command.to_string()];
140 self.run_in_sandbox(&tool_use.id, &tool_use.name, cmd, self.workdir_for(context))
141 .await
142 }
143
144 async fn run_code_exec(&self, tool_use: &ToolUse, context: &ToolContext) -> ToolResult {
145 let Some(language) = tool_use.input.get("language").and_then(|v| v.as_str()) else {
146 return ToolResult::error(
147 tool_use.id.clone(),
148 "sandbox: missing or non-string 'language' parameter".to_string(),
149 );
150 };
151 let Some(code) = tool_use.input.get("code").and_then(|v| v.as_str()) else {
152 return ToolResult::error(
153 tool_use.id.clone(),
154 "sandbox: missing or non-string 'code' parameter".to_string(),
155 );
156 };
157
158 let lang = language.to_lowercase();
159 let cmd = match lang.as_str() {
160 "python" | "python3" => {
161 vec!["python3".to_string(), "-c".to_string(), code.to_string()]
162 }
163 "node" | "javascript" | "js" => {
164 vec!["node".to_string(), "-e".to_string(), code.to_string()]
165 }
166 "bash" | "sh" | "shell" => {
167 vec!["/bin/sh".to_string(), "-c".to_string(), code.to_string()]
168 }
169 other => {
170 return ToolResult::error(
171 tool_use.id.clone(),
172 format!("sandbox does not yet support language '{other}'"),
173 );
174 }
175 };
176
177 self.run_in_sandbox(&tool_use.id, &tool_use.name, cmd, self.workdir_for(context))
178 .await
179 }
180}
181
182#[async_trait]
183impl<E: ToolExecutor> ToolExecutor for SandboxedToolExecutor<E> {
184 async fn execute(&self, tool_use: &ToolUse, context: &ToolContext) -> Result<ToolResult> {
185 match tool_use.name.as_str() {
186 "bash" | "execute_command" => Ok(self.run_bash(tool_use, context).await),
187 "code_exec" | "execute_code" => Ok(self.run_code_exec(tool_use, context).await),
188 _ => self.inner.execute(tool_use, context).await,
189 }
190 }
191
192 fn available_tools(&self) -> Vec<Tool> {
193 self.inner.available_tools()
194 }
195}
196
197fn sandbox_error_to_result(tool_use_id: &str, err: SandboxError, timeout: Duration) -> ToolResult {
198 let msg = match err {
199 SandboxError::Timeout => format!("sandboxed command timed out after {:?}", timeout),
200 SandboxError::PolicyViolation(reason) => format!("policy violation: {reason}"),
201 other => format!("sandbox error: {other}"),
202 };
203 ToolResult::error(tool_use_id.to_string(), msg)
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use serde_json::{Value, json};
    use std::sync::Mutex;
    use std::sync::atomic::{AtomicUsize, Ordering};

    use brainwires_core::{ToolContext, ToolUse};
    use brainwires_sandbox::{ExecHandle, ExecOutput, ExecSpec, Sandbox, SandboxRuntime};

    /// Sandbox double that records every spawned spec and replays a canned
    /// outcome from `wait`.
    struct MockSandbox {
        exit_code: i32,
        stdout: Vec<u8>,
        stderr: Vec<u8>,
        /// When true, `wait` reports `SandboxError::Timeout` instead of output.
        should_timeout: bool,
        spawned_specs: Mutex<Vec<ExecSpec>>,
    }

    impl MockSandbox {
        /// Mock whose `wait` returns the given exit code and output streams.
        fn new(exit_code: i32, stdout: &[u8], stderr: &[u8]) -> Arc<Self> {
            Arc::new(Self {
                exit_code,
                stdout: stdout.to_vec(),
                stderr: stderr.to_vec(),
                should_timeout: false,
                spawned_specs: Mutex::new(Vec::new()),
            })
        }

        /// Mock whose `wait` always fails with a timeout.
        fn timing_out() -> Arc<Self> {
            Arc::new(Self {
                exit_code: 0,
                stdout: Vec::new(),
                stderr: Vec::new(),
                should_timeout: true,
                spawned_specs: Mutex::new(Vec::new()),
            })
        }

        /// Snapshot of every spec passed to `spawn`, in call order.
        fn specs(&self) -> Vec<ExecSpec> {
            self.spawned_specs.lock().unwrap().clone()
        }
    }

    #[async_trait]
    impl Sandbox for MockSandbox {
        async fn spawn(&self, spec: ExecSpec) -> brainwires_sandbox::Result<ExecHandle> {
            self.spawned_specs.lock().unwrap().push(spec);
            Ok(ExecHandle::new())
        }

        async fn wait(&self, _handle: ExecHandle) -> brainwires_sandbox::Result<ExecOutput> {
            if self.should_timeout {
                return Err(SandboxError::Timeout);
            }
            Ok(ExecOutput {
                exit_code: self.exit_code,
                stdout: self.stdout.clone(),
                stderr: self.stderr.clone(),
                wall_time: Duration::from_millis(1),
            })
        }

        async fn shutdown(&self) -> brainwires_sandbox::Result<()> {
            Ok(())
        }

        fn runtime(&self) -> SandboxRuntime {
            SandboxRuntime::Host
        }
    }

    /// Inner executor double that counts how many calls were delegated to it.
    struct CountingInner {
        calls: AtomicUsize,
    }

    impl CountingInner {
        fn new() -> Self {
            Self {
                calls: AtomicUsize::new(0),
            }
        }

        fn call_count(&self) -> usize {
            self.calls.load(Ordering::SeqCst)
        }
    }

    #[async_trait]
    impl ToolExecutor for CountingInner {
        async fn execute(&self, tool_use: &ToolUse, _ctx: &ToolContext) -> Result<ToolResult> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            Ok(ToolResult::success(
                tool_use.id.clone(),
                "inner-executed".to_string(),
            ))
        }

        fn available_tools(&self) -> Vec<Tool> {
            Vec::new()
        }
    }

    /// Tool context with a fixed working directory.
    fn ctx() -> ToolContext {
        ToolContext {
            working_directory: "/tmp".to_string(),
            ..Default::default()
        }
    }

    /// Executor under test: a counting inner executor wrapped around `sandbox`
    /// with the default policy.
    fn executor(sandbox: &Arc<MockSandbox>) -> SandboxedToolExecutor<CountingInner> {
        SandboxedToolExecutor::new(
            CountingInner::new(),
            Arc::clone(sandbox) as Arc<dyn Sandbox>,
            SandboxPolicy::default(),
        )
    }

    /// Builds a tool call with the given id, tool name and JSON input.
    fn tool_use(id: &str, name: &str, input: Value) -> ToolUse {
        ToolUse {
            id: id.to_string(),
            name: name.to_string(),
            input,
        }
    }

    #[tokio::test]
    async fn bash_is_routed_through_sandbox_and_inner_is_not_called() {
        let sandbox = MockSandbox::new(0, b"hello from sandbox\n", b"");
        let exec = executor(&sandbox);
        let call = tool_use("t-1", "bash", json!({ "command": "echo hello" }));

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(!result.is_error, "unexpected error: {}", result.content);
        assert!(result.content.contains("hello from sandbox"));
        assert_eq!(
            exec.inner().call_count(),
            0,
            "inner executor must not be called for bash"
        );

        let specs = sandbox.specs();
        assert_eq!(specs.len(), 1);
        assert_eq!(
            specs[0].cmd,
            vec![
                "/bin/sh".to_string(),
                "-c".to_string(),
                "echo hello".to_string()
            ]
        );
        assert!(specs[0].env.is_empty(), "host env must not leak");
    }

    #[tokio::test]
    async fn non_dangerous_tool_delegates_to_inner_executor() {
        let sandbox = MockSandbox::new(0, b"should not appear", b"");
        let exec = executor(&sandbox);
        let call = tool_use("t-2", "read_file", json!({ "path": "/etc/hosts" }));

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(!result.is_error);
        assert_eq!(result.content, "inner-executed");
        assert_eq!(exec.inner().call_count(), 1);
        assert!(
            sandbox.specs().is_empty(),
            "sandbox must not be used for delegated tools"
        );
    }

    #[tokio::test]
    async fn non_zero_exit_becomes_error_result_with_exit_code() {
        let sandbox = MockSandbox::new(42, b"", b"boom");
        let exec = executor(&sandbox);
        let call = tool_use("t-3", "execute_command", json!({ "command": "false" }));

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(result.is_error);
        assert!(
            result.content.contains("exit 42"),
            "content was: {}",
            result.content
        );
        assert!(result.content.contains("boom"));
    }

    #[tokio::test]
    async fn timeout_becomes_error_result_containing_timed_out() {
        let sandbox = MockSandbox::timing_out();
        let exec = executor(&sandbox).with_timeout(Duration::from_millis(5));
        let call = tool_use("t-4", "bash", json!({ "command": "sleep 999" }));

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(result.is_error);
        assert!(
            result.content.contains("timed out"),
            "content was: {}",
            result.content
        );
    }

    #[tokio::test]
    async fn code_exec_selects_interpreter_case_insensitively() {
        let sandbox = MockSandbox::new(0, b"4\n", b"");
        let exec = executor(&sandbox);
        let call = tool_use(
            "t-5",
            "code_exec",
            json!({ "language": "Python", "code": "print(2 + 2)" }),
        );

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(!result.is_error, "unexpected error: {}", result.content);
        assert_eq!(exec.inner().call_count(), 0);

        let specs = sandbox.specs();
        assert_eq!(specs.len(), 1);
        assert_eq!(
            specs[0].cmd,
            vec![
                "python3".to_string(),
                "-c".to_string(),
                "print(2 + 2)".to_string()
            ]
        );
    }

    #[tokio::test]
    async fn missing_command_is_rejected_without_spawning() {
        let sandbox = MockSandbox::new(0, b"", b"");
        let exec = executor(&sandbox);
        let call = tool_use("t-6", "bash", json!({}));

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(result.is_error);
        assert!(
            result.content.contains("'command'"),
            "content was: {}",
            result.content
        );
        assert!(sandbox.specs().is_empty(), "nothing should be spawned");
    }

    #[tokio::test]
    async fn unsupported_language_is_rejected_without_spawning() {
        let sandbox = MockSandbox::new(0, b"", b"");
        let exec = executor(&sandbox);
        let call = tool_use(
            "t-7",
            "execute_code",
            json!({ "language": "cobol", "code": "DISPLAY 'HI'." }),
        );

        let result = exec.execute(&call, &ctx()).await.expect("execute");
        assert!(result.is_error);
        assert!(
            result.content.contains("cobol"),
            "content was: {}",
            result.content
        );
        assert!(sandbox.specs().is_empty(), "nothing should be spawned");
    }
}