use std::collections::BTreeMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use async_trait::async_trait;
use tracing::debug;
use brainwires_core::{Tool, ToolContext, ToolResult, ToolUse};
use brainwires_sandbox::{ExecSpec, Sandbox, SandboxError, SandboxPolicy};
use crate::executor::ToolExecutor;
const DEFAULT_WORKDIR: &str = "/workspace";
pub struct SandboxedToolExecutor<E: ToolExecutor> {
inner: E,
sandbox: Arc<dyn Sandbox>,
policy: SandboxPolicy,
default_timeout: Duration,
}
impl<E: ToolExecutor> SandboxedToolExecutor<E> {
pub fn new(inner: E, sandbox: Arc<dyn Sandbox>, policy: SandboxPolicy) -> Self {
Self {
inner,
sandbox,
policy,
default_timeout: Duration::from_secs(300),
}
}
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.default_timeout = timeout;
self
}
pub fn inner(&self) -> &E {
&self.inner
}
pub fn policy(&self) -> &SandboxPolicy {
&self.policy
}
fn workdir_for(&self, context: &ToolContext) -> PathBuf {
if let Some(ref mount) = self.policy.workspace_mount {
return mount.clone();
}
if !context.working_directory.is_empty() {
return PathBuf::from(&context.working_directory);
}
PathBuf::from(DEFAULT_WORKDIR)
}
async fn run_in_sandbox(
&self,
tool_use_id: &str,
tool_name: &str,
cmd: Vec<String>,
workdir: PathBuf,
) -> ToolResult {
let spec = ExecSpec {
cmd,
env: BTreeMap::new(),
workdir,
stdin: None,
mounts: vec![],
timeout: self.default_timeout,
};
let handle = match self.sandbox.spawn(spec).await {
Ok(h) => h,
Err(e) => return sandbox_error_to_result(tool_use_id, e, self.default_timeout),
};
match self.sandbox.wait(handle).await {
Ok(output) => {
debug!(
tool = tool_name,
exit_code = output.exit_code,
wall_time_ms = output.wall_time.as_millis() as u64,
"sandboxed tool call completed"
);
let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
if output.exit_code == 0 {
ToolResult::success(tool_use_id.to_string(), stdout)
} else {
let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
ToolResult::error(
tool_use_id.to_string(),
format!("exit {}: {}", output.exit_code, stderr),
)
}
}
Err(e) => sandbox_error_to_result(tool_use_id, e, self.default_timeout),
}
}
async fn run_bash(&self, tool_use: &ToolUse, context: &ToolContext) -> ToolResult {
let Some(command) = tool_use.input.get("command").and_then(|v| v.as_str()) else {
return ToolResult::error(
tool_use.id.clone(),
"sandbox: missing or non-string 'command' parameter".to_string(),
);
};
let cmd = vec!["/bin/sh".to_string(), "-c".to_string(), command.to_string()];
self.run_in_sandbox(&tool_use.id, &tool_use.name, cmd, self.workdir_for(context))
.await
}
async fn run_code_exec(&self, tool_use: &ToolUse, context: &ToolContext) -> ToolResult {
let Some(language) = tool_use.input.get("language").and_then(|v| v.as_str()) else {
return ToolResult::error(
tool_use.id.clone(),
"sandbox: missing or non-string 'language' parameter".to_string(),
);
};
let Some(code) = tool_use.input.get("code").and_then(|v| v.as_str()) else {
return ToolResult::error(
tool_use.id.clone(),
"sandbox: missing or non-string 'code' parameter".to_string(),
);
};
let lang = language.to_lowercase();
let cmd = match lang.as_str() {
"python" | "python3" => {
vec!["python3".to_string(), "-c".to_string(), code.to_string()]
}
"node" | "javascript" | "js" => {
vec!["node".to_string(), "-e".to_string(), code.to_string()]
}
"bash" | "sh" | "shell" => {
vec!["/bin/sh".to_string(), "-c".to_string(), code.to_string()]
}
other => {
return ToolResult::error(
tool_use.id.clone(),
format!("sandbox does not yet support language '{other}'"),
);
}
};
self.run_in_sandbox(&tool_use.id, &tool_use.name, cmd, self.workdir_for(context))
.await
}
}
#[async_trait]
impl<E: ToolExecutor> ToolExecutor for SandboxedToolExecutor<E> {
async fn execute(&self, tool_use: &ToolUse, context: &ToolContext) -> Result<ToolResult> {
match tool_use.name.as_str() {
"bash" | "execute_command" => Ok(self.run_bash(tool_use, context).await),
"code_exec" | "execute_code" => Ok(self.run_code_exec(tool_use, context).await),
_ => self.inner.execute(tool_use, context).await,
}
}
fn available_tools(&self) -> Vec<Tool> {
self.inner.available_tools()
}
}
fn sandbox_error_to_result(tool_use_id: &str, err: SandboxError, timeout: Duration) -> ToolResult {
let msg = match err {
SandboxError::Timeout => format!("sandboxed command timed out after {:?}", timeout),
SandboxError::PolicyViolation(reason) => format!("policy violation: {reason}"),
other => format!("sandbox error: {other}"),
};
ToolResult::error(tool_use_id.to_string(), msg)
}
#[cfg(test)]
mod tests {
use super::*;
use async_trait::async_trait;
use serde_json::json;
use std::sync::Mutex;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use brainwires_core::{ToolContext, ToolUse};
use brainwires_sandbox::{ExecHandle, ExecOutput, ExecSpec, Sandbox, SandboxRuntime};
struct MockSandbox {
exit_code: i32,
stdout: Vec<u8>,
stderr: Vec<u8>,
should_timeout: AtomicBool,
spawned_specs: Mutex<Vec<ExecSpec>>,
}
impl MockSandbox {
fn new(exit_code: i32, stdout: &[u8], stderr: &[u8]) -> Arc<Self> {
Arc::new(Self {
exit_code,
stdout: stdout.to_vec(),
stderr: stderr.to_vec(),
should_timeout: AtomicBool::new(false),
spawned_specs: Mutex::new(Vec::new()),
})
}
fn timing_out() -> Arc<Self> {
Arc::new(Self {
exit_code: 0,
stdout: Vec::new(),
stderr: Vec::new(),
should_timeout: AtomicBool::new(true),
spawned_specs: Mutex::new(Vec::new()),
})
}
fn specs(&self) -> Vec<ExecSpec> {
self.spawned_specs.lock().unwrap().clone()
}
}
#[async_trait]
impl Sandbox for MockSandbox {
async fn spawn(&self, spec: ExecSpec) -> brainwires_sandbox::Result<ExecHandle> {
self.spawned_specs.lock().unwrap().push(spec);
Ok(ExecHandle::new())
}
async fn wait(&self, _handle: ExecHandle) -> brainwires_sandbox::Result<ExecOutput> {
if self.should_timeout.load(Ordering::SeqCst) {
return Err(SandboxError::Timeout);
}
Ok(ExecOutput {
exit_code: self.exit_code,
stdout: self.stdout.clone(),
stderr: self.stderr.clone(),
wall_time: Duration::from_millis(1),
})
}
async fn shutdown(&self) -> brainwires_sandbox::Result<()> {
Ok(())
}
fn runtime(&self) -> SandboxRuntime {
SandboxRuntime::Host
}
}
struct CountingInner {
calls: AtomicUsize,
}
impl CountingInner {
fn new() -> Self {
Self {
calls: AtomicUsize::new(0),
}
}
fn call_count(&self) -> usize {
self.calls.load(Ordering::SeqCst)
}
}
#[async_trait]
impl ToolExecutor for CountingInner {
async fn execute(&self, tool_use: &ToolUse, _ctx: &ToolContext) -> Result<ToolResult> {
self.calls.fetch_add(1, Ordering::SeqCst);
Ok(ToolResult::success(
tool_use.id.clone(),
"inner-executed".to_string(),
))
}
fn available_tools(&self) -> Vec<Tool> {
Vec::new()
}
}
fn ctx() -> ToolContext {
ToolContext {
working_directory: "/tmp".to_string(),
..Default::default()
}
}
#[tokio::test]
async fn bash_is_routed_through_sandbox_and_inner_is_not_called() {
let sandbox = MockSandbox::new(0, b"hello from sandbox\n", b"");
let exec = SandboxedToolExecutor::new(
CountingInner::new(),
sandbox.clone() as Arc<dyn Sandbox>,
SandboxPolicy::default(),
);
let tool_use = ToolUse {
id: "t-1".to_string(),
name: "bash".to_string(),
input: json!({ "command": "echo hello" }),
};
let result = exec.execute(&tool_use, &ctx()).await.expect("execute");
assert!(!result.is_error, "unexpected error: {}", result.content);
assert!(result.content.contains("hello from sandbox"));
assert_eq!(
exec.inner().call_count(),
0,
"inner executor must not be called for bash"
);
let specs = sandbox.specs();
assert_eq!(specs.len(), 1);
assert_eq!(
specs[0].cmd,
vec![
"/bin/sh".to_string(),
"-c".to_string(),
"echo hello".to_string()
]
);
assert!(specs[0].env.is_empty(), "host env must not leak");
}
#[tokio::test]
async fn non_dangerous_tool_delegates_to_inner_executor() {
let sandbox = MockSandbox::new(0, b"should not appear", b"");
let exec = SandboxedToolExecutor::new(
CountingInner::new(),
sandbox as Arc<dyn Sandbox>,
SandboxPolicy::default(),
);
let tool_use = ToolUse {
id: "t-2".to_string(),
name: "read_file".to_string(),
input: json!({ "path": "/etc/hosts" }),
};
let result = exec.execute(&tool_use, &ctx()).await.expect("execute");
assert!(!result.is_error);
assert_eq!(result.content, "inner-executed");
assert_eq!(exec.inner().call_count(), 1);
}
#[tokio::test]
async fn non_zero_exit_becomes_error_result_with_exit_code() {
let sandbox = MockSandbox::new(42, b"", b"boom");
let exec = SandboxedToolExecutor::new(
CountingInner::new(),
sandbox as Arc<dyn Sandbox>,
SandboxPolicy::default(),
);
let tool_use = ToolUse {
id: "t-3".to_string(),
name: "execute_command".to_string(),
input: json!({ "command": "false" }),
};
let result = exec.execute(&tool_use, &ctx()).await.expect("execute");
assert!(result.is_error);
assert!(
result.content.contains("exit 42"),
"content was: {}",
result.content
);
assert!(result.content.contains("boom"));
}
#[tokio::test]
async fn timeout_becomes_error_result_containing_timed_out() {
let sandbox = MockSandbox::timing_out();
let exec = SandboxedToolExecutor::new(
CountingInner::new(),
sandbox as Arc<dyn Sandbox>,
SandboxPolicy::default(),
)
.with_timeout(Duration::from_millis(5));
let tool_use = ToolUse {
id: "t-4".to_string(),
name: "bash".to_string(),
input: json!({ "command": "sleep 999" }),
};
let result = exec.execute(&tool_use, &ctx()).await.expect("execute");
assert!(result.is_error);
assert!(
result.content.contains("timed out"),
"content was: {}",
result.content
);
}
}