forge_sandbox/
executor.rs

1//! Sandbox executor — creates fresh V8 isolates and runs LLM-generated code.
2//!
3//! Each execution gets a brand new runtime. No state leaks between calls.
4//!
5//! V8 isolates are `!Send`, so all JsRuntime operations run on a dedicated
6//! thread with its own single-threaded tokio runtime. The public API is
7//! fully async and `Send`-safe.
8
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::Arc;
11use std::time::Duration;
12
13use deno_core::{v8, JsRuntime, PollEventLoopOptions, RuntimeOptions};
14use serde_json::Value;
15use tokio::sync::Semaphore;
16
17use crate::audit::{
18    AuditEntryBuilder, AuditLogger, AuditOperation, AuditingDispatcher, NoopAuditLogger,
19    ToolCallAudit,
20};
21use crate::error::SandboxError;
22use crate::ops::{forge_ext, ExecutionResult, ToolCallLimits};
23use crate::validator::validate_code;
24use crate::ToolDispatcher;
25
/// How the sandbox executes code.
///
/// This is a plain, field-less selector, so it is `Copy` and `Hash` in
/// addition to the comparison traits. [`ExecutionMode::InProcess`] is the
/// `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum ExecutionMode {
    /// Run V8 in-process on a dedicated thread (default, suitable for tests).
    #[default]
    InProcess,
    /// Spawn an isolated child process per execution (production security mode).
    ChildProcess,
}
35
36/// Configuration for the sandbox executor.
37#[derive(Debug, Clone)]
38pub struct SandboxConfig {
39    /// Maximum execution time before the sandbox is terminated.
40    pub timeout: Duration,
41    /// Maximum size of LLM-generated code in bytes.
42    pub max_code_size: usize,
43    /// Maximum size of the JSON result in bytes.
44    pub max_output_size: usize,
45    /// V8 heap limit in bytes.
46    pub max_heap_size: usize,
47    /// Maximum concurrent sandbox executions.
48    pub max_concurrent: usize,
49    /// Maximum tool calls per execution.
50    pub max_tool_calls: usize,
51    /// Maximum size of tool call arguments in bytes.
52    pub max_tool_call_args_size: usize,
53    /// Execution mode: in-process or child-process isolation.
54    pub execution_mode: ExecutionMode,
55}
56
57impl Default for SandboxConfig {
58    fn default() -> Self {
59        Self {
60            timeout: Duration::from_secs(5),
61            max_code_size: 64 * 1024,        // 64 KB
62            max_output_size: 1024 * 1024,    // 1 MB
63            max_heap_size: 64 * 1024 * 1024, // 64 MB
64            max_concurrent: 8,
65            max_tool_calls: 50,
66            max_tool_call_args_size: 1024 * 1024, // 1 MB
67            execution_mode: ExecutionMode::default(),
68        }
69    }
70}
71
72/// The sandbox executor. Creates fresh V8 isolates for each execution.
73///
74/// This is `Send + Sync` safe — all V8 operations are dispatched to a
75/// dedicated thread internally. A concurrency semaphore limits the number
76/// of simultaneous V8 isolates.
77pub struct SandboxExecutor {
78    config: SandboxConfig,
79    semaphore: Arc<Semaphore>,
80    audit_logger: Arc<dyn AuditLogger>,
81}
82
83impl SandboxExecutor {
84    /// Create a new sandbox executor with the given configuration.
85    pub fn new(config: SandboxConfig) -> Self {
86        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
87        Self {
88            config,
89            semaphore,
90            audit_logger: Arc::new(NoopAuditLogger),
91        }
92    }
93
94    /// Create a new sandbox executor with an audit logger.
95    pub fn with_audit_logger(config: SandboxConfig, logger: Arc<dyn AuditLogger>) -> Self {
96        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
97        Self {
98            config,
99            semaphore,
100            audit_logger: logger,
101        }
102    }
103
104    /// Execute a `search()` call — runs code against the capability manifest.
105    ///
106    /// The manifest is injected as `globalThis.manifest` in the sandbox.
107    /// The LLM's code is an async arrow function that queries it.
108    /// Search always runs in-process (read-only, no credential exposure risk).
109    pub async fn execute_search(
110        &self,
111        code: &str,
112        manifest: &Value,
113    ) -> Result<Value, SandboxError> {
114        tracing::info!(code_len = code.len(), "execute_search: starting");
115
116        let audit_builder = AuditEntryBuilder::new(code, AuditOperation::Search);
117
118        validate_code(code, Some(self.config.max_code_size))?;
119
120        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
121            SandboxError::ConcurrencyLimit {
122                max: self.config.max_concurrent,
123            }
124        })?;
125
126        let code = code.to_string();
127        let manifest = manifest.clone();
128        let config = self.config.clone();
129
130        // V8 isolates are !Send — run everything on a dedicated thread
131        let (tx, rx) = tokio::sync::oneshot::channel();
132        std::thread::spawn(move || {
133            let rt = match tokio::runtime::Builder::new_current_thread()
134                .enable_all()
135                .build()
136            {
137                Ok(rt) => rt,
138                Err(e) => {
139                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
140                        tracing::warn!("sandbox result receiver dropped");
141                    }
142                    return;
143                }
144            };
145            let result = rt.block_on(run_search(&config, &code, &manifest));
146            if tx.send(result).is_err() {
147                tracing::warn!("sandbox result receiver dropped before result was sent");
148            }
149        });
150
151        let result = rx
152            .await
153            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?;
154
155        // Emit audit entry
156        let entry = audit_builder.finish(&result);
157        self.audit_logger.log(&entry).await;
158
159        match &result {
160            Ok(_) => tracing::info!("execute_search: complete"),
161            Err(e) => tracing::warn!(error = %e, "execute_search: failed"),
162        }
163
164        result
165    }
166
167    /// Execute an `execute()` call — runs code against the tool API.
168    ///
169    /// Tool calls go through `forge.callTool(server, tool, args)` which
170    /// dispatches to the Rust-side ToolDispatcher via `op_forge_call_tool`.
171    ///
172    /// In `ChildProcess` mode, spawns an isolated worker process. In `InProcess`
173    /// mode (default), runs V8 on a dedicated thread in the current process.
174    pub async fn execute_code(
175        &self,
176        code: &str,
177        dispatcher: Arc<dyn ToolDispatcher>,
178    ) -> Result<Value, SandboxError> {
179        tracing::info!(
180            code_len = code.len(),
181            mode = ?self.config.execution_mode,
182            "execute_code: starting"
183        );
184
185        let mut audit_builder = AuditEntryBuilder::new(code, AuditOperation::Execute);
186
187        validate_code(code, Some(self.config.max_code_size))?;
188
189        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
190            SandboxError::ConcurrencyLimit {
191                max: self.config.max_concurrent,
192            }
193        })?;
194
195        // Wrap dispatcher with audit tracking
196        let (audit_tx, mut audit_rx) = tokio::sync::mpsc::unbounded_channel::<ToolCallAudit>();
197        let auditing_dispatcher: Arc<dyn ToolDispatcher> =
198            Arc::new(AuditingDispatcher::new(dispatcher, audit_tx));
199
200        let result = match self.config.execution_mode {
201            ExecutionMode::ChildProcess => {
202                crate::host::SandboxHost::execute_in_child(code, &self.config, auditing_dispatcher)
203                    .await
204            }
205            ExecutionMode::InProcess => {
206                self.execute_code_in_process(code, auditing_dispatcher)
207                    .await
208            }
209        };
210
211        // Collect tool call audits
212        while let Ok(tool_audit) = audit_rx.try_recv() {
213            audit_builder.record_tool_call(tool_audit);
214        }
215
216        // Emit audit entry
217        let entry = audit_builder.finish(&result);
218        self.audit_logger.log(&entry).await;
219
220        match &result {
221            Ok(_) => tracing::info!("execute_code: complete"),
222            Err(e) => tracing::warn!(error = %e, "execute_code: failed"),
223        }
224
225        result
226    }
227
228    /// In-process execution: spawn a dedicated thread with its own V8 isolate.
229    async fn execute_code_in_process(
230        &self,
231        code: &str,
232        dispatcher: Arc<dyn ToolDispatcher>,
233    ) -> Result<Value, SandboxError> {
234        let code = code.to_string();
235        let config = self.config.clone();
236
237        let (tx, rx) = tokio::sync::oneshot::channel();
238        std::thread::spawn(move || {
239            let rt = match tokio::runtime::Builder::new_current_thread()
240                .enable_all()
241                .build()
242            {
243                Ok(rt) => rt,
244                Err(e) => {
245                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
246                        tracing::warn!("sandbox result receiver dropped");
247                    }
248                    return;
249                }
250            };
251            let result = rt.block_on(run_execute(&config, &code, dispatcher));
252            if tx.send(result).is_err() {
253                tracing::warn!("sandbox result receiver dropped before result was sent");
254            }
255        });
256
257        rx.await
258            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?
259    }
260}
261
262/// State for the near-heap-limit callback.
263struct HeapLimitState {
264    handle: v8::IsolateHandle,
265    /// Whether the heap limit has been triggered. Uses AtomicBool so the callback
266    /// can use a shared `&` reference instead of `&mut`, eliminating aliasing concerns.
267    triggered: AtomicBool,
268}
269
270/// V8 near-heap-limit callback. Terminates execution and grants 1MB grace
271/// for the termination to propagate cleanly.
272extern "C" fn near_heap_limit_callback(
273    data: *mut std::ffi::c_void,
274    current_heap_limit: usize,
275    _initial_heap_limit: usize,
276) -> usize {
277    // SAFETY: `data` points to `heap_state` (Box<HeapLimitState>) allocated below.
278    // The Box outlives this callback because: (1) the watchdog thread is joined
279    // before heap_state is dropped, and (2) V8 only invokes this callback while the
280    // isolate's event loop is running, which completes before the join.
281    // We use a shared `&` reference (not `&mut`) because `triggered` is AtomicBool,
282    // so no aliasing concerns even if V8 were to call this callback re-entrantly.
283    let state = unsafe { &*(data as *const HeapLimitState) };
284    if !state.triggered.swap(true, Ordering::SeqCst) {
285        state.handle.terminate_execution();
286    }
287    // Grant 1MB grace so the termination exception can propagate
288    current_heap_limit + 1024 * 1024
289}
290
291/// Run a search operation on the current thread (must be called from a
292/// dedicated thread, not the main tokio runtime).
293///
294/// Public for reuse in the worker binary.
295pub async fn run_search(
296    config: &SandboxConfig,
297    code: &str,
298    manifest: &Value,
299) -> Result<Value, SandboxError> {
300    let mut runtime = create_runtime(None, config.max_heap_size, None)?;
301
302    // Inject the manifest as a global
303    let manifest_json = serde_json::to_string(manifest)?;
304    let bootstrap = format!("globalThis.manifest = {};", manifest_json);
305    runtime
306        .execute_script("[forge:manifest]", bootstrap)
307        .map_err(|e| SandboxError::JsError {
308            message: e.to_string(),
309        })?;
310
311    // Bootstrap: capture ops in closures, create minimal forge object, delete Deno,
312    // and remove dangerous code generation primitives.
313    runtime
314        .execute_script(
315            "[forge:bootstrap]",
316            r#"
317                ((ops) => {
318                    const setResult = (json) => ops.op_forge_set_result(json);
319                    const log = (msg) => ops.op_forge_log(String(msg));
320                    globalThis.forge = Object.freeze({
321                        __setResult: setResult,
322                        log: log,
323                    });
324                    delete globalThis.Deno;
325
326                    // Remove code generation primitives to prevent prototype chain attacks.
327                    // Even with the validator banning eval( and Function(, an attacker could
328                    // reach Function via forge.log.constructor or similar prototype chain access.
329                    delete globalThis.eval;
330                    const AsyncFunction = (async function(){}).constructor;
331                    const GeneratorFunction = (function*(){}).constructor;
332                    Object.defineProperty(Function.prototype, 'constructor', {
333                        value: undefined, configurable: false, writable: false
334                    });
335                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
336                        value: undefined, configurable: false, writable: false
337                    });
338                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
339                        value: undefined, configurable: false, writable: false
340                    });
341                })(Deno.core.ops);
342            "#,
343        )
344        .map_err(|e| SandboxError::JsError {
345            message: e.to_string(),
346        })?;
347
348    run_user_code(&mut runtime, code, config).await
349}
350
351/// Run an execute operation on the current thread.
352///
353/// Public for reuse in the worker binary.
354pub async fn run_execute(
355    config: &SandboxConfig,
356    code: &str,
357    dispatcher: Arc<dyn ToolDispatcher>,
358) -> Result<Value, SandboxError> {
359    let limits = ToolCallLimits {
360        max_calls: config.max_tool_calls,
361        max_args_size: config.max_tool_call_args_size,
362        calls_made: 0,
363    };
364    let mut runtime = create_runtime(Some(dispatcher), config.max_heap_size, Some(limits))?;
365
366    // Bootstrap: capture ops in closures, create full forge API, delete Deno,
367    // and remove dangerous code generation primitives.
368    // User code accesses tools via forge.callTool() or forge.server("x").cat.tool().
369    runtime
370        .execute_script(
371            "[forge:bootstrap]",
372            r#"
373                ((ops) => {
374                    const callToolOp = ops.op_forge_call_tool;
375                    const setResult = (json) => ops.op_forge_set_result(json);
376                    const log = (msg) => ops.op_forge_log(String(msg));
377
378                    const callTool = async (server, tool, args) => {
379                        const resultJson = await callToolOp(
380                            server, tool, JSON.stringify(args || {})
381                        );
382                        return JSON.parse(resultJson);
383                    };
384
385                    globalThis.forge = Object.freeze({
386                        __setResult: setResult,
387                        log: log,
388                        callTool: callTool,
389                        server: (name) => {
390                            return new Proxy({}, {
391                                get(_target, category) {
392                                    return new Proxy({}, {
393                                        get(_target2, tool) {
394                                            return async (args) => {
395                                                return callTool(
396                                                    name,
397                                                    `${category}.${tool}`,
398                                                    args || {}
399                                                );
400                                            };
401                                        }
402                                    });
403                                }
404                            });
405                        }
406                    });
407
408                    delete globalThis.Deno;
409
410                    // Remove code generation primitives to prevent prototype chain attacks.
411                    delete globalThis.eval;
412                    const AsyncFunction = (async function(){}).constructor;
413                    const GeneratorFunction = (function*(){}).constructor;
414                    Object.defineProperty(Function.prototype, 'constructor', {
415                        value: undefined, configurable: false, writable: false
416                    });
417                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
418                        value: undefined, configurable: false, writable: false
419                    });
420                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
421                        value: undefined, configurable: false, writable: false
422                    });
423                })(Deno.core.ops);
424            "#,
425        )
426        .map_err(|e| SandboxError::JsError {
427            message: e.to_string(),
428        })?;
429
430    run_user_code(&mut runtime, code, config).await
431}
432
433/// Create a fresh JsRuntime with the forge extension loaded and V8 heap limits set.
434///
435/// Public for reuse in the worker binary.
436pub fn create_runtime(
437    dispatcher: Option<Arc<dyn ToolDispatcher>>,
438    max_heap_size: usize,
439    tool_call_limits: Option<ToolCallLimits>,
440) -> Result<JsRuntime, SandboxError> {
441    let create_params = v8::CreateParams::default().heap_limits(0, max_heap_size);
442
443    let runtime = JsRuntime::new(RuntimeOptions {
444        extensions: vec![forge_ext::init()],
445        create_params: Some(create_params),
446        ..Default::default()
447    });
448
449    if let Some(d) = dispatcher {
450        runtime.op_state().borrow_mut().put(d);
451    }
452    if let Some(limits) = tool_call_limits {
453        runtime.op_state().borrow_mut().put(limits);
454    }
455
456    Ok(runtime)
457}
458
459/// Wrap the user's async arrow function, execute it, and extract the result.
460///
461/// Sets up a CPU watchdog thread and near-heap-limit callback before running
462/// user code. The watchdog terminates V8 execution if the timeout elapses
463/// (handles CPU-bound infinite loops). The heap callback terminates execution
464/// if V8 approaches the heap limit (prevents OOM abort).
465async fn run_user_code(
466    runtime: &mut JsRuntime,
467    code: &str,
468    config: &SandboxConfig,
469) -> Result<Value, SandboxError> {
470    // --- Set up heap limit callback ---
471    let heap_state = Box::new(HeapLimitState {
472        handle: runtime.v8_isolate().thread_safe_handle(),
473        triggered: AtomicBool::new(false),
474    });
475    runtime.v8_isolate().add_near_heap_limit_callback(
476        near_heap_limit_callback,
477        &*heap_state as *const HeapLimitState as *mut std::ffi::c_void,
478    );
479
480    // --- Set up CPU watchdog ---
481    let watchdog_handle = runtime.v8_isolate().thread_safe_handle();
482    let timed_out = Arc::new(AtomicBool::new(false));
483    let watchdog_timed_out = timed_out.clone();
484    let timeout = config.timeout;
485    let (cancel_tx, cancel_rx) = std::sync::mpsc::channel::<()>();
486
487    let watchdog = std::thread::spawn(move || {
488        if let Err(std::sync::mpsc::RecvTimeoutError::Timeout) = cancel_rx.recv_timeout(timeout) {
489            watchdog_timed_out.store(true, Ordering::SeqCst);
490            watchdog_handle.terminate_execution();
491        }
492    });
493
494    // --- Execute user code ---
495    let wrapped = format!(
496        r#"
497        (async () => {{
498            try {{
499                const __userFn = {code};
500                const __result = await __userFn();
501                forge.__setResult(
502                    JSON.stringify({{ ok: __result }})
503                );
504            }} catch (e) {{
505                forge.__setResult(
506                    JSON.stringify({{ error: e.message || String(e) }})
507                );
508            }}
509        }})();
510        "#
511    );
512
513    let exec_error = match runtime.execute_script("[forge:execute]", wrapped) {
514        Ok(_) => {
515            // Drive the event loop to resolve async operations
516            match tokio::time::timeout(
517                config.timeout,
518                runtime.run_event_loop(PollEventLoopOptions::default()),
519            )
520            .await
521            {
522                Ok(Ok(())) => None,
523                Ok(Err(e)) => Some(e.to_string()),
524                Err(_) => Some("async timeout".to_string()),
525            }
526        }
527        Err(e) => Some(e.to_string()),
528    };
529
530    // --- Cleanup: cancel watchdog and wait for it to exit ---
531    // This ensures the watchdog thread is done before we drop the runtime,
532    // preventing use-after-free on the IsolateHandle.
533    let _ = cancel_tx.send(());
534    let _ = watchdog.join();
535
536    // --- Check error causes in priority order ---
537    if heap_state.triggered.load(Ordering::SeqCst) {
538        return Err(SandboxError::HeapLimitExceeded);
539    }
540
541    if timed_out.load(Ordering::SeqCst) {
542        return Err(SandboxError::Timeout {
543            timeout_ms: config.timeout.as_millis() as u64,
544        });
545    }
546
547    if let Some(err_msg) = exec_error {
548        return Err(SandboxError::JsError { message: err_msg });
549    }
550
551    // --- Extract result from OpState ---
552    let result_str = {
553        let state = runtime.op_state();
554        let state = state.borrow();
555        state
556            .try_borrow::<ExecutionResult>()
557            .map(|r| r.0.clone())
558            .ok_or_else(|| SandboxError::JsError {
559                message: "no result returned from sandbox execution".into(),
560            })?
561    };
562
563    if result_str.len() > config.max_output_size {
564        return Err(SandboxError::OutputTooLarge {
565            max: config.max_output_size,
566        });
567    }
568
569    let envelope: Value = serde_json::from_str(&result_str)?;
570
571    if let Some(error) = envelope.get("error") {
572        return Err(SandboxError::JsError {
573            message: error.as_str().unwrap_or("unknown error").to_string(),
574        });
575    }
576
577    Ok(envelope.get("ok").cloned().unwrap_or(Value::Null))
578}
579
580#[cfg(test)]
581mod tests {
582    use super::*;
583
584    fn executor() -> SandboxExecutor {
585        SandboxExecutor::new(SandboxConfig::default())
586    }
587
588    /// Test dispatcher that echoes back the server/tool/args.
589    struct TestDispatcher;
590
591    #[async_trait::async_trait]
592    impl ToolDispatcher for TestDispatcher {
593        async fn call_tool(
594            &self,
595            server: &str,
596            tool: &str,
597            args: serde_json::Value,
598        ) -> Result<serde_json::Value, anyhow::Error> {
599            Ok(serde_json::json!({
600                "server": server,
601                "tool": tool,
602                "args": args,
603                "status": "ok"
604            }))
605        }
606    }
607
608    #[tokio::test]
609    async fn search_returns_manifest_data() {
610        let exec = executor();
611        let manifest = serde_json::json!({
612            "tools": [
613                {"name": "parse_ast", "category": "ast"},
614                {"name": "find_symbols", "category": "symbols"},
615            ]
616        });
617
618        let code = r#"async () => {
619            return manifest.tools.filter(t => t.category === "ast");
620        }"#;
621
622        let result = exec.execute_search(code, &manifest).await.unwrap();
623        let tools = result.as_array().unwrap();
624        assert_eq!(tools.len(), 1);
625        assert_eq!(tools[0]["name"], "parse_ast");
626    }
627
628    #[tokio::test]
629    async fn search_handles_complex_queries() {
630        let exec = executor();
631        let manifest = serde_json::json!({
632            "servers": [
633                {
634                    "name": "narsil",
635                    "categories": {
636                        "ast": { "tools": ["parse", "query", "walk"] },
637                        "symbols": { "tools": ["find", "references"] }
638                    }
639                }
640            ]
641        });
642
643        let code = r#"async () => {
644            return manifest.servers
645                .map(s => ({ name: s.name, categories: Object.keys(s.categories) }));
646        }"#;
647
648        let result = exec.execute_search(code, &manifest).await.unwrap();
649        let servers = result.as_array().unwrap();
650        assert_eq!(servers[0]["name"], "narsil");
651    }
652
653    #[tokio::test]
654    async fn timeout_is_enforced() {
655        let exec = SandboxExecutor::new(SandboxConfig {
656            timeout: Duration::from_millis(200),
657            ..Default::default()
658        });
659        let manifest = serde_json::json!({});
660
661        // A never-resolving promise should trigger a timeout
662        let code = r#"async () => {
663            await new Promise(() => {});
664        }"#;
665
666        let start = std::time::Instant::now();
667        let err = exec.execute_search(code, &manifest).await.unwrap_err();
668        let elapsed = start.elapsed();
669
670        // Should be a timeout or a "no result" error (the event loop completes
671        // when there are no more pending ops, even if the promise is unresolved)
672        match &err {
673            SandboxError::Timeout { .. } => {}
674            SandboxError::JsError { message } if message.contains("no result") => {
675                // deno_core's event loop exits when there are no pending ops,
676                // so the never-resolving promise doesn't actually block
677            }
678            other => panic!("unexpected error: {other:?}, elapsed: {elapsed:?}"),
679        }
680    }
681
682    #[tokio::test]
683    async fn js_errors_are_captured() {
684        let exec = executor();
685        let manifest = serde_json::json!({});
686
687        let code = r#"async () => {
688            throw new Error("intentional test error");
689        }"#;
690
691        let err = exec.execute_search(code, &manifest).await.unwrap_err();
692        assert!(matches!(err, SandboxError::JsError { .. }));
693        let msg = err.to_string();
694        assert!(msg.contains("intentional test error"));
695    }
696
697    #[tokio::test]
698    async fn no_filesystem_access() {
699        let exec = executor();
700        let manifest = serde_json::json!({});
701
702        // require() is a banned pattern — caught by validator
703        let code = r#"async () => {
704            const fs = require("fs");
705            return "ESCAPED";
706        }"#;
707
708        let err = exec.execute_search(code, &manifest).await;
709        assert!(err.is_err());
710    }
711
712    #[tokio::test]
713    async fn no_network_access() {
714        let exec = executor();
715        let manifest = serde_json::json!({});
716
717        let code = r#"async () => {
718            try {
719                await fetch("https://example.com");
720                return "ESCAPED";
721            } catch(e) {
722                return "CONTAINED";
723            }
724        }"#;
725
726        let result = exec.execute_search(code, &manifest).await.unwrap();
727        assert_eq!(result, "CONTAINED");
728    }
729
730    // --- WU4 new tests ---
731
732    #[tokio::test]
733    async fn cpu_bound_infinite_loop_is_terminated() {
734        let exec = SandboxExecutor::new(SandboxConfig {
735            timeout: Duration::from_millis(500),
736            ..Default::default()
737        });
738        let manifest = serde_json::json!({});
739
740        let code = r#"async () => {
741            while(true) {}
742        }"#;
743
744        let start = std::time::Instant::now();
745        let err = exec.execute_search(code, &manifest).await.unwrap_err();
746        let elapsed = start.elapsed();
747
748        assert!(
749            matches!(err, SandboxError::Timeout { .. }),
750            "expected timeout, got: {err:?}"
751        );
752        assert!(
753            elapsed < Duration::from_secs(5),
754            "should complete reasonably fast, took: {elapsed:?}"
755        );
756    }
757
758    #[tokio::test]
759    async fn heap_limit_prevents_oom() {
760        let exec = SandboxExecutor::new(SandboxConfig {
761            max_heap_size: 10 * 1024 * 1024,  // 10 MB
762            timeout: Duration::from_secs(30), // Long timeout so heap fills first
763            ..Default::default()
764        });
765        let manifest = serde_json::json!({});
766
767        // Rapidly allocate memory to exceed the heap limit
768        let code = r#"async () => {
769            const arr = [];
770            while(true) {
771                arr.push(new Array(100000).fill("x"));
772            }
773        }"#;
774
775        let err = exec.execute_search(code, &manifest).await.unwrap_err();
776        assert!(
777            matches!(
778                err,
779                SandboxError::HeapLimitExceeded | SandboxError::JsError { .. }
780            ),
781            "expected heap limit or JS error, got: {err:?}"
782        );
783    }
784
785    #[tokio::test]
786    async fn concurrency_limit_enforced() {
787        // Use max_concurrent=0 so no executions are allowed (deterministic test)
788        let exec = SandboxExecutor::new(SandboxConfig {
789            max_concurrent: 0,
790            ..Default::default()
791        });
792
793        let code = r#"async () => { return 1; }"#;
794        let err = exec
795            .execute_search(code, &serde_json::json!({}))
796            .await
797            .unwrap_err();
798        assert!(
799            matches!(err, SandboxError::ConcurrencyLimit { max: 0 }),
800            "expected concurrency limit, got: {err:?}"
801        );
802    }
803
804    #[tokio::test]
805    async fn deno_global_is_not_accessible() {
806        let exec = executor();
807        let manifest = serde_json::json!({});
808
809        let code = r#"async () => {
810            const props = Object.getOwnPropertyNames(globalThis);
811            return !props.includes("Deno");
812        }"#;
813
814        let result = exec.execute_search(code, &manifest).await.unwrap();
815        assert_eq!(result, true);
816    }
817
818    #[tokio::test]
819    async fn forge_object_is_frozen() {
820        let exec = executor();
821        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
822
823        let code = r#"async () => {
824            return Object.isFrozen(forge);
825        }"#;
826
827        let result = exec.execute_code(code, dispatcher).await.unwrap();
828        assert_eq!(result, true);
829    }
830
831    #[tokio::test]
832    async fn tool_call_rate_limit() {
833        let exec = SandboxExecutor::new(SandboxConfig {
834            max_tool_calls: 2,
835            ..Default::default()
836        });
837        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
838
839        let code = r#"async () => {
840            await forge.callTool("test", "tool1", {});
841            await forge.callTool("test", "tool2", {});
842            try {
843                await forge.callTool("test", "tool3", {});
844                return "should not reach here";
845            } catch(e) {
846                return e.message;
847            }
848        }"#;
849
850        let result = exec.execute_code(code, dispatcher).await.unwrap();
851        assert!(
852            result
853                .as_str()
854                .unwrap()
855                .contains("tool call limit exceeded"),
856            "expected tool call limit message, got: {result:?}"
857        );
858    }
859
860    #[tokio::test]
861    async fn tool_call_args_size_limit() {
862        let exec = SandboxExecutor::new(SandboxConfig {
863            max_tool_call_args_size: 100,
864            ..Default::default()
865        });
866        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
867
868        let code = r#"async () => {
869            try {
870                await forge.callTool("test", "tool", { data: "x".repeat(200) });
871                return "should not reach here";
872            } catch(e) {
873                return e.message;
874            }
875        }"#;
876
877        let result = exec.execute_code(code, dispatcher).await.unwrap();
878        assert!(
879            result.as_str().unwrap().contains("too large"),
880            "expected args too large message, got: {result:?}"
881        );
882    }
883
884    #[tokio::test]
885    async fn forge_log_works() {
886        let exec = executor();
887        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
888
889        let code = r#"async () => {
890            forge.log("test message from sandbox");
891            return "ok";
892        }"#;
893
894        let result = exec.execute_code(code, dispatcher).await.unwrap();
895        assert_eq!(result, "ok");
896    }
897
898    #[tokio::test]
899    async fn forge_server_proxy_calls_tool() {
900        let exec = executor();
901        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
902
903        let code = r#"async () => {
904            const result = await forge.server("narsil").ast.parse({ file: "test.rs" });
905            return result;
906        }"#;
907
908        let result = exec.execute_code(code, dispatcher).await.unwrap();
909        assert_eq!(result["server"], "narsil");
910        assert_eq!(result["tool"], "ast.parse");
911        assert_eq!(result["status"], "ok");
912    }
913
914    #[tokio::test]
915    async fn multiple_tool_calls_in_single_execution() {
916        let exec = executor();
917        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
918
919        let code = r#"async () => {
920            const r1 = await forge.callTool("server1", "tool1", {});
921            const r2 = await forge.callTool("server2", "tool2", {});
922            return [r1, r2];
923        }"#;
924
925        let result = exec.execute_code(code, dispatcher).await.unwrap();
926        let arr = result.as_array().unwrap();
927        assert_eq!(arr.len(), 2);
928        assert_eq!(arr[0]["server"], "server1");
929        assert_eq!(arr[1]["server"], "server2");
930    }
931
932    #[tokio::test]
933    async fn eval_is_not_accessible() {
934        let exec = executor();
935        let manifest = serde_json::json!({});
936
937        let code = r#"async () => {
938            return typeof globalThis.eval;
939        }"#;
940
941        let result = exec.execute_search(code, &manifest).await.unwrap();
942        assert_eq!(result, "undefined");
943    }
944
945    #[tokio::test]
946    async fn function_constructor_is_blocked() {
947        let exec = executor();
948        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
949
950        // Try to access Function via prototype chain — should get undefined
951        let code = r#"async () => {
952            const ctor = forge.log.constructor;
953            return String(ctor);
954        }"#;
955
956        let result = exec.execute_code(code, dispatcher).await.unwrap();
957        assert_eq!(result, "undefined");
958    }
959
960    #[tokio::test]
961    async fn async_function_constructor_is_blocked() {
962        let exec = executor();
963        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
964
965        // Try to access AsyncFunction via prototype chain
966        let code = r#"async () => {
967            const fn1 = async () => {};
968            const ctor = fn1.constructor;
969            return String(ctor);
970        }"#;
971
972        let result = exec.execute_code(code, dispatcher).await.unwrap();
973        assert_eq!(result, "undefined");
974    }
975
976    #[tokio::test]
977    async fn large_output_is_rejected() {
978        let exec = SandboxExecutor::new(SandboxConfig {
979            max_output_size: 100,
980            ..Default::default()
981        });
982        let manifest = serde_json::json!({});
983
984        let code = r#"async () => {
985            return "x".repeat(1000);
986        }"#;
987
988        let err = exec.execute_search(code, &manifest).await.unwrap_err();
989        assert!(
990            matches!(err, SandboxError::OutputTooLarge { .. }),
991            "expected output too large, got: {err:?}"
992        );
993    }
994}
forge_sandbox/executor.rs

forge_sandbox/
executor.rs