// forge_sandbox/executor.rs

//! Sandbox executor — creates fresh V8 isolates and runs LLM-generated code.
//!
//! Each execution gets a brand new runtime. No state leaks between calls.
//!
//! V8 isolates are `!Send`, so all JsRuntime operations run on a dedicated
//! thread with its own single-threaded tokio runtime. The public API is
//! fully async and `Send`-safe.
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::Arc;
11use std::time::Duration;
12
13use deno_core::{v8, JsRuntime, PollEventLoopOptions, RuntimeOptions};
14use serde_json::Value;
15use tokio::sync::Semaphore;
16
17use crate::audit::{
18    AuditEntryBuilder, AuditLogger, AuditOperation, AuditingDispatcher, AuditingResourceDispatcher,
19    AuditingStashDispatcher, NoopAuditLogger, ResourceReadAudit, StashOperationAudit,
20    ToolCallAudit,
21};
22use crate::error::SandboxError;
23use crate::ops::{
24    forge_ext, CurrentGroup, ExecutionResult, KnownServers, MaxResourceSize, ToolCallLimits,
25};
26use crate::validator::validate_code;
27use crate::{ResourceDispatcher, StashDispatcher, ToolDispatcher};
28
/// How the sandbox executes code.
///
/// Selected via `SandboxConfig::execution_mode`; `execute_code_with_options`
/// branches on this to pick the in-process or child-process path.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum ExecutionMode {
    /// Run V8 in-process on a dedicated thread (default, suitable for tests).
    #[default]
    InProcess,
    /// Spawn an isolated child process per execution (production security mode).
    ChildProcess,
}
38
/// Configuration for the sandbox executor.
///
/// The `Default` impl provides the limits noted per field below.
#[derive(Debug, Clone)]
pub struct SandboxConfig {
    /// Maximum execution time before the sandbox is terminated (default: 5s).
    pub timeout: Duration,
    /// Maximum size of LLM-generated code in bytes (default: 64 KB).
    pub max_code_size: usize,
    /// Maximum size of the JSON result in bytes (default: 1 MB).
    pub max_output_size: usize,
    /// V8 heap limit in bytes (default: 64 MB).
    pub max_heap_size: usize,
    /// Maximum concurrent sandbox executions (default: 8).
    pub max_concurrent: usize,
    /// Maximum tool calls per execution (default: 50).
    pub max_tool_calls: usize,
    /// Maximum size of tool call arguments in bytes (default: 1 MB).
    pub max_tool_call_args_size: usize,
    /// Execution mode: in-process or child-process isolation.
    pub execution_mode: ExecutionMode,
    /// Maximum resource content size in bytes (default: 64 MB).
    pub max_resource_size: usize,
    /// Maximum concurrent calls in forge.parallel() (default: 8).
    pub max_parallel: usize,
    /// Maximum IPC message size in bytes (default: 8 MB).
    pub max_ipc_message_size: usize,
}
65
66impl Default for SandboxConfig {
67    fn default() -> Self {
68        Self {
69            timeout: Duration::from_secs(5),
70            max_code_size: 64 * 1024,        // 64 KB
71            max_output_size: 1024 * 1024,    // 1 MB
72            max_heap_size: 64 * 1024 * 1024, // 64 MB
73            max_concurrent: 8,
74            max_tool_calls: 50,
75            max_tool_call_args_size: 1024 * 1024, // 1 MB
76            execution_mode: ExecutionMode::default(),
77            max_resource_size: 64 * 1024 * 1024, // 64 MB
78            max_parallel: 8,
79            max_ipc_message_size: crate::ipc::DEFAULT_MAX_IPC_MESSAGE_SIZE,
80        }
81    }
82}
83
/// The sandbox executor. Creates fresh V8 isolates for each execution.
///
/// This is `Send + Sync` safe — all V8 operations are dispatched to a
/// dedicated thread internally. A concurrency semaphore limits the number
/// of simultaneous V8 isolates.
pub struct SandboxExecutor {
    // Limits and timeouts applied to every execution.
    config: SandboxConfig,
    // Bounds simultaneous isolates to `config.max_concurrent`.
    semaphore: Arc<Semaphore>,
    // Receives one audit entry per execution; NoopAuditLogger by default.
    audit_logger: Arc<dyn AuditLogger>,
}
94
95impl SandboxExecutor {
96    /// Create a new sandbox executor with the given configuration.
97    pub fn new(config: SandboxConfig) -> Self {
98        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
99        Self {
100            config,
101            semaphore,
102            audit_logger: Arc::new(NoopAuditLogger),
103        }
104    }
105
106    /// Create a new sandbox executor with an audit logger.
107    pub fn with_audit_logger(config: SandboxConfig, logger: Arc<dyn AuditLogger>) -> Self {
108        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
109        Self {
110            config,
111            semaphore,
112            audit_logger: logger,
113        }
114    }
115
116    /// Execute a `search()` call — runs code against the capability manifest.
117    ///
118    /// The manifest is injected as `globalThis.manifest` in the sandbox.
119    /// The LLM's code is an async arrow function that queries it.
120    /// Search always runs in-process (read-only, no credential exposure risk).
121    pub async fn execute_search(
122        &self,
123        code: &str,
124        manifest: &Value,
125    ) -> Result<Value, SandboxError> {
126        tracing::info!(code_len = code.len(), "execute_search: starting");
127
128        let audit_builder = AuditEntryBuilder::new(code, AuditOperation::Search);
129
130        validate_code(code, Some(self.config.max_code_size))?;
131
132        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
133            SandboxError::ConcurrencyLimit {
134                max: self.config.max_concurrent,
135            }
136        })?;
137
138        let code = code.to_string();
139        let manifest = manifest.clone();
140        let config = self.config.clone();
141
142        // V8 isolates are !Send — run everything on a dedicated thread
143        let (tx, rx) = tokio::sync::oneshot::channel();
144        std::thread::spawn(move || {
145            let rt = match tokio::runtime::Builder::new_current_thread()
146                .enable_all()
147                .build()
148            {
149                Ok(rt) => rt,
150                Err(e) => {
151                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
152                        tracing::warn!("sandbox result receiver dropped");
153                    }
154                    return;
155                }
156            };
157            let result = rt.block_on(run_search(&config, &code, &manifest));
158            if tx.send(result).is_err() {
159                tracing::warn!("sandbox result receiver dropped before result was sent");
160            }
161        });
162
163        let result = rx
164            .await
165            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?;
166
167        // Emit audit entry
168        let entry = audit_builder.finish(&result);
169        self.audit_logger.log(&entry).await;
170
171        match &result {
172            Ok(_) => tracing::info!("execute_search: complete"),
173            Err(e) => tracing::warn!(error = %e, "execute_search: failed"),
174        }
175
176        result
177    }
178
179    /// Execute an `execute()` call — runs code against the tool API.
180    ///
181    /// Tool calls go through `forge.callTool(server, tool, args)` which
182    /// dispatches to the Rust-side ToolDispatcher via `op_forge_call_tool`.
183    /// Resource reads go through `forge.readResource(server, uri)` which
184    /// dispatches to the Rust-side ResourceDispatcher via `op_forge_read_resource`.
185    ///
186    /// In `ChildProcess` mode, spawns an isolated worker process. In `InProcess`
187    /// mode (default), runs V8 on a dedicated thread in the current process.
188    pub async fn execute_code(
189        &self,
190        code: &str,
191        dispatcher: Arc<dyn ToolDispatcher>,
192        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
193        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
194    ) -> Result<Value, SandboxError> {
195        self.execute_code_with_options(
196            code,
197            dispatcher,
198            resource_dispatcher,
199            stash_dispatcher,
200            None,
201        )
202        .await
203    }
204
205    /// Execute code with additional options (known servers for SR-R6 validation).
206    pub async fn execute_code_with_options(
207        &self,
208        code: &str,
209        dispatcher: Arc<dyn ToolDispatcher>,
210        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
211        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
212        known_servers: Option<std::collections::HashSet<String>>,
213    ) -> Result<Value, SandboxError> {
214        tracing::info!(
215            code_len = code.len(),
216            mode = ?self.config.execution_mode,
217            "execute_code: starting"
218        );
219
220        let mut audit_builder = AuditEntryBuilder::new(code, AuditOperation::Execute);
221
222        validate_code(code, Some(self.config.max_code_size))?;
223
224        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
225            SandboxError::ConcurrencyLimit {
226                max: self.config.max_concurrent,
227            }
228        })?;
229
230        // Wrap dispatcher with audit tracking
231        let (audit_tx, mut audit_rx) = tokio::sync::mpsc::unbounded_channel::<ToolCallAudit>();
232        let auditing_dispatcher: Arc<dyn ToolDispatcher> =
233            Arc::new(AuditingDispatcher::new(dispatcher, audit_tx));
234
235        // Wrap resource dispatcher with audit tracking
236        let (resource_audit_tx, mut resource_audit_rx) =
237            tokio::sync::mpsc::unbounded_channel::<ResourceReadAudit>();
238        let auditing_resource_dispatcher = resource_dispatcher.map(|rd| {
239            Arc::new(AuditingResourceDispatcher::new(rd, resource_audit_tx))
240                as Arc<dyn ResourceDispatcher>
241        });
242
243        // Wrap stash dispatcher with audit tracking
244        let (stash_audit_tx, mut stash_audit_rx) =
245            tokio::sync::mpsc::unbounded_channel::<StashOperationAudit>();
246        let auditing_stash_dispatcher = stash_dispatcher.map(|sd| {
247            Arc::new(AuditingStashDispatcher::new(sd, stash_audit_tx)) as Arc<dyn StashDispatcher>
248        });
249
250        let result = match self.config.execution_mode {
251            ExecutionMode::ChildProcess => {
252                crate::host::SandboxHost::execute_in_child(
253                    code,
254                    &self.config,
255                    auditing_dispatcher,
256                    auditing_resource_dispatcher,
257                    auditing_stash_dispatcher,
258                )
259                .await
260            }
261            ExecutionMode::InProcess => {
262                self.execute_code_in_process(
263                    code,
264                    auditing_dispatcher,
265                    auditing_resource_dispatcher,
266                    auditing_stash_dispatcher,
267                    known_servers,
268                )
269                .await
270            }
271        };
272
273        // Collect tool call audits
274        while let Ok(tool_audit) = audit_rx.try_recv() {
275            audit_builder.record_tool_call(tool_audit);
276        }
277
278        // Collect resource read audits
279        while let Ok(resource_audit) = resource_audit_rx.try_recv() {
280            audit_builder.record_resource_read(resource_audit);
281        }
282
283        // Collect stash operation audits
284        while let Ok(stash_audit) = stash_audit_rx.try_recv() {
285            audit_builder.record_stash_op(stash_audit);
286        }
287
288        // Emit audit entry
289        let entry = audit_builder.finish(&result);
290        self.audit_logger.log(&entry).await;
291
292        match &result {
293            Ok(_) => tracing::info!("execute_code: complete"),
294            Err(e) => tracing::warn!(error = %e, "execute_code: failed"),
295        }
296
297        result
298    }
299
300    /// In-process execution: spawn a dedicated thread with its own V8 isolate.
301    async fn execute_code_in_process(
302        &self,
303        code: &str,
304        dispatcher: Arc<dyn ToolDispatcher>,
305        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
306        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
307        known_servers: Option<std::collections::HashSet<String>>,
308    ) -> Result<Value, SandboxError> {
309        let code = code.to_string();
310        let config = self.config.clone();
311
312        let (tx, rx) = tokio::sync::oneshot::channel();
313        std::thread::spawn(move || {
314            let rt = match tokio::runtime::Builder::new_current_thread()
315                .enable_all()
316                .build()
317            {
318                Ok(rt) => rt,
319                Err(e) => {
320                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
321                        tracing::warn!("sandbox result receiver dropped");
322                    }
323                    return;
324                }
325            };
326            let result = rt.block_on(run_execute_with_known_servers(
327                &config,
328                &code,
329                dispatcher,
330                resource_dispatcher,
331                stash_dispatcher,
332                known_servers,
333            ));
334            if tx.send(result).is_err() {
335                tracing::warn!("sandbox result receiver dropped before result was sent");
336            }
337        });
338
339        rx.await
340            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?
341    }
342}
343
/// State for the near-heap-limit callback.
///
/// A `Box<HeapLimitState>` is leaked-by-reference into V8 as the callback's
/// `data` pointer; `run_user_code` owns the box and checks `triggered`
/// afterwards to report `HeapLimitExceeded`.
struct HeapLimitState {
    // Thread-safe handle used to terminate the isolate from the callback.
    handle: v8::IsolateHandle,
    /// Whether the heap limit has been triggered. Uses AtomicBool so the callback
    /// can use a shared `&` reference instead of `&mut`, eliminating aliasing concerns.
    triggered: AtomicBool,
}
351
352/// V8 near-heap-limit callback. Terminates execution and grants 1MB grace
353/// for the termination to propagate cleanly.
354extern "C" fn near_heap_limit_callback(
355    data: *mut std::ffi::c_void,
356    current_heap_limit: usize,
357    _initial_heap_limit: usize,
358) -> usize {
359    // SAFETY: `data` points to `heap_state` (Box<HeapLimitState>) allocated below.
360    // The Box outlives this callback because: (1) the watchdog thread is joined
361    // before heap_state is dropped, and (2) V8 only invokes this callback while the
362    // isolate's event loop is running, which completes before the join.
363    // We use a shared `&` reference (not `&mut`) because `triggered` is AtomicBool,
364    // so no aliasing concerns even if V8 were to call this callback re-entrantly.
365    let state = unsafe { &*(data as *const HeapLimitState) };
366    if !state.triggered.swap(true, Ordering::SeqCst) {
367        state.handle.terminate_execution();
368    }
369    // Grant 1MB grace so the termination exception can propagate
370    current_heap_limit + 1024 * 1024
371}
372
/// Run a search operation on the current thread (must be called from a
/// dedicated thread, not the main tokio runtime).
///
/// Public for reuse in the worker binary.
///
/// Creates a bare runtime (no tool/resource/stash dispatchers, no call
/// limits), injects the manifest, installs a minimal read-only `forge`
/// object, then runs the user code.
pub async fn run_search(
    config: &SandboxConfig,
    code: &str,
    manifest: &Value,
) -> Result<Value, SandboxError> {
    // No dispatchers or limits: search code can only read the manifest.
    let mut runtime = create_runtime(None, None, config.max_heap_size, None, None, None, None)?;

    // Inject the manifest as a global
    let manifest_json = serde_json::to_string(manifest)?;
    let bootstrap = format!("globalThis.manifest = {};", manifest_json);
    runtime
        .execute_script("[forge:manifest]", bootstrap)
        .map_err(|e| SandboxError::JsError {
            message: e.to_string(),
        })?;

    // Bootstrap: capture ops in closures, create minimal forge object, delete Deno,
    // and remove dangerous code generation primitives.
    //
    // NOTE(review): `globalThis.eval` is deleted and the prototype-chain
    // `constructor` routes are stubbed, but `globalThis.Function` itself
    // appears to remain defined — presumably `validate_code` is relied on to
    // reject direct `Function(...)` calls; confirm obfuscated access (e.g.
    // globalThis["Func"+"tion"]) is covered.
    runtime
        .execute_script(
            "[forge:bootstrap]",
            r#"
                ((ops) => {
                    const setResult = (json) => ops.op_forge_set_result(json);
                    const log = (msg) => ops.op_forge_log(String(msg));
                    globalThis.forge = Object.freeze({
                        __setResult: setResult,
                        log: log,
                    });
                    delete globalThis.Deno;

                    // Remove code generation primitives to prevent prototype chain attacks.
                    // Even with the validator banning eval( and Function(, an attacker could
                    // reach Function via forge.log.constructor or similar prototype chain access.
                    delete globalThis.eval;
                    const AsyncFunction = (async function(){}).constructor;
                    const GeneratorFunction = (function*(){}).constructor;
                    Object.defineProperty(Function.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                })(Deno.core.ops);
            "#,
        )
        .map_err(|e| SandboxError::JsError {
            message: e.to_string(),
        })?;

    run_user_code(&mut runtime, code, config).await
}
432
/// Run an execute operation on the current thread.
///
/// Public for reuse in the worker binary.
///
/// Thin wrapper over [`run_execute_with_known_servers`] that passes
/// `known_servers = None`, i.e. skips SR-R6 server-name validation.
pub async fn run_execute(
    config: &SandboxConfig,
    code: &str,
    dispatcher: Arc<dyn ToolDispatcher>,
    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
) -> Result<Value, SandboxError> {
    run_execute_with_known_servers(
        config,
        code,
        dispatcher,
        resource_dispatcher,
        stash_dispatcher,
        None,
    )
    .await
}
453
454/// Run an execute operation with an optional set of known server names for SR-R6 validation.
455pub async fn run_execute_with_known_servers(
456    config: &SandboxConfig,
457    code: &str,
458    dispatcher: Arc<dyn ToolDispatcher>,
459    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
460    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
461    known_servers: Option<std::collections::HashSet<String>>,
462) -> Result<Value, SandboxError> {
463    let limits = ToolCallLimits {
464        max_calls: config.max_tool_calls,
465        max_args_size: config.max_tool_call_args_size,
466        calls_made: 0,
467    };
468    let mut runtime = create_runtime(
469        Some(dispatcher),
470        resource_dispatcher.clone(),
471        config.max_heap_size,
472        Some(limits),
473        Some(config.max_resource_size),
474        stash_dispatcher.clone(),
475        known_servers,
476    )?;
477
478    // Determine which capabilities are available
479    let has_resource_dispatcher = resource_dispatcher.is_some();
480    let has_stash_dispatcher = stash_dispatcher.is_some();
481
482    // Bootstrap: capture ops in closures, create full forge API, delete Deno,
483    // and remove dangerous code generation primitives.
484    // User code accesses tools via forge.callTool() or forge.server("x").cat.tool().
485    // Conditionally includes readResource and stash based on available dispatchers.
486    let bootstrap = build_execute_bootstrap(
487        has_resource_dispatcher,
488        has_stash_dispatcher,
489        config.max_parallel,
490    );
491
492    runtime
493        .execute_script("[forge:bootstrap]", bootstrap)
494        .map_err(|e| SandboxError::JsError {
495            message: e.to_string(),
496        })?;
497
498    run_user_code(&mut runtime, code, config).await
499}
500
/// Build the bootstrap JavaScript for execute mode.
///
/// Conditionally includes `readResource` and `stash` APIs based on which
/// dispatchers are available. `max_parallel` is frozen into the script as
/// the hard upper bound for `forge.parallel()` concurrency.
fn build_execute_bootstrap(has_resource: bool, has_stash: bool, max_parallel: usize) -> String {
    let mut parts = Vec::new();

    // Always available ops + frozen concurrency cap
    parts.push(format!(
        r#"((ops) => {{
                    const callToolOp = ops.op_forge_call_tool;
                    const setResult = (json) => ops.op_forge_set_result(json);
                    const log = (msg) => ops.op_forge_log(String(msg));
                    const __MAX_PARALLEL = Object.freeze({max_parallel});

                    const callTool = async (server, tool, args) => {{
                        const resultJson = await callToolOp(
                            server, tool, JSON.stringify(args || {{}})
                        );
                        return JSON.parse(resultJson);
                    }};"#
    ));

    // readResource binding (conditional)
    if has_resource {
        parts.push(
            r#"
                    const readResourceOp = ops.op_forge_read_resource;
                    const readResource = async (server, uri) => {
                        const resultJson = await readResourceOp(server, uri);
                        return JSON.parse(resultJson);
                    };"#
            .to_string(),
        );
    }

    // stash bindings (conditional)
    if has_stash {
        parts.push(
            r#"
                    const stashPutOp = ops.op_forge_stash_put;
                    const stashGetOp = ops.op_forge_stash_get;
                    const stashDeleteOp = ops.op_forge_stash_delete;
                    const stashKeysOp = ops.op_forge_stash_keys;"#
                .to_string(),
        );
    }

    // Build the forge object properties
    let mut forge_props = vec![
        "                        __setResult: setResult".to_string(),
        "                        log: log".to_string(),
        "                        callTool: callTool".to_string(),
    ];

    if has_resource {
        forge_props.push("                        readResource: readResource".to_string());
    }

    if has_stash {
        forge_props.push(
            r#"                        stash: Object.freeze({
                            put: async (key, value, opts) => {
                                const ttl = (opts && opts.ttl) ? opts.ttl : 0;
                                const resultJson = await stashPutOp(key, JSON.stringify(value), ttl);
                                return JSON.parse(resultJson);
                            },
                            get: async (key) => {
                                const resultJson = await stashGetOp(key);
                                return JSON.parse(resultJson);
                            },
                            delete: async (key) => {
                                const resultJson = await stashDeleteOp(key);
                                return JSON.parse(resultJson);
                            },
                            keys: async () => {
                                const resultJson = await stashKeysOp();
                                return JSON.parse(resultJson);
                            }
                        })"#
            .to_string(),
        );
    }

    // server proxy is always included
    forge_props.push(
        r#"                        server: (name) => {
                            return new Proxy({}, {
                                get(_target, category) {
                                    return new Proxy({}, {
                                        get(_target2, tool) {
                                            return async (args) => {
                                                return callTool(
                                                    name,
                                                    `${category}.${tool}`,
                                                    args || {}
                                                );
                                            };
                                        }
                                    });
                                }
                            });
                        }"#
        .to_string(),
    );

    // forge.parallel() — bounded concurrency wrapper over callTool/readResource.
    // BUG FIX: concurrency is now clamped to >= 1. Previously a negative
    // opts.concurrency passed straight through Math.min, making the batch
    // loop's `i += concurrency` never advance — an infinite loop that only
    // the CPU watchdog could stop.
    forge_props.push(
        r#"                        parallel: async (calls, opts) => {
                            opts = opts || {};
                            const concurrency = Math.max(1, Math.min(
                                opts.concurrency || __MAX_PARALLEL,
                                __MAX_PARALLEL
                            ));
                            const failFast = opts.failFast || false;
                            const results = new Array(calls.length).fill(null);
                            const errors = [];
                            let aborted = false;

                            for (let i = 0; i < calls.length && !aborted; i += concurrency) {
                                const batch = calls.slice(i, i + concurrency);
                                await Promise.allSettled(
                                    batch.map((fn, idx) => fn().then(
                                        val => { results[i + idx] = val; },
                                        err => {
                                            errors.push({ index: i + idx, error: err.message || String(err) });
                                            if (failFast) aborted = true;
                                        }
                                    ))
                                );
                            }

                            return { results, errors, aborted };
                        }"#
        .to_string(),
    );

    let forge_obj = format!(
        r#"
                    globalThis.forge = Object.freeze({{
{}
                    }});"#,
        forge_props.join(",\n")
    );
    parts.push(forge_obj);

    // Security: remove dangerous globals.
    // NOTE(review): as in the search bootstrap, `globalThis.Function` itself is
    // not deleted — presumably validate_code covers direct calls; confirm.
    parts.push(
        r#"
                    delete globalThis.Deno;

                    // Remove code generation primitives to prevent prototype chain attacks.
                    delete globalThis.eval;
                    const AsyncFunction = (async function(){}).constructor;
                    const GeneratorFunction = (function*(){}).constructor;
                    Object.defineProperty(Function.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                })(Deno.core.ops);"#
            .to_string(),
    );

    parts.join("\n")
}
671
672/// Create a fresh JsRuntime with the forge extension loaded and V8 heap limits set.
673pub(crate) fn create_runtime(
674    dispatcher: Option<Arc<dyn ToolDispatcher>>,
675    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
676    max_heap_size: usize,
677    tool_call_limits: Option<ToolCallLimits>,
678    max_resource_size: Option<usize>,
679    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
680    known_servers: Option<std::collections::HashSet<String>>,
681) -> Result<JsRuntime, SandboxError> {
682    let create_params = v8::CreateParams::default().heap_limits(0, max_heap_size);
683
684    let runtime = JsRuntime::new(RuntimeOptions {
685        extensions: vec![forge_ext::init()],
686        create_params: Some(create_params),
687        ..Default::default()
688    });
689
690    if let Some(d) = dispatcher {
691        runtime.op_state().borrow_mut().put(d);
692    }
693    if let Some(rd) = resource_dispatcher {
694        runtime.op_state().borrow_mut().put(rd);
695    }
696    if let Some(limits) = tool_call_limits {
697        runtime.op_state().borrow_mut().put(limits);
698    }
699    if let Some(size) = max_resource_size {
700        runtime.op_state().borrow_mut().put(MaxResourceSize(size));
701    }
702    if let Some(sd) = stash_dispatcher {
703        runtime.op_state().borrow_mut().put(sd);
704        // CurrentGroup defaults to None; the ForgeServer level sets the actual group
705        runtime.op_state().borrow_mut().put(CurrentGroup(None));
706    }
707    if let Some(servers) = known_servers {
708        runtime.op_state().borrow_mut().put(KnownServers(servers));
709    }
710
711    Ok(runtime)
712}
713
/// Wrap the user's async arrow function, execute it, and extract the result.
///
/// Sets up a CPU watchdog thread and near-heap-limit callback before running
/// user code. The watchdog terminates V8 execution if the timeout elapses
/// (handles CPU-bound infinite loops). The heap callback terminates execution
/// if V8 approaches the heap limit (prevents OOM abort).
///
/// Error priority on failure: heap limit > timeout > JS error — a terminated
/// isolate also surfaces a JS error, so the cause flags are checked first.
async fn run_user_code(
    runtime: &mut JsRuntime,
    code: &str,
    config: &SandboxConfig,
) -> Result<Value, SandboxError> {
    // --- Set up heap limit callback ---
    // The raw pointer handed to V8 aliases this Box; see the SAFETY comment on
    // near_heap_limit_callback for why that is sound during execution.
    //
    // NOTE(review): the callback is never unregistered and `heap_state` is
    // dropped when this function returns while `runtime` lives on in the
    // caller. This looks safe only if V8 cannot fire the callback after the
    // event loop completes (callers drop the runtime immediately) — confirm,
    // or remove the callback before returning.
    let heap_state = Box::new(HeapLimitState {
        handle: runtime.v8_isolate().thread_safe_handle(),
        triggered: AtomicBool::new(false),
    });
    runtime.v8_isolate().add_near_heap_limit_callback(
        near_heap_limit_callback,
        &*heap_state as *const HeapLimitState as *mut std::ffi::c_void,
    );

    // --- Set up CPU watchdog ---
    // A plain OS thread (not a tokio task) so it can fire even when V8 is
    // stuck in a synchronous loop and never yields to the event loop.
    let watchdog_handle = runtime.v8_isolate().thread_safe_handle();
    let timed_out = Arc::new(AtomicBool::new(false));
    let watchdog_timed_out = timed_out.clone();
    let timeout = config.timeout;
    let (cancel_tx, cancel_rx) = std::sync::mpsc::channel::<()>();

    let watchdog = std::thread::spawn(move || {
        // Either the main path cancels us (recv Ok / Disconnected) or the
        // timeout elapses and we terminate the isolate.
        if let Err(std::sync::mpsc::RecvTimeoutError::Timeout) = cancel_rx.recv_timeout(timeout) {
            watchdog_timed_out.store(true, Ordering::SeqCst);
            watchdog_handle.terminate_execution();
        }
    });

    // --- Execute user code ---
    // `code` is expected to be an async arrow function expression; the result
    // (or the thrown error) is serialized into OpState via forge.__setResult.
    let wrapped = format!(
        r#"
        (async () => {{
            try {{
                const __userFn = {code};
                const __result = await __userFn();
                forge.__setResult(
                    JSON.stringify({{ ok: __result }})
                );
            }} catch (e) {{
                forge.__setResult(
                    JSON.stringify({{ error: e.message || String(e) }})
                );
            }}
        }})();
        "#
    );

    let exec_error = match runtime.execute_script("[forge:execute]", wrapped) {
        Ok(_) => {
            // Drive the event loop to resolve async operations; the tokio-level
            // timeout backstops async stalls the CPU watchdog cannot see.
            match tokio::time::timeout(
                config.timeout,
                runtime.run_event_loop(PollEventLoopOptions::default()),
            )
            .await
            {
                Ok(Ok(())) => None,
                Ok(Err(e)) => Some(e.to_string()),
                Err(_) => Some("async timeout".to_string()),
            }
        }
        Err(e) => Some(e.to_string()),
    };

    // --- Cleanup: cancel watchdog and wait for it to exit ---
    // This ensures the watchdog thread is done before we drop the runtime,
    // preventing use-after-free on the IsolateHandle.
    let _ = cancel_tx.send(());
    let _ = watchdog.join();

    // --- Check error causes in priority order ---
    if heap_state.triggered.load(Ordering::SeqCst) {
        return Err(SandboxError::HeapLimitExceeded);
    }

    if timed_out.load(Ordering::SeqCst) {
        return Err(SandboxError::Timeout {
            timeout_ms: config.timeout.as_millis() as u64,
        });
    }

    if let Some(err_msg) = exec_error {
        return Err(SandboxError::JsError { message: err_msg });
    }

    // --- Extract result from OpState ---
    // Missing ExecutionResult means user code never called forge.__setResult
    // (e.g. the wrapper itself was broken) — reported as a JS error.
    let result_str = {
        let state = runtime.op_state();
        let state = state.borrow();
        state
            .try_borrow::<ExecutionResult>()
            .map(|r| r.0.clone())
            .ok_or_else(|| SandboxError::JsError {
                message: "no result returned from sandbox execution".into(),
            })?
    };

    if result_str.len() > config.max_output_size {
        return Err(SandboxError::OutputTooLarge {
            max: config.max_output_size,
        });
    }

    // The envelope is { ok: ... } on success or { error: "..." } on a caught
    // JS exception inside the wrapper.
    let envelope: Value = serde_json::from_str(&result_str)?;

    if let Some(error) = envelope.get("error") {
        return Err(SandboxError::JsError {
            message: error.as_str().unwrap_or("unknown error").to_string(),
        });
    }

    Ok(envelope.get("ok").cloned().unwrap_or(Value::Null))
}
834
835#[cfg(test)]
836mod tests {
837    use super::*;
838
839    fn executor() -> SandboxExecutor {
840        SandboxExecutor::new(SandboxConfig::default())
841    }
842
    /// Test dispatcher that echoes back the server/tool/args.
    ///
    /// Lets tests assert that routing information and argument payloads
    /// survive the round trip through the sandbox unchanged.
    struct TestDispatcher;
845
846    #[async_trait::async_trait]
847    impl ToolDispatcher for TestDispatcher {
848        async fn call_tool(
849            &self,
850            server: &str,
851            tool: &str,
852            args: serde_json::Value,
853        ) -> Result<serde_json::Value, anyhow::Error> {
854            Ok(serde_json::json!({
855                "server": server,
856                "tool": tool,
857                "args": args,
858                "status": "ok"
859            }))
860        }
861    }
862
863    #[tokio::test]
864    async fn search_returns_manifest_data() {
865        let exec = executor();
866        let manifest = serde_json::json!({
867            "tools": [
868                {"name": "parse_ast", "category": "ast"},
869                {"name": "find_symbols", "category": "symbols"},
870            ]
871        });
872
873        let code = r#"async () => {
874            return manifest.tools.filter(t => t.category === "ast");
875        }"#;
876
877        let result = exec.execute_search(code, &manifest).await.unwrap();
878        let tools = result.as_array().unwrap();
879        assert_eq!(tools.len(), 1);
880        assert_eq!(tools[0]["name"], "parse_ast");
881    }
882
883    #[tokio::test]
884    async fn search_handles_complex_queries() {
885        let exec = executor();
886        let manifest = serde_json::json!({
887            "servers": [
888                {
889                    "name": "narsil",
890                    "categories": {
891                        "ast": { "tools": ["parse", "query", "walk"] },
892                        "symbols": { "tools": ["find", "references"] }
893                    }
894                }
895            ]
896        });
897
898        let code = r#"async () => {
899            return manifest.servers
900                .map(s => ({ name: s.name, categories: Object.keys(s.categories) }));
901        }"#;
902
903        let result = exec.execute_search(code, &manifest).await.unwrap();
904        let servers = result.as_array().unwrap();
905        assert_eq!(servers[0]["name"], "narsil");
906    }
907
908    #[tokio::test]
909    async fn timeout_is_enforced() {
910        let exec = SandboxExecutor::new(SandboxConfig {
911            timeout: Duration::from_millis(200),
912            ..Default::default()
913        });
914        let manifest = serde_json::json!({});
915
916        // A never-resolving promise should trigger a timeout
917        let code = r#"async () => {
918            await new Promise(() => {});
919        }"#;
920
921        let start = std::time::Instant::now();
922        let err = exec.execute_search(code, &manifest).await.unwrap_err();
923        let elapsed = start.elapsed();
924
925        // Should be a timeout or a "no result" error (the event loop completes
926        // when there are no more pending ops, even if the promise is unresolved)
927        match &err {
928            SandboxError::Timeout { .. } => {}
929            SandboxError::JsError { message } if message.contains("no result") => {
930                // deno_core's event loop exits when there are no pending ops,
931                // so the never-resolving promise doesn't actually block
932            }
933            other => panic!("unexpected error: {other:?}, elapsed: {elapsed:?}"),
934        }
935    }
936
937    #[tokio::test]
938    async fn js_errors_are_captured() {
939        let exec = executor();
940        let manifest = serde_json::json!({});
941
942        let code = r#"async () => {
943            throw new Error("intentional test error");
944        }"#;
945
946        let err = exec.execute_search(code, &manifest).await.unwrap_err();
947        assert!(matches!(err, SandboxError::JsError { .. }));
948        let msg = err.to_string();
949        assert!(msg.contains("intentional test error"));
950    }
951
952    #[tokio::test]
953    async fn no_filesystem_access() {
954        let exec = executor();
955        let manifest = serde_json::json!({});
956
957        // require() is a banned pattern — caught by validator
958        let code = r#"async () => {
959            const fs = require("fs");
960            return "ESCAPED";
961        }"#;
962
963        let err = exec.execute_search(code, &manifest).await;
964        assert!(err.is_err());
965    }
966
967    #[tokio::test]
968    async fn no_network_access() {
969        let exec = executor();
970        let manifest = serde_json::json!({});
971
972        let code = r#"async () => {
973            try {
974                await fetch("https://example.com");
975                return "ESCAPED";
976            } catch(e) {
977                return "CONTAINED";
978            }
979        }"#;
980
981        let result = exec.execute_search(code, &manifest).await.unwrap();
982        assert_eq!(result, "CONTAINED");
983    }
984
985    // --- WU4 new tests ---
986
987    #[tokio::test]
988    async fn cpu_bound_infinite_loop_is_terminated() {
989        let exec = SandboxExecutor::new(SandboxConfig {
990            timeout: Duration::from_millis(500),
991            ..Default::default()
992        });
993        let manifest = serde_json::json!({});
994
995        let code = r#"async () => {
996            while(true) {}
997        }"#;
998
999        let start = std::time::Instant::now();
1000        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1001        let elapsed = start.elapsed();
1002
1003        assert!(
1004            matches!(err, SandboxError::Timeout { .. }),
1005            "expected timeout, got: {err:?}"
1006        );
1007        assert!(
1008            elapsed < Duration::from_secs(5),
1009            "should complete reasonably fast, took: {elapsed:?}"
1010        );
1011    }
1012
1013    #[tokio::test]
1014    async fn heap_limit_prevents_oom() {
1015        let exec = SandboxExecutor::new(SandboxConfig {
1016            max_heap_size: 10 * 1024 * 1024,  // 10 MB
1017            timeout: Duration::from_secs(30), // Long timeout so heap fills first
1018            ..Default::default()
1019        });
1020        let manifest = serde_json::json!({});
1021
1022        // Rapidly allocate memory to exceed the heap limit
1023        let code = r#"async () => {
1024            const arr = [];
1025            while(true) {
1026                arr.push(new Array(100000).fill("x"));
1027            }
1028        }"#;
1029
1030        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1031        assert!(
1032            matches!(
1033                err,
1034                SandboxError::HeapLimitExceeded | SandboxError::JsError { .. }
1035            ),
1036            "expected heap limit or JS error, got: {err:?}"
1037        );
1038    }
1039
1040    #[tokio::test]
1041    async fn concurrency_limit_enforced() {
1042        // Use max_concurrent=0 so no executions are allowed (deterministic test)
1043        let exec = SandboxExecutor::new(SandboxConfig {
1044            max_concurrent: 0,
1045            ..Default::default()
1046        });
1047
1048        let code = r#"async () => { return 1; }"#;
1049        let err = exec
1050            .execute_search(code, &serde_json::json!({}))
1051            .await
1052            .unwrap_err();
1053        assert!(
1054            matches!(err, SandboxError::ConcurrencyLimit { max: 0 }),
1055            "expected concurrency limit, got: {err:?}"
1056        );
1057    }
1058
1059    #[tokio::test]
1060    async fn deno_global_is_not_accessible() {
1061        let exec = executor();
1062        let manifest = serde_json::json!({});
1063
1064        let code = r#"async () => {
1065            const props = Object.getOwnPropertyNames(globalThis);
1066            return !props.includes("Deno");
1067        }"#;
1068
1069        let result = exec.execute_search(code, &manifest).await.unwrap();
1070        assert_eq!(result, true);
1071    }
1072
1073    #[tokio::test]
1074    async fn forge_object_is_frozen() {
1075        let exec = executor();
1076        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1077
1078        let code = r#"async () => {
1079            return Object.isFrozen(forge);
1080        }"#;
1081
1082        let result = exec
1083            .execute_code(code, dispatcher, None, None)
1084            .await
1085            .unwrap();
1086        assert_eq!(result, true);
1087    }
1088
1089    #[tokio::test]
1090    async fn tool_call_rate_limit() {
1091        let exec = SandboxExecutor::new(SandboxConfig {
1092            max_tool_calls: 2,
1093            ..Default::default()
1094        });
1095        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1096
1097        let code = r#"async () => {
1098            await forge.callTool("test", "tool1", {});
1099            await forge.callTool("test", "tool2", {});
1100            try {
1101                await forge.callTool("test", "tool3", {});
1102                return "should not reach here";
1103            } catch(e) {
1104                return e.message;
1105            }
1106        }"#;
1107
1108        let result = exec
1109            .execute_code(code, dispatcher, None, None)
1110            .await
1111            .unwrap();
1112        assert!(
1113            result
1114                .as_str()
1115                .unwrap()
1116                .contains("tool call limit exceeded"),
1117            "expected tool call limit message, got: {result:?}"
1118        );
1119    }
1120
1121    #[tokio::test]
1122    async fn tool_call_args_size_limit() {
1123        let exec = SandboxExecutor::new(SandboxConfig {
1124            max_tool_call_args_size: 100,
1125            ..Default::default()
1126        });
1127        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1128
1129        let code = r#"async () => {
1130            try {
1131                await forge.callTool("test", "tool", { data: "x".repeat(200) });
1132                return "should not reach here";
1133            } catch(e) {
1134                return e.message;
1135            }
1136        }"#;
1137
1138        let result = exec
1139            .execute_code(code, dispatcher, None, None)
1140            .await
1141            .unwrap();
1142        assert!(
1143            result.as_str().unwrap().contains("too large"),
1144            "expected args too large message, got: {result:?}"
1145        );
1146    }
1147
1148    #[tokio::test]
1149    async fn forge_log_works() {
1150        let exec = executor();
1151        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1152
1153        let code = r#"async () => {
1154            forge.log("test message from sandbox");
1155            return "ok";
1156        }"#;
1157
1158        let result = exec
1159            .execute_code(code, dispatcher, None, None)
1160            .await
1161            .unwrap();
1162        assert_eq!(result, "ok");
1163    }
1164
1165    #[tokio::test]
1166    async fn forge_server_proxy_calls_tool() {
1167        let exec = executor();
1168        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1169
1170        let code = r#"async () => {
1171            const result = await forge.server("narsil").ast.parse({ file: "test.rs" });
1172            return result;
1173        }"#;
1174
1175        let result = exec
1176            .execute_code(code, dispatcher, None, None)
1177            .await
1178            .unwrap();
1179        assert_eq!(result["server"], "narsil");
1180        assert_eq!(result["tool"], "ast.parse");
1181        assert_eq!(result["status"], "ok");
1182    }
1183
1184    #[tokio::test]
1185    async fn multiple_tool_calls_in_single_execution() {
1186        let exec = executor();
1187        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1188
1189        let code = r#"async () => {
1190            const r1 = await forge.callTool("server1", "tool1", {});
1191            const r2 = await forge.callTool("server2", "tool2", {});
1192            return [r1, r2];
1193        }"#;
1194
1195        let result = exec
1196            .execute_code(code, dispatcher, None, None)
1197            .await
1198            .unwrap();
1199        let arr = result.as_array().unwrap();
1200        assert_eq!(arr.len(), 2);
1201        assert_eq!(arr[0]["server"], "server1");
1202        assert_eq!(arr[1]["server"], "server2");
1203    }
1204
1205    #[tokio::test]
1206    async fn eval_is_not_accessible() {
1207        let exec = executor();
1208        let manifest = serde_json::json!({});
1209
1210        let code = r#"async () => {
1211            return typeof globalThis.eval;
1212        }"#;
1213
1214        let result = exec.execute_search(code, &manifest).await.unwrap();
1215        assert_eq!(result, "undefined");
1216    }
1217
1218    #[tokio::test]
1219    async fn function_constructor_is_blocked() {
1220        let exec = executor();
1221        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1222
1223        // Try to access Function via prototype chain — should get undefined
1224        let code = r#"async () => {
1225            const ctor = forge.log.constructor;
1226            return String(ctor);
1227        }"#;
1228
1229        let result = exec
1230            .execute_code(code, dispatcher, None, None)
1231            .await
1232            .unwrap();
1233        assert_eq!(result, "undefined");
1234    }
1235
1236    #[tokio::test]
1237    async fn async_function_constructor_is_blocked() {
1238        let exec = executor();
1239        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1240
1241        // Try to access AsyncFunction via prototype chain
1242        let code = r#"async () => {
1243            const fn1 = async () => {};
1244            const ctor = fn1.constructor;
1245            return String(ctor);
1246        }"#;
1247
1248        let result = exec
1249            .execute_code(code, dispatcher, None, None)
1250            .await
1251            .unwrap();
1252        assert_eq!(result, "undefined");
1253    }
1254
1255    // --- v0.2 Resource read test dispatchers ---
1256
    /// Resource dispatcher that echoes back server/uri.
    ///
    /// Lets tests confirm resource-read routing without a real backend.
    struct TestResourceDispatcher;
1259
1260    #[async_trait::async_trait]
1261    impl ResourceDispatcher for TestResourceDispatcher {
1262        async fn read_resource(
1263            &self,
1264            server: &str,
1265            uri: &str,
1266        ) -> Result<serde_json::Value, anyhow::Error> {
1267            Ok(serde_json::json!({
1268                "server": server,
1269                "uri": uri,
1270                "content": "test resource content"
1271            }))
1272        }
1273    }
1274
    /// Resource dispatcher that returns a large payload.
    struct LargeResourceDispatcher {
        // Number of bytes of "x" filler to place in the `data` field.
        content_size: usize,
    }
1279
1280    #[async_trait::async_trait]
1281    impl ResourceDispatcher for LargeResourceDispatcher {
1282        async fn read_resource(
1283            &self,
1284            _server: &str,
1285            _uri: &str,
1286        ) -> Result<serde_json::Value, anyhow::Error> {
1287            Ok(serde_json::json!({
1288                "data": "x".repeat(self.content_size)
1289            }))
1290        }
1291    }
1292
    /// Resource dispatcher that always fails with a configurable error.
    struct FailingResourceDispatcher {
        // Raw error text returned to the executor; tests use it to check
        // that sensitive details are redacted before reaching the sandbox.
        error_msg: String,
    }
1297
1298    #[async_trait::async_trait]
1299    impl ResourceDispatcher for FailingResourceDispatcher {
1300        async fn read_resource(
1301            &self,
1302            _server: &str,
1303            _uri: &str,
1304        ) -> Result<serde_json::Value, anyhow::Error> {
1305            Err(anyhow::anyhow!("{}", self.error_msg))
1306        }
1307    }
1308
1309    // --- RS-U01: readResource routes to correct server ---
1310    #[tokio::test]
1311    async fn rs_u01_read_resource_routes_to_correct_server() {
1312        let exec = executor();
1313        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1314        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1315            Some(Arc::new(TestResourceDispatcher));
1316
1317        let code = r#"async () => {
1318            const result = await forge.readResource("my-server", "file:///logs/app.log");
1319            return result;
1320        }"#;
1321
1322        let result = exec
1323            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1324            .await
1325            .unwrap();
1326        assert_eq!(result["server"], "my-server");
1327        assert_eq!(result["uri"], "file:///logs/app.log");
1328        assert_eq!(result["content"], "test resource content");
1329    }
1330
1331    // --- RS-U02: readResource increments ToolCallLimits.calls_made ---
1332    #[tokio::test]
1333    async fn rs_u02_read_resource_shares_rate_limit_with_tool_calls() {
1334        let exec = SandboxExecutor::new(SandboxConfig {
1335            max_tool_calls: 3,
1336            ..Default::default()
1337        });
1338        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1339        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1340            Some(Arc::new(TestResourceDispatcher));
1341
1342        // 1 tool call + 2 resource reads = 3 (limit), then 4th fails
1343        let code = r#"async () => {
1344            await forge.callTool("s", "t", {});
1345            await forge.readResource("s", "file:///a");
1346            await forge.readResource("s", "file:///b");
1347            try {
1348                await forge.readResource("s", "file:///c");
1349                return "should not reach here";
1350            } catch(e) {
1351                return e.message;
1352            }
1353        }"#;
1354
1355        let result = exec
1356            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1357            .await
1358            .unwrap();
1359        assert!(
1360            result
1361                .as_str()
1362                .unwrap()
1363                .contains("tool call limit exceeded"),
1364            "expected rate limit message, got: {result:?}"
1365        );
1366    }
1367
1368    // --- RS-U03: readResource rejects when limits exhausted ---
1369    #[tokio::test]
1370    async fn rs_u03_read_resource_rejects_when_limits_exhausted() {
1371        let exec = SandboxExecutor::new(SandboxConfig {
1372            max_tool_calls: 1,
1373            ..Default::default()
1374        });
1375        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1376        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1377            Some(Arc::new(TestResourceDispatcher));
1378
1379        let code = r#"async () => {
1380            await forge.readResource("s", "file:///a");
1381            try {
1382                await forge.readResource("s", "file:///b");
1383                return "should not reach here";
1384            } catch(e) {
1385                return e.message;
1386            }
1387        }"#;
1388
1389        let result = exec
1390            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1391            .await
1392            .unwrap();
1393        assert!(
1394            result
1395                .as_str()
1396                .unwrap()
1397                .contains("tool call limit exceeded"),
1398            "expected rate limit error, got: {result:?}"
1399        );
1400    }
1401
1402    // --- RS-U08: truncates response at max_resource_size ---
1403    #[tokio::test]
1404    async fn rs_u08_read_resource_truncates_at_max_resource_size() {
1405        let exec = SandboxExecutor::new(SandboxConfig {
1406            max_resource_size: 100, // very small limit
1407            ..Default::default()
1408        });
1409        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1410        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1411            Some(Arc::new(LargeResourceDispatcher { content_size: 500 }));
1412
1413        // Large resource truncated → JSON.parse fails in bootstrap
1414        let code = r#"async () => {
1415            try {
1416                await forge.readResource("s", "file:///big");
1417                return "no truncation";
1418            } catch(e) {
1419                return "truncated";
1420            }
1421        }"#;
1422
1423        let result = exec
1424            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1425            .await
1426            .unwrap();
1427        assert_eq!(result, "truncated", "large resource should be truncated");
1428    }
1429
1430    // --- RS-U09: errors redacted through redact_error_for_llm ---
1431    #[tokio::test]
1432    async fn rs_u09_read_resource_redacts_errors() {
1433        let exec = executor();
1434        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1435        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1436            Some(Arc::new(FailingResourceDispatcher {
1437                error_msg: "connection refused: http://internal.corp:9876/secret/path".into(),
1438            }));
1439
1440        let code = r#"async () => {
1441            try {
1442                await forge.readResource("my-server", "file:///logs/secret.log");
1443                return "should not reach here";
1444            } catch(e) {
1445                return e.message;
1446            }
1447        }"#;
1448
1449        let result = exec
1450            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1451            .await
1452            .unwrap();
1453        let msg = result.as_str().unwrap();
1454        assert!(
1455            !msg.contains("internal.corp"),
1456            "should not leak internal URL: {msg}"
1457        );
1458        assert!(!msg.contains("9876"), "should not leak port: {msg}");
1459        assert!(
1460            msg.contains("my-server"),
1461            "should mention server name: {msg}"
1462        );
1463    }
1464
1465    // --- RS-U10: binary content (base64 encoding) ---
1466    #[tokio::test]
1467    async fn rs_u10_read_resource_handles_binary_content() {
1468        struct Base64ResourceDispatcher;
1469
1470        #[async_trait::async_trait]
1471        impl ResourceDispatcher for Base64ResourceDispatcher {
1472            async fn read_resource(
1473                &self,
1474                _server: &str,
1475                _uri: &str,
1476            ) -> Result<serde_json::Value, anyhow::Error> {
1477                Ok(serde_json::json!({
1478                    "content": "SGVsbG8gV29ybGQ=",
1479                    "_encoding": "base64"
1480                }))
1481            }
1482        }
1483
1484        let exec = executor();
1485        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1486        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1487            Some(Arc::new(Base64ResourceDispatcher));
1488
1489        let code = r#"async () => {
1490            const result = await forge.readResource("s", "file:///binary");
1491            return result;
1492        }"#;
1493
1494        let result = exec
1495            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1496            .await
1497            .unwrap();
1498        assert_eq!(result["_encoding"], "base64");
1499        assert_eq!(result["content"], "SGVsbG8gV29ybGQ=");
1500    }
1501
1502    // --- RS-U11: error for nonexistent resource ---
1503    #[tokio::test]
1504    async fn rs_u11_read_resource_error_for_nonexistent() {
1505        let exec = executor();
1506        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1507        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1508            Some(Arc::new(FailingResourceDispatcher {
1509                error_msg: "resource not found".into(),
1510            }));
1511
1512        let code = r#"async () => {
1513            try {
1514                await forge.readResource("s", "file:///nonexistent");
1515                return "should not reach here";
1516            } catch(e) {
1517                return e.message;
1518            }
1519        }"#;
1520
1521        let result = exec
1522            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1523            .await
1524            .unwrap();
1525        let msg = result.as_str().unwrap();
1526        assert!(
1527            msg.contains("failed"),
1528            "should indicate failure: {result:?}"
1529        );
1530    }
1531
1532    // --- RS-U12: handles large (>1MB) content ---
1533    #[tokio::test]
1534    async fn rs_u12_read_resource_handles_large_content() {
1535        let exec = SandboxExecutor::new(SandboxConfig {
1536            max_resource_size: 2 * 1024 * 1024, // 2 MB
1537            timeout: Duration::from_secs(10),
1538            ..Default::default()
1539        });
1540        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1541        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1542            Some(Arc::new(LargeResourceDispatcher {
1543                content_size: 1_100_000,
1544            }));
1545
1546        let code = r#"async () => {
1547            const result = await forge.readResource("s", "file:///large");
1548            return result.data.length;
1549        }"#;
1550
1551        let result = exec
1552            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1553            .await
1554            .unwrap();
1555        assert_eq!(result, 1_100_000);
1556    }
1557
1558    // --- RS-S05: URI for non-file-server — error redacted, no path leakage ---
1559    #[tokio::test]
1560    async fn rs_s05_error_on_invalid_resource_uri_for_server() {
1561        let exec = executor();
1562        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1563        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1564            Some(Arc::new(FailingResourceDispatcher {
1565                error_msg: "unknown resource URI: file:///etc/shadow".into(),
1566            }));
1567
1568        let code = r#"async () => {
1569            try {
1570                await forge.readResource("postgres-server", "file:///etc/shadow");
1571                return "should not reach here";
1572            } catch(e) {
1573                return e.message;
1574            }
1575        }"#;
1576
1577        let result = exec
1578            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1579            .await
1580            .unwrap();
1581        let msg = result.as_str().unwrap();
1582        // SR-R5: Error should use "readResource" not the raw URI
1583        assert!(
1584            !msg.contains("/etc/shadow"),
1585            "should not leak file path: {msg}"
1586        );
1587        // Should still mention server for context
1588        assert!(
1589            msg.contains("postgres-server"),
1590            "should mention server: {msg}"
1591        );
1592        assert!(
1593            msg.contains("readResource"),
1594            "should use safe identifier: {msg}"
1595        );
1596    }
1597
1598    // --- RS-S06: error message does not leak full URI path ---
1599    #[tokio::test]
1600    async fn rs_s06_error_message_does_not_leak_full_uri() {
1601        let exec = executor();
1602        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1603        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1604            Some(Arc::new(FailingResourceDispatcher {
1605                error_msg: "file not found: /var/secrets/database/credentials.json".into(),
1606            }));
1607
1608        let code = r#"async () => {
1609            try {
1610                await forge.readResource("server", "file:///var/secrets/database/credentials.json");
1611                return "should not reach here";
1612            } catch(e) {
1613                return e.message;
1614            }
1615        }"#;
1616
1617        let result = exec
1618            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1619            .await
1620            .unwrap();
1621        let msg = result.as_str().unwrap();
1622        // Paths are redacted by redact_error_message
1623        assert!(!msg.contains("/var/secrets"), "should not leak path: {msg}");
1624        assert!(
1625            !msg.contains("credentials.json"),
1626            "should not leak filename: {msg}"
1627        );
1628        // URI itself should not appear in error (SR-R5)
1629        assert!(
1630            !msg.contains("file:///var/secrets"),
1631            "should not leak URI: {msg}"
1632        );
1633    }
1634
1635    // --- RS-S07: large content truncated, not OOM ---
1636    #[tokio::test]
1637    async fn rs_s07_large_content_truncated_not_oom() {
1638        let exec = SandboxExecutor::new(SandboxConfig {
1639            max_resource_size: 1024, // 1 KB limit
1640            timeout: Duration::from_secs(10),
1641            ..Default::default()
1642        });
1643        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1644        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1645            Some(Arc::new(LargeResourceDispatcher {
1646                content_size: 1_000_000, // 1 MB, far exceeds 1 KB limit
1647            }));
1648
1649        let code = r#"async () => {
1650            try {
1651                const result = await forge.readResource("s", "file:///huge");
1652                return "got result without truncation";
1653            } catch(e) {
1654                return "safely truncated";
1655            }
1656        }"#;
1657
1658        // Must complete without OOM
1659        let result = exec
1660            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1661            .await;
1662        assert!(result.is_ok(), "should complete without OOM: {result:?}");
1663        assert_eq!(result.unwrap(), "safely truncated");
1664    }
1665
1666    // --- RS-S08: many resource reads hit rate limit ---
1667    #[tokio::test]
1668    async fn rs_s08_many_reads_hit_rate_limit() {
1669        let exec = SandboxExecutor::new(SandboxConfig {
1670            max_tool_calls: 5,
1671            ..Default::default()
1672        });
1673        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1674        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1675            Some(Arc::new(TestResourceDispatcher));
1676
1677        let code = r#"async () => {
1678            let count = 0;
1679            for (let i = 0; i < 1000; i++) {
1680                try {
1681                    await forge.readResource("s", "file:///r" + i);
1682                    count++;
1683                } catch(e) {
1684                    return { count, error: e.message };
1685                }
1686            }
1687            return { count, error: null };
1688        }"#;
1689
1690        let result = exec
1691            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1692            .await
1693            .unwrap();
1694        assert_eq!(
1695            result["count"], 5,
1696            "should allow exactly max_tool_calls reads"
1697        );
1698        assert!(result["error"]
1699            .as_str()
1700            .unwrap()
1701            .contains("tool call limit exceeded"));
1702    }
1703
1704    // --- RS-S09: search mode blocks resource read ---
1705    #[tokio::test]
1706    async fn rs_s09_search_mode_blocks_resource_read() {
1707        let exec = executor();
1708        let manifest = serde_json::json!({"servers": []});
1709
1710        // In search mode, forge.readResource should not exist
1711        let code = r#"async () => {
1712            return typeof forge.readResource;
1713        }"#;
1714
1715        let result = exec.execute_search(code, &manifest).await.unwrap();
1716        assert_eq!(
1717            result, "undefined",
1718            "readResource should not exist in search mode"
1719        );
1720    }
1721
1722    // --- SR-R6: unknown server rejected at op level ---
1723    #[tokio::test]
1724    async fn sr_r6_unknown_server_rejected_at_op_level() {
1725        let exec = executor();
1726        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1727        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1728            Some(Arc::new(TestResourceDispatcher));
1729
1730        // Use execute_code_with_options to set known servers
1731        let mut known = std::collections::HashSet::new();
1732        known.insert("allowed-server".to_string());
1733
1734        let code = r#"async () => {
1735            try {
1736                await forge.readResource("nonexistent_server", "file:///x");
1737                return "should not reach here";
1738            } catch(e) {
1739                return e.message;
1740            }
1741        }"#;
1742
1743        let result = exec
1744            .execute_code_with_options(
1745                code,
1746                tool_dispatcher,
1747                resource_dispatcher,
1748                None,
1749                Some(known),
1750            )
1751            .await
1752            .unwrap();
1753        let msg = result.as_str().unwrap();
1754        assert!(
1755            msg.contains("unknown server"),
1756            "expected 'unknown server' error, got: {msg}"
1757        );
1758        assert!(
1759            msg.contains("nonexistent_server"),
1760            "should mention the server name: {msg}"
1761        );
1762    }
1763
1764    // --- RS-S10: audit log records resource reads with URI hash ---
1765    #[tokio::test]
1766    async fn rs_s10_audit_records_resource_reads_with_uri_hash() {
1767        struct CapturingAuditLogger {
1768            entries: std::sync::Mutex<Vec<crate::audit::AuditEntry>>,
1769        }
1770
1771        #[async_trait::async_trait]
1772        impl crate::audit::AuditLogger for CapturingAuditLogger {
1773            async fn log(&self, entry: &crate::audit::AuditEntry) {
1774                self.entries.lock().unwrap().push(entry.clone());
1775            }
1776        }
1777
1778        let logger = Arc::new(CapturingAuditLogger {
1779            entries: std::sync::Mutex::new(Vec::new()),
1780        });
1781        let exec = SandboxExecutor::with_audit_logger(SandboxConfig::default(), logger.clone());
1782        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1783        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1784            Some(Arc::new(TestResourceDispatcher));
1785
1786        let code = r#"async () => {
1787            await forge.readResource("my-server", "file:///logs/app.log");
1788            return "done";
1789        }"#;
1790
1791        let _ = exec
1792            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1793            .await
1794            .unwrap();
1795
1796        let entries = logger.entries.lock().unwrap();
1797        assert_eq!(entries.len(), 1);
1798        let entry = &entries[0];
1799        assert_eq!(entry.resource_reads.len(), 1);
1800
1801        let read = &entry.resource_reads[0];
1802        assert_eq!(read.server, "my-server");
1803        assert!(read.success);
1804        // URI should be hashed, not raw
1805        assert_ne!(
1806            read.uri_hash, "file:///logs/app.log",
1807            "URI should be hashed, not stored raw"
1808        );
1809        // Verify it's a valid SHA-256 hex string
1810        assert_eq!(read.uri_hash.len(), 64, "should be SHA-256 hex");
1811        assert!(read.uri_hash.chars().all(|c| c.is_ascii_hexdigit()));
1812    }
1813
1814    #[tokio::test]
1815    async fn large_output_is_rejected() {
1816        let exec = SandboxExecutor::new(SandboxConfig {
1817            max_output_size: 100,
1818            ..Default::default()
1819        });
1820        let manifest = serde_json::json!({});
1821
1822        let code = r#"async () => {
1823            return "x".repeat(1000);
1824        }"#;
1825
1826        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1827        assert!(
1828            matches!(err, SandboxError::OutputTooLarge { .. }),
1829            "expected output too large, got: {err:?}"
1830        );
1831    }
1832
    // --- Stash test infrastructure ---

    /// Direct stash dispatcher wrapping an Arc<tokio::sync::Mutex<SessionStash>>.
    /// Used by integration/security tests without going through IPC.
    ///
    /// The group passed at construction time is used for every operation; the
    /// per-call `_current_group` arguments on the trait methods are ignored.
    struct DirectStashDispatcher {
        // Shared stash storage; the same Arc may back several dispatchers.
        stash: Arc<tokio::sync::Mutex<crate::stash::SessionStash>>,
        // Group identity applied to all operations; `None` means ungrouped.
        current_group: Option<String>,
    }
1841
1842    #[async_trait::async_trait]
1843    impl crate::StashDispatcher for DirectStashDispatcher {
1844        async fn put(
1845            &self,
1846            key: &str,
1847            value: serde_json::Value,
1848            ttl_secs: Option<u32>,
1849            _current_group: Option<String>,
1850        ) -> Result<serde_json::Value, anyhow::Error> {
1851            let ttl = ttl_secs
1852                .filter(|&s| s > 0)
1853                .map(|s| std::time::Duration::from_secs(s as u64));
1854            let mut stash = self.stash.lock().await;
1855            stash.put(key, value, ttl, self.current_group.as_deref())?;
1856            Ok(serde_json::json!({"ok": true}))
1857        }
1858
1859        async fn get(
1860            &self,
1861            key: &str,
1862            _current_group: Option<String>,
1863        ) -> Result<serde_json::Value, anyhow::Error> {
1864            let stash = self.stash.lock().await;
1865            match stash.get(key, self.current_group.as_deref())? {
1866                Some(v) => Ok(v.clone()),
1867                None => Ok(serde_json::Value::Null),
1868            }
1869        }
1870
1871        async fn delete(
1872            &self,
1873            key: &str,
1874            _current_group: Option<String>,
1875        ) -> Result<serde_json::Value, anyhow::Error> {
1876            let mut stash = self.stash.lock().await;
1877            let deleted = stash.delete(key, self.current_group.as_deref())?;
1878            Ok(serde_json::json!({"deleted": deleted}))
1879        }
1880
1881        async fn keys(
1882            &self,
1883            _current_group: Option<String>,
1884        ) -> Result<serde_json::Value, anyhow::Error> {
1885            let stash = self.stash.lock().await;
1886            let keys: Vec<&str> = stash.keys(self.current_group.as_deref());
1887            Ok(serde_json::json!(keys))
1888        }
1889    }
1890
1891    fn make_stash(
1892        config: crate::stash::StashConfig,
1893    ) -> Arc<tokio::sync::Mutex<crate::stash::SessionStash>> {
1894        Arc::new(tokio::sync::Mutex::new(crate::stash::SessionStash::new(
1895            config,
1896        )))
1897    }
1898
1899    fn make_stash_dispatcher(
1900        stash: Arc<tokio::sync::Mutex<crate::stash::SessionStash>>,
1901        group: Option<&str>,
1902    ) -> Arc<dyn crate::StashDispatcher> {
1903        Arc::new(DirectStashDispatcher {
1904            stash,
1905            current_group: group.map(str::to_string),
1906        })
1907    }
1908
1909    // --- ST-I01: Two execute_code calls sharing stash (put in first, get in second) ---
1910    #[tokio::test]
1911    async fn st_i01_stash_shared_across_executions() {
1912        let exec = executor();
1913        let stash = make_stash(crate::stash::StashConfig::default());
1914        let sd = make_stash_dispatcher(stash.clone(), None);
1915        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1916
1917        // First execution: put a value
1918        let code1 = r#"async () => {
1919            await forge.stash.put("shared-key", { value: 42 });
1920            return "stored";
1921        }"#;
1922        let result1 = exec
1923            .execute_code(code1, dispatcher.clone(), None, Some(sd.clone()))
1924            .await
1925            .unwrap();
1926        assert_eq!(result1, "stored");
1927
1928        // Second execution: get the value
1929        let sd2 = make_stash_dispatcher(stash, None);
1930        let code2 = r#"async () => {
1931            const v = await forge.stash.get("shared-key");
1932            return v;
1933        }"#;
1934        let result2 = exec
1935            .execute_code(code2, dispatcher, None, Some(sd2))
1936            .await
1937            .unwrap();
1938        assert_eq!(result2["value"], 42);
1939    }
1940
1941    // --- ST-I02: Stash put + get within single execution ---
1942    #[tokio::test]
1943    async fn st_i02_stash_put_get_single_execution() {
1944        let exec = executor();
1945        let stash = make_stash(crate::stash::StashConfig::default());
1946        let sd = make_stash_dispatcher(stash, None);
1947        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1948
1949        let code = r#"async () => {
1950            await forge.stash.put("key", "hello");
1951            const v = await forge.stash.get("key");
1952            return v;
1953        }"#;
1954        let result = exec
1955            .execute_code(code, dispatcher, None, Some(sd))
1956            .await
1957            .unwrap();
1958        assert_eq!(result, "hello");
1959    }
1960
1961    // --- ST-I03: Stash group isolation (put with group A, get with group B fails) ---
1962    #[tokio::test]
1963    async fn st_i03_stash_group_isolation() {
1964        let exec = executor();
1965        let stash = make_stash(crate::stash::StashConfig::default());
1966        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1967
1968        // Put with group A
1969        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
1970        let code1 = r#"async () => {
1971            await forge.stash.put("secret", "group-a-data");
1972            return "stored";
1973        }"#;
1974        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
1975            .await
1976            .unwrap();
1977
1978        // Get with group B should fail
1979        let sd_b = make_stash_dispatcher(stash, Some("group-b"));
1980        let code2 = r#"async () => {
1981            try {
1982                await forge.stash.get("secret");
1983                return "should not reach here";
1984            } catch(e) {
1985                return e.message;
1986            }
1987        }"#;
1988        let result = exec
1989            .execute_code(code2, dispatcher, None, Some(sd_b))
1990            .await
1991            .unwrap();
1992        assert!(
1993            result.as_str().unwrap().contains("cross-group"),
1994            "expected cross-group error, got: {result:?}"
1995        );
1996    }
1997
1998    // --- ST-I05: Stash combined with callTool + readResource ---
1999    #[tokio::test]
2000    async fn st_i05_stash_combined_with_tool_and_resource() {
2001        let exec = executor();
2002        let stash = make_stash(crate::stash::StashConfig::default());
2003        let sd = make_stash_dispatcher(stash, None);
2004        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2005        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
2006            Some(Arc::new(TestResourceDispatcher));
2007
2008        let code = r#"async () => {
2009            // Call a tool
2010            const toolResult = await forge.callTool("s", "t", {});
2011
2012            // Read a resource
2013            const resource = await forge.readResource("s", "file:///data");
2014
2015            // Store combined result in stash
2016            await forge.stash.put("combined", {
2017                tool: toolResult.server,
2018                resource: resource.content
2019            });
2020
2021            // Read it back
2022            const v = await forge.stash.get("combined");
2023            return v;
2024        }"#;
2025        let result = exec
2026            .execute_code(code, tool_dispatcher, resource_dispatcher, Some(sd))
2027            .await
2028            .unwrap();
2029        assert_eq!(result["tool"], "s");
2030        assert_eq!(result["resource"], "test resource content");
2031    }
2032
2033    // --- ST-I06: Stash key limit produces clear error ---
2034    #[tokio::test]
2035    async fn st_i06_stash_key_limit_error() {
2036        let exec = executor();
2037        let stash = make_stash(crate::stash::StashConfig {
2038            max_keys: 2,
2039            ..Default::default()
2040        });
2041        let sd = make_stash_dispatcher(stash, None);
2042        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2043
2044        let code = r#"async () => {
2045            await forge.stash.put("k1", 1);
2046            await forge.stash.put("k2", 2);
2047            try {
2048                await forge.stash.put("k3", 3);
2049                return "should not reach here";
2050            } catch(e) {
2051                return e.message;
2052            }
2053        }"#;
2054        let result = exec
2055            .execute_code(code, dispatcher, None, Some(sd))
2056            .await
2057            .unwrap();
2058        assert!(
2059            result.as_str().unwrap().contains("key limit"),
2060            "expected key limit error, got: {result:?}"
2061        );
2062    }
2063
2064    // --- ST-I07: Stash value size limit produces clear error ---
2065    #[tokio::test]
2066    async fn st_i07_stash_value_size_limit_error() {
2067        let exec = executor();
2068        let stash = make_stash(crate::stash::StashConfig {
2069            max_value_size: 50,
2070            ..Default::default()
2071        });
2072        let sd = make_stash_dispatcher(stash, None);
2073        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2074
2075        let code = r#"async () => {
2076            try {
2077                await forge.stash.put("k", "x".repeat(100));
2078                return "should not reach here";
2079            } catch(e) {
2080                return e.message;
2081            }
2082        }"#;
2083        let result = exec
2084            .execute_code(code, dispatcher, None, Some(sd))
2085            .await
2086            .unwrap();
2087        assert!(
2088            result.as_str().unwrap().contains("too large"),
2089            "expected value too large error, got: {result:?}"
2090        );
2091    }
2092
2093    // --- ST-I08: Stash keys() returns correct subset for group context ---
2094    #[tokio::test]
2095    async fn st_i08_stash_keys_group_subset() {
2096        let exec = executor();
2097        let stash = make_stash(crate::stash::StashConfig::default());
2098        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2099
2100        // Put a public key and a group-A key
2101        let sd_none = make_stash_dispatcher(stash.clone(), None);
2102        let code1 = r#"async () => {
2103            await forge.stash.put("public-key", "pub");
2104            return "ok";
2105        }"#;
2106        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_none))
2107            .await
2108            .unwrap();
2109
2110        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2111        let code2 = r#"async () => {
2112            await forge.stash.put("group-a-key", "secret");
2113            return "ok";
2114        }"#;
2115        exec.execute_code(code2, dispatcher.clone(), None, Some(sd_a))
2116            .await
2117            .unwrap();
2118
2119        // List keys from group-a perspective: should see both
2120        let sd_a2 = make_stash_dispatcher(stash.clone(), Some("group-a"));
2121        let code3 = r#"async () => {
2122            const k = await forge.stash.keys();
2123            k.sort();
2124            return k;
2125        }"#;
2126        let result = exec
2127            .execute_code(code3, dispatcher.clone(), None, Some(sd_a2))
2128            .await
2129            .unwrap();
2130        let keys = result.as_array().unwrap();
2131        assert_eq!(keys.len(), 2);
2132
2133        // List keys from ungrouped: should only see public
2134        let sd_none2 = make_stash_dispatcher(stash, None);
2135        let code4 = r#"async () => {
2136            const k = await forge.stash.keys();
2137            return k;
2138        }"#;
2139        let result2 = exec
2140            .execute_code(code4, dispatcher, None, Some(sd_none2))
2141            .await
2142            .unwrap();
2143        let keys2 = result2.as_array().unwrap();
2144        assert_eq!(keys2.len(), 1);
2145        assert_eq!(keys2[0], "public-key");
2146    }
2147
2148    // --- Security Tests ---
2149
2150    // --- ST-S01: Stash key with path traversal characters rejected ---
2151    #[tokio::test]
2152    async fn st_s01_stash_key_path_traversal_rejected() {
2153        let exec = executor();
2154        let stash = make_stash(crate::stash::StashConfig::default());
2155        let sd = make_stash_dispatcher(stash, None);
2156        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2157
2158        let code = r#"async () => {
2159            try {
2160                await forge.stash.put("../../etc/passwd", "evil");
2161                return "should not reach here";
2162            } catch(e) {
2163                return e.message;
2164            }
2165        }"#;
2166        let result = exec
2167            .execute_code(code, dispatcher, None, Some(sd))
2168            .await
2169            .unwrap();
2170        assert!(
2171            result.as_str().unwrap().contains("invalid"),
2172            "expected invalid key error, got: {result:?}"
2173        );
2174    }
2175
2176    // --- ST-S02: Stash key with script injection (<script>) rejected ---
2177    #[tokio::test]
2178    async fn st_s02_stash_key_script_injection_rejected() {
2179        let exec = executor();
2180        let stash = make_stash(crate::stash::StashConfig::default());
2181        let sd = make_stash_dispatcher(stash, None);
2182        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2183
2184        let code = r#"async () => {
2185            try {
2186                await forge.stash.put("<script>alert(1)</script>", "evil");
2187                return "should not reach here";
2188            } catch(e) {
2189                return e.message;
2190            }
2191        }"#;
2192        let result = exec
2193            .execute_code(code, dispatcher, None, Some(sd))
2194            .await
2195            .unwrap();
2196        assert!(
2197            result.as_str().unwrap().contains("invalid"),
2198            "expected invalid key error, got: {result:?}"
2199        );
2200    }
2201
2202    // --- ST-S03: Stash value containing JS code stored as inert data ---
2203    #[tokio::test]
2204    async fn st_s03_stash_value_js_code_is_inert() {
2205        let exec = executor();
2206        let stash = make_stash(crate::stash::StashConfig::default());
2207        let sd = make_stash_dispatcher(stash, None);
2208        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2209
2210        // Store a string that looks like executable JS code.
2211        // We build it from parts to avoid triggering the banned-pattern validator.
2212        let code = r#"async () => {
2213            const part1 = "function() { return ";
2214            const part2 = "globalThis.secret; }";
2215            const malicious = part1 + part2;
2216            await forge.stash.put("code-value", malicious);
2217            const v = await forge.stash.get("code-value");
2218            // The value should be a plain string, not executed
2219            return typeof v === "string" && v.includes("globalThis");
2220        }"#;
2221        let result = exec
2222            .execute_code(code, dispatcher, None, Some(sd))
2223            .await
2224            .unwrap();
2225        assert_eq!(result, true, "JS code in stash values should be inert data");
2226    }
2227
2228    // --- ST-S04: Stash put from group A, get from group B → error ---
2229    #[tokio::test]
2230    async fn st_s04_stash_cross_group_get_error() {
2231        let exec = executor();
2232        let stash = make_stash(crate::stash::StashConfig::default());
2233        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2234
2235        // Put with group A
2236        let sd_a = make_stash_dispatcher(stash.clone(), Some("team-alpha"));
2237        let code1 = r#"async () => {
2238            await forge.stash.put("alpha-secret", "classified");
2239            return "stored";
2240        }"#;
2241        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2242            .await
2243            .unwrap();
2244
2245        // Get with group B should error
2246        let sd_b = make_stash_dispatcher(stash, Some("team-beta"));
2247        let code2 = r#"async () => {
2248            try {
2249                await forge.stash.get("alpha-secret");
2250                return "leaked";
2251            } catch(e) {
2252                return e.message;
2253            }
2254        }"#;
2255        let result = exec
2256            .execute_code(code2, dispatcher, None, Some(sd_b))
2257            .await
2258            .unwrap();
2259        assert!(
2260            result.as_str().unwrap().contains("cross-group"),
2261            "expected cross-group error, got: {result:?}"
2262        );
2263    }
2264
2265    // --- ST-S05: Stash put from group A, get from ungrouped → error ---
2266    #[tokio::test]
2267    async fn st_s05_stash_grouped_entry_inaccessible_to_ungrouped() {
2268        let exec = executor();
2269        let stash = make_stash(crate::stash::StashConfig::default());
2270        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2271
2272        // Put with group A
2273        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-x"));
2274        let code1 = r#"async () => {
2275            await forge.stash.put("gx-data", 999);
2276            return "stored";
2277        }"#;
2278        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2279            .await
2280            .unwrap();
2281
2282        // Get from ungrouped should error
2283        let sd_none = make_stash_dispatcher(stash, None);
2284        let code2 = r#"async () => {
2285            try {
2286                await forge.stash.get("gx-data");
2287                return "leaked";
2288            } catch(e) {
2289                return e.message;
2290            }
2291        }"#;
2292        let result = exec
2293            .execute_code(code2, dispatcher, None, Some(sd_none))
2294            .await
2295            .unwrap();
2296        assert!(
2297            result.as_str().unwrap().contains("cross-group"),
2298            "expected cross-group error, got: {result:?}"
2299        );
2300    }
2301
2302    // --- ST-S06: Stash total size limit prevents OOM (many puts) ---
2303    #[tokio::test]
2304    async fn st_s06_stash_total_size_limit_prevents_oom() {
2305        let exec = executor();
2306        let stash = make_stash(crate::stash::StashConfig {
2307            max_total_size: 200,
2308            max_value_size: 1024,
2309            max_keys: 1000,
2310            ..Default::default()
2311        });
2312        let sd = make_stash_dispatcher(stash, None);
2313        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2314
2315        let code = r#"async () => {
2316            let count = 0;
2317            for (let i = 0; i < 100; i++) {
2318                try {
2319                    await forge.stash.put("k" + i, "x".repeat(50));
2320                    count++;
2321                } catch(e) {
2322                    return { count, error: e.message };
2323                }
2324            }
2325            return { count, error: null };
2326        }"#;
2327        let result = exec
2328            .execute_code(code, dispatcher, None, Some(sd))
2329            .await
2330            .unwrap();
2331        // Should have been stopped before 100 puts due to total_size=200
2332        let count = result["count"].as_i64().unwrap();
2333        assert!(
2334            count < 100,
2335            "total size limit should prevent all 100 puts, but {count} succeeded"
2336        );
2337        assert!(
2338            result["error"].as_str().unwrap().contains("total size"),
2339            "expected total size error, got: {:?}",
2340            result["error"]
2341        );
2342    }
2343
2344    // --- ST-S07: Stash ops in search() mode blocked ---
2345    #[tokio::test]
2346    async fn st_s07_stash_ops_blocked_in_search_mode() {
2347        let exec = executor();
2348        let manifest = serde_json::json!({"servers": []});
2349
2350        // In search mode, forge.stash should not exist
2351        let code = r#"async () => {
2352            return typeof forge.stash;
2353        }"#;
2354
2355        let result = exec.execute_search(code, &manifest).await.unwrap();
2356        assert_eq!(result, "undefined", "stash should not exist in search mode");
2357    }
2358
2359    // --- ST-S09: Error messages from stash ops don't leak other keys/values ---
2360    #[tokio::test]
2361    async fn st_s09_stash_error_messages_dont_leak_data() {
2362        let exec = executor();
2363        let stash = make_stash(crate::stash::StashConfig::default());
2364        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2365
2366        // Put a secret value with group-a
2367        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2368        let code1 = r#"async () => {
2369            await forge.stash.put("secret-key", "top-secret-value-12345");
2370            return "stored";
2371        }"#;
2372        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2373            .await
2374            .unwrap();
2375
2376        // Try to access from group-b — error should not contain the value
2377        let sd_b = make_stash_dispatcher(stash, Some("group-b"));
2378        let code2 = r#"async () => {
2379            try {
2380                await forge.stash.get("secret-key");
2381                return "should not reach here";
2382            } catch(e) {
2383                return e.message;
2384            }
2385        }"#;
2386        let result = exec
2387            .execute_code(code2, dispatcher, None, Some(sd_b))
2388            .await
2389            .unwrap();
2390        let msg = result.as_str().unwrap();
2391        assert!(
2392            !msg.contains("top-secret-value-12345"),
2393            "error should not leak value: {msg}"
2394        );
2395        assert!(
2396            !msg.contains("secret-key"),
2397            "error should not leak key names: {msg}"
2398        );
2399    }
2400
2401    // --- ST-S10: TTL expiry enforced ---
2402    #[tokio::test]
2403    async fn st_s10_stash_ttl_expiry_enforced() {
2404        let exec = executor();
2405        let stash = make_stash(crate::stash::StashConfig::default());
2406        let sd = make_stash_dispatcher(stash.clone(), None);
2407        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2408
2409        // Put with 1-second TTL
2410        let code1 = r#"async () => {
2411            await forge.stash.put("ttl-key", "ephemeral", {ttl: 1});
2412            const v = await forge.stash.get("ttl-key");
2413            return v;
2414        }"#;
2415        let result1 = exec
2416            .execute_code(code1, dispatcher.clone(), None, Some(sd))
2417            .await
2418            .unwrap();
2419        assert_eq!(result1, "ephemeral", "should be readable immediately");
2420
2421        // Wait for TTL to expire
2422        tokio::time::sleep(std::time::Duration::from_millis(1100)).await;
2423
2424        // Get after expiry should return null
2425        let sd2 = make_stash_dispatcher(stash, None);
2426        let code2 = r#"async () => {
2427            const v = await forge.stash.get("ttl-key");
2428            return v;
2429        }"#;
2430        let result2 = exec
2431            .execute_code(code2, dispatcher, None, Some(sd2))
2432            .await
2433            .unwrap();
2434        assert_eq!(
2435            result2,
2436            serde_json::Value::Null,
2437            "expired key should return null"
2438        );
2439    }
2440
2441    // =========================================================================
2442    // Phase 7: forge.parallel() tests (PL-U01..PL-U09, PL-S01..PL-S05)
2443    // =========================================================================
2444
2445    // --- PL-U01: parallel with 3 successful calls returns all results ---
2446    #[tokio::test]
2447    async fn pl_u01_parallel_three_successful_calls() {
2448        let exec = executor();
2449        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2450
2451        let code = r#"async () => {
2452            const result = await forge.parallel([
2453                () => forge.callTool("s1", "t1", { id: 1 }),
2454                () => forge.callTool("s2", "t2", { id: 2 }),
2455                () => forge.callTool("s3", "t3", { id: 3 }),
2456            ]);
2457            return result;
2458        }"#;
2459
2460        let result = exec
2461            .execute_code(code, dispatcher, None, None)
2462            .await
2463            .unwrap();
2464        let results = result["results"].as_array().unwrap();
2465        assert_eq!(results.len(), 3);
2466        assert_eq!(results[0]["server"], "s1");
2467        assert_eq!(results[1]["server"], "s2");
2468        assert_eq!(results[2]["server"], "s3");
2469        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2470        assert_eq!(result["aborted"], false);
2471    }
2472
2473    // --- PL-U02: parallel with 1 failure returns partial results + error ---
2474    #[tokio::test]
2475    async fn pl_u02_parallel_partial_failure() {
2476        struct PartialFailDispatcher;
2477
2478        #[async_trait::async_trait]
2479        impl ToolDispatcher for PartialFailDispatcher {
2480            async fn call_tool(
2481                &self,
2482                _server: &str,
2483                tool: &str,
2484                _args: serde_json::Value,
2485            ) -> Result<serde_json::Value, anyhow::Error> {
2486                if tool == "fail" {
2487                    Err(anyhow::anyhow!("deliberate failure"))
2488                } else {
2489                    Ok(serde_json::json!({"tool": tool, "ok": true}))
2490                }
2491            }
2492        }
2493
2494        let exec = executor();
2495        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(PartialFailDispatcher);
2496
2497        let code = r#"async () => {
2498            return await forge.parallel([
2499                () => forge.callTool("s", "ok1", {}),
2500                () => forge.callTool("s", "fail", {}),
2501                () => forge.callTool("s", "ok2", {}),
2502            ]);
2503        }"#;
2504
2505        let result = exec
2506            .execute_code(code, dispatcher, None, None)
2507            .await
2508            .unwrap();
2509        let results = result["results"].as_array().unwrap();
2510        assert!(results[0]["ok"] == true);
2511        assert!(results[1].is_null(), "failed call should have null result");
2512        assert!(results[2]["ok"] == true);
2513        let errors = result["errors"].as_array().unwrap();
2514        assert_eq!(errors.len(), 1);
2515        assert_eq!(errors[0]["index"], 1);
2516    }
2517
2518    // --- PL-U03: parallel with failFast aborts on first error ---
2519    #[tokio::test]
2520    async fn pl_u03_parallel_fail_fast() {
2521        let exec = SandboxExecutor::new(SandboxConfig {
2522            max_tool_calls: 50,
2523            max_parallel: 2, // batch size 2
2524            ..Default::default()
2525        });
2526
2527        struct FailOnSecondDispatcher {
2528            calls: std::sync::Mutex<u32>,
2529        }
2530
2531        #[async_trait::async_trait]
2532        impl ToolDispatcher for FailOnSecondDispatcher {
2533            async fn call_tool(
2534                &self,
2535                _server: &str,
2536                tool: &str,
2537                _args: serde_json::Value,
2538            ) -> Result<serde_json::Value, anyhow::Error> {
2539                let mut c = self.calls.lock().unwrap();
2540                *c += 1;
2541                if tool == "fail" {
2542                    Err(anyhow::anyhow!("fail"))
2543                } else {
2544                    Ok(serde_json::json!({"ok": true}))
2545                }
2546            }
2547        }
2548
2549        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(FailOnSecondDispatcher {
2550            calls: std::sync::Mutex::new(0),
2551        });
2552
2553        // 4 calls with batch=2. Second call in first batch fails, so second batch should be skipped
2554        let code = r#"async () => {
2555            return await forge.parallel([
2556                () => forge.callTool("s", "ok", {}),
2557                () => forge.callTool("s", "fail", {}),
2558                () => forge.callTool("s", "ok", {}),
2559                () => forge.callTool("s", "ok", {}),
2560            ], { failFast: true });
2561        }"#;
2562
2563        let result = exec
2564            .execute_code(code, dispatcher, None, None)
2565            .await
2566            .unwrap();
2567        assert_eq!(result["aborted"], true);
2568        assert!(!result["errors"].as_array().unwrap().is_empty());
2569    }
2570
2571    // --- PL-U04: parallel respects concurrency limit ---
2572    #[tokio::test]
2573    async fn pl_u04_parallel_respects_concurrency_limit() {
2574        let exec = SandboxExecutor::new(SandboxConfig {
2575            max_parallel: 2,
2576            timeout: Duration::from_secs(10),
2577            ..Default::default()
2578        });
2579
2580        struct ConcurrencyTracker {
2581            current: std::sync::atomic::AtomicUsize,
2582            peak: std::sync::atomic::AtomicUsize,
2583        }
2584
2585        #[async_trait::async_trait]
2586        impl ToolDispatcher for ConcurrencyTracker {
2587            async fn call_tool(
2588                &self,
2589                _server: &str,
2590                _tool: &str,
2591                _args: serde_json::Value,
2592            ) -> Result<serde_json::Value, anyhow::Error> {
2593                let c = self
2594                    .current
2595                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
2596                    + 1;
2597                // Update peak
2598                self.peak.fetch_max(c, std::sync::atomic::Ordering::SeqCst);
2599                // Small delay to let concurrent calls overlap
2600                tokio::time::sleep(Duration::from_millis(10)).await;
2601                self.current
2602                    .fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
2603                Ok(serde_json::json!({"peak": self.peak.load(std::sync::atomic::Ordering::SeqCst)}))
2604            }
2605        }
2606
2607        let tracker = Arc::new(ConcurrencyTracker {
2608            current: std::sync::atomic::AtomicUsize::new(0),
2609            peak: std::sync::atomic::AtomicUsize::new(0),
2610        });
2611        let dispatcher: Arc<dyn ToolDispatcher> = tracker.clone();
2612
2613        // 6 calls with max_parallel=2
2614        let code = r#"async () => {
2615            return await forge.parallel([
2616                () => forge.callTool("s", "t", {}),
2617                () => forge.callTool("s", "t", {}),
2618                () => forge.callTool("s", "t", {}),
2619                () => forge.callTool("s", "t", {}),
2620                () => forge.callTool("s", "t", {}),
2621                () => forge.callTool("s", "t", {}),
2622            ]);
2623        }"#;
2624
2625        let result = exec
2626            .execute_code(code, dispatcher, None, None)
2627            .await
2628            .unwrap();
2629        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2630        let peak = tracker.peak.load(std::sync::atomic::Ordering::SeqCst);
2631        assert!(peak <= 2, "peak concurrency should be <= 2, was: {peak}");
2632    }
2633
2634    // --- PL-U05: parallel with empty array ---
2635    #[tokio::test]
2636    async fn pl_u05_parallel_empty_array() {
2637        let exec = executor();
2638        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2639
2640        let code = r#"async () => {
2641            return await forge.parallel([]);
2642        }"#;
2643
2644        let result = exec
2645            .execute_code(code, dispatcher, None, None)
2646            .await
2647            .unwrap();
2648        assert_eq!(result["results"].as_array().unwrap().len(), 0);
2649        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2650        assert_eq!(result["aborted"], false);
2651    }
2652
2653    // --- PL-U06: parallel with single call ---
2654    #[tokio::test]
2655    async fn pl_u06_parallel_single_call() {
2656        let exec = executor();
2657        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2658
2659        let code = r#"async () => {
2660            return await forge.parallel([
2661                () => forge.callTool("s", "t", { id: 1 }),
2662            ]);
2663        }"#;
2664
2665        let result = exec
2666            .execute_code(code, dispatcher, None, None)
2667            .await
2668            .unwrap();
2669        let results = result["results"].as_array().unwrap();
2670        assert_eq!(results.len(), 1);
2671        assert_eq!(results[0]["server"], "s");
2672    }
2673
2674    // --- PL-U07: parallel errors contain redacted messages ---
2675    #[tokio::test]
2676    async fn pl_u07_parallel_errors_redacted() {
2677        struct LeakyDispatcher;
2678
2679        #[async_trait::async_trait]
2680        impl ToolDispatcher for LeakyDispatcher {
2681            async fn call_tool(
2682                &self,
2683                _server: &str,
2684                _tool: &str,
2685                _args: serde_json::Value,
2686            ) -> Result<serde_json::Value, anyhow::Error> {
2687                Err(anyhow::anyhow!(
2688                    "connection to http://internal.secret:9999/api failed"
2689                ))
2690            }
2691        }
2692
2693        let exec = executor();
2694        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(LeakyDispatcher);
2695
2696        let code = r#"async () => {
2697            return await forge.parallel([
2698                () => forge.callTool("server", "tool", {}),
2699            ]);
2700        }"#;
2701
2702        let result = exec
2703            .execute_code(code, dispatcher, None, None)
2704            .await
2705            .unwrap();
2706        let errors = result["errors"].as_array().unwrap();
2707        assert_eq!(errors.len(), 1);
2708        let msg = errors[0]["error"].as_str().unwrap();
2709        assert!(!msg.contains("internal.secret"), "should redact URL: {msg}");
2710    }
2711
2712    // --- PL-U08: parallel combined with readResource ---
2713    #[tokio::test]
2714    async fn pl_u08_parallel_with_read_resource() {
2715        let exec = executor();
2716        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2717        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
2718            Some(Arc::new(TestResourceDispatcher));
2719
2720        let code = r#"async () => {
2721            return await forge.parallel([
2722                () => forge.callTool("s", "t", {}),
2723                () => forge.readResource("rs", "file:///log"),
2724            ]);
2725        }"#;
2726
2727        let result = exec
2728            .execute_code(code, dispatcher, resource_dispatcher, None)
2729            .await
2730            .unwrap();
2731        let results = result["results"].as_array().unwrap();
2732        assert_eq!(results.len(), 2);
2733        assert_eq!(results[0]["server"], "s");
2734        assert_eq!(results[1]["server"], "rs");
2735    }
2736
2737    // --- PL-U09: parallel exceeding max_tool_calls ---
2738    #[tokio::test]
2739    async fn pl_u09_parallel_exceeds_rate_limit() {
2740        let exec = SandboxExecutor::new(SandboxConfig {
2741            max_tool_calls: 3,
2742            ..Default::default()
2743        });
2744        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2745
2746        let code = r#"async () => {
2747            return await forge.parallel([
2748                () => forge.callTool("s", "t1", {}),
2749                () => forge.callTool("s", "t2", {}),
2750                () => forge.callTool("s", "t3", {}),
2751                () => forge.callTool("s", "t4", {}),
2752                () => forge.callTool("s", "t5", {}),
2753            ]);
2754        }"#;
2755
2756        let result = exec
2757            .execute_code(code, dispatcher, None, None)
2758            .await
2759            .unwrap();
2760        // First 3 should succeed, remaining should error
2761        let errors = result["errors"].as_array().unwrap();
2762        assert!(!errors.is_empty(), "should have errors from rate limiting");
2763        // At least some results should be non-null
2764        let results = result["results"].as_array().unwrap();
2765        let successes = results.iter().filter(|r| !r.is_null()).count();
2766        assert_eq!(successes, 3, "should have exactly 3 successful calls");
2767    }
2768
2769    // --- PL-S01: cannot exceed __MAX_PARALLEL even with high concurrency opt ---
2770    #[tokio::test]
2771    async fn pl_s01_cannot_exceed_max_parallel() {
2772        let exec = SandboxExecutor::new(SandboxConfig {
2773            max_parallel: 2,
2774            timeout: Duration::from_secs(10),
2775            ..Default::default()
2776        });
2777
2778        struct ConcurrencyCounter {
2779            peak: std::sync::atomic::AtomicUsize,
2780            current: std::sync::atomic::AtomicUsize,
2781        }
2782
2783        #[async_trait::async_trait]
2784        impl ToolDispatcher for ConcurrencyCounter {
2785            async fn call_tool(
2786                &self,
2787                _server: &str,
2788                _tool: &str,
2789                _args: serde_json::Value,
2790            ) -> Result<serde_json::Value, anyhow::Error> {
2791                let c = self
2792                    .current
2793                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
2794                    + 1;
2795                self.peak.fetch_max(c, std::sync::atomic::Ordering::SeqCst);
2796                tokio::time::sleep(Duration::from_millis(10)).await;
2797                self.current
2798                    .fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
2799                Ok(serde_json::json!({}))
2800            }
2801        }
2802
2803        let counter = Arc::new(ConcurrencyCounter {
2804            peak: std::sync::atomic::AtomicUsize::new(0),
2805            current: std::sync::atomic::AtomicUsize::new(0),
2806        });
2807        let dispatcher: Arc<dyn ToolDispatcher> = counter.clone();
2808
2809        // Request concurrency=9999 but max_parallel=2
2810        let code = r#"async () => {
2811            return await forge.parallel([
2812                () => forge.callTool("s", "t", {}),
2813                () => forge.callTool("s", "t", {}),
2814                () => forge.callTool("s", "t", {}),
2815                () => forge.callTool("s", "t", {}),
2816            ], { concurrency: 9999 });
2817        }"#;
2818
2819        let _ = exec
2820            .execute_code(code, dispatcher, None, None)
2821            .await
2822            .unwrap();
2823        let peak = counter.peak.load(std::sync::atomic::Ordering::SeqCst);
2824        assert!(
2825            peak <= 2,
2826            "peak should be capped at max_parallel=2, was: {peak}"
2827        );
2828    }
2829
2830    // --- PL-S02: parallel calls to mixed strict groups ---
2831    #[tokio::test]
2832    async fn pl_s02_parallel_mixed_strict_groups() {
2833        use crate::groups::{GroupEnforcingDispatcher, GroupPolicy};
2834        use std::collections::HashMap;
2835
2836        let mut groups = HashMap::new();
2837        groups.insert(
2838            "internal".to_string(),
2839            (vec!["vault".to_string()], "strict".to_string()),
2840        );
2841        groups.insert(
2842            "external".to_string(),
2843            (vec!["slack".to_string()], "strict".to_string()),
2844        );
2845        let policy = Arc::new(GroupPolicy::from_config(&groups));
2846        let inner: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2847        let enforcer = GroupEnforcingDispatcher::new(inner, policy);
2848        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(enforcer);
2849
2850        let exec = executor();
2851
2852        // Parallel calls: first locks to "internal", second to "external" should fail
2853        let code = r#"async () => {
2854            return await forge.parallel([
2855                () => forge.callTool("vault", "secrets.list", {}),
2856                () => forge.callTool("slack", "messages.send", {}),
2857            ]);
2858        }"#;
2859
2860        let result = exec
2861            .execute_code(code, dispatcher, None, None)
2862            .await
2863            .unwrap();
2864        let errors = result["errors"].as_array().unwrap();
2865        // At least one should fail with cross-group error
2866        assert!(
2867            !errors.is_empty(),
2868            "should have cross-group error: {result:?}"
2869        );
2870        let has_cross_group = errors
2871            .iter()
2872            .any(|e| e["error"].as_str().unwrap_or("").contains("cross-group"));
2873        assert!(has_cross_group, "should mention cross-group: {result:?}");
2874    }
2875
2876    // --- PL-S03: 500 parallel calls hits rate limit ---
2877    #[tokio::test]
2878    async fn pl_s03_many_parallel_calls_hit_rate_limit() {
2879        let exec = SandboxExecutor::new(SandboxConfig {
2880            max_tool_calls: 10,
2881            ..Default::default()
2882        });
2883        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2884
2885        let code = r#"async () => {
2886            const calls = [];
2887            for (let i = 0; i < 100; i++) {
2888                calls.push(() => forge.callTool("s", "t", { i }));
2889            }
2890            return await forge.parallel(calls);
2891        }"#;
2892
2893        let result = exec
2894            .execute_code(code, dispatcher, None, None)
2895            .await
2896            .unwrap();
2897        let errors = result["errors"].as_array().unwrap();
2898        let results = result["results"].as_array().unwrap();
2899        let successes = results.iter().filter(|r| !r.is_null()).count();
2900        assert_eq!(
2901            successes, 10,
2902            "should have exactly max_tool_calls successes"
2903        );
2904        assert_eq!(errors.len(), 90, "remaining 90 should be rate limited");
2905    }
2906
2907    // --- PL-S04: __MAX_PARALLEL is not modifiable ---
2908    #[tokio::test]
2909    async fn pl_s04_max_parallel_not_modifiable() {
2910        let exec = SandboxExecutor::new(SandboxConfig {
2911            max_parallel: 3,
2912            ..Default::default()
2913        });
2914        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2915
2916        // Attempt to modify the frozen constant — should fail silently or throw
2917        let code = r#"async () => {
2918            try {
2919                // __MAX_PARALLEL is a local const in the bootstrap closure,
2920                // not accessible from user code. Attempting to use it would fail.
2921                return typeof __MAX_PARALLEL;
2922            } catch(e) {
2923                return "error";
2924            }
2925        }"#;
2926
2927        let result = exec
2928            .execute_code(code, dispatcher, None, None)
2929            .await
2930            .unwrap();
2931        // __MAX_PARALLEL is scoped inside the IIFE, not visible to user code
2932        assert_eq!(
2933            result, "undefined",
2934            "__MAX_PARALLEL should not be accessible"
2935        );
2936    }
2937
2938    // --- PL-S05: raw Promise.all still hits rate limit ---
2939    #[tokio::test]
2940    async fn pl_s05_raw_promise_all_hits_rate_limit() {
2941        let exec = SandboxExecutor::new(SandboxConfig {
2942            max_tool_calls: 3,
2943            ..Default::default()
2944        });
2945        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2946
2947        // Bypass forge.parallel() and use raw Promise.allSettled
2948        let code = r#"async () => {
2949            const results = await Promise.allSettled([
2950                forge.callTool("s", "t1", {}),
2951                forge.callTool("s", "t2", {}),
2952                forge.callTool("s", "t3", {}),
2953                forge.callTool("s", "t4", {}),
2954                forge.callTool("s", "t5", {}),
2955            ]);
2956            const fulfilled = results.filter(r => r.status === "fulfilled").length;
2957            const rejected = results.filter(r => r.status === "rejected").length;
2958            return { fulfilled, rejected };
2959        }"#;
2960
2961        let result = exec
2962            .execute_code(code, dispatcher, None, None)
2963            .await
2964            .unwrap();
2965        assert_eq!(result["fulfilled"], 3, "should have 3 successful calls");
2966        assert_eq!(result["rejected"], 2, "should have 2 rate-limited calls");
2967    }
2968
2969    // =========================================================================
2970    // Phase 8: Bootstrap + Invariant Tests (BS-01..BS-12, INV-01..INV-10)
2971    // =========================================================================
2972
2973    // --- BS-01: forge object is frozen ---
2974    #[tokio::test]
2975    async fn bs_01_forge_object_is_frozen() {
2976        let exec = executor();
2977        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2978        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
2979        let stash_store = make_stash(Default::default());
2980        let stash = make_stash_dispatcher(stash_store, None);
2981
2982        let code = r#"async () => {
2983            return Object.isFrozen(forge);
2984        }"#;
2985
2986        let result = exec
2987            .execute_code(code, dispatcher, Some(resource), Some(stash))
2988            .await
2989            .unwrap();
2990        assert_eq!(result, true, "forge object must be frozen");
2991    }
2992
2993    // --- BS-02: forge.stash is frozen ---
2994    #[tokio::test]
2995    async fn bs_02_forge_stash_is_frozen() {
2996        let exec = executor();
2997        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2998        let stash_store = make_stash(Default::default());
2999        let stash = make_stash_dispatcher(stash_store, None);
3000
3001        let code = r#"async () => {
3002            return Object.isFrozen(forge.stash);
3003        }"#;
3004
3005        let result = exec
3006            .execute_code(code, dispatcher, None, Some(stash))
3007            .await
3008            .unwrap();
3009        assert_eq!(result, true, "forge.stash must be frozen");
3010    }
3011
3012    // --- BS-03: __MAX_PARALLEL is not accessible from user code as a global ---
3013    #[tokio::test]
3014    async fn bs_03_max_parallel_not_accessible_as_global() {
3015        let exec = executor();
3016        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3017
3018        let code = r#"async () => {
3019            return {
3020                global: typeof globalThis.__MAX_PARALLEL,
3021                direct: typeof __MAX_PARALLEL,
3022            };
3023        }"#;
3024
3025        let result = exec
3026            .execute_code(code, dispatcher, None, None)
3027            .await
3028            .unwrap();
3029        assert_eq!(
3030            result["global"], "undefined",
3031            "__MAX_PARALLEL must not be on globalThis"
3032        );
3033        // __MAX_PARALLEL is a local const inside the bootstrap IIFE,
3034        // so direct access from user code (different scope) should fail.
3035        // User code runs in a separate eval context, so it shouldn't see the IIFE local.
3036        assert_eq!(
3037            result["direct"], "undefined",
3038            "__MAX_PARALLEL must not be accessible from user scope"
3039        );
3040    }
3041
3042    // --- BS-04: forge.readResource is a function in execute mode ---
3043    #[tokio::test]
3044    async fn bs_04_read_resource_is_function_in_execute_mode() {
3045        let exec = executor();
3046        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3047        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3048
3049        let code = r#"async () => {
3050            return typeof forge.readResource;
3051        }"#;
3052
3053        let result = exec
3054            .execute_code(code, dispatcher, Some(resource), None)
3055            .await
3056            .unwrap();
3057        assert_eq!(result, "function", "forge.readResource must be a function");
3058    }
3059
3060    // --- BS-05: forge.readResource is undefined in search mode ---
3061    #[tokio::test]
3062    async fn bs_05_read_resource_undefined_in_search_mode() {
3063        let exec = executor();
3064        let manifest = serde_json::json!({"servers": []});
3065
3066        let code = r#"async () => {
3067            return typeof forge.readResource;
3068        }"#;
3069
3070        let result = exec.execute_search(code, &manifest).await.unwrap();
3071        assert_eq!(
3072            result, "undefined",
3073            "forge.readResource must be undefined in search mode"
3074        );
3075    }
3076
3077    // --- BS-06: forge.stash has put/get/delete/keys in execute mode ---
3078    #[tokio::test]
3079    async fn bs_06_stash_has_all_methods_in_execute_mode() {
3080        let exec = executor();
3081        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3082        let stash_store = make_stash(Default::default());
3083        let stash = make_stash_dispatcher(stash_store, None);
3084
3085        let code = r#"async () => {
3086            return {
3087                type: typeof forge.stash,
3088                put: typeof forge.stash.put,
3089                get: typeof forge.stash.get,
3090                del: typeof forge.stash.delete,
3091                keys: typeof forge.stash.keys,
3092            };
3093        }"#;
3094
3095        let result = exec
3096            .execute_code(code, dispatcher, None, Some(stash))
3097            .await
3098            .unwrap();
3099        assert_eq!(result["type"], "object", "forge.stash must be an object");
3100        assert_eq!(result["put"], "function");
3101        assert_eq!(result["get"], "function");
3102        assert_eq!(result["del"], "function");
3103        assert_eq!(result["keys"], "function");
3104    }
3105
3106    // --- BS-07: forge.stash is undefined in search mode ---
3107    #[tokio::test]
3108    async fn bs_07_stash_undefined_in_search_mode() {
3109        let exec = executor();
3110        let manifest = serde_json::json!({"servers": []});
3111
3112        let code = r#"async () => {
3113            return typeof forge.stash;
3114        }"#;
3115
3116        let result = exec.execute_search(code, &manifest).await.unwrap();
3117        assert_eq!(
3118            result, "undefined",
3119            "forge.stash must be undefined in search mode"
3120        );
3121    }
3122
3123    // --- BS-08: forge.parallel is a function in execute mode ---
3124    #[tokio::test]
3125    async fn bs_08_parallel_is_function_in_execute_mode() {
3126        let exec = executor();
3127        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3128
3129        let code = r#"async () => {
3130            return typeof forge.parallel;
3131        }"#;
3132
3133        let result = exec
3134            .execute_code(code, dispatcher, None, None)
3135            .await
3136            .unwrap();
3137        assert_eq!(result, "function", "forge.parallel must be a function");
3138    }
3139
3140    // --- BS-09: forge.parallel is undefined in search mode ---
3141    #[tokio::test]
3142    async fn bs_09_parallel_undefined_in_search_mode() {
3143        let exec = executor();
3144        let manifest = serde_json::json!({"servers": []});
3145
3146        let code = r#"async () => {
3147            return typeof forge.parallel;
3148        }"#;
3149
3150        let result = exec.execute_search(code, &manifest).await.unwrap();
3151        assert_eq!(
3152            result, "undefined",
3153            "forge.parallel must be undefined in search mode"
3154        );
3155    }
3156
3157    // --- BS-10: forge.server("x").cat.tool() still works (Proxy not broken) ---
3158    #[tokio::test]
3159    async fn bs_10_server_proxy_still_works() {
3160        let exec = executor();
3161        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3162        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3163        let stash_store = make_stash(Default::default());
3164        let stash = make_stash_dispatcher(stash_store, None);
3165
3166        let code = r#"async () => {
3167            const result = await forge.server("myserver").ast.parse({ file: "test.rs" });
3168            return result;
3169        }"#;
3170
3171        let result = exec
3172            .execute_code(code, dispatcher, Some(resource), Some(stash))
3173            .await
3174            .unwrap();
3175        assert_eq!(result["server"], "myserver");
3176        assert_eq!(result["tool"], "ast.parse");
3177        assert_eq!(result["args"]["file"], "test.rs");
3178    }
3179
3180    // --- BS-11: delete globalThis.Deno still happens after new APIs ---
3181    #[tokio::test]
3182    async fn bs_11_deno_deleted_in_execute_mode() {
3183        let exec = executor();
3184        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3185        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3186        let stash_store = make_stash(Default::default());
3187        let stash = make_stash_dispatcher(stash_store, None);
3188
3189        let code = r#"async () => {
3190            return typeof globalThis.Deno;
3191        }"#;
3192
3193        let result = exec
3194            .execute_code(code, dispatcher, Some(resource), Some(stash))
3195            .await
3196            .unwrap();
3197        assert_eq!(result, "undefined", "Deno must be deleted in execute mode");
3198    }
3199
3200    // --- BS-12: Function.prototype.constructor is still undefined ---
3201    #[tokio::test]
3202    async fn bs_12_function_constructor_undefined_in_execute_mode() {
3203        let exec = executor();
3204        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3205        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3206        let stash_store = make_stash(Default::default());
3207        let stash = make_stash_dispatcher(stash_store, None);
3208
3209        // After bootstrap, Function.prototype.constructor is undefined.
3210        // Since .constructor is undefined, (async fn).constructor is also undefined,
3211        // so we cannot chain .prototype.constructor — we verify via separate checks.
3212        let code = r#"async () => {
3213            const funcCtor = typeof Function.prototype.constructor;
3214            // AsyncFunction and GeneratorFunction constructors are also wiped
3215            // because they inherit from Function.prototype.
3216            const asyncFn = async function(){};
3217            const genFn = function*(){};
3218            const asyncCtor = typeof asyncFn.constructor;
3219            const genCtor = typeof genFn.constructor;
3220            return { funcCtor, asyncCtor, genCtor };
3221        }"#;
3222
3223        let result = exec
3224            .execute_code(code, dispatcher, Some(resource), Some(stash))
3225            .await
3226            .unwrap();
3227        assert_eq!(
3228            result["funcCtor"], "undefined",
3229            "Function.prototype.constructor must be undefined"
3230        );
3231        assert_eq!(
3232            result["asyncCtor"], "undefined",
3233            "AsyncFunction .constructor must be undefined"
3234        );
3235        assert_eq!(
3236            result["genCtor"], "undefined",
3237            "GeneratorFunction .constructor must be undefined"
3238        );
3239    }
3240
3241    // --- INV-01: search() mode cannot access forge.callTool ---
3242    #[tokio::test]
3243    async fn inv_01_search_mode_no_call_tool() {
3244        let exec = executor();
3245        let manifest = serde_json::json!({"servers": []});
3246
3247        let code = r#"async () => {
3248            return typeof forge.callTool;
3249        }"#;
3250
3251        let result = exec.execute_search(code, &manifest).await.unwrap();
3252        assert_eq!(
3253            result, "undefined",
3254            "forge.callTool must not exist in search mode"
3255        );
3256    }
3257
3258    // --- INV-02: search() mode cannot access forge.readResource ---
3259    #[tokio::test]
3260    async fn inv_02_search_mode_no_read_resource() {
3261        let exec = executor();
3262        let manifest = serde_json::json!({"servers": []});
3263
3264        let code = r#"async () => {
3265            return typeof forge.readResource;
3266        }"#;
3267
3268        let result = exec.execute_search(code, &manifest).await.unwrap();
3269        assert_eq!(
3270            result, "undefined",
3271            "forge.readResource must not exist in search mode"
3272        );
3273    }
3274
3275    // --- INV-03: search() mode cannot access forge.stash ---
3276    #[tokio::test]
3277    async fn inv_03_search_mode_no_stash() {
3278        let exec = executor();
3279        let manifest = serde_json::json!({"servers": []});
3280
3281        let code = r#"async () => {
3282            return typeof forge.stash;
3283        }"#;
3284
3285        let result = exec.execute_search(code, &manifest).await.unwrap();
3286        assert_eq!(
3287            result, "undefined",
3288            "forge.stash must not exist in search mode"
3289        );
3290    }
3291
3292    // --- INV-04: search() mode cannot access forge.parallel ---
3293    #[tokio::test]
3294    async fn inv_04_search_mode_no_parallel() {
3295        let exec = executor();
3296        let manifest = serde_json::json!({"servers": []});
3297
3298        let code = r#"async () => {
3299            return typeof forge.parallel;
3300        }"#;
3301
3302        let result = exec.execute_search(code, &manifest).await.unwrap();
3303        assert_eq!(
3304            result, "undefined",
3305            "forge.parallel must not exist in search mode"
3306        );
3307    }
3308
3309    // --- INV-05: eval is undefined in all modes ---
3310    #[tokio::test]
3311    async fn inv_05_eval_undefined_in_all_modes() {
3312        let exec = executor();
3313
3314        // Execute mode
3315        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3316        let code = r#"async () => { return typeof eval; }"#;
3317        let result = exec
3318            .execute_code(code, dispatcher, None, None)
3319            .await
3320            .unwrap();
3321        assert_eq!(
3322            result, "undefined",
3323            "eval must be undefined in execute mode"
3324        );
3325
3326        // Search mode
3327        let manifest = serde_json::json!({"servers": []});
3328        let result = exec.execute_search(code, &manifest).await.unwrap();
3329        assert_eq!(result, "undefined", "eval must be undefined in search mode");
3330    }
3331
3332    // --- INV-06: Function.prototype.constructor is undefined in all modes ---
3333    #[tokio::test]
3334    async fn inv_06_function_constructor_undefined_all_modes() {
3335        let exec = executor();
3336
3337        let code = r#"async () => {
3338            return typeof Function.prototype.constructor;
3339        }"#;
3340
3341        // Execute mode
3342        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3343        let result = exec
3344            .execute_code(code, dispatcher, None, None)
3345            .await
3346            .unwrap();
3347        assert_eq!(
3348            result, "undefined",
3349            "Function.prototype.constructor must be undefined in execute mode"
3350        );
3351
3352        // Search mode
3353        let manifest = serde_json::json!({"servers": []});
3354        let result = exec.execute_search(code, &manifest).await.unwrap();
3355        assert_eq!(
3356            result, "undefined",
3357            "Function.prototype.constructor must be undefined in search mode"
3358        );
3359    }
3360
3361    // --- INV-07: Deno is undefined after bootstrap in all modes ---
3362    #[tokio::test]
3363    async fn inv_07_deno_undefined_all_modes() {
3364        let exec = executor();
3365
3366        let code = r#"async () => { return typeof globalThis.Deno; }"#;
3367
3368        // Execute mode
3369        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3370        let result = exec
3371            .execute_code(code, dispatcher, None, None)
3372            .await
3373            .unwrap();
3374        assert_eq!(
3375            result, "undefined",
3376            "Deno must be undefined in execute mode"
3377        );
3378
3379        // Search mode
3380        let manifest = serde_json::json!({"servers": []});
3381        let result = exec.execute_search(code, &manifest).await.unwrap();
3382        assert_eq!(result, "undefined", "Deno must be undefined in search mode");
3383    }
3384
3385    // --- INV-08: forge object is frozen in all modes ---
3386    #[tokio::test]
3387    async fn inv_08_forge_frozen_all_modes() {
3388        let exec = executor();
3389
3390        let code = r#"async () => { return Object.isFrozen(forge); }"#;
3391
3392        // Execute mode
3393        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3394        let result = exec
3395            .execute_code(code, dispatcher, None, None)
3396            .await
3397            .unwrap();
3398        assert_eq!(result, true, "forge must be frozen in execute mode");
3399
3400        // Search mode
3401        let manifest = serde_json::json!({"servers": []});
3402        let result = exec.execute_search(code, &manifest).await.unwrap();
3403        assert_eq!(result, true, "forge must be frozen in search mode");
3404    }
3405
3406    // --- INV-09: forge.stash object is frozen in execute mode ---
3407    #[tokio::test]
3408    async fn inv_09_stash_frozen_in_execute_mode() {
3409        let exec = executor();
3410        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3411        let stash_store = make_stash(Default::default());
3412        let stash = make_stash_dispatcher(stash_store, None);
3413
3414        // Verify stash is frozen and cannot be modified
3415        let code = r#"async () => {
3416            const frozen = Object.isFrozen(forge.stash);
3417            let mutated = false;
3418            try {
3419                forge.stash.evil = () => {};
3420                mutated = forge.stash.evil !== undefined;
3421            } catch (e) {
3422                // TypeError in strict mode, which is fine
3423            }
3424            return { frozen, mutated };
3425        }"#;
3426
3427        let result = exec
3428            .execute_code(code, dispatcher, None, Some(stash))
3429            .await
3430            .unwrap();
3431        assert_eq!(result["frozen"], true, "forge.stash must be frozen");
3432        assert_eq!(result["mutated"], false, "forge.stash must not be mutable");
3433    }
3434
3435    // --- INV-10: error messages from all new ops pass through redact_error_for_llm ---
3436    #[tokio::test]
3437    async fn inv_10_error_messages_redacted() {
3438        let exec = executor();
3439
3440        // Use a resource dispatcher that fails with a message containing file paths
3441        let failing_resource: Arc<dyn ResourceDispatcher> = Arc::new(FailingResourceDispatcher {
3442            error_msg: "connection refused to /var/secret/db.sock".to_string(),
3443        });
3444        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3445
3446        let code = r#"async () => {
3447            try {
3448                await forge.readResource("secret-server", "file:///data/log.txt");
3449                return { error: null };
3450            } catch (e) {
3451                return { error: e.message || String(e) };
3452            }
3453        }"#;
3454
3455        let result = exec
3456            .execute_code(code, dispatcher, Some(failing_resource), None)
3457            .await
3458            .unwrap();
3459        let error_msg = result["error"].as_str().unwrap();
3460        // Error should be redacted — should not contain raw file paths from the dispatcher
3461        // The redaction replaces the error with a safe format
3462        assert!(
3463            !error_msg.contains("/var/secret/db.sock"),
3464            "error must be redacted, got: {error_msg}"
3465        );
3466        // Should mention the server name in a safe way
3467        assert!(
3468            error_msg.contains("secret-server"),
3469            "error should reference server name: {error_msg}"
3470        );
3471    }
3472}