// forge_sandbox/executor.rs

1//! Sandbox executor — creates fresh V8 isolates and runs LLM-generated code.
2//!
3//! Each execution gets a brand new runtime. No state leaks between calls.
4//!
5//! V8 isolates are `!Send`, so all JsRuntime operations run on a dedicated
6//! thread with its own single-threaded tokio runtime. The public API is
7//! fully async and `Send`-safe.
8
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::Arc;
11use std::time::Duration;
12
13use deno_core::{v8, JsRuntime, PollEventLoopOptions, RuntimeOptions};
14use serde_json::Value;
15use tokio::sync::Semaphore;
16
17use crate::audit::{
18    AuditEntryBuilder, AuditLogger, AuditOperation, AuditingDispatcher, AuditingResourceDispatcher,
19    AuditingStashDispatcher, NoopAuditLogger, ResourceReadAudit, StashOperationAudit,
20    ToolCallAudit,
21};
22use crate::error::SandboxError;
23use crate::ops::{
24    forge_ext, CurrentGroup, ExecutionResult, KnownServers, KnownTools, MaxResourceSize,
25    ToolCallLimits,
26};
27use crate::validator::validate_code;
28use crate::{ResourceDispatcher, StashDispatcher, ToolDispatcher};
29
/// How the sandbox executes code.
///
/// Marked `#[non_exhaustive]` so additional isolation modes can be added
/// without a semver break; downstream `match`es need a wildcard arm.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum ExecutionMode {
    /// Run V8 in-process on a dedicated thread (default, suitable for tests).
    #[default]
    InProcess,
    /// Spawn an isolated child process per execution (production security mode).
    ChildProcess,
}
40
/// Configuration for the sandbox executor.
///
/// All limits are enforced per execution. See the `Default` impl for the
/// conservative out-of-the-box values.
#[derive(Debug, Clone)]
pub struct SandboxConfig {
    /// Maximum execution time before the sandbox is terminated.
    pub timeout: Duration,
    /// Maximum size of LLM-generated code in bytes.
    pub max_code_size: usize,
    /// Maximum size of the JSON result in bytes.
    pub max_output_size: usize,
    /// V8 heap limit in bytes.
    pub max_heap_size: usize,
    /// Maximum concurrent sandbox executions.
    pub max_concurrent: usize,
    /// Maximum tool calls per execution.
    pub max_tool_calls: usize,
    /// Maximum size of tool call arguments in bytes.
    pub max_tool_call_args_size: usize,
    /// Execution mode: in-process or child-process isolation.
    pub execution_mode: ExecutionMode,
    /// Maximum resource content size in bytes (default: 64 MB).
    pub max_resource_size: usize,
    /// Maximum concurrent calls in forge.parallel() (default: 8).
    pub max_parallel: usize,
    /// Maximum IPC message size in bytes (default: 8 MB).
    pub max_ipc_message_size: usize,
}
67
68impl Default for SandboxConfig {
69    fn default() -> Self {
70        Self {
71            timeout: Duration::from_secs(5),
72            max_code_size: 64 * 1024,        // 64 KB
73            max_output_size: 1024 * 1024,    // 1 MB
74            max_heap_size: 64 * 1024 * 1024, // 64 MB
75            max_concurrent: 8,
76            max_tool_calls: 50,
77            max_tool_call_args_size: 1024 * 1024, // 1 MB
78            execution_mode: ExecutionMode::default(),
79            max_resource_size: 64 * 1024 * 1024, // 64 MB
80            max_parallel: 8,
81            max_ipc_message_size: crate::ipc::DEFAULT_MAX_IPC_MESSAGE_SIZE,
82        }
83    }
84}
85
/// The sandbox executor. Creates fresh V8 isolates for each execution.
///
/// This is `Send + Sync` safe — all V8 operations are dispatched to a
/// dedicated thread internally. A concurrency semaphore limits the number
/// of simultaneous V8 isolates.
pub struct SandboxExecutor {
    config: SandboxConfig,
    // Permits == config.max_concurrent; acquired per execution, non-blocking.
    semaphore: Arc<Semaphore>,
    // Defaults to `NoopAuditLogger`; every execution emits one audit entry.
    audit_logger: Arc<dyn AuditLogger>,
    /// Optional worker pool for reusing child processes.
    pool: Option<Arc<crate::pool::WorkerPool>>,
}
98
99impl SandboxExecutor {
100    /// Create a new sandbox executor with the given configuration.
101    pub fn new(config: SandboxConfig) -> Self {
102        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
103        Self {
104            config,
105            semaphore,
106            audit_logger: Arc::new(NoopAuditLogger),
107            pool: None,
108        }
109    }
110
111    /// Create a new sandbox executor with an audit logger.
112    pub fn with_audit_logger(config: SandboxConfig, logger: Arc<dyn AuditLogger>) -> Self {
113        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
114        Self {
115            config,
116            semaphore,
117            audit_logger: logger,
118            pool: None,
119        }
120    }
121
122    /// Attach a worker pool for reusing child processes.
123    ///
124    /// When a pool is set and the execution mode is `ChildProcess`, workers
125    /// are acquired from the pool instead of spawning fresh processes.
126    pub fn with_pool(mut self, pool: Arc<crate::pool::WorkerPool>) -> Self {
127        self.pool = Some(pool);
128        self
129    }
130
131    /// Execute a `search()` call — runs code against the capability manifest.
132    ///
133    /// The manifest is injected as `globalThis.manifest` in the sandbox.
134    /// The LLM's code is an async arrow function that queries it.
135    /// Search always runs in-process (read-only, no credential exposure risk).
136    #[tracing::instrument(skip(self, code, manifest), fields(code_len = code.len()))]
137    pub async fn execute_search(
138        &self,
139        code: &str,
140        manifest: &Value,
141    ) -> Result<Value, SandboxError> {
142        tracing::info!("execute_search: starting");
143
144        let audit_builder = AuditEntryBuilder::new(code, AuditOperation::Search);
145
146        validate_code(code, Some(self.config.max_code_size))?;
147
148        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
149            SandboxError::ConcurrencyLimit {
150                max: self.config.max_concurrent,
151            }
152        })?;
153
154        let code = code.to_string();
155        let manifest = manifest.clone();
156        let config = self.config.clone();
157
158        // V8 isolates are !Send — run everything on a dedicated thread
159        let (tx, rx) = tokio::sync::oneshot::channel();
160        std::thread::spawn(move || {
161            let rt = match tokio::runtime::Builder::new_current_thread()
162                .enable_all()
163                .build()
164            {
165                Ok(rt) => rt,
166                Err(e) => {
167                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
168                        tracing::warn!("sandbox result receiver dropped");
169                    }
170                    return;
171                }
172            };
173            let result = rt.block_on(run_search(&config, &code, &manifest));
174            if tx.send(result).is_err() {
175                tracing::warn!("sandbox result receiver dropped before result was sent");
176            }
177        });
178
179        let result = rx
180            .await
181            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?;
182
183        // Emit audit entry
184        let entry = audit_builder.finish(&result);
185        self.audit_logger.log(&entry).await;
186
187        match &result {
188            Ok(_) => tracing::info!("execute_search: complete"),
189            Err(e) => tracing::warn!(error = %e, "execute_search: failed"),
190        }
191
192        result
193    }
194
195    /// Execute an `execute()` call — runs code against the tool API.
196    ///
197    /// Tool calls go through `forge.callTool(server, tool, args)` which
198    /// dispatches to the Rust-side ToolDispatcher via `op_forge_call_tool`.
199    /// Resource reads go through `forge.readResource(server, uri)` which
200    /// dispatches to the Rust-side ResourceDispatcher via `op_forge_read_resource`.
201    ///
202    /// In `ChildProcess` mode, spawns an isolated worker process. In `InProcess`
203    /// mode (default), runs V8 on a dedicated thread in the current process.
204    pub async fn execute_code(
205        &self,
206        code: &str,
207        dispatcher: Arc<dyn ToolDispatcher>,
208        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
209        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
210    ) -> Result<Value, SandboxError> {
211        self.execute_code_with_options(
212            code,
213            dispatcher,
214            resource_dispatcher,
215            stash_dispatcher,
216            None,
217            None,
218        )
219        .await
220    }
221
222    /// Execute code with additional options (known servers for SR-R6 validation,
223    /// known tools for structured error fuzzy matching).
224    #[tracing::instrument(skip(self, code, dispatcher, resource_dispatcher, stash_dispatcher, known_servers, known_tools), fields(code_len = code.len(), mode = ?self.config.execution_mode))]
225    pub async fn execute_code_with_options(
226        &self,
227        code: &str,
228        dispatcher: Arc<dyn ToolDispatcher>,
229        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
230        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
231        known_servers: Option<std::collections::HashSet<String>>,
232        known_tools: Option<Vec<(String, String)>>,
233    ) -> Result<Value, SandboxError> {
234        tracing::info!("execute_code: starting");
235
236        let mut audit_builder = AuditEntryBuilder::new(code, AuditOperation::Execute);
237
238        validate_code(code, Some(self.config.max_code_size))?;
239
240        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
241            SandboxError::ConcurrencyLimit {
242                max: self.config.max_concurrent,
243            }
244        })?;
245
246        // Wrap dispatcher with audit tracking
247        let (audit_tx, mut audit_rx) = tokio::sync::mpsc::unbounded_channel::<ToolCallAudit>();
248        let auditing_dispatcher: Arc<dyn ToolDispatcher> =
249            Arc::new(AuditingDispatcher::new(dispatcher, audit_tx));
250
251        // Wrap resource dispatcher with audit tracking
252        let (resource_audit_tx, mut resource_audit_rx) =
253            tokio::sync::mpsc::unbounded_channel::<ResourceReadAudit>();
254        let auditing_resource_dispatcher = resource_dispatcher.map(|rd| {
255            Arc::new(AuditingResourceDispatcher::new(rd, resource_audit_tx))
256                as Arc<dyn ResourceDispatcher>
257        });
258
259        // Wrap stash dispatcher with audit tracking
260        let (stash_audit_tx, mut stash_audit_rx) =
261            tokio::sync::mpsc::unbounded_channel::<StashOperationAudit>();
262        let auditing_stash_dispatcher = stash_dispatcher.map(|sd| {
263            Arc::new(AuditingStashDispatcher::new(sd, stash_audit_tx)) as Arc<dyn StashDispatcher>
264        });
265
266        let result = match self.config.execution_mode {
267            ExecutionMode::ChildProcess => {
268                if let Some(ref pool) = self.pool {
269                    // Pool mode: acquire a warm worker, execute, release
270                    match pool.acquire(&self.config).await {
271                        Ok(mut worker) => {
272                            let exec_result = worker
273                                .execute(
274                                    code,
275                                    &self.config,
276                                    auditing_dispatcher,
277                                    auditing_resource_dispatcher,
278                                    auditing_stash_dispatcher,
279                                )
280                                .await;
281                            let outcome = if is_fatal_sandbox_error(&exec_result) {
282                                crate::pool::ReleaseOutcome::Fatal
283                            } else {
284                                crate::pool::ReleaseOutcome::Ok
285                            };
286                            pool.release(worker, outcome).await;
287                            exec_result
288                        }
289                        Err(e) => {
290                            tracing::warn!(error = %e, "pool acquire failed, falling back to fresh process");
291                            crate::host::SandboxHost::execute_in_child(
292                                code,
293                                &self.config,
294                                auditing_dispatcher,
295                                auditing_resource_dispatcher,
296                                auditing_stash_dispatcher,
297                                known_servers,
298                                known_tools,
299                            )
300                            .await
301                        }
302                    }
303                } else {
304                    // No pool: spawn fresh child process
305                    crate::host::SandboxHost::execute_in_child(
306                        code,
307                        &self.config,
308                        auditing_dispatcher,
309                        auditing_resource_dispatcher,
310                        auditing_stash_dispatcher,
311                        known_servers,
312                        known_tools,
313                    )
314                    .await
315                }
316            }
317            ExecutionMode::InProcess => {
318                self.execute_code_in_process(
319                    code,
320                    auditing_dispatcher,
321                    auditing_resource_dispatcher,
322                    auditing_stash_dispatcher,
323                    known_servers,
324                    known_tools,
325                )
326                .await
327            }
328        };
329
330        // Collect tool call audits
331        while let Ok(tool_audit) = audit_rx.try_recv() {
332            audit_builder.record_tool_call(tool_audit);
333        }
334
335        // Collect resource read audits
336        while let Ok(resource_audit) = resource_audit_rx.try_recv() {
337            audit_builder.record_resource_read(resource_audit);
338        }
339
340        // Collect stash operation audits
341        while let Ok(stash_audit) = stash_audit_rx.try_recv() {
342            audit_builder.record_stash_op(stash_audit);
343        }
344
345        // Emit audit entry
346        let entry = audit_builder.finish(&result);
347        self.audit_logger.log(&entry).await;
348
349        match &result {
350            Ok(_) => tracing::info!("execute_code: complete"),
351            Err(e) => tracing::warn!(error = %e, "execute_code: failed"),
352        }
353
354        result
355    }
356
357    /// In-process execution: spawn a dedicated thread with its own V8 isolate.
358    async fn execute_code_in_process(
359        &self,
360        code: &str,
361        dispatcher: Arc<dyn ToolDispatcher>,
362        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
363        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
364        known_servers: Option<std::collections::HashSet<String>>,
365        known_tools: Option<Vec<(String, String)>>,
366    ) -> Result<Value, SandboxError> {
367        let code = code.to_string();
368        let config = self.config.clone();
369
370        let (tx, rx) = tokio::sync::oneshot::channel();
371        std::thread::spawn(move || {
372            let rt = match tokio::runtime::Builder::new_current_thread()
373                .enable_all()
374                .build()
375            {
376                Ok(rt) => rt,
377                Err(e) => {
378                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
379                        tracing::warn!("sandbox result receiver dropped");
380                    }
381                    return;
382                }
383            };
384            let result = rt.block_on(run_execute_with_known_servers(
385                &config,
386                &code,
387                dispatcher,
388                resource_dispatcher,
389                stash_dispatcher,
390                known_servers,
391                known_tools,
392            ));
393            if tx.send(result).is_err() {
394                tracing::warn!("sandbox result receiver dropped before result was sent");
395            }
396        });
397
398        rx.await
399            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?
400    }
401}
402
403/// Determine if a sandbox execution result indicates a fatal worker condition.
404///
405/// Workers that time out or exceed their heap limit must be killed rather than
406/// reused. With the `ErrorKind` field in `ExecutionComplete`, the host now
407/// reconstructs the correct typed `SandboxError` variant, so this function
408/// only needs to match the native types.
409fn is_fatal_sandbox_error(result: &Result<Value, SandboxError>) -> bool {
410    matches!(
411        result,
412        Err(SandboxError::Timeout { .. }) | Err(SandboxError::HeapLimitExceeded)
413    )
414}
415
/// State for the near-heap-limit callback.
///
/// A `Box<HeapLimitState>` is handed to V8 as the raw callback `data` pointer;
/// see `near_heap_limit_callback` for the lifetime argument.
struct HeapLimitState {
    // Cross-thread handle used to terminate the isolate when the limit fires.
    handle: v8::IsolateHandle,
    /// Whether the heap limit has been triggered. Uses AtomicBool so the callback
    /// can use a shared `&` reference instead of `&mut`, eliminating aliasing concerns.
    triggered: AtomicBool,
}
423
/// V8 near-heap-limit callback. Terminates execution and grants 1MB grace
/// for the termination to propagate cleanly.
///
/// Returns the new heap limit V8 should use; `terminate_execution` is called
/// at most once thanks to the `swap` on `triggered`.
extern "C" fn near_heap_limit_callback(
    data: *mut std::ffi::c_void,
    current_heap_limit: usize,
    _initial_heap_limit: usize,
) -> usize {
    // SAFETY: `data` points to `heap_state` (Box<HeapLimitState>) allocated below.
    // The Box outlives this callback because: (1) the watchdog thread is joined
    // before heap_state is dropped, and (2) V8 only invokes this callback while the
    // isolate's event loop is running, which completes before the join.
    // We use a shared `&` reference (not `&mut`) because `triggered` is AtomicBool,
    // so no aliasing concerns even if V8 were to call this callback re-entrantly.
    let state = unsafe { &*(data as *const HeapLimitState) };
    // `swap` returns the previous value: only the first caller terminates.
    if !state.triggered.swap(true, Ordering::SeqCst) {
        state.handle.terminate_execution();
    }
    // Grant 1MB grace so the termination exception can propagate
    current_heap_limit + 1024 * 1024
}
444
/// Run a search operation on the current thread (must be called from a
/// dedicated thread, not the main tokio runtime).
///
/// Public for reuse in the worker binary.
///
/// # Errors
/// Fails if the runtime cannot be created, the manifest cannot be serialized,
/// either bootstrap script throws, or the user code itself errors.
pub async fn run_search(
    config: &SandboxConfig,
    code: &str,
    manifest: &Value,
) -> Result<Value, SandboxError> {
    // Search mode gets no dispatchers or call limits — only the heap cap.
    let mut runtime = create_runtime(
        None,
        None,
        config.max_heap_size,
        None,
        None,
        None,
        None,
        None,
    )?;

    // Inject the manifest as a global
    let manifest_json = serde_json::to_string(manifest)?;
    let bootstrap = format!("globalThis.manifest = {};", manifest_json);
    runtime
        .execute_script("[forge:manifest]", bootstrap)
        .map_err(|e| SandboxError::JsError {
            message: e.to_string(),
        })?;

    // Bootstrap: capture ops in closures, create minimal forge object, delete Deno,
    // and remove dangerous code generation primitives.
    // Order matters: `forge` must capture the ops *before* `Deno` is deleted.
    runtime
        .execute_script(
            "[forge:bootstrap]",
            r#"
                ((ops) => {
                    const setResult = (json) => ops.op_forge_set_result(json);
                    const log = (msg) => ops.op_forge_log(String(msg));
                    globalThis.forge = Object.freeze({
                        __setResult: setResult,
                        log: log,
                    });
                    delete globalThis.Deno;

                    // Remove code generation primitives to prevent prototype chain attacks.
                    // Even with the validator banning eval( and Function(, an attacker could
                    // reach Function via forge.log.constructor or similar prototype chain access.
                    delete globalThis.eval;
                    const AsyncFunction = (async function(){}).constructor;
                    const GeneratorFunction = (function*(){}).constructor;
                    Object.defineProperty(Function.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                })(Deno.core.ops);
            "#,
        )
        .map_err(|e| SandboxError::JsError {
            message: e.to_string(),
        })?;

    run_user_code(&mut runtime, code, config).await
}
513
514/// Run an execute operation on the current thread.
515///
516/// Public for reuse in the worker binary.
517pub async fn run_execute(
518    config: &SandboxConfig,
519    code: &str,
520    dispatcher: Arc<dyn ToolDispatcher>,
521    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
522    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
523) -> Result<Value, SandboxError> {
524    run_execute_with_known_servers(
525        config,
526        code,
527        dispatcher,
528        resource_dispatcher,
529        stash_dispatcher,
530        None,
531        None,
532    )
533    .await
534}
535
536/// Run an execute operation with an optional set of known server names for SR-R6 validation
537/// and known tools for structured error fuzzy matching.
538pub async fn run_execute_with_known_servers(
539    config: &SandboxConfig,
540    code: &str,
541    dispatcher: Arc<dyn ToolDispatcher>,
542    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
543    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
544    known_servers: Option<std::collections::HashSet<String>>,
545    known_tools: Option<Vec<(String, String)>>,
546) -> Result<Value, SandboxError> {
547    let limits = ToolCallLimits {
548        max_calls: config.max_tool_calls,
549        max_args_size: config.max_tool_call_args_size,
550        calls_made: 0,
551    };
552    let mut runtime = create_runtime(
553        Some(dispatcher),
554        resource_dispatcher.clone(),
555        config.max_heap_size,
556        Some(limits),
557        Some(config.max_resource_size),
558        stash_dispatcher.clone(),
559        known_servers,
560        known_tools,
561    )?;
562
563    // Determine which capabilities are available
564    let has_resource_dispatcher = resource_dispatcher.is_some();
565    let has_stash_dispatcher = stash_dispatcher.is_some();
566
567    // Bootstrap: capture ops in closures, create full forge API, delete Deno,
568    // and remove dangerous code generation primitives.
569    // User code accesses tools via forge.callTool() or forge.server("x").cat.tool().
570    // Conditionally includes readResource and stash based on available dispatchers.
571    let bootstrap = build_execute_bootstrap(
572        has_resource_dispatcher,
573        has_stash_dispatcher,
574        config.max_parallel,
575    );
576
577    runtime
578        .execute_script("[forge:bootstrap]", bootstrap)
579        .map_err(|e| SandboxError::JsError {
580            message: e.to_string(),
581        })?;
582
583    run_user_code(&mut runtime, code, config).await
584}
585
/// Build the bootstrap JavaScript for execute mode.
///
/// Conditionally includes `readResource` and `stash` APIs based on which
/// dispatchers are available.
///
/// The script is assembled from fragments: an IIFE header that captures the
/// ops, optional capability bindings, the frozen `globalThis.forge` object
/// built from `forge_props`, and finally the security teardown. The ops are
/// captured in closures first so deleting `Deno` at the end cannot break them.
fn build_execute_bootstrap(has_resource: bool, has_stash: bool, max_parallel: usize) -> String {
    let mut parts = Vec::new();

    // Always available ops + frozen concurrency cap
    parts.push(format!(
        r#"((ops) => {{
                    const callToolOp = ops.op_forge_call_tool;
                    const setResult = (json) => ops.op_forge_set_result(json);
                    const log = (msg) => ops.op_forge_log(String(msg));
                    const __MAX_PARALLEL = Object.freeze({max_parallel});

                    const callTool = async (server, tool, args) => {{
                        const resultJson = await callToolOp(
                            server, tool, JSON.stringify(args || {{}})
                        );
                        return JSON.parse(resultJson);
                    }};"#
    ));

    // readResource binding (conditional)
    if has_resource {
        parts.push(
            r#"
                    const readResourceOp = ops.op_forge_read_resource;
                    const readResource = async (server, uri) => {
                        const resultJson = await readResourceOp(server, uri);
                        return JSON.parse(resultJson);
                    };"#
            .to_string(),
        );
    }

    // stash bindings (conditional)
    if has_stash {
        parts.push(
            r#"
                    const stashPutOp = ops.op_forge_stash_put;
                    const stashGetOp = ops.op_forge_stash_get;
                    const stashDeleteOp = ops.op_forge_stash_delete;
                    const stashKeysOp = ops.op_forge_stash_keys;"#
                .to_string(),
        );
    }

    // Build the forge object properties
    let mut forge_props = vec![
        "                        __setResult: setResult".to_string(),
        "                        log: log".to_string(),
        "                        callTool: callTool".to_string(),
    ];

    if has_resource {
        forge_props.push("                        readResource: readResource".to_string());
    }

    if has_stash {
        forge_props.push(
            r#"                        stash: Object.freeze({
                            put: async (key, value, opts) => {
                                const ttl = (opts && opts.ttl) ? opts.ttl : 0;
                                const resultJson = await stashPutOp(key, JSON.stringify(value), ttl);
                                return JSON.parse(resultJson);
                            },
                            get: async (key) => {
                                const resultJson = await stashGetOp(key);
                                return JSON.parse(resultJson);
                            },
                            delete: async (key) => {
                                const resultJson = await stashDeleteOp(key);
                                return JSON.parse(resultJson);
                            },
                            keys: async () => {
                                const resultJson = await stashKeysOp();
                                return JSON.parse(resultJson);
                            }
                        })"#
            .to_string(),
        );
    }

    // server proxy is always included
    forge_props.push(
        r#"                        server: (name) => {
                            return new Proxy({}, {
                                get(_target, category) {
                                    return new Proxy({}, {
                                        get(_target2, tool) {
                                            return async (args) => {
                                                const toolName = category === 'general' ? tool : `${category}.${tool}`;
                                                return callTool(
                                                    name,
                                                    toolName,
                                                    args || {}
                                                );
                                            };
                                        }
                                    });
                                }
                            });
                        }"#
        .to_string(),
    );

    // forge.parallel() — bounded concurrency wrapper over callTool/readResource
    forge_props.push(
        r#"                        parallel: async (calls, opts) => {
                            opts = opts || {};
                            const concurrency = Math.min(
                                opts.concurrency || __MAX_PARALLEL,
                                __MAX_PARALLEL
                            );
                            const failFast = opts.failFast || false;
                            const results = new Array(calls.length).fill(null);
                            const errors = [];
                            let aborted = false;

                            for (let i = 0; i < calls.length && !aborted; i += concurrency) {
                                const batch = calls.slice(i, i + concurrency);
                                await Promise.allSettled(
                                    batch.map((item, idx) => {
                                        const fn_ = typeof item === 'function' ? item : item.fn;
                                        return fn_().then(
                                            val => {
                                                if (val && val.error === true && val.code) {
                                                    errors.push({ index: i + idx, error: val.message || val.code });
                                                } else {
                                                    results[i + idx] = val;
                                                }
                                                if (errors.length > 0 && failFast) aborted = true;
                                            },
                                            err => {
                                                errors.push({ index: i + idx, error: err.message || String(err) });
                                                if (failFast) aborted = true;
                                            }
                                        );
                                    })
                                );
                            }

                            return { results, errors, aborted };
                        }"#
        .to_string(),
    );

    // Splice the comma-joined properties into the frozen forge object.
    let forge_obj = format!(
        r#"
                    globalThis.forge = Object.freeze({{
{}
                    }});"#,
        forge_props.join(",\n")
    );
    parts.push(forge_obj);

    // Security: remove dangerous globals
    parts.push(
        r#"
                    delete globalThis.Deno;

                    // Remove code generation primitives to prevent prototype chain attacks.
                    delete globalThis.eval;
                    const AsyncFunction = (async function(){}).constructor;
                    const GeneratorFunction = (function*(){}).constructor;
                    Object.defineProperty(Function.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                })(Deno.core.ops);"#
            .to_string(),
    );

    parts.join("\n")
}
767
768/// Create a fresh JsRuntime with the forge extension loaded and V8 heap limits set.
769#[allow(clippy::too_many_arguments)]
770pub(crate) fn create_runtime(
771    dispatcher: Option<Arc<dyn ToolDispatcher>>,
772    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
773    max_heap_size: usize,
774    tool_call_limits: Option<ToolCallLimits>,
775    max_resource_size: Option<usize>,
776    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
777    known_servers: Option<std::collections::HashSet<String>>,
778    known_tools: Option<Vec<(String, String)>>,
779) -> Result<JsRuntime, SandboxError> {
780    let create_params = v8::CreateParams::default().heap_limits(0, max_heap_size);
781
782    let runtime = JsRuntime::new(RuntimeOptions {
783        extensions: vec![forge_ext::init()],
784        create_params: Some(create_params),
785        ..Default::default()
786    });
787
788    if let Some(d) = dispatcher {
789        runtime.op_state().borrow_mut().put(d);
790    }
791    if let Some(rd) = resource_dispatcher {
792        runtime.op_state().borrow_mut().put(rd);
793    }
794    if let Some(limits) = tool_call_limits {
795        runtime.op_state().borrow_mut().put(limits);
796    }
797    if let Some(size) = max_resource_size {
798        runtime.op_state().borrow_mut().put(MaxResourceSize(size));
799    }
800    if let Some(sd) = stash_dispatcher {
801        runtime.op_state().borrow_mut().put(sd);
802        // CurrentGroup defaults to None; the ForgeServer level sets the actual group
803        runtime.op_state().borrow_mut().put(CurrentGroup(None));
804    }
805    if let Some(servers) = known_servers {
806        runtime.op_state().borrow_mut().put(KnownServers(servers));
807    }
808    if let Some(tools) = known_tools {
809        runtime.op_state().borrow_mut().put(KnownTools(tools));
810    }
811
812    Ok(runtime)
813}
814
815/// Wrap the user's async arrow function, execute it, and extract the result.
816///
817/// Sets up a CPU watchdog thread and near-heap-limit callback before running
818/// user code. The watchdog terminates V8 execution if the timeout elapses
819/// (handles CPU-bound infinite loops). The heap callback terminates execution
820/// if V8 approaches the heap limit (prevents OOM abort).
821async fn run_user_code(
822    runtime: &mut JsRuntime,
823    code: &str,
824    config: &SandboxConfig,
825) -> Result<Value, SandboxError> {
826    // --- Set up heap limit callback ---
827    let heap_state = Box::new(HeapLimitState {
828        handle: runtime.v8_isolate().thread_safe_handle(),
829        triggered: AtomicBool::new(false),
830    });
831    runtime.v8_isolate().add_near_heap_limit_callback(
832        near_heap_limit_callback,
833        &*heap_state as *const HeapLimitState as *mut std::ffi::c_void,
834    );
835
836    // --- Set up CPU watchdog ---
837    let watchdog_handle = runtime.v8_isolate().thread_safe_handle();
838    let timed_out = Arc::new(AtomicBool::new(false));
839    let watchdog_timed_out = timed_out.clone();
840    let timeout = config.timeout;
841    let (cancel_tx, cancel_rx) = std::sync::mpsc::channel::<()>();
842
843    let watchdog = std::thread::spawn(move || {
844        if let Err(std::sync::mpsc::RecvTimeoutError::Timeout) = cancel_rx.recv_timeout(timeout) {
845            watchdog_timed_out.store(true, Ordering::SeqCst);
846            watchdog_handle.terminate_execution();
847        }
848    });
849
850    // --- Execute user code ---
851    let wrapped = format!(
852        r#"
853        (async () => {{
854            try {{
855                const __userFn = {code};
856                const __result = await __userFn();
857                forge.__setResult(
858                    JSON.stringify({{ ok: __result }})
859                );
860            }} catch (e) {{
861                forge.__setResult(
862                    JSON.stringify({{ error: e.message || String(e) }})
863                );
864            }}
865        }})();
866        "#
867    );
868
869    let exec_error = match runtime.execute_script("[forge:execute]", wrapped) {
870        Ok(_) => {
871            // Drive the event loop to resolve async operations
872            match tokio::time::timeout(
873                config.timeout,
874                runtime.run_event_loop(PollEventLoopOptions::default()),
875            )
876            .await
877            {
878                Ok(Ok(())) => None,
879                Ok(Err(e)) => Some(e.to_string()),
880                Err(_) => Some("async timeout".to_string()),
881            }
882        }
883        Err(e) => Some(e.to_string()),
884    };
885
886    // --- Cleanup: cancel watchdog and wait for it to exit ---
887    // This ensures the watchdog thread is done before we drop the runtime,
888    // preventing use-after-free on the IsolateHandle.
889    let _ = cancel_tx.send(());
890    let _ = watchdog.join();
891
892    // --- Check error causes in priority order ---
893    if heap_state.triggered.load(Ordering::SeqCst) {
894        return Err(SandboxError::HeapLimitExceeded);
895    }
896
897    if timed_out.load(Ordering::SeqCst) {
898        return Err(SandboxError::Timeout {
899            timeout_ms: config.timeout.as_millis() as u64,
900        });
901    }
902
903    if let Some(err_msg) = exec_error {
904        return Err(SandboxError::JsError { message: err_msg });
905    }
906
907    // --- Extract result from OpState ---
908    let result_str = {
909        let state = runtime.op_state();
910        let state = state.borrow();
911        state
912            .try_borrow::<ExecutionResult>()
913            .map(|r| r.0.clone())
914            .ok_or_else(|| SandboxError::JsError {
915                message: "no result returned from sandbox execution".into(),
916            })?
917    };
918
919    if result_str.len() > config.max_output_size {
920        return Err(SandboxError::OutputTooLarge {
921            max: config.max_output_size,
922        });
923    }
924
925    let envelope: Value = serde_json::from_str(&result_str)?;
926
927    if let Some(error) = envelope.get("error") {
928        return Err(SandboxError::JsError {
929            message: error.as_str().unwrap_or("unknown error").to_string(),
930        });
931    }
932
933    Ok(envelope.get("ok").cloned().unwrap_or(Value::Null))
934}
935
936#[cfg(test)]
937mod tests {
938    use super::*;
939
940    fn executor() -> SandboxExecutor {
941        SandboxExecutor::new(SandboxConfig::default())
942    }
943
944    /// Test dispatcher that echoes back the server/tool/args.
945    struct TestDispatcher;
946
947    #[async_trait::async_trait]
948    impl ToolDispatcher for TestDispatcher {
949        async fn call_tool(
950            &self,
951            server: &str,
952            tool: &str,
953            args: serde_json::Value,
954        ) -> Result<serde_json::Value, forge_error::DispatchError> {
955            Ok(serde_json::json!({
956                "server": server,
957                "tool": tool,
958                "args": args,
959                "status": "ok"
960            }))
961        }
962    }
963
964    #[tokio::test]
965    async fn search_returns_manifest_data() {
966        let exec = executor();
967        let manifest = serde_json::json!({
968            "tools": [
969                {"name": "parse_ast", "category": "ast"},
970                {"name": "find_symbols", "category": "symbols"},
971            ]
972        });
973
974        let code = r#"async () => {
975            return manifest.tools.filter(t => t.category === "ast");
976        }"#;
977
978        let result = exec.execute_search(code, &manifest).await.unwrap();
979        let tools = result.as_array().unwrap();
980        assert_eq!(tools.len(), 1);
981        assert_eq!(tools[0]["name"], "parse_ast");
982    }
983
984    #[tokio::test]
985    async fn search_handles_complex_queries() {
986        let exec = executor();
987        let manifest = serde_json::json!({
988            "servers": [
989                {
990                    "name": "narsil",
991                    "categories": {
992                        "ast": { "tools": ["parse", "query", "walk"] },
993                        "symbols": { "tools": ["find", "references"] }
994                    }
995                }
996            ]
997        });
998
999        let code = r#"async () => {
1000            return manifest.servers
1001                .map(s => ({ name: s.name, categories: Object.keys(s.categories) }));
1002        }"#;
1003
1004        let result = exec.execute_search(code, &manifest).await.unwrap();
1005        let servers = result.as_array().unwrap();
1006        assert_eq!(servers[0]["name"], "narsil");
1007    }
1008
1009    #[tokio::test]
1010    async fn timeout_is_enforced() {
1011        let exec = SandboxExecutor::new(SandboxConfig {
1012            timeout: Duration::from_millis(200),
1013            ..Default::default()
1014        });
1015        let manifest = serde_json::json!({});
1016
1017        // A never-resolving promise should trigger a timeout
1018        let code = r#"async () => {
1019            await new Promise(() => {});
1020        }"#;
1021
1022        let start = std::time::Instant::now();
1023        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1024        let elapsed = start.elapsed();
1025
1026        // Should be a timeout or a "no result" error (the event loop completes
1027        // when there are no more pending ops, even if the promise is unresolved)
1028        match &err {
1029            SandboxError::Timeout { .. } => {}
1030            SandboxError::JsError { message } if message.contains("no result") => {
1031                // deno_core's event loop exits when there are no pending ops,
1032                // so the never-resolving promise doesn't actually block
1033            }
1034            other => panic!("unexpected error: {other:?}, elapsed: {elapsed:?}"),
1035        }
1036    }
1037
1038    #[tokio::test]
1039    async fn js_errors_are_captured() {
1040        let exec = executor();
1041        let manifest = serde_json::json!({});
1042
1043        let code = r#"async () => {
1044            throw new Error("intentional test error");
1045        }"#;
1046
1047        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1048        assert!(matches!(err, SandboxError::JsError { .. }));
1049        let msg = err.to_string();
1050        assert!(msg.contains("intentional test error"));
1051    }
1052
1053    #[tokio::test]
1054    async fn no_filesystem_access() {
1055        let exec = executor();
1056        let manifest = serde_json::json!({});
1057
1058        // require() is a banned pattern — caught by validator
1059        let code = r#"async () => {
1060            const fs = require("fs");
1061            return "ESCAPED";
1062        }"#;
1063
1064        let err = exec.execute_search(code, &manifest).await;
1065        assert!(err.is_err());
1066    }
1067
1068    #[tokio::test]
1069    async fn no_network_access() {
1070        let exec = executor();
1071        let manifest = serde_json::json!({});
1072
1073        let code = r#"async () => {
1074            try {
1075                await fetch("https://example.com");
1076                return "ESCAPED";
1077            } catch(e) {
1078                return "CONTAINED";
1079            }
1080        }"#;
1081
1082        let result = exec.execute_search(code, &manifest).await.unwrap();
1083        assert_eq!(result, "CONTAINED");
1084    }
1085
1086    // --- WU4 new tests ---
1087
1088    #[tokio::test]
1089    async fn cpu_bound_infinite_loop_is_terminated() {
1090        let exec = SandboxExecutor::new(SandboxConfig {
1091            timeout: Duration::from_millis(500),
1092            ..Default::default()
1093        });
1094        let manifest = serde_json::json!({});
1095
1096        let code = r#"async () => {
1097            while(true) {}
1098        }"#;
1099
1100        let start = std::time::Instant::now();
1101        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1102        let elapsed = start.elapsed();
1103
1104        assert!(
1105            matches!(err, SandboxError::Timeout { .. }),
1106            "expected timeout, got: {err:?}"
1107        );
1108        assert!(
1109            elapsed < Duration::from_secs(5),
1110            "should complete reasonably fast, took: {elapsed:?}"
1111        );
1112    }
1113
1114    #[tokio::test]
1115    async fn heap_limit_prevents_oom() {
1116        let exec = SandboxExecutor::new(SandboxConfig {
1117            max_heap_size: 10 * 1024 * 1024,  // 10 MB
1118            timeout: Duration::from_secs(30), // Long timeout so heap fills first
1119            ..Default::default()
1120        });
1121        let manifest = serde_json::json!({});
1122
1123        // Rapidly allocate memory to exceed the heap limit
1124        let code = r#"async () => {
1125            const arr = [];
1126            while(true) {
1127                arr.push(new Array(100000).fill("x"));
1128            }
1129        }"#;
1130
1131        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1132        assert!(
1133            matches!(
1134                err,
1135                SandboxError::HeapLimitExceeded | SandboxError::JsError { .. }
1136            ),
1137            "expected heap limit or JS error, got: {err:?}"
1138        );
1139    }
1140
1141    #[tokio::test]
1142    async fn concurrency_limit_enforced() {
1143        // Use max_concurrent=0 so no executions are allowed (deterministic test)
1144        let exec = SandboxExecutor::new(SandboxConfig {
1145            max_concurrent: 0,
1146            ..Default::default()
1147        });
1148
1149        let code = r#"async () => { return 1; }"#;
1150        let err = exec
1151            .execute_search(code, &serde_json::json!({}))
1152            .await
1153            .unwrap_err();
1154        assert!(
1155            matches!(err, SandboxError::ConcurrencyLimit { max: 0 }),
1156            "expected concurrency limit, got: {err:?}"
1157        );
1158    }
1159
1160    #[tokio::test]
1161    async fn deno_global_is_not_accessible() {
1162        let exec = executor();
1163        let manifest = serde_json::json!({});
1164
1165        let code = r#"async () => {
1166            const props = Object.getOwnPropertyNames(globalThis);
1167            return !props.includes("Deno");
1168        }"#;
1169
1170        let result = exec.execute_search(code, &manifest).await.unwrap();
1171        assert_eq!(result, true);
1172    }
1173
1174    #[tokio::test]
1175    async fn forge_object_is_frozen() {
1176        let exec = executor();
1177        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1178
1179        let code = r#"async () => {
1180            return Object.isFrozen(forge);
1181        }"#;
1182
1183        let result = exec
1184            .execute_code(code, dispatcher, None, None)
1185            .await
1186            .unwrap();
1187        assert_eq!(result, true);
1188    }
1189
1190    #[tokio::test]
1191    async fn tool_call_rate_limit() {
1192        let exec = SandboxExecutor::new(SandboxConfig {
1193            max_tool_calls: 2,
1194            ..Default::default()
1195        });
1196        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1197
1198        let code = r#"async () => {
1199            await forge.callTool("test", "tool1", {});
1200            await forge.callTool("test", "tool2", {});
1201            try {
1202                await forge.callTool("test", "tool3", {});
1203                return "should not reach here";
1204            } catch(e) {
1205                return e.message;
1206            }
1207        }"#;
1208
1209        let result = exec
1210            .execute_code(code, dispatcher, None, None)
1211            .await
1212            .unwrap();
1213        assert!(
1214            result
1215                .as_str()
1216                .unwrap()
1217                .contains("tool call limit exceeded"),
1218            "expected tool call limit message, got: {result:?}"
1219        );
1220    }
1221
1222    #[tokio::test]
1223    async fn tool_call_args_size_limit() {
1224        let exec = SandboxExecutor::new(SandboxConfig {
1225            max_tool_call_args_size: 100,
1226            ..Default::default()
1227        });
1228        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1229
1230        let code = r#"async () => {
1231            try {
1232                await forge.callTool("test", "tool", { data: "x".repeat(200) });
1233                return "should not reach here";
1234            } catch(e) {
1235                return e.message;
1236            }
1237        }"#;
1238
1239        let result = exec
1240            .execute_code(code, dispatcher, None, None)
1241            .await
1242            .unwrap();
1243        assert!(
1244            result.as_str().unwrap().contains("too large"),
1245            "expected args too large message, got: {result:?}"
1246        );
1247    }
1248
1249    #[tokio::test]
1250    async fn forge_log_works() {
1251        let exec = executor();
1252        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1253
1254        let code = r#"async () => {
1255            forge.log("test message from sandbox");
1256            return "ok";
1257        }"#;
1258
1259        let result = exec
1260            .execute_code(code, dispatcher, None, None)
1261            .await
1262            .unwrap();
1263        assert_eq!(result, "ok");
1264    }
1265
1266    #[tokio::test]
1267    async fn forge_server_proxy_calls_tool() {
1268        let exec = executor();
1269        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1270
1271        let code = r#"async () => {
1272            const result = await forge.server("narsil").ast.parse({ file: "test.rs" });
1273            return result;
1274        }"#;
1275
1276        let result = exec
1277            .execute_code(code, dispatcher, None, None)
1278            .await
1279            .unwrap();
1280        assert_eq!(result["server"], "narsil");
1281        assert_eq!(result["tool"], "ast.parse");
1282        assert_eq!(result["status"], "ok");
1283    }
1284
1285    #[tokio::test]
1286    async fn forge_server_proxy_general_category_strips_prefix() {
1287        let exec = executor();
1288        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1289
1290        // Tools in the "general" category should NOT have "general." prepended
1291        let code = r#"async () => {
1292            const result = await forge.server("narsil").general.find_symbols({ pattern: "main" });
1293            return result;
1294        }"#;
1295
1296        let result = exec
1297            .execute_code(code, dispatcher, None, None)
1298            .await
1299            .unwrap();
1300        assert_eq!(result["server"], "narsil");
1301        assert_eq!(result["tool"], "find_symbols");
1302        assert_eq!(result["status"], "ok");
1303    }
1304
1305    #[tokio::test]
1306    async fn multiple_tool_calls_in_single_execution() {
1307        let exec = executor();
1308        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1309
1310        let code = r#"async () => {
1311            const r1 = await forge.callTool("server1", "tool1", {});
1312            const r2 = await forge.callTool("server2", "tool2", {});
1313            return [r1, r2];
1314        }"#;
1315
1316        let result = exec
1317            .execute_code(code, dispatcher, None, None)
1318            .await
1319            .unwrap();
1320        let arr = result.as_array().unwrap();
1321        assert_eq!(arr.len(), 2);
1322        assert_eq!(arr[0]["server"], "server1");
1323        assert_eq!(arr[1]["server"], "server2");
1324    }
1325
1326    #[tokio::test]
1327    async fn eval_is_not_accessible() {
1328        let exec = executor();
1329        let manifest = serde_json::json!({});
1330
1331        let code = r#"async () => {
1332            return typeof globalThis.eval;
1333        }"#;
1334
1335        let result = exec.execute_search(code, &manifest).await.unwrap();
1336        assert_eq!(result, "undefined");
1337    }
1338
1339    #[tokio::test]
1340    async fn function_constructor_is_blocked() {
1341        let exec = executor();
1342        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1343
1344        // Try to access Function via prototype chain — should get undefined
1345        let code = r#"async () => {
1346            const ctor = forge.log.constructor;
1347            return String(ctor);
1348        }"#;
1349
1350        let result = exec
1351            .execute_code(code, dispatcher, None, None)
1352            .await
1353            .unwrap();
1354        assert_eq!(result, "undefined");
1355    }
1356
1357    #[tokio::test]
1358    async fn async_function_constructor_is_blocked() {
1359        let exec = executor();
1360        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1361
1362        // Try to access AsyncFunction via prototype chain
1363        let code = r#"async () => {
1364            const fn1 = async () => {};
1365            const ctor = fn1.constructor;
1366            return String(ctor);
1367        }"#;
1368
1369        let result = exec
1370            .execute_code(code, dispatcher, None, None)
1371            .await
1372            .unwrap();
1373        assert_eq!(result, "undefined");
1374    }
1375
1376    // --- v0.2 Resource read test dispatchers ---
1377
1378    /// Resource dispatcher that echoes back server/uri.
1379    struct TestResourceDispatcher;
1380
1381    #[async_trait::async_trait]
1382    impl ResourceDispatcher for TestResourceDispatcher {
1383        async fn read_resource(
1384            &self,
1385            server: &str,
1386            uri: &str,
1387        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1388            Ok(serde_json::json!({
1389                "server": server,
1390                "uri": uri,
1391                "content": "test resource content"
1392            }))
1393        }
1394    }
1395
1396    /// Resource dispatcher that returns a large payload.
1397    struct LargeResourceDispatcher {
1398        content_size: usize,
1399    }
1400
1401    #[async_trait::async_trait]
1402    impl ResourceDispatcher for LargeResourceDispatcher {
1403        async fn read_resource(
1404            &self,
1405            _server: &str,
1406            _uri: &str,
1407        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1408            Ok(serde_json::json!({
1409                "data": "x".repeat(self.content_size)
1410            }))
1411        }
1412    }
1413
1414    /// Resource dispatcher that always fails with a configurable error.
1415    struct FailingResourceDispatcher {
1416        error_msg: String,
1417    }
1418
1419    #[async_trait::async_trait]
1420    impl ResourceDispatcher for FailingResourceDispatcher {
1421        async fn read_resource(
1422            &self,
1423            _server: &str,
1424            _uri: &str,
1425        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1426            Err(anyhow::anyhow!("{}", self.error_msg).into())
1427        }
1428    }
1429
1430    // --- RS-U01: readResource routes to correct server ---
1431    #[tokio::test]
1432    async fn rs_u01_read_resource_routes_to_correct_server() {
1433        let exec = executor();
1434        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1435        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1436            Some(Arc::new(TestResourceDispatcher));
1437
1438        let code = r#"async () => {
1439            const result = await forge.readResource("my-server", "file:///logs/app.log");
1440            return result;
1441        }"#;
1442
1443        let result = exec
1444            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1445            .await
1446            .unwrap();
1447        assert_eq!(result["server"], "my-server");
1448        assert_eq!(result["uri"], "file:///logs/app.log");
1449        assert_eq!(result["content"], "test resource content");
1450    }
1451
1452    // --- RS-U02: readResource increments ToolCallLimits.calls_made ---
1453    #[tokio::test]
1454    async fn rs_u02_read_resource_shares_rate_limit_with_tool_calls() {
1455        let exec = SandboxExecutor::new(SandboxConfig {
1456            max_tool_calls: 3,
1457            ..Default::default()
1458        });
1459        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1460        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1461            Some(Arc::new(TestResourceDispatcher));
1462
1463        // 1 tool call + 2 resource reads = 3 (limit), then 4th fails
1464        let code = r#"async () => {
1465            await forge.callTool("s", "t", {});
1466            await forge.readResource("s", "file:///a");
1467            await forge.readResource("s", "file:///b");
1468            try {
1469                await forge.readResource("s", "file:///c");
1470                return "should not reach here";
1471            } catch(e) {
1472                return e.message;
1473            }
1474        }"#;
1475
1476        let result = exec
1477            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1478            .await
1479            .unwrap();
1480        assert!(
1481            result
1482                .as_str()
1483                .unwrap()
1484                .contains("tool call limit exceeded"),
1485            "expected rate limit message, got: {result:?}"
1486        );
1487    }
1488
1489    // --- RS-U03: readResource rejects when limits exhausted ---
1490    #[tokio::test]
1491    async fn rs_u03_read_resource_rejects_when_limits_exhausted() {
1492        let exec = SandboxExecutor::new(SandboxConfig {
1493            max_tool_calls: 1,
1494            ..Default::default()
1495        });
1496        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1497        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1498            Some(Arc::new(TestResourceDispatcher));
1499
1500        let code = r#"async () => {
1501            await forge.readResource("s", "file:///a");
1502            try {
1503                await forge.readResource("s", "file:///b");
1504                return "should not reach here";
1505            } catch(e) {
1506                return e.message;
1507            }
1508        }"#;
1509
1510        let result = exec
1511            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1512            .await
1513            .unwrap();
1514        assert!(
1515            result
1516                .as_str()
1517                .unwrap()
1518                .contains("tool call limit exceeded"),
1519            "expected rate limit error, got: {result:?}"
1520        );
1521    }
1522
1523    // --- RS-U08: truncates response at max_resource_size ---
1524    #[tokio::test]
1525    async fn rs_u08_read_resource_truncates_at_max_resource_size() {
1526        let exec = SandboxExecutor::new(SandboxConfig {
1527            max_resource_size: 100, // very small limit
1528            ..Default::default()
1529        });
1530        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1531        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1532            Some(Arc::new(LargeResourceDispatcher { content_size: 500 }));
1533
1534        // Large resource truncated → JSON.parse fails in bootstrap
1535        let code = r#"async () => {
1536            try {
1537                await forge.readResource("s", "file:///big");
1538                return "no truncation";
1539            } catch(e) {
1540                return "truncated";
1541            }
1542        }"#;
1543
1544        let result = exec
1545            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1546            .await
1547            .unwrap();
1548        assert_eq!(result, "truncated", "large resource should be truncated");
1549    }
1550
1551    // --- RS-U09: errors redacted through redact_error_for_llm ---
1552    #[tokio::test]
1553    async fn rs_u09_read_resource_redacts_errors() {
1554        let exec = executor();
1555        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1556        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1557            Some(Arc::new(FailingResourceDispatcher {
1558                error_msg: "connection refused: http://internal.corp:9876/secret/path".into(),
1559            }));
1560
1561        // Structured errors are returned as values, not thrown
1562        let code = r#"async () => {
1563            const result = await forge.readResource("my-server", "file:///logs/secret.log");
1564            return result;
1565        }"#;
1566
1567        let result = exec
1568            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1569            .await
1570            .unwrap();
1571        assert_eq!(
1572            result["error"], true,
1573            "should be structured error: {result}"
1574        );
1575        let msg = result["message"].as_str().unwrap();
1576        assert!(
1577            !msg.contains("internal.corp"),
1578            "should not leak internal URL: {msg}"
1579        );
1580        assert!(!msg.contains("9876"), "should not leak port: {msg}");
1581        assert!(
1582            msg.contains("my-server"),
1583            "should mention server name: {msg}"
1584        );
1585    }
1586
1587    // --- RS-U10: binary content (base64 encoding) ---
1588    #[tokio::test]
1589    async fn rs_u10_read_resource_handles_binary_content() {
1590        struct Base64ResourceDispatcher;
1591
1592        #[async_trait::async_trait]
1593        impl ResourceDispatcher for Base64ResourceDispatcher {
1594            async fn read_resource(
1595                &self,
1596                _server: &str,
1597                _uri: &str,
1598            ) -> Result<serde_json::Value, forge_error::DispatchError> {
1599                Ok(serde_json::json!({
1600                    "content": "SGVsbG8gV29ybGQ=",
1601                    "_encoding": "base64"
1602                }))
1603            }
1604        }
1605
1606        let exec = executor();
1607        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1608        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1609            Some(Arc::new(Base64ResourceDispatcher));
1610
1611        let code = r#"async () => {
1612            const result = await forge.readResource("s", "file:///binary");
1613            return result;
1614        }"#;
1615
1616        let result = exec
1617            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1618            .await
1619            .unwrap();
1620        assert_eq!(result["_encoding"], "base64");
1621        assert_eq!(result["content"], "SGVsbG8gV29ybGQ=");
1622    }
1623
1624    // --- RS-U11: error for nonexistent resource ---
1625    #[tokio::test]
1626    async fn rs_u11_read_resource_error_for_nonexistent() {
1627        let exec = executor();
1628        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1629        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1630            Some(Arc::new(FailingResourceDispatcher {
1631                error_msg: "resource not found".into(),
1632            }));
1633
1634        // Structured errors are returned as values, not thrown
1635        let code = r#"async () => {
1636            const result = await forge.readResource("s", "file:///nonexistent");
1637            return result;
1638        }"#;
1639
1640        let result = exec
1641            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1642            .await
1643            .unwrap();
1644        assert_eq!(
1645            result["error"], true,
1646            "should be structured error: {result}"
1647        );
1648        let msg = result["message"].as_str().unwrap();
1649        assert!(
1650            msg.contains("failed"),
1651            "should indicate failure: {result:?}"
1652        );
1653    }
1654
1655    // --- RS-U12: handles large (>1MB) content ---
1656    #[tokio::test]
1657    async fn rs_u12_read_resource_handles_large_content() {
1658        let exec = SandboxExecutor::new(SandboxConfig {
1659            max_resource_size: 2 * 1024 * 1024, // 2 MB
1660            timeout: Duration::from_secs(10),
1661            ..Default::default()
1662        });
1663        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1664        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1665            Some(Arc::new(LargeResourceDispatcher {
1666                content_size: 1_100_000,
1667            }));
1668
1669        let code = r#"async () => {
1670            const result = await forge.readResource("s", "file:///large");
1671            return result.data.length;
1672        }"#;
1673
1674        let result = exec
1675            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1676            .await
1677            .unwrap();
1678        assert_eq!(result, 1_100_000);
1679    }
1680
1681    // --- RS-S05: URI for non-file-server — error redacted, no path leakage ---
1682    #[tokio::test]
1683    async fn rs_s05_error_on_invalid_resource_uri_for_server() {
1684        let exec = executor();
1685        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1686        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1687            Some(Arc::new(FailingResourceDispatcher {
1688                error_msg: "unknown resource URI: file:///etc/shadow".into(),
1689            }));
1690
1691        // Structured errors are returned as values, not thrown
1692        let code = r#"async () => {
1693            const result = await forge.readResource("postgres-server", "file:///etc/shadow");
1694            return result;
1695        }"#;
1696
1697        let result = exec
1698            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1699            .await
1700            .unwrap();
1701        assert_eq!(
1702            result["error"], true,
1703            "should be structured error: {result}"
1704        );
1705        let msg = result["message"].as_str().unwrap();
1706        // SR-R5: Error should use "readResource" not the raw URI
1707        assert!(
1708            !msg.contains("/etc/shadow"),
1709            "should not leak file path: {msg}"
1710        );
1711        // Should still mention server for context
1712        assert!(
1713            msg.contains("postgres-server"),
1714            "should mention server: {msg}"
1715        );
1716        assert!(
1717            msg.contains("readResource"),
1718            "should use safe identifier: {msg}"
1719        );
1720    }
1721
1722    // --- RS-S06: error message does not leak full URI path ---
1723    #[tokio::test]
1724    async fn rs_s06_error_message_does_not_leak_full_uri() {
1725        let exec = executor();
1726        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1727        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1728            Some(Arc::new(FailingResourceDispatcher {
1729                error_msg: "file not found: /var/secrets/database/credentials.json".into(),
1730            }));
1731
1732        let code = r#"async () => {
1733            try {
1734                await forge.readResource("server", "file:///var/secrets/database/credentials.json");
1735                return "should not reach here";
1736            } catch(e) {
1737                return e.message;
1738            }
1739        }"#;
1740
1741        let result = exec
1742            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1743            .await
1744            .unwrap();
1745        let msg = result.as_str().unwrap();
1746        // Paths are redacted by redact_error_message
1747        assert!(!msg.contains("/var/secrets"), "should not leak path: {msg}");
1748        assert!(
1749            !msg.contains("credentials.json"),
1750            "should not leak filename: {msg}"
1751        );
1752        // URI itself should not appear in error (SR-R5)
1753        assert!(
1754            !msg.contains("file:///var/secrets"),
1755            "should not leak URI: {msg}"
1756        );
1757    }
1758
1759    // --- RS-S07: large content truncated, not OOM ---
1760    #[tokio::test]
1761    async fn rs_s07_large_content_truncated_not_oom() {
1762        let exec = SandboxExecutor::new(SandboxConfig {
1763            max_resource_size: 1024, // 1 KB limit
1764            timeout: Duration::from_secs(10),
1765            ..Default::default()
1766        });
1767        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1768        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1769            Some(Arc::new(LargeResourceDispatcher {
1770                content_size: 1_000_000, // 1 MB, far exceeds 1 KB limit
1771            }));
1772
1773        let code = r#"async () => {
1774            try {
1775                const result = await forge.readResource("s", "file:///huge");
1776                return "got result without truncation";
1777            } catch(e) {
1778                return "safely truncated";
1779            }
1780        }"#;
1781
1782        // Must complete without OOM
1783        let result = exec
1784            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1785            .await;
1786        assert!(result.is_ok(), "should complete without OOM: {result:?}");
1787        assert_eq!(result.unwrap(), "safely truncated");
1788    }
1789
1790    // --- RS-S08: many resource reads hit rate limit ---
1791    #[tokio::test]
1792    async fn rs_s08_many_reads_hit_rate_limit() {
1793        let exec = SandboxExecutor::new(SandboxConfig {
1794            max_tool_calls: 5,
1795            ..Default::default()
1796        });
1797        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1798        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1799            Some(Arc::new(TestResourceDispatcher));
1800
1801        let code = r#"async () => {
1802            let count = 0;
1803            for (let i = 0; i < 1000; i++) {
1804                try {
1805                    await forge.readResource("s", "file:///r" + i);
1806                    count++;
1807                } catch(e) {
1808                    return { count, error: e.message };
1809                }
1810            }
1811            return { count, error: null };
1812        }"#;
1813
1814        let result = exec
1815            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1816            .await
1817            .unwrap();
1818        assert_eq!(
1819            result["count"], 5,
1820            "should allow exactly max_tool_calls reads"
1821        );
1822        assert!(result["error"]
1823            .as_str()
1824            .unwrap()
1825            .contains("tool call limit exceeded"));
1826    }
1827
1828    // --- RS-S09: search mode blocks resource read ---
1829    #[tokio::test]
1830    async fn rs_s09_search_mode_blocks_resource_read() {
1831        let exec = executor();
1832        let manifest = serde_json::json!({"servers": []});
1833
1834        // In search mode, forge.readResource should not exist
1835        let code = r#"async () => {
1836            return typeof forge.readResource;
1837        }"#;
1838
1839        let result = exec.execute_search(code, &manifest).await.unwrap();
1840        assert_eq!(
1841            result, "undefined",
1842            "readResource should not exist in search mode"
1843        );
1844    }
1845
1846    // --- SR-R6: unknown server rejected at op level ---
1847    #[tokio::test]
1848    async fn sr_r6_unknown_server_rejected_at_op_level() {
1849        let exec = executor();
1850        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1851        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1852            Some(Arc::new(TestResourceDispatcher));
1853
1854        // Use execute_code_with_options to set known servers
1855        let mut known = std::collections::HashSet::new();
1856        known.insert("allowed-server".to_string());
1857
1858        let code = r#"async () => {
1859            try {
1860                await forge.readResource("nonexistent_server", "file:///x");
1861                return "should not reach here";
1862            } catch(e) {
1863                return e.message;
1864            }
1865        }"#;
1866
1867        let result = exec
1868            .execute_code_with_options(
1869                code,
1870                tool_dispatcher,
1871                resource_dispatcher,
1872                None,
1873                Some(known),
1874                None,
1875            )
1876            .await
1877            .unwrap();
1878        let msg = result.as_str().unwrap();
1879        assert!(
1880            msg.contains("unknown server"),
1881            "expected 'unknown server' error, got: {msg}"
1882        );
1883        assert!(
1884            msg.contains("nonexistent_server"),
1885            "should mention the server name: {msg}"
1886        );
1887    }
1888
1889    // --- RS-S10: audit log records resource reads with URI hash ---
1890    #[tokio::test]
1891    async fn rs_s10_audit_records_resource_reads_with_uri_hash() {
1892        struct CapturingAuditLogger {
1893            entries: std::sync::Mutex<Vec<crate::audit::AuditEntry>>,
1894        }
1895
1896        #[async_trait::async_trait]
1897        impl crate::audit::AuditLogger for CapturingAuditLogger {
1898            async fn log(&self, entry: &crate::audit::AuditEntry) {
1899                self.entries.lock().unwrap().push(entry.clone());
1900            }
1901        }
1902
1903        let logger = Arc::new(CapturingAuditLogger {
1904            entries: std::sync::Mutex::new(Vec::new()),
1905        });
1906        let exec = SandboxExecutor::with_audit_logger(SandboxConfig::default(), logger.clone());
1907        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1908        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1909            Some(Arc::new(TestResourceDispatcher));
1910
1911        let code = r#"async () => {
1912            await forge.readResource("my-server", "file:///logs/app.log");
1913            return "done";
1914        }"#;
1915
1916        let _ = exec
1917            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1918            .await
1919            .unwrap();
1920
1921        let entries = logger.entries.lock().unwrap();
1922        assert_eq!(entries.len(), 1);
1923        let entry = &entries[0];
1924        assert_eq!(entry.resource_reads.len(), 1);
1925
1926        let read = &entry.resource_reads[0];
1927        assert_eq!(read.server, "my-server");
1928        assert!(read.success);
1929        // URI should be hashed, not raw
1930        assert_ne!(
1931            read.uri_hash, "file:///logs/app.log",
1932            "URI should be hashed, not stored raw"
1933        );
1934        // Verify it's a valid SHA-256 hex string
1935        assert_eq!(read.uri_hash.len(), 64, "should be SHA-256 hex");
1936        assert!(read.uri_hash.chars().all(|c| c.is_ascii_hexdigit()));
1937    }
1938
1939    #[tokio::test]
1940    async fn large_output_is_rejected() {
1941        let exec = SandboxExecutor::new(SandboxConfig {
1942            max_output_size: 100,
1943            ..Default::default()
1944        });
1945        let manifest = serde_json::json!({});
1946
1947        let code = r#"async () => {
1948            return "x".repeat(1000);
1949        }"#;
1950
1951        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1952        assert!(
1953            matches!(err, SandboxError::OutputTooLarge { .. }),
1954            "expected output too large, got: {err:?}"
1955        );
1956    }
1957
1958    // --- Stash test infrastructure ---
1959
1960    /// Direct stash dispatcher wrapping an Arc<tokio::sync::Mutex<SessionStash>>.
1961    /// Used by integration/security tests without going through IPC.
1962    struct DirectStashDispatcher {
1963        stash: Arc<tokio::sync::Mutex<crate::stash::SessionStash>>,
1964        current_group: Option<String>,
1965    }
1966
1967    #[async_trait::async_trait]
1968    impl crate::StashDispatcher for DirectStashDispatcher {
1969        async fn put(
1970            &self,
1971            key: &str,
1972            value: serde_json::Value,
1973            ttl_secs: Option<u32>,
1974            _current_group: Option<String>,
1975        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1976            let ttl = ttl_secs
1977                .filter(|&s| s > 0)
1978                .map(|s| std::time::Duration::from_secs(s as u64));
1979            let mut stash = self.stash.lock().await;
1980            stash
1981                .put(key, value, ttl, self.current_group.as_deref())
1982                .map_err(|e| forge_error::DispatchError::Internal(e.into()))?;
1983            Ok(serde_json::json!({"ok": true}))
1984        }
1985
1986        async fn get(
1987            &self,
1988            key: &str,
1989            _current_group: Option<String>,
1990        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1991            let stash = self.stash.lock().await;
1992            match stash
1993                .get(key, self.current_group.as_deref())
1994                .map_err(|e| forge_error::DispatchError::Internal(e.into()))?
1995            {
1996                Some(v) => Ok(v.clone()),
1997                None => Ok(serde_json::Value::Null),
1998            }
1999        }
2000
2001        async fn delete(
2002            &self,
2003            key: &str,
2004            _current_group: Option<String>,
2005        ) -> Result<serde_json::Value, forge_error::DispatchError> {
2006            let mut stash = self.stash.lock().await;
2007            let deleted = stash
2008                .delete(key, self.current_group.as_deref())
2009                .map_err(|e| forge_error::DispatchError::Internal(e.into()))?;
2010            Ok(serde_json::json!({"deleted": deleted}))
2011        }
2012
2013        async fn keys(
2014            &self,
2015            _current_group: Option<String>,
2016        ) -> Result<serde_json::Value, forge_error::DispatchError> {
2017            let stash = self.stash.lock().await;
2018            let keys: Vec<&str> = stash.keys(self.current_group.as_deref());
2019            Ok(serde_json::json!(keys))
2020        }
2021    }
2022
2023    fn make_stash(
2024        config: crate::stash::StashConfig,
2025    ) -> Arc<tokio::sync::Mutex<crate::stash::SessionStash>> {
2026        Arc::new(tokio::sync::Mutex::new(crate::stash::SessionStash::new(
2027            config,
2028        )))
2029    }
2030
2031    fn make_stash_dispatcher(
2032        stash: Arc<tokio::sync::Mutex<crate::stash::SessionStash>>,
2033        group: Option<&str>,
2034    ) -> Arc<dyn crate::StashDispatcher> {
2035        Arc::new(DirectStashDispatcher {
2036            stash,
2037            current_group: group.map(str::to_string),
2038        })
2039    }
2040
2041    // --- ST-I01: Two execute_code calls sharing stash (put in first, get in second) ---
2042    #[tokio::test]
2043    async fn st_i01_stash_shared_across_executions() {
2044        let exec = executor();
2045        let stash = make_stash(crate::stash::StashConfig::default());
2046        let sd = make_stash_dispatcher(stash.clone(), None);
2047        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2048
2049        // First execution: put a value
2050        let code1 = r#"async () => {
2051            await forge.stash.put("shared-key", { value: 42 });
2052            return "stored";
2053        }"#;
2054        let result1 = exec
2055            .execute_code(code1, dispatcher.clone(), None, Some(sd.clone()))
2056            .await
2057            .unwrap();
2058        assert_eq!(result1, "stored");
2059
2060        // Second execution: get the value
2061        let sd2 = make_stash_dispatcher(stash, None);
2062        let code2 = r#"async () => {
2063            const v = await forge.stash.get("shared-key");
2064            return v;
2065        }"#;
2066        let result2 = exec
2067            .execute_code(code2, dispatcher, None, Some(sd2))
2068            .await
2069            .unwrap();
2070        assert_eq!(result2["value"], 42);
2071    }
2072
2073    // --- ST-I02: Stash put + get within single execution ---
2074    #[tokio::test]
2075    async fn st_i02_stash_put_get_single_execution() {
2076        let exec = executor();
2077        let stash = make_stash(crate::stash::StashConfig::default());
2078        let sd = make_stash_dispatcher(stash, None);
2079        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2080
2081        let code = r#"async () => {
2082            await forge.stash.put("key", "hello");
2083            const v = await forge.stash.get("key");
2084            return v;
2085        }"#;
2086        let result = exec
2087            .execute_code(code, dispatcher, None, Some(sd))
2088            .await
2089            .unwrap();
2090        assert_eq!(result, "hello");
2091    }
2092
2093    // --- ST-I03: Stash group isolation (put with group A, get with group B fails) ---
2094    #[tokio::test]
2095    async fn st_i03_stash_group_isolation() {
2096        let exec = executor();
2097        let stash = make_stash(crate::stash::StashConfig::default());
2098        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2099
2100        // Put with group A
2101        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2102        let code1 = r#"async () => {
2103            await forge.stash.put("secret", "group-a-data");
2104            return "stored";
2105        }"#;
2106        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2107            .await
2108            .unwrap();
2109
2110        // Get with group B should fail
2111        let sd_b = make_stash_dispatcher(stash, Some("group-b"));
2112        let code2 = r#"async () => {
2113            try {
2114                await forge.stash.get("secret");
2115                return "should not reach here";
2116            } catch(e) {
2117                return e.message;
2118            }
2119        }"#;
2120        let result = exec
2121            .execute_code(code2, dispatcher, None, Some(sd_b))
2122            .await
2123            .unwrap();
2124        assert!(
2125            result.as_str().unwrap().contains("cross-group"),
2126            "expected cross-group error, got: {result:?}"
2127        );
2128    }
2129
2130    // --- ST-I05: Stash combined with callTool + readResource ---
2131    #[tokio::test]
2132    async fn st_i05_stash_combined_with_tool_and_resource() {
2133        let exec = executor();
2134        let stash = make_stash(crate::stash::StashConfig::default());
2135        let sd = make_stash_dispatcher(stash, None);
2136        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2137        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
2138            Some(Arc::new(TestResourceDispatcher));
2139
2140        let code = r#"async () => {
2141            // Call a tool
2142            const toolResult = await forge.callTool("s", "t", {});
2143
2144            // Read a resource
2145            const resource = await forge.readResource("s", "file:///data");
2146
2147            // Store combined result in stash
2148            await forge.stash.put("combined", {
2149                tool: toolResult.server,
2150                resource: resource.content
2151            });
2152
2153            // Read it back
2154            const v = await forge.stash.get("combined");
2155            return v;
2156        }"#;
2157        let result = exec
2158            .execute_code(code, tool_dispatcher, resource_dispatcher, Some(sd))
2159            .await
2160            .unwrap();
2161        assert_eq!(result["tool"], "s");
2162        assert_eq!(result["resource"], "test resource content");
2163    }
2164
2165    // --- ST-I06: Stash key limit produces clear error ---
2166    #[tokio::test]
2167    async fn st_i06_stash_key_limit_error() {
2168        let exec = executor();
2169        let stash = make_stash(crate::stash::StashConfig {
2170            max_keys: 2,
2171            ..Default::default()
2172        });
2173        let sd = make_stash_dispatcher(stash, None);
2174        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2175
2176        let code = r#"async () => {
2177            await forge.stash.put("k1", 1);
2178            await forge.stash.put("k2", 2);
2179            try {
2180                await forge.stash.put("k3", 3);
2181                return "should not reach here";
2182            } catch(e) {
2183                return e.message;
2184            }
2185        }"#;
2186        let result = exec
2187            .execute_code(code, dispatcher, None, Some(sd))
2188            .await
2189            .unwrap();
2190        assert!(
2191            result.as_str().unwrap().contains("key limit"),
2192            "expected key limit error, got: {result:?}"
2193        );
2194    }
2195
2196    // --- ST-I07: Stash value size limit produces clear error ---
2197    #[tokio::test]
2198    async fn st_i07_stash_value_size_limit_error() {
2199        let exec = executor();
2200        let stash = make_stash(crate::stash::StashConfig {
2201            max_value_size: 50,
2202            ..Default::default()
2203        });
2204        let sd = make_stash_dispatcher(stash, None);
2205        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2206
2207        let code = r#"async () => {
2208            try {
2209                await forge.stash.put("k", "x".repeat(100));
2210                return "should not reach here";
2211            } catch(e) {
2212                return e.message;
2213            }
2214        }"#;
2215        let result = exec
2216            .execute_code(code, dispatcher, None, Some(sd))
2217            .await
2218            .unwrap();
2219        assert!(
2220            result.as_str().unwrap().contains("too large"),
2221            "expected value too large error, got: {result:?}"
2222        );
2223    }
2224
2225    // --- ST-I08: Stash keys() returns correct subset for group context ---
2226    #[tokio::test]
2227    async fn st_i08_stash_keys_group_subset() {
2228        let exec = executor();
2229        let stash = make_stash(crate::stash::StashConfig::default());
2230        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2231
2232        // Put a public key and a group-A key
2233        let sd_none = make_stash_dispatcher(stash.clone(), None);
2234        let code1 = r#"async () => {
2235            await forge.stash.put("public-key", "pub");
2236            return "ok";
2237        }"#;
2238        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_none))
2239            .await
2240            .unwrap();
2241
2242        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2243        let code2 = r#"async () => {
2244            await forge.stash.put("group-a-key", "secret");
2245            return "ok";
2246        }"#;
2247        exec.execute_code(code2, dispatcher.clone(), None, Some(sd_a))
2248            .await
2249            .unwrap();
2250
2251        // List keys from group-a perspective: should see both
2252        let sd_a2 = make_stash_dispatcher(stash.clone(), Some("group-a"));
2253        let code3 = r#"async () => {
2254            const k = await forge.stash.keys();
2255            k.sort();
2256            return k;
2257        }"#;
2258        let result = exec
2259            .execute_code(code3, dispatcher.clone(), None, Some(sd_a2))
2260            .await
2261            .unwrap();
2262        let keys = result.as_array().unwrap();
2263        assert_eq!(keys.len(), 2);
2264
2265        // List keys from ungrouped: should only see public
2266        let sd_none2 = make_stash_dispatcher(stash, None);
2267        let code4 = r#"async () => {
2268            const k = await forge.stash.keys();
2269            return k;
2270        }"#;
2271        let result2 = exec
2272            .execute_code(code4, dispatcher, None, Some(sd_none2))
2273            .await
2274            .unwrap();
2275        let keys2 = result2.as_array().unwrap();
2276        assert_eq!(keys2.len(), 1);
2277        assert_eq!(keys2[0], "public-key");
2278    }
2279
2280    // --- Security Tests ---
2281
2282    // --- ST-S01: Stash key with path traversal characters rejected ---
2283    #[tokio::test]
2284    async fn st_s01_stash_key_path_traversal_rejected() {
2285        let exec = executor();
2286        let stash = make_stash(crate::stash::StashConfig::default());
2287        let sd = make_stash_dispatcher(stash, None);
2288        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2289
2290        let code = r#"async () => {
2291            try {
2292                await forge.stash.put("../../etc/passwd", "evil");
2293                return "should not reach here";
2294            } catch(e) {
2295                return e.message;
2296            }
2297        }"#;
2298        let result = exec
2299            .execute_code(code, dispatcher, None, Some(sd))
2300            .await
2301            .unwrap();
2302        assert!(
2303            result.as_str().unwrap().contains("invalid"),
2304            "expected invalid key error, got: {result:?}"
2305        );
2306    }
2307
2308    // --- ST-S02: Stash key with script injection (<script>) rejected ---
2309    #[tokio::test]
2310    async fn st_s02_stash_key_script_injection_rejected() {
2311        let exec = executor();
2312        let stash = make_stash(crate::stash::StashConfig::default());
2313        let sd = make_stash_dispatcher(stash, None);
2314        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2315
2316        let code = r#"async () => {
2317            try {
2318                await forge.stash.put("<script>alert(1)</script>", "evil");
2319                return "should not reach here";
2320            } catch(e) {
2321                return e.message;
2322            }
2323        }"#;
2324        let result = exec
2325            .execute_code(code, dispatcher, None, Some(sd))
2326            .await
2327            .unwrap();
2328        assert!(
2329            result.as_str().unwrap().contains("invalid"),
2330            "expected invalid key error, got: {result:?}"
2331        );
2332    }
2333
2334    // --- ST-S03: Stash value containing JS code stored as inert data ---
2335    #[tokio::test]
2336    async fn st_s03_stash_value_js_code_is_inert() {
2337        let exec = executor();
2338        let stash = make_stash(crate::stash::StashConfig::default());
2339        let sd = make_stash_dispatcher(stash, None);
2340        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2341
2342        // Store a string that looks like executable JS code.
2343        // We build it from parts to avoid triggering the banned-pattern validator.
2344        let code = r#"async () => {
2345            const part1 = "function() { return ";
2346            const part2 = "globalThis.secret; }";
2347            const malicious = part1 + part2;
2348            await forge.stash.put("code-value", malicious);
2349            const v = await forge.stash.get("code-value");
2350            // The value should be a plain string, not executed
2351            return typeof v === "string" && v.includes("globalThis");
2352        }"#;
2353        let result = exec
2354            .execute_code(code, dispatcher, None, Some(sd))
2355            .await
2356            .unwrap();
2357        assert_eq!(result, true, "JS code in stash values should be inert data");
2358    }
2359
2360    // --- ST-S04: Stash put from group A, get from group B → error ---
2361    #[tokio::test]
2362    async fn st_s04_stash_cross_group_get_error() {
2363        let exec = executor();
2364        let stash = make_stash(crate::stash::StashConfig::default());
2365        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2366
2367        // Put with group A
2368        let sd_a = make_stash_dispatcher(stash.clone(), Some("team-alpha"));
2369        let code1 = r#"async () => {
2370            await forge.stash.put("alpha-secret", "classified");
2371            return "stored";
2372        }"#;
2373        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2374            .await
2375            .unwrap();
2376
2377        // Get with group B should error
2378        let sd_b = make_stash_dispatcher(stash, Some("team-beta"));
2379        let code2 = r#"async () => {
2380            try {
2381                await forge.stash.get("alpha-secret");
2382                return "leaked";
2383            } catch(e) {
2384                return e.message;
2385            }
2386        }"#;
2387        let result = exec
2388            .execute_code(code2, dispatcher, None, Some(sd_b))
2389            .await
2390            .unwrap();
2391        assert!(
2392            result.as_str().unwrap().contains("cross-group"),
2393            "expected cross-group error, got: {result:?}"
2394        );
2395    }
2396
2397    // --- ST-S05: Stash put from group A, get from ungrouped → error ---
2398    #[tokio::test]
2399    async fn st_s05_stash_grouped_entry_inaccessible_to_ungrouped() {
2400        let exec = executor();
2401        let stash = make_stash(crate::stash::StashConfig::default());
2402        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2403
2404        // Put with group A
2405        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-x"));
2406        let code1 = r#"async () => {
2407            await forge.stash.put("gx-data", 999);
2408            return "stored";
2409        }"#;
2410        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2411            .await
2412            .unwrap();
2413
2414        // Get from ungrouped should error
2415        let sd_none = make_stash_dispatcher(stash, None);
2416        let code2 = r#"async () => {
2417            try {
2418                await forge.stash.get("gx-data");
2419                return "leaked";
2420            } catch(e) {
2421                return e.message;
2422            }
2423        }"#;
2424        let result = exec
2425            .execute_code(code2, dispatcher, None, Some(sd_none))
2426            .await
2427            .unwrap();
2428        assert!(
2429            result.as_str().unwrap().contains("cross-group"),
2430            "expected cross-group error, got: {result:?}"
2431        );
2432    }
2433
2434    // --- ST-S06: Stash total size limit prevents OOM (many puts) ---
2435    #[tokio::test]
2436    async fn st_s06_stash_total_size_limit_prevents_oom() {
2437        let exec = executor();
2438        let stash = make_stash(crate::stash::StashConfig {
2439            max_total_size: 200,
2440            max_value_size: 1024,
2441            max_keys: 1000,
2442            ..Default::default()
2443        });
2444        let sd = make_stash_dispatcher(stash, None);
2445        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2446
2447        let code = r#"async () => {
2448            let count = 0;
2449            for (let i = 0; i < 100; i++) {
2450                try {
2451                    await forge.stash.put("k" + i, "x".repeat(50));
2452                    count++;
2453                } catch(e) {
2454                    return { count, error: e.message };
2455                }
2456            }
2457            return { count, error: null };
2458        }"#;
2459        let result = exec
2460            .execute_code(code, dispatcher, None, Some(sd))
2461            .await
2462            .unwrap();
2463        // Should have been stopped before 100 puts due to total_size=200
2464        let count = result["count"].as_i64().unwrap();
2465        assert!(
2466            count < 100,
2467            "total size limit should prevent all 100 puts, but {count} succeeded"
2468        );
2469        assert!(
2470            result["error"].as_str().unwrap().contains("total size"),
2471            "expected total size error, got: {:?}",
2472            result["error"]
2473        );
2474    }
2475
2476    // --- ST-S07: Stash ops in search() mode blocked ---
2477    #[tokio::test]
2478    async fn st_s07_stash_ops_blocked_in_search_mode() {
2479        let exec = executor();
2480        let manifest = serde_json::json!({"servers": []});
2481
2482        // In search mode, forge.stash should not exist
2483        let code = r#"async () => {
2484            return typeof forge.stash;
2485        }"#;
2486
2487        let result = exec.execute_search(code, &manifest).await.unwrap();
2488        assert_eq!(result, "undefined", "stash should not exist in search mode");
2489    }
2490
2491    // --- ST-S09: Error messages from stash ops don't leak other keys/values ---
2492    #[tokio::test]
2493    async fn st_s09_stash_error_messages_dont_leak_data() {
2494        let exec = executor();
2495        let stash = make_stash(crate::stash::StashConfig::default());
2496        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2497
2498        // Put a secret value with group-a
2499        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2500        let code1 = r#"async () => {
2501            await forge.stash.put("secret-key", "top-secret-value-12345");
2502            return "stored";
2503        }"#;
2504        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2505            .await
2506            .unwrap();
2507
2508        // Try to access from group-b — error should not contain the value
2509        let sd_b = make_stash_dispatcher(stash, Some("group-b"));
2510        let code2 = r#"async () => {
2511            try {
2512                await forge.stash.get("secret-key");
2513                return "should not reach here";
2514            } catch(e) {
2515                return e.message;
2516            }
2517        }"#;
2518        let result = exec
2519            .execute_code(code2, dispatcher, None, Some(sd_b))
2520            .await
2521            .unwrap();
2522        let msg = result.as_str().unwrap();
2523        assert!(
2524            !msg.contains("top-secret-value-12345"),
2525            "error should not leak value: {msg}"
2526        );
2527        assert!(
2528            !msg.contains("secret-key"),
2529            "error should not leak key names: {msg}"
2530        );
2531    }
2532
2533    // --- ST-S10: TTL expiry enforced ---
2534    #[tokio::test]
2535    async fn st_s10_stash_ttl_expiry_enforced() {
2536        let exec = executor();
2537        let stash = make_stash(crate::stash::StashConfig::default());
2538        let sd = make_stash_dispatcher(stash.clone(), None);
2539        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2540
2541        // Put with 1-second TTL
2542        let code1 = r#"async () => {
2543            await forge.stash.put("ttl-key", "ephemeral", {ttl: 1});
2544            const v = await forge.stash.get("ttl-key");
2545            return v;
2546        }"#;
2547        let result1 = exec
2548            .execute_code(code1, dispatcher.clone(), None, Some(sd))
2549            .await
2550            .unwrap();
2551        assert_eq!(result1, "ephemeral", "should be readable immediately");
2552
2553        // Wait for TTL to expire
2554        tokio::time::sleep(std::time::Duration::from_millis(1100)).await;
2555
2556        // Get after expiry should return null
2557        let sd2 = make_stash_dispatcher(stash, None);
2558        let code2 = r#"async () => {
2559            const v = await forge.stash.get("ttl-key");
2560            return v;
2561        }"#;
2562        let result2 = exec
2563            .execute_code(code2, dispatcher, None, Some(sd2))
2564            .await
2565            .unwrap();
2566        assert_eq!(
2567            result2,
2568            serde_json::Value::Null,
2569            "expired key should return null"
2570        );
2571    }
2572
2573    // =========================================================================
2574    // Phase 7: forge.parallel() tests (PL-U01..PL-U09, PL-S01..PL-S05)
2575    // =========================================================================
2576
2577    // --- PL-U01: parallel with 3 successful calls returns all results ---
2578    #[tokio::test]
2579    async fn pl_u01_parallel_three_successful_calls() {
2580        let exec = executor();
2581        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2582
2583        let code = r#"async () => {
2584            const result = await forge.parallel([
2585                () => forge.callTool("s1", "t1", { id: 1 }),
2586                () => forge.callTool("s2", "t2", { id: 2 }),
2587                () => forge.callTool("s3", "t3", { id: 3 }),
2588            ]);
2589            return result;
2590        }"#;
2591
2592        let result = exec
2593            .execute_code(code, dispatcher, None, None)
2594            .await
2595            .unwrap();
2596        let results = result["results"].as_array().unwrap();
2597        assert_eq!(results.len(), 3);
2598        assert_eq!(results[0]["server"], "s1");
2599        assert_eq!(results[1]["server"], "s2");
2600        assert_eq!(results[2]["server"], "s3");
2601        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2602        assert_eq!(result["aborted"], false);
2603    }
2604
2605    // --- PL-U02: parallel with 1 failure returns partial results + error ---
2606    #[tokio::test]
2607    async fn pl_u02_parallel_partial_failure() {
2608        struct PartialFailDispatcher;
2609
2610        #[async_trait::async_trait]
2611        impl ToolDispatcher for PartialFailDispatcher {
2612            async fn call_tool(
2613                &self,
2614                _server: &str,
2615                tool: &str,
2616                _args: serde_json::Value,
2617            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2618                if tool == "fail" {
2619                    Err(anyhow::anyhow!("deliberate failure").into())
2620                } else {
2621                    Ok(serde_json::json!({"tool": tool, "ok": true}))
2622                }
2623            }
2624        }
2625
2626        let exec = executor();
2627        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(PartialFailDispatcher);
2628
2629        let code = r#"async () => {
2630            return await forge.parallel([
2631                () => forge.callTool("s", "ok1", {}),
2632                () => forge.callTool("s", "fail", {}),
2633                () => forge.callTool("s", "ok2", {}),
2634            ]);
2635        }"#;
2636
2637        let result = exec
2638            .execute_code(code, dispatcher, None, None)
2639            .await
2640            .unwrap();
2641        let results = result["results"].as_array().unwrap();
2642        assert!(results[0]["ok"] == true);
2643        assert!(results[1].is_null(), "failed call should have null result");
2644        assert!(results[2]["ok"] == true);
2645        let errors = result["errors"].as_array().unwrap();
2646        assert_eq!(errors.len(), 1);
2647        assert_eq!(errors[0]["index"], 1);
2648    }
2649
2650    // --- PL-U03: parallel with failFast aborts on first error ---
2651    #[tokio::test]
2652    async fn pl_u03_parallel_fail_fast() {
2653        let exec = SandboxExecutor::new(SandboxConfig {
2654            max_tool_calls: 50,
2655            max_parallel: 2, // batch size 2
2656            ..Default::default()
2657        });
2658
2659        struct FailOnSecondDispatcher {
2660            calls: std::sync::Mutex<u32>,
2661        }
2662
2663        #[async_trait::async_trait]
2664        impl ToolDispatcher for FailOnSecondDispatcher {
2665            async fn call_tool(
2666                &self,
2667                _server: &str,
2668                tool: &str,
2669                _args: serde_json::Value,
2670            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2671                let mut c = self.calls.lock().unwrap();
2672                *c += 1;
2673                if tool == "fail" {
2674                    Err(anyhow::anyhow!("fail").into())
2675                } else {
2676                    Ok(serde_json::json!({"ok": true}))
2677                }
2678            }
2679        }
2680
2681        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(FailOnSecondDispatcher {
2682            calls: std::sync::Mutex::new(0),
2683        });
2684
2685        // 4 calls with batch=2. Second call in first batch fails, so second batch should be skipped
2686        let code = r#"async () => {
2687            return await forge.parallel([
2688                () => forge.callTool("s", "ok", {}),
2689                () => forge.callTool("s", "fail", {}),
2690                () => forge.callTool("s", "ok", {}),
2691                () => forge.callTool("s", "ok", {}),
2692            ], { failFast: true });
2693        }"#;
2694
2695        let result = exec
2696            .execute_code(code, dispatcher, None, None)
2697            .await
2698            .unwrap();
2699        assert_eq!(result["aborted"], true);
2700        assert!(!result["errors"].as_array().unwrap().is_empty());
2701    }
2702
2703    // --- PL-U04: parallel respects concurrency limit ---
2704    #[tokio::test]
2705    async fn pl_u04_parallel_respects_concurrency_limit() {
2706        let exec = SandboxExecutor::new(SandboxConfig {
2707            max_parallel: 2,
2708            timeout: Duration::from_secs(10),
2709            ..Default::default()
2710        });
2711
2712        struct ConcurrencyTracker {
2713            current: std::sync::atomic::AtomicUsize,
2714            peak: std::sync::atomic::AtomicUsize,
2715        }
2716
2717        #[async_trait::async_trait]
2718        impl ToolDispatcher for ConcurrencyTracker {
2719            async fn call_tool(
2720                &self,
2721                _server: &str,
2722                _tool: &str,
2723                _args: serde_json::Value,
2724            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2725                let c = self
2726                    .current
2727                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
2728                    + 1;
2729                // Update peak
2730                self.peak.fetch_max(c, std::sync::atomic::Ordering::SeqCst);
2731                // Small delay to let concurrent calls overlap
2732                tokio::time::sleep(Duration::from_millis(10)).await;
2733                self.current
2734                    .fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
2735                Ok(serde_json::json!({"peak": self.peak.load(std::sync::atomic::Ordering::SeqCst)}))
2736            }
2737        }
2738
2739        let tracker = Arc::new(ConcurrencyTracker {
2740            current: std::sync::atomic::AtomicUsize::new(0),
2741            peak: std::sync::atomic::AtomicUsize::new(0),
2742        });
2743        let dispatcher: Arc<dyn ToolDispatcher> = tracker.clone();
2744
2745        // 6 calls with max_parallel=2
2746        let code = r#"async () => {
2747            return await forge.parallel([
2748                () => forge.callTool("s", "t", {}),
2749                () => forge.callTool("s", "t", {}),
2750                () => forge.callTool("s", "t", {}),
2751                () => forge.callTool("s", "t", {}),
2752                () => forge.callTool("s", "t", {}),
2753                () => forge.callTool("s", "t", {}),
2754            ]);
2755        }"#;
2756
2757        let result = exec
2758            .execute_code(code, dispatcher, None, None)
2759            .await
2760            .unwrap();
2761        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2762        let peak = tracker.peak.load(std::sync::atomic::Ordering::SeqCst);
2763        assert!(peak <= 2, "peak concurrency should be <= 2, was: {peak}");
2764    }
2765
2766    // --- PL-U05: parallel with empty array ---
2767    #[tokio::test]
2768    async fn pl_u05_parallel_empty_array() {
2769        let exec = executor();
2770        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2771
2772        let code = r#"async () => {
2773            return await forge.parallel([]);
2774        }"#;
2775
2776        let result = exec
2777            .execute_code(code, dispatcher, None, None)
2778            .await
2779            .unwrap();
2780        assert_eq!(result["results"].as_array().unwrap().len(), 0);
2781        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2782        assert_eq!(result["aborted"], false);
2783    }
2784
2785    // --- PL-U06: parallel with single call ---
2786    #[tokio::test]
2787    async fn pl_u06_parallel_single_call() {
2788        let exec = executor();
2789        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2790
2791        let code = r#"async () => {
2792            return await forge.parallel([
2793                () => forge.callTool("s", "t", { id: 1 }),
2794            ]);
2795        }"#;
2796
2797        let result = exec
2798            .execute_code(code, dispatcher, None, None)
2799            .await
2800            .unwrap();
2801        let results = result["results"].as_array().unwrap();
2802        assert_eq!(results.len(), 1);
2803        assert_eq!(results[0]["server"], "s");
2804    }
2805
2806    // --- PL-U07: parallel errors contain redacted messages ---
2807    #[tokio::test]
2808    async fn pl_u07_parallel_errors_redacted() {
2809        struct LeakyDispatcher;
2810
2811        #[async_trait::async_trait]
2812        impl ToolDispatcher for LeakyDispatcher {
2813            async fn call_tool(
2814                &self,
2815                _server: &str,
2816                _tool: &str,
2817                _args: serde_json::Value,
2818            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2819                Err(anyhow::anyhow!("connection to http://internal.secret:9999/api failed").into())
2820            }
2821        }
2822
2823        let exec = executor();
2824        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(LeakyDispatcher);
2825
2826        let code = r#"async () => {
2827            return await forge.parallel([
2828                () => forge.callTool("server", "tool", {}),
2829            ]);
2830        }"#;
2831
2832        let result = exec
2833            .execute_code(code, dispatcher, None, None)
2834            .await
2835            .unwrap();
2836        let errors = result["errors"].as_array().unwrap();
2837        assert_eq!(errors.len(), 1);
2838        let msg = errors[0]["error"].as_str().unwrap();
2839        assert!(!msg.contains("internal.secret"), "should redact URL: {msg}");
2840    }
2841
2842    // --- PL-U08: parallel combined with readResource ---
2843    #[tokio::test]
2844    async fn pl_u08_parallel_with_read_resource() {
2845        let exec = executor();
2846        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2847        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
2848            Some(Arc::new(TestResourceDispatcher));
2849
2850        let code = r#"async () => {
2851            return await forge.parallel([
2852                () => forge.callTool("s", "t", {}),
2853                () => forge.readResource("rs", "file:///log"),
2854            ]);
2855        }"#;
2856
2857        let result = exec
2858            .execute_code(code, dispatcher, resource_dispatcher, None)
2859            .await
2860            .unwrap();
2861        let results = result["results"].as_array().unwrap();
2862        assert_eq!(results.len(), 2);
2863        assert_eq!(results[0]["server"], "s");
2864        assert_eq!(results[1]["server"], "rs");
2865    }
2866
2867    // --- PL-U09: parallel exceeding max_tool_calls ---
2868    #[tokio::test]
2869    async fn pl_u09_parallel_exceeds_rate_limit() {
2870        let exec = SandboxExecutor::new(SandboxConfig {
2871            max_tool_calls: 3,
2872            ..Default::default()
2873        });
2874        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2875
2876        let code = r#"async () => {
2877            return await forge.parallel([
2878                () => forge.callTool("s", "t1", {}),
2879                () => forge.callTool("s", "t2", {}),
2880                () => forge.callTool("s", "t3", {}),
2881                () => forge.callTool("s", "t4", {}),
2882                () => forge.callTool("s", "t5", {}),
2883            ]);
2884        }"#;
2885
2886        let result = exec
2887            .execute_code(code, dispatcher, None, None)
2888            .await
2889            .unwrap();
2890        // First 3 should succeed, remaining should error
2891        let errors = result["errors"].as_array().unwrap();
2892        assert!(!errors.is_empty(), "should have errors from rate limiting");
2893        // At least some results should be non-null
2894        let results = result["results"].as_array().unwrap();
2895        let successes = results.iter().filter(|r| !r.is_null()).count();
2896        assert_eq!(successes, 3, "should have exactly 3 successful calls");
2897    }
2898
2899    // --- PL-S01: cannot exceed __MAX_PARALLEL even with high concurrency opt ---
2900    #[tokio::test]
2901    async fn pl_s01_cannot_exceed_max_parallel() {
2902        let exec = SandboxExecutor::new(SandboxConfig {
2903            max_parallel: 2,
2904            timeout: Duration::from_secs(10),
2905            ..Default::default()
2906        });
2907
2908        struct ConcurrencyCounter {
2909            peak: std::sync::atomic::AtomicUsize,
2910            current: std::sync::atomic::AtomicUsize,
2911        }
2912
2913        #[async_trait::async_trait]
2914        impl ToolDispatcher for ConcurrencyCounter {
2915            async fn call_tool(
2916                &self,
2917                _server: &str,
2918                _tool: &str,
2919                _args: serde_json::Value,
2920            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2921                let c = self
2922                    .current
2923                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
2924                    + 1;
2925                self.peak.fetch_max(c, std::sync::atomic::Ordering::SeqCst);
2926                tokio::time::sleep(Duration::from_millis(10)).await;
2927                self.current
2928                    .fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
2929                Ok(serde_json::json!({}))
2930            }
2931        }
2932
2933        let counter = Arc::new(ConcurrencyCounter {
2934            peak: std::sync::atomic::AtomicUsize::new(0),
2935            current: std::sync::atomic::AtomicUsize::new(0),
2936        });
2937        let dispatcher: Arc<dyn ToolDispatcher> = counter.clone();
2938
2939        // Request concurrency=9999 but max_parallel=2
2940        let code = r#"async () => {
2941            return await forge.parallel([
2942                () => forge.callTool("s", "t", {}),
2943                () => forge.callTool("s", "t", {}),
2944                () => forge.callTool("s", "t", {}),
2945                () => forge.callTool("s", "t", {}),
2946            ], { concurrency: 9999 });
2947        }"#;
2948
2949        let _ = exec
2950            .execute_code(code, dispatcher, None, None)
2951            .await
2952            .unwrap();
2953        let peak = counter.peak.load(std::sync::atomic::Ordering::SeqCst);
2954        assert!(
2955            peak <= 2,
2956            "peak should be capped at max_parallel=2, was: {peak}"
2957        );
2958    }
2959
2960    // --- PL-S02: parallel calls to mixed strict groups ---
2961    #[tokio::test]
2962    async fn pl_s02_parallel_mixed_strict_groups() {
2963        use crate::groups::{GroupEnforcingDispatcher, GroupPolicy};
2964        use std::collections::HashMap;
2965
2966        let mut groups = HashMap::new();
2967        groups.insert(
2968            "internal".to_string(),
2969            (vec!["vault".to_string()], "strict".to_string()),
2970        );
2971        groups.insert(
2972            "external".to_string(),
2973            (vec!["slack".to_string()], "strict".to_string()),
2974        );
2975        let policy = Arc::new(GroupPolicy::from_config(&groups));
2976        let inner: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2977        let enforcer = GroupEnforcingDispatcher::new(inner, policy);
2978        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(enforcer);
2979
2980        let exec = executor();
2981
2982        // Parallel calls: first locks to "internal", second to "external" should fail
2983        let code = r#"async () => {
2984            return await forge.parallel([
2985                () => forge.callTool("vault", "secrets.list", {}),
2986                () => forge.callTool("slack", "messages.send", {}),
2987            ]);
2988        }"#;
2989
2990        let result = exec
2991            .execute_code(code, dispatcher, None, None)
2992            .await
2993            .unwrap();
2994        let errors = result["errors"].as_array().unwrap();
2995        // At least one should fail with cross-group error
2996        assert!(
2997            !errors.is_empty(),
2998            "should have cross-group error: {result:?}"
2999        );
3000        let has_cross_group = errors
3001            .iter()
3002            .any(|e| e["error"].as_str().unwrap_or("").contains("cross-group"));
3003        assert!(has_cross_group, "should mention cross-group: {result:?}");
3004    }
3005
3006    // --- PL-S03: 500 parallel calls hits rate limit ---
3007    #[tokio::test]
3008    async fn pl_s03_many_parallel_calls_hit_rate_limit() {
3009        let exec = SandboxExecutor::new(SandboxConfig {
3010            max_tool_calls: 10,
3011            ..Default::default()
3012        });
3013        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3014
3015        let code = r#"async () => {
3016            const calls = [];
3017            for (let i = 0; i < 100; i++) {
3018                calls.push(() => forge.callTool("s", "t", { i }));
3019            }
3020            return await forge.parallel(calls);
3021        }"#;
3022
3023        let result = exec
3024            .execute_code(code, dispatcher, None, None)
3025            .await
3026            .unwrap();
3027        let errors = result["errors"].as_array().unwrap();
3028        let results = result["results"].as_array().unwrap();
3029        let successes = results.iter().filter(|r| !r.is_null()).count();
3030        assert_eq!(
3031            successes, 10,
3032            "should have exactly max_tool_calls successes"
3033        );
3034        assert_eq!(errors.len(), 90, "remaining 90 should be rate limited");
3035    }
3036
3037    // --- PL-S04: __MAX_PARALLEL is not modifiable ---
3038    #[tokio::test]
3039    async fn pl_s04_max_parallel_not_modifiable() {
3040        let exec = SandboxExecutor::new(SandboxConfig {
3041            max_parallel: 3,
3042            ..Default::default()
3043        });
3044        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3045
3046        // Attempt to modify the frozen constant — should fail silently or throw
3047        let code = r#"async () => {
3048            try {
3049                // __MAX_PARALLEL is a local const in the bootstrap closure,
3050                // not accessible from user code. Attempting to use it would fail.
3051                return typeof __MAX_PARALLEL;
3052            } catch(e) {
3053                return "error";
3054            }
3055        }"#;
3056
3057        let result = exec
3058            .execute_code(code, dispatcher, None, None)
3059            .await
3060            .unwrap();
3061        // __MAX_PARALLEL is scoped inside the IIFE, not visible to user code
3062        assert_eq!(
3063            result, "undefined",
3064            "__MAX_PARALLEL should not be accessible"
3065        );
3066    }
3067
3068    // --- PL-S05: raw Promise.all still hits rate limit ---
3069    #[tokio::test]
3070    async fn pl_s05_raw_promise_all_hits_rate_limit() {
3071        let exec = SandboxExecutor::new(SandboxConfig {
3072            max_tool_calls: 3,
3073            ..Default::default()
3074        });
3075        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3076
3077        // Bypass forge.parallel() and use raw Promise.allSettled
3078        let code = r#"async () => {
3079            const results = await Promise.allSettled([
3080                forge.callTool("s", "t1", {}),
3081                forge.callTool("s", "t2", {}),
3082                forge.callTool("s", "t3", {}),
3083                forge.callTool("s", "t4", {}),
3084                forge.callTool("s", "t5", {}),
3085            ]);
3086            const fulfilled = results.filter(r => r.status === "fulfilled").length;
3087            const rejected = results.filter(r => r.status === "rejected").length;
3088            return { fulfilled, rejected };
3089        }"#;
3090
3091        let result = exec
3092            .execute_code(code, dispatcher, None, None)
3093            .await
3094            .unwrap();
3095        assert_eq!(result["fulfilled"], 3, "should have 3 successful calls");
3096        assert_eq!(result["rejected"], 2, "should have 2 rate-limited calls");
3097    }
3098
3099    // =========================================================================
3100    // Phase 8: Bootstrap + Invariant Tests (BS-01..BS-12, INV-01..INV-10)
3101    // =========================================================================
3102
3103    // --- BS-01: forge object is frozen ---
3104    #[tokio::test]
3105    async fn bs_01_forge_object_is_frozen() {
3106        let exec = executor();
3107        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3108        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3109        let stash_store = make_stash(Default::default());
3110        let stash = make_stash_dispatcher(stash_store, None);
3111
3112        let code = r#"async () => {
3113            return Object.isFrozen(forge);
3114        }"#;
3115
3116        let result = exec
3117            .execute_code(code, dispatcher, Some(resource), Some(stash))
3118            .await
3119            .unwrap();
3120        assert_eq!(result, true, "forge object must be frozen");
3121    }
3122
3123    // --- BS-02: forge.stash is frozen ---
3124    #[tokio::test]
3125    async fn bs_02_forge_stash_is_frozen() {
3126        let exec = executor();
3127        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3128        let stash_store = make_stash(Default::default());
3129        let stash = make_stash_dispatcher(stash_store, None);
3130
3131        let code = r#"async () => {
3132            return Object.isFrozen(forge.stash);
3133        }"#;
3134
3135        let result = exec
3136            .execute_code(code, dispatcher, None, Some(stash))
3137            .await
3138            .unwrap();
3139        assert_eq!(result, true, "forge.stash must be frozen");
3140    }
3141
3142    // --- BS-03: __MAX_PARALLEL is not accessible from user code as a global ---
3143    #[tokio::test]
3144    async fn bs_03_max_parallel_not_accessible_as_global() {
3145        let exec = executor();
3146        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3147
3148        let code = r#"async () => {
3149            return {
3150                global: typeof globalThis.__MAX_PARALLEL,
3151                direct: typeof __MAX_PARALLEL,
3152            };
3153        }"#;
3154
3155        let result = exec
3156            .execute_code(code, dispatcher, None, None)
3157            .await
3158            .unwrap();
3159        assert_eq!(
3160            result["global"], "undefined",
3161            "__MAX_PARALLEL must not be on globalThis"
3162        );
3163        // __MAX_PARALLEL is a local const inside the bootstrap IIFE,
3164        // so direct access from user code (different scope) should fail.
3165        // User code runs in a separate eval context, so it shouldn't see the IIFE local.
3166        assert_eq!(
3167            result["direct"], "undefined",
3168            "__MAX_PARALLEL must not be accessible from user scope"
3169        );
3170    }
3171
3172    // --- BS-04: forge.readResource is a function in execute mode ---
3173    #[tokio::test]
3174    async fn bs_04_read_resource_is_function_in_execute_mode() {
3175        let exec = executor();
3176        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3177        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3178
3179        let code = r#"async () => {
3180            return typeof forge.readResource;
3181        }"#;
3182
3183        let result = exec
3184            .execute_code(code, dispatcher, Some(resource), None)
3185            .await
3186            .unwrap();
3187        assert_eq!(result, "function", "forge.readResource must be a function");
3188    }
3189
3190    // --- BS-05: forge.readResource is undefined in search mode ---
3191    #[tokio::test]
3192    async fn bs_05_read_resource_undefined_in_search_mode() {
3193        let exec = executor();
3194        let manifest = serde_json::json!({"servers": []});
3195
3196        let code = r#"async () => {
3197            return typeof forge.readResource;
3198        }"#;
3199
3200        let result = exec.execute_search(code, &manifest).await.unwrap();
3201        assert_eq!(
3202            result, "undefined",
3203            "forge.readResource must be undefined in search mode"
3204        );
3205    }
3206
3207    // --- BS-06: forge.stash has put/get/delete/keys in execute mode ---
3208    #[tokio::test]
3209    async fn bs_06_stash_has_all_methods_in_execute_mode() {
3210        let exec = executor();
3211        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3212        let stash_store = make_stash(Default::default());
3213        let stash = make_stash_dispatcher(stash_store, None);
3214
3215        let code = r#"async () => {
3216            return {
3217                type: typeof forge.stash,
3218                put: typeof forge.stash.put,
3219                get: typeof forge.stash.get,
3220                del: typeof forge.stash.delete,
3221                keys: typeof forge.stash.keys,
3222            };
3223        }"#;
3224
3225        let result = exec
3226            .execute_code(code, dispatcher, None, Some(stash))
3227            .await
3228            .unwrap();
3229        assert_eq!(result["type"], "object", "forge.stash must be an object");
3230        assert_eq!(result["put"], "function");
3231        assert_eq!(result["get"], "function");
3232        assert_eq!(result["del"], "function");
3233        assert_eq!(result["keys"], "function");
3234    }
3235
3236    // --- BS-07: forge.stash is undefined in search mode ---
3237    #[tokio::test]
3238    async fn bs_07_stash_undefined_in_search_mode() {
3239        let exec = executor();
3240        let manifest = serde_json::json!({"servers": []});
3241
3242        let code = r#"async () => {
3243            return typeof forge.stash;
3244        }"#;
3245
3246        let result = exec.execute_search(code, &manifest).await.unwrap();
3247        assert_eq!(
3248            result, "undefined",
3249            "forge.stash must be undefined in search mode"
3250        );
3251    }
3252
3253    // --- BS-08: forge.parallel is a function in execute mode ---
3254    #[tokio::test]
3255    async fn bs_08_parallel_is_function_in_execute_mode() {
3256        let exec = executor();
3257        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3258
3259        let code = r#"async () => {
3260            return typeof forge.parallel;
3261        }"#;
3262
3263        let result = exec
3264            .execute_code(code, dispatcher, None, None)
3265            .await
3266            .unwrap();
3267        assert_eq!(result, "function", "forge.parallel must be a function");
3268    }
3269
3270    // --- BS-09: forge.parallel is undefined in search mode ---
3271    #[tokio::test]
3272    async fn bs_09_parallel_undefined_in_search_mode() {
3273        let exec = executor();
3274        let manifest = serde_json::json!({"servers": []});
3275
3276        let code = r#"async () => {
3277            return typeof forge.parallel;
3278        }"#;
3279
3280        let result = exec.execute_search(code, &manifest).await.unwrap();
3281        assert_eq!(
3282            result, "undefined",
3283            "forge.parallel must be undefined in search mode"
3284        );
3285    }
3286
3287    // --- BS-10: forge.server("x").cat.tool() still works (Proxy not broken) ---
3288    #[tokio::test]
3289    async fn bs_10_server_proxy_still_works() {
3290        let exec = executor();
3291        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3292        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3293        let stash_store = make_stash(Default::default());
3294        let stash = make_stash_dispatcher(stash_store, None);
3295
3296        let code = r#"async () => {
3297            const result = await forge.server("myserver").ast.parse({ file: "test.rs" });
3298            return result;
3299        }"#;
3300
3301        let result = exec
3302            .execute_code(code, dispatcher, Some(resource), Some(stash))
3303            .await
3304            .unwrap();
3305        assert_eq!(result["server"], "myserver");
3306        assert_eq!(result["tool"], "ast.parse");
3307        assert_eq!(result["args"]["file"], "test.rs");
3308    }
3309
3310    // --- BS-11: delete globalThis.Deno still happens after new APIs ---
3311    #[tokio::test]
3312    async fn bs_11_deno_deleted_in_execute_mode() {
3313        let exec = executor();
3314        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3315        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3316        let stash_store = make_stash(Default::default());
3317        let stash = make_stash_dispatcher(stash_store, None);
3318
3319        let code = r#"async () => {
3320            return typeof globalThis.Deno;
3321        }"#;
3322
3323        let result = exec
3324            .execute_code(code, dispatcher, Some(resource), Some(stash))
3325            .await
3326            .unwrap();
3327        assert_eq!(result, "undefined", "Deno must be deleted in execute mode");
3328    }
3329
3330    // --- BS-12: Function.prototype.constructor is still undefined ---
3331    #[tokio::test]
3332    async fn bs_12_function_constructor_undefined_in_execute_mode() {
3333        let exec = executor();
3334        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3335        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3336        let stash_store = make_stash(Default::default());
3337        let stash = make_stash_dispatcher(stash_store, None);
3338
3339        // After bootstrap, Function.prototype.constructor is undefined.
3340        // Since .constructor is undefined, (async fn).constructor is also undefined,
3341        // so we cannot chain .prototype.constructor — we verify via separate checks.
3342        let code = r#"async () => {
3343            const funcCtor = typeof Function.prototype.constructor;
3344            // AsyncFunction and GeneratorFunction constructors are also wiped
3345            // because they inherit from Function.prototype.
3346            const asyncFn = async function(){};
3347            const genFn = function*(){};
3348            const asyncCtor = typeof asyncFn.constructor;
3349            const genCtor = typeof genFn.constructor;
3350            return { funcCtor, asyncCtor, genCtor };
3351        }"#;
3352
3353        let result = exec
3354            .execute_code(code, dispatcher, Some(resource), Some(stash))
3355            .await
3356            .unwrap();
3357        assert_eq!(
3358            result["funcCtor"], "undefined",
3359            "Function.prototype.constructor must be undefined"
3360        );
3361        assert_eq!(
3362            result["asyncCtor"], "undefined",
3363            "AsyncFunction .constructor must be undefined"
3364        );
3365        assert_eq!(
3366            result["genCtor"], "undefined",
3367            "GeneratorFunction .constructor must be undefined"
3368        );
3369    }
3370
3371    // --- INV-01: search() mode cannot access forge.callTool ---
3372    #[tokio::test]
3373    async fn inv_01_search_mode_no_call_tool() {
3374        let exec = executor();
3375        let manifest = serde_json::json!({"servers": []});
3376
3377        let code = r#"async () => {
3378            return typeof forge.callTool;
3379        }"#;
3380
3381        let result = exec.execute_search(code, &manifest).await.unwrap();
3382        assert_eq!(
3383            result, "undefined",
3384            "forge.callTool must not exist in search mode"
3385        );
3386    }
3387
3388    // --- INV-02: search() mode cannot access forge.readResource ---
3389    #[tokio::test]
3390    async fn inv_02_search_mode_no_read_resource() {
3391        let exec = executor();
3392        let manifest = serde_json::json!({"servers": []});
3393
3394        let code = r#"async () => {
3395            return typeof forge.readResource;
3396        }"#;
3397
3398        let result = exec.execute_search(code, &manifest).await.unwrap();
3399        assert_eq!(
3400            result, "undefined",
3401            "forge.readResource must not exist in search mode"
3402        );
3403    }
3404
3405    // --- INV-03: search() mode cannot access forge.stash ---
3406    #[tokio::test]
3407    async fn inv_03_search_mode_no_stash() {
3408        let exec = executor();
3409        let manifest = serde_json::json!({"servers": []});
3410
3411        let code = r#"async () => {
3412            return typeof forge.stash;
3413        }"#;
3414
3415        let result = exec.execute_search(code, &manifest).await.unwrap();
3416        assert_eq!(
3417            result, "undefined",
3418            "forge.stash must not exist in search mode"
3419        );
3420    }
3421
3422    // --- INV-04: search() mode cannot access forge.parallel ---
3423    #[tokio::test]
3424    async fn inv_04_search_mode_no_parallel() {
3425        let exec = executor();
3426        let manifest = serde_json::json!({"servers": []});
3427
3428        let code = r#"async () => {
3429            return typeof forge.parallel;
3430        }"#;
3431
3432        let result = exec.execute_search(code, &manifest).await.unwrap();
3433        assert_eq!(
3434            result, "undefined",
3435            "forge.parallel must not exist in search mode"
3436        );
3437    }
3438
3439    // --- INV-05: eval is undefined in all modes ---
3440    #[tokio::test]
3441    async fn inv_05_eval_undefined_in_all_modes() {
3442        let exec = executor();
3443
3444        // Execute mode
3445        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3446        let code = r#"async () => { return typeof eval; }"#;
3447        let result = exec
3448            .execute_code(code, dispatcher, None, None)
3449            .await
3450            .unwrap();
3451        assert_eq!(
3452            result, "undefined",
3453            "eval must be undefined in execute mode"
3454        );
3455
3456        // Search mode
3457        let manifest = serde_json::json!({"servers": []});
3458        let result = exec.execute_search(code, &manifest).await.unwrap();
3459        assert_eq!(result, "undefined", "eval must be undefined in search mode");
3460    }
3461
3462    // --- INV-06: Function.prototype.constructor is undefined in all modes ---
3463    #[tokio::test]
3464    async fn inv_06_function_constructor_undefined_all_modes() {
3465        let exec = executor();
3466
3467        let code = r#"async () => {
3468            return typeof Function.prototype.constructor;
3469        }"#;
3470
3471        // Execute mode
3472        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3473        let result = exec
3474            .execute_code(code, dispatcher, None, None)
3475            .await
3476            .unwrap();
3477        assert_eq!(
3478            result, "undefined",
3479            "Function.prototype.constructor must be undefined in execute mode"
3480        );
3481
3482        // Search mode
3483        let manifest = serde_json::json!({"servers": []});
3484        let result = exec.execute_search(code, &manifest).await.unwrap();
3485        assert_eq!(
3486            result, "undefined",
3487            "Function.prototype.constructor must be undefined in search mode"
3488        );
3489    }
3490
3491    // --- INV-07: Deno is undefined after bootstrap in all modes ---
3492    #[tokio::test]
3493    async fn inv_07_deno_undefined_all_modes() {
3494        let exec = executor();
3495
3496        let code = r#"async () => { return typeof globalThis.Deno; }"#;
3497
3498        // Execute mode
3499        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3500        let result = exec
3501            .execute_code(code, dispatcher, None, None)
3502            .await
3503            .unwrap();
3504        assert_eq!(
3505            result, "undefined",
3506            "Deno must be undefined in execute mode"
3507        );
3508
3509        // Search mode
3510        let manifest = serde_json::json!({"servers": []});
3511        let result = exec.execute_search(code, &manifest).await.unwrap();
3512        assert_eq!(result, "undefined", "Deno must be undefined in search mode");
3513    }
3514
3515    // --- INV-08: forge object is frozen in all modes ---
3516    #[tokio::test]
3517    async fn inv_08_forge_frozen_all_modes() {
3518        let exec = executor();
3519
3520        let code = r#"async () => { return Object.isFrozen(forge); }"#;
3521
3522        // Execute mode
3523        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3524        let result = exec
3525            .execute_code(code, dispatcher, None, None)
3526            .await
3527            .unwrap();
3528        assert_eq!(result, true, "forge must be frozen in execute mode");
3529
3530        // Search mode
3531        let manifest = serde_json::json!({"servers": []});
3532        let result = exec.execute_search(code, &manifest).await.unwrap();
3533        assert_eq!(result, true, "forge must be frozen in search mode");
3534    }
3535
3536    // --- INV-09: forge.stash object is frozen in execute mode ---
3537    #[tokio::test]
3538    async fn inv_09_stash_frozen_in_execute_mode() {
3539        let exec = executor();
3540        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3541        let stash_store = make_stash(Default::default());
3542        let stash = make_stash_dispatcher(stash_store, None);
3543
3544        // Verify stash is frozen and cannot be modified
3545        let code = r#"async () => {
3546            const frozen = Object.isFrozen(forge.stash);
3547            let mutated = false;
3548            try {
3549                forge.stash.evil = () => {};
3550                mutated = forge.stash.evil !== undefined;
3551            } catch (e) {
3552                // TypeError in strict mode, which is fine
3553            }
3554            return { frozen, mutated };
3555        }"#;
3556
3557        let result = exec
3558            .execute_code(code, dispatcher, None, Some(stash))
3559            .await
3560            .unwrap();
3561        assert_eq!(result["frozen"], true, "forge.stash must be frozen");
3562        assert_eq!(result["mutated"], false, "forge.stash must not be mutable");
3563    }
3564
3565    // --- INV-10: error messages from all new ops pass through redact_error_for_llm ---
3566    #[tokio::test]
3567    async fn inv_10_error_messages_redacted() {
3568        let exec = executor();
3569
3570        // Use a resource dispatcher that fails with a message containing file paths
3571        let failing_resource: Arc<dyn ResourceDispatcher> = Arc::new(FailingResourceDispatcher {
3572            error_msg: "connection refused to /var/secret/db.sock".to_string(),
3573        });
3574        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3575
3576        // Structured errors are returned as values, not thrown
3577        let code = r#"async () => {
3578            const result = await forge.readResource("secret-server", "file:///data/log.txt");
3579            return result;
3580        }"#;
3581
3582        let result = exec
3583            .execute_code(code, dispatcher, Some(failing_resource), None)
3584            .await
3585            .unwrap();
3586        assert_eq!(
3587            result["error"], true,
3588            "should be structured error: {result}"
3589        );
3590        let error_msg = result["message"].as_str().unwrap();
3591        // Error should be redacted — should not contain raw file paths from the dispatcher
3592        assert!(
3593            !error_msg.contains("/var/secret/db.sock"),
3594            "error must be redacted, got: {error_msg}"
3595        );
3596        // Should mention the server name in a safe way
3597        assert!(
3598            error_msg.contains("secret-server"),
3599            "error should reference server name: {error_msg}"
3600        );
3601    }
3602
3603    // --- Structured error wiring tests (Phase R2) ---
3604
3605    /// Dispatcher that always returns ServerNotFound.
3606    struct ErrorDispatcher;
3607
3608    #[async_trait::async_trait]
3609    impl ToolDispatcher for ErrorDispatcher {
3610        async fn call_tool(
3611            &self,
3612            server: &str,
3613            _tool: &str,
3614            _args: serde_json::Value,
3615        ) -> Result<serde_json::Value, forge_error::DispatchError> {
3616            Err(forge_error::DispatchError::ServerNotFound(
3617                server.to_string(),
3618            ))
3619        }
3620    }
3621
3622    /// Dispatcher that returns ToolNotFound.
3623    struct ToolNotFoundDispatcher;
3624
3625    #[async_trait::async_trait]
3626    impl ToolDispatcher for ToolNotFoundDispatcher {
3627        async fn call_tool(
3628            &self,
3629            server: &str,
3630            tool: &str,
3631            _args: serde_json::Value,
3632        ) -> Result<serde_json::Value, forge_error::DispatchError> {
3633            Err(forge_error::DispatchError::ToolNotFound {
3634                server: server.to_string(),
3635                tool: tool.to_string(),
3636            })
3637        }
3638    }
3639
3640    #[tokio::test]
3641    async fn se_wire_01_tool_call_error_returns_structured_json() {
3642        let exec = executor();
3643        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(ErrorDispatcher);
3644
3645        let code = r#"async () => {
3646            const result = await forge.callTool("bad_server", "bad_tool", {});
3647            return result;
3648        }"#;
3649
3650        let result = exec
3651            .execute_code(code, dispatcher, None, None)
3652            .await
3653            .unwrap();
3654        assert_eq!(result["error"], true, "should be an error: {result}");
3655    }
3656
3657    #[tokio::test]
3658    async fn se_wire_02_structured_error_has_code_field() {
3659        let exec = executor();
3660        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(ErrorDispatcher);
3661
3662        let code = r#"async () => {
3663            const result = await forge.callTool("bad_server", "bad_tool", {});
3664            return result;
3665        }"#;
3666
3667        let result = exec
3668            .execute_code(code, dispatcher, None, None)
3669            .await
3670            .unwrap();
3671        assert_eq!(
3672            result["code"], "SERVER_NOT_FOUND",
3673            "should have code field: {result}"
3674        );
3675    }
3676
3677    #[tokio::test]
3678    async fn se_wire_03_structured_error_has_suggested_fix() {
3679        let exec = executor();
3680        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(ToolNotFoundDispatcher);
3681
3682        // Provide known_tools so fuzzy matching can suggest "find_symbols"
3683        let known_tools = vec![("narsil".to_string(), "find_symbols".to_string())];
3684
3685        let code = r#"async () => {
3686            const result = await forge.callTool("narsil", "fnd_symbols", {});
3687            return result;
3688        }"#;
3689
3690        let result = exec
3691            .execute_code_with_options(code, dispatcher, None, None, None, Some(known_tools))
3692            .await
3693            .unwrap();
3694        assert_eq!(result["code"], "TOOL_NOT_FOUND", "code: {result}");
3695        let fix = result["suggested_fix"]
3696            .as_str()
3697            .expect("should have suggested_fix");
3698        assert!(
3699            fix.contains("find_symbols"),
3700            "should suggest find_symbols, got: {fix}"
3701        );
3702    }
3703
3704    #[tokio::test]
3705    async fn se_wire_04_structured_error_message_is_redacted() {
3706        // Dispatcher that leaks a credential in its error message
3707        struct CredLeakDispatcher;
3708
3709        #[async_trait::async_trait]
3710        impl ToolDispatcher for CredLeakDispatcher {
3711            async fn call_tool(
3712                &self,
3713                server: &str,
3714                _tool: &str,
3715                _args: serde_json::Value,
3716            ) -> Result<serde_json::Value, forge_error::DispatchError> {
3717                Err(forge_error::DispatchError::Upstream {
3718                    server: server.to_string(),
3719                    message: "auth failed with Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.rg2e at https://internal.corp:9999/api".to_string(),
3720                })
3721            }
3722        }
3723
3724        let exec = executor();
3725        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(CredLeakDispatcher);
3726
3727        let code = r#"async () => {
3728            const result = await forge.callTool("narsil", "find", {});
3729            return result;
3730        }"#;
3731
3732        let result = exec
3733            .execute_code(code, dispatcher, None, None)
3734            .await
3735            .unwrap();
3736        let msg = result["message"].as_str().expect("should have message");
3737        assert!(!msg.contains("eyJhbGci"), "JWT should be redacted: {msg}");
3738        assert!(
3739            !msg.contains("internal.corp"),
3740            "URL should be redacted: {msg}"
3741        );
3742    }
3743}