forge_sandbox/
executor.rs

1//! Sandbox executor — creates fresh V8 isolates and runs LLM-generated code.
2//!
3//! Each execution gets a brand new runtime. No state leaks between calls.
4//!
5//! V8 isolates are `!Send`, so all JsRuntime operations run on a dedicated
6//! thread with its own single-threaded tokio runtime. The public API is
7//! fully async and `Send`-safe.
8
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::Arc;
11use std::time::Duration;
12
13use deno_core::{v8, JsRuntime, PollEventLoopOptions, RuntimeOptions};
14use serde_json::Value;
15use tokio::sync::Semaphore;
16
17use crate::audit::{
18    AuditEntryBuilder, AuditLogger, AuditOperation, AuditingDispatcher, AuditingResourceDispatcher,
19    AuditingStashDispatcher, NoopAuditLogger, ResourceReadAudit, StashOperationAudit,
20    ToolCallAudit,
21};
22use crate::error::SandboxError;
23use crate::ops::{
24    forge_ext, CurrentGroup, ExecutionResult, KnownServers, KnownTools, MaxResourceSize,
25    StashCallLimits, ToolCallLimits,
26};
27use crate::validator::validate_code;
28use crate::{ResourceDispatcher, StashDispatcher, ToolDispatcher};
29
/// Strategy the sandbox uses to run a piece of code.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExecutionMode {
    /// V8 runs inside the current process on a dedicated thread.
    ///
    /// This is the default variant and is suitable for tests.
    #[default]
    InProcess,
    /// Every execution is handed to an isolated child process
    /// (production security mode).
    ChildProcess,
}
40
41/// Configuration for the sandbox executor.
42#[derive(Debug, Clone)]
43pub struct SandboxConfig {
44    /// Maximum execution time before the sandbox is terminated.
45    pub timeout: Duration,
46    /// Maximum size of LLM-generated code in bytes.
47    pub max_code_size: usize,
48    /// Maximum size of the JSON result in bytes.
49    pub max_output_size: usize,
50    /// V8 heap limit in bytes.
51    pub max_heap_size: usize,
52    /// Maximum concurrent sandbox executions.
53    pub max_concurrent: usize,
54    /// Maximum tool calls per execution.
55    pub max_tool_calls: usize,
56    /// Maximum stash operations per execution.
57    pub max_stash_calls: Option<usize>,
58    /// Maximum size of tool call arguments in bytes.
59    pub max_tool_call_args_size: usize,
60    /// Execution mode: in-process or child-process isolation.
61    pub execution_mode: ExecutionMode,
62    /// Maximum resource content size in bytes (default: 64 MB).
63    pub max_resource_size: usize,
64    /// Maximum concurrent calls in forge.parallel() (default: 8).
65    pub max_parallel: usize,
66    /// Maximum IPC message size in bytes (default: 65 MB).
67    pub max_ipc_message_size: usize,
68}
69
70impl Default for SandboxConfig {
71    fn default() -> Self {
72        Self {
73            timeout: Duration::from_secs(5),
74            max_code_size: 64 * 1024,        // 64 KB
75            max_output_size: 1024 * 1024,    // 1 MB
76            max_heap_size: 64 * 1024 * 1024, // 64 MB
77            max_concurrent: 8,
78            max_tool_calls: 50,
79            max_stash_calls: None,
80            max_tool_call_args_size: 1024 * 1024, // 1 MB
81            execution_mode: ExecutionMode::default(),
82            max_resource_size: crate::ipc::DEFAULT_MAX_RESOURCE_SIZE,
83            max_parallel: 8,
84            max_ipc_message_size: crate::ipc::DEFAULT_MAX_IPC_MESSAGE_SIZE,
85        }
86    }
87}
88
89/// The sandbox executor. Creates fresh V8 isolates for each execution.
90///
91/// This is `Send + Sync` safe — all V8 operations are dispatched to a
92/// dedicated thread internally. A concurrency semaphore limits the number
93/// of simultaneous V8 isolates.
94pub struct SandboxExecutor {
95    config: SandboxConfig,
96    semaphore: Arc<Semaphore>,
97    audit_logger: Arc<dyn AuditLogger>,
98    /// Optional worker pool for reusing child processes.
99    pool: Option<Arc<crate::pool::WorkerPool>>,
100}
101
102impl SandboxExecutor {
103    /// Create a new sandbox executor with the given configuration.
104    pub fn new(config: SandboxConfig) -> Self {
105        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
106        Self {
107            config,
108            semaphore,
109            audit_logger: Arc::new(NoopAuditLogger),
110            pool: None,
111        }
112    }
113
114    /// Create a new sandbox executor with an audit logger.
115    pub fn with_audit_logger(config: SandboxConfig, logger: Arc<dyn AuditLogger>) -> Self {
116        let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
117        Self {
118            config,
119            semaphore,
120            audit_logger: logger,
121            pool: None,
122        }
123    }
124
125    /// Attach a worker pool for reusing child processes.
126    ///
127    /// When a pool is set and the execution mode is `ChildProcess`, workers
128    /// are acquired from the pool instead of spawning fresh processes.
129    pub fn with_pool(mut self, pool: Arc<crate::pool::WorkerPool>) -> Self {
130        self.pool = Some(pool);
131        self
132    }
133
134    /// Execute a `search()` call — runs code against the capability manifest.
135    ///
136    /// The manifest is injected as `globalThis.manifest` in the sandbox.
137    /// The LLM's code is an async arrow function that queries it.
138    /// Search always runs in-process (read-only, no credential exposure risk).
139    #[tracing::instrument(skip(self, code, manifest), fields(code_len = code.len()))]
140    pub async fn execute_search(
141        &self,
142        code: &str,
143        manifest: &Value,
144    ) -> Result<Value, SandboxError> {
145        tracing::info!("execute_search: starting");
146
147        let audit_builder = AuditEntryBuilder::new(code, AuditOperation::Search);
148
149        validate_code(code, Some(self.config.max_code_size))?;
150
151        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
152            SandboxError::ConcurrencyLimit {
153                max: self.config.max_concurrent,
154            }
155        })?;
156
157        let code = code.to_string();
158        let manifest = manifest.clone();
159        let config = self.config.clone();
160
161        // V8 isolates are !Send — run everything on a dedicated thread
162        let (tx, rx) = tokio::sync::oneshot::channel();
163        std::thread::spawn(move || {
164            let rt = match tokio::runtime::Builder::new_current_thread()
165                .enable_all()
166                .build()
167            {
168                Ok(rt) => rt,
169                Err(e) => {
170                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
171                        tracing::warn!("sandbox result receiver dropped");
172                    }
173                    return;
174                }
175            };
176            let result = rt.block_on(run_search(&config, &code, &manifest));
177            if tx.send(result).is_err() {
178                tracing::warn!("sandbox result receiver dropped before result was sent");
179            }
180        });
181
182        let result = rx
183            .await
184            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?;
185
186        // Emit audit entry
187        let entry = audit_builder.finish(&result);
188        self.audit_logger.log(&entry).await;
189
190        match &result {
191            Ok(_) => tracing::info!("execute_search: complete"),
192            Err(e) => tracing::warn!(error = %e, "execute_search: failed"),
193        }
194
195        result
196    }
197
198    /// Execute an `execute()` call — runs code against the tool API.
199    ///
200    /// Tool calls go through `forge.callTool(server, tool, args)` which
201    /// dispatches to the Rust-side ToolDispatcher via `op_forge_call_tool`.
202    /// Resource reads go through `forge.readResource(server, uri)` which
203    /// dispatches to the Rust-side ResourceDispatcher via `op_forge_read_resource`.
204    ///
205    /// In `ChildProcess` mode, spawns an isolated worker process. In `InProcess`
206    /// mode (default), runs V8 on a dedicated thread in the current process.
207    pub async fn execute_code(
208        &self,
209        code: &str,
210        dispatcher: Arc<dyn ToolDispatcher>,
211        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
212        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
213    ) -> Result<Value, SandboxError> {
214        self.execute_code_with_options(
215            code,
216            dispatcher,
217            resource_dispatcher,
218            stash_dispatcher,
219            None,
220            None,
221        )
222        .await
223    }
224
225    /// Execute code with additional options (known servers for SR-R6 validation,
226    /// known tools for structured error fuzzy matching).
227    #[tracing::instrument(skip(self, code, dispatcher, resource_dispatcher, stash_dispatcher, known_servers, known_tools), fields(code_len = code.len(), mode = ?self.config.execution_mode))]
228    pub async fn execute_code_with_options(
229        &self,
230        code: &str,
231        dispatcher: Arc<dyn ToolDispatcher>,
232        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
233        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
234        known_servers: Option<std::collections::HashSet<String>>,
235        known_tools: Option<Vec<(String, String)>>,
236    ) -> Result<Value, SandboxError> {
237        tracing::info!("execute_code: starting");
238
239        let mut audit_builder = AuditEntryBuilder::new(code, AuditOperation::Execute);
240
241        validate_code(code, Some(self.config.max_code_size))?;
242
243        let _permit = self.semaphore.clone().try_acquire_owned().map_err(|_| {
244            SandboxError::ConcurrencyLimit {
245                max: self.config.max_concurrent,
246            }
247        })?;
248
249        // Wrap dispatcher with audit tracking
250        let (audit_tx, mut audit_rx) = tokio::sync::mpsc::unbounded_channel::<ToolCallAudit>();
251        let auditing_dispatcher: Arc<dyn ToolDispatcher> =
252            Arc::new(AuditingDispatcher::new(dispatcher, audit_tx));
253
254        // Wrap resource dispatcher with audit tracking
255        let (resource_audit_tx, mut resource_audit_rx) =
256            tokio::sync::mpsc::unbounded_channel::<ResourceReadAudit>();
257        let auditing_resource_dispatcher = resource_dispatcher.map(|rd| {
258            Arc::new(AuditingResourceDispatcher::new(rd, resource_audit_tx))
259                as Arc<dyn ResourceDispatcher>
260        });
261
262        // Wrap stash dispatcher with audit tracking
263        let (stash_audit_tx, mut stash_audit_rx) =
264            tokio::sync::mpsc::unbounded_channel::<StashOperationAudit>();
265        let auditing_stash_dispatcher = stash_dispatcher.map(|sd| {
266            Arc::new(AuditingStashDispatcher::new(sd, stash_audit_tx)) as Arc<dyn StashDispatcher>
267        });
268
269        let result = match self.config.execution_mode {
270            ExecutionMode::ChildProcess => {
271                if let Some(ref pool) = self.pool {
272                    // Pool mode: acquire a warm worker, execute, release
273                    match pool.acquire(&self.config).await {
274                        Ok(mut worker) => {
275                            let exec_result = worker
276                                .execute(
277                                    code,
278                                    &self.config,
279                                    crate::pool::PooledExecutionContext {
280                                        dispatcher: auditing_dispatcher,
281                                        resource_dispatcher: auditing_resource_dispatcher,
282                                        stash_dispatcher: auditing_stash_dispatcher,
283                                        known_servers: known_servers.clone(),
284                                        known_tools: known_tools.clone(),
285                                    },
286                                )
287                                .await;
288                            let outcome = if is_fatal_sandbox_error(&exec_result) {
289                                crate::pool::ReleaseOutcome::Fatal
290                            } else {
291                                crate::pool::ReleaseOutcome::Ok
292                            };
293                            pool.release(worker, outcome).await;
294                            exec_result
295                        }
296                        Err(e) => {
297                            tracing::warn!(error = %e, "pool acquire failed, falling back to fresh process");
298                            crate::host::SandboxHost::execute_in_child(
299                                code,
300                                &self.config,
301                                auditing_dispatcher,
302                                auditing_resource_dispatcher,
303                                auditing_stash_dispatcher,
304                                known_servers,
305                                known_tools,
306                            )
307                            .await
308                        }
309                    }
310                } else {
311                    // No pool: spawn fresh child process
312                    crate::host::SandboxHost::execute_in_child(
313                        code,
314                        &self.config,
315                        auditing_dispatcher,
316                        auditing_resource_dispatcher,
317                        auditing_stash_dispatcher,
318                        known_servers,
319                        known_tools,
320                    )
321                    .await
322                }
323            }
324            ExecutionMode::InProcess => {
325                self.execute_code_in_process(
326                    code,
327                    auditing_dispatcher,
328                    auditing_resource_dispatcher,
329                    auditing_stash_dispatcher,
330                    known_servers,
331                    known_tools,
332                )
333                .await
334            }
335        };
336
337        // Collect tool call audits
338        while let Ok(tool_audit) = audit_rx.try_recv() {
339            audit_builder.record_tool_call(tool_audit);
340        }
341
342        // Collect resource read audits
343        while let Ok(resource_audit) = resource_audit_rx.try_recv() {
344            audit_builder.record_resource_read(resource_audit);
345        }
346
347        // Collect stash operation audits
348        while let Ok(stash_audit) = stash_audit_rx.try_recv() {
349            audit_builder.record_stash_op(stash_audit);
350        }
351
352        // Emit audit entry
353        let entry = audit_builder.finish(&result);
354        self.audit_logger.log(&entry).await;
355
356        match &result {
357            Ok(_) => tracing::info!("execute_code: complete"),
358            Err(e) => tracing::warn!(error = %e, "execute_code: failed"),
359        }
360
361        result
362    }
363
364    /// In-process execution: spawn a dedicated thread with its own V8 isolate.
365    async fn execute_code_in_process(
366        &self,
367        code: &str,
368        dispatcher: Arc<dyn ToolDispatcher>,
369        resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
370        stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
371        known_servers: Option<std::collections::HashSet<String>>,
372        known_tools: Option<Vec<(String, String)>>,
373    ) -> Result<Value, SandboxError> {
374        let code = code.to_string();
375        let config = self.config.clone();
376
377        let (tx, rx) = tokio::sync::oneshot::channel();
378        std::thread::spawn(move || {
379            let rt = match tokio::runtime::Builder::new_current_thread()
380                .enable_all()
381                .build()
382            {
383                Ok(rt) => rt,
384                Err(e) => {
385                    if tx.send(Err(SandboxError::Execution(e.into()))).is_err() {
386                        tracing::warn!("sandbox result receiver dropped");
387                    }
388                    return;
389                }
390            };
391            let result = rt.block_on(run_execute_with_known_servers(
392                &config,
393                &code,
394                dispatcher,
395                resource_dispatcher,
396                stash_dispatcher,
397                known_servers,
398                known_tools,
399            ));
400            if tx.send(result).is_err() {
401                tracing::warn!("sandbox result receiver dropped before result was sent");
402            }
403        });
404
405        rx.await
406            .map_err(|_| SandboxError::Execution(anyhow::anyhow!("sandbox thread panicked")))?
407    }
408}
409
410/// Determine if a sandbox execution result indicates a fatal worker condition.
411///
412/// Workers that time out or exceed their heap limit must be killed rather than
413/// reused. With the `ErrorKind` field in `ExecutionComplete`, the host now
414/// reconstructs the correct typed `SandboxError` variant, so this function
415/// only needs to match the native types.
416fn is_fatal_sandbox_error(result: &Result<Value, SandboxError>) -> bool {
417    matches!(
418        result,
419        Err(SandboxError::Timeout { .. }) | Err(SandboxError::HeapLimitExceeded)
420    )
421}
422
423/// State for the near-heap-limit callback.
424struct HeapLimitState {
425    handle: v8::IsolateHandle,
426    /// Whether the heap limit has been triggered. Uses AtomicBool so the callback
427    /// can use a shared `&` reference instead of `&mut`, eliminating aliasing concerns.
428    triggered: AtomicBool,
429}
430
431/// V8 near-heap-limit callback. Terminates execution and grants 1MB grace
432/// for the termination to propagate cleanly.
433extern "C" fn near_heap_limit_callback(
434    data: *mut std::ffi::c_void,
435    current_heap_limit: usize,
436    _initial_heap_limit: usize,
437) -> usize {
438    // SAFETY: `data` points to `heap_state` (Box<HeapLimitState>) allocated below.
439    // The Box outlives this callback because: (1) the watchdog thread is joined
440    // before heap_state is dropped, and (2) V8 only invokes this callback while the
441    // isolate's event loop is running, which completes before the join.
442    // We use a shared `&` reference (not `&mut`) because `triggered` is AtomicBool,
443    // so no aliasing concerns even if V8 were to call this callback re-entrantly.
444    let state = unsafe { &*(data as *const HeapLimitState) };
445    if !state.triggered.swap(true, Ordering::SeqCst) {
446        state.handle.terminate_execution();
447    }
448    // Grant 1MB grace so the termination exception can propagate
449    current_heap_limit + 1024 * 1024
450}
451
452/// Run a search operation on the current thread (must be called from a
453/// dedicated thread, not the main tokio runtime).
454///
455/// Public for reuse in the worker binary.
456pub async fn run_search(
457    config: &SandboxConfig,
458    code: &str,
459    manifest: &Value,
460) -> Result<Value, SandboxError> {
461    let mut runtime = create_runtime(
462        None,
463        None,
464        config.max_heap_size,
465        None,
466        None,
467        None,
468        None,
469        None,
470        None,
471    )?;
472
473    // Inject the manifest as a global
474    let manifest_json = serde_json::to_string(manifest)?;
475    let bootstrap = format!("globalThis.manifest = {};", manifest_json);
476    runtime
477        .execute_script("[forge:manifest]", bootstrap)
478        .map_err(|e| SandboxError::JsError {
479            message: e.to_string(),
480        })?;
481
482    // Bootstrap: capture ops in closures, create minimal forge object, delete Deno,
483    // and remove dangerous code generation primitives.
484    runtime
485        .execute_script(
486            "[forge:bootstrap]",
487            r#"
488                ((ops) => {
489                    const setResult = (json) => ops.op_forge_set_result(json);
490                    const log = (msg) => ops.op_forge_log(String(msg));
491                    globalThis.forge = Object.freeze({
492                        __setResult: setResult,
493                        log: log,
494                    });
495                    delete globalThis.Deno;
496
497                    // Remove code generation primitives to prevent prototype chain attacks.
498                    // Even with the validator banning eval( and Function(, an attacker could
499                    // reach Function via forge.log.constructor or similar prototype chain access.
500                    delete globalThis.eval;
501                    const AsyncFunction = (async function(){}).constructor;
502                    const GeneratorFunction = (function*(){}).constructor;
503                    Object.defineProperty(Function.prototype, 'constructor', {
504                        value: undefined, configurable: false, writable: false
505                    });
506                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
507                        value: undefined, configurable: false, writable: false
508                    });
509                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
510                        value: undefined, configurable: false, writable: false
511                    });
512                })(Deno.core.ops);
513            "#,
514        )
515        .map_err(|e| SandboxError::JsError {
516            message: e.to_string(),
517        })?;
518
519    run_user_code(&mut runtime, code, config).await
520}
521
522/// Run an execute operation on the current thread.
523///
524/// Public for reuse in the worker binary.
525pub async fn run_execute(
526    config: &SandboxConfig,
527    code: &str,
528    dispatcher: Arc<dyn ToolDispatcher>,
529    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
530    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
531) -> Result<Value, SandboxError> {
532    run_execute_with_known_servers(
533        config,
534        code,
535        dispatcher,
536        resource_dispatcher,
537        stash_dispatcher,
538        None,
539        None,
540    )
541    .await
542}
543
544/// Run an execute operation with an optional set of known server names for SR-R6 validation
545/// and known tools for structured error fuzzy matching.
546pub async fn run_execute_with_known_servers(
547    config: &SandboxConfig,
548    code: &str,
549    dispatcher: Arc<dyn ToolDispatcher>,
550    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
551    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
552    known_servers: Option<std::collections::HashSet<String>>,
553    known_tools: Option<Vec<(String, String)>>,
554) -> Result<Value, SandboxError> {
555    let limits = ToolCallLimits {
556        max_calls: config.max_tool_calls,
557        max_args_size: config.max_tool_call_args_size,
558        calls_made: 0,
559    };
560    let stash_call_limits = config.max_stash_calls.map(|max_calls| StashCallLimits {
561        max_calls: Some(max_calls),
562        calls_made: 0,
563    });
564    let mut runtime = create_runtime(
565        Some(dispatcher),
566        resource_dispatcher.clone(),
567        config.max_heap_size,
568        Some(limits),
569        Some(config.max_resource_size),
570        stash_dispatcher.clone(),
571        stash_call_limits,
572        known_servers,
573        known_tools,
574    )?;
575
576    // Determine which capabilities are available
577    let has_resource_dispatcher = resource_dispatcher.is_some();
578    let has_stash_dispatcher = stash_dispatcher.is_some();
579
580    // Bootstrap: capture ops in closures, create full forge API, delete Deno,
581    // and remove dangerous code generation primitives.
582    // User code accesses tools via forge.callTool() or forge.server("x").cat.tool().
583    // Conditionally includes readResource and stash based on available dispatchers.
584    let bootstrap = build_execute_bootstrap(
585        has_resource_dispatcher,
586        has_stash_dispatcher,
587        config.max_parallel,
588    );
589
590    runtime
591        .execute_script("[forge:bootstrap]", bootstrap)
592        .map_err(|e| SandboxError::JsError {
593            message: e.to_string(),
594        })?;
595
596    run_user_code(&mut runtime, code, config).await
597}
598
/// Build the bootstrap JavaScript for execute mode.
///
/// The returned script is a single IIFE over `Deno.core.ops` that:
/// - captures the ops it needs in closures,
/// - installs a frozen `globalThis.forge` with `__setResult`, `log`,
///   `callTool`, `server` and `parallel`,
/// - adds `readResource` only when `has_resource` is true and `stash` only
///   when `has_stash` is true,
/// - deletes `globalThis.Deno` and `globalThis.eval`, and neuters the
///   `constructor` of the function prototypes.
///
/// `max_parallel` is baked into the script as the cap for
/// `forge.parallel()`. The effective batch size is always an integer of at
/// least 1: a missing, non-numeric, zero or negative `opts.concurrency`
/// falls back to the cap, a fractional one is floored, and a cap of `0`
/// degrades to serial execution. Without this clamp the batching loop
/// would never advance for non-positive values, and fractional values
/// would write results to fractional indices.
fn build_execute_bootstrap(has_resource: bool, has_stash: bool, max_parallel: usize) -> String {
    let mut parts = Vec::new();

    // Always available ops + frozen concurrency cap
    parts.push(format!(
        r#"((ops) => {{
                    const callToolOp = ops.op_forge_call_tool;
                    const setResult = (json) => ops.op_forge_set_result(json);
                    const log = (msg) => ops.op_forge_log(String(msg));
                    const __MAX_PARALLEL = Object.freeze({max_parallel});

                    const callTool = async (server, tool, args) => {{
                        const resultJson = await callToolOp(
                            server, tool, JSON.stringify(args || {{}})
                        );
                        return JSON.parse(resultJson);
                    }};"#
    ));

    // readResource binding (conditional)
    if has_resource {
        parts.push(
            r#"
                    const readResourceOp = ops.op_forge_read_resource;
                    const readResource = async (server, uri) => {
                        const resultJson = await readResourceOp(server, uri);
                        return JSON.parse(resultJson);
                    };"#
            .to_string(),
        );
    }

    // stash bindings (conditional)
    if has_stash {
        parts.push(
            r#"
                    const stashPutOp = ops.op_forge_stash_put;
                    const stashGetOp = ops.op_forge_stash_get;
                    const stashDeleteOp = ops.op_forge_stash_delete;
                    const stashKeysOp = ops.op_forge_stash_keys;"#
                .to_string(),
        );
    }

    // Build the forge object properties
    let mut forge_props = vec![
        "                        __setResult: setResult".to_string(),
        "                        log: log".to_string(),
        "                        callTool: callTool".to_string(),
    ];

    if has_resource {
        forge_props.push("                        readResource: readResource".to_string());
    }

    if has_stash {
        forge_props.push(
            r#"                        stash: Object.freeze({
                            put: async (key, value, opts) => {
                                const ttl = (opts && opts.ttl) ? opts.ttl : 0;
                                const resultJson = await stashPutOp(key, JSON.stringify(value), ttl);
                                return JSON.parse(resultJson);
                            },
                            get: async (key) => {
                                const resultJson = await stashGetOp(key);
                                return JSON.parse(resultJson);
                            },
                            delete: async (key) => {
                                const resultJson = await stashDeleteOp(key);
                                return JSON.parse(resultJson);
                            },
                            keys: async () => {
                                const resultJson = await stashKeysOp();
                                return JSON.parse(resultJson);
                            }
                        })"#
            .to_string(),
        );
    }

    // server proxy is always included
    forge_props.push(
        r#"                        server: (name) => {
                            return new Proxy({}, {
                                get(_target, category) {
                                    return new Proxy({}, {
                                        get(_target2, tool) {
                                            return async (args) => {
                                                const toolName = category === 'general' ? tool : `${category}.${tool}`;
                                                return callTool(
                                                    name,
                                                    toolName,
                                                    args || {}
                                                );
                                            };
                                        }
                                    });
                                }
                            });
                        }"#
        .to_string(),
    );

    // forge.parallel() — bounded concurrency wrapper over callTool/readResource.
    // The batch size is floored and clamped to [1, max(1, __MAX_PARALLEL)] so the
    // `i += concurrency` loop always advances by a whole, positive step.
    forge_props.push(
        r#"                        parallel: async (calls, opts) => {
                            opts = opts || {};
                            const requested = Math.floor(Number(opts.concurrency));
                            const concurrency = Math.max(
                                1,
                                Math.min(
                                    requested >= 1 ? requested : __MAX_PARALLEL,
                                    __MAX_PARALLEL
                                )
                            );
                            const failFast = opts.failFast || false;
                            const results = new Array(calls.length).fill(null);
                            const errors = [];
                            let aborted = false;

                            for (let i = 0; i < calls.length && !aborted; i += concurrency) {
                                const batch = calls.slice(i, i + concurrency);
                                await Promise.allSettled(
                                    batch.map((item, idx) => {
                                        const fn_ = typeof item === 'function' ? item : item.fn;
                                        return fn_().then(
                                            val => {
                                                if (val && val.error === true && val.code) {
                                                    errors.push({ index: i + idx, error: val.message || val.code });
                                                } else {
                                                    results[i + idx] = val;
                                                }
                                                if (errors.length > 0 && failFast) aborted = true;
                                            },
                                            err => {
                                                errors.push({ index: i + idx, error: err.message || String(err) });
                                                if (failFast) aborted = true;
                                            }
                                        );
                                    })
                                );
                            }

                            return { results, errors, aborted };
                        }"#
        .to_string(),
    );

    let forge_obj = format!(
        r#"
                    globalThis.forge = Object.freeze({{
{}
                    }});"#,
        forge_props.join(",\n")
    );
    parts.push(forge_obj);

    // Security: remove dangerous globals
    parts.push(
        r#"
                    delete globalThis.Deno;

                    // Remove code generation primitives to prevent prototype chain attacks.
                    delete globalThis.eval;
                    const AsyncFunction = (async function(){}).constructor;
                    const GeneratorFunction = (function*(){}).constructor;
                    Object.defineProperty(Function.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(AsyncFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                    Object.defineProperty(GeneratorFunction.prototype, 'constructor', {
                        value: undefined, configurable: false, writable: false
                    });
                })(Deno.core.ops);"#
            .to_string(),
    );

    parts.join("\n")
}
780
781/// Create a fresh JsRuntime with the forge extension loaded and V8 heap limits set.
782#[allow(clippy::too_many_arguments)]
783pub(crate) fn create_runtime(
784    dispatcher: Option<Arc<dyn ToolDispatcher>>,
785    resource_dispatcher: Option<Arc<dyn ResourceDispatcher>>,
786    max_heap_size: usize,
787    tool_call_limits: Option<ToolCallLimits>,
788    max_resource_size: Option<usize>,
789    stash_dispatcher: Option<Arc<dyn StashDispatcher>>,
790    stash_call_limits: Option<StashCallLimits>,
791    known_servers: Option<std::collections::HashSet<String>>,
792    known_tools: Option<Vec<(String, String)>>,
793) -> Result<JsRuntime, SandboxError> {
794    let create_params = v8::CreateParams::default().heap_limits(0, max_heap_size);
795
796    let runtime = JsRuntime::new(RuntimeOptions {
797        extensions: vec![forge_ext::init()],
798        create_params: Some(create_params),
799        ..Default::default()
800    });
801
802    if let Some(d) = dispatcher {
803        runtime.op_state().borrow_mut().put(d);
804    }
805    if let Some(rd) = resource_dispatcher {
806        runtime.op_state().borrow_mut().put(rd);
807    }
808    if let Some(limits) = tool_call_limits {
809        runtime.op_state().borrow_mut().put(limits);
810    }
811    if let Some(size) = max_resource_size {
812        runtime.op_state().borrow_mut().put(MaxResourceSize(size));
813    }
814    if let Some(sd) = stash_dispatcher {
815        runtime.op_state().borrow_mut().put(sd);
816        // CurrentGroup defaults to None; the ForgeServer level sets the actual group
817        runtime.op_state().borrow_mut().put(CurrentGroup(None));
818    }
819    if let Some(limits) = stash_call_limits {
820        runtime.op_state().borrow_mut().put(limits);
821    }
822    if let Some(servers) = known_servers {
823        runtime.op_state().borrow_mut().put(KnownServers(servers));
824    }
825    if let Some(tools) = known_tools {
826        runtime.op_state().borrow_mut().put(KnownTools(tools));
827    }
828
829    Ok(runtime)
830}
831
832/// Wrap the user's async arrow function, execute it, and extract the result.
833///
834/// Sets up a CPU watchdog thread and near-heap-limit callback before running
835/// user code. The watchdog terminates V8 execution if the timeout elapses
836/// (handles CPU-bound infinite loops). The heap callback terminates execution
837/// if V8 approaches the heap limit (prevents OOM abort).
838async fn run_user_code(
839    runtime: &mut JsRuntime,
840    code: &str,
841    config: &SandboxConfig,
842) -> Result<Value, SandboxError> {
843    // --- Set up heap limit callback ---
844    let heap_state = Box::new(HeapLimitState {
845        handle: runtime.v8_isolate().thread_safe_handle(),
846        triggered: AtomicBool::new(false),
847    });
848    runtime.v8_isolate().add_near_heap_limit_callback(
849        near_heap_limit_callback,
850        &*heap_state as *const HeapLimitState as *mut std::ffi::c_void,
851    );
852
853    // --- Set up CPU watchdog ---
854    let watchdog_handle = runtime.v8_isolate().thread_safe_handle();
855    let timed_out = Arc::new(AtomicBool::new(false));
856    let watchdog_timed_out = timed_out.clone();
857    let timeout = config.timeout;
858    let (cancel_tx, cancel_rx) = std::sync::mpsc::channel::<()>();
859
860    let watchdog = std::thread::spawn(move || {
861        if let Err(std::sync::mpsc::RecvTimeoutError::Timeout) = cancel_rx.recv_timeout(timeout) {
862            watchdog_timed_out.store(true, Ordering::SeqCst);
863            watchdog_handle.terminate_execution();
864        }
865    });
866
867    // --- Execute user code ---
868    let wrapped = format!(
869        r#"
870        (async () => {{
871            try {{
872                const __userFn = {code};
873                const __result = await __userFn();
874                forge.__setResult(
875                    JSON.stringify({{ ok: __result }})
876                );
877            }} catch (e) {{
878                forge.__setResult(
879                    JSON.stringify({{ error: e.message || String(e) }})
880                );
881            }}
882        }})();
883        "#
884    );
885
886    let exec_error = match runtime.execute_script("[forge:execute]", wrapped) {
887        Ok(_) => {
888            // Drive the event loop to resolve async operations
889            match tokio::time::timeout(
890                config.timeout,
891                runtime.run_event_loop(PollEventLoopOptions::default()),
892            )
893            .await
894            {
895                Ok(Ok(())) => None,
896                Ok(Err(e)) => Some(e.to_string()),
897                Err(_) => Some("async timeout".to_string()),
898            }
899        }
900        Err(e) => Some(e.to_string()),
901    };
902
903    // --- Cleanup: cancel watchdog and wait for it to exit ---
904    // This ensures the watchdog thread is done before we drop the runtime,
905    // preventing use-after-free on the IsolateHandle.
906    let _ = cancel_tx.send(());
907    let _ = watchdog.join();
908
909    // --- Check error causes in priority order ---
910    if heap_state.triggered.load(Ordering::SeqCst) {
911        return Err(SandboxError::HeapLimitExceeded);
912    }
913
914    if timed_out.load(Ordering::SeqCst) {
915        return Err(SandboxError::Timeout {
916            timeout_ms: config.timeout.as_millis() as u64,
917        });
918    }
919
920    if let Some(err_msg) = exec_error {
921        return Err(SandboxError::JsError { message: err_msg });
922    }
923
924    // --- Extract result from OpState ---
925    let result_str = {
926        let state = runtime.op_state();
927        let state = state.borrow();
928        state
929            .try_borrow::<ExecutionResult>()
930            .map(|r| r.0.clone())
931            .ok_or_else(|| SandboxError::JsError {
932                message: "no result returned from sandbox execution".into(),
933            })?
934    };
935
936    if result_str.len() > config.max_output_size {
937        return Err(SandboxError::OutputTooLarge {
938            max: config.max_output_size,
939        });
940    }
941
942    let envelope: Value = serde_json::from_str(&result_str)?;
943
944    if let Some(error) = envelope.get("error") {
945        return Err(SandboxError::JsError {
946            message: error.as_str().unwrap_or("unknown error").to_string(),
947        });
948    }
949
950    Ok(envelope.get("ok").cloned().unwrap_or(Value::Null))
951}
952
953#[cfg(test)]
954mod tests {
955    use super::*;
956
957    fn executor() -> SandboxExecutor {
958        SandboxExecutor::new(SandboxConfig::default())
959    }
960
961    /// Test dispatcher that echoes back the server/tool/args.
962    struct TestDispatcher;
963
964    #[async_trait::async_trait]
965    impl ToolDispatcher for TestDispatcher {
966        async fn call_tool(
967            &self,
968            server: &str,
969            tool: &str,
970            args: serde_json::Value,
971        ) -> Result<serde_json::Value, forge_error::DispatchError> {
972            Ok(serde_json::json!({
973                "server": server,
974                "tool": tool,
975                "args": args,
976                "status": "ok"
977            }))
978        }
979    }
980
981    #[tokio::test]
982    async fn search_returns_manifest_data() {
983        let exec = executor();
984        let manifest = serde_json::json!({
985            "tools": [
986                {"name": "parse_ast", "category": "ast"},
987                {"name": "find_symbols", "category": "symbols"},
988            ]
989        });
990
991        let code = r#"async () => {
992            return manifest.tools.filter(t => t.category === "ast");
993        }"#;
994
995        let result = exec.execute_search(code, &manifest).await.unwrap();
996        let tools = result.as_array().unwrap();
997        assert_eq!(tools.len(), 1);
998        assert_eq!(tools[0]["name"], "parse_ast");
999    }
1000
1001    #[tokio::test]
1002    async fn search_handles_complex_queries() {
1003        let exec = executor();
1004        let manifest = serde_json::json!({
1005            "servers": [
1006                {
1007                    "name": "narsil",
1008                    "categories": {
1009                        "ast": { "tools": ["parse", "query", "walk"] },
1010                        "symbols": { "tools": ["find", "references"] }
1011                    }
1012                }
1013            ]
1014        });
1015
1016        let code = r#"async () => {
1017            return manifest.servers
1018                .map(s => ({ name: s.name, categories: Object.keys(s.categories) }));
1019        }"#;
1020
1021        let result = exec.execute_search(code, &manifest).await.unwrap();
1022        let servers = result.as_array().unwrap();
1023        assert_eq!(servers[0]["name"], "narsil");
1024    }
1025
1026    #[tokio::test]
1027    async fn timeout_is_enforced() {
1028        let exec = SandboxExecutor::new(SandboxConfig {
1029            timeout: Duration::from_millis(200),
1030            ..Default::default()
1031        });
1032        let manifest = serde_json::json!({});
1033
1034        // A never-resolving promise should trigger a timeout
1035        let code = r#"async () => {
1036            await new Promise(() => {});
1037        }"#;
1038
1039        let start = std::time::Instant::now();
1040        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1041        let elapsed = start.elapsed();
1042
1043        // Should be a timeout or a "no result" error (the event loop completes
1044        // when there are no more pending ops, even if the promise is unresolved)
1045        match &err {
1046            SandboxError::Timeout { .. } => {}
1047            SandboxError::JsError { message } if message.contains("no result") => {
1048                // deno_core's event loop exits when there are no pending ops,
1049                // so the never-resolving promise doesn't actually block
1050            }
1051            other => panic!("unexpected error: {other:?}, elapsed: {elapsed:?}"),
1052        }
1053    }
1054
1055    #[tokio::test]
1056    async fn js_errors_are_captured() {
1057        let exec = executor();
1058        let manifest = serde_json::json!({});
1059
1060        let code = r#"async () => {
1061            throw new Error("intentional test error");
1062        }"#;
1063
1064        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1065        assert!(matches!(err, SandboxError::JsError { .. }));
1066        let msg = err.to_string();
1067        assert!(msg.contains("intentional test error"));
1068    }
1069
1070    #[tokio::test]
1071    async fn no_filesystem_access() {
1072        let exec = executor();
1073        let manifest = serde_json::json!({});
1074
1075        // require() is a banned pattern — caught by validator
1076        let code = r#"async () => {
1077            const fs = require("fs");
1078            return "ESCAPED";
1079        }"#;
1080
1081        let err = exec.execute_search(code, &manifest).await;
1082        assert!(err.is_err());
1083    }
1084
1085    #[tokio::test]
1086    async fn no_network_access() {
1087        let exec = executor();
1088        let manifest = serde_json::json!({});
1089
1090        let code = r#"async () => {
1091            try {
1092                await fetch("https://example.com");
1093                return "ESCAPED";
1094            } catch(e) {
1095                return "CONTAINED";
1096            }
1097        }"#;
1098
1099        let result = exec.execute_search(code, &manifest).await.unwrap();
1100        assert_eq!(result, "CONTAINED");
1101    }
1102
1103    // --- WU4 new tests ---
1104
1105    #[tokio::test]
1106    async fn cpu_bound_infinite_loop_is_terminated() {
1107        let exec = SandboxExecutor::new(SandboxConfig {
1108            timeout: Duration::from_millis(500),
1109            ..Default::default()
1110        });
1111        let manifest = serde_json::json!({});
1112
1113        let code = r#"async () => {
1114            while(true) {}
1115        }"#;
1116
1117        let start = std::time::Instant::now();
1118        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1119        let elapsed = start.elapsed();
1120
1121        assert!(
1122            matches!(err, SandboxError::Timeout { .. }),
1123            "expected timeout, got: {err:?}"
1124        );
1125        assert!(
1126            elapsed < Duration::from_secs(5),
1127            "should complete reasonably fast, took: {elapsed:?}"
1128        );
1129    }
1130
1131    #[tokio::test]
1132    async fn heap_limit_prevents_oom() {
1133        let exec = SandboxExecutor::new(SandboxConfig {
1134            max_heap_size: 10 * 1024 * 1024,  // 10 MB
1135            timeout: Duration::from_secs(30), // Long timeout so heap fills first
1136            ..Default::default()
1137        });
1138        let manifest = serde_json::json!({});
1139
1140        // Rapidly allocate memory to exceed the heap limit
1141        let code = r#"async () => {
1142            const arr = [];
1143            while(true) {
1144                arr.push(new Array(100000).fill("x"));
1145            }
1146        }"#;
1147
1148        let err = exec.execute_search(code, &manifest).await.unwrap_err();
1149        assert!(
1150            matches!(
1151                err,
1152                SandboxError::HeapLimitExceeded | SandboxError::JsError { .. }
1153            ),
1154            "expected heap limit or JS error, got: {err:?}"
1155        );
1156    }
1157
1158    #[tokio::test]
1159    async fn concurrency_limit_enforced() {
1160        // Use max_concurrent=0 so no executions are allowed (deterministic test)
1161        let exec = SandboxExecutor::new(SandboxConfig {
1162            max_concurrent: 0,
1163            ..Default::default()
1164        });
1165
1166        let code = r#"async () => { return 1; }"#;
1167        let err = exec
1168            .execute_search(code, &serde_json::json!({}))
1169            .await
1170            .unwrap_err();
1171        assert!(
1172            matches!(err, SandboxError::ConcurrencyLimit { max: 0 }),
1173            "expected concurrency limit, got: {err:?}"
1174        );
1175    }
1176
1177    #[tokio::test]
1178    async fn deno_global_is_not_accessible() {
1179        let exec = executor();
1180        let manifest = serde_json::json!({});
1181
1182        let code = r#"async () => {
1183            const props = Object.getOwnPropertyNames(globalThis);
1184            return !props.includes("Deno");
1185        }"#;
1186
1187        let result = exec.execute_search(code, &manifest).await.unwrap();
1188        assert_eq!(result, true);
1189    }
1190
1191    #[tokio::test]
1192    async fn forge_object_is_frozen() {
1193        let exec = executor();
1194        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1195
1196        let code = r#"async () => {
1197            return Object.isFrozen(forge);
1198        }"#;
1199
1200        let result = exec
1201            .execute_code(code, dispatcher, None, None)
1202            .await
1203            .unwrap();
1204        assert_eq!(result, true);
1205    }
1206
1207    #[tokio::test]
1208    async fn tool_call_rate_limit() {
1209        let exec = SandboxExecutor::new(SandboxConfig {
1210            max_tool_calls: 2,
1211            ..Default::default()
1212        });
1213        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1214
1215        let code = r#"async () => {
1216            await forge.callTool("test", "tool1", {});
1217            await forge.callTool("test", "tool2", {});
1218            try {
1219                await forge.callTool("test", "tool3", {});
1220                return "should not reach here";
1221            } catch(e) {
1222                return e.message;
1223            }
1224        }"#;
1225
1226        let result = exec
1227            .execute_code(code, dispatcher, None, None)
1228            .await
1229            .unwrap();
1230        assert!(
1231            result
1232                .as_str()
1233                .unwrap()
1234                .contains("tool call limit exceeded"),
1235            "expected tool call limit message, got: {result:?}"
1236        );
1237    }
1238
1239    #[tokio::test]
1240    async fn tool_call_args_size_limit() {
1241        let exec = SandboxExecutor::new(SandboxConfig {
1242            max_tool_call_args_size: 100,
1243            ..Default::default()
1244        });
1245        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1246
1247        let code = r#"async () => {
1248            try {
1249                await forge.callTool("test", "tool", { data: "x".repeat(200) });
1250                return "should not reach here";
1251            } catch(e) {
1252                return e.message;
1253            }
1254        }"#;
1255
1256        let result = exec
1257            .execute_code(code, dispatcher, None, None)
1258            .await
1259            .unwrap();
1260        assert!(
1261            result.as_str().unwrap().contains("too large"),
1262            "expected args too large message, got: {result:?}"
1263        );
1264    }
1265
1266    #[tokio::test]
1267    async fn forge_log_works() {
1268        let exec = executor();
1269        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1270
1271        let code = r#"async () => {
1272            forge.log("test message from sandbox");
1273            return "ok";
1274        }"#;
1275
1276        let result = exec
1277            .execute_code(code, dispatcher, None, None)
1278            .await
1279            .unwrap();
1280        assert_eq!(result, "ok");
1281    }
1282
1283    #[tokio::test]
1284    async fn forge_server_proxy_calls_tool() {
1285        let exec = executor();
1286        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1287
1288        let code = r#"async () => {
1289            const result = await forge.server("narsil").ast.parse({ file: "test.rs" });
1290            return result;
1291        }"#;
1292
1293        let result = exec
1294            .execute_code(code, dispatcher, None, None)
1295            .await
1296            .unwrap();
1297        assert_eq!(result["server"], "narsil");
1298        assert_eq!(result["tool"], "ast.parse");
1299        assert_eq!(result["status"], "ok");
1300    }
1301
1302    #[tokio::test]
1303    async fn forge_server_proxy_general_category_strips_prefix() {
1304        let exec = executor();
1305        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1306
1307        // Tools in the "general" category should NOT have "general." prepended
1308        let code = r#"async () => {
1309            const result = await forge.server("narsil").general.find_symbols({ pattern: "main" });
1310            return result;
1311        }"#;
1312
1313        let result = exec
1314            .execute_code(code, dispatcher, None, None)
1315            .await
1316            .unwrap();
1317        assert_eq!(result["server"], "narsil");
1318        assert_eq!(result["tool"], "find_symbols");
1319        assert_eq!(result["status"], "ok");
1320    }
1321
1322    #[tokio::test]
1323    async fn multiple_tool_calls_in_single_execution() {
1324        let exec = executor();
1325        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1326
1327        let code = r#"async () => {
1328            const r1 = await forge.callTool("server1", "tool1", {});
1329            const r2 = await forge.callTool("server2", "tool2", {});
1330            return [r1, r2];
1331        }"#;
1332
1333        let result = exec
1334            .execute_code(code, dispatcher, None, None)
1335            .await
1336            .unwrap();
1337        let arr = result.as_array().unwrap();
1338        assert_eq!(arr.len(), 2);
1339        assert_eq!(arr[0]["server"], "server1");
1340        assert_eq!(arr[1]["server"], "server2");
1341    }
1342
1343    #[tokio::test]
1344    async fn eval_is_not_accessible() {
1345        let exec = executor();
1346        let manifest = serde_json::json!({});
1347
1348        let code = r#"async () => {
1349            return typeof globalThis.eval;
1350        }"#;
1351
1352        let result = exec.execute_search(code, &manifest).await.unwrap();
1353        assert_eq!(result, "undefined");
1354    }
1355
1356    #[tokio::test]
1357    async fn function_constructor_is_blocked() {
1358        let exec = executor();
1359        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1360
1361        // Try to access Function via prototype chain — should get undefined
1362        let code = r#"async () => {
1363            const ctor = forge.log.constructor;
1364            return String(ctor);
1365        }"#;
1366
1367        let result = exec
1368            .execute_code(code, dispatcher, None, None)
1369            .await
1370            .unwrap();
1371        assert_eq!(result, "undefined");
1372    }
1373
1374    #[tokio::test]
1375    async fn async_function_constructor_is_blocked() {
1376        let exec = executor();
1377        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1378
1379        // Try to access AsyncFunction via prototype chain
1380        let code = r#"async () => {
1381            const fn1 = async () => {};
1382            const ctor = fn1.constructor;
1383            return String(ctor);
1384        }"#;
1385
1386        let result = exec
1387            .execute_code(code, dispatcher, None, None)
1388            .await
1389            .unwrap();
1390        assert_eq!(result, "undefined");
1391    }
1392
1393    // --- v0.2 Resource read test dispatchers ---
1394
1395    /// Resource dispatcher that echoes back server/uri.
1396    struct TestResourceDispatcher;
1397
1398    #[async_trait::async_trait]
1399    impl ResourceDispatcher for TestResourceDispatcher {
1400        async fn read_resource(
1401            &self,
1402            server: &str,
1403            uri: &str,
1404        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1405            Ok(serde_json::json!({
1406                "server": server,
1407                "uri": uri,
1408                "content": "test resource content"
1409            }))
1410        }
1411    }
1412
1413    /// Resource dispatcher that returns a large payload.
1414    struct LargeResourceDispatcher {
1415        content_size: usize,
1416    }
1417
1418    #[async_trait::async_trait]
1419    impl ResourceDispatcher for LargeResourceDispatcher {
1420        async fn read_resource(
1421            &self,
1422            _server: &str,
1423            _uri: &str,
1424        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1425            Ok(serde_json::json!({
1426                "data": "x".repeat(self.content_size)
1427            }))
1428        }
1429    }
1430
1431    /// Resource dispatcher that always fails with a configurable error.
1432    struct FailingResourceDispatcher {
1433        error_msg: String,
1434    }
1435
1436    #[async_trait::async_trait]
1437    impl ResourceDispatcher for FailingResourceDispatcher {
1438        async fn read_resource(
1439            &self,
1440            _server: &str,
1441            _uri: &str,
1442        ) -> Result<serde_json::Value, forge_error::DispatchError> {
1443            Err(anyhow::anyhow!("{}", self.error_msg).into())
1444        }
1445    }
1446
1447    // --- RS-U01: readResource routes to correct server ---
1448    #[tokio::test]
1449    async fn rs_u01_read_resource_routes_to_correct_server() {
1450        let exec = executor();
1451        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1452        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1453            Some(Arc::new(TestResourceDispatcher));
1454
1455        let code = r#"async () => {
1456            const result = await forge.readResource("my-server", "file:///logs/app.log");
1457            return result;
1458        }"#;
1459
1460        let result = exec
1461            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1462            .await
1463            .unwrap();
1464        assert_eq!(result["server"], "my-server");
1465        assert_eq!(result["uri"], "file:///logs/app.log");
1466        assert_eq!(result["content"], "test resource content");
1467    }
1468
1469    // --- RS-U02: readResource increments ToolCallLimits.calls_made ---
1470    #[tokio::test]
1471    async fn rs_u02_read_resource_shares_rate_limit_with_tool_calls() {
1472        let exec = SandboxExecutor::new(SandboxConfig {
1473            max_tool_calls: 3,
1474            ..Default::default()
1475        });
1476        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1477        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1478            Some(Arc::new(TestResourceDispatcher));
1479
1480        // 1 tool call + 2 resource reads = 3 (limit), then 4th fails
1481        let code = r#"async () => {
1482            await forge.callTool("s", "t", {});
1483            await forge.readResource("s", "file:///a");
1484            await forge.readResource("s", "file:///b");
1485            try {
1486                await forge.readResource("s", "file:///c");
1487                return "should not reach here";
1488            } catch(e) {
1489                return e.message;
1490            }
1491        }"#;
1492
1493        let result = exec
1494            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1495            .await
1496            .unwrap();
1497        assert!(
1498            result
1499                .as_str()
1500                .unwrap()
1501                .contains("tool call limit exceeded"),
1502            "expected rate limit message, got: {result:?}"
1503        );
1504    }
1505
1506    // --- RS-U03: readResource rejects when limits exhausted ---
1507    #[tokio::test]
1508    async fn rs_u03_read_resource_rejects_when_limits_exhausted() {
1509        let exec = SandboxExecutor::new(SandboxConfig {
1510            max_tool_calls: 1,
1511            ..Default::default()
1512        });
1513        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1514        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1515            Some(Arc::new(TestResourceDispatcher));
1516
1517        let code = r#"async () => {
1518            await forge.readResource("s", "file:///a");
1519            try {
1520                await forge.readResource("s", "file:///b");
1521                return "should not reach here";
1522            } catch(e) {
1523                return e.message;
1524            }
1525        }"#;
1526
1527        let result = exec
1528            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1529            .await
1530            .unwrap();
1531        assert!(
1532            result
1533                .as_str()
1534                .unwrap()
1535                .contains("tool call limit exceeded"),
1536            "expected rate limit error, got: {result:?}"
1537        );
1538    }
1539
1540    // --- RS-U08: truncates response at max_resource_size ---
1541    #[tokio::test]
1542    async fn rs_u08_read_resource_truncates_at_max_resource_size() {
1543        let exec = SandboxExecutor::new(SandboxConfig {
1544            max_resource_size: 100, // very small limit
1545            ..Default::default()
1546        });
1547        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1548        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1549            Some(Arc::new(LargeResourceDispatcher { content_size: 500 }));
1550
1551        let code = r#"async () => {
1552            const result = await forge.readResource("s", "file:///big");
1553            return {
1554                truncated: result._truncated,
1555                fragment: result._data_is_fragment,
1556                shown: result._shown_bytes,
1557                original: result._original_bytes,
1558            };
1559        }"#;
1560
1561        let result = exec
1562            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1563            .await
1564            .unwrap();
1565        assert_eq!(
1566            result["truncated"], true,
1567            "large resource should be truncated"
1568        );
1569        assert_eq!(result["fragment"], true);
1570        assert!(result["shown"].as_u64().unwrap() <= 100);
1571        assert!(result["original"].as_u64().unwrap() > result["shown"].as_u64().unwrap());
1572    }
1573
1574    // --- RS-U09: errors redacted through redact_error_for_llm ---
1575    #[tokio::test]
1576    async fn rs_u09_read_resource_redacts_errors() {
1577        let exec = executor();
1578        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1579        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1580            Some(Arc::new(FailingResourceDispatcher {
1581                error_msg: "connection refused: http://internal.corp:9876/secret/path".into(),
1582            }));
1583
1584        // Structured errors are returned as values, not thrown
1585        let code = r#"async () => {
1586            const result = await forge.readResource("my-server", "file:///logs/secret.log");
1587            return result;
1588        }"#;
1589
1590        let result = exec
1591            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1592            .await
1593            .unwrap();
1594        assert_eq!(
1595            result["error"], true,
1596            "should be structured error: {result}"
1597        );
1598        let msg = result["message"].as_str().unwrap();
1599        assert!(
1600            !msg.contains("internal.corp"),
1601            "should not leak internal URL: {msg}"
1602        );
1603        assert!(!msg.contains("9876"), "should not leak port: {msg}");
1604        assert!(
1605            msg.contains("my-server"),
1606            "should mention server name: {msg}"
1607        );
1608    }
1609
1610    // --- RS-U10: binary content (base64 encoding) ---
1611    #[tokio::test]
1612    async fn rs_u10_read_resource_handles_binary_content() {
1613        struct Base64ResourceDispatcher;
1614
1615        #[async_trait::async_trait]
1616        impl ResourceDispatcher for Base64ResourceDispatcher {
1617            async fn read_resource(
1618                &self,
1619                _server: &str,
1620                _uri: &str,
1621            ) -> Result<serde_json::Value, forge_error::DispatchError> {
1622                Ok(serde_json::json!({
1623                    "content": "SGVsbG8gV29ybGQ=",
1624                    "_encoding": "base64"
1625                }))
1626            }
1627        }
1628
1629        let exec = executor();
1630        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1631        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1632            Some(Arc::new(Base64ResourceDispatcher));
1633
1634        let code = r#"async () => {
1635            const result = await forge.readResource("s", "file:///binary");
1636            return result;
1637        }"#;
1638
1639        let result = exec
1640            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1641            .await
1642            .unwrap();
1643        assert_eq!(result["_encoding"], "base64");
1644        assert_eq!(result["content"], "SGVsbG8gV29ybGQ=");
1645    }
1646
1647    // --- RS-U11: error for nonexistent resource ---
1648    #[tokio::test]
1649    async fn rs_u11_read_resource_error_for_nonexistent() {
1650        let exec = executor();
1651        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1652        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1653            Some(Arc::new(FailingResourceDispatcher {
1654                error_msg: "resource not found".into(),
1655            }));
1656
1657        // Structured errors are returned as values, not thrown
1658        let code = r#"async () => {
1659            const result = await forge.readResource("s", "file:///nonexistent");
1660            return result;
1661        }"#;
1662
1663        let result = exec
1664            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1665            .await
1666            .unwrap();
1667        assert_eq!(
1668            result["error"], true,
1669            "should be structured error: {result}"
1670        );
1671        let msg = result["message"].as_str().unwrap();
1672        assert!(
1673            msg.contains("failed"),
1674            "should indicate failure: {result:?}"
1675        );
1676    }
1677
1678    // --- RS-U12: handles large (>1MB) content ---
1679    #[tokio::test]
1680    async fn rs_u12_read_resource_handles_large_content() {
1681        let exec = SandboxExecutor::new(SandboxConfig {
1682            max_resource_size: 2 * 1024 * 1024, // 2 MB
1683            timeout: Duration::from_secs(10),
1684            ..Default::default()
1685        });
1686        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1687        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1688            Some(Arc::new(LargeResourceDispatcher {
1689                content_size: 1_100_000,
1690            }));
1691
1692        let code = r#"async () => {
1693            const result = await forge.readResource("s", "file:///large");
1694            return result.data.length;
1695        }"#;
1696
1697        let result = exec
1698            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1699            .await
1700            .unwrap();
1701        assert_eq!(result, 1_100_000);
1702    }
1703
1704    // --- RS-S05: URI for non-file-server — error redacted, no path leakage ---
1705    #[tokio::test]
1706    async fn rs_s05_error_on_invalid_resource_uri_for_server() {
1707        let exec = executor();
1708        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1709        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1710            Some(Arc::new(FailingResourceDispatcher {
1711                error_msg: "unknown resource URI: file:///etc/shadow".into(),
1712            }));
1713
1714        // Structured errors are returned as values, not thrown
1715        let code = r#"async () => {
1716            const result = await forge.readResource("postgres-server", "file:///etc/shadow");
1717            return result;
1718        }"#;
1719
1720        let result = exec
1721            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1722            .await
1723            .unwrap();
1724        assert_eq!(
1725            result["error"], true,
1726            "should be structured error: {result}"
1727        );
1728        let msg = result["message"].as_str().unwrap();
1729        // SR-R5: Error should use "readResource" not the raw URI
1730        assert!(
1731            !msg.contains("/etc/shadow"),
1732            "should not leak file path: {msg}"
1733        );
1734        // Should still mention server for context
1735        assert!(
1736            msg.contains("postgres-server"),
1737            "should mention server: {msg}"
1738        );
1739        assert!(
1740            msg.contains("readResource"),
1741            "should use safe identifier: {msg}"
1742        );
1743    }
1744
1745    // --- RS-S06: error message does not leak full URI path ---
1746    #[tokio::test]
1747    async fn rs_s06_error_message_does_not_leak_full_uri() {
1748        let exec = executor();
1749        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1750        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1751            Some(Arc::new(FailingResourceDispatcher {
1752                error_msg: "file not found: /var/secrets/database/credentials.json".into(),
1753            }));
1754
1755        let code = r#"async () => {
1756            try {
1757                await forge.readResource("server", "file:///var/secrets/database/credentials.json");
1758                return "should not reach here";
1759            } catch(e) {
1760                return e.message;
1761            }
1762        }"#;
1763
1764        let result = exec
1765            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1766            .await
1767            .unwrap();
1768        let msg = result.as_str().unwrap();
1769        // Paths are redacted by redact_error_message
1770        assert!(!msg.contains("/var/secrets"), "should not leak path: {msg}");
1771        assert!(
1772            !msg.contains("credentials.json"),
1773            "should not leak filename: {msg}"
1774        );
1775        // URI itself should not appear in error (SR-R5)
1776        assert!(
1777            !msg.contains("file:///var/secrets"),
1778            "should not leak URI: {msg}"
1779        );
1780    }
1781
1782    // --- RS-S07: large content truncated, not OOM ---
1783    #[tokio::test]
1784    async fn rs_s07_large_content_truncated_not_oom() {
1785        let exec = SandboxExecutor::new(SandboxConfig {
1786            max_resource_size: 1024, // 1 KB limit
1787            timeout: Duration::from_secs(10),
1788            ..Default::default()
1789        });
1790        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1791        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1792            Some(Arc::new(LargeResourceDispatcher {
1793                content_size: 1_000_000, // 1 MB, far exceeds 1 KB limit
1794            }));
1795
1796        let code = r#"async () => {
1797            const result = await forge.readResource("s", "file:///huge");
1798            return {
1799                truncated: result._truncated,
1800                len: result.data.length,
1801            };
1802        }"#;
1803
1804        // Must complete without OOM
1805        let result = exec
1806            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1807            .await;
1808        assert!(result.is_ok(), "should complete without OOM: {result:?}");
1809        let result = result.unwrap();
1810        assert_eq!(result["truncated"], true);
1811        assert!(result["len"].as_u64().unwrap() <= 1024);
1812    }
1813
1814    #[tokio::test]
1815    async fn rs_s07b_large_unicode_resource_truncates_without_panic() {
1816        struct UnicodeResourceDispatcher;
1817
1818        #[async_trait::async_trait]
1819        impl ResourceDispatcher for UnicodeResourceDispatcher {
1820            async fn read_resource(
1821                &self,
1822                _server: &str,
1823                _uri: &str,
1824            ) -> Result<serde_json::Value, forge_error::DispatchError> {
1825                Ok(serde_json::json!({
1826                    "data": "漢".repeat(1000)
1827                }))
1828            }
1829        }
1830
1831        let exec = SandboxExecutor::new(SandboxConfig {
1832            max_resource_size: 101,
1833            timeout: Duration::from_secs(10),
1834            ..Default::default()
1835        });
1836        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1837        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1838            Some(Arc::new(UnicodeResourceDispatcher));
1839
1840        let code = r#"async () => {
1841            const result = await forge.readResource("s", "file:///unicode");
1842            return {
1843                truncated: result._truncated,
1844                data: result.data,
1845            };
1846        }"#;
1847
1848        let result = exec
1849            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1850            .await
1851            .unwrap();
1852        assert_eq!(result["truncated"], true);
1853        assert!(result["data"].as_str().unwrap().is_char_boundary(0));
1854    }
1855
1856    // --- RS-S08: many resource reads hit rate limit ---
1857    #[tokio::test]
1858    async fn rs_s08_many_reads_hit_rate_limit() {
1859        let exec = SandboxExecutor::new(SandboxConfig {
1860            max_tool_calls: 5,
1861            ..Default::default()
1862        });
1863        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1864        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1865            Some(Arc::new(TestResourceDispatcher));
1866
1867        let code = r#"async () => {
1868            let count = 0;
1869            for (let i = 0; i < 1000; i++) {
1870                try {
1871                    await forge.readResource("s", "file:///r" + i);
1872                    count++;
1873                } catch(e) {
1874                    return { count, error: e.message };
1875                }
1876            }
1877            return { count, error: null };
1878        }"#;
1879
1880        let result = exec
1881            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1882            .await
1883            .unwrap();
1884        assert_eq!(
1885            result["count"], 5,
1886            "should allow exactly max_tool_calls reads"
1887        );
1888        assert!(result["error"]
1889            .as_str()
1890            .unwrap()
1891            .contains("tool call limit exceeded"));
1892    }
1893
1894    // --- RS-S09: search mode blocks resource read ---
1895    #[tokio::test]
1896    async fn rs_s09_search_mode_blocks_resource_read() {
1897        let exec = executor();
1898        let manifest = serde_json::json!({"servers": []});
1899
1900        // In search mode, forge.readResource should not exist
1901        let code = r#"async () => {
1902            return typeof forge.readResource;
1903        }"#;
1904
1905        let result = exec.execute_search(code, &manifest).await.unwrap();
1906        assert_eq!(
1907            result, "undefined",
1908            "readResource should not exist in search mode"
1909        );
1910    }
1911
1912    // --- SR-R6: unknown server rejected at op level ---
1913    #[tokio::test]
1914    async fn sr_r6_unknown_server_rejected_at_op_level() {
1915        let exec = executor();
1916        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1917        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1918            Some(Arc::new(TestResourceDispatcher));
1919
1920        // Use execute_code_with_options to set known servers
1921        let mut known = std::collections::HashSet::new();
1922        known.insert("allowed-server".to_string());
1923
1924        let code = r#"async () => {
1925            try {
1926                await forge.readResource("nonexistent_server", "file:///x");
1927                return "should not reach here";
1928            } catch(e) {
1929                return e.message;
1930            }
1931        }"#;
1932
1933        let result = exec
1934            .execute_code_with_options(
1935                code,
1936                tool_dispatcher,
1937                resource_dispatcher,
1938                None,
1939                Some(known),
1940                None,
1941            )
1942            .await
1943            .unwrap();
1944        let msg = result.as_str().unwrap();
1945        assert!(
1946            msg.contains("unknown server"),
1947            "expected 'unknown server' error, got: {msg}"
1948        );
1949        assert!(
1950            msg.contains("nonexistent_server"),
1951            "should mention the server name: {msg}"
1952        );
1953    }
1954
1955    // --- RS-S10: audit log records resource reads with URI hash ---
1956    #[tokio::test]
1957    async fn rs_s10_audit_records_resource_reads_with_uri_hash() {
1958        struct CapturingAuditLogger {
1959            entries: std::sync::Mutex<Vec<crate::audit::AuditEntry>>,
1960        }
1961
1962        #[async_trait::async_trait]
1963        impl crate::audit::AuditLogger for CapturingAuditLogger {
1964            async fn log(&self, entry: &crate::audit::AuditEntry) {
1965                self.entries.lock().unwrap().push(entry.clone());
1966            }
1967        }
1968
1969        let logger = Arc::new(CapturingAuditLogger {
1970            entries: std::sync::Mutex::new(Vec::new()),
1971        });
1972        let exec = SandboxExecutor::with_audit_logger(SandboxConfig::default(), logger.clone());
1973        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
1974        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
1975            Some(Arc::new(TestResourceDispatcher));
1976
1977        let code = r#"async () => {
1978            await forge.readResource("my-server", "file:///logs/app.log");
1979            return "done";
1980        }"#;
1981
1982        let _ = exec
1983            .execute_code(code, tool_dispatcher, resource_dispatcher, None)
1984            .await
1985            .unwrap();
1986
1987        let entries = logger.entries.lock().unwrap();
1988        assert_eq!(entries.len(), 1);
1989        let entry = &entries[0];
1990        assert_eq!(entry.resource_reads.len(), 1);
1991
1992        let read = &entry.resource_reads[0];
1993        assert_eq!(read.server, "my-server");
1994        assert!(read.success);
1995        // URI should be hashed, not raw
1996        assert_ne!(
1997            read.uri_hash, "file:///logs/app.log",
1998            "URI should be hashed, not stored raw"
1999        );
2000        // Verify it's a valid SHA-256 hex string
2001        assert_eq!(read.uri_hash.len(), 64, "should be SHA-256 hex");
2002        assert!(read.uri_hash.chars().all(|c| c.is_ascii_hexdigit()));
2003    }
2004
2005    #[tokio::test]
2006    async fn large_output_is_rejected() {
2007        let exec = SandboxExecutor::new(SandboxConfig {
2008            max_output_size: 100,
2009            ..Default::default()
2010        });
2011        let manifest = serde_json::json!({});
2012
2013        let code = r#"async () => {
2014            return "x".repeat(1000);
2015        }"#;
2016
2017        let err = exec.execute_search(code, &manifest).await.unwrap_err();
2018        assert!(
2019            matches!(err, SandboxError::OutputTooLarge { .. }),
2020            "expected output too large, got: {err:?}"
2021        );
2022    }
2023
2024    // --- Stash test infrastructure ---
2025
2026    /// Direct stash dispatcher wrapping an Arc<tokio::sync::Mutex<SessionStash>>.
2027    /// Used by integration/security tests without going through IPC.
2028    struct DirectStashDispatcher {
2029        stash: Arc<tokio::sync::Mutex<crate::stash::SessionStash>>,
2030        current_group: Option<String>,
2031    }
2032
2033    #[async_trait::async_trait]
2034    impl crate::StashDispatcher for DirectStashDispatcher {
2035        async fn put(
2036            &self,
2037            key: &str,
2038            value: serde_json::Value,
2039            ttl_secs: Option<u32>,
2040            _current_group: Option<String>,
2041        ) -> Result<serde_json::Value, forge_error::DispatchError> {
2042            let ttl = ttl_secs
2043                .filter(|&s| s > 0)
2044                .map(|s| std::time::Duration::from_secs(s as u64));
2045            let mut stash = self.stash.lock().await;
2046            stash
2047                .put(key, value, ttl, self.current_group.as_deref())
2048                .map_err(|e| forge_error::DispatchError::Internal(e.into()))?;
2049            Ok(serde_json::json!({"ok": true}))
2050        }
2051
2052        async fn get(
2053            &self,
2054            key: &str,
2055            _current_group: Option<String>,
2056        ) -> Result<serde_json::Value, forge_error::DispatchError> {
2057            let stash = self.stash.lock().await;
2058            match stash
2059                .get(key, self.current_group.as_deref())
2060                .map_err(|e| forge_error::DispatchError::Internal(e.into()))?
2061            {
2062                Some(v) => Ok(v.clone()),
2063                None => Ok(serde_json::Value::Null),
2064            }
2065        }
2066
2067        async fn delete(
2068            &self,
2069            key: &str,
2070            _current_group: Option<String>,
2071        ) -> Result<serde_json::Value, forge_error::DispatchError> {
2072            let mut stash = self.stash.lock().await;
2073            let deleted = stash
2074                .delete(key, self.current_group.as_deref())
2075                .map_err(|e| forge_error::DispatchError::Internal(e.into()))?;
2076            Ok(serde_json::json!({"deleted": deleted}))
2077        }
2078
2079        async fn keys(
2080            &self,
2081            _current_group: Option<String>,
2082        ) -> Result<serde_json::Value, forge_error::DispatchError> {
2083            let stash = self.stash.lock().await;
2084            let keys: Vec<&str> = stash.keys(self.current_group.as_deref());
2085            Ok(serde_json::json!(keys))
2086        }
2087    }
2088
2089    fn make_stash(
2090        config: crate::stash::StashConfig,
2091    ) -> Arc<tokio::sync::Mutex<crate::stash::SessionStash>> {
2092        Arc::new(tokio::sync::Mutex::new(crate::stash::SessionStash::new(
2093            config,
2094        )))
2095    }
2096
2097    fn make_stash_dispatcher(
2098        stash: Arc<tokio::sync::Mutex<crate::stash::SessionStash>>,
2099        group: Option<&str>,
2100    ) -> Arc<dyn crate::StashDispatcher> {
2101        Arc::new(DirectStashDispatcher {
2102            stash,
2103            current_group: group.map(str::to_string),
2104        })
2105    }
2106
2107    // --- ST-I01: Two execute_code calls sharing stash (put in first, get in second) ---
2108    #[tokio::test]
2109    async fn st_i01_stash_shared_across_executions() {
2110        let exec = executor();
2111        let stash = make_stash(crate::stash::StashConfig::default());
2112        let sd = make_stash_dispatcher(stash.clone(), None);
2113        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2114
2115        // First execution: put a value
2116        let code1 = r#"async () => {
2117            await forge.stash.put("shared-key", { value: 42 });
2118            return "stored";
2119        }"#;
2120        let result1 = exec
2121            .execute_code(code1, dispatcher.clone(), None, Some(sd.clone()))
2122            .await
2123            .unwrap();
2124        assert_eq!(result1, "stored");
2125
2126        // Second execution: get the value
2127        let sd2 = make_stash_dispatcher(stash, None);
2128        let code2 = r#"async () => {
2129            const v = await forge.stash.get("shared-key");
2130            return v;
2131        }"#;
2132        let result2 = exec
2133            .execute_code(code2, dispatcher, None, Some(sd2))
2134            .await
2135            .unwrap();
2136        assert_eq!(result2["value"], 42);
2137    }
2138
2139    // --- ST-I02: Stash put + get within single execution ---
2140    #[tokio::test]
2141    async fn st_i02_stash_put_get_single_execution() {
2142        let exec = executor();
2143        let stash = make_stash(crate::stash::StashConfig::default());
2144        let sd = make_stash_dispatcher(stash, None);
2145        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2146
2147        let code = r#"async () => {
2148            await forge.stash.put("key", "hello");
2149            const v = await forge.stash.get("key");
2150            return v;
2151        }"#;
2152        let result = exec
2153            .execute_code(code, dispatcher, None, Some(sd))
2154            .await
2155            .unwrap();
2156        assert_eq!(result, "hello");
2157    }
2158
2159    #[tokio::test]
2160    async fn st_i02b_stash_max_calls_is_enforced_per_execution() {
2161        let exec = SandboxExecutor::new(SandboxConfig {
2162            max_stash_calls: Some(1),
2163            ..Default::default()
2164        });
2165        let stash = make_stash(crate::stash::StashConfig::default());
2166        let sd = make_stash_dispatcher(stash, None);
2167        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2168
2169        let code = r#"async () => {
2170            try {
2171                await forge.stash.put("key", "hello");
2172                await forge.stash.get("key");
2173                return "not limited";
2174            } catch(e) {
2175                return e.message;
2176            }
2177        }"#;
2178        let result = exec
2179            .execute_code(code, dispatcher, None, Some(sd))
2180            .await
2181            .unwrap();
2182        assert!(
2183            result.as_str().unwrap().contains("stash operation limit"),
2184            "expected stash limit error, got: {result:?}"
2185        );
2186    }
2187
2188    // --- ST-I03: Stash group isolation (put with group A, get with group B fails) ---
2189    #[tokio::test]
2190    async fn st_i03_stash_group_isolation() {
2191        let exec = executor();
2192        let stash = make_stash(crate::stash::StashConfig::default());
2193        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2194
2195        // Put with group A
2196        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2197        let code1 = r#"async () => {
2198            await forge.stash.put("secret", "group-a-data");
2199            return "stored";
2200        }"#;
2201        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2202            .await
2203            .unwrap();
2204
2205        // Get with group B should fail
2206        let sd_b = make_stash_dispatcher(stash, Some("group-b"));
2207        let code2 = r#"async () => {
2208            try {
2209                await forge.stash.get("secret");
2210                return "should not reach here";
2211            } catch(e) {
2212                return e.message;
2213            }
2214        }"#;
2215        let result = exec
2216            .execute_code(code2, dispatcher, None, Some(sd_b))
2217            .await
2218            .unwrap();
2219        assert!(
2220            result.as_str().unwrap().contains("cross-group"),
2221            "expected cross-group error, got: {result:?}"
2222        );
2223    }
2224
2225    // --- ST-I05: Stash combined with callTool + readResource ---
2226    #[tokio::test]
2227    async fn st_i05_stash_combined_with_tool_and_resource() {
2228        let exec = executor();
2229        let stash = make_stash(crate::stash::StashConfig::default());
2230        let sd = make_stash_dispatcher(stash, None);
2231        let tool_dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2232        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
2233            Some(Arc::new(TestResourceDispatcher));
2234
2235        let code = r#"async () => {
2236            // Call a tool
2237            const toolResult = await forge.callTool("s", "t", {});
2238
2239            // Read a resource
2240            const resource = await forge.readResource("s", "file:///data");
2241
2242            // Store combined result in stash
2243            await forge.stash.put("combined", {
2244                tool: toolResult.server,
2245                resource: resource.content
2246            });
2247
2248            // Read it back
2249            const v = await forge.stash.get("combined");
2250            return v;
2251        }"#;
2252        let result = exec
2253            .execute_code(code, tool_dispatcher, resource_dispatcher, Some(sd))
2254            .await
2255            .unwrap();
2256        assert_eq!(result["tool"], "s");
2257        assert_eq!(result["resource"], "test resource content");
2258    }
2259
2260    // --- ST-I06: Stash key limit produces clear error ---
2261    #[tokio::test]
2262    async fn st_i06_stash_key_limit_error() {
2263        let exec = executor();
2264        let stash = make_stash(crate::stash::StashConfig {
2265            max_keys: 2,
2266            ..Default::default()
2267        });
2268        let sd = make_stash_dispatcher(stash, None);
2269        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2270
2271        let code = r#"async () => {
2272            await forge.stash.put("k1", 1);
2273            await forge.stash.put("k2", 2);
2274            try {
2275                await forge.stash.put("k3", 3);
2276                return "should not reach here";
2277            } catch(e) {
2278                return e.message;
2279            }
2280        }"#;
2281        let result = exec
2282            .execute_code(code, dispatcher, None, Some(sd))
2283            .await
2284            .unwrap();
2285        assert!(
2286            result.as_str().unwrap().contains("key limit"),
2287            "expected key limit error, got: {result:?}"
2288        );
2289    }
2290
2291    // --- ST-I07: Stash value size limit produces clear error ---
2292    #[tokio::test]
2293    async fn st_i07_stash_value_size_limit_error() {
2294        let exec = executor();
2295        let stash = make_stash(crate::stash::StashConfig {
2296            max_value_size: 50,
2297            ..Default::default()
2298        });
2299        let sd = make_stash_dispatcher(stash, None);
2300        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2301
2302        let code = r#"async () => {
2303            try {
2304                await forge.stash.put("k", "x".repeat(100));
2305                return "should not reach here";
2306            } catch(e) {
2307                return e.message;
2308            }
2309        }"#;
2310        let result = exec
2311            .execute_code(code, dispatcher, None, Some(sd))
2312            .await
2313            .unwrap();
2314        assert!(
2315            result.as_str().unwrap().contains("too large"),
2316            "expected value too large error, got: {result:?}"
2317        );
2318    }
2319
2320    // --- ST-I08: Stash keys() returns correct subset for group context ---
2321    #[tokio::test]
2322    async fn st_i08_stash_keys_group_subset() {
2323        let exec = executor();
2324        let stash = make_stash(crate::stash::StashConfig::default());
2325        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2326
2327        // Put a public key and a group-A key
2328        let sd_none = make_stash_dispatcher(stash.clone(), None);
2329        let code1 = r#"async () => {
2330            await forge.stash.put("public-key", "pub");
2331            return "ok";
2332        }"#;
2333        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_none))
2334            .await
2335            .unwrap();
2336
2337        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2338        let code2 = r#"async () => {
2339            await forge.stash.put("group-a-key", "secret");
2340            return "ok";
2341        }"#;
2342        exec.execute_code(code2, dispatcher.clone(), None, Some(sd_a))
2343            .await
2344            .unwrap();
2345
2346        // List keys from group-a perspective: should see both
2347        let sd_a2 = make_stash_dispatcher(stash.clone(), Some("group-a"));
2348        let code3 = r#"async () => {
2349            const k = await forge.stash.keys();
2350            k.sort();
2351            return k;
2352        }"#;
2353        let result = exec
2354            .execute_code(code3, dispatcher.clone(), None, Some(sd_a2))
2355            .await
2356            .unwrap();
2357        let keys = result.as_array().unwrap();
2358        assert_eq!(keys.len(), 2);
2359
2360        // List keys from ungrouped: should only see public
2361        let sd_none2 = make_stash_dispatcher(stash, None);
2362        let code4 = r#"async () => {
2363            const k = await forge.stash.keys();
2364            return k;
2365        }"#;
2366        let result2 = exec
2367            .execute_code(code4, dispatcher, None, Some(sd_none2))
2368            .await
2369            .unwrap();
2370        let keys2 = result2.as_array().unwrap();
2371        assert_eq!(keys2.len(), 1);
2372        assert_eq!(keys2[0], "public-key");
2373    }
2374
2375    // --- Security Tests ---
2376
2377    // --- ST-S01: Stash key with path traversal characters rejected ---
2378    #[tokio::test]
2379    async fn st_s01_stash_key_path_traversal_rejected() {
2380        let exec = executor();
2381        let stash = make_stash(crate::stash::StashConfig::default());
2382        let sd = make_stash_dispatcher(stash, None);
2383        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2384
2385        let code = r#"async () => {
2386            try {
2387                await forge.stash.put("../../etc/passwd", "evil");
2388                return "should not reach here";
2389            } catch(e) {
2390                return e.message;
2391            }
2392        }"#;
2393        let result = exec
2394            .execute_code(code, dispatcher, None, Some(sd))
2395            .await
2396            .unwrap();
2397        assert!(
2398            result.as_str().unwrap().contains("invalid"),
2399            "expected invalid key error, got: {result:?}"
2400        );
2401    }
2402
2403    // --- ST-S02: Stash key with script injection (<script>) rejected ---
2404    #[tokio::test]
2405    async fn st_s02_stash_key_script_injection_rejected() {
2406        let exec = executor();
2407        let stash = make_stash(crate::stash::StashConfig::default());
2408        let sd = make_stash_dispatcher(stash, None);
2409        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2410
2411        let code = r#"async () => {
2412            try {
2413                await forge.stash.put("<script>alert(1)</script>", "evil");
2414                return "should not reach here";
2415            } catch(e) {
2416                return e.message;
2417            }
2418        }"#;
2419        let result = exec
2420            .execute_code(code, dispatcher, None, Some(sd))
2421            .await
2422            .unwrap();
2423        assert!(
2424            result.as_str().unwrap().contains("invalid"),
2425            "expected invalid key error, got: {result:?}"
2426        );
2427    }
2428
2429    // --- ST-S03: Stash value containing JS code stored as inert data ---
2430    #[tokio::test]
2431    async fn st_s03_stash_value_js_code_is_inert() {
2432        let exec = executor();
2433        let stash = make_stash(crate::stash::StashConfig::default());
2434        let sd = make_stash_dispatcher(stash, None);
2435        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2436
2437        // Store a string that looks like executable JS code.
2438        // We build it from parts to avoid triggering the banned-pattern validator.
2439        let code = r#"async () => {
2440            const part1 = "function() { return ";
2441            const part2 = "globalThis.secret; }";
2442            const malicious = part1 + part2;
2443            await forge.stash.put("code-value", malicious);
2444            const v = await forge.stash.get("code-value");
2445            // The value should be a plain string, not executed
2446            return typeof v === "string" && v.includes("globalThis");
2447        }"#;
2448        let result = exec
2449            .execute_code(code, dispatcher, None, Some(sd))
2450            .await
2451            .unwrap();
2452        assert_eq!(result, true, "JS code in stash values should be inert data");
2453    }
2454
2455    // --- ST-S04: Stash put from group A, get from group B → error ---
2456    #[tokio::test]
2457    async fn st_s04_stash_cross_group_get_error() {
2458        let exec = executor();
2459        let stash = make_stash(crate::stash::StashConfig::default());
2460        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2461
2462        // Put with group A
2463        let sd_a = make_stash_dispatcher(stash.clone(), Some("team-alpha"));
2464        let code1 = r#"async () => {
2465            await forge.stash.put("alpha-secret", "classified");
2466            return "stored";
2467        }"#;
2468        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2469            .await
2470            .unwrap();
2471
2472        // Get with group B should error
2473        let sd_b = make_stash_dispatcher(stash, Some("team-beta"));
2474        let code2 = r#"async () => {
2475            try {
2476                await forge.stash.get("alpha-secret");
2477                return "leaked";
2478            } catch(e) {
2479                return e.message;
2480            }
2481        }"#;
2482        let result = exec
2483            .execute_code(code2, dispatcher, None, Some(sd_b))
2484            .await
2485            .unwrap();
2486        assert!(
2487            result.as_str().unwrap().contains("cross-group"),
2488            "expected cross-group error, got: {result:?}"
2489        );
2490    }
2491
2492    // --- ST-S05: Stash put from group A, get from ungrouped → error ---
2493    #[tokio::test]
2494    async fn st_s05_stash_grouped_entry_inaccessible_to_ungrouped() {
2495        let exec = executor();
2496        let stash = make_stash(crate::stash::StashConfig::default());
2497        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2498
2499        // Put with group A
2500        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-x"));
2501        let code1 = r#"async () => {
2502            await forge.stash.put("gx-data", 999);
2503            return "stored";
2504        }"#;
2505        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2506            .await
2507            .unwrap();
2508
2509        // Get from ungrouped should error
2510        let sd_none = make_stash_dispatcher(stash, None);
2511        let code2 = r#"async () => {
2512            try {
2513                await forge.stash.get("gx-data");
2514                return "leaked";
2515            } catch(e) {
2516                return e.message;
2517            }
2518        }"#;
2519        let result = exec
2520            .execute_code(code2, dispatcher, None, Some(sd_none))
2521            .await
2522            .unwrap();
2523        assert!(
2524            result.as_str().unwrap().contains("cross-group"),
2525            "expected cross-group error, got: {result:?}"
2526        );
2527    }
2528
2529    // --- ST-S06: Stash total size limit prevents OOM (many puts) ---
2530    #[tokio::test]
2531    async fn st_s06_stash_total_size_limit_prevents_oom() {
2532        let exec = executor();
2533        let stash = make_stash(crate::stash::StashConfig {
2534            max_total_size: 200,
2535            max_value_size: 1024,
2536            max_keys: 1000,
2537            ..Default::default()
2538        });
2539        let sd = make_stash_dispatcher(stash, None);
2540        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2541
2542        let code = r#"async () => {
2543            let count = 0;
2544            for (let i = 0; i < 100; i++) {
2545                try {
2546                    await forge.stash.put("k" + i, "x".repeat(50));
2547                    count++;
2548                } catch(e) {
2549                    return { count, error: e.message };
2550                }
2551            }
2552            return { count, error: null };
2553        }"#;
2554        let result = exec
2555            .execute_code(code, dispatcher, None, Some(sd))
2556            .await
2557            .unwrap();
2558        // Should have been stopped before 100 puts due to total_size=200
2559        let count = result["count"].as_i64().unwrap();
2560        assert!(
2561            count < 100,
2562            "total size limit should prevent all 100 puts, but {count} succeeded"
2563        );
2564        assert!(
2565            result["error"].as_str().unwrap().contains("total size"),
2566            "expected total size error, got: {:?}",
2567            result["error"]
2568        );
2569    }
2570
2571    // --- ST-S07: Stash ops in search() mode blocked ---
2572    #[tokio::test]
2573    async fn st_s07_stash_ops_blocked_in_search_mode() {
2574        let exec = executor();
2575        let manifest = serde_json::json!({"servers": []});
2576
2577        // In search mode, forge.stash should not exist
2578        let code = r#"async () => {
2579            return typeof forge.stash;
2580        }"#;
2581
2582        let result = exec.execute_search(code, &manifest).await.unwrap();
2583        assert_eq!(result, "undefined", "stash should not exist in search mode");
2584    }
2585
2586    // --- ST-S09: Error messages from stash ops don't leak other keys/values ---
2587    #[tokio::test]
2588    async fn st_s09_stash_error_messages_dont_leak_data() {
2589        let exec = executor();
2590        let stash = make_stash(crate::stash::StashConfig::default());
2591        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2592
2593        // Put a secret value with group-a
2594        let sd_a = make_stash_dispatcher(stash.clone(), Some("group-a"));
2595        let code1 = r#"async () => {
2596            await forge.stash.put("secret-key", "top-secret-value-12345");
2597            return "stored";
2598        }"#;
2599        exec.execute_code(code1, dispatcher.clone(), None, Some(sd_a))
2600            .await
2601            .unwrap();
2602
2603        // Try to access from group-b — error should not contain the value
2604        let sd_b = make_stash_dispatcher(stash, Some("group-b"));
2605        let code2 = r#"async () => {
2606            try {
2607                await forge.stash.get("secret-key");
2608                return "should not reach here";
2609            } catch(e) {
2610                return e.message;
2611            }
2612        }"#;
2613        let result = exec
2614            .execute_code(code2, dispatcher, None, Some(sd_b))
2615            .await
2616            .unwrap();
2617        let msg = result.as_str().unwrap();
2618        assert!(
2619            !msg.contains("top-secret-value-12345"),
2620            "error should not leak value: {msg}"
2621        );
2622        assert!(
2623            !msg.contains("secret-key"),
2624            "error should not leak key names: {msg}"
2625        );
2626    }
2627
2628    // --- ST-S10: TTL expiry enforced ---
2629    #[tokio::test]
2630    async fn st_s10_stash_ttl_expiry_enforced() {
2631        let exec = executor();
2632        let stash = make_stash(crate::stash::StashConfig::default());
2633        let sd = make_stash_dispatcher(stash.clone(), None);
2634        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2635
2636        // Put with 1-second TTL
2637        let code1 = r#"async () => {
2638            await forge.stash.put("ttl-key", "ephemeral", {ttl: 1});
2639            const v = await forge.stash.get("ttl-key");
2640            return v;
2641        }"#;
2642        let result1 = exec
2643            .execute_code(code1, dispatcher.clone(), None, Some(sd))
2644            .await
2645            .unwrap();
2646        assert_eq!(result1, "ephemeral", "should be readable immediately");
2647
2648        // Wait for TTL to expire
2649        tokio::time::sleep(std::time::Duration::from_millis(1100)).await;
2650
2651        // Get after expiry should return null
2652        let sd2 = make_stash_dispatcher(stash, None);
2653        let code2 = r#"async () => {
2654            const v = await forge.stash.get("ttl-key");
2655            return v;
2656        }"#;
2657        let result2 = exec
2658            .execute_code(code2, dispatcher, None, Some(sd2))
2659            .await
2660            .unwrap();
2661        assert_eq!(
2662            result2,
2663            serde_json::Value::Null,
2664            "expired key should return null"
2665        );
2666    }
2667
2668    // =========================================================================
2669    // Phase 7: forge.parallel() tests (PL-U01..PL-U09, PL-S01..PL-S05)
2670    // =========================================================================
2671
2672    // --- PL-U01: parallel with 3 successful calls returns all results ---
2673    #[tokio::test]
2674    async fn pl_u01_parallel_three_successful_calls() {
2675        let exec = executor();
2676        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2677
2678        let code = r#"async () => {
2679            const result = await forge.parallel([
2680                () => forge.callTool("s1", "t1", { id: 1 }),
2681                () => forge.callTool("s2", "t2", { id: 2 }),
2682                () => forge.callTool("s3", "t3", { id: 3 }),
2683            ]);
2684            return result;
2685        }"#;
2686
2687        let result = exec
2688            .execute_code(code, dispatcher, None, None)
2689            .await
2690            .unwrap();
2691        let results = result["results"].as_array().unwrap();
2692        assert_eq!(results.len(), 3);
2693        assert_eq!(results[0]["server"], "s1");
2694        assert_eq!(results[1]["server"], "s2");
2695        assert_eq!(results[2]["server"], "s3");
2696        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2697        assert_eq!(result["aborted"], false);
2698    }
2699
2700    // --- PL-U02: parallel with 1 failure returns partial results + error ---
2701    #[tokio::test]
2702    async fn pl_u02_parallel_partial_failure() {
2703        struct PartialFailDispatcher;
2704
2705        #[async_trait::async_trait]
2706        impl ToolDispatcher for PartialFailDispatcher {
2707            async fn call_tool(
2708                &self,
2709                _server: &str,
2710                tool: &str,
2711                _args: serde_json::Value,
2712            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2713                if tool == "fail" {
2714                    Err(anyhow::anyhow!("deliberate failure").into())
2715                } else {
2716                    Ok(serde_json::json!({"tool": tool, "ok": true}))
2717                }
2718            }
2719        }
2720
2721        let exec = executor();
2722        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(PartialFailDispatcher);
2723
2724        let code = r#"async () => {
2725            return await forge.parallel([
2726                () => forge.callTool("s", "ok1", {}),
2727                () => forge.callTool("s", "fail", {}),
2728                () => forge.callTool("s", "ok2", {}),
2729            ]);
2730        }"#;
2731
2732        let result = exec
2733            .execute_code(code, dispatcher, None, None)
2734            .await
2735            .unwrap();
2736        let results = result["results"].as_array().unwrap();
2737        assert!(results[0]["ok"] == true);
2738        assert!(results[1].is_null(), "failed call should have null result");
2739        assert!(results[2]["ok"] == true);
2740        let errors = result["errors"].as_array().unwrap();
2741        assert_eq!(errors.len(), 1);
2742        assert_eq!(errors[0]["index"], 1);
2743    }
2744
2745    // --- PL-U03: parallel with failFast aborts on first error ---
2746    #[tokio::test]
2747    async fn pl_u03_parallel_fail_fast() {
2748        let exec = SandboxExecutor::new(SandboxConfig {
2749            max_tool_calls: 50,
2750            max_parallel: 2, // batch size 2
2751            ..Default::default()
2752        });
2753
2754        struct FailOnSecondDispatcher {
2755            calls: std::sync::Mutex<u32>,
2756        }
2757
2758        #[async_trait::async_trait]
2759        impl ToolDispatcher for FailOnSecondDispatcher {
2760            async fn call_tool(
2761                &self,
2762                _server: &str,
2763                tool: &str,
2764                _args: serde_json::Value,
2765            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2766                let mut c = self.calls.lock().unwrap();
2767                *c += 1;
2768                if tool == "fail" {
2769                    Err(anyhow::anyhow!("fail").into())
2770                } else {
2771                    Ok(serde_json::json!({"ok": true}))
2772                }
2773            }
2774        }
2775
2776        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(FailOnSecondDispatcher {
2777            calls: std::sync::Mutex::new(0),
2778        });
2779
2780        // 4 calls with batch=2. Second call in first batch fails, so second batch should be skipped
2781        let code = r#"async () => {
2782            return await forge.parallel([
2783                () => forge.callTool("s", "ok", {}),
2784                () => forge.callTool("s", "fail", {}),
2785                () => forge.callTool("s", "ok", {}),
2786                () => forge.callTool("s", "ok", {}),
2787            ], { failFast: true });
2788        }"#;
2789
2790        let result = exec
2791            .execute_code(code, dispatcher, None, None)
2792            .await
2793            .unwrap();
2794        assert_eq!(result["aborted"], true);
2795        assert!(!result["errors"].as_array().unwrap().is_empty());
2796    }
2797
2798    // --- PL-U04: parallel respects concurrency limit ---
2799    #[tokio::test]
2800    async fn pl_u04_parallel_respects_concurrency_limit() {
2801        let exec = SandboxExecutor::new(SandboxConfig {
2802            max_parallel: 2,
2803            timeout: Duration::from_secs(10),
2804            ..Default::default()
2805        });
2806
2807        struct ConcurrencyTracker {
2808            current: std::sync::atomic::AtomicUsize,
2809            peak: std::sync::atomic::AtomicUsize,
2810        }
2811
2812        #[async_trait::async_trait]
2813        impl ToolDispatcher for ConcurrencyTracker {
2814            async fn call_tool(
2815                &self,
2816                _server: &str,
2817                _tool: &str,
2818                _args: serde_json::Value,
2819            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2820                let c = self
2821                    .current
2822                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
2823                    + 1;
2824                // Update peak
2825                self.peak.fetch_max(c, std::sync::atomic::Ordering::SeqCst);
2826                // Small delay to let concurrent calls overlap
2827                tokio::time::sleep(Duration::from_millis(10)).await;
2828                self.current
2829                    .fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
2830                Ok(serde_json::json!({"peak": self.peak.load(std::sync::atomic::Ordering::SeqCst)}))
2831            }
2832        }
2833
2834        let tracker = Arc::new(ConcurrencyTracker {
2835            current: std::sync::atomic::AtomicUsize::new(0),
2836            peak: std::sync::atomic::AtomicUsize::new(0),
2837        });
2838        let dispatcher: Arc<dyn ToolDispatcher> = tracker.clone();
2839
2840        // 6 calls with max_parallel=2
2841        let code = r#"async () => {
2842            return await forge.parallel([
2843                () => forge.callTool("s", "t", {}),
2844                () => forge.callTool("s", "t", {}),
2845                () => forge.callTool("s", "t", {}),
2846                () => forge.callTool("s", "t", {}),
2847                () => forge.callTool("s", "t", {}),
2848                () => forge.callTool("s", "t", {}),
2849            ]);
2850        }"#;
2851
2852        let result = exec
2853            .execute_code(code, dispatcher, None, None)
2854            .await
2855            .unwrap();
2856        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2857        let peak = tracker.peak.load(std::sync::atomic::Ordering::SeqCst);
2858        assert!(peak <= 2, "peak concurrency should be <= 2, was: {peak}");
2859    }
2860
2861    // --- PL-U05: parallel with empty array ---
2862    #[tokio::test]
2863    async fn pl_u05_parallel_empty_array() {
2864        let exec = executor();
2865        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2866
2867        let code = r#"async () => {
2868            return await forge.parallel([]);
2869        }"#;
2870
2871        let result = exec
2872            .execute_code(code, dispatcher, None, None)
2873            .await
2874            .unwrap();
2875        assert_eq!(result["results"].as_array().unwrap().len(), 0);
2876        assert_eq!(result["errors"].as_array().unwrap().len(), 0);
2877        assert_eq!(result["aborted"], false);
2878    }
2879
2880    // --- PL-U06: parallel with single call ---
2881    #[tokio::test]
2882    async fn pl_u06_parallel_single_call() {
2883        let exec = executor();
2884        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2885
2886        let code = r#"async () => {
2887            return await forge.parallel([
2888                () => forge.callTool("s", "t", { id: 1 }),
2889            ]);
2890        }"#;
2891
2892        let result = exec
2893            .execute_code(code, dispatcher, None, None)
2894            .await
2895            .unwrap();
2896        let results = result["results"].as_array().unwrap();
2897        assert_eq!(results.len(), 1);
2898        assert_eq!(results[0]["server"], "s");
2899    }
2900
2901    // --- PL-U07: parallel errors contain redacted messages ---
2902    #[tokio::test]
2903    async fn pl_u07_parallel_errors_redacted() {
2904        struct LeakyDispatcher;
2905
2906        #[async_trait::async_trait]
2907        impl ToolDispatcher for LeakyDispatcher {
2908            async fn call_tool(
2909                &self,
2910                _server: &str,
2911                _tool: &str,
2912                _args: serde_json::Value,
2913            ) -> Result<serde_json::Value, forge_error::DispatchError> {
2914                Err(anyhow::anyhow!("connection to http://internal.secret:9999/api failed").into())
2915            }
2916        }
2917
2918        let exec = executor();
2919        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(LeakyDispatcher);
2920
2921        let code = r#"async () => {
2922            return await forge.parallel([
2923                () => forge.callTool("server", "tool", {}),
2924            ]);
2925        }"#;
2926
2927        let result = exec
2928            .execute_code(code, dispatcher, None, None)
2929            .await
2930            .unwrap();
2931        let errors = result["errors"].as_array().unwrap();
2932        assert_eq!(errors.len(), 1);
2933        let msg = errors[0]["error"].as_str().unwrap();
2934        assert!(!msg.contains("internal.secret"), "should redact URL: {msg}");
2935    }
2936
2937    // --- PL-U08: parallel combined with readResource ---
2938    #[tokio::test]
2939    async fn pl_u08_parallel_with_read_resource() {
2940        let exec = executor();
2941        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2942        let resource_dispatcher: Option<Arc<dyn ResourceDispatcher>> =
2943            Some(Arc::new(TestResourceDispatcher));
2944
2945        let code = r#"async () => {
2946            return await forge.parallel([
2947                () => forge.callTool("s", "t", {}),
2948                () => forge.readResource("rs", "file:///log"),
2949            ]);
2950        }"#;
2951
2952        let result = exec
2953            .execute_code(code, dispatcher, resource_dispatcher, None)
2954            .await
2955            .unwrap();
2956        let results = result["results"].as_array().unwrap();
2957        assert_eq!(results.len(), 2);
2958        assert_eq!(results[0]["server"], "s");
2959        assert_eq!(results[1]["server"], "rs");
2960    }
2961
2962    // --- PL-U09: parallel exceeding max_tool_calls ---
2963    #[tokio::test]
2964    async fn pl_u09_parallel_exceeds_rate_limit() {
2965        let exec = SandboxExecutor::new(SandboxConfig {
2966            max_tool_calls: 3,
2967            ..Default::default()
2968        });
2969        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
2970
2971        let code = r#"async () => {
2972            return await forge.parallel([
2973                () => forge.callTool("s", "t1", {}),
2974                () => forge.callTool("s", "t2", {}),
2975                () => forge.callTool("s", "t3", {}),
2976                () => forge.callTool("s", "t4", {}),
2977                () => forge.callTool("s", "t5", {}),
2978            ]);
2979        }"#;
2980
2981        let result = exec
2982            .execute_code(code, dispatcher, None, None)
2983            .await
2984            .unwrap();
2985        // First 3 should succeed, remaining should error
2986        let errors = result["errors"].as_array().unwrap();
2987        assert!(!errors.is_empty(), "should have errors from rate limiting");
2988        // At least some results should be non-null
2989        let results = result["results"].as_array().unwrap();
2990        let successes = results.iter().filter(|r| !r.is_null()).count();
2991        assert_eq!(successes, 3, "should have exactly 3 successful calls");
2992    }
2993
2994    // --- PL-S01: cannot exceed __MAX_PARALLEL even with high concurrency opt ---
2995    #[tokio::test]
2996    async fn pl_s01_cannot_exceed_max_parallel() {
2997        let exec = SandboxExecutor::new(SandboxConfig {
2998            max_parallel: 2,
2999            timeout: Duration::from_secs(10),
3000            ..Default::default()
3001        });
3002
3003        struct ConcurrencyCounter {
3004            peak: std::sync::atomic::AtomicUsize,
3005            current: std::sync::atomic::AtomicUsize,
3006        }
3007
3008        #[async_trait::async_trait]
3009        impl ToolDispatcher for ConcurrencyCounter {
3010            async fn call_tool(
3011                &self,
3012                _server: &str,
3013                _tool: &str,
3014                _args: serde_json::Value,
3015            ) -> Result<serde_json::Value, forge_error::DispatchError> {
3016                let c = self
3017                    .current
3018                    .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
3019                    + 1;
3020                self.peak.fetch_max(c, std::sync::atomic::Ordering::SeqCst);
3021                tokio::time::sleep(Duration::from_millis(10)).await;
3022                self.current
3023                    .fetch_sub(1, std::sync::atomic::Ordering::SeqCst);
3024                Ok(serde_json::json!({}))
3025            }
3026        }
3027
3028        let counter = Arc::new(ConcurrencyCounter {
3029            peak: std::sync::atomic::AtomicUsize::new(0),
3030            current: std::sync::atomic::AtomicUsize::new(0),
3031        });
3032        let dispatcher: Arc<dyn ToolDispatcher> = counter.clone();
3033
3034        // Request concurrency=9999 but max_parallel=2
3035        let code = r#"async () => {
3036            return await forge.parallel([
3037                () => forge.callTool("s", "t", {}),
3038                () => forge.callTool("s", "t", {}),
3039                () => forge.callTool("s", "t", {}),
3040                () => forge.callTool("s", "t", {}),
3041            ], { concurrency: 9999 });
3042        }"#;
3043
3044        let _ = exec
3045            .execute_code(code, dispatcher, None, None)
3046            .await
3047            .unwrap();
3048        let peak = counter.peak.load(std::sync::atomic::Ordering::SeqCst);
3049        assert!(
3050            peak <= 2,
3051            "peak should be capped at max_parallel=2, was: {peak}"
3052        );
3053    }
3054
3055    // --- PL-S02: parallel calls to mixed strict groups ---
3056    #[tokio::test]
3057    async fn pl_s02_parallel_mixed_strict_groups() {
3058        use crate::groups::{GroupEnforcingDispatcher, GroupPolicy};
3059        use std::collections::HashMap;
3060
3061        let mut groups = HashMap::new();
3062        groups.insert(
3063            "internal".to_string(),
3064            (vec!["vault".to_string()], "strict".to_string()),
3065        );
3066        groups.insert(
3067            "external".to_string(),
3068            (vec!["slack".to_string()], "strict".to_string()),
3069        );
3070        let policy = Arc::new(GroupPolicy::from_config(&groups));
3071        let inner: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3072        let enforcer = GroupEnforcingDispatcher::new(inner, policy);
3073        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(enforcer);
3074
3075        let exec = executor();
3076
3077        // Parallel calls: first locks to "internal", second to "external" should fail
3078        let code = r#"async () => {
3079            return await forge.parallel([
3080                () => forge.callTool("vault", "secrets.list", {}),
3081                () => forge.callTool("slack", "messages.send", {}),
3082            ]);
3083        }"#;
3084
3085        let result = exec
3086            .execute_code(code, dispatcher, None, None)
3087            .await
3088            .unwrap();
3089        let errors = result["errors"].as_array().unwrap();
3090        // At least one should fail with cross-group error
3091        assert!(
3092            !errors.is_empty(),
3093            "should have cross-group error: {result:?}"
3094        );
3095        let has_cross_group = errors
3096            .iter()
3097            .any(|e| e["error"].as_str().unwrap_or("").contains("cross-group"));
3098        assert!(has_cross_group, "should mention cross-group: {result:?}");
3099    }
3100
    // --- PL-S03: 100 parallel calls hit rate limit (max_tool_calls = 10) ---
3102    #[tokio::test]
3103    async fn pl_s03_many_parallel_calls_hit_rate_limit() {
3104        let exec = SandboxExecutor::new(SandboxConfig {
3105            max_tool_calls: 10,
3106            ..Default::default()
3107        });
3108        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3109
3110        let code = r#"async () => {
3111            const calls = [];
3112            for (let i = 0; i < 100; i++) {
3113                calls.push(() => forge.callTool("s", "t", { i }));
3114            }
3115            return await forge.parallel(calls);
3116        }"#;
3117
3118        let result = exec
3119            .execute_code(code, dispatcher, None, None)
3120            .await
3121            .unwrap();
3122        let errors = result["errors"].as_array().unwrap();
3123        let results = result["results"].as_array().unwrap();
3124        let successes = results.iter().filter(|r| !r.is_null()).count();
3125        assert_eq!(
3126            successes, 10,
3127            "should have exactly max_tool_calls successes"
3128        );
3129        assert_eq!(errors.len(), 90, "remaining 90 should be rate limited");
3130    }
3131
3132    // --- PL-S04: __MAX_PARALLEL is not modifiable ---
3133    #[tokio::test]
3134    async fn pl_s04_max_parallel_not_modifiable() {
3135        let exec = SandboxExecutor::new(SandboxConfig {
3136            max_parallel: 3,
3137            ..Default::default()
3138        });
3139        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3140
3141        // Attempt to modify the frozen constant — should fail silently or throw
3142        let code = r#"async () => {
3143            try {
3144                // __MAX_PARALLEL is a local const in the bootstrap closure,
3145                // not accessible from user code. Attempting to use it would fail.
3146                return typeof __MAX_PARALLEL;
3147            } catch(e) {
3148                return "error";
3149            }
3150        }"#;
3151
3152        let result = exec
3153            .execute_code(code, dispatcher, None, None)
3154            .await
3155            .unwrap();
3156        // __MAX_PARALLEL is scoped inside the IIFE, not visible to user code
3157        assert_eq!(
3158            result, "undefined",
3159            "__MAX_PARALLEL should not be accessible"
3160        );
3161    }
3162
    // --- PL-S05: raw Promise.allSettled (bypassing forge.parallel) still hits rate limit ---
3164    #[tokio::test]
3165    async fn pl_s05_raw_promise_all_hits_rate_limit() {
3166        let exec = SandboxExecutor::new(SandboxConfig {
3167            max_tool_calls: 3,
3168            ..Default::default()
3169        });
3170        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3171
3172        // Bypass forge.parallel() and use raw Promise.allSettled
3173        let code = r#"async () => {
3174            const results = await Promise.allSettled([
3175                forge.callTool("s", "t1", {}),
3176                forge.callTool("s", "t2", {}),
3177                forge.callTool("s", "t3", {}),
3178                forge.callTool("s", "t4", {}),
3179                forge.callTool("s", "t5", {}),
3180            ]);
3181            const fulfilled = results.filter(r => r.status === "fulfilled").length;
3182            const rejected = results.filter(r => r.status === "rejected").length;
3183            return { fulfilled, rejected };
3184        }"#;
3185
3186        let result = exec
3187            .execute_code(code, dispatcher, None, None)
3188            .await
3189            .unwrap();
3190        assert_eq!(result["fulfilled"], 3, "should have 3 successful calls");
3191        assert_eq!(result["rejected"], 2, "should have 2 rate-limited calls");
3192    }
3193
3194    // =========================================================================
3195    // Phase 8: Bootstrap + Invariant Tests (BS-01..BS-12, INV-01..INV-10)
3196    // =========================================================================
3197
3198    // --- BS-01: forge object is frozen ---
3199    #[tokio::test]
3200    async fn bs_01_forge_object_is_frozen() {
3201        let exec = executor();
3202        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3203        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3204        let stash_store = make_stash(Default::default());
3205        let stash = make_stash_dispatcher(stash_store, None);
3206
3207        let code = r#"async () => {
3208            return Object.isFrozen(forge);
3209        }"#;
3210
3211        let result = exec
3212            .execute_code(code, dispatcher, Some(resource), Some(stash))
3213            .await
3214            .unwrap();
3215        assert_eq!(result, true, "forge object must be frozen");
3216    }
3217
3218    // --- BS-02: forge.stash is frozen ---
3219    #[tokio::test]
3220    async fn bs_02_forge_stash_is_frozen() {
3221        let exec = executor();
3222        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3223        let stash_store = make_stash(Default::default());
3224        let stash = make_stash_dispatcher(stash_store, None);
3225
3226        let code = r#"async () => {
3227            return Object.isFrozen(forge.stash);
3228        }"#;
3229
3230        let result = exec
3231            .execute_code(code, dispatcher, None, Some(stash))
3232            .await
3233            .unwrap();
3234        assert_eq!(result, true, "forge.stash must be frozen");
3235    }
3236
3237    // --- BS-03: __MAX_PARALLEL is not accessible from user code as a global ---
3238    #[tokio::test]
3239    async fn bs_03_max_parallel_not_accessible_as_global() {
3240        let exec = executor();
3241        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3242
3243        let code = r#"async () => {
3244            return {
3245                global: typeof globalThis.__MAX_PARALLEL,
3246                direct: typeof __MAX_PARALLEL,
3247            };
3248        }"#;
3249
3250        let result = exec
3251            .execute_code(code, dispatcher, None, None)
3252            .await
3253            .unwrap();
3254        assert_eq!(
3255            result["global"], "undefined",
3256            "__MAX_PARALLEL must not be on globalThis"
3257        );
3258        // __MAX_PARALLEL is a local const inside the bootstrap IIFE,
3259        // so direct access from user code (different scope) should fail.
3260        // User code runs in a separate eval context, so it shouldn't see the IIFE local.
3261        assert_eq!(
3262            result["direct"], "undefined",
3263            "__MAX_PARALLEL must not be accessible from user scope"
3264        );
3265    }
3266
3267    // --- BS-04: forge.readResource is a function in execute mode ---
3268    #[tokio::test]
3269    async fn bs_04_read_resource_is_function_in_execute_mode() {
3270        let exec = executor();
3271        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3272        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3273
3274        let code = r#"async () => {
3275            return typeof forge.readResource;
3276        }"#;
3277
3278        let result = exec
3279            .execute_code(code, dispatcher, Some(resource), None)
3280            .await
3281            .unwrap();
3282        assert_eq!(result, "function", "forge.readResource must be a function");
3283    }
3284
3285    // --- BS-05: forge.readResource is undefined in search mode ---
3286    #[tokio::test]
3287    async fn bs_05_read_resource_undefined_in_search_mode() {
3288        let exec = executor();
3289        let manifest = serde_json::json!({"servers": []});
3290
3291        let code = r#"async () => {
3292            return typeof forge.readResource;
3293        }"#;
3294
3295        let result = exec.execute_search(code, &manifest).await.unwrap();
3296        assert_eq!(
3297            result, "undefined",
3298            "forge.readResource must be undefined in search mode"
3299        );
3300    }
3301
3302    // --- BS-06: forge.stash has put/get/delete/keys in execute mode ---
3303    #[tokio::test]
3304    async fn bs_06_stash_has_all_methods_in_execute_mode() {
3305        let exec = executor();
3306        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3307        let stash_store = make_stash(Default::default());
3308        let stash = make_stash_dispatcher(stash_store, None);
3309
3310        let code = r#"async () => {
3311            return {
3312                type: typeof forge.stash,
3313                put: typeof forge.stash.put,
3314                get: typeof forge.stash.get,
3315                del: typeof forge.stash.delete,
3316                keys: typeof forge.stash.keys,
3317            };
3318        }"#;
3319
3320        let result = exec
3321            .execute_code(code, dispatcher, None, Some(stash))
3322            .await
3323            .unwrap();
3324        assert_eq!(result["type"], "object", "forge.stash must be an object");
3325        assert_eq!(result["put"], "function");
3326        assert_eq!(result["get"], "function");
3327        assert_eq!(result["del"], "function");
3328        assert_eq!(result["keys"], "function");
3329    }
3330
3331    // --- BS-07: forge.stash is undefined in search mode ---
3332    #[tokio::test]
3333    async fn bs_07_stash_undefined_in_search_mode() {
3334        let exec = executor();
3335        let manifest = serde_json::json!({"servers": []});
3336
3337        let code = r#"async () => {
3338            return typeof forge.stash;
3339        }"#;
3340
3341        let result = exec.execute_search(code, &manifest).await.unwrap();
3342        assert_eq!(
3343            result, "undefined",
3344            "forge.stash must be undefined in search mode"
3345        );
3346    }
3347
3348    // --- BS-08: forge.parallel is a function in execute mode ---
3349    #[tokio::test]
3350    async fn bs_08_parallel_is_function_in_execute_mode() {
3351        let exec = executor();
3352        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3353
3354        let code = r#"async () => {
3355            return typeof forge.parallel;
3356        }"#;
3357
3358        let result = exec
3359            .execute_code(code, dispatcher, None, None)
3360            .await
3361            .unwrap();
3362        assert_eq!(result, "function", "forge.parallel must be a function");
3363    }
3364
3365    // --- BS-09: forge.parallel is undefined in search mode ---
3366    #[tokio::test]
3367    async fn bs_09_parallel_undefined_in_search_mode() {
3368        let exec = executor();
3369        let manifest = serde_json::json!({"servers": []});
3370
3371        let code = r#"async () => {
3372            return typeof forge.parallel;
3373        }"#;
3374
3375        let result = exec.execute_search(code, &manifest).await.unwrap();
3376        assert_eq!(
3377            result, "undefined",
3378            "forge.parallel must be undefined in search mode"
3379        );
3380    }
3381
3382    // --- BS-10: forge.server("x").cat.tool() still works (Proxy not broken) ---
3383    #[tokio::test]
3384    async fn bs_10_server_proxy_still_works() {
3385        let exec = executor();
3386        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3387        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3388        let stash_store = make_stash(Default::default());
3389        let stash = make_stash_dispatcher(stash_store, None);
3390
3391        let code = r#"async () => {
3392            const result = await forge.server("myserver").ast.parse({ file: "test.rs" });
3393            return result;
3394        }"#;
3395
3396        let result = exec
3397            .execute_code(code, dispatcher, Some(resource), Some(stash))
3398            .await
3399            .unwrap();
3400        assert_eq!(result["server"], "myserver");
3401        assert_eq!(result["tool"], "ast.parse");
3402        assert_eq!(result["args"]["file"], "test.rs");
3403    }
3404
3405    // --- BS-11: delete globalThis.Deno still happens after new APIs ---
3406    #[tokio::test]
3407    async fn bs_11_deno_deleted_in_execute_mode() {
3408        let exec = executor();
3409        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3410        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3411        let stash_store = make_stash(Default::default());
3412        let stash = make_stash_dispatcher(stash_store, None);
3413
3414        let code = r#"async () => {
3415            return typeof globalThis.Deno;
3416        }"#;
3417
3418        let result = exec
3419            .execute_code(code, dispatcher, Some(resource), Some(stash))
3420            .await
3421            .unwrap();
3422        assert_eq!(result, "undefined", "Deno must be deleted in execute mode");
3423    }
3424
3425    // --- BS-12: Function.prototype.constructor is still undefined ---
3426    #[tokio::test]
3427    async fn bs_12_function_constructor_undefined_in_execute_mode() {
3428        let exec = executor();
3429        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3430        let resource: Arc<dyn ResourceDispatcher> = Arc::new(TestResourceDispatcher);
3431        let stash_store = make_stash(Default::default());
3432        let stash = make_stash_dispatcher(stash_store, None);
3433
3434        // After bootstrap, Function.prototype.constructor is undefined.
3435        // Since .constructor is undefined, (async fn).constructor is also undefined,
3436        // so we cannot chain .prototype.constructor — we verify via separate checks.
3437        let code = r#"async () => {
3438            const funcCtor = typeof Function.prototype.constructor;
3439            // AsyncFunction and GeneratorFunction constructors are also wiped
3440            // because they inherit from Function.prototype.
3441            const asyncFn = async function(){};
3442            const genFn = function*(){};
3443            const asyncCtor = typeof asyncFn.constructor;
3444            const genCtor = typeof genFn.constructor;
3445            return { funcCtor, asyncCtor, genCtor };
3446        }"#;
3447
3448        let result = exec
3449            .execute_code(code, dispatcher, Some(resource), Some(stash))
3450            .await
3451            .unwrap();
3452        assert_eq!(
3453            result["funcCtor"], "undefined",
3454            "Function.prototype.constructor must be undefined"
3455        );
3456        assert_eq!(
3457            result["asyncCtor"], "undefined",
3458            "AsyncFunction .constructor must be undefined"
3459        );
3460        assert_eq!(
3461            result["genCtor"], "undefined",
3462            "GeneratorFunction .constructor must be undefined"
3463        );
3464    }
3465
3466    // --- INV-01: search() mode cannot access forge.callTool ---
3467    #[tokio::test]
3468    async fn inv_01_search_mode_no_call_tool() {
3469        let exec = executor();
3470        let manifest = serde_json::json!({"servers": []});
3471
3472        let code = r#"async () => {
3473            return typeof forge.callTool;
3474        }"#;
3475
3476        let result = exec.execute_search(code, &manifest).await.unwrap();
3477        assert_eq!(
3478            result, "undefined",
3479            "forge.callTool must not exist in search mode"
3480        );
3481    }
3482
3483    // --- INV-02: search() mode cannot access forge.readResource ---
3484    #[tokio::test]
3485    async fn inv_02_search_mode_no_read_resource() {
3486        let exec = executor();
3487        let manifest = serde_json::json!({"servers": []});
3488
3489        let code = r#"async () => {
3490            return typeof forge.readResource;
3491        }"#;
3492
3493        let result = exec.execute_search(code, &manifest).await.unwrap();
3494        assert_eq!(
3495            result, "undefined",
3496            "forge.readResource must not exist in search mode"
3497        );
3498    }
3499
3500    // --- INV-03: search() mode cannot access forge.stash ---
3501    #[tokio::test]
3502    async fn inv_03_search_mode_no_stash() {
3503        let exec = executor();
3504        let manifest = serde_json::json!({"servers": []});
3505
3506        let code = r#"async () => {
3507            return typeof forge.stash;
3508        }"#;
3509
3510        let result = exec.execute_search(code, &manifest).await.unwrap();
3511        assert_eq!(
3512            result, "undefined",
3513            "forge.stash must not exist in search mode"
3514        );
3515    }
3516
3517    // --- INV-04: search() mode cannot access forge.parallel ---
3518    #[tokio::test]
3519    async fn inv_04_search_mode_no_parallel() {
3520        let exec = executor();
3521        let manifest = serde_json::json!({"servers": []});
3522
3523        let code = r#"async () => {
3524            return typeof forge.parallel;
3525        }"#;
3526
3527        let result = exec.execute_search(code, &manifest).await.unwrap();
3528        assert_eq!(
3529            result, "undefined",
3530            "forge.parallel must not exist in search mode"
3531        );
3532    }
3533
3534    // --- INV-05: eval is undefined in all modes ---
3535    #[tokio::test]
3536    async fn inv_05_eval_undefined_in_all_modes() {
3537        let exec = executor();
3538
3539        // Execute mode
3540        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3541        let code = r#"async () => { return typeof eval; }"#;
3542        let result = exec
3543            .execute_code(code, dispatcher, None, None)
3544            .await
3545            .unwrap();
3546        assert_eq!(
3547            result, "undefined",
3548            "eval must be undefined in execute mode"
3549        );
3550
3551        // Search mode
3552        let manifest = serde_json::json!({"servers": []});
3553        let result = exec.execute_search(code, &manifest).await.unwrap();
3554        assert_eq!(result, "undefined", "eval must be undefined in search mode");
3555    }
3556
3557    // --- INV-06: Function.prototype.constructor is undefined in all modes ---
3558    #[tokio::test]
3559    async fn inv_06_function_constructor_undefined_all_modes() {
3560        let exec = executor();
3561
3562        let code = r#"async () => {
3563            return typeof Function.prototype.constructor;
3564        }"#;
3565
3566        // Execute mode
3567        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3568        let result = exec
3569            .execute_code(code, dispatcher, None, None)
3570            .await
3571            .unwrap();
3572        assert_eq!(
3573            result, "undefined",
3574            "Function.prototype.constructor must be undefined in execute mode"
3575        );
3576
3577        // Search mode
3578        let manifest = serde_json::json!({"servers": []});
3579        let result = exec.execute_search(code, &manifest).await.unwrap();
3580        assert_eq!(
3581            result, "undefined",
3582            "Function.prototype.constructor must be undefined in search mode"
3583        );
3584    }
3585
3586    // --- INV-07: Deno is undefined after bootstrap in all modes ---
3587    #[tokio::test]
3588    async fn inv_07_deno_undefined_all_modes() {
3589        let exec = executor();
3590
3591        let code = r#"async () => { return typeof globalThis.Deno; }"#;
3592
3593        // Execute mode
3594        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3595        let result = exec
3596            .execute_code(code, dispatcher, None, None)
3597            .await
3598            .unwrap();
3599        assert_eq!(
3600            result, "undefined",
3601            "Deno must be undefined in execute mode"
3602        );
3603
3604        // Search mode
3605        let manifest = serde_json::json!({"servers": []});
3606        let result = exec.execute_search(code, &manifest).await.unwrap();
3607        assert_eq!(result, "undefined", "Deno must be undefined in search mode");
3608    }
3609
3610    // --- INV-08: forge object is frozen in all modes ---
3611    #[tokio::test]
3612    async fn inv_08_forge_frozen_all_modes() {
3613        let exec = executor();
3614
3615        let code = r#"async () => { return Object.isFrozen(forge); }"#;
3616
3617        // Execute mode
3618        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3619        let result = exec
3620            .execute_code(code, dispatcher, None, None)
3621            .await
3622            .unwrap();
3623        assert_eq!(result, true, "forge must be frozen in execute mode");
3624
3625        // Search mode
3626        let manifest = serde_json::json!({"servers": []});
3627        let result = exec.execute_search(code, &manifest).await.unwrap();
3628        assert_eq!(result, true, "forge must be frozen in search mode");
3629    }
3630
3631    // --- INV-09: forge.stash object is frozen in execute mode ---
3632    #[tokio::test]
3633    async fn inv_09_stash_frozen_in_execute_mode() {
3634        let exec = executor();
3635        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3636        let stash_store = make_stash(Default::default());
3637        let stash = make_stash_dispatcher(stash_store, None);
3638
3639        // Verify stash is frozen and cannot be modified
3640        let code = r#"async () => {
3641            const frozen = Object.isFrozen(forge.stash);
3642            let mutated = false;
3643            try {
3644                forge.stash.evil = () => {};
3645                mutated = forge.stash.evil !== undefined;
3646            } catch (e) {
3647                // TypeError in strict mode, which is fine
3648            }
3649            return { frozen, mutated };
3650        }"#;
3651
3652        let result = exec
3653            .execute_code(code, dispatcher, None, Some(stash))
3654            .await
3655            .unwrap();
3656        assert_eq!(result["frozen"], true, "forge.stash must be frozen");
3657        assert_eq!(result["mutated"], false, "forge.stash must not be mutable");
3658    }
3659
3660    // --- INV-10: error messages from all new ops pass through redact_error_for_llm ---
3661    #[tokio::test]
3662    async fn inv_10_error_messages_redacted() {
3663        let exec = executor();
3664
3665        // Use a resource dispatcher that fails with a message containing file paths
3666        let failing_resource: Arc<dyn ResourceDispatcher> = Arc::new(FailingResourceDispatcher {
3667            error_msg: "connection refused to /var/secret/db.sock".to_string(),
3668        });
3669        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(TestDispatcher);
3670
3671        // Structured errors are returned as values, not thrown
3672        let code = r#"async () => {
3673            const result = await forge.readResource("secret-server", "file:///data/log.txt");
3674            return result;
3675        }"#;
3676
3677        let result = exec
3678            .execute_code(code, dispatcher, Some(failing_resource), None)
3679            .await
3680            .unwrap();
3681        assert_eq!(
3682            result["error"], true,
3683            "should be structured error: {result}"
3684        );
3685        let error_msg = result["message"].as_str().unwrap();
3686        // Error should be redacted — should not contain raw file paths from the dispatcher
3687        assert!(
3688            !error_msg.contains("/var/secret/db.sock"),
3689            "error must be redacted, got: {error_msg}"
3690        );
3691        // Should mention the server name in a safe way
3692        assert!(
3693            error_msg.contains("secret-server"),
3694            "error should reference server name: {error_msg}"
3695        );
3696    }
3697
3698    // --- Structured error wiring tests (Phase R2) ---
3699
3700    /// Dispatcher that always returns ServerNotFound.
3701    struct ErrorDispatcher;
3702
3703    #[async_trait::async_trait]
3704    impl ToolDispatcher for ErrorDispatcher {
3705        async fn call_tool(
3706            &self,
3707            server: &str,
3708            _tool: &str,
3709            _args: serde_json::Value,
3710        ) -> Result<serde_json::Value, forge_error::DispatchError> {
3711            Err(forge_error::DispatchError::ServerNotFound(
3712                server.to_string(),
3713            ))
3714        }
3715    }
3716
3717    /// Dispatcher that returns ToolNotFound.
3718    struct ToolNotFoundDispatcher;
3719
3720    #[async_trait::async_trait]
3721    impl ToolDispatcher for ToolNotFoundDispatcher {
3722        async fn call_tool(
3723            &self,
3724            server: &str,
3725            tool: &str,
3726            _args: serde_json::Value,
3727        ) -> Result<serde_json::Value, forge_error::DispatchError> {
3728            Err(forge_error::DispatchError::ToolNotFound {
3729                server: server.to_string(),
3730                tool: tool.to_string(),
3731            })
3732        }
3733    }
3734
3735    #[tokio::test]
3736    async fn se_wire_01_tool_call_error_returns_structured_json() {
3737        let exec = executor();
3738        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(ErrorDispatcher);
3739
3740        let code = r#"async () => {
3741            const result = await forge.callTool("bad_server", "bad_tool", {});
3742            return result;
3743        }"#;
3744
3745        let result = exec
3746            .execute_code(code, dispatcher, None, None)
3747            .await
3748            .unwrap();
3749        assert_eq!(result["error"], true, "should be an error: {result}");
3750    }
3751
3752    #[tokio::test]
3753    async fn se_wire_02_structured_error_has_code_field() {
3754        let exec = executor();
3755        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(ErrorDispatcher);
3756
3757        let code = r#"async () => {
3758            const result = await forge.callTool("bad_server", "bad_tool", {});
3759            return result;
3760        }"#;
3761
3762        let result = exec
3763            .execute_code(code, dispatcher, None, None)
3764            .await
3765            .unwrap();
3766        assert_eq!(
3767            result["code"], "SERVER_NOT_FOUND",
3768            "should have code field: {result}"
3769        );
3770    }
3771
3772    #[tokio::test]
3773    async fn se_wire_03_structured_error_has_suggested_fix() {
3774        let exec = executor();
3775        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(ToolNotFoundDispatcher);
3776
3777        // Provide known_tools so fuzzy matching can suggest "find_symbols"
3778        let known_tools = vec![("narsil".to_string(), "find_symbols".to_string())];
3779
3780        let code = r#"async () => {
3781            const result = await forge.callTool("narsil", "fnd_symbols", {});
3782            return result;
3783        }"#;
3784
3785        let result = exec
3786            .execute_code_with_options(code, dispatcher, None, None, None, Some(known_tools))
3787            .await
3788            .unwrap();
3789        assert_eq!(result["code"], "TOOL_NOT_FOUND", "code: {result}");
3790        let fix = result["suggested_fix"]
3791            .as_str()
3792            .expect("should have suggested_fix");
3793        assert!(
3794            fix.contains("find_symbols"),
3795            "should suggest find_symbols, got: {fix}"
3796        );
3797    }
3798
3799    #[tokio::test]
3800    async fn se_wire_04_structured_error_message_is_redacted() {
3801        // Dispatcher that leaks a credential in its error message
3802        struct CredLeakDispatcher;
3803
3804        #[async_trait::async_trait]
3805        impl ToolDispatcher for CredLeakDispatcher {
3806            async fn call_tool(
3807                &self,
3808                server: &str,
3809                _tool: &str,
3810                _args: serde_json::Value,
3811            ) -> Result<serde_json::Value, forge_error::DispatchError> {
3812                Err(forge_error::DispatchError::Upstream {
3813                    server: server.to_string(),
3814                    message: "auth failed with Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.rg2e at https://internal.corp:9999/api".to_string(),
3815                })
3816            }
3817        }
3818
3819        let exec = executor();
3820        let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(CredLeakDispatcher);
3821
3822        let code = r#"async () => {
3823            const result = await forge.callTool("narsil", "find", {});
3824            return result;
3825        }"#;
3826
3827        let result = exec
3828            .execute_code(code, dispatcher, None, None)
3829            .await
3830            .unwrap();
3831        let msg = result["message"].as_str().expect("should have message");
3832        assert!(!msg.contains("eyJhbGci"), "JWT should be redacted: {msg}");
3833        assert!(
3834            !msg.contains("internal.corp"),
3835            "URL should be redacted: {msg}"
3836        );
3837    }
3838}
forge_sandbox/executor.rs

forge_sandbox/
executor.rs