forge-sandbox 0.6.0

V8 sandbox for executing LLM-generated JavaScript via deno_core
Documentation
//! Security integration tests for the Forge sandbox.
//!
//! These tests verify that security remediations work through the full
//! execution pipeline, not just at the unit level.

use std::sync::Arc;

use forge_sandbox::{SandboxConfig, SandboxExecutor, ToolDispatcher};

/// Stub dispatcher for tests that don't need real tool calls.
struct StubDispatcher;

#[async_trait::async_trait]
impl ToolDispatcher for StubDispatcher {
    async fn call_tool(
        &self,
        _server: &str,
        _tool: &str,
        _args: serde_json::Value,
    ) -> Result<serde_json::Value, forge_error::DispatchError> {
        Ok(serde_json::json!({"status": "ok"}))
    }
}

fn test_executor() -> SandboxExecutor {
    SandboxExecutor::new(SandboxConfig::default())
}

// --- SEC-E2E-01: URL-encoded traversal blocked through full execute pipeline ---
#[tokio::test]
async fn sec_e2e_01_url_encoded_traversal_blocked() {
    use forge_sandbox::validator::validate_code;

    // Verify that code using String.raw to construct traversal URIs is blocked
    let code = r#"async () => { return String.raw`../../../etc/passwd`; }"#;
    let result = validate_code(code, None);
    assert!(result.is_err(), "String.raw should be blocked by validator");

    // Also verify the executor blocks it
    let executor = test_executor();
    let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(StubDispatcher);
    let result = executor.execute_code(code, dispatcher, None, None).await;
    assert!(result.is_err(), "String.raw should be blocked in pipeline");
    let err = result.unwrap_err().to_string();
    assert!(
        err.contains("banned") || err.contains("String.raw"),
        "should mention banned pattern: {err}"
    );
}

// --- SEC-E2E-02: Invalid stash keys blocked through full execute pipeline ---
#[tokio::test]
async fn sec_e2e_02_invalid_stash_keys_blocked() {
    // Validator tests ensure banned patterns are caught before execution.
    // The stash key validation is at the op level, so we test the validator
    // for code that might try to use malicious keys.
    let executor = test_executor();
    let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(StubDispatcher);

    // Valid code that passes validation (to ensure we're not over-blocking)
    let code = r#"async () => { return "valid-stash-key"; }"#;
    let result = executor.execute_code(code, dispatcher, None, None).await;
    assert!(
        result.is_ok(),
        "simple valid code should pass: {:?}",
        result
    );
}

// --- SEC-E2E-03: Redacted errors don't leak AWS keys/JWTs ---
#[tokio::test]
async fn sec_e2e_03_redacted_errors_dont_leak_credentials() {
    use forge_sandbox::redact::redact_error_for_llm;

    // AWS key
    let msg = "auth error: key AKIAIOSFODNN7EXAMPLE rejected";
    let result = redact_error_for_llm("server", "tool", msg);
    assert!(
        !result.contains("AKIAIOSFODNN7"),
        "AWS key leaked: {result}"
    );
    assert!(
        result.contains("[REDACTED]"),
        "should be redacted: {result}"
    );

    // JWT
    let msg = "token eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U expired";
    let result = redact_error_for_llm("server", "tool", msg);
    assert!(!result.contains("eyJhbGci"), "JWT leaked: {result}");

    // GitHub token
    let msg = "invalid token ghp_ABCDEFGHIJKLMNOPQRSTuvwxyz1234";
    let result = redact_error_for_llm("server", "tool", msg);
    assert!(
        !result.contains("ghp_ABCDE"),
        "GitHub token leaked: {result}"
    );
}

// --- SEC-E2E-04: Validator blocks String.raw in full pipeline ---
#[tokio::test]
async fn sec_e2e_04_validator_blocks_string_raw() {
    let executor = test_executor();
    let dispatcher: Arc<dyn ToolDispatcher> = Arc::new(StubDispatcher);

    let code = r#"async () => { return String.raw`template`; }"#;
    let result = executor
        .execute_code(code, dispatcher.clone(), None, None)
        .await;
    assert!(result.is_err(), "String.raw should be blocked");

    // WebAssembly should also be blocked
    let code2 = r#"async () => { return typeof WebAssembly; }"#;
    let result2 = executor
        .execute_code(code2, dispatcher.clone(), None, None)
        .await;
    assert!(result2.is_err(), "WebAssembly should be blocked");

    // Symbol.toPrimitive should also be blocked
    let code3 = r#"async () => { return Symbol.toPrimitive; }"#;
    let result3 = executor.execute_code(code3, dispatcher, None, None).await;
    assert!(result3.is_err(), "Symbol.toPrimitive should be blocked");
}

// --- SEC-E2E-05: IPC message size limit enforced ---
#[tokio::test]
async fn sec_e2e_05_ipc_message_size_limit() {
    use forge_sandbox::ipc::{read_message_with_limit, write_message, ChildMessage};

    // Write a message that exceeds a small limit
    let msg = ChildMessage::Log {
        message: "x".repeat(2048),
    };
    let mut buf = Vec::new();
    write_message(&mut buf, &msg).await.unwrap();

    // Read with a limit smaller than the message
    let mut cursor = std::io::Cursor::new(buf);
    let result: Result<Option<ChildMessage>, _> = read_message_with_limit(&mut cursor, 64).await;
    assert!(result.is_err(), "oversized message should be rejected");
    let err = result.unwrap_err().to_string();
    assert!(
        err.contains("too large"),
        "error should mention size: {err}"
    );

    // Verify the default limit matches the documented default (65 MB = 64 MB
    // resource payload + 1 MB envelope overhead).
    assert_eq!(
        forge_sandbox::ipc::DEFAULT_MAX_IPC_MESSAGE_SIZE,
        65 * 1024 * 1024,
        "default IPC limit should be 65 MB"
    );
}