cortexai-wasm 0.1.0

//! WASM tool sandbox for executing untrusted code via wasmtime.
//!
//! Provides capability-based restrictions: controlled filesystem access,
//! network access, memory limits, and execution timeouts.

use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use cortexai_core::errors::ToolError;
use cortexai_core::tool::{ExecutionContext, Tool, ToolSchema};
use wasmtime::{Engine, Linker, Module, Store, Trap, Val};

use crate::sandbox_error::SandboxError;

/// Default memory limit used by [`SandboxConfig::default`]: 64 MiB (67,108,864 bytes).
const DEFAULT_MAX_MEMORY_BYTES: usize = 64 << 20;

/// Default wall-clock execution timeout used by [`SandboxConfig::default`]: 30 seconds.
const DEFAULT_MAX_EXECUTION_TIME: Duration = Duration::new(30, 0);

/// Configuration for the WASM tool sandbox.
///
/// Controls resource limits and capability grants for sandboxed modules.
/// Fields are private: read them through the accessor methods and construct
/// values through [`SandboxConfig::builder`] or [`Default`].
#[derive(Debug, Clone)]
pub struct SandboxConfig {
    /// Maximum number of bytes of memory the module may use.
    max_memory_bytes: usize,
    /// Maximum wall-clock time a single execution may take.
    max_execution_time: Duration,
    /// `(guest, host)` path pairs, in the order they were added via the builder.
    allowed_paths: Vec<(String, String)>,
    /// Whether the module may access the network.
    allow_network: bool,
    /// Optional wasmtime fuel budget; `None` means fuel metering is not enabled.
    max_fuel: Option<u64>,
}

impl SandboxConfig {
    /// Maximum memory in bytes the WASM module may use.
    pub fn max_memory_bytes(&self) -> usize {
        self.max_memory_bytes
    }

    /// Maximum wall-clock execution time.
    pub fn max_execution_time(&self) -> Duration {
        self.max_execution_time
    }

    /// Guest-to-host path mappings for filesystem access.
    pub fn allowed_paths(&self) -> &[(String, String)] {
        &self.allowed_paths
    }

    /// Whether network access is permitted.
    pub fn allow_network(&self) -> bool {
        self.allow_network
    }

    /// Optional wasmtime fuel limit for CPU bounding.
    pub fn max_fuel(&self) -> Option<u64> {
        self.max_fuel
    }

    /// Create a new builder with default values.
    pub fn builder() -> SandboxConfigBuilder {
        SandboxConfigBuilder::default()
    }
}

/// Builder for [`SandboxConfig`].
#[derive(Debug, Clone)]
pub struct SandboxConfigBuilder {
    config: SandboxConfig,
}

impl Default for SandboxConfigBuilder {
    fn default() -> Self {
        Self {
            config: SandboxConfig::default(),
        }
    }
}

impl SandboxConfigBuilder {
    /// Set the maximum memory in bytes.
    pub fn max_memory_bytes(mut self, bytes: usize) -> Self {
        self.config.max_memory_bytes = bytes;
        self
    }

    /// Set the maximum execution time.
    pub fn max_execution_time(mut self, duration: Duration) -> Self {
        self.config.max_execution_time = duration;
        self
    }

    /// Add a guest-to-host path mapping.
    pub fn allowed_path(mut self, guest: String, host: String) -> Self {
        self.config.allowed_paths.push((guest, host));
        self
    }

    /// Set whether network access is allowed.
    pub fn allow_network(mut self, allow: bool) -> Self {
        self.config.allow_network = allow;
        self
    }

    /// Set the wasmtime fuel limit.
    pub fn max_fuel(mut self, fuel: u64) -> Self {
        self.config.max_fuel = Some(fuel);
        self
    }

    /// Build the [`SandboxConfig`].
    pub fn build(self) -> SandboxConfig {
        self.config
    }
}

impl Default for SandboxConfig {
    fn default() -> Self {
        Self {
            max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES,
            max_execution_time: DEFAULT_MAX_EXECUTION_TIME,
            allowed_paths: Vec::new(),
            allow_network: false,
            max_fuel: None,
        }
    }
}

/// A compiled WASM module ready for sandboxed execution.
///
/// Produced by [`ToolSandbox::load_module`], which stores a clone of the
/// sandbox's engine and a shared handle to its configuration alongside the
/// compiled module.
#[derive(Clone)]
pub struct SandboxedModule {
    /// The compiled wasmtime module.
    module: Module,
    /// The engine the module was compiled with; stores for this module are
    /// created from it.
    engine: Engine,
    /// Configuration of the sandbox that compiled the module.
    config: Arc<SandboxConfig>,
}

impl std::fmt::Debug for SandboxedModule {
    /// Prints only the configuration; the module and engine fields are elided
    /// and shown as `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("SandboxedModule");
        out.field("config", &self.config);
        out.finish_non_exhaustive()
    }
}

impl SandboxedModule {
    /// Borrow the sandbox configuration captured when the module was loaded.
    pub fn config(&self) -> &SandboxConfig {
        self.config.as_ref()
    }

    /// Borrow the compiled wasmtime [`Module`].
    pub(crate) fn module(&self) -> &Module {
        &self.module
    }

    /// Borrow the wasmtime [`Engine`] this module was compiled with.
    pub(crate) fn engine(&self) -> &Engine {
        &self.engine
    }
}

/// Sandbox for executing untrusted WASM tool modules.
///
/// Uses wasmtime with WASI to provide capability-based isolation.
///
/// NOTE(review): the code in this file only links an empty `Linker` and only
/// applies the fuel limit to stores; confirm where WASI and the remaining
/// limits from [`SandboxConfig`] are wired in.
pub struct ToolSandbox {
    /// Engine used to compile modules; fuel metering is enabled on it exactly
    /// when the configuration carries a fuel limit.
    engine: Engine,
    /// Configuration shared with every module loaded through this sandbox.
    config: Arc<SandboxConfig>,
}

impl ToolSandbox {
    /// Build a sandbox whose engine honours `config`.
    ///
    /// Fuel metering is switched on in the engine exactly when
    /// `config.max_fuel()` is set.
    ///
    /// # Panics
    ///
    /// Panics if wasmtime cannot create an engine from the derived settings.
    pub fn new(config: SandboxConfig) -> Self {
        let fuel_metering = config.max_fuel().is_some();

        let mut engine_config = wasmtime::Config::new();
        engine_config.consume_fuel(fuel_metering);

        Self {
            engine: Engine::new(&engine_config).expect("Failed to create wasmtime engine"),
            config: Arc::new(config),
        }
    }

    /// Borrow the configuration this sandbox was created with.
    pub fn config(&self) -> &SandboxConfig {
        self.config.as_ref()
    }

    /// Compile `wasm_bytes` with this sandbox's engine.
    ///
    /// The returned module carries a clone of the engine and a shared handle
    /// to the configuration.
    ///
    /// # Errors
    ///
    /// Returns [`SandboxError::CompilationFailed`] with wasmtime's message when
    /// the bytes cannot be compiled.
    pub fn load_module(&self, wasm_bytes: &[u8]) -> Result<SandboxedModule, SandboxError> {
        match Module::new(&self.engine, wasm_bytes) {
            Ok(module) => Ok(SandboxedModule {
                module,
                engine: self.engine.clone(),
                config: Arc::clone(&self.config),
            }),
            Err(err) => Err(SandboxError::CompilationFailed(err.to_string())),
        }
    }

    /// Instantiate `sandboxed` in a fresh store and call its export `fn_name`.
    ///
    /// `input` is decoded as concatenated little-endian `i32` values, one per
    /// argument; trailing bytes that do not fill a whole 4-byte word are
    /// ignored. The function's results are returned re-encoded as
    /// little-endian bytes.
    ///
    /// # Errors
    ///
    /// * [`SandboxError::ExecutionFailed`] if instantiation fails, the export
    ///   does not exist, or the call fails for any reason other than fuel.
    /// * [`SandboxError::FuelExhausted`] if the call traps with out-of-fuel.
    pub fn execute(
        &self,
        sandboxed: &SandboxedModule,
        fn_name: &str,
        input: &[u8],
    ) -> Result<Vec<u8>, SandboxError> {
        let engine = sandboxed.engine();
        let mut store = Store::new(engine, ());
        self.configure_store(&mut store);

        // No host functions are registered: the module gets an empty linker.
        let linker = Linker::new(engine);
        let instance = match linker.instantiate(&mut store, sandboxed.module()) {
            Ok(instance) => instance,
            Err(err) => return Err(SandboxError::ExecutionFailed(err.to_string())),
        };

        let Some(func) = instance.get_func(&mut store, fn_name) else {
            return Err(SandboxError::ExecutionFailed(format!(
                "export '{}' not found",
                fn_name
            )));
        };

        let args: Vec<Val> = parse_i32_params(input)
            .into_iter()
            .map(Val::I32)
            .collect();

        // Size the result buffer from the callee's signature; the placeholder
        // values are overwritten by the call.
        let result_count = func.ty(&store).results().len();
        let mut results = vec![Val::I32(0); result_count];

        if let Err(err) = func.call(&mut store, &args, &mut results) {
            return Err(map_execution_error(err, &self.config));
        }

        Ok(encode_results(&results))
    }

    /// Apply config limits to a store.
    ///
    /// Only the fuel budget is applied here. A failure to set fuel is
    /// deliberately ignored (best effort).
    ///
    /// NOTE(review): `max_memory_bytes`, `max_execution_time`, `allowed_paths`
    /// and `allow_network` are not consulted by this function.
    fn configure_store(&self, store: &mut Store<()>) {
        let Some(fuel) = self.config.max_fuel() else {
            return;
        };
        let _ = store.set_fuel(fuel);
    }
}

/// Decode a byte slice as consecutive little-endian `i32` words.
///
/// Only complete 4-byte words are decoded; up to three trailing bytes that do
/// not form a full word are silently ignored. An empty slice yields an empty
/// vector.
fn parse_i32_params(input: &[u8]) -> Vec<i32> {
    let mut values = Vec::with_capacity(input.len() / 4);
    for word in input.chunks_exact(4) {
        // `chunks_exact(4)` guarantees every `word` has exactly four bytes.
        values.push(i32::from_le_bytes([word[0], word[1], word[2], word[3]]));
    }
    values
}

/// Map a wasmtime execution error to the appropriate SandboxError variant.
fn map_execution_error(error: wasmtime::Error, config: &SandboxConfig) -> SandboxError {
    // Check if the error is a trap with a specific code
    if let Some(trap) = error.downcast_ref::<Trap>() {
        if *trap == Trap::OutOfFuel {
            return SandboxError::FuelExhausted {
                fuel_limit: config.max_fuel().unwrap_or(0),
            };
        }
    }
    SandboxError::ExecutionFailed(error.to_string())
}

/// Serialize wasmtime result values as back-to-back little-endian bytes.
///
/// `I32`/`F32` values contribute 4 bytes each and `I64`/`F64` values 8 bytes
/// each, in result order. Values of any other kind contribute nothing.
fn encode_results(results: &[Val]) -> Vec<u8> {
    results.iter().fold(Vec::new(), |mut out, value| {
        match value {
            Val::I32(n) => out.extend_from_slice(&n.to_le_bytes()),
            Val::I64(n) => out.extend_from_slice(&n.to_le_bytes()),
            Val::F32(bits) => out.extend_from_slice(&bits.to_le_bytes()),
            Val::F64(bits) => out.extend_from_slice(&bits.to_le_bytes()),
            // Other value kinds have no byte encoding here and are skipped.
            _ => {}
        }
        out
    })
}

/// A WASM module wrapped as a [`Tool`] for use in the ToolRegistry.
///
/// The WASM module must export:
/// - `__tool_schema() -> (i32, i32)` — returns (ptr, len) to a JSON ToolSchema in memory
/// - `__tool_execute(i32, i32) -> (i32, i32)` — takes (ptr, len) of JSON args, returns (ptr, len) of JSON result
/// - `memory` — exported linear memory
///
/// The schema is read from the module once, when the tool is constructed, and
/// cached; each execution instantiates the module afresh.
pub struct SandboxedTool {
    /// Sandbox whose limits are applied to the store of each execution.
    sandbox: ToolSandbox,
    /// The compiled module providing the tool exports.
    module: SandboxedModule,
    /// Schema extracted from the module at construction time.
    schema: ToolSchema,
}

impl std::fmt::Debug for SandboxedTool {
    /// Prints only the cached schema; the sandbox and module fields are elided
    /// and shown as `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("SandboxedTool");
        out.field("schema", &self.schema);
        out.finish_non_exhaustive()
    }
}

impl SandboxedTool {
    /// Create a new SandboxedTool by loading schema from the module.
    pub fn new(
        sandbox: ToolSandbox,
        module: SandboxedModule,
    ) -> Result<Self, SandboxError> {
        let schema = Self::extract_schema(&sandbox, &module)?;
        Ok(Self {
            sandbox,
            module,
            schema,
        })
    }

    /// Call `__tool_schema` export to read the embedded schema JSON.
    fn extract_schema(
        _sandbox: &ToolSandbox,
        module: &SandboxedModule,
    ) -> Result<ToolSchema, SandboxError> {
        let mut store = Store::new(module.engine(), ());

        let linker = Linker::new(module.engine());
        let instance = linker
            .instantiate(&mut store, module.module())
            .map_err(|e| SandboxError::ExecutionFailed(e.to_string()))?;

        let schema_fn = instance
            .get_func(&mut store, "__tool_schema")
            .ok_or_else(|| {
                SandboxError::ExecutionFailed(
                    "module missing '__tool_schema' export".to_string(),
                )
            })?;

        let mut results = [Val::I32(0), Val::I32(0)];
        schema_fn
            .call(&mut store, &[], &mut results)
            .map_err(|e| SandboxError::ExecutionFailed(e.to_string()))?;

        let ptr = results[0].unwrap_i32() as u32 as usize;
        let len = results[1].unwrap_i32() as u32 as usize;

        let memory = instance
            .get_memory(&mut store, "memory")
            .ok_or_else(|| {
                SandboxError::ExecutionFailed(
                    "module missing 'memory' export".to_string(),
                )
            })?;

        let data = memory.data(&store);
        if ptr + len > data.len() {
            return Err(SandboxError::ExecutionFailed(
                "schema pointer out of bounds".to_string(),
            ));
        }

        let json_bytes = &data[ptr..ptr + len];
        let schema: ToolSchema = serde_json::from_slice(json_bytes).map_err(|e| {
            SandboxError::ExecutionFailed(format!("invalid schema JSON: {}", e))
        })?;

        Ok(schema)
    }

    /// Call `__tool_execute` with JSON args, read JSON result from memory.
    fn call_execute(
        &self,
        args_json: &[u8],
    ) -> Result<serde_json::Value, SandboxError> {
        let mut store = Store::new(self.module.engine(), ());
        self.sandbox.configure_store(&mut store);

        let linker = Linker::new(self.module.engine());
        let instance = linker
            .instantiate(&mut store, self.module.module())
            .map_err(|e| SandboxError::ExecutionFailed(e.to_string()))?;

        let memory = instance
            .get_memory(&mut store, "memory")
            .ok_or_else(|| {
                SandboxError::ExecutionFailed(
                    "module missing 'memory' export".to_string(),
                )
            })?;

        // Write args into guest memory at a known offset (after schema data)
        let args_offset: usize = 1024;
        let mem_data = memory.data_mut(&mut store);
        if args_offset + args_json.len() > mem_data.len() {
            return Err(SandboxError::MemoryLimitExceeded {
                limit_bytes: mem_data.len(),
            });
        }
        mem_data[args_offset..args_offset + args_json.len()]
            .copy_from_slice(args_json);

        let exec_fn = instance
            .get_func(&mut store, "__tool_execute")
            .ok_or_else(|| {
                SandboxError::ExecutionFailed(
                    "module missing '__tool_execute' export".to_string(),
                )
            })?;

        let params = [
            Val::I32(args_offset as i32),
            Val::I32(args_json.len() as i32),
        ];
        let mut results = [Val::I32(0), Val::I32(0)];
        exec_fn
            .call(&mut store, &params, &mut results)
            .map_err(|e| map_execution_error(e, self.module.config()))?;

        let out_ptr = results[0].unwrap_i32() as u32 as usize;
        let out_len = results[1].unwrap_i32() as u32 as usize;

        let data = memory.data(&store);
        if out_ptr + out_len > data.len() {
            return Err(SandboxError::ExecutionFailed(
                "result pointer out of bounds".to_string(),
            ));
        }

        let result_bytes = &data[out_ptr..out_ptr + out_len];
        serde_json::from_slice(result_bytes).map_err(|e| {
            SandboxError::ExecutionFailed(format!("invalid result JSON: {}", e))
        })
    }
}

#[async_trait]
impl Tool for SandboxedTool {
    /// Return a copy of the schema cached at construction time.
    fn schema(&self) -> ToolSchema {
        self.schema.clone()
    }

    /// Serialize `arguments` to JSON, run the module's `__tool_execute`
    /// export, and return the JSON value it produced.
    ///
    /// The execution context is not used. The guest call is made directly
    /// (no `.await`) inside this async fn.
    ///
    /// # Errors
    ///
    /// * [`ToolError::InvalidArguments`] if `arguments` cannot be serialized.
    /// * [`ToolError::ExecutionFailed`] carrying the sandbox error's message
    ///   for any failure while running the module.
    async fn execute(
        &self,
        _context: &ExecutionContext,
        arguments: serde_json::Value,
    ) -> Result<serde_json::Value, ToolError> {
        let encoded_args = match serde_json::to_vec(&arguments) {
            Ok(bytes) => bytes,
            Err(err) => return Err(ToolError::InvalidArguments(err.to_string())),
        };

        self.call_execute(encoded_args.as_slice())
            .map_err(|sandbox_err| ToolError::ExecutionFailed(sandbox_err.to_string()))
    }
}