// kavach 1.0.1 - Docs.rs

//! Sandbox lifecycle — create, start, exec, checkpoint, migrate, destroy.

use std::fmt;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use crate::backend::{Backend, SandboxBackend};
use crate::credential::SecretRef;
use crate::policy::SandboxPolicy;
use crate::scanning::ExternalizationPolicy;

/// Unique sandbox identifier.
///
/// Alias for [`Uuid`]; [`Sandbox::create`] assigns a fresh random (v4) value
/// to every sandbox it builds.
pub type SandboxId = Uuid;

/// Sandbox lifecycle state.
///
/// The legal moves between states are defined by
/// [`SandboxState::valid_transition`]:
///
/// - `Created` → `Running`
/// - `Running` → `Paused`, `Stopped`, or `Destroyed`
/// - `Paused` → `Running`, `Stopped`, or `Destroyed`
/// - `Stopped` → `Destroyed`
///
/// `Destroyed` has no outgoing transitions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum SandboxState {
    /// Created but not yet started.
    Created,
    /// Running and accepting exec calls.
    Running,
    /// Paused (checkpointed).
    Paused,
    /// Stopped — no further execution possible.
    Stopped,
    /// Destroyed — resources released.
    Destroyed,
}

impl SandboxState {
    /// Report whether the lifecycle allows moving from `self` to `to`.
    ///
    /// Each source state lists the targets it may move to; every other pair,
    /// including a state paired with itself, is rejected.
    #[inline]
    #[must_use]
    pub fn valid_transition(&self, to: &SandboxState) -> bool {
        match self {
            // A fresh sandbox can only be started.
            Self::Created => matches!(to, Self::Running),
            Self::Running => matches!(to, Self::Paused | Self::Stopped | Self::Destroyed),
            Self::Paused => matches!(to, Self::Running | Self::Stopped | Self::Destroyed),
            // Once stopped, the only way forward is teardown.
            Self::Stopped => matches!(to, Self::Destroyed),
            // Terminal state.
            Self::Destroyed => false,
        }
    }
}

impl fmt::Display for SandboxState {
    /// Write the lowercase name of the state (`"created"`, `"running"`, ...).
    ///
    /// The name is written verbatim; width and padding flags on the formatter
    /// are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Created => "created",
            Self::Running => "running",
            Self::Paused => "paused",
            Self::Stopped => "stopped",
            Self::Destroyed => "destroyed",
        };
        f.write_str(label)
    }
}

/// Configuration for creating a sandbox.
///
/// Build one with [`SandboxConfig::builder`], or start from
/// [`SandboxConfig::default`] and set the public fields directly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SandboxConfig {
    /// Backend to use. Defaults to [`Backend::Process`].
    pub backend: Backend,
    /// Security policy. Defaults to [`SandboxPolicy::basic`].
    pub policy: SandboxPolicy,
    /// Secrets to inject. Empty by default.
    pub secrets: Vec<SecretRef>,
    /// Timeout for exec calls in milliseconds. Defaults to `30_000`.
    pub timeout_ms: u64,
    /// Working directory inside the sandbox. `None` by default.
    pub workdir: Option<String>,
    /// Environment variables. Empty by default.
    pub env: Vec<(String, String)>,
    /// Agent ID that owns this sandbox. `None` by default.
    pub agent_id: Option<String>,
    /// Externalization policy for output scanning.
    ///
    /// When set, [`Sandbox::exec`] passes each result through the
    /// externalization gate (only in builds with the `process` feature).
    pub externalization: Option<ExternalizationPolicy>,
    /// Optional inner backend for composite isolation (defense-in-depth).
    pub inner_backend: Option<Backend>,
}

impl Default for SandboxConfig {
    /// Baseline configuration: the `Process` backend with the `basic` policy,
    /// a 30-second exec timeout, and every optional setting left empty.
    fn default() -> Self {
        Self {
            backend: Backend::Process,
            policy: SandboxPolicy::basic(),
            secrets: Vec::new(),
            // 30 seconds, expressed in milliseconds.
            timeout_ms: 30_000,
            workdir: None,
            env: Vec::new(),
            agent_id: None,
            externalization: None,
            inner_backend: None,
        }
    }
}

impl SandboxConfig {
    /// Create a new builder for `SandboxConfig`.
    #[must_use]
    pub fn builder() -> SandboxConfigBuilder {
        SandboxConfigBuilder::default()
    }
}

/// Builder for [`SandboxConfig`].
///
/// Starts from [`SandboxConfig::default`]. Each setter consumes the builder
/// and returns it so calls can be chained; `build` yields the finished
/// configuration.
#[derive(Debug, Default)]
pub struct SandboxConfigBuilder {
    /// Configuration being assembled.
    config: SandboxConfig,
}

impl SandboxConfigBuilder {
    /// Set the sandbox backend.
    #[must_use]
    pub fn backend(mut self, backend: Backend) -> Self {
        self.config.backend = backend;
        self
    }

    /// Set the security policy.
    ///
    /// This replaces the whole policy, so it also discards any earlier
    /// [`policy_seccomp`](Self::policy_seccomp) or [`network`](Self::network)
    /// adjustments made on this builder. Call it before those setters.
    #[must_use]
    pub fn policy(mut self, policy: SandboxPolicy) -> Self {
        self.config.policy = policy;
        self
    }

    /// Enable seccomp with the given profile name.
    #[must_use]
    pub fn policy_seccomp(mut self, profile: &str) -> Self {
        self.config.policy.seccomp_enabled = true;
        self.config.policy.seccomp_profile = Some(profile.into());
        self
    }

    /// Enable or disable network access.
    #[must_use]
    pub fn network(mut self, enabled: bool) -> Self {
        self.config.policy.network.enabled = enabled;
        self
    }

    /// Set the execution timeout in milliseconds.
    #[must_use]
    pub fn timeout_ms(mut self, ms: u64) -> Self {
        self.config.timeout_ms = ms;
        self
    }

    /// Set the owning agent ID.
    #[must_use]
    pub fn agent_id(mut self, id: impl Into<String>) -> Self {
        self.config.agent_id = Some(id.into());
        self
    }

    /// Set the externalization scanning policy.
    #[must_use]
    pub fn externalization(mut self, policy: ExternalizationPolicy) -> Self {
        self.config.externalization = Some(policy);
        self
    }

    /// Set an inner backend for composite isolation (defense-in-depth).
    ///
    /// The outer backend (set via `backend()`) provides the runtime boundary.
    /// The inner backend's policy is merged to tighten isolation constraints.
    #[must_use]
    pub fn inner_backend(mut self, backend: Backend) -> Self {
        self.config.inner_backend = Some(backend);
        self
    }

    /// Build the `SandboxConfig`.
    #[must_use]
    pub fn build(self) -> SandboxConfig {
        self.config
    }
}

/// Result of executing a command inside a sandbox.
///
/// This is the success value of [`Sandbox::exec`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecResult {
    /// Exit code (0 = success).
    pub exit_code: i32,
    /// Captured stdout.
    pub stdout: String,
    /// Captured stderr.
    pub stderr: String,
    /// Execution duration in milliseconds.
    pub duration_ms: u64,
    /// Whether the execution was killed due to timeout.
    pub timed_out: bool,
}

/// A sandbox instance with lifecycle management.
///
/// Built by [`Sandbox::create`] in the [`SandboxState::Created`] state and
/// moved between states with [`Sandbox::transition`].
pub struct Sandbox {
    /// Unique identifier for this sandbox.
    pub id: SandboxId,
    /// Configuration used to create this sandbox.
    pub config: SandboxConfig,
    /// Current lifecycle state.
    pub state: SandboxState,
    /// Timestamp when the sandbox was created.
    pub created_at: DateTime<Utc>,
    /// Timestamp when the sandbox entered the Running state.
    ///
    /// `transition` refreshes this on every move into `Running`, including a
    /// resume from `Paused`.
    pub started_at: Option<DateTime<Utc>>,
    /// Timestamp when the sandbox was stopped or destroyed.
    ///
    /// `transition` refreshes this on every move into `Stopped` or `Destroyed`.
    pub stopped_at: Option<DateTime<Utc>>,
    /// Backend that carries out `exec`, `spawn`, and `destroy` for this sandbox.
    backend: Box<dyn SandboxBackend>,
    /// Cached externalization gate (created once, reused per-exec).
    ///
    /// `Some` only when `config.externalization` was set at creation time.
    #[cfg(feature = "process")]
    gate: Option<crate::scanning::ExternalizationGate>,
}

impl fmt::Debug for Sandbox {
    /// Format the public fields plus the backend's display name.
    ///
    /// The boxed backend and the cached gate are left out; the backend is
    /// represented by `config.backend` rendered through `Display`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            id,
            config,
            state,
            created_at,
            started_at,
            stopped_at,
            ..
        } = self;
        let backend_name = config.backend.to_string();

        let mut out = f.debug_struct("Sandbox");
        out.field("id", id);
        out.field("config", config);
        out.field("state", state);
        out.field("created_at", created_at);
        out.field("started_at", started_at);
        out.field("stopped_at", stopped_at);
        out.field("backend", &backend_name);
        out.finish()
    }
}

impl Sandbox {
    /// Build a sandbox in the `Created` state; it is not started.
    ///
    /// # Errors
    ///
    /// Returns `KavachError::BackendUnavailable` when the configured backend
    /// reports that it is not available, and propagates any error from
    /// `crate::backend::create_backend`.
    pub async fn create(config: SandboxConfig) -> crate::Result<Self> {
        if !config.backend.is_available() {
            let backend_name = config.backend.to_string();
            return Err(crate::KavachError::BackendUnavailable(backend_name));
        }

        let backend = crate::backend::create_backend(&config)?;

        // Build the externalization gate once, up front, so `exec` can reuse
        // it instead of constructing one per call.
        #[cfg(feature = "process")]
        let gate = config
            .externalization
            .as_ref()
            .map(|_| crate::scanning::ExternalizationGate::new());

        let sandbox = Self {
            id: Uuid::new_v4(),
            config,
            state: SandboxState::Created,
            created_at: Utc::now(),
            started_at: None,
            stopped_at: None,
            backend,
            #[cfg(feature = "process")]
            gate,
        };
        Ok(sandbox)
    }

    /// Move the sandbox to state `to`, recording lifecycle timestamps.
    ///
    /// Entering `Running` sets `started_at`; entering `Stopped` or `Destroyed`
    /// sets `stopped_at`. Other targets leave the timestamps untouched.
    ///
    /// # Errors
    ///
    /// Returns `KavachError::InvalidTransition` (and changes nothing) when
    /// [`SandboxState::valid_transition`] rejects the move.
    pub fn transition(&mut self, to: SandboxState) -> crate::Result<()> {
        let from = self.state;
        if !from.valid_transition(&to) {
            return Err(crate::KavachError::InvalidTransition {
                state: from.to_string(),
                target: to.to_string(),
                reason: "invalid state transition".into(),
            });
        }

        tracing::debug!(sandbox_id = %self.id, from = %from, to = %to, "sandbox state transition");
        self.state = to;

        match to {
            SandboxState::Running => self.started_at = Some(Utc::now()),
            SandboxState::Stopped | SandboxState::Destroyed => {
                self.stopped_at = Some(Utc::now());
            }
            SandboxState::Created | SandboxState::Paused => {}
        }
        Ok(())
    }

    /// Run `command` through the backend and, when configured, pass the result
    /// through the externalization gate.
    ///
    /// # Errors
    ///
    /// Returns `KavachError::ExecFailed` when the sandbox is not `Running`, and
    /// propagates errors from the backend and from the gate.
    pub async fn exec(&self, command: &str) -> crate::Result<ExecResult> {
        let SandboxState::Running = self.state else {
            return Err(crate::KavachError::ExecFailed(format!(
                "sandbox is {}, not running",
                self.state
            )));
        };

        let result = self.backend.exec(command, &self.config.policy).await?;

        // The gate only exists in `process` builds (it relies on regex
        // scanning); it runs only when both a policy and a gate are present.
        #[cfg(feature = "process")]
        if let (Some(ext_policy), Some(gate)) = (&self.config.externalization, &self.gate) {
            return gate.apply(result, ext_policy);
        }

        Ok(result)
    }

    /// Spawn a long-running command in the sandbox.
    ///
    /// Unlike `exec`, this returns immediately with a handle to the running
    /// process; managing that process is the caller's job.
    ///
    /// # Errors
    ///
    /// Returns `KavachError::ExecFailed` when the sandbox is not `Running` or
    /// when the backend yields no handle (spawn unsupported), and propagates
    /// backend errors.
    pub async fn spawn(
        &self,
        command: &str,
    ) -> crate::Result<crate::backend::exec_util::SpawnedProcess> {
        let SandboxState::Running = self.state else {
            return Err(crate::KavachError::ExecFailed(format!(
                "sandbox is {}, not running",
                self.state
            )));
        };

        let handle = self.backend.spawn(command, &self.config.policy).await?;
        handle
            .ok_or_else(|| crate::KavachError::ExecFailed("backend does not support spawn".into()))
    }

    /// Destroy the sandbox and release backend resources.
    ///
    /// The backend is asked to destroy itself first; the move to `Destroyed`
    /// is validated afterwards. Calling this from a state that cannot reach
    /// `Destroyed` (for example `Created`) therefore returns
    /// `KavachError::InvalidTransition` even though the backend's `destroy`
    /// has already succeeded.
    ///
    /// # Errors
    ///
    /// Propagates the backend's destroy error, or the transition error
    /// described above.
    pub async fn destroy(mut self) -> crate::Result<()> {
        self.backend.destroy().await?;
        self.transition(SandboxState::Destroyed)
    }
}

/// Pre-warmed sandbox pool for fast startup.
///
/// Maintains a configurable number of pre-created sandboxes that can be
/// claimed on demand, avoiding the cold-start cost of backend creation.
///
/// Pooled sandboxes are in the `Created` state. The pool does not refill
/// itself: call [`SandboxPool::replenish`] after claiming.
pub struct SandboxPool {
    /// Template config for creating new sandboxes.
    template: SandboxConfig,
    /// Pre-created sandboxes ready to be claimed.
    pool: Vec<Sandbox>,
    /// Target number of warm sandboxes to maintain.
    warm_count: usize,
}

impl SandboxPool {
    /// Build a pool and eagerly create `warm_count` sandboxes from `template`.
    ///
    /// # Errors
    ///
    /// Returns the first error from [`Sandbox::create`]; sandboxes created
    /// before the failure are dropped.
    pub async fn new(template: SandboxConfig, warm_count: usize) -> crate::Result<Self> {
        let mut pool: Vec<Sandbox> = Vec::with_capacity(warm_count);
        while pool.len() < warm_count {
            let sandbox = Sandbox::create(template.clone()).await?;
            pool.push(sandbox);
        }
        tracing::debug!(warm_count, "sandbox pool initialized");
        Ok(Self {
            template,
            pool,
            warm_count,
        })
    }

    /// Take a sandbox out of the pool.
    ///
    /// The returned sandbox is in the `Created` state, so the caller must
    /// transition it to `Running` before use. An empty pool falls back to
    /// creating a sandbox on the spot (cold start).
    ///
    /// # Errors
    ///
    /// Only the cold-start path can fail, with the error from
    /// [`Sandbox::create`].
    pub async fn claim(&mut self) -> crate::Result<Sandbox> {
        match self.pool.pop() {
            Some(sandbox) => {
                tracing::debug!(remaining = self.pool.len(), "claimed sandbox from pool");
                Ok(sandbox)
            }
            None => {
                tracing::debug!("pool empty, creating sandbox on demand");
                Sandbox::create(self.template.clone()).await
            }
        }
    }

    /// Top the pool back up to the target warm count.
    ///
    /// Call this periodically or after claiming sandboxes. Does nothing when
    /// the pool already holds at least `warm_count` sandboxes.
    ///
    /// # Errors
    ///
    /// Stops at the first [`Sandbox::create`] failure; sandboxes added before
    /// the failure stay in the pool.
    pub async fn replenish(&mut self) -> crate::Result<()> {
        for _ in self.pool.len()..self.warm_count {
            let sandbox = Sandbox::create(self.template.clone()).await?;
            self.pool.push(sandbox);
        }
        Ok(())
    }

    /// Number of sandboxes currently sitting in the pool.
    #[must_use]
    pub fn available(&self) -> usize {
        self.pool.len()
    }

    /// Target number of warm sandboxes this pool was configured with.
    #[must_use]
    pub fn warm_count(&self) -> usize {
        self.warm_count
    }
}

impl std::fmt::Debug for SandboxPool {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SandboxPool")
            .field("available", &self.pool.len())
            .field("warm_count", &self.warm_count)
            .field("backend", &self.template.backend)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks a handful of valid and invalid transitions.
    #[test]
    fn state_transitions() {
        assert!(SandboxState::Created.valid_transition(&SandboxState::Running));
        assert!(SandboxState::Running.valid_transition(&SandboxState::Stopped));
        assert!(SandboxState::Running.valid_transition(&SandboxState::Paused));
        assert!(SandboxState::Paused.valid_transition(&SandboxState::Running));
        assert!(!SandboxState::Destroyed.valid_transition(&SandboxState::Running));
        assert!(!SandboxState::Created.valid_transition(&SandboxState::Stopped));
    }

    /// Chained builder setters land in the corresponding config fields.
    #[test]
    fn config_builder() {
        let config = SandboxConfig::builder()
            .backend(Backend::GVisor)
            .policy_seccomp("strict")
            .network(false)
            .timeout_ms(60_000)
            .agent_id("agent-123")
            .build();

        assert_eq!(config.backend, Backend::GVisor);
        assert!(config.policy.seccomp_enabled);
        assert!(!config.policy.network.enabled);
        assert_eq!(config.timeout_ms, 60_000);
        assert_eq!(config.agent_id.unwrap(), "agent-123");
    }

    /// The default config uses the process backend, the basic policy, and a
    /// 30-second timeout.
    #[test]
    fn config_default() {
        let config = SandboxConfig::default();
        assert_eq!(config.backend, Backend::Process);
        assert!(config.policy.seccomp_enabled); // basic() enables seccomp
        assert_eq!(config.timeout_ms, 30_000);
    }

    /// A freshly created sandbox starts in `Created`.
    #[tokio::test]
    async fn create_sandbox() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let sandbox = Sandbox::create(config).await.unwrap();
        assert_eq!(sandbox.state, SandboxState::Created);
    }

    /// Walks Created → Running → Stopped → Destroyed and checks the
    /// timestamps set along the way.
    #[tokio::test]
    async fn sandbox_lifecycle() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut sandbox = Sandbox::create(config).await.unwrap();

        sandbox.transition(SandboxState::Running).unwrap();
        assert!(sandbox.started_at.is_some());

        sandbox.transition(SandboxState::Stopped).unwrap();
        assert!(sandbox.stopped_at.is_some());

        sandbox.transition(SandboxState::Destroyed).unwrap();
        assert_eq!(sandbox.state, SandboxState::Destroyed);
    }

    /// `exec` is rejected while the sandbox is still `Created`.
    #[tokio::test]
    async fn exec_requires_running() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let sandbox = Sandbox::create(config).await.unwrap();
        // Not started yet
        assert!(sandbox.exec("echo hello").await.is_err());
    }

    /// `Display` renders lowercase state names.
    #[test]
    fn state_display() {
        assert_eq!(SandboxState::Running.to_string(), "running");
        assert_eq!(SandboxState::Destroyed.to_string(), "destroyed");
    }

    /// Invalid moves out of `Created` and `Destroyed` are rejected by
    /// `valid_transition`.
    #[test]
    fn invalid_transition_error() {
        let mut sandbox_state = SandboxState::Created;
        // Created → Stopped is invalid
        assert!(!sandbox_state.valid_transition(&SandboxState::Stopped));
        // Created → Destroyed is invalid
        assert!(!sandbox_state.valid_transition(&SandboxState::Destroyed));
        // Destroyed → anything is invalid
        sandbox_state = SandboxState::Destroyed;
        assert!(!sandbox_state.valid_transition(&SandboxState::Created));
        assert!(!sandbox_state.valid_transition(&SandboxState::Running));
    }

    /// Every state survives a JSON serialize/deserialize round trip.
    #[test]
    fn state_serde_roundtrip() {
        for state in [
            SandboxState::Created,
            SandboxState::Running,
            SandboxState::Paused,
            SandboxState::Stopped,
            SandboxState::Destroyed,
        ] {
            let json = serde_json::to_string(&state).unwrap();
            let back: SandboxState = serde_json::from_str(&json).unwrap();
            assert_eq!(state, back);
        }
    }

    /// Exhaustive FSM transition matrix verification.
    /// The FSM has 5 states × 5 states = 25 possible transitions.
    /// Exactly 8 should be valid.
    #[test]
    fn fsm_exhaustive_transition_matrix() {
        const STATES: &[SandboxState] = &[
            SandboxState::Created,
            SandboxState::Running,
            SandboxState::Paused,
            SandboxState::Stopped,
            SandboxState::Destroyed,
        ];

        // Expected valid transitions encoded as (from_idx, to_idx)
        let expected_valid: &[(usize, usize)] = &[
            (0, 1), // Created → Running
            (1, 2), // Running → Paused
            (1, 3), // Running → Stopped
            (1, 4), // Running → Destroyed
            (2, 1), // Paused → Running
            (2, 3), // Paused → Stopped
            (2, 4), // Paused → Destroyed
            (3, 4), // Stopped → Destroyed
        ];

        // Check every (from, to) pair, including self-transitions.
        for (from_idx, from) in STATES.iter().enumerate() {
            for (to_idx, to) in STATES.iter().enumerate() {
                let is_valid = from.valid_transition(to);
                let should_be_valid = expected_valid.contains(&(from_idx, to_idx));
                assert_eq!(
                    is_valid, should_be_valid,
                    "{:?} → {:?}: got {is_valid}, expected {should_be_valid}",
                    from, to
                );
            }
        }
    }

    /// Verify timestamp invariants: `started_at` stays unset until the sandbox
    /// first reaches Running, and `stopped_at` stays unset until it is Stopped.
    #[tokio::test]
    async fn fsm_state_invariants() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut sandbox = Sandbox::create(config).await.unwrap();

        // Created: no timestamps
        assert!(sandbox.started_at.is_none());
        assert!(sandbox.stopped_at.is_none());

        // Running: started_at set
        sandbox.transition(SandboxState::Running).unwrap();
        assert!(sandbox.started_at.is_some());
        assert!(sandbox.stopped_at.is_none());

        // Paused: started_at still set, stopped_at not
        sandbox.transition(SandboxState::Paused).unwrap();
        assert!(sandbox.started_at.is_some());
        assert!(sandbox.stopped_at.is_none());

        // Resume: same
        sandbox.transition(SandboxState::Running).unwrap();
        assert!(sandbox.started_at.is_some());

        // Stopped: stopped_at now set
        sandbox.transition(SandboxState::Stopped).unwrap();
        assert!(sandbox.started_at.is_some());
        assert!(sandbox.stopped_at.is_some());
    }

    /// Verify Destroyed is a terminal state — no transitions out.
    #[test]
    fn destroyed_is_terminal() {
        for state in [
            SandboxState::Created,
            SandboxState::Running,
            SandboxState::Paused,
            SandboxState::Stopped,
            SandboxState::Destroyed,
        ] {
            assert!(
                !SandboxState::Destroyed.valid_transition(&state),
                "Destroyed → {:?} should be invalid",
                state
            );
        }
    }

    /// `ExecResult` survives a JSON round trip.
    #[test]
    fn exec_result_serde() {
        let result = ExecResult {
            exit_code: 0,
            stdout: "hello".into(),
            stderr: String::new(),
            duration_ms: 42,
            timed_out: false,
        };
        let json = serde_json::to_string(&result).unwrap();
        let back: ExecResult = serde_json::from_str(&json).unwrap();
        assert_eq!(back.exit_code, 0);
        assert_eq!(back.duration_ms, 42);
    }

    /// `Debug` output names the type, the state, and the backend.
    #[tokio::test]
    async fn sandbox_debug_format() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let sandbox = Sandbox::create(config).await.unwrap();
        let debug = format!("{:?}", sandbox);
        assert!(debug.contains("Sandbox"));
        // The backend field is rendered via `Display`, hence lowercase.
        assert!(debug.contains("noop"));
        assert!(debug.contains("Created"));
    }

    /// The invalid-transition error names both the current and target state.
    #[tokio::test]
    async fn sandbox_transition_error_message() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut sandbox = Sandbox::create(config).await.unwrap();
        let err = sandbox.transition(SandboxState::Stopped).unwrap_err();
        assert!(err.to_string().contains("created"));
        assert!(err.to_string().contains("stopped"));
    }

    /// The `exec` error for a non-running sandbox says "not running".
    #[tokio::test]
    async fn sandbox_exec_in_created_state() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let sandbox = Sandbox::create(config).await.unwrap();
        let err = sandbox.exec("echo test").await.unwrap_err();
        assert!(err.to_string().contains("not running"));
    }

    /// `spawn` applies the same running-state guard as `exec`.
    #[tokio::test]
    async fn sandbox_spawn_requires_running() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let sandbox = Sandbox::create(config).await.unwrap();
        let err = sandbox.spawn("echo test").await.unwrap_err();
        assert!(err.to_string().contains("not running"));
    }

    /// `destroy` succeeds on a running sandbox. The final state cannot be
    /// inspected because `destroy` consumes the sandbox.
    #[tokio::test]
    async fn sandbox_destroy_sets_destroyed() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut sandbox = Sandbox::create(config).await.unwrap();
        sandbox.transition(SandboxState::Running).unwrap();
        // Running → Destroyed is a valid transition, so destroy() succeeds
        sandbox.destroy().await.unwrap();
    }

    /// `SandboxConfig` survives a JSON round trip.
    #[test]
    fn config_serde_roundtrip() {
        let config = SandboxConfig::builder()
            .backend(Backend::GVisor)
            .policy(SandboxPolicy::strict())
            .timeout_ms(60_000)
            .agent_id("test-agent")
            .build();
        let json = serde_json::to_string(&config).unwrap();
        let back: SandboxConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(back.backend, Backend::GVisor);
        assert_eq!(back.timeout_ms, 60_000);
        assert_eq!(back.agent_id.as_deref(), Some("test-agent"));
    }

    /// `Display` output for every state.
    #[test]
    fn state_display_all() {
        assert_eq!(SandboxState::Created.to_string(), "created");
        assert_eq!(SandboxState::Running.to_string(), "running");
        assert_eq!(SandboxState::Paused.to_string(), "paused");
        assert_eq!(SandboxState::Stopped.to_string(), "stopped");
        assert_eq!(SandboxState::Destroyed.to_string(), "destroyed");
    }

    /// Creating a sandbox on an unavailable backend fails.
    ///
    /// NOTE(review): this assumes Firecracker is not available on the machine
    /// running the tests — confirm that holds in CI.
    #[tokio::test]
    async fn unavailable_backend_fails() {
        let config = SandboxConfig::builder()
            .backend(Backend::Firecracker)
            .build();
        let err = Sandbox::create(config).await.unwrap_err();
        assert!(err.to_string().contains("not available") || err.to_string().contains("not found"));
    }

    // ── SandboxPool tests ────────────────────────────────────────────

    /// A new pool is filled to its warm count.
    #[tokio::test]
    async fn pool_create() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let pool = SandboxPool::new(config, 3).await.unwrap();
        assert_eq!(pool.available(), 3);
        assert_eq!(pool.warm_count(), 3);
    }

    /// Claiming hands out a `Created` sandbox and shrinks the pool by one.
    #[tokio::test]
    async fn pool_claim() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut pool = SandboxPool::new(config, 2).await.unwrap();
        let sandbox = pool.claim().await.unwrap();
        assert_eq!(sandbox.state, SandboxState::Created);
        assert_eq!(pool.available(), 1);
    }

    /// Claiming from an empty pool falls back to creating a sandbox on demand.
    #[tokio::test]
    async fn pool_claim_empty_creates_on_demand() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut pool = SandboxPool::new(config, 0).await.unwrap();
        assert_eq!(pool.available(), 0);
        let sandbox = pool.claim().await.unwrap();
        assert_eq!(sandbox.state, SandboxState::Created);
    }

    /// `replenish` refills the pool back up to the warm count.
    #[tokio::test]
    async fn pool_replenish() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let mut pool = SandboxPool::new(config, 3).await.unwrap();
        let _ = pool.claim().await.unwrap();
        let _ = pool.claim().await.unwrap();
        assert_eq!(pool.available(), 1);
        pool.replenish().await.unwrap();
        assert_eq!(pool.available(), 3);
    }

    /// `Debug` output names the pool type and the template backend.
    #[tokio::test]
    async fn pool_debug() {
        let config = SandboxConfig::builder().backend(Backend::Noop).build();
        let pool = SandboxPool::new(config, 1).await.unwrap();
        let debug = format!("{pool:?}");
        assert!(debug.contains("SandboxPool"));
        // The pool renders the backend via `Debug`, hence the capitalised name.
        assert!(debug.contains("Noop"));
    }
}