car-sandbox 0.15.1

//! Docker-backed sandboxed ToolExecutor.

use async_trait::async_trait;
use serde_json::{json, Value};
use std::path::PathBuf;
use tokio::sync::Mutex;

/// Configuration for a sandbox environment.
///
/// All fields are public; start from [`Default`] and override what you need
/// with struct-update syntax.
#[derive(Debug, Clone)]
pub struct SandboxConfig {
    /// Docker image to use (default: "python:3.11-slim").
    pub image: String,
    /// Working directory to mount into the container (default: `"."`).
    /// The executor bind-mounts it at `/workspace`.
    pub working_dir: PathBuf,
    /// Additional environment variables as `(name, value)` pairs; each is
    /// passed to `docker run` as `-e name=value`. Empty by default.
    pub env: Vec<(String, String)>,
    /// Timeout for individual commands in seconds (default: 120).
    pub command_timeout_secs: u64,
    /// Whether to keep the container running between commands (faster)
    /// or create a new one per command (more isolated). Default: `true`.
    ///
    /// NOTE(review): no executor code in this file reads this flag — the
    /// container is always reused once started. Confirm whether per-command
    /// containers are implemented elsewhere or still to be done.
    pub persistent: bool,
}

impl Default for SandboxConfig {
    fn default() -> Self {
        Self {
            image: "python:3.11-slim".into(),
            working_dir: PathBuf::from("."),
            env: Vec::new(),
            command_timeout_secs: 120,
            persistent: true,
        }
    }
}

/// Error type describing sandbox failures.
///
/// NOTE(review): nothing visible in this file constructs or returns this
/// type — the executor methods report failures as plain `String`s. Confirm
/// whether it is used by other modules or is meant to replace those strings.
#[derive(Debug, thiserror::Error)]
pub enum SandboxError {
    /// Docker could not be used; the payload is a human-readable reason.
    #[error("Docker not available: {0}")]
    DockerNotAvailable(String),
    /// The container failed; the payload is a human-readable reason.
    #[error("Container failed: {0}")]
    ContainerFailed(String),
    /// A command exceeded its time limit; the payload is the limit in seconds.
    #[error("Command timeout after {0}s")]
    Timeout(u64),
    /// An underlying I/O error, convertible from `std::io::Error` via `?`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
}

/// A ToolExecutor that runs all commands inside a Docker container.
///
/// The container has the working directory mounted at `/workspace` and
/// runs with no safety restrictions — isolation provides safety.
///
/// Implements the same tool interface as TokhnToolExecutor (shell, read_file,
/// write_file, edit_file, etc.) but executes them inside the container.
///
/// In this file, the shell, grep, find and git tools go through `docker exec`,
/// while read_file, write_file, edit_file and list_dir access the mounted
/// working directory directly on the host filesystem.
pub struct SandboxExecutor {
    /// Settings used to start the container and to resolve workspace paths.
    config: SandboxConfig,
    /// Container ID for persistent mode. `None` until the first command
    /// starts a container, and again after `cleanup`. The async mutex is held
    /// while the container is being checked or started.
    container_id: Mutex<Option<String>>,
    /// Files that have been read (for read-before-edit enforcement).
    /// Paths are recorded by the read_file and write_file tools.
    ///
    /// NOTE(review): no code in this file consults this set, so the
    /// enforcement itself is not implemented here — confirm intent.
    read_files: std::sync::Mutex<std::collections::HashSet<String>>,
}

impl SandboxExecutor {
    /// Build an executor from `config` without touching Docker.
    ///
    /// No container is started here; one is created lazily by the first
    /// command that needs it.
    pub fn new(config: SandboxConfig) -> Self {
        let container_id = Mutex::new(None);
        let read_files = std::sync::Mutex::new(std::collections::HashSet::new());
        Self {
            config,
            container_id,
            read_files,
        }
    }

    /// Create with just a working directory, using defaults for everything else.
    pub fn for_dir(working_dir: impl Into<PathBuf>) -> Self {
        Self::new(SandboxConfig {
            working_dir: working_dir.into(),
            ..Default::default()
        })
    }

    /// Ensure the container is running, return its ID.
    async fn ensure_container(&self) -> Result<String, String> {
        let mut guard = self.container_id.lock().await;
        if let Some(ref id) = *guard {
            // Check if still running
            let check = tokio::process::Command::new("docker")
                .args(["inspect", "--format", "{{.State.Running}}", id])
                .output()
                .await
                .map_err(|e| format!("docker inspect failed: {}", e))?;
            if String::from_utf8_lossy(&check.stdout).trim() == "true" {
                return Ok(id.clone());
            }
        }

        // Start a new container
        let cwd = std::fs::canonicalize(&self.config.working_dir)
            .unwrap_or_else(|_| self.config.working_dir.clone());

        let container_name = format!(
            "car-sandbox-{}",
            uuid::Uuid::new_v4().to_string().split('-').next().unwrap()
        );

        let mut args = vec![
            "run".to_string(),
            "-d".into(),
            "--name".into(),
            container_name.clone(),
            "-v".into(),
            format!("{}:/workspace", cwd.display()),
            "-w".into(),
            "/workspace".into(),
            "--network".into(),
            "none".into(), // No network by default for safety
        ];

        // Add environment variables
        for (k, v) in &self.config.env {
            args.push("-e".into());
            args.push(format!("{}={}", k, v));
        }

        args.push(self.config.image.clone());
        // Keep container alive with a sleep loop
        args.push("sleep".into());
        args.push("infinity".into());

        let output = tokio::process::Command::new("docker")
            .args(&args)
            .output()
            .await
            .map_err(|e| format!("failed to start container: {}", e))?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(format!("docker run failed: {}", stderr));
        }

        let id = String::from_utf8_lossy(&output.stdout).trim().to_string();
        tracing::info!(container_id = %id, image = %self.config.image, "sandbox container started");
        *guard = Some(id.clone());
        Ok(id)
    }

    /// Execute a command inside the container via `docker exec`.
    ///
    /// `cmd` is the argv to run (for example `["sh", "-c", "ls"]`). The
    /// container is started first if necessary.
    ///
    /// Returns `(stdout, stderr, exit_code)` with both streams decoded lossily
    /// as UTF-8. The exit code is `-1` when the process reported none (for
    /// example when it was terminated by a signal).
    ///
    /// # Errors
    ///
    /// Returns a message if the container cannot be ensured, if `docker exec`
    /// cannot be run, or if the command exceeds `command_timeout_secs`.
    async fn exec_in_container(&self, cmd: &[&str]) -> Result<(String, String, i32), String> {
        let container_id = self.ensure_container().await?;

        let timeout = std::time::Duration::from_secs(self.config.command_timeout_secs);
        let result = tokio::time::timeout(timeout, async {
            tokio::process::Command::new("docker")
                .args(["exec", &container_id])
                .args(cmd)
                // Without this, dropping the future on timeout leaves the
                // `docker exec` client process running on the host.
                // NOTE(review): this kills the host-side client only; the
                // process inside the container may keep running — confirm
                // whether that also needs to be terminated.
                .kill_on_drop(true)
                .output()
                .await
        })
        .await;

        match result {
            Ok(Ok(output)) => {
                let stdout = String::from_utf8_lossy(&output.stdout).to_string();
                let stderr = String::from_utf8_lossy(&output.stderr).to_string();
                let code = output.status.code().unwrap_or(-1);
                Ok((stdout, stderr, code))
            }
            Ok(Err(e)) => Err(format!("exec failed: {}", e)),
            Err(_) => Err(format!(
                "command timed out after {}s",
                self.config.command_timeout_secs
            )),
        }
    }

    /// Stop and remove the container.
    pub async fn cleanup(&self) {
        let mut guard = self.container_id.lock().await;
        if let Some(ref id) = *guard {
            let _ = tokio::process::Command::new("docker")
                .args(["rm", "-f", id])
                .output()
                .await;
            tracing::info!(container_id = %id, "sandbox container removed");
        }
        *guard = None;
    }

    // --- Tool implementations ---

    async fn exec_shell(&self, params: &Value) -> Result<Value, String> {
        let command = params
            .get("command")
            .and_then(|v| v.as_str())
            .ok_or("missing 'command' parameter")?;

        let (stdout, stderr, exit_code) = self.exec_in_container(&["sh", "-c", command]).await?;

        Ok(json!({
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": exit_code,
        }))
    }

    /// `read_file` tool: return the text of a workspace file, optionally
    /// restricted to a window of lines.
    ///
    /// Parameters (JSON):
    /// - `path` (string, required): path joined onto the working directory.
    /// - `offset` (integer, optional): zero-based index of the first line to
    ///   return; defaults to 0.
    /// - `limit` (integer, optional): maximum number of lines to return.
    ///
    /// Returns `{ "content": ..., "total_lines": ... }`. When a window is
    /// requested the selected lines are joined with `\n`; otherwise the file
    /// content is returned unchanged. The path is recorded in `read_files`.
    ///
    /// NOTE(review): `path` is joined onto the host working directory without
    /// rejecting absolute paths or `..` components, so this can read outside
    /// the mounted workspace — confirm whether callers validate paths.
    ///
    /// # Errors
    ///
    /// Returns a message if `path` is missing or the file cannot be read as
    /// UTF-8 text.
    async fn exec_read_file(&self, params: &Value) -> Result<Value, String> {
        /// Convert a JSON integer to `usize`, saturating instead of truncating.
        fn to_usize(v: u64) -> usize {
            usize::try_from(v).unwrap_or(usize::MAX)
        }

        let path = params
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or("missing 'path' parameter")?;
        let offset = params
            .get("offset")
            .and_then(|v| v.as_u64())
            .map_or(0, to_usize);
        let limit = params.get("limit").and_then(|v| v.as_u64()).map(to_usize);

        // Read from the mounted workspace on the host (faster than docker exec)
        let full_path = self.config.working_dir.join(path);
        let content = tokio::fs::read_to_string(&full_path)
            .await
            .map_err(|e| format!("failed to read file: {}", e))?;

        let (result, total_lines) = if offset > 0 || limit.is_some() {
            let lines: Vec<&str> = content.lines().collect();
            let start = offset.min(lines.len());
            // `saturating_add` keeps a huge `limit` from overflowing, which
            // would otherwise wrap below `start` and panic on the slice.
            let end = limit.map_or(lines.len(), |l| start.saturating_add(l).min(lines.len()));
            (lines[start..end].join("\n"), lines.len())
        } else {
            let total = content.lines().count();
            (content, total)
        };

        // Track as read
        if let Ok(mut set) = self.read_files.lock() {
            set.insert(path.to_string());
        }

        Ok(json!({ "content": result, "total_lines": total_lines }))
    }

    /// `write_file` tool: create or overwrite a workspace file.
    ///
    /// Parameters (JSON): `path` (string, required, joined onto the working
    /// directory) and `content` (string, required). Missing parent directories
    /// are created. The path is recorded in `read_files` afterwards.
    ///
    /// Returns `{ "written": path }`.
    ///
    /// NOTE(review): `path` is joined onto the host working directory without
    /// rejecting absolute paths or `..` components, so this can write outside
    /// the mounted workspace — confirm whether callers validate paths.
    ///
    /// # Errors
    ///
    /// Returns a message if a parameter is missing, if the parent directory
    /// cannot be created, or if the file cannot be written.
    async fn exec_write_file(&self, params: &Value) -> Result<Value, String> {
        let path = params
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or("missing 'path' parameter")?;
        let content = params
            .get("content")
            .and_then(|v| v.as_str())
            .ok_or("missing 'content' parameter")?;

        let full_path = self.config.working_dir.join(path);
        if let Some(parent) = full_path.parent() {
            // Report the real cause here instead of letting the write below
            // fail with a less specific error.
            tokio::fs::create_dir_all(parent)
                .await
                .map_err(|e| format!("failed to create parent directory: {}", e))?;
        }
        tokio::fs::write(&full_path, content)
            .await
            .map_err(|e| format!("failed to write file: {}", e))?;

        if let Ok(mut set) = self.read_files.lock() {
            set.insert(path.to_string());
        }

        Ok(json!({ "written": path }))
    }

    async fn exec_edit_file(&self, params: &Value) -> Result<Value, String> {
        let path = params
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or("missing 'path' parameter")?;
        let old_text = params
            .get("old_text")
            .and_then(|v| v.as_str())
            .ok_or("missing 'old_text' parameter")?;
        let new_text = params
            .get("new_text")
            .and_then(|v| v.as_str())
            .ok_or("missing 'new_text' parameter")?;

        let full_path = self.config.working_dir.join(path);
        let content = tokio::fs::read_to_string(&full_path)
            .await
            .map_err(|e| format!("failed to read file: {}", e))?;

        let count = content.matches(old_text).count();
        if count == 0 {
            // Try whitespace-normalized match
            let normalize = |s: &str| -> String {
                s.lines()
                    .map(|l| l.trim_end())
                    .collect::<Vec<_>>()
                    .join("\n")
            };
            let norm_content = normalize(&content);
            let norm_old = normalize(old_text);
            if norm_content.matches(&norm_old).count() == 1 {
                let norm_pos = norm_content.find(&norm_old).unwrap();
                let start_line = norm_content[..norm_pos].matches('\n').count();
                let old_line_count = old_text.lines().count();
                let actual_lines: Vec<&str> = content.lines().collect();
                let end_line = (start_line + old_line_count).min(actual_lines.len());
                let actual_old = actual_lines[start_line..end_line].join("\n");
                if content.matches(&actual_old).count() == 1 {
                    let new_content = content.replacen(&actual_old, new_text, 1);
                    tokio::fs::write(&full_path, &new_content)
                        .await
                        .map_err(|e| format!("failed to write: {}", e))?;
                    return Ok(json!({
                        "edited": path,
                        "diff_summary": "whitespace-normalized match",
                    }));
                }
            }

            let first_line = old_text.lines().next().unwrap_or("").trim();
            let mut hint = String::new();
            if !first_line.is_empty() {
                for (i, line) in content.lines().enumerate() {
                    if line.contains(first_line) {
                        let lines: Vec<&str> = content.lines().collect();
                        let start = i.saturating_sub(2);
                        let end = (i + old_text.lines().count() + 2).min(lines.len());
                        hint = format!(
                            "\nActual content at lines {}-{}:\n```\n{}\n```",
                            start + 1,
                            end,
                            lines[start..end].join("\n")
                        );
                        break;
                    }
                }
            }
            return Err(format!("old_text not found in {}.{}", path, hint));
        }
        if count > 1 {
            return Err(format!(
                "old_text found {} times in {} — must be unique",
                count, path
            ));
        }

        let new_content = content.replacen(old_text, new_text, 1);
        tokio::fs::write(&full_path, &new_content)
            .await
            .map_err(|e| format!("failed to write: {}", e))?;

        let old_lines = old_text.lines().count();
        let new_lines = new_text.lines().count();
        let diff_summary = format!("replaced {} lines with {} lines", old_lines, new_lines);

        Ok(json!({
            "edited": path,
            "diff_summary": diff_summary,
            "new_content": if new_text.len() > 2000 { &new_text[..2000] } else { new_text },
        }))
    }

    async fn exec_list_dir(&self, params: &Value) -> Result<Value, String> {
        let path = params.get("path").and_then(|v| v.as_str()).unwrap_or(".");
        let full_path = self.config.working_dir.join(path);

        let mut entries = Vec::new();
        let mut reader = tokio::fs::read_dir(&full_path)
            .await
            .map_err(|e| format!("failed to read dir: {}", e))?;

        while let Ok(Some(entry)) = reader.next_entry().await {
            let name = entry.file_name().to_string_lossy().to_string();
            if name.starts_with('.') {
                continue;
            }
            let is_dir = entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false);
            entries.push(json!({ "name": name, "is_dir": is_dir }));
        }

        Ok(json!({ "entries": entries }))
    }

    /// `grep_files` tool: recursive extended-regex search inside the container.
    ///
    /// Parameters (JSON):
    /// - `pattern` (string, required): extended regular expression.
    /// - `path` (string, optional): a single file or directory to search;
    ///   defaults to `"."`. It is passed to `grep` as one quoted argument, so
    ///   spaces and shell metacharacters are taken literally.
    /// - `max_results` (integer, optional): maximum matches returned; default 50.
    ///
    /// Only files matching a fixed list of source/text extensions are searched.
    ///
    /// Returns `{ "matches": [...], "count": n, "truncated": bool }`, where
    /// `truncated` is true when more than `max_results` matching lines exist.
    ///
    /// # Errors
    ///
    /// Returns a message if `pattern` is missing or the command cannot be run
    /// in the container. A failing `grep` (for example an invalid pattern)
    /// yields an empty result, because its stderr and exit code are not
    /// inspected.
    async fn exec_grep_files(&self, params: &Value) -> Result<Value, String> {
        /// File globs that limit which files `grep` looks at.
        const INCLUDE_GLOBS: &[&str] = &[
            "*.py", "*.rs", "*.js", "*.ts", "*.go", "*.java", "*.toml", "*.json", "*.yaml",
            "*.yml", "*.html", "*.css", "*.sh", "*.txt", "*.cfg", "*.ini",
        ];

        /// Wrap `s` in single quotes for `sh`, escaping embedded single quotes.
        fn shell_quote(s: &str) -> String {
            format!("'{}'", s.replace('\'', "'\\''"))
        }

        let pattern = params
            .get("pattern")
            .and_then(|v| v.as_str())
            .ok_or("missing 'pattern' parameter")?;
        let path = params.get("path").and_then(|v| v.as_str()).unwrap_or(".");
        let max_results = params
            .get("max_results")
            .and_then(|v| v.as_u64())
            .map_or(50, |n| usize::try_from(n).unwrap_or(usize::MAX));

        // Ask for one line more than we return: that extra line is the only
        // way to tell that the output was cut off.
        let fetch = max_results.saturating_add(1);

        let includes = INCLUDE_GLOBS
            .iter()
            .map(|glob| format!("--include='{}'", glob))
            .collect::<Vec<_>>()
            .join(" ");

        // Run grep inside the container for proper PATH/locale handling.
        // `-e` keeps a pattern starting with '-' from being parsed as an
        // option; `--` does the same for the path.
        let cmd = format!(
            "grep -rn -E {} -e {} -- {} | head -n {}",
            includes,
            shell_quote(pattern),
            shell_quote(path),
            fetch
        );

        let (stdout, _stderr, _code) = self.exec_in_container(&["sh", "-c", &cmd]).await?;
        let mut found = stdout.lines();
        let lines: Vec<&str> = found.by_ref().take(max_results).collect();
        let truncated = found.next().is_some();

        Ok(json!({
            "matches": lines,
            "count": lines.len(),
            "truncated": truncated,
        }))
    }

    /// `find_files` tool: find files by name pattern inside the container.
    ///
    /// Parameters (JSON):
    /// - `pattern` (string, required): a `find -name` glob.
    /// - `path` (string, optional): a single directory to search; defaults to
    ///   `"."`. It is passed to `find` as one quoted argument, so spaces and
    ///   shell metacharacters are taken literally.
    ///
    /// Paths under hidden directories, `node_modules` and `__pycache__` are
    /// excluded, and at most 50 results are returned.
    ///
    /// Returns `{ "files": [...], "count": n }`.
    ///
    /// # Errors
    ///
    /// Returns a message if `pattern` is missing or the command cannot be run
    /// in the container.
    async fn exec_find_files(&self, params: &Value) -> Result<Value, String> {
        /// Wrap `s` in single quotes for `sh`, escaping embedded single quotes.
        fn shell_quote(s: &str) -> String {
            format!("'{}'", s.replace('\'', "'\\''"))
        }

        let pattern = params
            .get("pattern")
            .and_then(|v| v.as_str())
            .ok_or("missing 'pattern' parameter")?;
        let path = params.get("path").and_then(|v| v.as_str()).unwrap_or(".");

        let cmd = format!(
            "find {} -name {} -not -path '*/.*' -not -path '*/node_modules/*' -not -path '*/__pycache__/*' | head -n 50",
            shell_quote(path),
            shell_quote(pattern)
        );

        let (stdout, _stderr, _code) = self.exec_in_container(&["sh", "-c", &cmd]).await?;
        let files: Vec<&str> = stdout.lines().collect();

        Ok(json!({ "files": files, "count": files.len() }))
    }
}

#[async_trait]
impl car_engine::ToolExecutor for SandboxExecutor {
    async fn execute(&self, tool: &str, params: &Value) -> Result<Value, String> {
        match tool {
            "shell" => self.exec_shell(params).await,
            "read_file" => self.exec_read_file(params).await,
            "write_file" => self.exec_write_file(params).await,
            "edit_file" => self.exec_edit_file(params).await,
            "list_dir" => self.exec_list_dir(params).await,
            "grep_files" => self.exec_grep_files(params).await,
            "find_files" => self.exec_find_files(params).await,
            "git_status" | "git_diff" | "git_log" | "git_add" | "git_commit" => {
                // Git operations run on host via the mounted volume
                let git_cmd = match tool {
                    "git_status" => "git status --short",
                    "git_diff" => {
                        let staged = params
                            .get("staged")
                            .and_then(|v| v.as_bool())
                            .unwrap_or(false);
                        if staged {
                            "git diff --cached"
                        } else {
                            "git diff"
                        }
                    }
                    "git_log" => "git log --oneline -20",
                    _ => return Err(format!("git tool {} not implemented in sandbox", tool)),
                };
                self.exec_shell(&json!({ "command": git_cmd })).await
            }
            _ => Err(format!("unknown tool in sandbox: {}", tool)),
        }
    }
}

impl Drop for SandboxExecutor {
    fn drop(&mut self) {
        // Best-effort cleanup — can't async in Drop, so use blocking
        if let Ok(guard) = self.container_id.try_lock() {
            if let Some(ref id) = *guard {
                let _ = std::process::Command::new("docker")
                    .args(["rm", "-f", id])
                    .output();
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration uses the documented image, persistence
    /// setting and command timeout.
    #[test]
    fn sandbox_config_defaults() {
        let SandboxConfig {
            image,
            command_timeout_secs,
            persistent,
            ..
        } = SandboxConfig::default();

        assert_eq!(image, "python:3.11-slim");
        assert!(persistent);
        assert_eq!(command_timeout_secs, 120);
    }
}