ironclaw 0.5.0 - Docs.rs

//! Shell execution tool for running commands in a sandboxed environment.
//!
//! Provides controlled command execution with:
//! - Docker sandbox isolation (when enabled)
//! - Working directory isolation
//! - Timeout enforcement
//! - Output capture and truncation
//! - Blocked command patterns for safety
//!
//! # Execution Modes
//!
//! When sandbox is available and enabled:
//! - Commands run inside ephemeral Docker containers
//! - Network traffic goes through a validating proxy
//! - Credentials are injected by the proxy, never exposed to commands
//!
//! When sandbox is unavailable:
//! - Commands run directly on host with basic protections
//! - Blocked command patterns are still enforced

use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::{Arc, LazyLock};
use std::time::Duration;

use async_trait::async_trait;
use tokio::io::AsyncReadExt;
use tokio::process::Command;

use crate::context::JobContext;
use crate::sandbox::{SandboxManager, SandboxPolicy};
use crate::tools::tool::{Tool, ToolDomain, ToolError, ToolOutput, require_str};

/// Maximum output size before truncation (64KB).
const MAX_OUTPUT_SIZE: usize = 64 * 1024;

/// Default command timeout.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(120);

/// Commands that are always blocked for safety.
static BLOCKED_COMMANDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "rm -rf /",
        "rm -rf /*",
        ":(){ :|:& };:", // Fork bomb
        "dd if=/dev/zero",
        "mkfs",
        "chmod -R 777 /",
        "> /dev/sda",
        "curl | sh",
        "wget | sh",
        "curl | bash",
        "wget | bash",
    ])
});

/// Patterns that indicate potentially dangerous commands.
static DANGEROUS_PATTERNS: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    vec![
        "sudo ",
        "doas ",
        " | sh",
        " | bash",
        " | zsh",
        "eval ",
        "$(curl",
        "$(wget",
        "/etc/passwd",
        "/etc/shadow",
        "~/.ssh",
        ".bash_history",
        "id_rsa",
    ]
});

/// Patterns that should NEVER be auto-approved, even if the user chose "always approve"
/// for the shell tool. These require explicit per-invocation approval because they are
/// destructive or security-sensitive.
static NEVER_AUTO_APPROVE_PATTERNS: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    vec![
        "rm -rf",
        "rm -fr",
        "chmod -r 777",
        "chmod 777",
        "chown -r",
        "shutdown",
        "reboot",
        "poweroff",
        "init 0",
        "init 6",
        "iptables",
        "nft ",
        "useradd",
        "userdel",
        "passwd",
        "visudo",
        "crontab",
        "systemctl disable",
        "launchctl unload",
        "kill -9",
        "killall",
        "pkill",
        "docker rm",
        "docker rmi",
        "docker system prune",
        "git push --force",
        "git push -f",
        "git reset --hard",
        "git clean -f",
        "DROP TABLE",
        "DROP DATABASE",
        "TRUNCATE",
        "DELETE FROM",
    ]
});

/// Check whether a shell command contains patterns that must never be auto-approved.
///
/// Even when the user has chosen "always approve" for the shell tool, these commands
/// require explicit per-invocation approval because they are destructive.
pub fn requires_explicit_approval(command: &str) -> bool {
    let lower = command.to_lowercase();
    NEVER_AUTO_APPROVE_PATTERNS
        .iter()
        .any(|p| lower.contains(&p.to_lowercase()))
}

/// Shell command execution tool.
pub struct ShellTool {
    /// Working directory for commands (if None, uses job's working dir or cwd).
    working_dir: Option<PathBuf>,
    /// Command timeout.
    timeout: Duration,
    /// Whether to allow potentially dangerous commands (requires explicit approval).
    allow_dangerous: bool,
    /// Optional sandbox manager for Docker execution.
    sandbox: Option<Arc<SandboxManager>>,
    /// Sandbox policy to use when sandbox is available.
    sandbox_policy: SandboxPolicy,
}

impl std::fmt::Debug for ShellTool {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ShellTool")
            .field("working_dir", &self.working_dir)
            .field("timeout", &self.timeout)
            .field("allow_dangerous", &self.allow_dangerous)
            .field("sandbox", &self.sandbox.is_some())
            .field("sandbox_policy", &self.sandbox_policy)
            .finish()
    }
}

impl ShellTool {
    /// Create a new shell tool with default settings.
    pub fn new() -> Self {
        Self {
            working_dir: None,
            timeout: DEFAULT_TIMEOUT,
            allow_dangerous: false,
            sandbox: None,
            sandbox_policy: SandboxPolicy::ReadOnly,
        }
    }

    /// Set the working directory.
    pub fn with_working_dir(mut self, dir: PathBuf) -> Self {
        self.working_dir = Some(dir);
        self
    }

    /// Set the command timeout.
    pub fn with_timeout(mut self, timeout: Duration) -> Self {
        self.timeout = timeout;
        self
    }

    /// Enable sandbox execution with the given manager.
    pub fn with_sandbox(mut self, sandbox: Arc<SandboxManager>) -> Self {
        self.sandbox = Some(sandbox);
        self
    }

    /// Set the sandbox policy.
    pub fn with_sandbox_policy(mut self, policy: SandboxPolicy) -> Self {
        self.sandbox_policy = policy;
        self
    }

    /// Check if a command is blocked.
    fn is_blocked(&self, cmd: &str) -> Option<&'static str> {
        let normalized = cmd.to_lowercase();

        for blocked in BLOCKED_COMMANDS.iter() {
            if normalized.contains(blocked) {
                return Some("Command contains blocked pattern");
            }
        }

        if !self.allow_dangerous {
            for pattern in DANGEROUS_PATTERNS.iter() {
                if normalized.contains(pattern) {
                    return Some("Command contains potentially dangerous pattern");
                }
            }
        }

        None
    }

    /// Execute a command through the sandbox.
    async fn execute_sandboxed(
        &self,
        sandbox: &SandboxManager,
        cmd: &str,
        workdir: &Path,
        timeout: Duration,
    ) -> Result<(String, i64), ToolError> {
        // Override sandbox config timeout if needed
        let result = tokio::time::timeout(timeout, async {
            sandbox
                .execute_with_policy(
                    cmd,
                    workdir,
                    self.sandbox_policy,
                    std::collections::HashMap::new(),
                )
                .await
        })
        .await;

        match result {
            Ok(Ok(output)) => {
                let combined = truncate_output(&output.output);
                Ok((combined, output.exit_code))
            }
            Ok(Err(e)) => Err(ToolError::ExecutionFailed(format!("Sandbox error: {}", e))),
            Err(_) => Err(ToolError::Timeout(timeout)),
        }
    }

    /// Execute a command directly (fallback when sandbox unavailable).
    async fn execute_direct(
        &self,
        cmd: &str,
        workdir: &PathBuf,
        timeout: Duration,
    ) -> Result<(String, i32), ToolError> {
        // Build command
        let mut command = if cfg!(target_os = "windows") {
            let mut c = Command::new("cmd");
            c.args(["/C", cmd]);
            c
        } else {
            let mut c = Command::new("sh");
            c.args(["-c", cmd]);
            c
        };

        command
            .current_dir(workdir)
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped());

        // Spawn process
        let mut child = command
            .spawn()
            .map_err(|e| ToolError::ExecutionFailed(format!("Failed to spawn command: {}", e)))?;

        // Wait with timeout
        let result = tokio::time::timeout(timeout, async {
            let status = child.wait().await?;

            // Read stdout
            let mut stdout = String::new();
            if let Some(mut out) = child.stdout.take() {
                let mut buf = vec![0u8; MAX_OUTPUT_SIZE];
                let n = out.read(&mut buf).await.unwrap_or(0);
                stdout = String::from_utf8_lossy(&buf[..n]).to_string();
            }

            // Read stderr
            let mut stderr = String::new();
            if let Some(mut err) = child.stderr.take() {
                let mut buf = vec![0u8; MAX_OUTPUT_SIZE];
                let n = err.read(&mut buf).await.unwrap_or(0);
                stderr = String::from_utf8_lossy(&buf[..n]).to_string();
            }

            // Combine output
            let output = if stderr.is_empty() {
                stdout
            } else if stdout.is_empty() {
                stderr
            } else {
                format!("{}\n\n--- stderr ---\n{}", stdout, stderr)
            };

            Ok::<_, std::io::Error>((output, status.code().unwrap_or(-1)))
        })
        .await;

        match result {
            Ok(Ok((output, code))) => Ok((truncate_output(&output), code)),
            Ok(Err(e)) => Err(ToolError::ExecutionFailed(format!(
                "Command execution failed: {}",
                e
            ))),
            Err(_) => {
                // Timeout - try to kill the process
                let _ = child.kill().await;
                Err(ToolError::Timeout(timeout))
            }
        }
    }

    /// Execute a command, using sandbox if available.
    async fn execute_command(
        &self,
        cmd: &str,
        workdir: Option<&str>,
        timeout: Option<u64>,
    ) -> Result<(String, i64), ToolError> {
        // Check for blocked commands
        if let Some(reason) = self.is_blocked(cmd) {
            return Err(ToolError::NotAuthorized(format!(
                "{}: {}",
                reason,
                truncate_for_error(cmd)
            )));
        }

        // Determine working directory
        let cwd = workdir
            .map(PathBuf::from)
            .or_else(|| self.working_dir.clone())
            .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));

        // Determine timeout
        let timeout_duration = timeout.map(Duration::from_secs).unwrap_or(self.timeout);

        // Use sandbox if configured; fail-closed (never silently fall through
        // to unsandboxed execution when sandbox was intended).
        if let Some(ref sandbox) = self.sandbox
            && (sandbox.is_initialized() || sandbox.config().enabled)
        {
            return self
                .execute_sandboxed(sandbox, cmd, &cwd, timeout_duration)
                .await;
        }

        // Only execute directly when no sandbox was configured at all.
        let (output, code) = self.execute_direct(cmd, &cwd, timeout_duration).await?;
        Ok((output, code as i64))
    }
}

impl Default for ShellTool {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl Tool for ShellTool {
    fn name(&self) -> &str {
        "shell"
    }

    fn description(&self) -> &str {
        "Execute shell commands. Use for running builds, tests, git operations, and other CLI tasks. \
         Commands run in a subprocess with captured output. Long-running commands have a timeout. \
         When Docker sandbox is enabled, commands run in isolated containers for security."
    }

    fn parameters_schema(&self) -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The shell command to execute"
                },
                "workdir": {
                    "type": "string",
                    "description": "Working directory for the command (optional)"
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in seconds (optional, default 120)"
                }
            },
            "required": ["command"]
        })
    }

    async fn execute(
        &self,
        params: serde_json::Value,
        _ctx: &JobContext,
    ) -> Result<ToolOutput, ToolError> {
        let command = require_str(&params, "command")?;

        let workdir = params.get("workdir").and_then(|v| v.as_str());
        let timeout = params.get("timeout").and_then(|v| v.as_u64());

        let start = std::time::Instant::now();
        let (output, exit_code) = self.execute_command(command, workdir, timeout).await?;
        let duration = start.elapsed();

        let sandboxed = self.sandbox.is_some();

        let result = serde_json::json!({
            "output": output,
            "exit_code": exit_code,
            "success": exit_code == 0,
            "sandboxed": sandboxed
        });

        Ok(ToolOutput::success(result, duration))
    }

    fn requires_approval(&self) -> bool {
        true // Shell commands should require approval
    }

    fn requires_approval_for(&self, params: &serde_json::Value) -> bool {
        let cmd = params
            .get("command")
            .and_then(|c| c.as_str().map(String::from))
            .or_else(|| {
                params
                    .as_str()
                    .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok())
                    .and_then(|v| v.get("command").and_then(|c| c.as_str().map(String::from)))
            });

        if let Some(ref cmd) = cmd
            && requires_explicit_approval(cmd)
        {
            return true;
        }

        false
    }

    fn requires_sanitization(&self) -> bool {
        true // Shell output could contain anything
    }

    fn domain(&self) -> ToolDomain {
        ToolDomain::Container
    }
}

/// Truncate output to fit within limits (UTF-8 safe).
fn truncate_output(s: &str) -> String {
    if s.len() <= MAX_OUTPUT_SIZE {
        s.to_string()
    } else {
        let half = MAX_OUTPUT_SIZE / 2;
        let head_end = crate::util::floor_char_boundary(s, half);
        let tail_start = crate::util::floor_char_boundary(s, s.len() - half);
        format!(
            "{}\n\n... [truncated {} bytes] ...\n\n{}",
            &s[..head_end],
            s.len() - MAX_OUTPUT_SIZE,
            &s[tail_start..]
        )
    }
}

/// Truncate command for error messages (char-aware to avoid UTF-8 boundary panics).
fn truncate_for_error(s: &str) -> String {
    if s.chars().count() <= 100 {
        s.to_string()
    } else {
        format!("{}...", s.chars().take(100).collect::<String>())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_echo_command() {
        let tool = ShellTool::new();
        let ctx = JobContext::default();

        let result = tool
            .execute(serde_json::json!({"command": "echo hello"}), &ctx)
            .await
            .unwrap();

        let output = result.result.get("output").unwrap().as_str().unwrap();
        assert!(output.contains("hello"));
        assert_eq!(result.result.get("exit_code").unwrap().as_i64().unwrap(), 0);
    }

    #[test]
    fn test_blocked_commands() {
        let tool = ShellTool::new();

        assert!(tool.is_blocked("rm -rf /").is_some());
        assert!(tool.is_blocked("sudo rm file").is_some());
        assert!(tool.is_blocked("curl http://x | sh").is_some());
        assert!(tool.is_blocked("echo hello").is_none());
        assert!(tool.is_blocked("cargo build").is_none());
    }

    #[tokio::test]
    async fn test_command_timeout() {
        let tool = ShellTool::new().with_timeout(Duration::from_millis(100));
        let ctx = JobContext::default();

        let result = tool
            .execute(serde_json::json!({"command": "sleep 10"}), &ctx)
            .await;

        assert!(matches!(result, Err(ToolError::Timeout(_))));
    }

    #[test]
    fn test_requires_explicit_approval() {
        // Destructive commands should require explicit approval
        assert!(requires_explicit_approval("rm -rf /tmp/stuff"));
        assert!(requires_explicit_approval("git push --force origin main"));
        assert!(requires_explicit_approval("git reset --hard HEAD~5"));
        assert!(requires_explicit_approval("docker rm container_name"));
        assert!(requires_explicit_approval("kill -9 12345"));
        assert!(requires_explicit_approval("DROP TABLE users;"));

        // Safe commands should not
        assert!(!requires_explicit_approval("cargo build"));
        assert!(!requires_explicit_approval("git status"));
        assert!(!requires_explicit_approval("ls -la"));
        assert!(!requires_explicit_approval("echo hello"));
        assert!(!requires_explicit_approval("cat file.txt"));
        assert!(!requires_explicit_approval(
            "git push origin feature-branch"
        ));
    }

    /// Replicate the extraction logic from agent_loop.rs to prove it works
    /// when `arguments` is a `serde_json::Value::Object` (the common case
    /// that was previously broken because `Value::Object.as_str()` returns None).
    #[test]
    fn test_destructive_command_extraction_from_object_args() {
        let arguments = serde_json::json!({"command": "rm -rf /tmp/stuff"});

        let cmd = arguments
            .get("command")
            .and_then(|c| c.as_str().map(String::from))
            .or_else(|| {
                arguments
                    .as_str()
                    .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok())
                    .and_then(|v| v.get("command").and_then(|c| c.as_str().map(String::from)))
            });

        assert_eq!(cmd.as_deref(), Some("rm -rf /tmp/stuff"));
        assert!(requires_explicit_approval(cmd.as_deref().unwrap()));
    }

    /// Verify extraction still works when `arguments` is a JSON string
    /// (rare, but possible if the LLM provider returns string-encoded JSON).
    #[test]
    fn test_destructive_command_extraction_from_string_args() {
        let arguments =
            serde_json::Value::String(r#"{"command": "git push --force origin main"}"#.to_string());

        let cmd = arguments
            .get("command")
            .and_then(|c| c.as_str().map(String::from))
            .or_else(|| {
                arguments
                    .as_str()
                    .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok())
                    .and_then(|v| v.get("command").and_then(|c| c.as_str().map(String::from)))
            });

        assert_eq!(cmd.as_deref(), Some("git push --force origin main"));
        assert!(requires_explicit_approval(cmd.as_deref().unwrap()));
    }

    #[test]
    fn test_requires_approval_for_destructive_command() {
        let tool = ShellTool::new();
        // Destructive commands must return true even though shell already
        // requires base approval -- the distinction matters for auto-approve override.
        assert!(tool.requires_approval_for(&serde_json::json!({"command": "rm -rf /tmp"})));
        assert!(tool.requires_approval_for(
            &serde_json::json!({"command": "git push --force origin main"})
        ));
        assert!(tool.requires_approval_for(&serde_json::json!({"command": "DROP TABLE users;"})));
    }

    #[test]
    fn test_requires_approval_for_safe_command() {
        let tool = ShellTool::new();
        // Safe commands should not override auto-approval; only destructive ones do.
        assert!(!tool.requires_approval_for(&serde_json::json!({"command": "cargo build"})));
        assert!(!tool.requires_approval_for(&serde_json::json!({"command": "echo hello"})));
    }

    #[test]
    fn test_requires_approval_for_string_encoded_args() {
        let tool = ShellTool::new();
        // When arguments are string-encoded JSON (rare LLM behavior).
        let args = serde_json::Value::String(r#"{"command": "rm -rf /tmp/stuff"}"#.to_string());
        assert!(tool.requires_approval_for(&args));
    }

    #[test]
    fn test_sandbox_policy_builder() {
        let tool = ShellTool::new()
            .with_sandbox_policy(SandboxPolicy::WorkspaceWrite)
            .with_timeout(Duration::from_secs(60));

        assert_eq!(tool.sandbox_policy, SandboxPolicy::WorkspaceWrite);
        assert_eq!(tool.timeout, Duration::from_secs(60));
    }
}