// codescout 0.15.0

//! Core command execution logic for run_command.

use std::path::Path;

use serde_json::{json, Value};

use super::super::{RecoverableError, ToolContext};
use super::output::handle_successful_output;

/// Scope guard holding the path of a temporary file; the file is removed from
/// disk when the guard is dropped.
pub(crate) struct TmpfileGuard(pub(crate) String);

impl Drop for TmpfileGuard {
    /// Removes the file at the stored path. Failures (for example, the file
    /// no longer exists) are deliberately ignored: cleanup is best-effort.
    fn drop(&mut self) {
        let Self(path) = self;
        let _ = std::fs::remove_file(path.as_str());
    }
}

/// Scope guard around a spawned tokio task: the task is aborted as soon as
/// the guard is dropped, so it cannot outlive the scope that owns the guard.
struct AbortOnDrop(tokio::task::JoinHandle<()>);

impl Drop for AbortOnDrop {
    /// Requests cancellation of the wrapped task.
    fn drop(&mut self) {
        let Self(handle) = self;
        handle.abort();
    }
}

/// Guard that force-kills a background child if dropped while armed. Used during
/// the `spawn_background_command` warm-up window so that a cancelled tool
/// future does not leave orphaned processes behind.
///
/// Set `disarmed` to `true` once the child should be left running. A guard
/// whose `pid` is `None` (or an out-of-range value) never signals anything.
struct BackgroundKillGuard {
    /// OS process id of the child to kill, when one is known.
    pid: Option<u32>,
    /// When `true`, dropping the guard is a no-op.
    disarmed: bool,
}

impl Drop for BackgroundKillGuard {
    fn drop(&mut self) {
        if self.disarmed {
            return;
        }
        // PID 0 never identifies a single child: on unix `kill(0, ..)` signals
        // every process in the caller's own process group. Refuse it outright.
        let pid = match self.pid {
            Some(pid) if pid != 0 => pid,
            _ => return,
        };
        #[cfg(unix)]
        {
            // A value above `pid_t::MAX` would wrap to a negative number in the
            // cast below, and negative arguments to kill(2) address a process
            // group (or, for -1, every process we may signal).
            if pid > libc::pid_t::MAX as u32 {
                return;
            }
            // SAFETY: kill(2) has no memory-safety preconditions. `pid` is
            // strictly positive and fits in `pid_t`, so exactly one process is
            // targeted. If that process is already gone the call fails with
            // ESRCH and does nothing.
            // NOTE(review): if the PID was reaped and recycled before this
            // runs, the signal would reach an unrelated process — callers
            // should keep the child un-reaped while the guard is armed.
            unsafe {
                libc::kill(pid as libc::pid_t, libc::SIGKILL);
            }
        }
        #[cfg(windows)]
        {
            // Win32 TerminateProcess (forced, ≈ `taskkill /F`) with no child
            // spawn. Spawning taskkill here would stall under EDR at the worst
            // possible moment — a cancellation Drop — defeating the guard's
            // cancel-fast intent. Mirrors the no-spawn unix SIGKILL arm above.
            let _ = crate::platform::terminate_process(pid);
        }
    }
}

/// Resolves the working directory for a command.
///
/// With no `cwd_param` the project `root` is returned unchanged. Otherwise
/// `cwd_param` is joined onto `root`, canonicalized (so symlinks and `..`
/// components are resolved before the containment check), and accepted only
/// when the result is a directory located under the canonical project root or
/// under the platform temp directory.
///
/// # Errors
///
/// Returns a `RecoverableError` when the path cannot be canonicalized (for
/// example, it does not exist), when it resolves outside both allowed
/// locations, or when it resolves to something that is not a directory.
fn resolve_work_dir(root: &Path, cwd_param: Option<&str>) -> anyhow::Result<std::path::PathBuf> {
    let rel = match cwd_param {
        Some(rel) => rel,
        None => return Ok(root.to_path_buf()),
    };

    let candidate = root.join(rel);
    let canonical = candidate.canonicalize().map_err(|e| {
        RecoverableError::with_hint(
            format!("cwd '{}' is not a valid directory: {}", rel, e),
            "Provide a relative path to an existing subdirectory of the project.",
        )
    })?;

    // Compare canonical forms on both sides; fall back to the raw path when
    // the root or temp directory itself cannot be canonicalized.
    let canonical_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    let tmp = crate::platform::temp_dir();
    let canonical_tmp = tmp.canonicalize().unwrap_or(tmp);
    let under_project = canonical.starts_with(canonical_root.as_path());
    let under_tmp = canonical.starts_with(canonical_tmp.as_path());
    if !under_project && !under_tmp {
        return Err(RecoverableError::with_hint(
            format!("cwd '{}' escapes project root", rel),
            "The cwd must be a subdirectory within the project, or a path under the \
             platform temp directory.",
        )
        .into());
    }

    // `canonicalize` succeeds for regular files as well. Reject them here so
    // the caller gets an actionable error instead of an opaque spawn failure.
    if !canonical.is_dir() {
        return Err(RecoverableError::with_hint(
            format!("cwd '{}' is not a directory", rel),
            "Provide a relative path to an existing subdirectory of the project.",
        )
        .into());
    }

    Ok(canonical)
}

/// Spawns `resolved_command` in `work_dir` as a background process whose
/// stdout and stderr are both redirected to a kept `codescout-bg-*.log` file,
/// waits a 5-second warm-up window, and then returns.
///
/// The returned JSON carries `output_id` (the handle obtained from
/// `ctx.output_buffer.store_background`), a `hint` describing how to inspect
/// the log, and — when the log is non-empty after the warm-up — `stdout`
/// holding at most its last 50 lines.
///
/// If this future is dropped during the warm-up (tool cancellation), the
/// child is force-killed and the log file is deleted. The log file is also
/// deleted when setup fails before the command is running.
///
/// # Errors
///
/// Fails when the log file cannot be created, kept or duplicated, or when the
/// command cannot be spawned.
async fn spawn_background_command(
    resolved_command: &str,
    work_dir: &Path,
    ctx: &ToolContext,
) -> anyhow::Result<Value> {
    /// Deletes the kept log file on drop while `armed`. Covers spawn failures
    /// and cancellation, where nobody else would ever clean the file up.
    struct LogCleanup {
        path: std::path::PathBuf,
        armed: bool,
    }

    impl Drop for LogCleanup {
        fn drop(&mut self) {
            if self.armed {
                let _ = std::fs::remove_file(&self.path);
            }
        }
    }

    let log_tmp = tempfile::Builder::new()
        .prefix("codescout-bg-")
        .suffix(".log")
        .tempfile()?;
    let (log_file, log_path) = log_tmp.keep()?;
    // Declared before the child and the kill guard so that, on cancellation,
    // it is dropped after them: the process is killed first, then the log
    // file is removed.
    let mut log_cleanup = LogCleanup {
        path: log_path,
        armed: true,
    };
    let log_stderr = log_file.try_clone()?;

    let mut cmd = crate::platform::shell_command_configured(resolved_command);
    let child = cmd
        .current_dir(work_dir)
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::from(log_file))
        .stderr(std::process::Stdio::from(log_stderr))
        .spawn()?;

    // Cancel-aware warm-up: during the 5s window we hold a guard that
    // force-kills the child if this future is dropped (tool cancellation).
    // The child handle stays alive for the whole window so the PID held by
    // the guard cannot be reaped and recycled while the guard is still armed.
    // After the window elapses normally the guard disarms, the tokio Child
    // handle is dropped, and the process runs detached.
    let mut kill_guard = BackgroundKillGuard {
        pid: child.id(),
        disarmed: false,
    };

    tokio::time::sleep(std::time::Duration::from_secs(5)).await;
    kill_guard.disarmed = true;
    log_cleanup.armed = false;
    drop(child);

    // From here on the log file belongs to the output buffer.
    let log_path = std::mem::take(&mut log_cleanup.path);

    // Decode lossily, like the foreground path does, so a log containing
    // non-UTF-8 bytes still yields a tail instead of an empty string.
    let log_content = std::fs::read(&log_path)
        .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
        .unwrap_or_default();
    let tail_50: String = {
        let lines: Vec<&str> = log_content.lines().collect();
        let start = lines.len().saturating_sub(50);
        lines[start..].join("\n")
    };

    let ref_id = ctx.output_buffer.store_background(log_path);

    let mut bg_result = serde_json::json!({
        "output_id": ref_id,
        "hint": format!(
            "Process running. Output captured in {} — use run_command(\"tail -50 {}\") or grep/cat as needed.",
            ref_id, ref_id
        )
    });
    if !tail_50.is_empty() {
        bg_result["stdout"] = json!(tail_50);
    }
    Ok(bg_result)
}

/// Rewrites a command that ends in a terminal filter stage so that the
/// unfiltered stream is also captured in a temp file via `tee`.
///
/// Returns the command to execute plus, when a `tee` was injected, a
/// [`TmpfileGuard`] that owns the capture file and deletes it on drop. The
/// command is returned unchanged (with no guard) when `buffer_only` is set or
/// when `detect_terminal_filter` finds nothing to rewrite.
///
/// # Errors
///
/// Fails when the temp file cannot be created or kept, or when its path
/// contains characters outside the shell-safe allow-list. In the latter case
/// the temp file is removed before returning.
fn inject_tee(
    resolved_command: &str,
    buffer_only: bool,
) -> anyhow::Result<(String, Option<TmpfileGuard>)> {
    use super::super::command_summary::detect_terminal_filter;

    if buffer_only {
        return Ok((resolved_command.to_string(), None));
    }
    // Presumably the byte offset of the pipe character that precedes the
    // terminal filter stage — the slicing below relies on that; verify
    // against `detect_terminal_filter`.
    let pipe_pos = match detect_terminal_filter(resolved_command) {
        Some(pos) => pos,
        None => return Ok((resolved_command.to_string(), None)),
    };

    // Use a tempfile-generated name for an unpredictable path (SF-3).
    // `keep()` turns it into a regular file that is no longer auto-deleted.
    let kept_path = tempfile::Builder::new()
        .prefix("codescout-unfiltered-")
        .tempfile()?
        .into_temp_path()
        .keep()?;
    // Take ownership of cleanup immediately, so every exit path below —
    // including the validation error — removes the file.
    let guard = TmpfileGuard(kept_path.to_string_lossy().into_owned());

    // Safety (SF-4): the path is spliced into a shell command unquoted, so it
    // must consist solely of alphanumerics, '/', '-', '_' and '.' — no shell
    // metacharacters.
    if !guard
        .0
        .chars()
        .all(|c| c.is_alphanumeric() || c == '/' || c == '-' || c == '_' || c == '.')
    {
        return Err(RecoverableError::new(format!(
            "temporary file path contains unexpected characters: {}",
            guard.0,
        ))
        .into());
    }

    let cmd = format!(
        "{} | tee {} | {}",
        resolved_command[..pipe_pos].trim_end(),
        guard.0,
        resolved_command[pipe_pos + 1..].trim_start()
    );
    Ok((cmd, Some(guard)))
}

/// Caps `output` at roughly `limit` bytes.
///
/// When `output` fits within `limit` it is returned as-is with `false`.
/// Otherwise the text is cut at the offset returned by
/// `crate::tools::floor_char_boundary(output, limit)`, a truncation notice is
/// appended, and the flag is `true`.
#[allow(dead_code)] // Kept as safety net for byte-level shell_output_limit_bytes config.
pub(crate) fn truncate_output(output: &str, limit: usize) -> (String, bool) {
    let total_bytes = output.len();
    if total_bytes <= limit {
        return (output.to_string(), false);
    }

    let cut = crate::tools::floor_char_boundary(output, limit);
    let kept = &output[..cut];
    let truncated = format!(
        "{}\n... (truncated, showing first {} of {} bytes)",
        kept, cut, total_bytes
    );
    (truncated, true)
}

/// Maps a command line onto a known-slow category, if any.
///
/// The command is lower-cased and checked for substrings, bucket by bucket in
/// priority order (test suites, builds, ETL/eval/training, python scripts);
/// the label of the first bucket with a match is returned. The label is used
/// to tailor the timeout-error hint. Matching is deliberately conservative so
/// the generic fallback hint still fires for ad-hoc commands.
///
/// Source for the patterns: docs/usage-reports/2026-05-27-usage-analysis.md
/// query F (slow run_commands) — the top-15 timeout offenders.
pub(crate) fn classify_slow_command(cmd: &str) -> Option<&'static str> {
    // (label, substrings) pairs; earlier buckets win over later ones.
    const BUCKETS: &[(&str, &[&str])] = &[
        (
            "test suite",
            &[
                "pytest",
                "cargo test",
                "npm test",
                "pnpm test",
                "yarn test",
                "tox ",
            ],
        ),
        (
            "build",
            &[
                "cargo build",
                "npm run build",
                "pnpm build",
                "yarn build",
                "make ",
                "gradle ",
                "./gradlew",
                "mvn ",
                "./scripts/build",
                "docker build",
            ],
        ),
        (
            "ETL/eval/training",
            &[" ingest", " eval", "benchmark", " training", " train "],
        ),
        (
            "python script",
            &["uv run python", "python -m ", "python scripts/"],
        ),
    ];

    let haystack = cmd.to_lowercase();
    BUCKETS
        .iter()
        .find(|(_, needles)| needles.iter().any(|&needle| haystack.contains(needle)))
        .map(|&(label, _)| label)
}

/// Runs a shell command on behalf of the `run_command` tool and returns its
/// JSON result.
///
/// The function proceeds through these stages, in this order:
///
/// 1. **Dangerous command gate** — unless `buffer_only` or `acknowledge_risk`
///    is set, a command flagged by `is_dangerous_command` is not executed;
///    it is stored via `ctx.output_buffer.store_dangerous` and a
///    `pending_ack` JSON object is returned instead.
/// 2. **Source file access block** — under the same two conditions, a command
///    flagged by `check_source_file_access` is rejected with an error.
/// 3. **Shell mode check** — skipped for `buffer_only`; `"disabled"` and
///    unknown `shell_command_mode` values are rejected.
/// 4. **Working directory resolution** via `resolve_work_dir`.
/// 5. **Background spawn** — when `run_in_background` is set the command is
///    handed to `spawn_background_command` (rejected for `buffer_only`).
/// 6. **Tee injection** via `inject_tee`.
/// 7. **Foreground execution** bounded by `timeout_secs`, with a heartbeat
///    task reporting elapsed time every 3 seconds.
///
/// # Parameters
///
/// - `original_command`: forwarded to `handle_successful_output` and used to
///   classify the command for the timeout hint.
/// - `resolved_command`: the text that is security-checked and executed.
/// - `timeout_secs`: foreground execution time limit, in seconds.
/// - `acknowledge_risk`: bypasses the dangerous-command gate and the source
///   file access block.
/// - `cwd_param`: optional working directory, validated by `resolve_work_dir`.
/// - `buffer_only`: skips the security gates, the shell mode check, and tee
///   injection.
/// - `run_in_background`: spawn detached instead of waiting for completion.
/// - `root`: project root that `cwd_param` is resolved against.
/// - `security`: security configuration consulted by the gates.
/// - `ctx`: tool context supplying the output buffer and progress reporter.
///
/// # Returns
///
/// On completion, whatever `handle_successful_output` produces. On timeout,
/// `Ok` with a JSON object containing `timed_out: true`, a `stderr` message,
/// `exit_code: null`, and a `hint`. For a gated dangerous command, `Ok` with
/// the `pending_ack` object.
///
/// # Errors
///
/// Returns a `RecoverableError` for blocked source-file access, a disabled or
/// unknown shell mode, an invalid `cwd_param`, `run_in_background` combined
/// with `buffer_only`, or a failure while waiting on the child. Spawn and
/// temp-file failures are propagated as-is.
#[allow(clippy::too_many_arguments)]
pub(crate) async fn run_command_inner(
    original_command: &str,
    resolved_command: &str,
    timeout_secs: u64,
    acknowledge_risk: bool,
    cwd_param: Option<&str>,
    buffer_only: bool,
    run_in_background: bool,
    root: &Path,
    security: &crate::util::path_security::PathSecurityConfig,
    ctx: &ToolContext,
) -> anyhow::Result<Value> {
    use crate::util::path_security::is_dangerous_command;

    // --- Step 2: Dangerous command gate ---
    // Order: (a) acknowledge_risk bypass → (b) pending_ack two-round-trip fallback.
    if !buffer_only && !acknowledge_risk {
        // Use resolved_command (with @refs substituted) so buffer-only grep/awk
        // commands don't get flagged for patterns in the buffer content.
        if let Some(reason) = is_dangerous_command(resolved_command, security) {
            let handle = ctx.output_buffer.store_dangerous(
                resolved_command.to_string(),
                cwd_param.map(str::to_string),
                timeout_secs,
            );
            return Ok(serde_json::json!({
                "pending_ack": handle,
                "reason": reason,
                "hint": format!("run_command(\"{handle}\") to execute")
            }));
        }
    }

    // --- Step 2.5: Source file access block ---
    if !buffer_only && !acknowledge_risk {
        if let Some(hint) = crate::util::path_security::check_source_file_access(resolved_command) {
            return Err(RecoverableError::with_hint(
                "shell access to source files is blocked",
                &hint,
            )
            .into());
        }
    }

    // --- Step 3: Shell command mode check (skip for buffer-only queries) ---
    if !buffer_only {
        match security.shell_command_mode.as_str() {
            "disabled" => {
                return Err(RecoverableError::with_hint(
                    "shell commands are disabled",
                    "Set security.shell_command_mode = \"warn\" or \"unrestricted\" in .codescout/project.toml",
                ).into());
            }
            "unrestricted" | "warn" | "" => {} // allowed
            other => {
                return Err(RecoverableError::with_hint(
                    format!("unknown shell_command_mode: '{}'", other),
                    "Use \"warn\", \"unrestricted\", or \"disabled\".",
                )
                .into());
            }
        }
    }

    // --- Step 4: Resolve working directory ---
    let work_dir = resolve_work_dir(root, cwd_param)?;

    // --- Step 4.7: Background spawn with warm return ---
    // Runs before tee injection (step 4.5 below): a background command is
    // spawned with the resolved command as-is.
    if run_in_background {
        if buffer_only {
            return Err(RecoverableError::with_hint(
                "run_in_background cannot be used with buffer queries",
                "Remove run_in_background, or run the query as a plain command without @ref interpolation.",
            )
            .into());
        }
        return spawn_background_command(resolved_command, &work_dir, ctx).await;
    }

    // --- Step 4.5: Tee injection for terminal filter commands ---
    // When the last pipe stage is a known filter (grep, head, tail, sed, awk, etc.),
    // inject `tee /tmp/codescout-unfiltered-XXXX` before the filter so the caller
    // can surface the unfiltered stream as a buffer ref without re-running the command.
    let (effective_command, unfiltered_tmpfile) = inject_tee(resolved_command, buffer_only)?;

    // --- Step 5: Execute command ---
    // On Unix we spawn into a new process group (process_group(0) → PGID = child PID)
    // so killpg() can reap the entire tree on timeout.  Without this, dropping the tokio
    // future orphans curl/grep/tee/head and they keep running until the download finishes.
    //
    // `kill_on_drop(true)` is the cancellation lifeline: when the rmcp request is
    // cancelled (user pressed Escape), call_tool_inner drops the tool future, which
    // drops `child_output_fut`, which drops the `Child` — and tokio then SIGKILLs the
    // immediate child.  We *also* keep the timeout-path killpg() below for the case
    // where the future isn't dropped: SIGKILL on the lone shell wouldn't propagate to
    // the pipeline (curl, grep, tee, etc.), but killpg() reaps the whole group.
    //
    // We also reset SIGPIPE to SIG_DFL in pre_exec.  Claude Code's Node.js parent sets
    // SIGPIPE=SIG_IGN; every spawned process inherits it.  With SIG_IGN, a `| head -N`
    // pipeline never terminates via SIGPIPE: tee ignores the broken pipe from head and
    // keeps draining curl's output into the tmpfile until the download completes.
    #[cfg(unix)]
    let (child_output_fut, child_pgid) = {
        // Base config (sh -c, GIT_PAGER, process_group(0), SIGPIPE reset,
        // stdin=null) comes from the shared platform builder; foreground only
        // adds piped stdio + kill_on_drop.
        let mut cmd = crate::platform::shell_command_configured(&effective_command);
        cmd.current_dir(&work_dir)
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::piped())
            .kill_on_drop(true); // SIGKILL on Drop — reaps shell on cancel
        let child = cmd.spawn()?;
        let pgid: Option<i32> = child.id().map(|id| id as i32);
        // Drop guard: if the future is cancelled, we want the *entire pipeline*
        // killed — not just the shell. tokio's kill_on_drop only SIGKILLs the
        // immediate child; killpg() walks the whole process group. We attach
        // the guard to the future so its Drop runs on cancellation.
        let pgid_for_guard = pgid;
        let fut: std::pin::Pin<
            Box<dyn std::future::Future<Output = std::io::Result<std::process::Output>> + Send>,
        > = Box::pin(async move {
            struct PgidKillGuard(Option<i32>);
            impl Drop for PgidKillGuard {
                fn drop(&mut self) {
                    if let Some(pgid) = self.0 {
                        // SAFETY: pgid was created with process_group(0); SIGKILL is
                        // safe to send to our own group. No-op if already reaped.
                        unsafe { libc::killpg(pgid, libc::SIGKILL) };
                    }
                }
            }
            let mut guard = PgidKillGuard(pgid_for_guard);
            let result = child.wait_with_output().await;
            // Successful completion: disarm the guard by clearing the pgid so
            // the Drop impl sees None and skips the SIGKILL.
            guard.0 = None;
            result
        });
        (fut, pgid)
    };

    // `_child_pgid` is unused on Windows — the process-group kill in the timeout
    // arm is `#[cfg(unix)]` only. The Unix branch above binds it as `child_pgid`.
    #[cfg(windows)]
    let (child_output_fut, _child_pgid) = {
        // Capture stdout/stderr via temp files instead of pipes, and wait on the
        // *process* rather than on pipe EOF.
        //
        // On Windows a grandchild — git's helper exe, a Python subprocess, or an
        // EDR/AV DLL injected into the child — inherits the write end of a
        // captured pipe and can hold it open after our direct child has exited.
        // `.output()` waits for pipe EOF, so it never returns even though the
        // command finished. (The timeout hint below — "output() never gets EOF" —
        // is exactly this failure.) Redirecting to files and waiting on the
        // process means a lingering grandchild keeping a file handle open does
        // not block our read.
        //
        // The cmd /C invocation, verbatim raw_arg cmdline (no MSVC-CRT quote
        // mangling — see the `cmd /?` outer-quote rule), GIT_PAGER, and stdin=NUL
        // all come from the shared platform builder
        // (`platform::shell_command_configured` / `build_windows_cmdline`).
        // Foreground only adds the file-capture stdio + kill_on_drop.
        let out_tmp = tempfile::Builder::new()
            .prefix("codescout-cmd-out-")
            .tempfile()?;
        let err_tmp = tempfile::Builder::new()
            .prefix("codescout-cmd-err-")
            .tempfile()?;
        let (out_file, out_path) = out_tmp.keep()?;
        let (err_file, err_path) = err_tmp.keep()?;
        // Establish cleanup guards BEFORE spawn so an early `?` (spawn failure)
        // still deletes the just-`keep()`d temp files. They are moved into the
        // future below, where they also drop on normal completion and on
        // future-drop (timeout / cancellation).
        let out_guard = TmpfileGuard(out_path.to_string_lossy().into_owned());
        let err_guard = TmpfileGuard(err_path.to_string_lossy().into_owned());

        let mut cmd = crate::platform::shell_command_configured(&effective_command);
        cmd.current_dir(&work_dir)
            .stdout(std::process::Stdio::from(out_file))
            .stderr(std::process::Stdio::from(err_file))
            .kill_on_drop(true); // SIGKILLs cmd on cancel/timeout (future drop)
        let mut child = cmd.spawn()?;

        let fut: std::pin::Pin<
            Box<dyn std::future::Future<Output = std::io::Result<std::process::Output>> + Send>,
        > = Box::pin(async move {
            // Guards (created before spawn) move in here; they drop on normal
            // completion *and* on future-drop (timeout / cancellation).
            let _out_guard = out_guard;
            let _err_guard = err_guard;
            let status = child.wait().await?;
            let stdout = std::fs::read(&out_path).unwrap_or_default();
            let stderr = std::fs::read(&err_path).unwrap_or_default();
            Ok(std::process::Output {
                status,
                stdout,
                stderr,
            })
        });
        (fut, None::<i32>)
    };

    // Heartbeat: send elapsed-seconds progress every 3s while the command runs.
    // AbortOnDrop guarantees the task is cancelled even when early `return`s fire.
    let progress_clone = ctx.progress.clone();
    let _heartbeat = AbortOnDrop(tokio::spawn(async move {
        let start = std::time::Instant::now();
        loop {
            tokio::time::sleep(std::time::Duration::from_secs(3)).await;
            if let Some(p) = &progress_clone {
                let elapsed = start.elapsed().as_secs();
                p.report_text(&format!("{}s elapsed", elapsed)).await;
            }
        }
    }));

    match tokio::time::timeout(
        std::time::Duration::from_secs(timeout_secs),
        child_output_fut,
    )
    .await
    {
        // Command finished within the time limit.
        Ok(Ok(output)) => {
            handle_successful_output(
                original_command,
                String::from_utf8_lossy(&output.stdout).into_owned(),
                String::from_utf8_lossy(&output.stderr).into_owned(),
                // No exit code is available (e.g. the process was terminated
                // by a signal on Unix): report -1.
                output.status.code().unwrap_or(-1),
                buffer_only,
                unfiltered_tmpfile,
                ctx,
            )
            .await
        }
        // Waiting on the child itself failed.
        Ok(Err(e)) => Err(RecoverableError::new(format!("command execution error: {}", e)).into()),
        // The time limit elapsed before the command finished.
        Err(_) => {
            // Kill the entire process group so orphaned children (curl, grep, tee, etc.)
            // are reaped immediately rather than running to completion in the background.
            #[cfg(unix)]
            if let Some(pgid) = child_pgid {
                // SAFETY: pgid is the process group we created with process_group(0) above.
                // killpg with SIGKILL is the only reliable way to stop the whole pipeline
                // tree (sh + curl + grep + tee + head) in one shot.
                unsafe { libc::killpg(pgid, libc::SIGKILL) };
            }
            // Suggested retry value: triple the current timeout, but at least 300s.
            let next_timeout = timeout_secs.saturating_mul(3).max(300);
            let hint = match classify_slow_command(original_command) {
                Some(label) => format!(
                    "Looks like a {label} command (long-running by nature). \
                     Two options: (1) re-run with run_in_background: true — returns \
                     immediately, output streams to a log file you can tail/grep; \
                     (2) re-run with a higher timeout_secs (current: {timeout_secs}s; \
                     try {next_timeout}s). If the command launches background processes \
                     (with &), prefer run_in_background — shell & leaves them holding \
                     the stdout pipe open."
                ),
                None => format!(
                    "Command exceeded {timeout_secs}s. If it launches background \
                     processes (e.g. with &), use run_in_background: true — shell & \
                     leaves background processes holding the stdout pipe open, so \
                     output() never gets EOF. run_in_background spawns via a log file \
                     instead and returns immediately. For genuinely slow commands, \
                     pass a higher timeout_secs (try {next_timeout}s)."
                ),
            };
            Ok(json!({
                "timed_out": true,
                "stderr": format!("Command timed out after {} seconds", timeout_secs),
                "exit_code": null,
                "hint": hint,
            }))
        }
    }
}

/// Reports whether `command`, ignoring surrounding whitespace, is exactly an
/// acknowledgement handle: the literal `@ack_` followed by eight ASCII hex
/// digits (either case) and nothing else.
pub(crate) fn looks_like_ack_handle(command: &str) -> bool {
    match command.trim().strip_prefix("@ack_") {
        Some(hex) => hex.len() == 8 && hex.bytes().all(|b| b.is_ascii_hexdigit()),
        None => false,
    }
}