repotoire 0.7.1

//! Hook runtime — invoked by Claude Code via PreToolUse hook.
//!
//! Fail-open philosophy: ANY internal error must result in `ExitCode::SUCCESS` with no stdout.
//! The user must always be able to commit, even if the hook is broken.
//!
//! IMPORTANT: We emit the PreToolUse-specific `hookSpecificOutput` schema (per
//! code.claude.com/docs/en/hooks), NOT the legacy/common form
//! `{decision: "block", reason: "..."}`. Both forms appear in different Claude Code
//! docs (anthropics/claude-code#19115) but only the form below works for PreToolUse deny.
//! Do not "fix" this back to the legacy form.

use std::process::ExitCode;
use std::sync::LazyLock;

use regex::Regex;

/// Heuristic detector for shell commands that invoke `git commit`.
///
/// Accepted — `commit` appears as a standalone word after `git` on the same line:
///   - `git commit`, `git commit -am 'x'`, `git commit --amend`
///   - `git -c user.email=x commit ...` (config-override flags between `git` and `commit`)
///   - `git checkout && git commit` (anywhere in a chained command)
///   - `'git commit'` quoted inside another command (acceptable false-positive)
///
/// Not accepted:
///   - `git commit-tree`, `git commit-graph` (sibling subcommands)
///   - `gitlab commit` (different binary)
///   - `git status` (no `commit` at all)
///
/// How the pattern is assembled:
///   - `\bgit\b` — `git` must be a whole word, which is what rejects `gitlab`.
///   - `[^\n]*?` — lazily skips arbitrary same-line text between `git` and `commit`.
///   - `\bcommit` — `commit` must begin on a word boundary, which rejects `recommit`.
///   - `(?:\s|$|[^\w-])` — `commit` must be followed by whitespace, end-of-string, or a
///     character that is neither a word character nor a hyphen. This rejects
///     `commit-tree` and `commit-graph` while accepting `commit'`, `commit;`, etc.
///
/// No case-insensitive flag is set, so matching is case-sensitive (matches git's own
/// dispatch on Unix).
static GIT_COMMIT_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"\bgit\b[^\n]*?\bcommit(?:\s|$|[^\w-])";
    Regex::new(pattern).expect("static regex compiles")
});

/// True if `cmd` issues a `git commit` (with or without arguments, with or without
/// preceding `git -c k=v`-style flags).
pub(super) fn matches_git_commit(cmd: &str) -> bool {
    GIT_COMMIT_RE.is_match(cmd)
}

/// Upper bound, in bytes, on how much of stdin the hook will read. Hook payloads are
/// tiny (~1 KiB typical), so 1 MiB leaves ample headroom while still preventing memory
/// exhaustion on malicious input.
const MAX_STDIN_BYTES: u64 = 1 << 20;

/// Hook runtime entry point. ALL internal errors → exit 0 with no stdout.
pub fn run() -> ExitCode {
    match try_run() {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => {
            tracing::debug!("claude-hook run: {e:#}");
            // Optional: surface to stderr for users debugging the hook themselves.
            // Claude Code does not display stderr to users.
            eprintln!("repotoire claude-hook: {e:#}");
            ExitCode::SUCCESS
        }
    }
}

/// Internal flow. Returning Err means "fail-open silently" — the wrapper turns it into ExitCode::SUCCESS.
///
/// Steps, each of which returns `Ok(())` early (allowing the tool call) when it does
/// not apply:
///   1. Read at most `MAX_STDIN_BYTES` of stdin and parse it as JSON.
///   2. Ignore anything whose `tool_name` is not `"Bash"`.
///   3. Ignore commands that do not match `matches_git_commit`.
///   4. Take the working directory from `payload.cwd`.
///   5. Resolve the repository root with `git rev-parse --show-toplevel`.
///   6. Skip repositories where `HEAD` does not resolve.
///   7. Skip when no cached `baseline_findings.json` exists.
///   8. Compute the diff against the baseline (no inline analysis, no telemetry).
///   9. Skip unless at least one new finding is Critical or High.
///  10. Write the deny response as a single JSON line on stdout.
///
/// # Errors
///
/// Returns `Err` for unreadable stdin, malformed JSON, a missing `cwd` field, failure to
/// spawn `git`, a failing diff computation, or a failed write to stdout. None of these
/// panic; the caller treats them all as "allow".
fn try_run() -> anyhow::Result<()> {
    use std::io::{Read, Write};

    // 1. Read stdin (capped).
    let mut buf = String::new();
    std::io::stdin()
        .lock()
        .take(MAX_STDIN_BYTES)
        .read_to_string(&mut buf)
        .map_err(|e| anyhow::anyhow!("read stdin: {e}"))?;
    let payload: serde_json::Value =
        serde_json::from_str(&buf).map_err(|e| anyhow::anyhow!("parse stdin JSON: {e}"))?;

    // 2. Cheap pre-filter: tool_name must be "Bash".
    let tool_name = payload
        .get("tool_name")
        .and_then(|v| v.as_str())
        .unwrap_or("");
    if tool_name != "Bash" {
        return Ok(());
    }

    // 3. Extract and match the command.
    let command = payload
        .get("tool_input")
        .and_then(|v| v.get("command"))
        .and_then(|v| v.as_str())
        .unwrap_or("");
    if !matches_git_commit(command) {
        return Ok(());
    }

    // 4. Read cwd from the payload (NOT the inherited subprocess cwd).
    let cwd_str = payload
        .get("cwd")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("payload.cwd missing"))?;
    let cwd = std::path::Path::new(cwd_str);
    if !cwd.is_dir() {
        return Ok(());
    }

    // 5. Locate repo root via `git rev-parse --show-toplevel`.
    let toplevel = std::process::Command::new("git")
        .arg("-C")
        .arg(cwd)
        .args(["rev-parse", "--show-toplevel"])
        .output()
        .map_err(|e| anyhow::anyhow!("spawn git rev-parse: {e}"))?;
    if !toplevel.status.success() {
        return Ok(());
    }
    let repo_root = String::from_utf8_lossy(&toplevel.stdout).trim().to_string();
    if repo_root.is_empty() {
        return Ok(());
    }
    let repo_root = std::path::PathBuf::from(repo_root);

    // 6. Skip if no commits yet.
    let head_check = std::process::Command::new("git")
        .arg("-C")
        .arg(&repo_root)
        .args(["rev-parse", "--verify", "HEAD"])
        .output()
        .map_err(|e| anyhow::anyhow!("spawn git rev-parse HEAD: {e}"))?;
    if !head_check.status.success() {
        return Ok(());
    }

    // 7. Skip if no baseline cached.
    //    Known race: `repotoire analyze` writes baseline_findings.json from a detached
    //    cache_results thread; a hook that fires immediately after analyze (before the
    //    write completes) may see a partially-written file and parse-fail in step 8.
    //    Fail-open is the intended outcome — the user's commit goes through and the
    //    hook fires correctly on the next attempt.
    let baseline = crate::cache::paths::cache_dir(&repo_root).join("baseline_findings.json");
    if !baseline.exists() {
        return Ok(());
    }

    // 8. Compute diff with NO inline analysis and NO telemetry.
    //    Pass `all=true` so we get every new finding vs baseline regardless of
    //    hunk attribution. The hook fires before the commit, so `git diff HEAD..HEAD`
    //    (the attribution source for `all=false`) is empty by definition.
    let opts = crate::cli::diff::SmartDiffOptions {
        allow_inline_analysis: false,
        emit_telemetry: false,
    };
    let telemetry = crate::telemetry::Telemetry::Disabled;
    let result = crate::cli::diff::compute_smart_diff(
        &repo_root,
        Some("HEAD"),
        true,  // all
        false, // changed
        opts,
        &telemetry,
    )?;
    let Some(result) = result else {
        return Ok(());
    };

    // 9. Decide. Only Critical/High findings block; `any` stops at the first one.
    let has_blocking_finding = result.new_findings.iter().any(|af| {
        matches!(
            af.finding.severity,
            crate::models::Severity::Critical | crate::models::Severity::High
        )
    });
    if !has_blocking_finding {
        return Ok(());
    }

    // 10. Emit deny response on stdout.
    //     `println!` would panic if stdout is closed or broken; a fallible write turns
    //     that into an `Err`, which the caller treats as fail-open.
    let reason = format_deny_reason(&result);
    let response = build_deny_response(&reason);
    let encoded = serde_json::to_string(&response)?;
    let mut stdout = std::io::stdout().lock();
    writeln!(stdout, "{encoded}").map_err(|e| anyhow::anyhow!("write stdout: {e}"))?;
    stdout
        .flush()
        .map_err(|e| anyhow::anyhow!("flush stdout: {e}"))?;
    Ok(())
}

use crate::cli::diff::SmartDiffResult;
use crate::models::Severity;

/// Maximum number of bullet lines `format_deny_reason` puts in the deny reason. Findings
/// are listed most-severe first, then by file; anything beyond this cap is summarised in
/// a single "...and N more" line.
const MAX_BULLETS: usize = 5;

/// Upper-case tag for `sev`, used as the bracketed prefix of each deny-reason bullet.
fn severity_label(sev: Severity) -> &'static str {
    let label = match sev {
        Severity::Info => "INFO",
        Severity::Low => "LOW",
        Severity::Medium => "MEDIUM",
        Severity::High => "HIGH",
        Severity::Critical => "CRITICAL",
    };
    label
}

/// Sort key for `sev`: lower is more severe (`Critical` = 0 … `Info` = 4), so an
/// ascending sort lists the most severe findings first.
fn severity_rank(sev: Severity) -> u8 {
    let rank = match sev {
        Severity::Info => 4,
        Severity::Low => 3,
        Severity::Medium => 2,
        Severity::High => 1,
        Severity::Critical => 0,
    };
    rank
}

/// Build the human-readable reason string Claude Code shows in the deny dialog.
///
/// Only Critical and High findings from `result.new_findings` are considered. The text
/// is laid out as:
///   1. a headline with the critical / high counts;
///   2. a `Score: before → after (Δ delta)` line, only when both scores are present;
///   3. a blank line;
///   4. up to `MAX_BULLETS` bullets of the form `- [SEVERITY] title (file:line)`,
///      ordered by severity (Critical first) and then by first affected file, with
///      ties keeping their original order;
///   5. a `- ...and N more` line when bullets were truncated;
///   6. a closing "Fix these before committing." line.
///
/// A finding with no affected file sorts as the empty string and is printed as
/// `<unknown>`; a finding with no `line_start` omits the `:line` suffix.
pub(super) fn format_deny_reason(result: &SmartDiffResult) -> String {
    use crate::cli::diff::AttributedFinding;

    /// Display form of the finding's first affected file, if it has one.
    fn primary_file(af: &AttributedFinding) -> Option<String> {
        af.finding
            .affected_files
            .first()
            .map(|p| p.display().to_string())
    }

    let mut blocking: Vec<&AttributedFinding> = result
        .new_findings
        .iter()
        .filter(|af| matches!(af.finding.severity, Severity::Critical | Severity::High))
        .collect();
    let n_crit = blocking
        .iter()
        .filter(|af| af.finding.severity == Severity::Critical)
        .count();
    let n_high = blocking.len() - n_crit;

    let mut out = format!(
        "Repotoire found {n_crit} critical and {n_high} high severity issues in your changes:\n"
    );

    if let (Some(before), Some(after)) = (result.score_before, result.score_after) {
        let delta = after - before;
        out.push_str(&format!(
            "Score: {before:.1} \u{2192} {after:.1} (\u{0394} {delta:+.1})\n"
        ));
    }
    out.push('\n');

    // Stable sort on (severity rank, first file). The cached-key variant renders each
    // path to a String once per finding instead of twice per comparison.
    blocking.sort_by_cached_key(|af| {
        (
            severity_rank(af.finding.severity),
            primary_file(af).unwrap_or_default(),
        )
    });

    for af in blocking.iter().take(MAX_BULLETS) {
        let file = primary_file(af).unwrap_or_else(|| "<unknown>".into());
        let line = af
            .finding
            .line_start
            .map(|l| format!(":{l}"))
            .unwrap_or_default();
        out.push_str(&format!(
            "- [{}] {} ({}{})\n",
            severity_label(af.finding.severity),
            af.finding.title,
            file,
            line,
        ));
    }
    if blocking.len() > MAX_BULLETS {
        out.push_str(&format!(
            "- ...and {} more\n",
            blocking.len() - MAX_BULLETS
        ));
    }

    out.push_str("\nFix these before committing.\n");
    out
}

/// Wrap `reason` in the JSON document written to stdout to deny the tool call.
///
/// The result is an object with a single `hookSpecificOutput` key whose value carries
/// the event name (`PreToolUse`), the decision (`deny`), and `reason` verbatim as the
/// decision reason.
pub(super) fn build_deny_response(reason: &str) -> serde_json::Value {
    let decision = serde_json::json!({
        "hookEventName": "PreToolUse",
        "permissionDecision": "deny",
        "permissionDecisionReason": reason,
    });
    serde_json::json!({ "hookSpecificOutput": decision })
}

// Unit tests for the command matcher, the deny-reason formatter, and the deny-response
// builder. None of them touch stdin, git, or the cache.
#[cfg(test)]
mod tests {
    use super::*;

    // --- matches_git_commit: commands that must be detected ---

    #[test]
    fn matches_basic_git_commit() {
        assert!(matches_git_commit("git commit"));
    }

    #[test]
    fn matches_git_commit_with_short_flags() {
        assert!(matches_git_commit("git commit -am 'fix bug'"));
    }

    #[test]
    fn matches_git_commit_with_amend() {
        assert!(matches_git_commit("git commit --amend"));
    }

    #[test]
    fn matches_git_commit_with_config_override() {
        assert!(matches_git_commit(
            "git -c user.email=x@y.z commit -am 'fix'"
        ));
    }

    #[test]
    fn matches_git_commit_with_extra_whitespace() {
        // Leading/trailing whitespace and repeated spaces between `git` and `commit`.
        assert!(matches_git_commit("\t  git    commit\n"));
    }

    // --- matches_git_commit: commands that must NOT be detected ---

    #[test]
    fn rejects_git_commit_tree() {
        assert!(!matches_git_commit("git commit-tree foo"));
    }

    #[test]
    fn rejects_git_commit_graph() {
        assert!(!matches_git_commit("git commit-graph write"));
    }

    #[test]
    fn rejects_gitlab_commit() {
        assert!(!matches_git_commit("gitlab commit something"));
    }

    #[test]
    fn rejects_git_status() {
        assert!(!matches_git_commit("git status"));
    }

    #[test]
    fn rejects_empty_string() {
        assert!(!matches_git_commit(""));
    }

    #[test]
    fn rejects_bare_git() {
        assert!(!matches_git_commit("git"));
    }

    #[test]
    fn matches_chained_git_commit() {
        // A real commit that follows another git command in an `&&` chain must still be
        // detected.
        assert!(matches_git_commit("git checkout main && git commit"));
    }

    #[test]
    fn matches_quoted_git_commit_in_echo() {
        // Acceptable false-positive per spec.
        assert!(matches_git_commit("echo 'git commit'"));
    }

    use crate::cli::diff::{AttributedFinding, SmartDiffResult};
    use crate::cli::diff_hunks::Attribution;
    use crate::models::{Finding, Severity};
    use std::path::PathBuf;

    // Builds an `AttributedFinding` with the given severity, title, single affected file
    // and start line; every other `Finding` field takes its `Default` value.
    fn fake_finding(sev: Severity, title: &str, file: &str, line: u32) -> AttributedFinding {
        AttributedFinding {
            finding: Finding {
                detector: "fake".into(),
                title: title.into(),
                severity: sev,
                affected_files: vec![PathBuf::from(file)],
                line_start: Some(line),
                ..Default::default()
            },
            attribution: Attribution::InChangedHunk,
        }
    }

    // Builds a `SmartDiffResult` whose new findings are `findings`, with the given
    // optional before/after scores and no fixed findings.
    fn fake_result(
        findings: Vec<AttributedFinding>,
        before: Option<f64>,
        after: Option<f64>,
    ) -> SmartDiffResult {
        let n = findings.len();
        SmartDiffResult {
            base_ref: "cached".into(),
            head_ref: "HEAD".into(),
            files_changed: 1,
            new_findings: findings,
            all_new_count: n,
            fixed_findings: vec![],
            score_before: before,
            score_after: after,
        }
    }

    // --- format_deny_reason ---

    #[test]
    fn deny_reason_truncates_to_top_5() {
        // 50 critical findings: 5 bullets are shown and the remaining 45 are summarised.
        let mut findings = Vec::new();
        for i in 0..50 {
            findings.push(fake_finding(
                Severity::Critical,
                &format!("F{i}"),
                "a.rs",
                i + 1,
            ));
        }
        let r = fake_result(findings, Some(95.0), Some(90.0));
        let s = format_deny_reason(&r);
        let bullet_count = s.matches("- [CRITICAL]").count();
        assert_eq!(bullet_count, 5, "should be exactly 5 bullets, got: {s}");
        assert!(
            s.contains("...and 45 more"),
            "should mention truncated count: {s}"
        );
    }

    #[test]
    fn deny_reason_includes_score_line_when_both_set() {
        let r = fake_result(
            vec![fake_finding(Severity::Critical, "X", "a.rs", 1)],
            Some(95.0),
            Some(90.0),
        );
        let s = format_deny_reason(&r);
        assert!(s.contains("Score: 95.0"), "missing score line: {s}");
        assert!(s.contains("90.0"), "missing after-score: {s}");
        assert!(s.contains("-5.0"), "missing delta: {s}");
    }

    #[test]
    fn deny_reason_omits_score_line_when_either_missing() {
        // Only the after-score is present, so no score line should be emitted.
        let r = fake_result(
            vec![fake_finding(Severity::Critical, "X", "a.rs", 1)],
            None,
            Some(90.0),
        );
        let s = format_deny_reason(&r);
        assert!(!s.contains("Score:"), "should omit score line: {s}");
    }

    #[test]
    fn deny_reason_sorts_critical_before_high() {
        // Input order is High then Critical; output order must be the reverse.
        let r = fake_result(
            vec![
                fake_finding(Severity::High, "HighOne", "a.rs", 10),
                fake_finding(Severity::Critical, "CritOne", "a.rs", 20),
            ],
            None,
            None,
        );
        let s = format_deny_reason(&r);
        let crit_pos = s.find("CritOne").expect("CritOne present");
        let high_pos = s.find("HighOne").expect("HighOne present");
        assert!(crit_pos < high_pos, "Critical should come before High");
    }

    // --- build_deny_response ---

    #[test]
    fn deny_response_has_correct_schema() {
        let v = build_deny_response("hello");
        assert_eq!(v["hookSpecificOutput"]["hookEventName"], "PreToolUse");
        assert_eq!(v["hookSpecificOutput"]["permissionDecision"], "deny");
        assert_eq!(v["hookSpecificOutput"]["permissionDecisionReason"], "hello");
    }
}