repotoire 0.8.0

//! Output and formatting functions for the analyze command
//!
//! This module contains all output-related logic:
//! - Formatting reports (text, JSON, SARIF, etc.)
//! - Filtering and pagination
//! - Caching results
//! - Threshold checks for CI/CD

use crate::models::{Finding, FindingsSummary, HealthReport, Severity};
use crate::reporters;
use anyhow::Result;
use console::style;
use std::path::{Path, PathBuf};

/// Normalize a path so it no longer depends on where the repository lives.
///
/// Two location-specific prefixes are removed:
/// - `/tmp/<sandbox>/rest` becomes `rest` (the first directory under `/tmp`
///   is dropped along with `/tmp` itself);
/// - `$HOME/rest` becomes `rest`.
///
/// Any other path (including relative paths) is returned unchanged, rendered
/// via [`Path::display`].
fn normalize_path(path: &Path) -> String {
    let path_str = path.display().to_string();

    // `/tmp/<sandbox>/<rest>`: keep only `<rest>`. A bare `/tmp/<name>` has no
    // sandbox component to drop and falls through to the checks below.
    if let Some((_sandbox, rest)) = path_str
        .strip_prefix("/tmp/")
        .and_then(|tail| tail.split_once('/'))
    {
        return rest.to_string();
    }

    if let Ok(home) = std::env::var("HOME") {
        // Match `$HOME` component-wise rather than as a raw string prefix:
        // with HOME=/home/bob, a string match would mangle `/home/bobby/x`
        // into `by/x`. An empty HOME is ignored for the same reason — it
        // would "match" every path.
        if !home.is_empty() {
            if let Ok(relative) = path.strip_prefix(&home) {
                return relative.display().to_string();
            }
        }
    }

    path_str
}

/// Narrow `findings` in place to the most severe entries.
///
/// Steps, in order:
/// 1. when `severity` is given, drop every finding below that minimum;
/// 2. sort the remainder by descending severity (the sort is stable, so
///    findings of equal severity keep their relative order);
/// 3. when `top` is given, keep only the first `top` findings.
pub(crate) fn filter_findings(
    findings: &mut Vec<Finding>,
    severity: Option<Severity>,
    top: Option<usize>,
) {
    if let Some(floor) = severity {
        findings.retain(|finding| finding.severity >= floor);
    }

    // Most severe first.
    findings.sort_by(|lhs, rhs| rhs.severity.cmp(&lhs.severity));

    if let Some(limit) = top {
        findings.truncate(limit);
    }
}

/// Sort findings deterministically and slice out one page.
///
/// Ordering is severity (descending), then first affected file, line start,
/// detector, and title (#47). A finding without affected files sorts as if
/// its file were the empty string.
///
/// # Parameters
/// - `page`: 1-based page number; values outside `1..=total_pages` are
///   clamped into range.
/// - `per_page`: page size; `0` disables pagination.
///
/// # Returns
/// - `per_page == 0`: all (sorted) findings and `None`.
/// - otherwise: the findings on the selected page and
///   `Some((page, total_pages, per_page, total_findings))`, where `page` is
///   the clamped page number and `total_findings` is the count before slicing.
pub(crate) fn paginate_findings(
    mut findings: Vec<Finding>,
    page: usize,
    per_page: usize,
) -> (Vec<Finding>, Option<(usize, usize, usize, usize)>) {
    findings.sort_by(|a, b| {
        (b.severity as u8)
            .cmp(&(a.severity as u8))
            .then_with(|| {
                // Compare borrowed lossy views instead of allocating a
                // `String` per side on every comparison.
                let a_file = a
                    .affected_files
                    .first()
                    .map(|f| f.to_string_lossy())
                    .unwrap_or_default();
                let b_file = b
                    .affected_files
                    .first()
                    .map(|f| f.to_string_lossy())
                    .unwrap_or_default();
                a_file.cmp(&b_file)
            })
            .then_with(|| a.line_start.cmp(&b.line_start))
            .then_with(|| a.detector.cmp(&b.detector))
            .then_with(|| a.title.cmp(&b.title))
    });

    if per_page == 0 {
        return (findings, None);
    }

    let displayed_findings = findings.len();
    let total_pages = displayed_findings.div_ceil(per_page);
    // `total_pages` is 0 for an empty list; clamp against at least 1 so the
    // reported page is always >= 1.
    let page = page.clamp(1, total_pages.max(1));
    let start = (page - 1) * per_page;
    let end = start.saturating_add(per_page).min(displayed_findings);

    // We own the vector, so move the page out rather than cloning it.
    let paginated: Vec<Finding> = findings.drain(start..end).collect();
    (
        paginated,
        Some((page, total_pages, per_page, displayed_findings)),
    )
}

/// Inputs for [`format_and_output`]: bundles the report, finding list, and
/// the renderer-specific knobs (format, output path, pagination flags).
pub(crate) struct FormatAndOutputArgs<'a> {
    /// Report to render. Its `findings` are what terminal-style output shows
    /// (possibly a single page).
    pub report: &'a HealthReport,
    /// Complete finding list. Used for file-oriented exports and for the
    /// on-disk cache, regardless of pagination.
    pub all_findings: &'a [Finding],
    /// Output format handed to the reporter.
    pub format: reporters::OutputFormat,
    /// When `Some`, the rendered report is written to this path instead of
    /// being printed to stdout.
    pub output_path: Option<&'a Path>,
    /// Directory that receives the cache files written after rendering.
    pub repotoire_dir: &'a Path,
    /// Optional pagination metadata: `(page, total_pages, per_page, total_findings)`,
    /// in the same order [`paginate_findings`] returns it.
    pub pagination_info: Option<(usize, usize, usize, usize)>,
    /// Number of findings actually displayed on the current page. Currently
    /// reserved; consumers compute their own counts from the slice.
    pub displayed_findings: usize,
    /// Suppress emoji icons in the status lines printed around the report.
    pub no_emoji: bool,
}

/// Render the report, emit it, refresh the cache, and print pagination hints.
///
/// Behaviour, in order:
/// 1. Pick the finding set to render. SARIF, HTML, and Markdown — and JSON
///    when an output path is given (#58) — use `all_findings` so exports are
///    never truncated to a page; if `all_findings` is empty the report's own
///    findings are used. All other cases render the report's own findings.
///    `findings_summary` is always recomputed from the findings rendered.
/// 2. Write the rendered text to `output_path` when one was supplied (#59),
///    announcing the location on stderr; otherwise print it to stdout.
/// 3. Cache the original `report` and `all_findings` via [`cache_results`].
/// 4. Print the pagination footer, unless the format is JSON/SARIF/HTML/
///    Markdown or an output path was given.
///
/// # Errors
/// Propagates failures from the reporter, from writing the output file, and
/// from [`cache_results`].
pub(crate) fn format_and_output(args: FormatAndOutputArgs<'_>) -> Result<()> {
    use reporters::OutputFormat;

    let FormatAndOutputArgs {
        report,
        all_findings,
        format,
        output_path,
        repotoire_dir,
        pagination_info,
        // Reserved field; nothing here needs it.
        displayed_findings: _,
        no_emoji,
    } = args;

    // For file-based export formats (SARIF, HTML, Markdown), use ALL findings
    // to avoid truncating to page size. Pagination is for terminal display
    // only. JSON gets the full list only when writing to a file (#58).
    let use_all = matches!(
        format,
        OutputFormat::Sarif | OutputFormat::Html | OutputFormat::Markdown
    ) || (format == OutputFormat::Json && output_path.is_some());

    let mut report_for_output = report.clone();
    if use_all && !all_findings.is_empty() {
        report_for_output.findings = all_findings.to_vec();
    }
    // Keep the summary in step with whatever findings are actually rendered
    // (full list for exports, possibly a single page for JSON stdout / text).
    report_for_output.findings_summary =
        FindingsSummary::from_findings(&report_for_output.findings);

    let output_str = reporters::report_with_format(&report_for_output, format)?;

    // Only write to file if --output was explicitly provided (#59).
    if let Some(out_path) = output_path {
        std::fs::write(out_path, &output_str)?;
        let file_icon = if no_emoji { "" } else { "📄 " };
        // Status goes to stderr so stdout stays clean for machine consumers.
        eprintln!(
            "\n{}Report written to: {}",
            style(file_icon).bold(),
            style(out_path.display()).cyan()
        );
    } else {
        // For machine-readable formats, skip leading newline to keep stdout clean
        if !matches!(format, OutputFormat::Json | OutputFormat::Sarif) {
            println!();
        }
        println!("{}", output_str);
    }

    // Cache results
    cache_results(repotoire_dir, report, all_findings)?;

    // Show pagination info (suppress for machine-readable and file-based formats)
    let quiet_mode = matches!(
        format,
        OutputFormat::Json | OutputFormat::Sarif | OutputFormat::Html | OutputFormat::Markdown
    ) || output_path.is_some();
    if let Some((current_page, total_pages, per_page, total)) =
        pagination_info.filter(|_| !quiet_mode)
    {
        let page_icon = if no_emoji { "" } else { "📑 " };
        println!(
            "\n{}Showing page {} of {} ({} findings per page, {} total)",
            style(page_icon).bold(),
            style(current_page).cyan(),
            style(total_pages).cyan(),
            style(per_page).dim(),
            style(total).cyan(),
        );
        if current_page < total_pages {
            println!(
                "   Use {} to see more",
                style(format!("--page {}", current_page + 1)).yellow()
            );
        }
    }

    Ok(())
}

/// Enforce the `--fail-on` severity threshold against a report's summary.
///
/// Returns `Ok(())` when `fail_on` is `None` or when the summary holds no
/// finding at or above the threshold. Otherwise returns an error (rather than
/// exiting the process) so callers can still run cleanup (#19).
///
/// NOTE(review): `Severity::Info` applies the same rule as `Severity::Low`,
/// so info-level findings on their own never trip the threshold — confirm
/// that this is intended.
pub(crate) fn check_fail_threshold(fail_on: Option<Severity>, report: &HealthReport) -> Result<()> {
    let Some(threshold) = fail_on else {
        return Ok(());
    };

    // Cumulative "anything at this level or worse?" flags.
    let summary = &report.findings_summary;
    let critical_or_worse = summary.critical > 0;
    let high_or_worse = critical_or_worse || summary.high > 0;
    let medium_or_worse = high_or_worse || summary.medium > 0;
    let low_or_worse = medium_or_worse || summary.low > 0;

    let should_fail = match threshold {
        Severity::Critical => critical_or_worse,
        Severity::High => high_or_worse,
        Severity::Medium => medium_or_worse,
        Severity::Low | Severity::Info => low_or_worse,
    };

    if should_fail {
        // Return error instead of process::exit to allow cleanup (#19)
        anyhow::bail!("Failing due to --fail-on={} threshold", threshold);
    }
    Ok(())
}

/// Outcome of attempting to load the findings cache. The cache trust
/// boundary documented in
/// docs/superpowers/specs/2026-05-11-cache-validation.md requires
/// callers to distinguish "no analysis yet" (soft-fail, normal UX)
/// from "cache exists but is corrupt" (loud user-facing warning).
///
/// The previous `Option<Vec<Finding>>` return collapsed these into
/// one shape, so commands like `status` and `diff` produced
/// confident green-checkmark output on garbage caches. This enum is
/// the structural fix.
#[derive(Debug)]
pub enum CacheLoadOutcome {
    /// The cache file does not exist. This is the legitimate
    /// no-analysis-yet first-run case. Callers should fall through
    /// to their "no cached findings" UX.
    Missing,
    /// The cache exists but was written by a different binary
    /// version. We treat this as soft-fail (matches pre-fix
    /// behavior) — the user typically just needs to re-run analyze
    /// after a `cargo install` upgrade.
    ///
    /// The loader also reports this variant (with an empty `cached`
    /// string) when the parsed JSON has no string-valued `version`
    /// entry at all.
    ///
    /// The `cached` and `current` strings are available for callers
    /// that want to surface the mismatch in telemetry or a
    /// `--verbose` mode; the default `status`/`diff` UX treats this
    /// as a quiet no-op.
    VersionMismatch {
        #[allow(dead_code)]
        cached: String,
        #[allow(dead_code)]
        current: &'static str,
    },
    /// The cache parsed cleanly. Every `Finding` carried here passed
    /// semantic validation; invalid entries, if any, were dropped by
    /// the loader. The list is empty when the cache recorded zero
    /// findings.
    Findings(Vec<Finding>),
    /// The cache exists but failed to load: the file could not be
    /// read, was not valid JSON, lacked a `findings` field, failed to
    /// deserialize, or contained only semantically invalid entries.
    /// Callers MUST surface `reason` to the user (Bug 3: the previous
    /// `tracing::warn!`-only path was invisible to anyone redirecting
    /// stderr or scanning a big green checkmark).
    Corrupt { path: PathBuf, reason: String },
}

impl CacheLoadOutcome {
    /// Build the one-line, user-facing warning for a `Corrupt` outcome.
    ///
    /// The message names the cache path, the failure reason, and the
    /// `repotoire analyze` remediation. Every other variant yields `None`,
    /// so callers can write
    /// `if let Some(msg) = outcome.user_warning() { println!(...) }`.
    pub fn user_warning(&self) -> Option<String> {
        match self {
            // Listed explicitly so a newly added variant forces a decision here.
            CacheLoadOutcome::Missing
            | CacheLoadOutcome::VersionMismatch { .. }
            | CacheLoadOutcome::Findings(_) => None,
            CacheLoadOutcome::Corrupt { path, reason } => {
                let message = format!(
                    "⚠ Findings cache at {} appears stale or corrupt: {}. \
                     Run `repotoire analyze` to regenerate it.",
                    path.display(),
                    reason
                );
                Some(message)
            }
        }
    }
}

/// Load the findings cache from `path` with full outcome
/// classification. See [`CacheLoadOutcome`].
///
/// This is the public entry point; it delegates directly to the
/// private `load_findings_from_file_outcome`. It does not return a
/// `Result`: every failure mode (absent file, unreadable file, bad
/// JSON, version mismatch, invalid entries) is encoded in the returned
/// outcome.
pub fn load_cached_findings_outcome_from(path: &Path) -> CacheLoadOutcome {
    load_findings_from_file_outcome(path)
}

/// Shared implementation for the cache reader.
///
/// We deserialize each `Finding` via its `Deserialize` impl rather than
/// hand-picking fields. Every field on `Finding` is `#[serde(default)]`
/// so older cache files (written before any given field existed) still
/// parse cleanly, and new fields (e.g. dual-branch payload) flow
/// through without anyone having to update this reader.
///
/// Outcomes are classified per [`CacheLoadOutcome`]:
/// - `Missing`: the file does not exist.
/// - `VersionMismatch`: the JSON's `version` string differs from this
///   binary's `CARGO_PKG_VERSION` (or is absent / not a string).
/// - `Corrupt`: the file is unreadable, is not valid JSON, lacks a
///   `findings` field, fails to deserialize, or holds only entries that
///   fail semantic validation. This is the loud user-facing case fixed in
///   audit pass 2 (Bug 3 in the 2026-05-11-cache-validation spec).
/// - `Findings`: everything else; invalid entries are dropped.
///
/// Semantic validation: a JSON object like `{"bogus": "x"}` will
/// deserialize successfully (every `Finding` field is `serde(default)`).
/// We catch that here via `Finding::is_valid` and surface it as
/// `Corrupt`, not as a `Findings(...)` of all-default entries.
///
/// Side effects: emits `tracing` events, and prints a warning to stdout
/// when some (but not all) entries are invalid.
fn load_findings_from_file_outcome(path: &Path) -> CacheLoadOutcome {
    // Log the reason and build the `Corrupt` outcome; shared by every
    // hard-failure exit below.
    let corrupt = |reason: String| {
        tracing::warn!("Findings cache at {}: {reason}", path.display());
        CacheLoadOutcome::Corrupt {
            path: path.to_path_buf(),
            reason,
        }
    };

    let data = match std::fs::read_to_string(path) {
        Ok(contents) => contents,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return CacheLoadOutcome::Missing,
        Err(e) => return corrupt(format!("failed to read file: {e}")),
    };
    let mut json: serde_json::Value = match serde_json::from_str(&data) {
        Ok(value) => value,
        Err(e) => return corrupt(format!("invalid JSON: {e}")),
    };

    // Version check: cache from a different binary version is
    // expected after `cargo install` and not loud-worthy.
    let cached_version = json.get("version").and_then(|v| v.as_str()).unwrap_or("");
    if cached_version != env!("CARGO_PKG_VERSION") {
        tracing::debug!(
            "Findings cache version mismatch ({} vs {}), ignoring",
            cached_version,
            env!("CARGO_PKG_VERSION")
        );
        return CacheLoadOutcome::VersionMismatch {
            cached: cached_version.to_string(),
            current: env!("CARGO_PKG_VERSION"),
        };
    }

    // Move the array out of the parsed document instead of deep-cloning it;
    // nothing reads `json` after this point.
    let Some(findings_value) = json.get_mut("findings").map(serde_json::Value::take) else {
        return corrupt("missing `findings` field".to_string());
    };
    let findings: Vec<Finding> = match serde_json::from_value(findings_value) {
        Ok(parsed) => parsed,
        Err(e) => return corrupt(format!("findings array failed to deserialize: {e}")),
    };

    // Semantic validation: deserialization is permissive (every
    // Finding field carries `#[serde(default)]`), so a JSON object
    // like `{"bogus": "x"}` parses into an all-default Finding.
    // Reject such entries; see `Finding::is_valid`.
    let original_count = findings.len();
    let mut valid: Vec<Finding> = Vec::with_capacity(original_count);
    let mut invalid_count = 0usize;
    for (index, finding) in findings.into_iter().enumerate() {
        if finding.is_valid() {
            valid.push(finding);
            continue;
        }
        invalid_count += 1;
        // `index` is the entry's position in the cache file, so the
        // warning points at the real offender.
        tracing::warn!(
            "Cache entry {} of {} at {} is invalid and will be skipped: {:?}",
            index + 1,
            original_count,
            path.display(),
            finding.validation_errors(),
        );
    }

    if invalid_count > 0 {
        if valid.is_empty() {
            // Total-corruption case (Bug 1 surface): all entries are
            // semantically empty. Classify as `Corrupt` so callers
            // surface the user-facing warning on stdout.
            return CacheLoadOutcome::Corrupt {
                path: path.to_path_buf(),
                reason: format!("all {} findings failed semantic validation", original_count),
            };
        }
        // Partial-corruption: keep the valid ones and emit a stdout
        // warning so the anomaly is visible.
        println!(
            "⚠ {} of {} findings in cache at {} are invalid and were skipped. \
             Re-run `repotoire analyze` to regenerate the cache.",
            invalid_count,
            original_count,
            path.display()
        );
    }

    tracing::debug!(
        "Loaded {} post-processed findings from {}",
        valid.len(),
        path.display()
    );
    CacheLoadOutcome::Findings(valid)
}

/// Assemble the JSON document stored in `last_findings.json`.
///
/// The envelope has two keys: `version` (this binary's
/// `CARGO_PKG_VERSION`) and `findings`. Each finding is serialized through
/// its own `Serialize` impl instead of a hand-maintained field list, so
/// every `Finding` field — including `alternative_branch` and
/// `prediction_reasons` from the dual-branch work — is persisted
/// automatically and the writer cannot drift from the struct.
///
/// The only rewrite applied is to `affected_files`, whose entries pass
/// through `normalize_path` so snapshots stay stable when the repo moves
/// between `$HOME` and `/tmp/<sandbox>/` locations (used by integration
/// test fixtures). Other location-bearing fields such as
/// `prediction_reasons[*].evidence` use source-relative spans rather than
/// absolute paths and are intentionally left untouched.
fn cache_findings_payload(all_findings: &[Finding]) -> serde_json::Value {
    let mut normalized: Vec<Finding> = Vec::with_capacity(all_findings.len());
    for original in all_findings {
        let mut entry = original.clone();
        for file in entry.affected_files.iter_mut() {
            let cleaned = normalize_path(file.as_path());
            *file = PathBuf::from(cleaned);
        }
        normalized.push(entry);
    }

    serde_json::json!({
        "version": env!("CARGO_PKG_VERSION"),
        "findings": normalized,
    })
}

/// Persist the latest analysis so other commands can read it.
///
/// Before anything is overwritten, the existing cache files are rotated
/// into baselines for `repotoire diff`:
/// - `last_findings.json` is copied to `baseline_findings.json` only if it
///   loads as valid findings; a corrupt or version-mismatched cache causes
///   any existing baseline to be removed instead;
/// - `last_health.json` is copied to `baseline_health.json` as-is.
///
/// Then `last_health.json` and `last_findings.json` are rewritten from
/// `report` and `all_findings`.
///
/// # Errors
/// Fails if serializing or writing either `last_*.json` file fails.
/// Baseline rotation is best-effort and never produces an error.
pub fn cache_results(
    repotoire_dir: &Path,
    report: &HealthReport,
    all_findings: &[Finding],
) -> Result<()> {
    use std::fs;

    let last_findings_path = repotoire_dir.join("last_findings.json");
    let baseline_findings_path = repotoire_dir.join("baseline_findings.json");
    let last_health_path = repotoire_dir.join("last_health.json");

    // Rotate the findings cache into the diff baseline before overwriting it.
    //
    // A plain byte copy would carry a tampered, partially written, or
    // version-mismatched cache into the baseline, and nothing ever
    // overwrites a baseline unconditionally — so `diff` would keep warning
    // about corruption even after a fresh `analyze` (see PR #118's
    // degraded-but-noisy retry path). We therefore validate first, using
    // the same loader the read side trusts (post-PR #117), which keeps a
    // single definition of "valid findings cache".
    if last_findings_path.exists() {
        match load_cached_findings_outcome_from(&last_findings_path) {
            CacheLoadOutcome::Missing => {
                // Defensive: the `exists()` check above should rule this
                // out; there is simply nothing to snapshot.
            }
            CacheLoadOutcome::VersionMismatch { .. } | CacheLoadOutcome::Corrupt { .. } => {
                // Never propagate bad state. No baseline at all is a
                // well-defined first-run situation (PR #118); a stale one
                // is confusing.
                let _ = fs::remove_file(&baseline_findings_path);
            }
            CacheLoadOutcome::Findings(_) => {
                let _ = fs::copy(&last_findings_path, &baseline_findings_path);
            }
        }
    }

    // The health blob has no semantic validator (it is a small fixed JSON
    // object parsed permissively by `load_score_from`), so it is still
    // snapshotted byte-for-byte. A stale numeric score is not the UX hazard
    // that a stale findings list is.
    if last_health_path.exists() {
        let _ = fs::copy(&last_health_path, repotoire_dir.join("baseline_health.json"));
    }

    // The score is stored under two keys: `health_score` for the
    // `repotoire diff` score delta and `overall_score` for
    // `repotoire benchmark`, so one writer serves both readers.
    let health_json = serde_json::json!({
        "health_score": report.overall_score,
        "overall_score": report.overall_score,
        "structure_score": report.structure_score,
        "quality_score": report.quality_score,
        "architecture_score": report.architecture_score,
        "grade": report.grade,
        "total_files": report.total_files,
        "total_functions": report.total_functions,
        "total_classes": report.total_classes,
        "total_loc": report.total_loc,
    });
    let health_text = serde_json::to_string_pretty(&health_json)?;
    fs::write(&last_health_path, health_text)?;

    // Debug-build tripwire: findings leaving postprocess should already be
    // semantically valid. Catch a pipeline bug here instead of writing
    // garbage the cache reader will later reject. Release builds skip this;
    // the reader's validation is the real boundary.
    if cfg!(debug_assertions) {
        let offenders: Vec<_> = all_findings
            .iter()
            .filter(|f| !f.is_valid())
            .map(|f| (f.detector.clone(), f.title.clone(), f.validation_errors()))
            .collect();
        assert!(
            offenders.is_empty(),
            "cache_results received an invalid Finding; this is a pipeline bug. \
             Offenders: {:?}",
            offenders,
        );
    }

    let findings_json = cache_findings_payload(all_findings);
    let findings_text = serde_json::to_string(&findings_json)?;
    fs::write(&last_findings_path, findings_text)?;

    tracing::debug!("Cached analysis results to {}", repotoire_dir.display());
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::dual_branch::{
        AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind,
    };
    use std::collections::BTreeSet;

    /// Building a `Finding` with a populated dual-branch payload, running
    /// it through `cache_findings_payload`, then deserializing it back
    /// must yield the original `alternative_branch` and `prediction_reasons`.
    ///
    /// Regression guard: the bespoke `serde_json::json!` writer that this
    /// commit replaces silently dropped both fields, leaving every
    /// downstream cache consumer (`findings`, `fix`, `diff`) blind to
    /// the predictor's reasoning.
    #[test]
    fn cache_round_trips_dual_branch_payload() {
        let mut finding = Finding {
            id: "test-1".to_string(),
            detector: "TestDetector".to_string(),
            title: "title".to_string(),
            severity: Severity::High,
            affected_files: vec![PathBuf::from("src/test.py")],
            line_start: Some(10),
            ..Default::default()
        };
        finding.alternative_branch = Some(AlternativeBranch {
            label: BranchLabel::Benign,
            severity: Severity::Info,
            title: "Benign interpretation".to_string(),
            description: "This may be intentional".to_string(),
            suggested_fix: Some("Annotate as safe".to_string()),
        });
        finding.prediction_reasons = vec![PredictionReason {
            kind: PredictionReasonKind::KeywordArgument {
                name: "verify".to_string(),
                value: "False".to_string(),
            },
            weight: -0.4,
            note: "verify=False on a TLS call leans RealBug.".to_string(),
        }];

        let payload = cache_findings_payload(std::slice::from_ref(&finding));
        let findings: Vec<Finding> = serde_json::from_value(payload["findings"].clone())
            .expect("findings array round-trips through serde");

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].alternative_branch, finding.alternative_branch);
        assert_eq!(findings[0].prediction_reasons, finding.prediction_reasons);
    }

    /// Field-completeness tripwire: the cache payload's key set for a
    /// single `Finding` must match the key set produced by serializing
    /// `Finding` directly through serde.
    ///
    /// If a future change adds a field to `Finding` (or removes one),
    /// the cache payload's keys will diverge from `serde_json::to_value`
    /// only if someone hand-rolled the cache writer again. This test
    /// fails loudly in that case, forcing the author to think about
    /// cache surface rather than silently dropping the new field.
    ///
    /// The point of this test is to be brittle: brittleness here is a
    /// feature, not a bug.
    #[test]
    fn cache_payload_field_set_matches_finding_serialization() {
        let finding = Finding::default();
        let payload = cache_findings_payload(std::slice::from_ref(&finding));
        let cache_keys: BTreeSet<String> = payload["findings"][0]
            .as_object()
            .expect("finding entry is a JSON object")
            .keys()
            .cloned()
            .collect();

        let direct_value = serde_json::to_value(&finding).expect("Finding serializes via serde");
        let direct_keys: BTreeSet<String> = direct_value
            .as_object()
            .expect("Finding is a JSON object")
            .keys()
            .cloned()
            .collect();

        assert_eq!(
            cache_keys, direct_keys,
            "cache payload must persist every Finding field; \
             a divergence here means someone hand-rolled JSON \
             again instead of letting serde do it"
        );
    }

    /// Spot-check that fields previously dropped by the bespoke writer
    /// (the eight non-dual-branch ones) now survive a round trip too.
    /// This guards against a partial-revert that re-introduces a
    /// hand-rolled writer with most fields back but a few still missing.
    #[test]
    fn cache_round_trips_previously_dropped_scalar_fields() {
        let finding = Finding {
            id: "test-scalars".to_string(),
            detector: "TestDetector".to_string(),
            deterministic: true,
            estimated_effort: Some("low".to_string()),
            original_severity: Some(Severity::Critical),
            status: crate::models::FindingStatus::Baselined,
            attribution: crate::models::Attribution::InChangedNode,
            ..Default::default()
        };

        let payload = cache_findings_payload(std::slice::from_ref(&finding));
        let findings: Vec<Finding> = serde_json::from_value(payload["findings"].clone())
            .expect("findings array round-trips");

        assert!(findings[0].deterministic);
        assert_eq!(findings[0].estimated_effort.as_deref(), Some("low"));
        assert_eq!(findings[0].original_severity, Some(Severity::Critical));
        assert_eq!(findings[0].status, crate::models::FindingStatus::Baselined);
        assert_eq!(
            findings[0].attribution,
            crate::models::Attribution::InChangedNode
        );
    }

    /// The cache's `affected_files` must come out path-normalized
    /// (`/tmp/<sandbox>/...` and `$HOME/...` prefixes stripped) so
    /// integration test snapshots stay stable across machines.
    /// This is the one piece of post-processing we keep on top of
    /// the otherwise-direct `Finding` serialization.
    #[test]
    fn cache_normalizes_affected_files_paths() {
        let finding = Finding {
            affected_files: vec![PathBuf::from("/tmp/abc123/repo/src/main.py")],
            ..Default::default()
        };
        let payload = cache_findings_payload(&[finding]);
        let affected = payload["findings"][0]["affected_files"]
            .as_array()
            .expect("affected_files is an array");
        assert_eq!(affected.len(), 1);
        assert_eq!(affected[0].as_str(), Some("repo/src/main.py"));
    }

    /// Bug 1 regression guard: a cache containing only semantically
    /// empty findings (e.g. `[{"bogus": "x"}]`) must surface as
    /// `CacheLoadOutcome::Corrupt`, not as
    /// `CacheLoadOutcome::Findings(vec![Finding::default()])`. Before the
    /// `is_valid()` filter the readers happily passed such garbage
    /// down to `status`/`diff`/`feedback`, which emitted confident
    /// green-checkmark output on corrupt input.
    #[test]
    fn load_findings_rejects_fully_invalid_cache() {
        use std::io::Write;
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("last_findings.json");
        let mut f = std::fs::File::create(&path).expect("create");
        write!(
            f,
            r#"{{"version": "{}", "findings": [{{"bogus": "x"}}]}}"#,
            env!("CARGO_PKG_VERSION")
        )
        .expect("write");
        drop(f);
        let outcome = load_findings_from_file_outcome(&path);
        match outcome {
            CacheLoadOutcome::Corrupt { reason, .. } => {
                assert!(
                    reason.contains("semantic validation"),
                    "expected semantic-validation reason; got {reason}"
                );
            }
            other => panic!("expected Corrupt for fully-invalid cache; got {other:?}"),
        }
    }

    /// Partial corruption (some valid, some invalid) keeps the valid
    /// findings so the user is not blocked, but emits a stdout
    /// warning so the anomaly is visible.
    #[test]
    fn load_findings_filters_invalid_keeps_valid() {
        use std::io::Write;
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("last_findings.json");
        let mut f = std::fs::File::create(&path).expect("create");
        write!(
            f,
            r#"{{"version": "{ver}", "findings": [
                {{"bogus": "x"}},
                {{"id": "f1", "detector": "Det", "title": "T",
                  "affected_files": ["src/main.py"], "line_start": 1}}
            ]}}"#,
            ver = env!("CARGO_PKG_VERSION"),
        )
        .expect("write");
        drop(f);
        match load_findings_from_file_outcome(&path) {
            CacheLoadOutcome::Findings(v) => {
                assert_eq!(v.len(), 1);
                assert_eq!(v[0].detector, "Det");
            }
            other => panic!("expected Findings for partial-valid cache; got {other:?}"),
        }
    }

    /// Bug 3 regression guard: an invalid-JSON cache classifies as
    /// `Corrupt`, not `Missing`. Callers branch on this to print the
    /// user-facing warning on stdout (instead of treating it as the
    /// soft-fail "no analysis yet" case).
    #[test]
    fn load_findings_classifies_invalid_json_as_corrupt() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("last_findings.json");
        std::fs::write(&path, "not even json {{").expect("write");
        match load_findings_from_file_outcome(&path) {
            CacheLoadOutcome::Corrupt { path: p, reason } => {
                assert_eq!(p, path);
                assert!(
                    reason.contains("invalid JSON") || reason.contains("JSON"),
                    "expected JSON-mention reason; got {reason}"
                );
                // The user_warning() helper must produce an
                // actionable message naming the path and the
                // re-run-analyze remediation.
                let outcome = load_findings_from_file_outcome(&path);
                let msg = outcome
                    .user_warning()
                    .expect("Corrupt outcome must have user_warning");
                assert!(msg.contains("last_findings.json"));
                assert!(msg.contains("repotoire analyze"));
            }
            other => panic!("expected Corrupt; got {other:?}"),
        }
    }

    /// An absent cache file is the legitimate no-analysis-yet case: it
    /// must classify as `Missing` (and NOT as `Corrupt`) so that it
    /// remains a soft failure.
    ///
    /// If the classification is wrong, the panic message includes the
    /// outcome that was actually returned, so the failure is diagnosable
    /// from the test log alone.
    #[test]
    fn load_findings_classifies_missing_correctly() {
        let dir = tempfile::tempdir().expect("tempdir");
        // Never created: the freshly made temp directory is empty.
        let path = dir.path().join("does-not-exist.json");
        match load_findings_from_file_outcome(&path) {
            CacheLoadOutcome::Missing => {}
            other => panic!("expected Missing; got {other:?}"),
        }
    }

    /// Helper: minimal `HealthReport` for `cache_results` exercise.
    fn empty_health_report() -> crate::models::HealthReport {
        crate::models::HealthReport {
            overall_score: 0.0,
            grade: crate::models::Grade::F,
            structure_score: 0.0,
            quality_score: 0.0,
            architecture_score: None,
            findings: vec![],
            findings_summary: crate::models::FindingsSummary::from_findings(&[]),
            total_files: 0,
            total_functions: 0,
            total_classes: 0,
            total_loc: 0,
        }
    }

    /// `cache_results` has to validate `last_findings.json` semantically
    /// before snapshotting it into `baseline_findings.json`. When the
    /// existing `last_findings.json` is corrupt, any stale baseline must
    /// be REMOVED instead of being overwritten with the corrupt content,
    /// so that the next `diff` observes well-defined "first-run, no
    /// baseline" semantics.
    ///
    /// Regression guard: `cache_results` previously used a byte-faithful
    /// `fs::copy`, which propagated corruption into the baseline; it
    /// then persisted indefinitely because no code path overwrites a
    /// baseline unconditionally.
    #[test]
    fn cache_results_drops_baseline_when_last_findings_corrupt() {
        use std::io::Write;
        let tmp = tempfile::tempdir().expect("tempdir");
        let repotoire_dir = tmp.path();
        let last_findings = repotoire_dir.join("last_findings.json");
        let baseline = repotoire_dir.join("baseline_findings.json");

        // CORRUPT last_findings.json: it parses as JSON but fails
        // semantic validation (an all-default Finding is rejected by
        // Finding::is_valid()).
        let corrupt_payload = format!(
            r#"{{"version": "{}", "findings": [{{"bogus": "x"}}]}}"#,
            env!("CARGO_PKG_VERSION")
        );
        {
            // Scoped so the handle is closed before `cache_results` runs.
            let mut last_file = std::fs::File::create(&last_findings).expect("create last");
            last_file
                .write_all(corrupt_payload.as_bytes())
                .expect("write last");
        }

        // Pre-existing, valid baseline from a "prior run". This is the
        // file `cache_results` is expected to REMOVE rather than replace
        // with the corrupt last_findings content.
        let baseline_payload = format!(
            r#"{{"version": "{}", "findings": []}}"#,
            env!("CARGO_PKG_VERSION")
        );
        {
            let mut baseline_file = std::fs::File::create(&baseline).expect("create baseline");
            baseline_file
                .write_all(baseline_payload.as_bytes())
                .expect("write baseline");
        }
        assert!(baseline.exists(), "precondition: baseline present");

        let report = empty_health_report();
        cache_results(repotoire_dir, &report, &[]).expect("cache_results");

        let baseline_survived = baseline.exists();
        assert!(
            !baseline_survived,
            "corrupt last_findings.json must cause baseline_findings.json to be removed, \
             not overwritten with the corrupt snapshot"
        );
    }

    /// Happy path: a `last_findings.json` that passes validation is
    /// still snapshotted to `baseline_findings.json` by `cache_results`.
    #[test]
    fn cache_results_snapshots_baseline_when_last_findings_valid() {
        use std::io::Write;
        let tmp = tempfile::tempdir().expect("tempdir");
        let repotoire_dir = tmp.path();
        let last_findings = repotoire_dir.join("last_findings.json");
        let baseline = repotoire_dir.join("baseline_findings.json");

        // VALID last_findings.json holding exactly one well-formed Finding.
        let valid_payload = format!(
            r#"{{"version": "{}", "findings": [
                {{"id": "f1", "detector": "Det", "title": "T",
                  "affected_files": ["src/main.py"], "line_start": 1}}
            ]}}"#,
            env!("CARGO_PKG_VERSION")
        );
        {
            // Scoped so the handle is closed before `cache_results` runs.
            let mut last_file = std::fs::File::create(&last_findings).expect("create last");
            last_file
                .write_all(valid_payload.as_bytes())
                .expect("write last");
        }

        assert!(!baseline.exists(), "precondition: no prior baseline");

        let report = empty_health_report();
        cache_results(repotoire_dir, &report, &[]).expect("cache_results");

        assert!(
            baseline.exists(),
            "valid last_findings.json must produce a baseline snapshot"
        );

        // The snapshot is a byte-for-byte copy, so the baseline must
        // itself load and validate, yielding the single seeded finding.
        let snapshot = match load_findings_from_file_outcome(&baseline) {
            CacheLoadOutcome::Findings(found) => found,
            other => panic!("expected valid Findings in snapshot baseline; got {other:?}"),
        };
        assert_eq!(snapshot.len(), 1);
    }
}