// repotoire 0.8.3

//! Core data models for Repotoire
//!
//! These models are used throughout the codebase for representing
//! code entities, findings, and analysis results.

use serde::{Deserialize, Serialize};
use std::path::PathBuf;

/// Generate a deterministic finding ID.
///
/// This ensures findings have stable IDs across runs, enabling:
/// - Tracking findings over time (fixed vs new vs recurring)
/// - Suppression by ID in config files
/// - Reliable deduplication
///
/// The ID is computed by `crate::detectors::base::finding_id` from:
/// - detector name (which detector found it)
/// - file path (where it was found)
/// - line number (specific location)
///
/// `_title` is accepted but ignored: it is not forwarded to `finding_id`,
/// so two findings that differ only in their title receive the same ID.
/// The exact ID format is whatever `finding_id` returns (not visible in
/// this file).
pub fn deterministic_finding_id(detector: &str, file: &str, line: u32, _title: &str) -> String {
    // Note: postprocessing overwrites all IDs via finding_id() (#73).
    // Uses FNV-1a for cross-toolchain stability (see finding_id).
    crate::detectors::base::finding_id(detector, file, line)
}

/// Severity levels for findings
//
// Variants are declared from least to most severe, so the derived
// `PartialOrd`/`Ord` yield `Info < Low < Medium < High < Critical`.
// `Info` is the `Default`. With `rename_all = "lowercase"`, serde reads and
// writes each variant as its lowercase name (e.g. `"high"`).
//
// Plain `//` comments are used here on purpose: this enum derives
// `clap::ValueEnum`, and doc comments on its variants would become CLI
// help text.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    Default,
    clap::ValueEnum,
)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    #[default]
    Info,
    Low,
    Medium,
    High,
    Critical,
}

/// Renders a severity as its lowercase name (`info`, `low`, `medium`,
/// `high`, `critical`), the same spelling `FromStr` accepts.
impl std::fmt::Display for Severity {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            Severity::Critical => "critical",
            Severity::High => "high",
            Severity::Medium => "medium",
            Severity::Low => "low",
            Severity::Info => "info",
        };
        f.write_str(label)
    }
}

/// Parses a severity name, ignoring letter case.
///
/// # Errors
///
/// Returns an error that echoes the rejected input and lists the valid
/// names when `s` matches none of them.
impl std::str::FromStr for Severity {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Normalise once so the table below only needs lowercase spellings.
        let normalized = s.to_lowercase();
        let parsed = match normalized.as_str() {
            "info" => Severity::Info,
            "low" => Severity::Low,
            "medium" => Severity::Medium,
            "high" => Severity::High,
            "critical" => Severity::Critical,
            _ => {
                // Report the caller's original spelling, not the lowercased one.
                return Err(anyhow::anyhow!(
                    "Unknown severity '{}'. Valid: critical, high, medium, low, info",
                    s
                ));
            }
        };
        Ok(parsed)
    }
}

/// Serialize a BTreeMap as an optional value: an empty map is written as
/// `None` (`null` in JSON), a non-empty map as `Some(map)`.
fn serialize_empty_map_as_null<S>(
    map: &std::collections::BTreeMap<String, String>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    // These are the two serializer calls `Option`'s `Serialize` impl makes
    // for `None` and `Some(_)` respectively.
    if map.is_empty() {
        return serializer.serialize_none();
    }
    serializer.serialize_some(map)
}

/// Deserialize an optional BTreeMap, mapping an absent value (`null` in
/// JSON) to an empty map.
fn deserialize_null_as_empty_map<'de, D>(
    // repotoire:ignore[surprisal]
    deserializer: D,
) -> Result<std::collections::BTreeMap<String, String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    type StringMap = std::collections::BTreeMap<String, String>;

    // Read it as `Option<map>`, then collapse `None` into the default (empty) map.
    Option::<StringMap>::deserialize(deserializer).map(Option::unwrap_or_default)
}

/// Status of a finding relative to the baseline.
///
/// Serialized by serde as the lowercase variant name (`"new"`,
/// `"baselined"`, `"fixed"`, `"stale"`). `New` is the `Default`.
///
/// NOTE(review): the precise meaning of each status is defined by the
/// baseline comparison code, which is not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum FindingStatus {
    #[default]
    New,
    Baselined,
    Fixed,
    Stale,
}

/// Attribution of a finding to a delta (changed code vs unrelated).
///
/// Serialized by serde in snake_case (`"in_changed_node"`,
/// `"in_caller_of_changed"`, `"in_unrelated"`). `InUnrelated` is the
/// `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum Attribution {
    InChangedNode,
    InCallerOfChanged,
    #[default]
    InUnrelated,
}

/// Confidence level for a finding (Low < Medium < High).
///
/// The ordering comes from the derived `PartialOrd`/`Ord`, which follow
/// declaration order, so the variants must stay in ascending order.
/// Serialized by serde as the lowercase variant name. No `Default` is
/// derived for this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
    Low,
    Medium,
    High,
}

impl Confidence {
    /// Bucket a numeric score into a confidence level.
    ///
    /// Scores of at least 0.75 map to `High`, scores of at least 0.5 map
    /// to `Medium`, and everything else maps to `Low`. The input is not
    /// range-checked: values above 1.0 are `High`, negative values are
    /// `Low`, and NaN (which fails both comparisons) is `Low`.
    pub fn from_score(score: f64) -> Self {
        match score {
            s if s >= 0.75 => Confidence::High,
            s if s >= 0.5 => Confidence::Medium,
            _ => Confidence::Low,
        }
    }
}

/// Inclusive line range within a file, used both as a query target
/// (git history, blame) and as the source-position pair on `Finding`.
///
/// Construct via [`LineRange::new`] for a known range, or pass an
/// `Option<LineRange>` when either endpoint may be missing.
///
/// Construction does not validate the endpoints; call
/// [`LineRange::is_valid`] to check them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct LineRange {
    /// First line of the range (inclusive). `is_valid` rejects `0`.
    pub start: u32,
    /// Last line of the range (inclusive). `is_valid` requires `end >= start`.
    pub end: u32,
}

impl LineRange {
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// True when the start line is non-zero and the end line is at
    /// least the start line. Mirrors the validation that several
    /// callers (e.g. `blame_lines`) already perform inline.
    pub fn is_valid(self) -> bool {
        self.start != 0 && self.end >= self.start
    }

    pub fn contains_line(self, line: u32) -> bool {
        line >= self.start && line <= self.end
    }
}

/// A code smell or issue finding
///
/// Every field carries `#[serde(default)]`, so deserialization accepts
/// input containing any subset of the keys and fills the rest with their
/// `Default` values.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Finding {
    /// Finding identifier; empty by default.
    #[serde(default)]
    pub id: String,
    /// Name of the detector that produced the finding.
    #[serde(default)]
    pub detector: String,
    /// Severity level; defaults to `Severity::Info`.
    #[serde(default)]
    pub severity: Severity,
    /// Short human-readable label.
    #[serde(default)]
    pub title: String,
    /// Longer description of the issue.
    #[serde(default)]
    pub description: String,
    /// Paths of the files the finding applies to; may be empty.
    #[serde(default)]
    pub affected_files: Vec<PathBuf>,
    /// First line of the finding, when known.
    #[serde(default)]
    pub line_start: Option<u32>,
    /// Last line of the finding, when known.
    #[serde(default)]
    pub line_end: Option<u32>,
    /// Suggested remediation text, if any.
    #[serde(default)]
    pub suggested_fix: Option<String>,
    /// Free-form effort estimate, if any.
    #[serde(default)]
    pub estimated_effort: Option<String>,
    /// Category string, e.g. `"architecture"`, `"security"`, `"design"`
    /// (the values `default_confidence_for_category` recognises).
    #[serde(default)]
    pub category: Option<String>,
    /// CWE identifier, if any. NOTE(review): expected format is not
    /// established in this file.
    #[serde(default)]
    pub cwe_id: Option<String>,
    /// Explanation of why the issue matters, if any.
    #[serde(default)]
    pub why_it_matters: Option<String>,
    /// Confidence score from 0.0 to 1.0 (set by voting engine or detector)
    #[serde(default)]
    pub confidence: Option<f64>,
    /// Whether this finding was produced by a deterministic (mathematically provable) detector.
    /// Deterministic findings bypass statistical FP classifiers.
    #[serde(default)]
    pub deterministic: bool,
    /// Threshold metadata for adaptive explainability
    /// Keys: threshold_source, effective_threshold, actual_value, default_threshold
    ///
    /// An empty map is serialized as `null`, and `null` (or a missing key)
    /// is deserialized back into an empty map.
    #[serde(
        default,
        serialize_with = "serialize_empty_map_as_null",
        deserialize_with = "deserialize_null_as_empty_map"
    )]
    pub threshold_metadata: std::collections::BTreeMap<String, String>,
    /// Finding status relative to baseline (New, Baselined, Fixed, Stale).
    #[serde(default)]
    pub status: FindingStatus,
    /// Attribution to delta analysis (changed node, caller, or unrelated).
    #[serde(default)]
    pub attribution: Attribution,
    /// Original severity before config remap (if remapped).
    #[serde(default)]
    pub original_severity: Option<Severity>,
    // ── Dual-branch findings (Phase 1a) ──
    //
    // The fields below are additive (RFC decision D1 in
    // docs/superpowers/specs/2026-05-09-dual-branch-findings-design.md).
    // All are `serde(default)` so JSON produced by older versions (without
    // these keys) parses unchanged. We do NOT use `skip_serializing_if`:
    // the existing Finding schema serializes optional fields as `null` /
    // empty containers (see how `category`, `cwe_id`, `confidence`, etc.
    // are emitted), and the in-house `bitcode` cache serializer used by
    // `detectors::incremental_cache` rejects `skip_serializing_if`
    // attributes ("skip field is not supported"). Matching the existing
    // convention keeps the JSON shape uniform and the cache happy.
    //
    /// The alternative interpretation of this finding, when it has one.
    /// `None` (serialized as `null`) for findings with a single,
    /// definitive interpretation. See `dual_branch::AlternativeBranch`.
    #[serde(default)]
    pub alternative_branch: Option<crate::dual_branch::AlternativeBranch>,
    /// Typed evidence the predictor used to choose between branches.
    /// Empty `[]` for findings without a dual-branch interpretation.
    /// Phase 1b will populate this from existing `confidence_enrichment`
    /// signals.
    #[serde(default)]
    pub prediction_reasons: Vec<crate::dual_branch::PredictionReason>,
    /// Definitive resolution signals: code changes that, if applied,
    /// collapse the dual-branch finding to a single interpretation.
    /// Empty `[]` for findings without a dual-branch interpretation.
    #[serde(default)]
    pub resolution_signals: Vec<crate::dual_branch::ResolutionSignal>,
}

/// Default confidence value when no category-specific default applies.
///
/// Returned by `Finding::effective_confidence` when `confidence` is `None`,
/// and by `Finding::default_confidence_for_category` for any category it
/// does not list explicitly.
const DEFAULT_CONFIDENCE: f64 = 0.70;

impl Finding {
    /// Semantic validity check for cache trust-boundary use.
    ///
    /// `Finding` deserialization is intentionally permissive: every
    /// field carries `#[serde(default)]` so that JSON written by an
    /// older binary version round-trips through a newer one without
    /// breakage. The cost of that permissiveness is that a JSON
    /// object like `{"bogus": "x"}` parses successfully into a
    /// `Finding` of all defaults — semantically empty, but
    /// syntactically valid. Without an explicit semantic check the
    /// cache readers (`status`, `diff`, `feedback`, `findings`)
    /// happily treat such garbage as a real finding and emit
    /// green-checkmark output on corrupt input.
    ///
    /// `is_valid()` defines the minimum contract a `Finding` must
    /// meet to be persisted to or trusted from the cache:
    ///
    /// - `detector` is non-empty (every finding has a producing
    ///   detector; empty defeats every grouping and feedback flow).
    /// - `title` is non-empty (every finding needs a human-readable
    ///   label; empty defeats every UI surface).
    /// - At least one locator: a non-empty `id` OR (a non-empty
    ///   `affected_files` AND `line_start.is_some()`). Findings
    ///   without any locator are not actionable — `fix`, `feedback`,
    ///   and `diff` all need one of these to do anything useful.
    ///   The `id` clause covers project-level findings; the
    ///   file-line clause covers detector findings.
    ///
    /// This is the function called by the cache writer's
    /// `debug_assert!` and by the cache readers when filtering
    /// loaded `Vec<Finding>`. See [`Self::validation_errors`] for an
    /// actionable list of which fields are missing.
    pub fn is_valid(&self) -> bool {
        self.validation_errors().is_empty()
    }

    /// Return the list of field-level reasons this `Finding` fails
    /// [`Self::is_valid`]. Empty `Vec` means the finding is valid.
    ///
    /// Intended for user-facing error messages: when a cache load
    /// rejects an entry, we want to tell the user *which* fields
    /// were missing, not just "invalid".
    ///
    /// Reasons are reported in a fixed order: detector, title, locator.
    pub fn validation_errors(&self) -> Vec<&'static str> {
        let mut errs = Vec::new();
        if self.detector.is_empty() {
            errs.push("detector is empty");
        }
        if self.title.is_empty() {
            errs.push("title is empty");
        }
        // Either locator on its own is sufficient; see `is_valid`.
        let has_id = !self.id.is_empty();
        let has_file_line = !self.affected_files.is_empty() && self.line_start.is_some();
        if !has_id && !has_file_line {
            errs.push("no locator (need non-empty id or affected_files+line_start)");
        }
        errs
    }

    /// Set a default confidence value if none has been set by a detector.
    ///
    /// This is a builder-style method: it returns `self` so it can be chained.
    /// If `self.confidence` is already `Some(_)`, the value is left untouched.
    ///
    /// Marked `#[must_use]` like the other builders on this type: the
    /// method consumes `self`, so dropping the return value drops the
    /// finding.
    #[must_use]
    pub fn with_default_confidence(mut self, default: f64) -> Self {
        if self.confidence.is_none() {
            self.confidence = Some(default);
        }
        self
    }

    /// Return the effective confidence for this finding.
    ///
    /// If a detector or the postprocess pipeline has set an explicit confidence,
    /// that value is returned as-is (it is not clamped or validated).
    /// Otherwise falls back to 0.70.
    pub fn effective_confidence(&self) -> f64 {
        self.confidence.unwrap_or(DEFAULT_CONFIDENCE)
    }

    /// Return the default confidence for a finding based on its category string.
    ///
    /// Matching is exact and case-sensitive.
    ///
    /// | Category                  | Default | Rationale                                 |
    /// |---------------------------|---------|-------------------------------------------|
    /// | "architecture"            | 0.85    | Structural evidence is strong             |
    /// | "security"                | 0.75    | Taint analysis is good but not perfect    |
    /// | "design"                  | 0.65    | Code smell detection has higher FP rate   |
    /// | "dead-code" / "dead_code" | 0.70    | Graph-based but may miss dynamic dispatch |
    /// | "ai_watchdog"             | 0.60    | Heuristic detection                       |
    /// | Others (and `None`)       | 0.70    | Reasonable default                        |
    pub fn default_confidence_for_category(category: Option<&str>) -> f64 {
        match category {
            Some("architecture") => 0.85,
            Some("security") => 0.75,
            Some("design") => 0.65,
            // Same value as the fallback today; listed explicitly so the
            // category keeps its own entry if the fallback ever changes.
            Some("dead-code") | Some("dead_code") => 0.70,
            Some("ai_watchdog") => 0.60,
            _ => DEFAULT_CONFIDENCE,
        }
    }

    // ── Dual-branch builder methods (Phase 1a) ──
    //
    // These are zero-cost, additive helpers. They do not change emission
    // behavior — they just provide the typed surface that Phase 1b will
    // call when bridging from existing signal sources. Each takes `mut
    // self` and returns `Self` for the standard builder pattern already
    // used by `with_default_confidence`.

    /// Attach the alternative interpretation of this finding.
    ///
    /// Calling this twice replaces the previous value. The caller is
    /// responsible for ensuring the alternative's `label` is the opposite
    /// of the predicted (primary) interpretation; see
    /// `dual_branch::BranchLabel::opposite`.
    #[must_use]
    pub fn with_alternative_branch(
        mut self,
        alternative: crate::dual_branch::AlternativeBranch,
    ) -> Self {
        self.alternative_branch = Some(alternative);
        self
    }

    /// Append one prediction reason. Order is preserved; downstream
    /// aggregation (Phase 1c) treats the order as informative for
    /// rendering only, not for weighting.
    #[must_use]
    pub fn with_prediction_reason(mut self, reason: crate::dual_branch::PredictionReason) -> Self {
        self.prediction_reasons.push(reason);
        self
    }

    /// Append one resolution signal. Order is preserved; the rendering
    /// path (Phase 2) shows them in append order under "How to resolve".
    #[must_use]
    pub fn with_resolution_signal(mut self, signal: crate::dual_branch::ResolutionSignal) -> Self {
        self.resolution_signals.push(signal);
        self
    }

    /// True iff this finding carries a dual-branch interpretation.
    ///
    /// Convention: a finding is dual-branch iff `alternative_branch`
    /// is `Some`. Prediction reasons or resolution signals without an
    /// alternative branch are valid but not "dual-branch" — they are
    /// just extra evidence on a single-interpretation finding.
    pub fn is_dual_branch(&self) -> bool {
        self.alternative_branch.is_some()
    }
}

/// Summary of findings by severity
///
/// Each field counts the findings of one `Severity` level; `total` counts
/// all findings. `Default` yields all zeros.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FindingsSummary {
    /// Number of `Severity::Critical` findings.
    pub critical: usize,
    /// Number of `Severity::High` findings.
    pub high: usize,
    /// Number of `Severity::Medium` findings.
    pub medium: usize,
    /// Number of `Severity::Low` findings.
    pub low: usize,
    /// Number of `Severity::Info` findings.
    pub info: usize,
    /// Number of findings across all severities.
    pub total: usize,
}

impl FindingsSummary {
    /// Tally `findings` by severity in a single pass.
    ///
    /// Every finding increments exactly one per-severity counter and
    /// `total`; an empty slice yields an all-zero summary.
    pub fn from_findings(findings: &[Finding]) -> Self {
        findings.iter().fold(Self::default(), |mut tally, finding| {
            // Select the counter that matches this finding's severity.
            let bucket = match finding.severity {
                Severity::Info => &mut tally.info,
                Severity::Low => &mut tally.low,
                Severity::Medium => &mut tally.medium,
                Severity::High => &mut tally.high,
                Severity::Critical => &mut tally.critical,
            };
            *bucket += 1;
            tally.total += 1;
            tally
        })
    }
}

/// Letter grades for code health (13 levels: A+ through F).
///
/// Variants are declared from worst to best, so the derived
/// `PartialOrd`/`Ord` give `F < D- < D < D+ < … < A < A+`; keep that order
/// when editing. `F` is the `Default`.
///
/// Serde uses the variant identifier for plain letters (`"F"`, `"D"`, …)
/// and the explicit `rename` for the `+`/`-` variants (`"D-"`, `"A+"`, …),
/// which matches the strings produced by `Display` and accepted by `FromStr`.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
)]
pub enum Grade {
    #[default]
    F,
    #[serde(rename = "D-")]
    DMinus,
    D,
    #[serde(rename = "D+")]
    DPlus,
    #[serde(rename = "C-")]
    CMinus,
    C,
    #[serde(rename = "C+")]
    CPlus,
    #[serde(rename = "B-")]
    BMinus,
    B,
    #[serde(rename = "B+")]
    BPlus,
    #[serde(rename = "A-")]
    AMinus,
    A,
    #[serde(rename = "A+")]
    APlus,
}

/// Renders a grade as its conventional label, e.g. `A+`, `B`, `C-`, `F`.
impl std::fmt::Display for Grade {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Listed worst to best, mirroring the enum's declaration order.
        let label = match self {
            Grade::F => "F",
            Grade::DMinus => "D-",
            Grade::D => "D",
            Grade::DPlus => "D+",
            Grade::CMinus => "C-",
            Grade::C => "C",
            Grade::CPlus => "C+",
            Grade::BMinus => "B-",
            Grade::B => "B",
            Grade::BPlus => "B+",
            Grade::AMinus => "A-",
            Grade::A => "A",
            Grade::APlus => "A+",
        };
        f.write_str(label)
    }
}

/// Parses a grade from its exact label (`"A+"`, `"B"`, `"C-"`, …).
///
/// Matching is case-sensitive and the input is not trimmed.
///
/// # Errors
///
/// Returns an error that echoes the input when it is not one of the 13
/// labels.
impl std::str::FromStr for Grade {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Label → variant lookup table, best grade first.
        const LABELS: [(&str, Grade); 13] = [
            ("A+", Grade::APlus),
            ("A", Grade::A),
            ("A-", Grade::AMinus),
            ("B+", Grade::BPlus),
            ("B", Grade::B),
            ("B-", Grade::BMinus),
            ("C+", Grade::CPlus),
            ("C", Grade::C),
            ("C-", Grade::CMinus),
            ("D+", Grade::DPlus),
            ("D", Grade::D),
            ("D-", Grade::DMinus),
            ("F", Grade::F),
        ];

        LABELS
            .iter()
            .find(|(label, _)| *label == s)
            .map(|&(_, grade)| grade)
            .ok_or_else(|| anyhow::anyhow!("Unknown grade '{}'", s))
    }
}

/// Overall health report for a codebase
///
/// NOTE(review): the numeric scale of the `*_score` fields is not fixed by
/// this type. `HealthReport::grade_from_score` uses cut-offs of 60–90,
/// which suggests 0–100 — confirm against the scoring code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthReport {
    /// Overall health score.
    pub overall_score: f64,
    /// Letter grade reported alongside the score.
    pub grade: Grade,
    /// Score for the structure dimension.
    pub structure_score: f64,
    /// Score for the quality dimension.
    pub quality_score: f64,
    /// Score for the architecture dimension, when available.
    pub architecture_score: Option<f64>,
    /// The findings included in this report.
    pub findings: Vec<Finding>,
    /// Per-severity counts (see `FindingsSummary`).
    pub findings_summary: FindingsSummary,
    /// Number of files.
    pub total_files: usize,
    /// Number of functions.
    pub total_functions: usize,
    /// Number of classes.
    pub total_classes: usize,
    /// Total lines of code.
    pub total_loc: usize,
}

impl HealthReport {
    /// Map a score onto a coarse five-letter scale.
    ///
    /// Cut-offs are 90 (`"A"`), 80 (`"B"`), 70 (`"C"`) and 60 (`"D"`);
    /// each cut-off belongs to the higher grade. Anything below 60, and
    /// NaN (which fails every comparison), yields `"F"`.
    ///
    /// Note that this returns a plain `String` without `+`/`-` modifiers;
    /// it does not produce the 13-level `Grade` enum.
    pub fn grade_from_score(score: f64) -> String {
        let letter = if score >= 90.0 {
            "A"
        } else if score >= 80.0 {
            "B"
        } else if score >= 70.0 {
            "C"
        } else if score >= 60.0 {
            "D"
        } else {
            "F"
        };
        letter.to_string()
    }
}

/// A function in the code graph
///
/// NOTE(review): `doc_comment` and `annotations` use
/// `skip_serializing_if`, which the comment on `Finding` says the `bitcode`
/// cache serializer rejects — presumably this type is never written through
/// that path; confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Function {
    /// Short name of the function.
    pub name: String,
    /// Qualified name (format is defined by the producer, not visible here).
    pub qualified_name: String,
    /// File the function is defined in.
    pub file_path: PathBuf,
    /// First line of the definition.
    pub line_start: u32,
    /// Last line of the definition.
    pub line_end: u32,
    /// Parameters, one string per parameter.
    pub parameters: Vec<String>,
    /// Declared return type, when known.
    pub return_type: Option<String>,
    /// Whether the function is async.
    pub is_async: bool,
    /// Complexity metric, when computed.
    pub complexity: Option<u32>,
    /// Maximum nesting depth within this function
    pub max_nesting: Option<u32>,
    /// Doc comment (Javadoc, JSDoc, Go doc, etc.)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub doc_comment: Option<String>,
    /// Annotations/decorators (e.g., Java @Override, @Deprecated)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub annotations: Vec<String>,
}

/// A class in the code graph
///
/// NOTE(review): `doc_comment` and `annotations` use
/// `skip_serializing_if`, which the comment on `Finding` says the `bitcode`
/// cache serializer rejects — presumably this type is never written through
/// that path; confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Class {
    /// Short name of the class.
    pub name: String,
    /// Qualified name (format is defined by the producer, not visible here).
    pub qualified_name: String,
    /// File the class is defined in.
    pub file_path: PathBuf,
    /// First line of the definition.
    pub line_start: u32,
    /// Last line of the definition.
    pub line_end: u32,
    /// Methods of the class, one string per method.
    pub methods: Vec<String>,
    /// Number of struct fields (Rust named/tuple) or enum variants.
    #[serde(default)]
    pub field_count: usize,
    /// Base classes, one string per base.
    pub bases: Vec<String>,
    /// Doc comment (Javadoc, JSDoc, Go doc, etc.)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub doc_comment: Option<String>,
    /// Annotations/decorators (e.g., Java @Override, @Deprecated)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub annotations: Vec<String>,
}

/// A file in the code graph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct File {
    /// Path of the file.
    pub path: PathBuf,
    /// Language of the file, as a string.
    pub language: String,
    /// Lines of code in the file.
    pub lines_of_code: usize,
    /// Number of functions in the file.
    pub functions: usize,
    /// Number of classes in the file.
    pub classes: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A populated finding survives a JSON serialize → deserialize cycle.
    #[test]
    fn test_finding_serde_round_trip() {
        let metadata =
            std::collections::BTreeMap::from([("key".to_string(), "value".to_string())]);
        let original = Finding {
            id: "test-1".into(),
            detector: "TestDetector".into(),
            severity: Severity::High,
            title: "Test finding".into(),
            description: "A test".into(),
            threshold_metadata: metadata,
            ..Default::default()
        };

        let encoded = serde_json::to_string(&original).expect("serialize finding");
        let decoded: Finding = serde_json::from_str(&encoded).expect("deserialize finding");

        assert_eq!(decoded.id, "test-1");
        let stored = decoded.threshold_metadata.get("key").expect("key exists");
        assert_eq!(stored, "value");
    }

    /// An explicit `null` for `threshold_metadata` becomes an empty map.
    #[test]
    fn test_finding_deserialize_null_threshold_metadata() {
        let json = r#"{"id":"t1","detector":"D","severity":"high","title":"T","description":"","affected_files":[],"threshold_metadata":null}"#;
        let parsed = serde_json::from_str::<Finding>(json)
            .expect("deserialize finding with null metadata");
        assert!(parsed.threshold_metadata.is_empty());
    }

    /// A missing `threshold_metadata` key becomes an empty map.
    #[test]
    fn test_finding_deserialize_missing_threshold_metadata() {
        let json = r#"{"id":"t1","detector":"D","severity":"high","title":"T","description":"","affected_files":[]}"#;
        let parsed = serde_json::from_str::<Finding>(json)
            .expect("deserialize finding with missing metadata");
        assert!(parsed.threshold_metadata.is_empty());
    }

    /// A finding with threshold metadata survives a binary round trip.
    ///
    /// Despite the "bincode" in the test name, the serializer exercised
    /// here is `bitcode`.
    #[test]
    fn test_finding_bincode_round_trip_with_threshold_metadata() {
        let metadata = std::collections::BTreeMap::from([
            ("threshold_source".to_string(), "adaptive".to_string()),
            ("effective_threshold".to_string(), "15".to_string()),
        ]);
        let original = Finding {
            id: "test-bin".into(),
            detector: "TestDetector".into(),
            severity: Severity::High,
            title: "Test finding".into(),
            description: "A test".into(),
            confidence: Some(0.85),
            threshold_metadata: metadata,
            ..Default::default()
        };

        let encoded = bitcode::serialize(&original).expect("serialize finding");
        let decoded: Finding = bitcode::deserialize(&encoded).expect("deserialize finding");

        assert_eq!(decoded.id, "test-bin");
        assert_eq!(decoded.confidence, Some(0.85));

        let source = decoded
            .threshold_metadata
            .get("threshold_source")
            .expect("key exists");
        assert_eq!(source, "adaptive");

        let threshold = decoded
            .threshold_metadata
            .get("effective_threshold")
            .expect("key exists");
        assert_eq!(threshold, "15");
    }

    /// `grade_from_score` maps onto a five-letter scale with cut-offs at
    /// 90, 80, 70 and 60; each cut-off belongs to the higher grade.
    #[test]
    fn test_health_report_grade_from_score() {
        let cases = [
            // Mid-band values.
            (95.0, "A"),
            (85.0, "B"),
            (75.0, "C"),
            (65.0, "D"),
            (50.0, "F"),
            // Exact cut-offs and the values just below them.
            (90.0, "A"),
            (89.99, "B"),
            (80.0, "B"),
            (79.99, "C"),
            (70.0, "C"),
            (69.99, "D"),
            (60.0, "D"),
            (59.99, "F"),
            // Extremes.
            (100.0, "A"),
            (0.0, "F"),
        ];
        for (score, expected) in cases.iter() {
            assert_eq!(
                HealthReport::grade_from_score(*score),
                *expected,
                "unexpected grade for score {score}"
            );
        }

        // NaN fails every `>=` comparison and so lands in the catch-all arm.
        assert_eq!(HealthReport::grade_from_score(f64::NAN), "F");
    }

    /// Each severity is tallied into its own counter, unused levels stay at
    /// zero, and `total` counts every finding.
    #[test]
    fn test_findings_summary_from_findings() {
        let severities = [
            Severity::Critical,
            Severity::High,
            Severity::High,
            Severity::Medium,
            Severity::Low,
        ];
        let findings: Vec<Finding> = severities
            .iter()
            .map(|&severity| Finding {
                severity,
                ..Default::default()
            })
            .collect();

        let summary = FindingsSummary::from_findings(&findings);
        assert_eq!(summary.critical, 1);
        assert_eq!(summary.high, 2);
        assert_eq!(summary.medium, 1);
        assert_eq!(summary.low, 1);
        // No Info findings were supplied, so that counter must stay at zero.
        assert_eq!(summary.info, 0);
        assert_eq!(summary.total, 5);

        // Empty input produces an all-zero summary.
        let empty = FindingsSummary::from_findings(&[]);
        assert_eq!(empty.total, 0);
        assert_eq!(
            empty.critical + empty.high + empty.medium + empty.low + empty.info,
            0
        );
    }

    // ── with_default_confidence ────────────────────────────────────

    /// With no confidence set, the supplied default is stored.
    #[test]
    fn test_with_default_confidence_sets_when_none() {
        let unset = Finding {
            confidence: None,
            ..Default::default()
        };
        let confidence = unset.with_default_confidence(0.85).confidence;
        assert_eq!(confidence, Some(0.85));
    }

    /// An already-set confidence is not overwritten by the default.
    #[test]
    fn test_with_default_confidence_preserves_existing() {
        let preset = Finding {
            confidence: Some(0.90),
            ..Default::default()
        };
        let confidence = preset.with_default_confidence(0.50).confidence;
        assert_eq!(confidence, Some(0.90));
    }

    // ── effective_confidence ────────────────────────────────────────

    /// An explicit confidence is returned unchanged.
    #[test]
    fn test_effective_confidence_returns_set_value() {
        let preset = Finding {
            confidence: Some(0.42),
            ..Default::default()
        };
        let delta = (preset.effective_confidence() - 0.42).abs();
        assert!(delta < f64::EPSILON);
    }

    /// Without an explicit confidence the 0.70 fallback is returned.
    #[test]
    fn test_effective_confidence_returns_default_when_none() {
        let unset = Finding {
            confidence: None,
            ..Default::default()
        };
        let delta = (unset.effective_confidence() - 0.70).abs();
        assert!(delta < f64::EPSILON);
    }

    // ── default_confidence_for_category ─────────────────────────────

    /// "architecture" defaults to 0.85.
    #[test]
    fn test_default_confidence_architecture() {
        let got = Finding::default_confidence_for_category(Some("architecture"));
        assert!((got - 0.85).abs() < f64::EPSILON);
    }

    /// "security" defaults to 0.75.
    #[test]
    fn test_default_confidence_security() {
        let got = Finding::default_confidence_for_category(Some("security"));
        assert!((got - 0.75).abs() < f64::EPSILON);
    }

    /// "design" defaults to 0.65.
    #[test]
    fn test_default_confidence_design() {
        let got = Finding::default_confidence_for_category(Some("design"));
        assert!((got - 0.65).abs() < f64::EPSILON);
    }

    /// The hyphenated spelling "dead-code" defaults to 0.70.
    #[test]
    fn test_default_confidence_dead_code_hyphen() {
        let got = Finding::default_confidence_for_category(Some("dead-code"));
        assert!((got - 0.70).abs() < f64::EPSILON);
    }

    /// The underscore spelling "dead_code" defaults to 0.70.
    #[test]
    fn test_default_confidence_dead_code_underscore() {
        let got = Finding::default_confidence_for_category(Some("dead_code"));
        assert!((got - 0.70).abs() < f64::EPSILON);
    }

    /// "ai_watchdog" defaults to 0.60.
    #[test]
    fn test_default_confidence_ai_watchdog() {
        let got = Finding::default_confidence_for_category(Some("ai_watchdog"));
        assert!((got - 0.60).abs() < f64::EPSILON);
    }

    /// A category without its own entry falls back to 0.70.
    #[test]
    fn test_default_confidence_unknown_category() {
        let got = Finding::default_confidence_for_category(Some("testing"));
        assert!((got - 0.70).abs() < f64::EPSILON);
    }

    /// No category at all falls back to 0.70.
    #[test]
    fn test_default_confidence_none_category() {
        let got = Finding::default_confidence_for_category(None);
        assert!((got - 0.70).abs() < f64::EPSILON);
    }

    // ── Dual-branch additive-schema contract tests (Phase 1a, RFC D1) ──
    //
    // These tests pin the contract that Phase 1a is additive and
    // non-breaking: when no dual-branch fields are set, the new keys are
    // still emitted, as `null` / `[]` like the other optional fields, and
    // JSON produced by older versions (without these keys) parses
    // unchanged.

    #[test]
    fn dual_branch_fields_match_existing_finding_serialization_convention() {
        // Unset optional fields on `Finding` are emitted as `null`, and
        // unset vectors as `[]` (compare `category`, `cwe_id`,
        // `affected_files`). The three Phase 1a fields must follow the same
        // convention so the JSON shape stays uniform and the in-house
        // `bitcode` cache (which rejects `skip_serializing_if`) keeps
        // working.
        //
        // This preserves RFC decision D1 ("additive non-breaking schema"):
        // older clients ignore the extra `null` / `[]` keys, and newer
        // clients reading older JSON get defaults via `serde(default)`.
        let finding = Finding {
            id: "x".into(),
            detector: "D".into(),
            severity: Severity::Low,
            ..Default::default()
        };
        let value = serde_json::to_value(&finding).expect("serialize finding");
        let obj = value.as_object().expect("finding serializes to object");

        let null = serde_json::Value::Null;
        let empty_array = serde_json::Value::Array(vec![]);

        assert_eq!(
            obj.get("alternative_branch"),
            Some(&null),
            "alternative_branch should be null when unset; got: {value}"
        );
        assert_eq!(
            obj.get("prediction_reasons"),
            Some(&empty_array),
            "prediction_reasons should be [] when unset; got: {value}"
        );
        assert_eq!(
            obj.get("resolution_signals"),
            Some(&empty_array),
            "resolution_signals should be [] when unset; got: {value}"
        );
    }

    #[test]
    fn dual_branch_old_json_parses_unchanged() {
        // JSON written before Phase 1a carries none of the dual-branch
        // keys; it must still deserialize, with those fields defaulted.
        // (Same contract pattern as
        // test_finding_deserialize_missing_threshold_metadata.)
        let json = r#"{"id":"old","detector":"D","severity":"high","title":"T","description":"","affected_files":[]}"#;
        let legacy = serde_json::from_str::<Finding>(json).expect("deserialize legacy finding");

        assert_eq!(legacy.id, "old");
        assert!(legacy.alternative_branch.is_none());
        assert!(legacy.prediction_reasons.is_empty());
        assert!(legacy.resolution_signals.is_empty());
        assert!(!legacy.is_dual_branch());
    }

    /// Each builder stores its argument in the matching field, and setting
    /// an alternative branch makes the finding dual-branch.
    #[test]
    fn dual_branch_builder_methods_populate_fields() {
        use crate::dual_branch::{
            AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind, ResolutionKind,
            ResolutionSignal,
        };

        // Build the three parts up front, then chain the builders.
        let alternative = AlternativeBranch {
            label: BranchLabel::Benign,
            severity: Severity::Info,
            title: "Annotated as non-security".into(),
            description: "Caller passed usedforsecurity=False.".into(),
            suggested_fix: None,
        };
        let reason = PredictionReason {
            kind: PredictionReasonKind::KeywordArgument {
                name: "usedforsecurity".into(),
                value: "False".into(),
            },
            weight: 0.9,
            note: "Authoritative non-security annotation present.".into(),
        };
        let signal = ResolutionSignal {
            kind: ResolutionKind::KeywordArgument {
                name: "usedforsecurity".into(),
                value: "False".into(),
            },
            description: "Python 3.9+ stdlib non-security annotation.".into(),
            example: None,
            collapses_to: BranchLabel::Benign,
        };

        let finding = Finding::default()
            .with_alternative_branch(alternative)
            .with_prediction_reason(reason)
            .with_resolution_signal(signal);

        assert!(finding.is_dual_branch());
        assert_eq!(finding.prediction_reasons.len(), 1);
        assert_eq!(finding.resolution_signals.len(), 1);

        let stored = finding
            .alternative_branch
            .as_ref()
            .expect("alternative present");
        assert_eq!(stored.label, BranchLabel::Benign);
    }

    #[test]
    fn dual_branch_finding_full_roundtrip() {
        // Serialize a finding with every dual-branch field populated and
        // read it back: SARIF / JSON consumers in Phase 2 rely on this
        // being parse-stable.
        use crate::dual_branch::{
            AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind, ResolutionKind,
            ResolutionSignal,
        };

        let protocol_required = AlternativeBranch {
            label: BranchLabel::Benign,
            severity: Severity::Info,
            title: "RFC 7616 Digest auth requires SHA-1 for compatibility".into(),
            description: "Protocol-required, not vulnerable usage.".into(),
            suggested_fix: Some("# repotoire:protocol-required[RFC7616]".into()),
        };
        let scope_hint = PredictionReason {
            kind: PredictionReasonKind::EnclosingScope {
                scope_kind: "class".into(),
                name: "DigestAuth".into(),
            },
            weight: 0.4,
            note: "Class name suggests RFC 7616 Digest authentication.".into(),
        };
        let annotation_signal = ResolutionSignal {
            kind: ResolutionKind::SourceAnnotation {
                syntax: "# repotoire:protocol-required[RFC7616]".into(),
            },
            description: "Mark this call as protocol-required.".into(),
            example: None,
            collapses_to: BranchLabel::Benign,
        };

        let original = Finding {
            id: "httpx-auth-309".into(),
            detector: "InsecureCryptoDetector".into(),
            severity: Severity::Medium,
            title: "SHA-1 used in HTTPX Digest auth".into(),
            description: "SHA-1 in cryptographic context.".into(),
            ..Default::default()
        }
        .with_alternative_branch(protocol_required)
        .with_prediction_reason(scope_hint)
        .with_resolution_signal(annotation_signal);

        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded = serde_json::from_str::<Finding>(&encoded).expect("deserialize");

        // The identity and all three dual-branch fields must survive the trip.
        assert_eq!(decoded.id, original.id);
        assert!(decoded.is_dual_branch());
        assert_eq!(decoded.prediction_reasons.len(), 1);
        assert_eq!(decoded.resolution_signals.len(), 1);
        let alternative = decoded
            .alternative_branch
            .as_ref()
            .expect("alternative present");
        assert_eq!(alternative.label, BranchLabel::Benign);
    }

    // ── Finding::is_valid / validation_errors ──
    //
    // These guard the cache trust boundary documented in
    // docs/superpowers/specs/2026-05-11-cache-validation.md.

    #[test]
    fn finding_default_is_not_valid() {
        let blank = Finding::default();
        assert!(
            !blank.is_valid(),
            "Finding::default() must NOT pass is_valid(); it would let \
             garbage like `{{\"bogus\": \"x\"}}` round-trip through the cache."
        );

        // A default finding is missing every required field, and the
        // report should name all of them (a list, not just the first
        // failure) so users see everything that is wrong at once.
        let problems = blank.validation_errors();
        let mentions = |needle: &str| problems.iter().any(|e| e.contains(needle));
        assert!(mentions("detector"));
        assert!(mentions("title"));
        assert!(mentions("locator"));
    }

    #[test]
    fn finding_with_detector_title_and_file_line_is_valid() {
        // Non-empty detector and title plus a file path with a starting
        // line: this combination is expected to validate.
        let located = Finding {
            line_start: Some(1),
            affected_files: vec!["src/x.py".into()],
            detector: String::from("Det"),
            title: String::from("Title"),
            ..Default::default()
        };
        assert!(
            located.is_valid(),
            "expected valid finding (detector+title+file+line); errs: {:?}",
            located.validation_errors(),
        );
    }

    #[test]
    fn finding_with_detector_title_and_nonempty_id_is_valid() {
        // A project-level finding has no file or line to point at; a
        // stable, non-empty `id` is what keeps it valid.
        let project_level = Finding {
            title: String::from("Title"),
            detector: String::from("Det"),
            id: "circular-dep-1".into(),
            ..Default::default()
        };
        assert!(
            project_level.is_valid(),
            "errs: {:?}",
            project_level.validation_errors()
        );
    }

    #[test]
    fn finding_with_file_but_no_line_is_not_valid() {
        // A file path on its own does not count as a locator: without
        // `line_start` being `Some`, the finding points at a file rather
        // than at code.
        let file_only = Finding {
            affected_files: vec!["src/x.py".into()],
            detector: String::from("Det"),
            title: String::from("Title"),
            ..Default::default()
        };
        assert!(!file_only.is_valid());

        let problems = file_only.validation_errors();
        assert!(problems.iter().any(|e| e.contains("locator")));
    }

    #[test]
    fn finding_with_empty_detector_is_not_valid() {
        // Title and file+line are supplied, so the blank detector is the
        // only field this test leaves empty.
        let no_detector = Finding {
            line_start: Some(1),
            affected_files: vec!["src/x.py".into()],
            title: String::from("Title"),
            detector: String::new(),
            ..Default::default()
        };
        assert!(!no_detector.is_valid());

        let problems = no_detector.validation_errors();
        assert!(problems.iter().any(|e| e.contains("detector")));
    }

    #[test]
    fn finding_with_empty_title_is_not_valid() {
        // Detector and file+line are supplied, so the blank title is the
        // only field this test leaves empty.
        let no_title = Finding {
            line_start: Some(1),
            affected_files: vec!["src/x.py".into()],
            detector: String::from("Det"),
            title: String::new(),
            ..Default::default()
        };
        assert!(!no_title.is_valid());

        let problems = no_title.validation_errors();
        assert!(problems.iter().any(|e| e.contains("title")));
    }

    #[test]
    fn finding_deserialized_from_bogus_object_is_not_valid() {
        // The exact Bug 1 corruption shape: a JSON object holding only an
        // unrelated key. Deserialization succeeds (every field carries
        // `serde(default)`), so is_valid() is what must reject it.
        let bogus_payload = r#"{"bogus": "x"}"#;
        let parsed = serde_json::from_str::<Finding>(bogus_payload)
            .expect("permissive deserialize accepts anything");
        assert!(!parsed.is_valid());
    }
}