// heal-cli 0.3.1 - Docs.rs

//! Codebase-relative metric calibration (`.heal/calibration.toml`).
//!
//! HEAL's Severity ladder is computed from the **codebase's own
//! distribution**, not absolute literature values, so a simple Python
//! script and a 200kloc service trigger differently for the same raw
//! CCN. Each metric carries its own quartile-style breaks (p75 / p90
//! / p95) plus an absolute "no defence" floor (`floor_critical`) drawn
//! from `McCabe` / `SonarQube` so a uniformly-rotten codebase cannot
//! quietly flatten the ladder.
//!
//! Hotspot uses an independent percentile space with **no critical
//! floor**, so the top 10% (`score >= p90`) is picked out regardless
//! of churn / size — subject only to the optional `floor_ok`
//! graduation gate, which keeps trivially small scores from being
//! flagged. The `hotspot=true` flag rides on top of Severity rather
//! than collapsing into it.
//!
//! Layout on disk:
//!
//! ```toml
//! [meta]
//! created_at      = "2026-04-28T09:00:00Z"
//! codebase_files  = 142
//! strategy        = "percentile"
//!
//! [calibration.ccn]
//! p50 = 4.2
//! p75 = 8.1
//! p90 = 14.3
//! p95 = 21.7
//! floor_critical = 25
//!
//! [calibration.hotspot]
//! p50 = 5.0
//! p75 = 18.0
//! p90 = 67.0
//! p95 = 145.0
//! ```
//!
//! The file is generated by `heal init` / `heal calibrate` and **not
//! intended for hand-editing** — only the absolute floors
//! (`floor_critical` / `floor_ok`) are meant to be overridden, and
//! those overrides live in `config.toml` so a re-calibration doesn't
//! clobber them.

use std::collections::BTreeMap;
use std::path::Path;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use crate::core::config::{assign_workspace, Config, WorkspaceOverlay};
use crate::core::error::{Error, Result};
use crate::core::severity::Severity;

/// Built-in `floor_critical` values for the metrics shipped today.
/// These are the hard "structurally indefensible" thresholds: a value
/// at or above its floor classifies as Critical no matter where it
/// sits in the distribution. The codebase-relative percentiles supply
/// the softer breaks for values that stay below the floor.
///
/// Critical floor for the `ccn` metric.
pub const FLOOR_CCN: f64 = 25.0;
/// Critical floor for the `cognitive` metric.
pub const FLOOR_COGNITIVE: f64 = 50.0;
/// Critical floor for the `duplication` metric (a percentage, per the
/// constant's name).
pub const FLOOR_DUPLICATION_PCT: f64 = 30.0;

/// Built-in `floor_ok` values for proxy metrics — values strictly below
/// these classify as `Ok` regardless of percentile placement, giving
/// codebases a literature-anchored graduation gate. Without this floor
/// the percentile breaks would always flag the project's top decile,
/// even on a uniformly-clean codebase (Goodhart's Law).
///
/// - `FLOOR_OK_CCN`: `McCabe` (1976) — CCN 1–10 is "simple, low risk".
///   The gate is a strict `<` comparison, so 11.0 lets exactly the
///   1–10 range graduate.
/// - `FLOOR_OK_COGNITIVE`: `Sonar` (2017) — under ~8 there's no nesting
///   penalty signal worth surfacing.
/// - `FLOOR_OK_HOTSPOT`: composite floor — `2 × FLOOR_OK_CCN = 22`.
///   Hotspot score = `commits × ccn_sum`; below this product we're
///   either looking at a barely-touched file or a low-complexity one,
///   neither of which should be flagged regardless of where the file
///   sits in the codebase's distribution. Without this gate, a
///   uniformly-cold repo's "top 10%" still gets flagged as hotspots.
pub const FLOOR_OK_CCN: f64 = 11.0;
/// Graduation gate for the `cognitive` metric (see the list above).
pub const FLOOR_OK_COGNITIVE: f64 = 8.0;
/// Graduation gate for hotspot scores (see the list above); applied by
/// `HotspotCalibration::from_distribution`.
pub const FLOOR_OK_HOTSPOT: f64 = 22.0;

/// Bundles the absolute floors a metric carries. `critical` is the
/// "structurally indefensible" Critical escape hatch; `ok` is the
/// graduation gate (values strictly below classify as Ok). Pass to
/// [`MetricCalibration::from_distribution`] so call sites stay
/// self-documenting and immune to argument transposition.
///
/// The derived `Default` leaves both floors as `None`.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct MetricFloors {
    /// Stored as `MetricCalibration::floor_critical`; `None` means the
    /// metric has no absolute Critical floor.
    pub critical: Option<f64>,
    /// Stored as `MetricCalibration::floor_ok`; `None` disables the
    /// graduation gate.
    pub ok: Option<f64>,
}

/// Default percentile strategy label written into
/// `meta.strategy`; it is the value `CalibrationMeta::default()` uses.
/// Reserved for future expansion (e.g. winsorised percentiles for very
/// small samples).
pub const STRATEGY_PERCENTILE: &str = "percentile";

/// Sample-size guard — calibrations built from fewer values than this
/// fall back to floor-only classification: the `from_distribution`
/// constructors store `NaN` for every percentile, so everything
/// percentile-derived clamps to Ok while the absolute floors keep
/// working. Five points is the minimum where linear-interpolated
/// quartiles aren't degenerate.
pub const MIN_SAMPLES_FOR_PERCENTILES: usize = 5;

/// Comment header prepended by [`Calibration::save`] so anyone opening
/// `.heal/calibration.toml` immediately sees its provenance and the
/// command that regenerates it. Recalibration is never automatic — the
/// user always invokes `heal calibrate --force` themselves.
///
/// Every line is a TOML `#` comment followed by one blank line, so the
/// serialized body can be appended directly after it.
const CALIBRATION_HEADER: &str = "\
# Generated by `heal calibrate` from this codebase's score distribution.
# Regenerate after the codebase shifts with `heal calibrate --force`
# (heal never recalibrates automatically).
# Hand edits are preserved on read but will be overwritten by --force;
# put `floor_critical` overrides in `config.toml` instead.

";

/// In-memory form of `.heal/calibration.toml`: file metadata, the
/// global break table, and optional per-workspace break tables.
/// Unknown keys are rejected on deserialization
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Calibration {
    /// Provenance of this calibration (the `[meta]` table).
    pub meta: CalibrationMeta,
    /// Global / fallback breaks. When `[[project.workspaces]]` is empty
    /// (the v0.1+ default) this holds the whole-repo distribution.
    /// With workspaces declared, it holds the breaks for files outside
    /// every declared workspace — the minority cohort.
    #[serde(default)]
    pub calibration: MetricCalibrations,
    /// Per-workspace overrides keyed by the same path string the
    /// `[[project.workspaces]]` config uses. Empty when no workspaces
    /// are declared. Findings tagged with a workspace classify against
    /// the matching table here; everything else falls back to
    /// [`Self::calibration`]. Omitted from the serialized file when
    /// empty.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub workspaces: BTreeMap<String, MetricCalibrations>,
}

impl Calibration {
    /// Resolve the [`MetricCalibrations`] table that a finding located
    /// at `file` should be classified against. The file is first mapped
    /// to a workspace with [`assign_workspace`]; the lookup itself is
    /// delegated to [`Self::metrics_for_workspace`], so files outside
    /// every declared workspace (or in a workspace without its own
    /// table) get the global breaks in `self.calibration`.
    #[must_use]
    pub fn metrics_for_file(
        &self,
        file: &Path,
        workspaces: &[WorkspaceOverlay],
    ) -> &MetricCalibrations {
        let workspace = assign_workspace(file, workspaces);
        self.metrics_for_workspace(workspace)
    }

    /// Same lookup as [`Self::metrics_for_file`], for callers that
    /// already hold the workspace name (from [`assign_workspace`]) and
    /// want to skip a second resolution — e.g. `change_coupling`, which
    /// resolves both pair endpoints up front to detect cross-workspace
    /// pairs. Returns the per-workspace table when `workspace` names
    /// one, otherwise the global table.
    #[must_use]
    pub fn metrics_for_workspace(&self, workspace: Option<&str>) -> &MetricCalibrations {
        workspace
            .and_then(|name| self.workspaces.get(name))
            .unwrap_or(&self.calibration)
    }
}

/// The `[meta]` table of `.heal/calibration.toml`. Unknown keys are
/// rejected on deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct CalibrationMeta {
    /// UTC timestamp recorded for this calibration.
    pub created_at: DateTime<Utc>,
    /// File count recorded alongside the calibration — presumably the
    /// number of files that fed the distribution; verify at the call
    /// site that fills it in.
    pub codebase_files: u32,
    /// Strategy label; [`STRATEGY_PERCENTILE`] is the default.
    pub strategy: String,
    /// HEAD sha at the moment `heal calibrate --force` (or `heal init`)
    /// produced this calibration. The heal-config skill compares it
    /// against the current `git rev-parse HEAD` to decide whether the
    /// codebase has drifted enough to suggest a recalibration. Optional
    /// so calibrations created outside a git worktree still load.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub calibrated_at_sha: Option<String>,
}

impl Default for CalibrationMeta {
    /// Placeholder metadata: timestamp 0 (the Unix epoch), zero files,
    /// the [`STRATEGY_PERCENTILE`] label and no recorded commit sha.
    fn default() -> Self {
        let epoch = DateTime::<Utc>::from_timestamp(0, 0).unwrap_or_default();
        Self {
            calibrated_at_sha: None,
            strategy: String::from(STRATEGY_PERCENTILE),
            codebase_files: 0,
            created_at: epoch,
        }
    }
}

/// One table of per-metric breaks — either the global cohort or a
/// single workspace. Every entry is optional: a `None` metric is
/// omitted from the serialized TOML and defaults back to `None` when
/// the key is absent on load. Unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MetricCalibrations {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ccn: Option<MetricCalibration>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cognitive: Option<MetricCalibration>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duplication: Option<MetricCalibration>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub change_coupling: Option<MetricCalibration>,
    // Hotspot has its own calibration type: it produces a boolean flag
    // rather than a Severity.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub hotspot: Option<HotspotCalibration>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lcom: Option<MetricCalibration>,
}

/// Percentile breaks plus optional absolute floors for one metric.
/// `classify` turns a raw metric value into a `Severity` using these
/// fields. Unknown keys are rejected on deserialization
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MetricCalibration {
    /// Median of the sampled distribution; only read by the spread
    /// gate. `NaN` when `from_distribution` saw too few samples (the
    /// same holds for the other three percentiles).
    pub p50: f64,
    /// Values `>= p75` classify as at least Medium.
    pub p75: f64,
    /// Values `>= p90` classify as at least High.
    pub p90: f64,
    /// Values `>= p95` classify as Critical.
    pub p95: f64,
    /// Absolute "structurally indefensible" floor. Anything `>= floor`
    /// classifies as Critical regardless of percentile placement, so a
    /// codebase that's uniformly bad still surfaces its worst cases.
    /// `None` means "no floor" (rare — e.g. for `change_coupling`
    /// where the user-configured `min_coupling` already filters before
    /// classification).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub floor_critical: Option<f64>,
    /// Absolute "graduation gate". Anything strictly `< floor_ok`
    /// classifies as Ok regardless of where it lands on the percentile
    /// ladder — a codebase whose worst values are below the literature
    /// anchor escapes the "top 10% is always Critical" loop. `None`
    /// disables the gate (current default for non-proxy metrics whose
    /// scan-time filters already serve the same role).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub floor_ok: Option<f64>,
}

impl MetricCalibration {
    /// Severity classification with three gates layered above the
    /// percentile classifier:
    ///
    /// 1. `floor_critical` — uniformly-bad escape hatch (always Critical).
    /// 2. `floor_ok` — graduation gate (always Ok below it).
    /// 3. **Spread gate** — when `(p95 - p50)` is small relative to
    ///    `(floor_critical - floor_ok)`, the percentile classifier has
    ///    no signal (everyone clustered between the floors). Falls to
    ///    Ok. Threshold: `(floor_critical - floor_ok) / 2`. Both floors
    ///    must be present for the gate to fire — partial-floor
    ///    metrics keep the existing percentile-only behaviour.
    ///
    /// Then the percentile cascade: `value >= p95` → Critical, `p90` →
    /// High, `p75` → Medium, else Ok.
    ///
    /// Degenerate calibrations carry `NaN` percentiles (sample size
    /// below [`MIN_SAMPLES_FOR_PERCENTILES`]); `>=` against `NaN` is
    /// always false, so those cases fall through to the floor-only path.
    #[must_use]
    pub fn classify(&self, value: f64) -> Severity {
        if let Some(floor) = self.floor_critical {
            if value >= floor {
                return Severity::Critical;
            }
        }
        if let Some(floor) = self.floor_ok {
            if value < floor {
                return Severity::Ok;
            }
        }
        if !self.has_meaningful_spread() {
            return Severity::Ok;
        }
        if value >= self.p95 {
            Severity::Critical
        } else if value >= self.p90 {
            Severity::High
        } else if value >= self.p75 {
            Severity::Medium
        } else {
            Severity::Ok
        }
    }

    /// True iff the percentile classifier carries meaningful signal —
    /// the spread between the median and the 95th percentile is at
    /// least half of the floor band. When the codebase has graduated
    /// into a tight cluster between the floors, this returns `false`
    /// and `classify` falls to Ok regardless of relative position.
    /// Returns `true` (no gate) when either floor is missing or the
    /// percentiles are NaN (degenerate calibration).
    fn has_meaningful_spread(&self) -> bool {
        let (Some(critical), Some(ok)) = (self.floor_critical, self.floor_ok) else {
            return true;
        };
        if !self.p50.is_finite() || !self.p95.is_finite() {
            return true;
        }
        let band = critical - ok;
        if band <= 0.0 {
            return true;
        }
        let spread_min = band / 2.0;
        (self.p95 - self.p50) >= spread_min
    }

    /// Build a calibration from a sample of metric values plus the
    /// absolute floors. Samples below [`MIN_SAMPLES_FOR_PERCENTILES`]
    /// mark every percentile as `NaN` so `classify` ignores them — a
    /// Critical decision can still fire via `floors.critical`, and `Ok`
    /// graduation can still fire via `floors.ok`. Non-finite input
    /// values (`NaN` / `inf`) are dropped before sorting.
    #[must_use]
    pub fn from_distribution(values: &[f64], floors: MetricFloors) -> Self {
        let mut sorted: Vec<f64> = values.iter().copied().filter(|v| v.is_finite()).collect();
        if sorted.len() < MIN_SAMPLES_FOR_PERCENTILES {
            return Self {
                p50: f64::NAN,
                p75: f64::NAN,
                p90: f64::NAN,
                p95: f64::NAN,
                floor_critical: floors.critical,
                floor_ok: floors.ok,
            };
        }
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        Self {
            p50: percentile(&sorted, 50.0),
            p75: percentile(&sorted, 75.0),
            p90: percentile(&sorted, 90.0),
            p95: percentile(&sorted, 95.0),
            floor_critical: floors.critical,
            floor_ok: floors.ok,
        }
    }
}

/// Percentile breaks for hotspot scores plus an optional absolute
/// graduation floor. Only `p90` and `floor_ok` are consulted by
/// `flag`; the other percentiles are stored alongside them. Unknown
/// keys are rejected on deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct HotspotCalibration {
    pub p50: f64,
    pub p75: f64,
    /// Flagging threshold: scores `>= p90` are hotspot candidates.
    pub p90: f64,
    pub p95: f64,
    /// Absolute graduation floor — scores strictly below this never
    /// flag as hotspots, even when they happen to sit in the top
    /// decile of a uniformly-cold codebase. See [`FLOOR_OK_HOTSPOT`]
    /// for the literature-anchored default. `None` disables the gate
    /// (legacy snapshots before v0.3+).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub floor_ok: Option<f64>,
}

impl HotspotCalibration {
    /// True iff `score >= p90` AND `score >= floor_ok` (when set) —
    /// i.e. the file sits in the top 10% AND its absolute composite
    /// score is non-trivial. Hotspot is a **flag**, not a Severity:
    /// it rides on top of any Finding via `Finding.hotspot` so the
    /// two axes (how bad? how often touched?) stay independent.
    ///
    /// A degenerate calibration stores `NaN` in `p90`; `>=` against
    /// `NaN` is always false, so nothing is flagged in that case.
    #[must_use]
    pub fn flag(&self, score: f64) -> bool {
        if let Some(floor) = self.floor_ok {
            if score < floor {
                return false;
            }
        }
        score >= self.p90
    }

    /// Build a calibration from sampled hotspot `scores`, applying the
    /// literature-anchored [`FLOOR_OK_HOTSPOT`] graduation floor. See
    /// [`Self::from_distribution_with_floor`] for the sampling rules.
    #[must_use]
    pub fn from_distribution(scores: &[f64]) -> Self {
        Self::from_distribution_with_floor(scores, Some(FLOOR_OK_HOTSPOT))
    }

    /// Variant that takes an explicit `floor_ok` so tests and callers
    /// that want to opt out (legacy behaviour) can pass `None`. The
    /// default constructor [`Self::from_distribution`] applies the
    /// literature-anchored [`FLOOR_OK_HOTSPOT`].
    ///
    /// Non-finite scores (`NaN` / `inf`) are dropped **before** the
    /// sample-size guard, matching
    /// `MetricCalibration::from_distribution`. When fewer than
    /// [`MIN_SAMPLES_FOR_PERCENTILES`] finite scores remain, every
    /// percentile is stored as `NaN` so `flag` never fires.
    #[must_use]
    pub fn from_distribution_with_floor(scores: &[f64], floor_ok: Option<f64>) -> Self {
        // Filter first: counting the raw slice would let a sample made
        // mostly (or entirely) of NaN / inf slip past the guard and
        // produce breaks from too few points — or a bogus p90 of 0.0
        // from an empty slice.
        let mut sorted: Vec<f64> = scores.iter().copied().filter(|v| v.is_finite()).collect();
        if sorted.len() < MIN_SAMPLES_FOR_PERCENTILES {
            return Self {
                p50: f64::NAN,
                p75: f64::NAN,
                p90: f64::NAN,
                p95: f64::NAN,
                floor_ok,
            };
        }
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        Self {
            p50: percentile(&sorted, 50.0),
            p75: percentile(&sorted, 75.0),
            p90: percentile(&sorted, 90.0),
            p95: percentile(&sorted, 95.0),
            floor_ok,
        }
    }
}

impl Calibration {
    pub fn load(path: &Path) -> Result<Self> {
        let raw = std::fs::read_to_string(path).map_err(|e| Error::Io {
            path: path.to_path_buf(),
            source: e,
        })?;
        toml::from_str(&raw).map_err(|source| Error::ConfigParse {
            path: path.to_path_buf(),
            source,
        })
    }

    /// Persist the calibration atomically (temp file + rename) so a
    /// SIGINT mid-write can never leave a half-written
    /// `calibration.toml` that fails to parse on every subsequent read.
    /// A short comment header is prepended so the file's provenance and
    /// regeneration command are visible to anyone opening it.
    pub fn save(&self, path: &Path) -> Result<()> {
        let body = toml::to_string_pretty(self).expect("Calibration serialization is infallible");
        let mut out = String::with_capacity(body.len() + CALIBRATION_HEADER.len());
        out.push_str(CALIBRATION_HEADER);
        out.push_str(&body);
        crate::core::fs::atomic_write(path, out.as_bytes())
    }

    /// Apply config-side floor overrides. Each per-metric section in
    /// `[metrics.<name>]` may set `floor_critical = N` and/or
    /// `floor_ok = N` to raise (or lower) the absolute floors without
    /// touching `.heal/calibration.toml` — that way re-calibrating from
    /// a new codebase distribution doesn't clobber the user's
    /// preference.
    ///
    /// Per-workspace `[project.workspaces.metrics.<name>]` overrides
    /// apply *after* the global ones for the matching workspace's
    /// `MetricCalibrations` table, so workspace-specific values win
    /// when both are set. Other workspaces and the global cohort see
    /// only the global overrides.
    #[must_use]
    pub fn with_overrides(mut self, config: &Config) -> Self {
        apply_metric_overrides(&mut self.calibration, config);
        for (ws_path, table) in &mut self.workspaces {
            apply_metric_overrides(table, config);
            if let Some(overlay) = config
                .project
                .workspaces
                .iter()
                .find(|w| w.path.trim_end_matches('/') == ws_path.as_str())
            {
                apply_workspace_metric_overrides(table, &overlay.metrics);
            }
        }
        self
    }
}

/// Apply one workspace's `[project.workspaces.metrics.<m>] floor_*`
/// settings to that workspace's `MetricCalibrations`. Same layering
/// rule as [`apply_metric_overrides`], but sourced from a
/// `WorkspaceMetricsOverlay` rather than the global `[metrics.<m>]`
/// config: a floor that is set in the overlay replaces the table's
/// value, an unset one leaves it untouched. Metrics without a table
/// entry are skipped.
fn apply_workspace_metric_overrides(
    table: &mut MetricCalibrations,
    overrides: &crate::core::config::WorkspaceMetricsOverlay,
) {
    // `override.or(current)` keeps `current` unless the override is set.
    if let Some(ccn) = &mut table.ccn {
        ccn.floor_critical = overrides.ccn.floor_critical.or(ccn.floor_critical);
        ccn.floor_ok = overrides.ccn.floor_ok.or(ccn.floor_ok);
    }
    if let Some(cognitive) = &mut table.cognitive {
        cognitive.floor_critical = overrides
            .cognitive
            .floor_critical
            .or(cognitive.floor_critical);
        cognitive.floor_ok = overrides.cognitive.floor_ok.or(cognitive.floor_ok);
    }
    if let Some(duplication) = &mut table.duplication {
        duplication.floor_critical = overrides
            .duplication
            .floor_critical
            .or(duplication.floor_critical);
    }
    if let Some(coupling) = &mut table.change_coupling {
        coupling.floor_critical = overrides
            .change_coupling
            .floor_critical
            .or(coupling.floor_critical);
    }
    if let Some(lcom) = &mut table.lcom {
        lcom.floor_critical = overrides.lcom.floor_critical.or(lcom.floor_critical);
    }
}

/// Apply the global `[metrics.<m>] floor_critical` / `floor_ok`
/// settings from `config` to one `MetricCalibrations` table.
/// [`Calibration::with_overrides`] calls this for the global cohort
/// and for every workspace cohort, so the overrides behave the same
/// wherever a calibration table exists. A floor that is set in the
/// config replaces the table's value, an unset one leaves it
/// untouched; metrics without a table entry are skipped.
fn apply_metric_overrides(table: &mut MetricCalibrations, config: &Config) {
    let metrics = &config.metrics;
    // `override.or(current)` keeps `current` unless the override is set.
    if let Some(ccn) = &mut table.ccn {
        ccn.floor_critical = metrics.ccn.floor_critical.or(ccn.floor_critical);
        ccn.floor_ok = metrics.ccn.floor_ok.or(ccn.floor_ok);
    }
    if let Some(cognitive) = &mut table.cognitive {
        cognitive.floor_critical = metrics
            .cognitive
            .floor_critical
            .or(cognitive.floor_critical);
        cognitive.floor_ok = metrics.cognitive.floor_ok.or(cognitive.floor_ok);
    }
    if let Some(duplication) = &mut table.duplication {
        duplication.floor_critical = metrics
            .duplication
            .floor_critical
            .or(duplication.floor_critical);
    }
    if let Some(coupling) = &mut table.change_coupling {
        coupling.floor_critical = metrics
            .change_coupling
            .floor_critical
            .or(coupling.floor_critical);
    }
    if let Some(lcom) = &mut table.lcom {
        lcom.floor_critical = metrics.lcom.floor_critical.or(lcom.floor_critical);
    }
    if let Some(hotspot) = &mut table.hotspot {
        hotspot.floor_ok = metrics.hotspot.floor_ok.or(hotspot.floor_ok);
    }
}

/// Linear-interpolation percentile (`NumPy` default style). Computes
/// `values[k] + frac * (values[k+1] - values[k])` for the rank
/// `k = floor(p/100 * (n-1))`. The caller sorts ascending and drops
/// non-finite values; an empty slice returns 0.0 and a single-element
/// slice returns that element.
///
/// `p` is clamped to `[0, 100]`, so out-of-range requests return the
/// minimum / maximum instead of interpolating with a bogus fraction
/// (negative `p`) or overflowing the index arithmetic (huge or
/// infinite `p`). A `NaN` `p` yields `NaN`.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    let n = sorted.len();
    if n == 0 {
        return 0.0;
    }
    if n == 1 {
        return sorted[0];
    }
    // `clamp` passes NaN through unchanged; everything else lands in
    // range, which bounds `rank` to `[0, n - 1]`.
    let p = p.clamp(0.0, 100.0);
    #[allow(clippy::cast_precision_loss)]
    let rank = (p / 100.0) * (n as f64 - 1.0);
    let base = rank.floor();
    // `base` is non-negative and at most `n - 1` here (NaN casts to 0),
    // so the cast neither wraps nor truncates meaningfully.
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    let lo = base as usize;
    match sorted.get(lo + 1) {
        Some(&upper) => sorted[lo] + (rank - base) * (upper - sorted[lo]),
        // `lo` is already the last index: nothing to interpolate towards.
        None => sorted[n - 1],
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Test helper: a calibration with the given percentile breaks, an
    /// optional Critical floor and no `floor_ok` gate.
    fn cal(p50: f64, p75: f64, p90: f64, p95: f64, floor: Option<f64>) -> MetricCalibration {
        let floor_critical = floor;
        MetricCalibration {
            floor_critical,
            floor_ok: None,
            p95,
            p90,
            p75,
            p50,
        }
    }

    #[test]
    fn classify_uses_floor_first() {
        let calibration = cal(1.0, 2.0, 3.0, 4.0, Some(10.0));
        // 11 clears the absolute floor (10) → Critical via the floor.
        // 5 stays under the floor but is above p95 (4) → still
        // Critical, this time via the p95 break.
        for value in [11.0, 5.0] {
            assert_eq!(calibration.classify(value), Severity::Critical);
        }
    }

    #[test]
    fn classify_floor_ok_forces_ok_below_threshold() {
        // With p95 = 4 a value of 5 would be Critical on percentiles
        // alone; floor_ok = 11 sends everything below 11 to Ok first.
        let gated = MetricCalibration {
            floor_ok: Some(11.0),
            ..cal(1.0, 2.0, 3.0, 4.0, None)
        };
        for below_gate in [5.0, 10.99] {
            assert_eq!(gated.classify(below_gate), Severity::Ok);
        }
        // From the floor upwards the percentile ladder takes over again.
        assert_eq!(gated.classify(11.0), Severity::Critical);
    }

    #[test]
    fn classify_floor_critical_overrides_floor_ok() {
        // Deliberately inverted floors (critical = 5 below ok = 11):
        // the value 6 satisfies both gates, and the Critical floor has
        // to win so the escape hatch is never lost.
        let inverted = MetricCalibration {
            floor_ok: Some(11.0),
            ..cal(1.0, 2.0, 3.0, 4.0, Some(5.0))
        };
        let severity = inverted.classify(6.0);
        assert_eq!(severity, Severity::Critical);
    }

    #[test]
    fn with_overrides_applies_floor_ok_from_config() {
        // Calibration carrying the built-in CCN floors.
        let builtin_floors = MetricFloors {
            critical: Some(FLOOR_CCN),
            ok: Some(FLOOR_OK_CCN),
        };
        let ccn = MetricCalibration::from_distribution(&[1.0, 2.0, 3.0, 4.0, 5.0], builtin_floors);
        let base = Calibration {
            meta: CalibrationMeta::default(),
            calibration: MetricCalibrations {
                ccn: Some(ccn),
                ..MetricCalibrations::default()
            },
            workspaces: BTreeMap::new(),
        };

        // Config overrides both floors.
        let mut config = Config::default();
        config.metrics.ccn.floor_critical = Some(40.0);
        config.metrics.ccn.floor_ok = Some(15.0);

        let overridden = base.with_overrides(&config).calibration.ccn.unwrap();
        assert_eq!(overridden.floor_ok, Some(15.0));
        assert_eq!(overridden.floor_critical, Some(40.0));
    }

    #[test]
    fn with_overrides_applies_workspace_metric_overlay() {
        // Every cohort starts from the same CCN table with the
        // built-in floors.
        fn ccn_table() -> MetricCalibrations {
            let floors = MetricFloors {
                critical: Some(FLOOR_CCN),
                ok: Some(FLOOR_OK_CCN),
            };
            MetricCalibrations {
                ccn: Some(MetricCalibration::from_distribution(
                    &[1.0, 2.0, 3.0, 4.0, 5.0],
                    floors,
                )),
                ..MetricCalibrations::default()
            }
        }

        // Two calibrated workspaces next to the global cohort.
        let workspaces: BTreeMap<String, MetricCalibrations> = ["packages/web", "packages/api"]
            .iter()
            .map(|path| ((*path).to_owned(), ccn_table()))
            .collect();
        let cal = Calibration {
            meta: CalibrationMeta::default(),
            calibration: ccn_table(),
            workspaces,
        };

        // Global ccn.floor_critical = 25; only web overrides it to 40.
        let cfg = Config::from_toml_str(
            r#"
            [metrics.ccn]
            enabled = true
            floor_critical = 25

            [[project.workspaces]]
            path = "packages/web"

            [project.workspaces.metrics.ccn]
            floor_critical = 40

            [[project.workspaces]]
            path = "packages/api"
            "#,
        )
        .unwrap();

        let merged = cal.with_overrides(&cfg);
        let critical_floor =
            |table: &MetricCalibrations| table.ccn.as_ref().unwrap().floor_critical;

        // The global cohort and api see only the global override.
        assert_eq!(critical_floor(&merged.calibration), Some(25.0));
        assert_eq!(
            critical_floor(&merged.workspaces["packages/api"]),
            Some(25.0),
        );
        // Web's workspace-specific override wins over the global one.
        assert_eq!(
            critical_floor(&merged.workspaces["packages/web"]),
            Some(40.0),
        );
    }

    #[test]
    fn classify_spread_gate_graduates_tight_distribution() {
        // Floors 11 / 25 span a band of 14, so the gate wants a
        // p50→p95 spread of at least 7. This cluster only spans 2
        // (12 → 14): the percentile ladder has no signal here.
        let tight = MetricCalibration {
            floor_ok: Some(11.0),
            floor_critical: Some(25.0),
            p50: 12.0,
            p75: 13.0,
            p90: 13.5,
            p95: 14.0,
        };
        let cases = [
            // At p95 — Critical on percentiles, Ok because of the gate.
            (14.0, Severity::Ok),
            // Below floor_ok — Ok via the graduation gate.
            (10.0, Severity::Ok),
            // Above floor_critical — Critical via the first gate.
            (30.0, Severity::Critical),
        ];
        for (value, expected) in cases {
            assert_eq!(tight.classify(value), expected);
        }
    }

    #[test]
    fn classify_spread_gate_keeps_wide_distribution() {
        // p95 - p50 = 22 - 4 = 18, comfortably over the required 7, so
        // the percentile ladder stays in charge.
        let wide = MetricCalibration {
            floor_ok: Some(11.0),
            floor_critical: Some(25.0),
            p50: 4.0,
            p75: 12.0,
            p90: 18.0,
            p95: 22.0,
        };
        let ladder = [
            (22.0, Severity::Critical),
            (18.0, Severity::High),
            (12.0, Severity::Medium),
        ];
        for (value, expected) in ladder {
            assert_eq!(wide.classify(value), expected);
        }
    }

    #[test]
    fn classify_spread_gate_silent_without_both_floors() {
        // Same tight cluster as the graduating case, but without
        // floor_ok (as for metrics like duplication): the spread gate
        // must stay out of the way.
        let critical_only = MetricCalibration {
            floor_ok: None,
            floor_critical: Some(25.0),
            p50: 12.0,
            p75: 13.0,
            p90: 13.5,
            p95: 14.0,
        };
        // 14 sits on p95 → Critical straight from the percentile ladder.
        let severity = critical_only.classify(14.0);
        assert_eq!(severity, Severity::Critical);
    }

    #[test]
    fn classify_floor_ok_works_with_nan_percentiles() {
        // Two samples are too few for percentiles, so they come back
        // NaN; both floors must keep working regardless.
        let floors = MetricFloors {
            critical: Some(25.0),
            ok: Some(11.0),
        };
        let tiny = MetricCalibration::from_distribution(&[1.0, 2.0], floors);
        assert!(tiny.p95.is_nan());
        for (value, expected) in [(5.0, Severity::Ok), (30.0, Severity::Critical)] {
            assert_eq!(tiny.classify(value), expected);
        }
    }

    #[test]
    fn classify_breaks_match_todo_ladder() {
        // No floors: the pure percentile ladder, one probe per rung.
        let ladder = cal(1.0, 2.0, 3.0, 4.0, None);
        let rungs = [
            (0.5, Severity::Ok),
            (2.0, Severity::Medium),
            (3.0, Severity::High),
            (4.0, Severity::Critical),
        ];
        for (value, expected) in rungs {
            assert_eq!(ladder.classify(value), expected);
        }
    }

    #[test]
    fn classify_inclusive_at_breaks() {
        // Every break in the TODO is a `>=` comparison, so a value
        // sitting exactly on a break belongs to the higher rung.
        let ladder = cal(1.0, 2.0, 3.0, 4.0, None);
        let on_the_break = [
            (2.0, Severity::Medium),
            (3.0, Severity::High),
            (4.0, Severity::Critical),
        ];
        for (value, expected) in on_the_break {
            assert_eq!(ladder.classify(value), expected);
        }
    }

    #[test]
    fn percentile_linear_interpolation() {
        // Five evenly spaced values: each rank below lands exactly on
        // an element (p50 → index 2, p75 → index 3, p25 → index 1).
        let evenly_spaced = [1.0, 2.0, 3.0, 4.0, 5.0];
        for (p, expected) in [(50.0, 3.0), (75.0, 4.0), (25.0, 2.0)] {
            let got = percentile(&evenly_spaced, p);
            assert!((got - expected).abs() < 1e-9);
        }
    }

    #[test]
    fn percentile_handles_edges() {
        // Empty slice → 0.0.
        let empty: [f64; 0] = [];
        assert!(percentile(&empty, 50.0).abs() < 1e-9);
        // Single element → that element, whatever the percentile.
        let single = [7.0];
        assert!((percentile(&single, 95.0) - 7.0).abs() < 1e-9);
    }

    #[test]
    fn from_distribution_marks_breaks_nan_below_min_samples() {
        let critical_only = MetricFloors {
            critical: Some(25.0),
            ok: None,
        };
        let tiny = MetricCalibration::from_distribution(&[1.0, 2.0], critical_only);

        // Too few samples: the breaks degenerate to NaN, the floor is kept.
        assert!(tiny.p50.is_nan() && tiny.p95.is_nan());
        assert_eq!(tiny.floor_critical, Some(25.0));

        // The floor keeps firing even with degenerate percentiles.
        assert_eq!(tiny.classify(30.0), Severity::Critical);
        // Comparisons against NaN are false, so everything under the
        // floor lands on Ok. That is the point of the
        // `MIN_SAMPLES_FOR_PERCENTILES` guard: a tiny codebase must not
        // be painted red just because its sample is small.
        assert_eq!(tiny.classify(5.0), Severity::Ok);
    }

    #[test]
    fn from_distribution_drops_non_finite() {
        // NaN and +inf are discarded, leaving [1, 2, 3, 4, 5] whose
        // median is 3.0.
        let noisy = [1.0, 2.0, f64::NAN, 3.0, f64::INFINITY, 4.0, 5.0];
        let calibration = MetricCalibration::from_distribution(&noisy, MetricFloors::default());
        let median_error = (calibration.p50 - 3.0).abs();
        assert!(median_error < 1e-9);
    }

    // In a uniformly cold codebase the top decile starts at score 15,
    // which is still under FLOOR_OK_HOTSPOT (=22). The absolute floor must
    // veto the percentile gate: such files are not hotspots in absolute
    // terms.
    #[test]
    fn hotspot_floor_ok_blocks_top_decile_in_cold_codebase() {
        let cold = HotspotCalibration {
            floor_ok: Some(FLOOR_OK_HOTSPOT),
            p50: 3.0,
            p75: 8.0,
            p90: 15.0,
            p95: 18.0,
        };

        // At or above p90 but below floor_ok: never flagged.
        for below_floor in [15.0, 20.0] {
            assert!(!cold.flag(below_floor));
        }

        // Clears both the floor and p90: flagged.
        assert!(cold.flag(25.0));
    }

    // Older snapshots carry `floor_ok = None`; for those the original
    // percentile-only rule must keep applying, with p90 as the sole gate.
    #[test]
    fn hotspot_floor_ok_disabled_falls_back_to_percentile() {
        let legacy = HotspotCalibration {
            floor_ok: None,
            p50: 3.0,
            p75: 8.0,
            p90: 15.0,
            p95: 18.0,
        };

        let at_p90 = legacy.flag(15.0);
        let just_under_p90 = legacy.flag(14.0);

        assert!(at_p90);
        assert!(!just_under_p90);
    }

    // The flag boundary sits exactly on p90: below it is not flagged, at
    // or above it is.
    #[test]
    fn hotspot_flag_at_p90() {
        let calibrated = HotspotCalibration {
            // p90 (67) sits above the floor here, so the percentile gate
            // alone decides the boundary case in this test.
            floor_ok: Some(FLOOR_OK_HOTSPOT),
            p50: 5.0,
            p75: 18.0,
            p90: 67.0,
            p95: 145.0,
        };

        assert!(!calibrated.flag(50.0));
        for flagged in [67.0, 200.0] {
            assert!(calibrated.flag(flagged));
        }
    }

    // Serialises a populated `Calibration` to pretty TOML, checks that the
    // expected keys / table headers are present, and verifies that parsing
    // the output gives back an equal value.
    #[test]
    fn toml_roundtrip_with_deny_unknown_fields() {
        let meta = CalibrationMeta {
            created_at: DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap(),
            codebase_files: 142,
            strategy: STRATEGY_PERCENTILE.to_owned(),
            calibrated_at_sha: Some("abcd1234".into()),
        };
        let ccn = MetricCalibration {
            p50: 4.2,
            p75: 8.1,
            p90: 14.3,
            p95: 21.7,
            floor_critical: Some(FLOOR_CCN),
            floor_ok: Some(FLOOR_OK_CCN),
        };
        let hotspot = HotspotCalibration {
            p50: 5.0,
            p75: 18.0,
            p90: 67.0,
            p95: 145.0,
            floor_ok: Some(FLOOR_OK_HOTSPOT),
        };
        let original = Calibration {
            meta,
            calibration: MetricCalibrations {
                ccn: Some(ccn),
                hotspot: Some(hotspot),
                ..MetricCalibrations::default()
            },
            workspaces: BTreeMap::new(),
        };

        let rendered = toml::to_string_pretty(&original).unwrap();
        for needle in ["created_at", "[calibration.ccn]", "[calibration.hotspot]"] {
            assert!(rendered.contains(needle));
        }

        let reparsed: Calibration = toml::from_str(&rendered).unwrap();
        assert_eq!(reparsed, original);
    }

    // `save` must write a provenance comment header ahead of the TOML
    // body, and `load` must still parse the file back to an equal value.
    #[test]
    fn save_prepends_provenance_header_and_round_trips() {
        let tmp = tempfile::TempDir::new().unwrap();
        let target = tmp.path().join("calibration.toml");

        let meta = CalibrationMeta {
            created_at: DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap(),
            codebase_files: 7,
            strategy: STRATEGY_PERCENTILE.to_owned(),
            calibrated_at_sha: None,
        };
        let written = Calibration {
            meta,
            calibration: MetricCalibrations::default(),
            workspaces: BTreeMap::new(),
        };
        written.save(&target).unwrap();

        // The header comes first so that anyone opening the file sees the
        // regeneration command straight away.
        let on_disk = std::fs::read_to_string(&target).unwrap();
        assert!(on_disk.starts_with("# Generated by `heal calibrate`"));
        assert!(on_disk.contains("heal calibrate --force"));

        // The leading comment lines must not get in the way of parsing.
        let reloaded = Calibration::load(&target).unwrap();
        assert_eq!(reloaded, written);
    }

    // On a tiny codebase `heal init` ends up with NaN percentile breaks
    // (fewer samples than `MIN_SAMPLES_FOR_PERCENTILES`). TOML can express
    // `nan` directly, so that degenerate marker has to survive a
    // serialise / parse cycle unchanged.
    #[test]
    fn nan_breaks_round_trip_through_toml() {
        let degenerate = MetricCalibration::from_distribution(
            &[1.0, 2.0],
            MetricFloors {
                critical: Some(FLOOR_CCN),
                ok: Some(FLOOR_OK_CCN),
            },
        );
        let cal = Calibration {
            meta: CalibrationMeta::default(),
            calibration: MetricCalibrations {
                ccn: Some(degenerate),
                ..MetricCalibrations::default()
            },
            workspaces: BTreeMap::new(),
        };

        let rendered = toml::to_string_pretty(&cal).unwrap();
        let restored: Calibration = toml::from_str(&rendered).unwrap();
        let ccn = restored.calibration.ccn.as_ref().unwrap();

        // NaN markers and the floor are all intact after the trip.
        assert!(ccn.p50.is_nan());
        assert!(ccn.p95.is_nan());
        assert_eq!(ccn.floor_critical, Some(FLOOR_CCN));

        // Classification is still driven by the floor alone afterwards.
        assert_eq!(ccn.classify(30.0), Severity::Critical);
        assert_eq!(ccn.classify(5.0), Severity::Ok);
    }

    // A calibration table carrying a key the schema does not define must
    // fail to deserialise rather than be silently ignored.
    #[test]
    fn unknown_fields_are_rejected() {
        let bad = r#"
            [meta]
            created_at = "2026-04-28T09:00:00Z"
            codebase_files = 1
            strategy = "percentile"

            [calibration.ccn]
            p50 = 1.0
            p75 = 2.0
            p90 = 3.0
            p95 = 4.0
            unknown = 99
        "#;
        let err = toml::from_str::<Calibration>(bad).unwrap_err();
        // The offending key is itself called `unknown`, so matching on that
        // word alone would also accept any message that merely quotes the
        // key. Match serde's `unknown field` wording so the test only
        // passes when the failure really is the unknown-field rejection.
        let message = err.to_string();
        assert!(
            message.contains("unknown field"),
            "expected an unknown-field rejection, got: {}",
            message
        );
    }
}