// heal-cli 0.2.1 - Docs.rs

//! Codebase-relative metric calibration (`.heal/calibration.toml`).
//!
//! HEAL's Severity ladder is computed from the **codebase's own
//! distribution**, not absolute literature values, so a simple Python
//! script and a 200kloc service trigger differently for the same raw
//! CCN. Each metric carries its own percentile-based breaks (p75 / p90
//! / p95) plus an absolute "no defence" floor (`floor_critical`) drawn
//! from `McCabe` / `SonarQube`. See TODO §Severity と Calibration
//! ("Severity and Calibration") for the design rationale.
//!
//! Hotspot uses an independent percentile space with **no floor** so
//! the top 10% (`score >= p90`) is structurally guaranteed regardless
//! of churn / size — see TODO §Hotspot Score.
//!
//! Layout on disk:
//!
//! ```toml
//! [meta]
//! created_at      = "2026-04-28T09:00:00Z"
//! codebase_files  = 142
//! strategy        = "percentile"
//!
//! [calibration.ccn]
//! p50 = 4.2
//! p75 = 8.1
//! p90 = 14.3
//! p95 = 21.7
//! floor_critical = 25
//!
//! [calibration.hotspot]
//! p50 = 5.0
//! p75 = 18.0
//! p90 = 67.0
//! p95 = 145.0
//! ```
//!
//! The file is generated by `heal init` / `heal calibrate` and **not
//! intended for hand-editing** — only `floor_critical` is meant to be
//! overridden, and that override lives in `config.toml` so a re-
//! calibration doesn't clobber it.

use std::path::Path;

use chrono::{DateTime, Duration, Utc};
use serde::{Deserialize, Serialize};

use crate::core::config::Config;
use crate::core::error::{Error, Result};
use crate::core::eventlog::EventLog;
use crate::core::severity::Severity;
use crate::core::snapshot::MetricsSnapshot;

/// Built-in `floor_critical` for the `ccn` metric (TODO section
/// "Metrics in scope for v0.2"). The `FLOOR_*` constants are the hard
/// "structurally indefensible" thresholds; the codebase-relative
/// percentile breaks are applied in addition to them.
pub const FLOOR_CCN: f64 = 25.0;
/// Built-in `floor_critical` for the `cognitive` metric.
pub const FLOOR_COGNITIVE: f64 = 50.0;
/// Built-in `floor_critical` for the `duplication` metric
/// (presumably a percentage, per the `_PCT` suffix — confirm at the
/// call site).
pub const FLOOR_DUPLICATION_PCT: f64 = 30.0;

/// Default percentile strategy label written into
/// `meta.strategy`. Reserved for future expansion (e.g. winsorised
/// percentiles for very small samples).
pub const STRATEGY_PERCENTILE: &str = "percentile";

/// Sample-size guard — calibrations built from fewer values than this
/// fall back to `floor_critical`-only classification (everything
/// percentile-derived clamps to Ok). Five points is the minimum where
/// linear-interpolated quartiles aren't degenerate.
pub const MIN_SAMPLES_FOR_PERCENTILES: usize = 5;

/// Recalibration trigger thresholds — mirror the TODO section
/// "Automatic detection triggers". Surfaced via the default
/// `heal calibrate` invocation when `calibration.toml` already exists;
/// the user always decides whether to run `heal calibrate --force`.
///
/// Calibration age, in days, above which the age trigger fires.
pub const TRIGGER_AGE_DAYS: i64 = 90;
/// Relative change in file count above which the file-count trigger
/// fires. Despite the `_PCT` suffix this is a fraction (`0.20` = 20%),
/// compared against `(current - baseline) / baseline`.
pub const TRIGGER_FILE_DELTA_PCT: f64 = 0.20;
/// Length, in days, of a Critical-free commit streak at which the
/// streak trigger fires.
pub const TRIGGER_CRITICAL_CLEAN_DAYS: i64 = 30;

/// Lookback buffer past `TRIGGER_CRITICAL_CLEAN_DAYS` so the streak
/// detector still sees the boundary commit when the cutoff lands close
/// to an event's timestamp. Without this slack a 30-day streak whose
/// oldest record sits a few seconds before `cutoff` would be cropped.
const STREAK_LOOKBACK_BUFFER_DAYS: i64 = 5;

/// Comment header prepended by [`Calibration::save`] so anyone opening
/// `.heal/calibration.toml` immediately sees its provenance and the
/// command that regenerates it. Recalibration is never automatic — the
/// user always invokes `heal calibrate --force` themselves.
///
/// Every line starts with `#`, i.e. it is a TOML comment, so the saved
/// file still parses on load. The trailing blank line separates the
/// header from the serialised body.
const CALIBRATION_HEADER: &str = "\
# Generated by `heal calibrate` from this codebase's score distribution.
# Regenerate after the codebase shifts with `heal calibrate --force`
# (heal never recalibrates automatically).
# Hand edits are preserved on read but will be overwritten by --force;
# put `floor_critical` overrides in `config.toml` instead.

";

/// In-memory form of `.heal/calibration.toml`: file-level metadata plus
/// the per-metric percentile breaks. Unknown keys are rejected when
/// deserialising (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Calibration {
    /// The `[meta]` table: creation time, codebase size and strategy.
    pub meta: CalibrationMeta,
    /// The `[calibration.*]` tables. Falls back to the all-`None`
    /// default when the section is absent from the file.
    #[serde(default)]
    pub calibration: MetricCalibrations,
}

/// Contents of the `[meta]` table in `.heal/calibration.toml`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct CalibrationMeta {
    /// When this calibration was produced; the age trigger measures
    /// elapsed days from this instant.
    pub created_at: DateTime<Utc>,
    /// Codebase file count recorded with the calibration; used as the
    /// file-count baseline when no `calibrate` event is available.
    pub codebase_files: u32,
    /// Strategy label, e.g. [`STRATEGY_PERCENTILE`].
    pub strategy: String,
}

impl Default for CalibrationMeta {
    /// Placeholder metadata: creation time at the Unix epoch, a file
    /// count of zero, and the [`STRATEGY_PERCENTILE`] label.
    fn default() -> Self {
        let epoch = DateTime::<Utc>::from_timestamp(0, 0).unwrap_or_default();
        Self {
            created_at: epoch,
            codebase_files: 0,
            strategy: String::from(STRATEGY_PERCENTILE),
        }
    }
}

/// Per-metric calibration tables (`[calibration.<metric>]`). Each slot
/// is `None` when that metric has no calibration; `None` slots are
/// omitted from the serialised file and default to `None` on load.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MetricCalibrations {
    /// Breaks for the `ccn` metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ccn: Option<MetricCalibration>,
    /// Breaks for the `cognitive` metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cognitive: Option<MetricCalibration>,
    /// Breaks for the `duplication` metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duplication: Option<MetricCalibration>,
    /// Breaks for the `change_coupling` metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub change_coupling: Option<MetricCalibration>,
    /// Hotspot percentiles. Uses its own type because hotspot carries
    /// no `floor_critical`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub hotspot: Option<HotspotCalibration>,
    /// Breaks for the `lcom` metric.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lcom: Option<MetricCalibration>,
}

/// Percentile breaks for one metric plus an optional absolute floor.
/// The percentile fields are `NaN` when the calibration was built from
/// fewer than [`MIN_SAMPLES_FOR_PERCENTILES`] finite samples.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MetricCalibration {
    /// 50th percentile of the sample; stored but not consulted by
    /// [`MetricCalibration::classify`].
    pub p50: f64,
    /// 75th percentile — values at or above it classify as Medium.
    pub p75: f64,
    /// 90th percentile — values at or above it classify as High.
    pub p90: f64,
    /// 95th percentile — values at or above it classify as Critical.
    pub p95: f64,
    /// Absolute "structurally indefensible" floor. Anything `>= floor`
    /// classifies as Critical regardless of percentile placement, so a
    /// codebase that's uniformly bad still surfaces its worst cases.
    /// `None` means "no floor" (rare — e.g. for `change_coupling`
    /// where the user-configured `min_coupling` already filters before
    /// classification).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub floor_critical: Option<f64>,
}

impl MetricCalibration {
    /// Map a raw metric value onto the Severity ladder described in the
    /// TODO's classification section: at or above the absolute floor →
    /// Critical; `>= p95` → Critical; `>= p90` → High; `>= p75` →
    /// Medium; anything lower → Ok.
    ///
    /// Degenerate calibrations (sample size below
    /// [`MIN_SAMPLES_FOR_PERCENTILES`]) carry `NaN` percentiles. Every
    /// `>=` comparison against `NaN` is false, so such calibrations can
    /// only ever yield Critical (via the floor) or Ok.
    #[must_use]
    pub fn classify(&self, value: f64) -> Severity {
        // The floor is consulted first, but it shares its verdict with
        // the p95 break, so the two checks collapse into one branch.
        let over_floor = matches!(self.floor_critical, Some(floor) if value >= floor);
        if over_floor || value >= self.p95 {
            return Severity::Critical;
        }
        if value >= self.p90 {
            return Severity::High;
        }
        if value >= self.p75 {
            return Severity::Medium;
        }
        Severity::Ok
    }

    /// Derive the percentile breaks from a sample of metric values and
    /// attach the optional absolute floor unchanged.
    ///
    /// Non-finite inputs (`NaN` / `inf`) are discarded first. If fewer
    /// than [`MIN_SAMPLES_FOR_PERCENTILES`] finite values remain, every
    /// percentile is set to `NaN` so that `classify` ignores them and
    /// only `floor_critical` can produce a Critical verdict.
    #[must_use]
    pub fn from_distribution(values: &[f64], floor_critical: Option<f64>) -> Self {
        let mut finite: Vec<f64> = values.iter().copied().filter(|v| v.is_finite()).collect();
        let enough = finite.len() >= MIN_SAMPLES_FOR_PERCENTILES;
        if enough {
            finite.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap_or(std::cmp::Ordering::Equal));
        }
        // `NaN` marks a degenerate (too small) sample for every break.
        let break_at = |p: f64| {
            if enough {
                percentile(&finite, p)
            } else {
                f64::NAN
            }
        };
        Self {
            p50: break_at(50.0),
            p75: break_at(75.0),
            p90: break_at(90.0),
            p95: break_at(95.0),
            floor_critical,
        }
    }
}

/// Percentiles of the hotspot score distribution. Unlike
/// [`MetricCalibration`] there is no `floor_critical` field. Only `p90`
/// is consulted by [`HotspotCalibration::flag`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct HotspotCalibration {
    /// 50th percentile of the hotspot scores.
    pub p50: f64,
    /// 75th percentile of the hotspot scores.
    pub p75: f64,
    /// 90th percentile — the hotspot flag threshold.
    pub p90: f64,
    /// 95th percentile of the hotspot scores.
    pub p95: f64,
}

impl HotspotCalibration {
    /// True iff `score >= p90` — i.e. the file sits in the top 10% of
    /// hotspot scores. Hotspot is a **flag**, not a Severity (TODO
    /// section "Severity and Hotspot are orthogonal attributes").
    ///
    /// A degenerate calibration has `p90 == NaN`, and any comparison
    /// against `NaN` is false, so nothing is flagged in that case.
    #[must_use]
    pub fn flag(&self, score: f64) -> bool {
        score >= self.p90
    }

    /// Build the hotspot percentiles from a sample of scores.
    ///
    /// Non-finite scores (`NaN` / `inf`) are dropped **before** the
    /// sample-size guard is applied, matching
    /// `MetricCalibration::from_distribution`. If fewer than
    /// [`MIN_SAMPLES_FOR_PERCENTILES`] finite scores remain, every
    /// percentile is `NaN` so [`HotspotCalibration::flag`] never fires.
    ///
    /// Checking the raw length instead would let a sample made mostly
    /// (or entirely) of non-finite values through the guard; an empty
    /// filtered sample then yields `p90 == 0.0`, which flags every
    /// non-negative score.
    #[must_use]
    pub fn from_distribution(scores: &[f64]) -> Self {
        let mut sorted: Vec<f64> = scores.iter().copied().filter(|v| v.is_finite()).collect();
        if sorted.len() < MIN_SAMPLES_FOR_PERCENTILES {
            return Self {
                p50: f64::NAN,
                p75: f64::NAN,
                p90: f64::NAN,
                p95: f64::NAN,
            };
        }
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        Self {
            p50: percentile(&sorted, 50.0),
            p75: percentile(&sorted, 75.0),
            p90: percentile(&sorted, 90.0),
            p95: percentile(&sorted, 95.0),
        }
    }
}

impl Calibration {
    pub fn load(path: &Path) -> Result<Self> {
        let raw = std::fs::read_to_string(path).map_err(|e| Error::Io {
            path: path.to_path_buf(),
            source: e,
        })?;
        toml::from_str(&raw).map_err(|source| Error::ConfigParse {
            path: path.to_path_buf(),
            source,
        })
    }

    /// Persist the calibration atomically (temp file + rename) so a
    /// SIGINT mid-write can never leave a half-written
    /// `calibration.toml` that fails to parse on every subsequent read.
    /// A short comment header is prepended so the file's provenance and
    /// regeneration command are visible to anyone opening it.
    pub fn save(&self, path: &Path) -> Result<()> {
        let body = toml::to_string_pretty(self).expect("Calibration serialization is infallible");
        let mut out = String::with_capacity(body.len() + CALIBRATION_HEADER.len());
        out.push_str(CALIBRATION_HEADER);
        out.push_str(&body);
        crate::core::fs::atomic_write(path, out.as_bytes())
    }

    /// Project a [`Calibration`] into a [`CalibrationEvent`] suitable
    /// for the audit log. `reason` records why the recalibration fired;
    /// the live codebase size is read from `self.meta.codebase_files`.
    #[must_use]
    pub fn to_event(&self, reason: String) -> CalibrationEvent {
        CalibrationEvent {
            at: self.meta.created_at,
            p95_ccn: self.calibration.ccn.as_ref().map(|c| c.p95),
            p95_cognitive: self.calibration.cognitive.as_ref().map(|c| c.p95),
            p95_duplication: self.calibration.duplication.as_ref().map(|c| c.p95),
            p95_change_coupling: self.calibration.change_coupling.as_ref().map(|c| c.p95),
            p90_hotspot: self.calibration.hotspot.as_ref().map(|c| c.p90),
            files: self.meta.codebase_files,
            reason,
        }
    }

    /// Apply config-side `floor_critical` overrides. Each per-metric
    /// section in `[metrics.<name>]` may set `floor_critical = N` to
    /// raise (or, rarely, lower) the absolute floor without touching
    /// `.heal/calibration.toml` — that way re-calibrating from a new
    /// codebase distribution doesn't clobber the user's preference.
    #[must_use]
    pub fn with_overrides(mut self, config: &Config) -> Self {
        if let Some(c) = self.calibration.ccn.as_mut() {
            if let Some(f) = config.metrics.ccn.floor_critical {
                c.floor_critical = Some(f);
            }
        }
        if let Some(c) = self.calibration.cognitive.as_mut() {
            if let Some(f) = config.metrics.cognitive.floor_critical {
                c.floor_critical = Some(f);
            }
        }
        if let Some(c) = self.calibration.duplication.as_mut() {
            if let Some(f) = config.metrics.duplication.floor_critical {
                c.floor_critical = Some(f);
            }
        }
        if let Some(c) = self.calibration.change_coupling.as_mut() {
            if let Some(f) = config.metrics.change_coupling.floor_critical {
                c.floor_critical = Some(f);
            }
        }
        if let Some(c) = self.calibration.lcom.as_mut() {
            if let Some(f) = config.metrics.lcom.floor_critical {
                c.floor_critical = Some(f);
            }
        }
        self
    }
}

/// Audit-log entry written to `.heal/snapshots/` whenever
/// `heal calibrate` (or `heal init`) emits a fresh calibration. Recorded
/// alongside `commit` events; `MetricsSnapshot::latest_in_segments`
/// silently skips records that don't decode as a snapshot, so the two
/// event shapes coexist without interfering.
///
/// Serialised under `event = "calibrate"`. Holds only the headline
/// numbers — full breaks live in `.heal/calibration.toml`. Optional
/// fields are omitted from the serialised form when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct CalibrationEvent {
    /// Creation time of the calibration this event describes.
    pub at: DateTime<Utc>,
    /// `p95` break of the `ccn` metric, if calibrated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub p95_ccn: Option<f64>,
    /// `p95` break of the `cognitive` metric, if calibrated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub p95_cognitive: Option<f64>,
    /// `p95` break of the `duplication` metric, if calibrated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub p95_duplication: Option<f64>,
    /// `p95` break of the `change_coupling` metric, if calibrated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub p95_change_coupling: Option<f64>,
    /// `p90` hotspot threshold, if calibrated.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub p90_hotspot: Option<f64>,
    /// Codebase file count at calibration time; the file-count trigger
    /// uses the most recent event's value as its baseline.
    pub files: u32,
    /// Free-form explanation of why the calibration was produced.
    pub reason: String,
}

/// Outcome of the default `heal calibrate` drift evaluation. Each field
/// is `Some(...)` when the corresponding trigger fired, holding the
/// measured drift so the CLI can surface "what" without re-querying.
/// The `Default` value has every field `None`, i.e. nothing fired.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct RecalibrationCheck {
    /// Days since the current calibration's `meta.created_at` when
    /// `> TRIGGER_AGE_DAYS`.
    pub age_exceeded_days: Option<i64>,
    /// Signed relative change in `codebase_files` (current vs. last
    /// `CalibrationEvent.files`) when `|Δ| > TRIGGER_FILE_DELTA_PCT`.
    /// Expressed as a fraction, not a percentage: `0.25` means +25%.
    pub file_count_delta_pct: Option<f64>,
    /// Continuous "Critical == 0" run length in days when
    /// `>= TRIGGER_CRITICAL_CLEAN_DAYS`.
    pub critical_clean_streak_days: Option<i64>,
}

impl RecalibrationCheck {
    /// Whether any of the three triggers produced a value.
    #[must_use]
    pub fn fired(&self) -> bool {
        let all_quiet = self.age_exceeded_days.is_none()
            && self.file_count_delta_pct.is_none()
            && self.critical_clean_streak_days.is_none();
        !all_quiet
    }

    /// Run all three triggers against the snapshot log and the current
    /// calibration. `now` is a parameter so tests can be deterministic;
    /// production callers pass `Utc::now()`.
    ///
    /// Nothing is written: the result is only reported, and the user
    /// decides whether to invoke `heal calibrate`. If the segment list
    /// cannot be read it is treated as empty, so the log-based triggers
    /// simply stay quiet.
    pub fn evaluate(snapshots: &EventLog, calibration: &Calibration, now: DateTime<Utc>) -> Self {
        let segments = snapshots.segments().unwrap_or_default();
        let age_exceeded_days = age_trigger(calibration, now);
        let file_count_delta_pct = file_count_trigger(&segments, calibration);
        let critical_clean_streak_days = critical_streak_trigger(&segments, now);
        Self {
            age_exceeded_days,
            file_count_delta_pct,
            critical_clean_streak_days,
        }
    }
}

/// Age trigger: returns the calibration's age in whole days when it is
/// strictly greater than [`TRIGGER_AGE_DAYS`], otherwise `None`.
fn age_trigger(calibration: &Calibration, now: DateTime<Utc>) -> Option<i64> {
    let age = now - calibration.meta.created_at;
    Some(age.num_days()).filter(|&days| days > TRIGGER_AGE_DAYS)
}

/// File-count trigger: compares the file count of the latest metrics
/// snapshot with a baseline and returns the signed relative change (a
/// fraction, e.g. `0.25` for +25%) when its magnitude exceeds
/// [`TRIGGER_FILE_DELTA_PCT`].
///
/// The baseline is the `files` value of the most recent `calibrate`
/// event, falling back to `calibration.meta.codebase_files` when no
/// such event decodes. Returns `None` when there is no snapshot, the
/// snapshot has no file count, or the baseline is zero.
fn file_count_trigger(
    segments: &[crate::core::eventlog::Segment],
    calibration: &Calibration,
) -> Option<f64> {
    let latest = MetricsSnapshot::latest_in_segments(segments).ok().flatten();
    let (_, snapshot) = latest?;
    let current = snapshot.codebase_files?;
    let baseline = match latest_calibration_event(segments) {
        Some(event) => event.files,
        None => calibration.meta.codebase_files,
    };
    if baseline == 0 {
        // A relative change against zero files is undefined.
        return None;
    }
    let delta = (f64::from(current) - f64::from(baseline)) / f64::from(baseline);
    if delta.abs() > TRIGGER_FILE_DELTA_PCT {
        Some(delta)
    } else {
        None
    }
}

/// Critical-clean streak trigger: measures the run of most-recent
/// `commit` snapshots whose `severity_counts.critical == 0` and returns
/// its span in whole days (newest clean commit minus oldest clean
/// commit) when that span is at least [`TRIGGER_CRITICAL_CLEAN_DAYS`].
///
/// Only commits no older than `TRIGGER_CRITICAL_CLEAN_DAYS +
/// STREAK_LOOKBACK_BUFFER_DAYS` before `now` are considered. The streak
/// ends at the first commit (walking backwards) that fails to decode,
/// lacks `severity_counts`, or reports a Critical finding.
fn critical_streak_trigger(
    segments: &[crate::core::eventlog::Segment],
    now: DateTime<Utc>,
) -> Option<i64> {
    // Filter by event type *before* decoding — `MetricsSnapshot`
    // deliberately omits `deny_unknown_fields` for forward compat, so a
    // `calibrate` event's JSON would deserialize as a default snapshot
    // and silently break the streak.
    let cutoff = now - Duration::days(TRIGGER_CRITICAL_CLEAN_DAYS + STREAK_LOOKBACK_BUFFER_DAYS);
    let events: Vec<crate::core::eventlog::Event> = EventLog::iter_segments(segments.to_vec())
        .filter_map(std::result::Result::ok)
        .filter(|ev| ev.event == "commit" && ev.timestamp >= cutoff)
        .collect();
    // Walk newest → oldest, yielding timestamps until the streak ends.
    // Each event is owned here, so its JSON payload is moved into the
    // decoder rather than deep-cloned.
    let mut clean_timestamps = events.into_iter().rev().map_while(|ev| {
        let at = ev.timestamp;
        let metrics = serde_json::from_value::<MetricsSnapshot>(ev.data).ok()?;
        // Legacy snapshot pre-Calibration — can't claim Critical-clean.
        let counts = metrics.severity_counts?;
        if counts.critical > 0 {
            return None;
        }
        Some(at)
    });
    let newest = clean_timestamps.next()?;
    // A single clean commit is both ends of a zero-day streak.
    let oldest = clean_timestamps.last().unwrap_or(newest);
    let streak = (newest - oldest).num_days();
    (streak >= TRIGGER_CRITICAL_CLEAN_DAYS).then_some(streak)
}

/// Return the last `calibrate` event in the log, decoded as a
/// [`CalibrationEvent`]. Records that fail to read are skipped. Yields
/// `None` when there is no such event or when the last one does not
/// decode (earlier events are not tried in that case).
fn latest_calibration_event(
    segments: &[crate::core::eventlog::Segment],
) -> Option<CalibrationEvent> {
    let newest = EventLog::iter_segments(segments.to_vec())
        .filter_map(std::result::Result::ok)
        .filter(|ev| ev.event == "calibrate")
        .last()?;
    serde_json::from_value::<CalibrationEvent>(newest.data).ok()
}

/// Percentile by linear interpolation between neighbouring ranks (the
/// `NumPy` default style). With `rank = p/100 * (n-1)` and
/// `k = floor(rank)`, the result is
/// `values[k] + (rank - k) * (values[k+1] - values[k])`.
///
/// `sorted` must already be in ascending order with non-finite values
/// removed. An empty slice yields `0.0`, a single element yields that
/// element, and a rank at or past the last index yields the last
/// element.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted {
        [] => 0.0,
        [only] => *only,
        [.., last] => {
            #[allow(clippy::cast_precision_loss)]
            let position = (p / 100.0) * (sorted.len() as f64 - 1.0);
            let base = position.floor();
            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
            let lower = base as usize;
            match sorted.get(lower + 1) {
                // No right-hand neighbour: clamp to the maximum.
                None => *last,
                Some(&upper) => sorted[lower] + (position - base) * (upper - sorted[lower]),
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn cal(p50: f64, p75: f64, p90: f64, p95: f64, floor: Option<f64>) -> MetricCalibration {
        MetricCalibration {
            p50,
            p75,
            p90,
            p95,
            floor_critical: floor,
        }
    }

    #[test]
    fn classify_uses_floor_first() {
        let c = cal(1.0, 2.0, 3.0, 4.0, Some(10.0));
        assert_eq!(c.classify(11.0), Severity::Critical);
        // Above p95 but below floor — still Critical via the p95 break.
        assert_eq!(c.classify(5.0), Severity::Critical);
    }

    #[test]
    fn classify_breaks_match_todo_ladder() {
        let c = cal(1.0, 2.0, 3.0, 4.0, None);
        assert_eq!(c.classify(0.5), Severity::Ok);
        assert_eq!(c.classify(2.0), Severity::Medium);
        assert_eq!(c.classify(3.0), Severity::High);
        assert_eq!(c.classify(4.0), Severity::Critical);
    }

    #[test]
    fn classify_inclusive_at_breaks() {
        // The TODO uses `>=` for every break — verify exact-boundary values.
        let c = cal(1.0, 2.0, 3.0, 4.0, None);
        assert_eq!(c.classify(2.0), Severity::Medium);
        assert_eq!(c.classify(3.0), Severity::High);
        assert_eq!(c.classify(4.0), Severity::Critical);
    }

    #[test]
    fn percentile_linear_interpolation() {
        let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        // p50 of 5 evenly spaced values = 3.0
        assert!((percentile(&sorted, 50.0) - 3.0).abs() < 1e-9);
        // p75 = rank 3.0 = 4.0
        assert!((percentile(&sorted, 75.0) - 4.0).abs() < 1e-9);
        // p25 = rank 1.0 = 2.0
        assert!((percentile(&sorted, 25.0) - 2.0).abs() < 1e-9);
    }

    #[test]
    fn percentile_handles_edges() {
        assert!((percentile(&[], 50.0) - 0.0).abs() < 1e-9);
        assert!((percentile(&[7.0], 95.0) - 7.0).abs() < 1e-9);
    }

    #[test]
    fn from_distribution_marks_breaks_nan_below_min_samples() {
        // Two samples is below `MIN_SAMPLES_FOR_PERCENTILES`, so every
        // percentile break must come back as NaN.
        let c = MetricCalibration::from_distribution(&[1.0, 2.0], Some(25.0));
        assert!(c.p50.is_nan());
        assert!(c.p95.is_nan());
        assert_eq!(c.floor_critical, Some(25.0));
        // Floor still applies even when percentiles degenerate.
        assert_eq!(c.classify(30.0), Severity::Critical);
        // NaN comparisons short-circuit to Ok — a tiny sample shouldn't
        // promote every non-zero value to Critical (TODO open issues,
        // section "sample-size lower-bound guard").
        assert_eq!(c.classify(5.0), Severity::Ok);
    }

    #[test]
    fn from_distribution_drops_non_finite() {
        let values = vec![1.0, 2.0, f64::NAN, 3.0, f64::INFINITY, 4.0, 5.0];
        let c = MetricCalibration::from_distribution(&values, None);
        // After filter the sorted set is [1,2,3,4,5] → p50 = 3.0.
        assert!((c.p50 - 3.0).abs() < 1e-9);
    }

    #[test]
    fn hotspot_flag_at_p90() {
        let h = HotspotCalibration {
            p50: 5.0,
            p75: 18.0,
            p90: 67.0,
            p95: 145.0,
        };
        assert!(!h.flag(50.0));
        assert!(h.flag(67.0));
        assert!(h.flag(200.0));
    }

    #[test]
    fn toml_roundtrip_with_deny_unknown_fields() {
        let cal = Calibration {
            meta: CalibrationMeta {
                created_at: DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap(),
                codebase_files: 142,
                strategy: STRATEGY_PERCENTILE.to_owned(),
            },
            calibration: MetricCalibrations {
                ccn: Some(MetricCalibration {
                    p50: 4.2,
                    p75: 8.1,
                    p90: 14.3,
                    p95: 21.7,
                    floor_critical: Some(FLOOR_CCN),
                }),
                hotspot: Some(HotspotCalibration {
                    p50: 5.0,
                    p75: 18.0,
                    p90: 67.0,
                    p95: 145.0,
                }),
                ..MetricCalibrations::default()
            },
        };
        let s = toml::to_string_pretty(&cal).unwrap();
        assert!(s.contains("created_at"));
        assert!(s.contains("[calibration.ccn]"));
        assert!(s.contains("[calibration.hotspot]"));

        let back: Calibration = toml::from_str(&s).unwrap();
        assert_eq!(back, cal);
    }

    #[test]
    fn save_prepends_provenance_header_and_round_trips() {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("calibration.toml");
        let cal = Calibration {
            meta: CalibrationMeta {
                created_at: DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap(),
                codebase_files: 7,
                strategy: STRATEGY_PERCENTILE.to_owned(),
            },
            calibration: MetricCalibrations::default(),
        };
        cal.save(&path).unwrap();

        let raw = std::fs::read_to_string(&path).unwrap();
        // Header is at the top so opening the file in any editor surfaces
        // the regeneration command. Body still parses despite the comments.
        assert!(raw.starts_with("# Generated by `heal calibrate`"));
        assert!(raw.contains("heal calibrate --force"));
        let back = Calibration::load(&path).unwrap();
        assert_eq!(back, cal);
    }

    #[test]
    fn nan_breaks_round_trip_through_toml() {
        // `heal init` on a tiny codebase produces NaN percentile breaks
        // (sample size below `MIN_SAMPLES_FOR_PERCENTILES`). TOML
        // supports `nan` natively, so save/load must preserve the
        // degenerate-marker behaviour.
        let cal = Calibration {
            meta: CalibrationMeta::default(),
            calibration: MetricCalibrations {
                ccn: Some(MetricCalibration::from_distribution(
                    &[1.0, 2.0],
                    Some(FLOOR_CCN),
                )),
                ..MetricCalibrations::default()
            },
        };
        let s = toml::to_string_pretty(&cal).unwrap();
        let back: Calibration = toml::from_str(&s).unwrap();
        let breaks = back.calibration.ccn.as_ref().unwrap();
        assert!(breaks.p50.is_nan());
        assert!(breaks.p95.is_nan());
        assert_eq!(breaks.floor_critical, Some(FLOOR_CCN));
        // And classify still falls through to floor-only after the trip.
        assert_eq!(breaks.classify(30.0), Severity::Critical);
        assert_eq!(breaks.classify(5.0), Severity::Ok);
    }

    fn cal_with_age(now: DateTime<Utc>, days_old: i64) -> Calibration {
        Calibration {
            meta: CalibrationMeta {
                created_at: now - Duration::days(days_old),
                codebase_files: 100,
                strategy: STRATEGY_PERCENTILE.to_owned(),
            },
            calibration: MetricCalibrations::default(),
        }
    }

    fn snapshot_event(
        at: DateTime<Utc>,
        critical: u32,
        codebase_files: u32,
    ) -> crate::core::eventlog::Event {
        let snap = MetricsSnapshot {
            severity_counts: Some(crate::core::snapshot::SeverityCounts {
                critical,
                ..Default::default()
            }),
            codebase_files: Some(codebase_files),
            ..MetricsSnapshot::default()
        };
        crate::core::eventlog::Event {
            timestamp: at,
            event: "commit".into(),
            data: serde_json::to_value(&snap).unwrap(),
        }
    }

    #[test]
    fn age_trigger_fires_after_90_days() {
        let now = Utc::now();
        let young = cal_with_age(now, 80);
        let old = cal_with_age(now, 100);
        assert!(age_trigger(&young, now).is_none());
        assert_eq!(age_trigger(&old, now), Some(100));
    }

    #[test]
    fn file_count_trigger_fires_at_20_pct_growth() {
        let dir = tempfile::tempdir().unwrap();
        let log = EventLog::new(dir.path());
        // Calibration was built at 100 files; latest snapshot says 125 = +25%.
        log.append(&snapshot_event(Utc::now(), 0, 125)).unwrap();
        let segments = log.segments().unwrap();
        let calibration = cal_with_age(Utc::now(), 10);
        let pct = file_count_trigger(&segments, &calibration).expect("trigger should fire");
        assert!((pct - 0.25).abs() < 1e-9);
    }

    #[test]
    fn file_count_trigger_quiet_within_threshold() {
        let dir = tempfile::tempdir().unwrap();
        let log = EventLog::new(dir.path());
        log.append(&snapshot_event(Utc::now(), 0, 110)).unwrap(); // +10%
        let segments = log.segments().unwrap();
        let calibration = cal_with_age(Utc::now(), 10);
        assert!(file_count_trigger(&segments, &calibration).is_none());
    }

    #[test]
    fn critical_streak_trigger_fires_after_30_clean_days() {
        let dir = tempfile::tempdir().unwrap();
        let log = EventLog::new(dir.path());
        let now = Utc::now();
        // 31-day clean streak: oldest 31 days ago, newest now, both critical = 0.
        log.append(&snapshot_event(now - Duration::days(31), 0, 100))
            .unwrap();
        log.append(&snapshot_event(now - Duration::days(15), 0, 100))
            .unwrap();
        log.append(&snapshot_event(now, 0, 100)).unwrap();
        let segments = log.segments().unwrap();
        let streak = critical_streak_trigger(&segments, now).expect("streak should fire");
        assert!(streak >= TRIGGER_CRITICAL_CLEAN_DAYS);
    }

    #[test]
    fn critical_streak_trigger_breaks_on_recent_critical() {
        let dir = tempfile::tempdir().unwrap();
        let log = EventLog::new(dir.path());
        let now = Utc::now();
        log.append(&snapshot_event(now - Duration::days(31), 0, 100))
            .unwrap();
        // Most-recent commit had Critical=2 — breaks the streak.
        log.append(&snapshot_event(now, 2, 100)).unwrap();
        let segments = log.segments().unwrap();
        assert!(critical_streak_trigger(&segments, now).is_none());
    }

    #[test]
    fn critical_streak_trigger_ignores_calibrate_events() {
        // A calibrate event interleaved with clean commits must not
        // break the streak. The bug we're guarding against: without an
        // `event == "commit"` filter, `calibrate` JSON deserialises as
        // a default `MetricsSnapshot` (no `deny_unknown_fields`), which
        // lacks `severity_counts` and would fall into the "legacy
        // snapshot" break arm.
        let dir = tempfile::tempdir().unwrap();
        let log = EventLog::new(dir.path());
        let now = Utc::now();
        log.append(&snapshot_event(now - Duration::days(31), 0, 100))
            .unwrap();
        let cal_event = CalibrationEvent {
            at: now - Duration::days(20),
            p95_ccn: None,
            p95_cognitive: None,
            p95_duplication: None,
            p95_change_coupling: None,
            p90_hotspot: None,
            files: 100,
            reason: "interleaved".into(),
        };
        log.append(&crate::core::eventlog::Event {
            timestamp: cal_event.at,
            event: "calibrate".into(),
            data: serde_json::to_value(&cal_event).unwrap(),
        })
        .unwrap();
        log.append(&snapshot_event(now, 0, 100)).unwrap();
        let segments = log.segments().unwrap();
        assert!(
            critical_streak_trigger(&segments, now).is_some(),
            "calibrate events between clean commits must not break the streak",
        );
    }

    #[test]
    fn evaluate_returns_none_when_quiet() {
        let dir = tempfile::tempdir().unwrap();
        let log = EventLog::new(dir.path());
        let now = Utc::now();
        log.append(&snapshot_event(now, 1, 100)).unwrap(); // Critical>0 → no streak
        let cal = cal_with_age(now, 10); // young
        let check = RecalibrationCheck::evaluate(&log, &cal, now);
        assert!(!check.fired(), "no triggers should fire on a fresh project");
    }

    #[test]
    fn calibration_event_round_trips_through_toml() {
        // NOTE: despite the `_toml` suffix in the test name, this
        // exercises a JSON round-trip. CalibrationEvent is serialised as
        // JSON inside snapshots/, so that is the encoding verified here;
        // the check exists to confirm the struct shape survives it.
        let ev = CalibrationEvent {
            at: DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap(),
            p95_ccn: Some(21.7),
            p95_cognitive: None,
            p95_duplication: None,
            p95_change_coupling: None,
            p90_hotspot: Some(67.0),
            files: 142,
            reason: "manual".into(),
        };
        let json = serde_json::to_value(&ev).unwrap();
        let back: CalibrationEvent = serde_json::from_value(json).unwrap();
        assert_eq!(back, ev);
    }

    #[test]
    fn unknown_fields_are_rejected() {
        let bad = r#"
            [meta]
            created_at = "2026-04-28T09:00:00Z"
            codebase_files = 1
            strategy = "percentile"

            [calibration.ccn]
            p50 = 1.0
            p75 = 2.0
            p90 = 3.0
            p95 = 4.0
            unknown = 99
        "#;
        let err = toml::from_str::<Calibration>(bad).unwrap_err();
        assert!(err.to_string().contains("unknown"));
    }
}