Skip to main content

keyhog_core/
calibration.rs

1//! Bayesian Beta(α, β) calibration per detector.
2//!
3//! Tier-B moat innovation #4 from audits/legendary-2026-04-26: surface
4//! per-detector reliability based on observed true-positive vs false-
5//! positive history rather than a fixed threshold. Detectors with a long
6//! history of clean hits get a higher confidence multiplier; detectors
7//! that fire-then-suppress repeatedly get downweighted.
8//!
9//! Mathematical model:
10//!     each detector has a Beta(α, β) prior over P(true positive | match).
11//!     α counts confirmed TPs, β counts confirmed FPs (both incremented from
12//!     a starting prior of α=1, β=1 - uniform Beta(1, 1)).
13//!     posterior mean = α / (α + β)  ∈ [0, 1].
14//!
15//! Storage: JSON at `$XDG_CACHE_HOME/keyhog/calibration.json` with a schema
16//! version field. Load returns an empty store on miss / corrupted JSON /
17//! schema mismatch - never poison the cache from a damaged artifact.
18//!
19//! Coherence contract (audit organization/coherence finding): this module is
20//! the DATA layer, but it is now LIVE - the scanner's confidence-scoring path
21//! (`scanner::confidence::apply_calibration_multiplier`) reads these counters.
22//! Because a calibration artifact silently present on one machine but absent on
23//! another would make `tuned != benched != shipped`, the integration MUST be
24//! opt-in and deterministic: the scoring path only consults a calibration store
25//! when one is explicitly supplied, and the default / benchmark / CI scan runs
26//! with an [`empty`](Calibration::empty) store so two machines produce identical
27//! findings for the same input. A stray `$XDG_CACHE_HOME` artifact on a dev box
28//! must never silently alter results - that gating lives in the scanner crate.
29
30#![allow(missing_docs)]
31
32use std::collections::HashMap;
33use std::path::{Path, PathBuf};
34
35use parking_lot::RwLock;
36use serde::{Deserialize, Serialize};
37
38/// A detector's running Beta posterior counters. Always ≥1 each (Beta(1,1)
39/// uniform prior baseline) to avoid posterior_mean undefined when a detector
40/// has had no observations yet.
41#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
42pub struct BetaCounters {
43    pub alpha: u32,
44    pub beta: u32,
45}
46
47impl Default for BetaCounters {
48    fn default() -> Self {
49        Self { alpha: 1, beta: 1 }
50    }
51}
52
53impl BetaCounters {
54    /// Posterior mean: α / (α + β). Falls in [0, 1]; the higher, the more
55    /// reliable the detector is historically.
56    pub fn posterior_mean(&self) -> f64 {
57        let total = self.alpha as f64 + self.beta as f64;
58        if total == 0.0 {
59            0.5
60        } else {
61            self.alpha as f64 / total
62        }
63    }
64
65    /// Number of observations (excluding the prior) the posterior is built
66    /// on. Useful for "trust the recent history" UI gates.
67    ///
68    /// kimi-confidence audit: the previous form was
69    /// `alpha.saturating_sub(1) + beta.saturating_sub(1)` - the `+`
70    /// was a plain add and would panic in debug / wrap to 0 in release
71    /// once both counters reached ~`u32::MAX / 2`. Use `saturating_add`
72    /// so the result clamps at `u32::MAX` instead of wrapping. That's
73    /// still a frozen counter at saturation, but the posterior mean
74    /// stays correct and no detector silently gets disabled.
75    pub fn observations(&self) -> u32 {
76        // Subtract the Beta(1, 1) prior baseline.
77        self.alpha
78            .saturating_sub(1)
79            .saturating_add(self.beta.saturating_sub(1))
80    }
81}
82
83/// On-disk format. The version field gates breaking schema changes.
84#[derive(Debug, Serialize, Deserialize)]
85struct OnDisk {
86    version: u32,
87    detectors: HashMap<String, BetaCounters>,
88}
89
/// Version stamped into every saved artifact; files carrying any other
/// version are ignored on load.
const SCHEMA_VERSION: u32 = 1;
91
92/// Process-wide calibration store. Concurrent updates are serialized via
93/// a single `RwLock` because update events are rare (one per `keyhog
94/// calibrate` invocation or per verifier outcome) and the locked region is
95/// constant-time. We deliberately don't shard via DashMap - the persisted
96/// artifact is small enough that contention is a non-issue.
97#[derive(Debug, Default)]
98pub struct Calibration {
99    inner: RwLock<HashMap<String, BetaCounters>>,
100}
101
102impl Calibration {
103    pub fn empty() -> Self {
104        Self::default()
105    }
106
107    pub fn load(path: &Path) -> Self {
108        let bytes = match std::fs::read(path) {
109            Ok(b) => b,
110            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Self::empty(),
111            Err(e) => {
112                tracing::warn!(
113                    cache = %path.display(),
114                    error = %e,
115                    "calibration file read failed; treating as cold start"
116                );
117                return Self::empty();
118            }
119        };
120        let on_disk: OnDisk = match serde_json::from_slice(&bytes) {
121            Ok(d) => d,
122            Err(e) => {
123                tracing::warn!(
124                    cache = %path.display(),
125                    error = %e,
126                    "calibration parse failed; treating as cold start"
127                );
128                return Self::empty();
129            }
130        };
131        if on_disk.version != SCHEMA_VERSION {
132            tracing::warn!(
133                cache = %path.display(),
134                version = on_disk.version,
135                expected = SCHEMA_VERSION,
136                "calibration schema mismatch; treating as cold start"
137            );
138            return Self::empty();
139        }
140        Self {
141            inner: RwLock::new(on_disk.detectors),
142        }
143    }
144
145    pub fn save(&self, path: &Path) -> std::io::Result<()> {
146        let detectors = self.inner.read().clone();
147        let on_disk = OnDisk {
148            version: SCHEMA_VERSION,
149            detectors,
150        };
151        let serialized = serde_json::to_vec_pretty(&on_disk)
152            .map_err(|e| std::io::Error::other(format!("calibration encode: {e}")))?;
153        let parent = path.parent().unwrap_or_else(|| std::path::Path::new("."));
154        std::fs::create_dir_all(parent)?;
155        // Same atomic-write-via-NamedTempFile pattern used by
156        // `merkle_index::save` - see that file's note for rationale.
157        let mut tmp = tempfile::NamedTempFile::new_in(parent)?;
158        std::io::Write::write_all(&mut tmp, &serialized)?;
159        tmp.as_file().sync_all()?;
160        tmp.persist(path).map_err(|e| e.error)?;
161        Ok(())
162    }
163
164    /// Record a true positive for `detector_id` (α += 1).
165    ///
166    /// kimi-confidence audit: bare `alpha += 1` would panic in debug
167    /// and wrap to 0 in release once a single detector accumulates
168    /// 2^32 observations. Wrapping to 0 silently mutes a previously
169    /// reliable detector (posterior mean drops to 0.0/1.0 = 0). Use
170    /// `saturating_add` so the worst case is a frozen counter at
171    /// `u32::MAX`, which keeps the posterior mean correct.
172    pub fn record_true_positive(&self, detector_id: &str) {
173        let mut guard = self.inner.write();
174        let entry = guard.entry(detector_id.to_string()).or_default();
175        entry.alpha = entry.alpha.saturating_add(1);
176    }
177
178    /// Record a false positive for `detector_id` (β += 1). Same
179    /// `saturating_add` rationale as [`record_true_positive`].
180    pub fn record_false_positive(&self, detector_id: &str) {
181        let mut guard = self.inner.write();
182        let entry = guard.entry(detector_id.to_string()).or_default();
183        entry.beta = entry.beta.saturating_add(1);
184    }
185
186    /// Return the posterior mean for `detector_id`, falling back to 0.5
187    /// when no observations exist (uniform prior over a never-calibrated
188    /// detector). The scanner's confidence-scoring path consumes this value,
189    /// but only when calibration is explicitly opted in (see the module-level
190    /// coherence contract) so default / benchmark scans stay deterministic.
191    pub fn confidence_multiplier(&self, detector_id: &str) -> f64 {
192        self.inner
193            .read()
194            .get(detector_id)
195            .copied()
196            .unwrap_or_default()
197            .posterior_mean()
198    }
199
200    /// Return the full counters for `detector_id` (defaults to Beta(1, 1)).
201    pub fn counters(&self, detector_id: &str) -> BetaCounters {
202        self.inner
203            .read()
204            .get(detector_id)
205            .copied()
206            .unwrap_or_default()
207    }
208
209    /// Iterate every recorded `(detector_id, counters)`. Useful for
210    /// `keyhog calibrate --show`.
211    pub fn entries(&self) -> Vec<(String, BetaCounters)> {
212        let mut out: Vec<_> = self
213            .inner
214            .read()
215            .iter()
216            .map(|(k, v)| (k.clone(), *v))
217            .collect();
218        out.sort_by(|a, b| a.0.cmp(&b.0));
219        out
220    }
221
222    /// Test-only hook for saturation oracle tests in `tests/unit/`.
223    #[doc(hidden)]
224    pub fn test_seed_counters(&self, id: &str, alpha: u32, beta: u32) {
225        let mut guard = self.inner.write();
226        let entry = guard.entry(id.to_string()).or_default();
227        entry.alpha = alpha;
228        entry.beta = beta;
229    }
230}
231
232/// Default cache location: `$XDG_CACHE_HOME/keyhog/calibration.json` (or
233/// the macOS/Windows equivalents via the `dirs` crate).
234pub fn default_cache_path() -> Option<PathBuf> {
235    dirs::cache_dir().map(|d| d.join("keyhog").join("calibration.json"))
236}