ai_memory/confidence/
mod.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! v0.7.0 Form 5 — auto-confidence + shadow-mode + calibration tooling
5//! (issue #758).
6//!
7//! The Batman 6-form audit (PR #753, `docs/internal/batman-framework-audit.md`)
8//! found Form 5 PARTIAL: the `memories.confidence` REAL column had
9//! existed since schema v2 and recall ranking consumed it
10//! (`+ confidence * 2.0` in the FTS5 score expression at
11//! `src/storage/mod.rs`), but the surrounding pipeline was missing the
12//! four substrate-honesty surfaces a "first-class confidence" claim
13//! requires:
14//!
15//!   * **Automatic assignment.** Every caller value was taken at face;
16//!     no source-age decay, atom-derivation bump, or prior-corroboration
17//!     boost ever rewrote it.
18//!   * **Shadow-mode telemetry.** No mechanism existed to compare a
19//!     caller-provided value against a derived one on a live workload.
20//!   * **Calibration.** No per-namespace / per-source-role baseline was
21//!     ever computed from observed samples.
22//!   * **Freshness decay.** An old fact at confidence=0.9 ranked
23//!     identically to a fresh fact at the same value, despite human
24//!     memory and downstream LLM reasoning both treating recency as a
25//!     trust signal.
26//!
27//! This module is the Rust-side closeout. The schema half lives in
28//! `migrations/sqlite/0033_v07_form5_confidence_calibration.sql` and
29//! `migrations/postgres/0020_v07_form5_confidence_calibration.sql`.
30//!
31//! # Surface
32//!
33//! * [`derive`] — deterministic auto-derivation from row signals. Opt-in
34//!   via `AI_MEMORY_AUTO_CONFIDENCE=1`.
35//! * [`shadow::observe`] — writes per-recall samples to
36//!   `confidence_shadow_observations` when
37//!   `AI_MEMORY_CONFIDENCE_SHADOW=1`. Audit-honest: the caller value is
38//!   still the one used downstream; shadow never silently overrides.
//! * [`decay::decayed`] — exponential freshness decay following the
//!   half-life convention (`exp(-ln(2) * age / half_life)`, i.e. the
//!   value halves every `half_life`); operator opts in with
//!   `AI_MEMORY_CONFIDENCE_DECAY=1` or per-namespace
//!   `confidence_decay_half_life_days` policy.
43//! * [`calibrate::calibrate_from_shadow`] — computes per-source
44//!   baselines from the shadow-observations table. Driven by the
45//!   `ai-memory calibrate confidence` CLI and the
46//!   `memory_calibrate_confidence` MCP tool.
47
48use crate::models::{ConfidenceSignals, ConfidenceSource, Memory};
49
50pub mod calibrate;
51pub mod decay;
52pub mod shadow;
53
/// Name of the environment variable that opts a process in to the
/// auto-derive engine. Only the exact value `"1"` counts as "on"; when
/// the variable is unset or holds anything else, callers that gate on
/// [`auto_confidence_enabled`] keep the caller-supplied confidence,
/// which preserves the v0.6.x contract.
pub const ENV_AUTO_CONFIDENCE: &str = "AI_MEMORY_AUTO_CONFIDENCE";

/// Reports whether [`ENV_AUTO_CONFIDENCE`] is currently set to `"1"`.
///
/// The flag is read in exactly one place so the recall path, the store
/// path, and the tests cannot disagree about what "enabled" means. A
/// missing variable, a non-Unicode value, or any string other than
/// `"1"` yields `false`.
#[must_use]
pub fn auto_confidence_enabled() -> bool {
    let flag = std::env::var(ENV_AUTO_CONFIDENCE);
    matches!(flag.as_deref(), Ok("1"))
}
65
/// Context the [`derive`] engine consults at the moment it computes a
/// fresh confidence value.
///
/// Pulled out of the [`Memory`] payload because three of the five signals
/// require substrate-side queries (`prior_corroboration_count` is a
/// `COUNT(*)` over `memory_links`, `baseline_per_source` is a lookup in
/// the calibration table, `half_life_days` honours the per-namespace
/// policy override). The caller performs those substrate touches and
/// hands the results in here, so this module stays pure.
#[derive(Debug, Clone, Copy)]
pub struct DeriveContext {
    /// How long ago (in days) the cited source body was first observed.
    /// Drives the `freshness_factor` exponent. The caller computes this
    /// from `metadata.observed_at` (Form 4) or the row's `created_at`
    /// as a fallback. [`derive`] clamps negative values to `0.0`.
    pub source_age_days: f64,
    /// Whether the row is an atom of an existing memory
    /// (`atom_of IS NOT NULL`). Atom rows receive a +0.1 confidence
    /// bump because their provenance is anchored to a curator-validated
    /// parent.
    pub atom_derivation: bool,
    /// Count of `memory_links` with this row as `source_id`. More
    /// corroboration → higher confidence; the formula uses
    /// `log10(1 + count)` to keep the bump sub-linear. [`derive`]
    /// treats negative counts as zero.
    pub prior_corroboration_count: i64,
    /// Per-(namespace, source) baseline from the calibration table.
    /// Pass `0.5` when no calibrated baseline exists yet. [`derive`]
    /// clamps the value to `[0.0, 1.0]`.
    pub baseline_per_source: f64,
    /// Half-life (in days) used in the freshness decay computation.
    /// Defaults to [`DEFAULT_HALF_LIFE_DAYS`] when the operator hasn't
    /// overridden the value via namespace policy. [`derive`] raises
    /// anything below `f64::EPSILON` to `f64::EPSILON`, so its divisor
    /// never reaches zero.
    pub half_life_days: f64,
}

impl Default for DeriveContext {
    /// A brand-new, uncorroborated, non-atom row with the neutral `0.5`
    /// baseline and the crate-wide default half-life.
    fn default() -> Self {
        Self {
            source_age_days: 0.0,
            atom_derivation: false,
            prior_corroboration_count: 0,
            baseline_per_source: 0.5,
            // Single source of truth: keep the default context in lock-step
            // with the published constant instead of repeating the literal.
            half_life_days: DEFAULT_HALF_LIFE_DAYS,
        }
    }
}

/// Default half-life (in days) for the freshness-decay envelope.
/// 30 days mirrors a working agent's "this month vs. last month"
/// salience window; long-tier rows that survive a month already
/// have meaningful corroboration through the `access_count`
/// promotion loop, so the half-life acts as a soft floor rather
/// than a hard expiry.
pub const DEFAULT_HALF_LIFE_DAYS: f64 = 30.0;
120
121/// Deterministically derive a confidence value from row signals.
122///
123/// Returns `(confidence, signals, source_marker)`:
124///
125/// * `confidence` — value in `[0.0, 1.0]`. The formula is:
126///
127///   ```text
128///   base = 0.5
129///        + 0.1 * is_atom
130///        + 0.05 * log10(1 + corroboration)
131///        - 0.02 * source_age_days * decay_rate
132///   value = clamp(base, 0.0, 1.0) * freshness_factor
133///         + (1 - freshness_factor) * baseline_per_source
134///   ```
135///
136///   where `decay_rate = ln(2) / half_life_days` and
137///   `freshness_factor = exp(-age / half_life)`. The blend with the
138///   per-source baseline lets a well-calibrated source survive aging
139///   without collapsing to the freshness floor.
140///
141/// * `signals` — the [`ConfidenceSignals`] envelope that produced the
142///   value. Stored alongside the row on `memories.confidence_signals`
143///   so the derivation is reproducible.
144///
145/// * `source_marker` — typed discriminator for the
146///   `memories.confidence_source` column. Always [`ConfidenceSource::AutoDerived`]
147///   here; the [`shadow`] and [`calibrate`] paths use the other
148///   variants.
149///
150/// # Audit honesty
151///
152/// This function is pure — it does **not** touch the substrate, fire a
153/// hook, or read environment variables. The caller is responsible for
154/// gating on [`auto_confidence_enabled`] and only persisting the
155/// returned value when the opt-in is active. Tests can call it directly
156/// with handcrafted [`DeriveContext`] values and get bit-identical
157/// outputs across runs.
158#[must_use]
159pub fn derive(_memory: &Memory, ctx: &DeriveContext) -> (f64, ConfidenceSignals, ConfidenceSource) {
160    let age = ctx.source_age_days.max(0.0);
161    let half_life = ctx.half_life_days.max(f64::EPSILON);
162    let decay_rate = std::f64::consts::LN_2 / half_life;
163    // freshness_factor follows the standard half-life convention:
164    // value halves every `half_life_days`. Matches `decay::decayed`.
165    let freshness_factor = (-age * std::f64::consts::LN_2 / half_life)
166        .exp()
167        .clamp(0.0, 1.0);
168
169    let atom_bump = if ctx.atom_derivation { 0.1 } else { 0.0 };
170    let corroboration_bump = 0.05
171        * (1.0_f64 + ctx.prior_corroboration_count.max(0) as f64)
172            .log10()
173            .max(0.0);
174    let age_penalty = 0.02 * age * decay_rate;
175
176    let raw_base = 0.5 + atom_bump + corroboration_bump - age_penalty;
177    let clamped_base = raw_base.clamp(0.0, 1.0);
178    let baseline = ctx.baseline_per_source.clamp(0.0, 1.0);
179
180    let blended = clamped_base.mul_add(freshness_factor, baseline * (1.0 - freshness_factor));
181    let value = blended.clamp(0.0, 1.0);
182
183    let signals = ConfidenceSignals {
184        source_age_days: age,
185        atom_derivation: ctx.atom_derivation,
186        prior_corroboration_count: ctx.prior_corroboration_count,
187        freshness_factor,
188        baseline_per_source: baseline,
189    };
190
191    (value, signals, ConfidenceSource::AutoDerived)
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal [`Memory`] fixture. `derive` does not read its memory
    /// argument, so only the id is filled in.
    fn mem() -> Memory {
        Memory {
            id: "m1".into(),
            ..Memory::default()
        }
    }

    /// Runs [`derive`] against the fixture and keeps only the confidence.
    fn score(ctx: &DeriveContext) -> f64 {
        derive(&mem(), ctx).0
    }

    /// Puts [`ENV_AUTO_CONFIDENCE`] back to its pre-test state on drop, so
    /// a failing assertion cannot leak the flag into the rest of the run.
    struct EnvRestore(Option<std::ffi::OsString>);

    impl Drop for EnvRestore {
        fn drop(&mut self) {
            // SAFETY: same reasoning as in
            // `auto_confidence_env_gating_default_off`, the only user of
            // this guard.
            unsafe {
                match self.0.take() {
                    Some(previous) => std::env::set_var(ENV_AUTO_CONFIDENCE, previous),
                    None => std::env::remove_var(ENV_AUTO_CONFIDENCE),
                }
            }
        }
    }

    #[test]
    fn derive_atom_bump_lifts_score() {
        let no_atom = score(&DeriveContext {
            atom_derivation: false,
            ..Default::default()
        });
        let atom = score(&DeriveContext {
            atom_derivation: true,
            ..Default::default()
        });
        assert!(
            atom > no_atom,
            "atom-derivation should raise confidence: {atom} vs {no_atom}"
        );
    }

    #[test]
    fn derive_corroboration_lifts_score_sublinearly() {
        let with_count = |count: i64| {
            score(&DeriveContext {
                prior_corroboration_count: count,
                ..Default::default()
            })
        };

        // Monotonic: more corroboration never lowers the score.
        let low = with_count(1);
        let high = with_count(100);
        assert!(high > low, "corroboration should monotonically raise score");

        // Sub-linear: each extra corroborating link is worth less than
        // the one before it.
        let zero = with_count(0);
        let one = with_count(1);
        let two = with_count(2);
        let first_gain = one - zero;
        let second_gain = two - one;
        assert!(first_gain > 0.0 && second_gain > 0.0);
        assert!(
            first_gain > second_gain,
            "marginal gain should shrink: {first_gain} vs {second_gain}"
        );
    }

    #[test]
    fn derive_age_reduces_score() {
        let fresh = score(&DeriveContext {
            source_age_days: 0.0,
            ..Default::default()
        });
        let old = score(&DeriveContext {
            source_age_days: 365.0,
            ..Default::default()
        });
        assert!(
            fresh > old,
            "older sources should have lower confidence: {fresh} vs {old}"
        );
    }

    #[test]
    fn derive_clamps_to_unit_interval() {
        // Lower edge: ancient row, zero baseline.
        let floor = score(&DeriveContext {
            source_age_days: 10_000.0,
            atom_derivation: false,
            prior_corroboration_count: 0,
            baseline_per_source: 0.0,
            half_life_days: 30.0,
        });
        assert!((0.0..=1.0).contains(&floor), "value out of range: {floor}");

        // Upper edge: every bump maxed out pushes the raw base past 1.0,
        // which must saturate rather than overflow the interval.
        let ceiling = score(&DeriveContext {
            source_age_days: 0.0,
            atom_derivation: true,
            prior_corroboration_count: i64::MAX,
            baseline_per_source: 1.0,
            half_life_days: 30.0,
        });
        assert!(
            (0.0..=1.0).contains(&ceiling),
            "value out of range: {ceiling}"
        );
    }

    #[test]
    fn derive_returns_signals_envelope_matching_inputs() {
        let ctx = DeriveContext {
            source_age_days: 15.0,
            atom_derivation: true,
            prior_corroboration_count: 5,
            baseline_per_source: 0.6,
            half_life_days: 30.0,
        };
        let (_value, signals, source) = derive(&mem(), &ctx);
        assert_eq!(source, ConfidenceSource::AutoDerived);
        assert!((signals.source_age_days - 15.0).abs() < f64::EPSILON);
        assert!(signals.atom_derivation);
        assert_eq!(signals.prior_corroboration_count, 5);
        assert!((signals.baseline_per_source - 0.6).abs() < f64::EPSILON);
        // freshness at age=15, half_life=30 → 2^-0.5 ≈ 0.707
        assert!((signals.freshness_factor - 0.7071).abs() < 0.01);
    }

    #[test]
    fn derive_is_deterministic() {
        let ctx = DeriveContext {
            source_age_days: 7.5,
            atom_derivation: false,
            prior_corroboration_count: 3,
            baseline_per_source: 0.55,
            half_life_days: 30.0,
        };
        let a = score(&ctx);
        let b = score(&ctx);
        // The contract is bit-identical output, so compare raw bits
        // rather than allowing an epsilon of slack.
        assert_eq!(
            a.to_bits(),
            b.to_bits(),
            "derive must be deterministic for fixed inputs: {a} vs {b}"
        );
    }

    #[test]
    fn derive_never_returns_one_for_default_context() {
        // The default context yields a non-1.0 score (the legacy contract
        // was "caller value taken at face = 1.0"; the auto-derive engine
        // is designed to produce honest values away from the saturation
        // points). Pinning at 0.5 (the baseline) for the default context.
        let value = score(&DeriveContext::default());
        assert!(value < 1.0, "default context must not saturate: {value}");
        assert!((value - 0.5).abs() < 0.05);
    }

    #[test]
    fn auto_confidence_env_gating_default_off() {
        // Per the audit-honest contract: opt-in only. Unset or any value
        // other than "1" must read as disabled.
        //
        // This test mutates the process environment, which every test in
        // the binary shares. The guard restores the previous value even
        // if an assertion below panics.
        let _restore = EnvRestore(std::env::var_os(ENV_AUTO_CONFIDENCE));

        // SAFETY: environment mutation is only sound while no other
        // thread reads or writes the environment concurrently. This is
        // the only test in this module that touches it.
        // NOTE(review): tests elsewhere in the crate run in the same
        // process and could race on the environment — confirm, or
        // serialise env-touching tests.
        unsafe { std::env::remove_var(ENV_AUTO_CONFIDENCE) };
        assert!(!auto_confidence_enabled());

        // SAFETY: as above.
        unsafe { std::env::set_var(ENV_AUTO_CONFIDENCE, "0") };
        assert!(!auto_confidence_enabled());

        // SAFETY: as above.
        unsafe { std::env::set_var(ENV_AUTO_CONFIDENCE, "1") };
        assert!(auto_confidence_enabled());
    }
}
ai_memory/confidence/mod.rs

ai_memory/confidence/
mod.rs