Skip to main content

ai_memory/cli/commands/
calibrate_confidence.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! v0.7.0 Form 5 (issue #758) — `ai-memory calibrate confidence
5//! --from-shadow` CLI subcommand.
6//!
7//! Reads `confidence_shadow_observations` from the last `--days N`
8//! days and emits a per-(namespace, source) baseline report. Two output
9//! formats:
10//!
11//!   * `--output-format json` (default): structured JSON envelope of
12//!     [`crate::confidence::calibrate::CalibrationReport`].
13//!   * `--output-format table`: a human-readable ASCII table with
14//!     `(namespace, source, count, median, mean, bucket-histogram)`
15//!     columns for quick operator review.
16//!
17//! Audit-honest contract: the sweep is **read-only**. Operators review
18//! the report before deciding whether to persist baselines into a
19//! calibration store (operator-driven in a follow-up; v0.7.0 ships the
20//! observation pipeline + report only).
21
22use std::path::Path;
23
24use anyhow::Result;
25use clap::{Args, ValueEnum};
26use rusqlite::Connection;
27
28use crate::cli::CliOutput;
29use crate::confidence::calibrate::{CalibrationReport, DEFAULT_WINDOW_DAYS, calibrate_from_shadow};
30
/// Output format for the calibration report.
//
// Chosen via the `output_format` field of `CalibrateConfidenceArgs`
// (`#[arg(long, value_enum, ...)]`). `ValueEnum` supplies the command-line
// parsing; `Default` resolves to `Json` through the `#[default]` marker below.
// NOTE(review): the `///` lines here are consumed by clap's derive and may be
// shown in `--help`, so they are intentionally left verbatim — confirm before
// rewording them.
#[derive(Debug, Clone, Copy, ValueEnum, Default)]
pub enum OutputFormat {
    /// Structured JSON envelope ([`CalibrationReport`]) — default.
    // `run` serialises the report with `serde_json::to_string_pretty` for
    // this variant.
    #[default]
    Json,
    /// Human-readable ASCII table.
    // `run` delegates to `render_table` for this variant.
    Table,
}
40
/// Top-level CLI args for `ai-memory calibrate <subcommand>`.
///
/// Only `confidence` is wired today; the verb stays open for future
/// calibration surfaces (e.g., recall blend weights) without re-pinning
/// the public CLI surface.
#[derive(Args, Debug, Clone)]
pub struct CalibrateArgs {
    // The selected `calibrate` subcommand. `#[command(subcommand)]` hands
    // parsing of the remaining arguments over to `CalibrateSubcommand`.
    #[command(subcommand)]
    pub subcommand: CalibrateSubcommand,
}
51
/// Subcommand discriminator.
//
// Each variant wraps the argument struct of one `calibrate` subcommand;
// `Confidence` is the only variant at present.
#[derive(clap::Subcommand, Debug, Clone)]
pub enum CalibrateSubcommand {
    /// Scan `confidence_shadow_observations` and emit per-(namespace,
    /// source) baselines.
    // Carries the parsed flags that `run` receives as `&CalibrateConfidenceArgs`.
    Confidence(CalibrateConfidenceArgs),
}
59
/// CLI args for `ai-memory calibrate confidence --from-shadow`.
//
// NOTE(review): the `///` lines on the fields below are consumed by clap's
// derive and presumably become the `--help` text, so they are left verbatim;
// reviewer notes are kept in plain `//` comments instead.
#[derive(Args, Debug, Clone)]
pub struct CalibrateConfidenceArgs {
    /// Read shadow observations rather than caller-confidence rows.
    /// Required in v0.7.0 (the only mode the sweep ships with); reserved
    /// for future modes like `--from-recall-traces`.
    // NOTE(review): this is a `bool` flag declared with
    // `default_value_t = true`, so the field presumably parses to `true`
    // whether or not `--from-shadow` is passed. If so, the "Required" wording
    // above is inaccurate and the `!args.from_shadow` guard in `run` is only
    // reachable when the struct is built by hand (as the unit tests do).
    // Confirm against clap's bool-flag semantics before relying on either.
    #[arg(long, default_value_t = true)]
    pub from_shadow: bool,

    /// Window size in days. Defaults to 30
    /// ([`crate::confidence::calibrate::DEFAULT_WINDOW_DAYS`]).
    // Passed unchanged to `calibrate_from_shadow` by `run`.
    // NOTE(review): the type is signed and no range check is visible in this
    // file, so `--days 0` or a negative value reaches the sweep as-is —
    // verify the sweep handles that.
    #[arg(long, default_value_t = DEFAULT_WINDOW_DAYS)]
    pub days: i64,

    /// Output format.
    // Defaults to `OutputFormat::Json`; `value_enum` makes clap parse the
    // value through the `ValueEnum` impl derived on `OutputFormat`.
    #[arg(long, value_enum, default_value_t = OutputFormat::Json)]
    pub output_format: OutputFormat,
}
78
79/// Dispatch entry-point. Called from `daemon_runtime::run`.
80///
81/// Returns `Ok(0)` on success and a non-zero exit code on a validated
82/// failure mode (DB unavailable, sweep error).
83///
84/// # Errors
85///
86/// Propagates DB and serialisation errors. The shadow observation
87/// table is created by the v39 migration; running the sweep against a
88/// pre-v39 DB surfaces the SQL error from the substrate.
89pub fn run(db_path: &Path, args: &CalibrateConfidenceArgs, out: &mut CliOutput<'_>) -> Result<i32> {
90    if !args.from_shadow {
91        writeln!(
92            out.stderr,
93            "calibrate confidence: --from-shadow is the only supported mode in v0.7.0; \
94             pass --from-shadow to scan the observation table."
95        )?;
96        return Ok(2);
97    }
98
99    let conn = Connection::open(db_path)?;
100    let report = calibrate_from_shadow(&conn, args.days, chrono::Utc::now())?;
101
102    let buf = match args.output_format {
103        OutputFormat::Json => serde_json::to_string_pretty(&report)?,
104        OutputFormat::Table => render_table(&report),
105    };
106    writeln!(out.stdout, "{buf}")?;
107    Ok(0)
108}
109
110/// Render the report as a fixed-width ASCII table. Format:
111///
112/// ```text
113/// CONFIDENCE CALIBRATION REPORT (window: 30 days, observations: 42)
114///
115/// NAMESPACE         SOURCE       COUNT  MEDIAN  MEAN   HISTOGRAM (0.0..1.0)
116/// ai-memory-mcp     user         12     0.62    0.61   ..#.##.#.##
117/// ai-memory-mcp     claude       8      0.74    0.73   ...#####.#.
118/// ```
119fn render_table(report: &CalibrationReport) -> String {
120    let mut out = String::new();
121    out.push_str(&format!(
122        "CONFIDENCE CALIBRATION REPORT (window: {} days, observations: {})\n\n",
123        report.window_days, report.total_observations
124    ));
125    out.push_str(&format!(
126        "{:<24}  {:<12}  {:>6}  {:>6}  {:>6}  HISTOGRAM (0.0..1.0)\n",
127        "NAMESPACE", "SOURCE", "COUNT", "MEDIAN", "MEAN"
128    ));
129    if report.baselines.is_empty() {
130        out.push_str("(no observations in window)\n");
131        return out;
132    }
133    for b in &report.baselines {
134        let hist: String = b
135            .buckets
136            .iter()
137            .map(|c| if *c == 0 { '.' } else { '#' })
138            .collect();
139        out.push_str(&format!(
140            "{:<24}  {:<12}  {:>6}  {:>6.2}  {:>6.2}  {hist}\n",
141            b.namespace, b.source, b.count, b.median, b.mean,
142        ));
143    }
144    out
145}
146
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::test_utils::TestEnv;

    /// With `from_shadow: false` the command must refuse to run: exit
    /// code 2 and a hint on stderr that names the flag.
    #[test]
    fn run_rejects_without_from_shadow() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        let args = CalibrateConfidenceArgs {
            from_shadow: false,
            days: 30,
            output_format: OutputFormat::Json,
        };
        let code = {
            let mut out = env.output();
            run(&db, &args, &mut out).expect("ok")
        };
        assert_eq!(code, 2);
        assert!(env.stderr_str().contains("--from-shadow"));
    }

    /// JSON mode against an empty, fully-migrated DB yields a parseable
    /// envelope that echoes the window and reports zero observations.
    #[test]
    fn run_json_output_on_fresh_db() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        // `crate::storage::open` materialises the schema (incl. the v39
        // shadow table) so calibrate_from_shadow runs against a real,
        // empty table. The handle is dropped right away; only the
        // on-disk schema matters here.
        let _ = crate::storage::open(&db).unwrap();
        let args = CalibrateConfidenceArgs {
            from_shadow: true,
            days: 7,
            output_format: OutputFormat::Json,
        };
        let code = {
            let mut out = env.output();
            run(&db, &args, &mut out).expect("ok")
        };
        assert_eq!(code, 0);
        let parsed: serde_json::Value =
            serde_json::from_str(env.stdout_str().trim()).expect("json");
        assert_eq!(parsed["window_days"].as_i64(), Some(7));
        assert_eq!(parsed["total_observations"].as_i64(), Some(0));
    }

    /// Table mode against an empty, fully-migrated DB succeeds and prints
    /// the report title.
    #[test]
    fn run_table_output_on_fresh_db() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        let _ = crate::storage::open(&db).unwrap();
        let args = CalibrateConfidenceArgs {
            from_shadow: true,
            days: 30,
            output_format: OutputFormat::Table,
        };
        let code = {
            let mut out = env.output();
            run(&db, &args, &mut out).expect("ok")
        };
        assert_eq!(code, 0);
        assert!(env.stdout_str().contains("CONFIDENCE CALIBRATION REPORT"));
    }

    /// A report with a 30-day window and no baselines.
    fn empty_report() -> CalibrationReport {
        CalibrationReport {
            window_days: 30,
            total_observations: 0,
            baselines: Vec::new(),
        }
    }

    #[test]
    fn render_table_handles_empty() {
        let s = render_table(&empty_report());
        assert!(s.contains("window: 30 days"));
        assert!(s.contains("no observations in window"));
    }

    #[test]
    fn render_table_emits_one_row_per_baseline() {
        let r = CalibrationReport {
            window_days: 7,
            total_observations: 3,
            baselines: vec![crate::confidence::calibrate::PerSourceBaseline {
                namespace: "ns".to_string(),
                source: "user".to_string(),
                count: 3,
                median: 0.5,
                mean: 0.55,
                buckets: [0, 0, 1, 0, 1, 1, 0, 0, 0, 0],
            }],
        };
        let s = render_table(&r);
        let lines: Vec<&str> = s.lines().collect();
        // Title, blank separator, column header, then exactly one data row.
        assert_eq!(lines.len(), 4, "unexpected table shape: {s:?}");

        // Assert on the data row itself. Searching the whole output for
        // "ns" proves nothing: the title's "observations" already
        // contains it.
        let row = lines[3];
        assert!(row.starts_with("ns "), "row: {row:?}");
        assert!(row.contains("user"), "row: {row:?}");
        assert!(row.contains("0.50"), "row: {row:?}");
        assert!(row.contains("0.55"), "row: {row:?}");
        // Buckets 2, 4 and 5 are non-empty.
        assert!(row.ends_with("..#.##...."), "row: {row:?}");
    }
}