use std::sync::OnceLock;
use anyhow::{Context, Result};
use rusqlite::{Connection, params};
use crate::models::ConfidenceSignals;
/// Name of the environment variable that switches shadow mode on.
///
/// `ShadowConfig::from_env` treats shadow mode as enabled only when this
/// variable is set to exactly `"1"`; any other value (or an unset variable)
/// leaves it disabled.
pub const ENV_SHADOW: &str = "AI_MEMORY_CONFIDENCE_SHADOW";
/// Name of the environment variable holding the shadow sampling rate.
///
/// `ShadowConfig::from_env` parses the value as an `f64` and clamps it to
/// `[0.0, 1.0]`; when the variable is unset or does not parse, the rate
/// defaults to `1.0`.
pub const ENV_SHADOW_SAMPLE_RATE: &str = "AI_MEMORY_CONFIDENCE_SHADOW_SAMPLE_RATE";
/// Default retention window, in days.
///
/// NOTE(review): not referenced in this part of the file; presumably the
/// default `retention_days` argument for `gc_observations` — confirm against
/// callers.
pub const DEFAULT_SHADOW_RETENTION_DAYS: i64 = 30;
/// Shadow-mode settings read from the process environment.
///
/// Built by `ShadowConfig::from_env` and cached process-wide behind
/// `shadow_config()`.
#[derive(Debug, Clone, Copy)]
pub struct ShadowConfig {
/// `true` only when `AI_MEMORY_CONFIDENCE_SHADOW` is set to exactly `"1"`.
pub enabled: bool,
/// Sampling threshold that `should_sample` compares a uniform draw against.
/// `from_env` clamps the parsed value to `[0.0, 1.0]` and falls back to `1.0`
/// when the variable is unset or unparsable.
pub sample_rate: f64,
}
impl ShadowConfig {
    /// Builds the configuration from the process environment.
    ///
    /// * `enabled` is `true` only when [`ENV_SHADOW`] is set to exactly `"1"`.
    /// * `sample_rate` is read from [`ENV_SHADOW_SAMPLE_RATE`], parsed as an
    ///   `f64` and clamped to `[0.0, 1.0]`. An unset variable, an unparsable
    ///   value, or a NaN all fall back to `1.0`.
    fn from_env() -> Self {
        let enabled = std::env::var(ENV_SHADOW).is_ok_and(|v| v == "1");
        let sample_rate = std::env::var(ENV_SHADOW_SAMPLE_RATE)
            .ok()
            .and_then(|s| s.parse::<f64>().ok())
            // "NaN" is a valid `f64` literal for `parse`, and `clamp` returns
            // NaN unchanged. A NaN rate would make every `x < sample_rate`
            // comparison false, silently disabling sampling, so treat it like
            // any other unusable value and use the default instead.
            .filter(|v| !v.is_nan())
            .map(|v| v.clamp(0.0, 1.0))
            .unwrap_or(1.0);
        Self {
            enabled,
            sample_rate,
        }
    }
}
/// Process-wide cache for the shadow configuration; filled on first use by
/// `shadow_config()`.
static SHADOW_CONFIG: OnceLock<ShadowConfig> = OnceLock::new();
/// Returns the cached shadow configuration.
///
/// The first call reads the environment via `ShadowConfig::from_env` and
/// stores the result in `SHADOW_CONFIG`; every later call hands back the same
/// `'static` reference without re-reading the environment.
#[must_use]
pub fn shadow_config() -> &'static ShadowConfig {
    let load_from_env = ShadowConfig::from_env;
    SHADOW_CONFIG.get_or_init(load_from_env)
}
#[must_use]
pub fn shadow_enabled() -> bool {
shadow_config().enabled
}
#[must_use]
pub fn sample_rate() -> f64 {
shadow_config().sample_rate
}
#[must_use]
pub fn should_sample(uniform_0_1: f64) -> bool {
let cfg = shadow_config();
if !cfg.enabled {
return false;
}
uniform_0_1 < cfg.sample_rate
}
/// Test-only hook that currently does nothing.
///
/// NOTE(review): the name suggests it should clear the cached
/// `SHADOW_CONFIG`, but the body is empty, so a value cached by an earlier
/// `shadow_config()` call stays in place. `OnceLock` can only be emptied
/// through `&mut` access, which an immutable `static` does not offer —
/// confirm whether any test relies on a real reset.
#[cfg(test)]
#[doc(hidden)]
pub fn reset_shadow_config_for_tests() {
}
/// Appends one row to `confidence_shadow_observations`.
///
/// `signals` is serialised to JSON and stored in the `signals` column;
/// `observed_at` is set to the current UTC time in RFC 3339 form. All other
/// arguments are bound to the columns of the same name.
///
/// Returns the connection's last insert rowid after the insert.
///
/// # Errors
///
/// Fails when `signals` cannot be serialised (with the context
/// "serialise ConfidenceSignals envelope") or when SQLite rejects the insert.
// Each argument maps to one column, so the long parameter list is kept flat.
#[allow(clippy::too_many_arguments)]
pub fn observe(
    conn: &Connection,
    memory_id: &str,
    namespace: &str,
    source: &str,
    caller_confidence: f64,
    derived_confidence: f64,
    signals: &ConfidenceSignals,
    recall_outcome: Option<&str>,
) -> Result<i64> {
    // Continuation lines stay at column 0 so the SQL text is unchanged.
    const INSERT_OBSERVATION: &str = "INSERT INTO confidence_shadow_observations
(memory_id, namespace, source, caller_confidence, derived_confidence,
signals, recall_outcome, observed_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)";

    let envelope =
        serde_json::to_string(signals).context("serialise ConfidenceSignals envelope")?;
    let now = chrono::Utc::now().to_rfc3339();

    let rowid = conn
        .execute(
            INSERT_OBSERVATION,
            params![
                memory_id,
                namespace,
                source,
                caller_confidence,
                derived_confidence,
                envelope,
                recall_outcome,
                now,
            ],
        )
        .map(|_rows_changed| conn.last_insert_rowid())?;
    Ok(rowid)
}
/// Reads shadow observations, oldest first.
///
/// * `namespace` — when `Some`, only rows with that namespace are returned;
///   `None` disables the filter.
/// * `since` — when `Some`, only rows whose `observed_at` is `>=` the given
///   string are returned; `None` disables the filter.
///
/// Rows are ordered by `observed_at` ascending, then `id` ascending.
///
/// # Errors
///
/// Fails when the statement cannot be prepared or executed, or when any row
/// cannot be converted; the first conversion error aborts the read.
pub fn observations_since(
    conn: &Connection,
    namespace: Option<&str>,
    since: Option<&str>,
) -> Result<Vec<ShadowObservation>> {
    // Continuation lines stay at column 0 so the SQL text is unchanged.
    const SELECT_OBSERVATIONS: &str = "SELECT id, memory_id, namespace, source, caller_confidence, derived_confidence,
signals, recall_outcome, observed_at
FROM confidence_shadow_observations
WHERE (?1 IS NULL OR namespace = ?1)
AND (?2 IS NULL OR observed_at >= ?2)
ORDER BY observed_at ASC, id ASC";

    /// Converts one result row; column indices follow the SELECT list order.
    fn read_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<ShadowObservation> {
        Ok(ShadowObservation {
            id: row.get(0)?,
            memory_id: row.get(1)?,
            namespace: row.get(2)?,
            source: row.get(3)?,
            caller_confidence: row.get(4)?,
            derived_confidence: row.get(5)?,
            signals_json: row.get(6)?,
            recall_outcome: row.get(7)?,
            observed_at: row.get(8)?,
        })
    }

    let mut statement = conn.prepare(SELECT_OBSERVATIONS)?;
    let mapped = statement.query_map(params![namespace, since], read_row)?;

    let mut observations = Vec::new();
    for item in mapped {
        observations.push(item?);
    }
    Ok(observations)
}
/// Deletes shadow observations older than the retention window.
///
/// The cutoff is "now (UTC) minus `retention_days`", rendered as RFC 3339 and
/// compared in SQL against each row's `observed_at`.
///
/// * `retention_days <= 0` is a no-op and returns `Ok(0)`.
/// * A window so large that the cutoff cannot be represented also returns
///   `Ok(0)`: no row can be older than such a cutoff.
///
/// Returns the number of rows deleted.
///
/// # Errors
///
/// Fails when SQLite rejects the `DELETE`.
pub fn gc_observations(conn: &Connection, retention_days: i64) -> Result<usize> {
    if retention_days <= 0 {
        return Ok(0);
    }
    // `Duration::days` and `DateTime - Duration` both panic on overflow, so a
    // huge retention value (e.g. "keep forever" written as `i64::MAX`) would
    // crash the caller. Use the checked forms and treat overflow as "nothing
    // is old enough to delete".
    let Some(cutoff) = chrono::Duration::try_days(retention_days)
        .and_then(|window| chrono::Utc::now().checked_sub_signed(window))
    else {
        return Ok(0);
    };
    let cutoff = cutoff.to_rfc3339();
    let deleted = conn.execute(
        "DELETE FROM confidence_shadow_observations WHERE observed_at < ?1",
        params![cutoff],
    )?;
    Ok(deleted)
}
/// One row of `confidence_shadow_observations`, as read back by
/// `observations_since`.
#[derive(Debug, Clone)]
pub struct ShadowObservation {
/// Value of the `id` column.
pub id: i64,
/// Value of the `memory_id` column.
pub memory_id: String,
/// Value of the `namespace` column.
pub namespace: String,
/// Value of the `source` column.
pub source: String,
/// Confidence value passed to `observe` as `caller_confidence`.
pub caller_confidence: f64,
/// Confidence value passed to `observe` as `derived_confidence`.
pub derived_confidence: f64,
/// Raw text of the `signals` column; `observe` writes the JSON
/// serialisation of a `ConfidenceSignals` here.
pub signals_json: String,
/// Value of the nullable `recall_outcome` column.
pub recall_outcome: Option<String>,
/// Value of the `observed_at` column; `observe` writes the current UTC time
/// in RFC 3339 form.
pub observed_at: String,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::ConfidenceSignals;
    use crate::storage::open as open_storage;

    // SQL fixtures. Continuation lines stay at column 0 so the statement text
    // is unchanged.

    /// Seeds memory `m1` in namespace `ns`.
    const SEED_M1_IN_NS: &str = "INSERT INTO memories (id, tier, namespace, title, content, created_at, updated_at)
VALUES ('m1', 'mid', 'ns', 't', 'c', '2026-05-15T00:00:00Z', '2026-05-15T00:00:00Z')";

    /// Inserts one observation for `m1` dated 2020-01-01.
    const BACKDATED_OBSERVATION: &str = "INSERT INTO confidence_shadow_observations
(memory_id, namespace, source, caller_confidence,
derived_confidence, signals, recall_outcome, observed_at)
VALUES ('m1', 'ns', 'user', 0.9, 0.5, '{}', NULL, '2020-01-01T00:00:00Z')";

    /// Counts every row in the observations table.
    const COUNT_OBSERVATIONS: &str = "SELECT COUNT(*) FROM confidence_shadow_observations";

    /// Creates a database in a fresh temp dir via the storage layer, then
    /// opens a plain connection to the same file. The `TempDir` is returned so
    /// the directory outlives the connection.
    fn open_tmp() -> (Connection, tempfile::TempDir) {
        let tmp = tempfile::tempdir().expect("tmpdir");
        let db_file = tmp.path().join("test.db");
        let _ = open_storage(&db_file).expect("open storage");
        let conn = Connection::open(&db_file).expect("open conn");
        (conn, tmp)
    }

    /// A fixed signals envelope shared by all tests.
    fn signals_fixture() -> ConfidenceSignals {
        ConfidenceSignals {
            source_age_days: 7.0,
            atom_derivation: false,
            prior_corroboration_count: 2,
            freshness_factor: 0.84,
            baseline_per_source: 0.5,
        }
    }

    /// Calls `observe` with source `"user"`, the shared fixture and no recall
    /// outcome; the caller decides how to unwrap the result.
    fn record(
        conn: &Connection,
        memory_id: &str,
        namespace: &str,
        caller: f64,
        derived: f64,
    ) -> Result<i64> {
        observe(
            conn,
            memory_id,
            namespace,
            "user",
            caller,
            derived,
            &signals_fixture(),
            None,
        )
    }

    /// Inserts `count` observations dated 2020-01-01.
    fn insert_backdated(conn: &Connection, count: usize) {
        for _ in 0..count {
            conn.execute(BACKDATED_OBSERVATION, []).unwrap();
        }
    }

    /// Total number of rows in the observations table.
    fn total_observations(conn: &Connection) -> i64 {
        conn.query_row(COUNT_OBSERVATIONS, [], |r| r.get(0))
            .unwrap()
    }

    /// Sets an environment variable for the current process.
    fn set_env(key: &str, value: &str) {
        // SAFETY: sound only while no other thread reads or writes the process
        // environment. NOTE(review): the test harness runs tests on parallel
        // threads by default, so this relies on no concurrent env access —
        // confirm, or serialise the env-touching tests.
        unsafe { std::env::set_var(key, value) };
    }

    /// Removes an environment variable from the current process.
    fn clear_env(key: &str) {
        // SAFETY: same requirement as `set_env` — no concurrent access to the
        // process environment while this runs.
        unsafe { std::env::remove_var(key) };
    }

    #[test]
    fn observe_appends_row() {
        let (conn, _dir) = open_tmp();
        conn.execute(SEED_M1_IN_NS, []).expect("seed mem");

        let id = record(&conn, "m1", "ns", 0.9, 0.6).expect("observe ok");
        assert!(id > 0);

        let rows = observations_since(&conn, Some("ns"), None).expect("read back");
        assert_eq!(rows.len(), 1);
        let only = &rows[0];
        assert!((only.caller_confidence - 0.9).abs() < f64::EPSILON);
        assert!((only.derived_confidence - 0.6).abs() < f64::EPSILON);
        assert_eq!(only.source, "user");
    }

    #[test]
    fn observations_filter_by_namespace() {
        let (conn, _dir) = open_tmp();
        conn.execute(
            "INSERT INTO memories (id, tier, namespace, title, content, created_at, updated_at)
VALUES ('m1', 'mid', 'ns_a', 't1', 'c', '2026-05-15T00:00:00Z', '2026-05-15T00:00:00Z')",
            [],
        )
        .expect("seed mem a");
        conn.execute(
            "INSERT INTO memories (id, tier, namespace, title, content, created_at, updated_at)
VALUES ('m2', 'mid', 'ns_b', 't2', 'c', '2026-05-15T00:00:00Z', '2026-05-15T00:00:00Z')",
            [],
        )
        .expect("seed mem b");

        record(&conn, "m1", "ns_a", 0.9, 0.6).unwrap();
        record(&conn, "m2", "ns_b", 0.8, 0.5).unwrap();

        let only_a = observations_since(&conn, Some("ns_a"), None).expect("read ns_a");
        assert_eq!(only_a.len(), 1);
        assert_eq!(only_a[0].namespace, "ns_a");

        let everything = observations_since(&conn, None, None).expect("read all");
        assert_eq!(everything.len(), 2);
    }

    #[test]
    fn gc_observations_drops_old_rows_only() {
        let (conn, _dir) = open_tmp();
        conn.execute(SEED_M1_IN_NS, []).unwrap();

        // 50 rows stamped "now" must survive; 50 rows from 2020 must go.
        for _ in 0..50 {
            record(&conn, "m1", "ns", 0.9, 0.5).unwrap();
        }
        insert_backdated(&conn, 50);

        let dropped = gc_observations(&conn, 30).expect("gc");
        assert_eq!(dropped, 50);
        assert_eq!(total_observations(&conn), 50);
    }

    #[test]
    fn gc_observations_zero_retention_is_noop() {
        let (conn, _dir) = open_tmp();
        conn.execute(SEED_M1_IN_NS, []).unwrap();
        insert_backdated(&conn, 10);

        assert_eq!(gc_observations(&conn, 0).expect("gc 0"), 0);
        assert_eq!(gc_observations(&conn, -5).expect("gc -5"), 0);
        assert_eq!(total_observations(&conn), 10);
    }

    #[test]
    fn shadow_config_caches_on_first_call() {
        let first = shadow_config();
        let second = shadow_config();
        assert_eq!(first.enabled, second.enabled);
        assert!((first.sample_rate - second.sample_rate).abs() < f64::EPSILON);
        // Same allocation, not just equal values.
        assert!(std::ptr::eq(first, second));
    }

    #[test]
    fn shadow_config_from_env_reads_both_vars() {
        // Defaults with both variables unset.
        clear_env(ENV_SHADOW);
        clear_env(ENV_SHADOW_SAMPLE_RATE);
        let cfg = ShadowConfig::from_env();
        assert!(!cfg.enabled);
        assert!((cfg.sample_rate - 1.0).abs() < f64::EPSILON);

        // Both variables set to valid values.
        set_env(ENV_SHADOW, "1");
        set_env(ENV_SHADOW_SAMPLE_RATE, "0.5");
        let cfg = ShadowConfig::from_env();
        assert!(cfg.enabled);
        assert!((cfg.sample_rate - 0.5).abs() < f64::EPSILON);

        // Out-of-range values clamp; unparsable values fall back to 1.0.
        for (raw, expected) in [("2.0", 1.0), ("-1.0", 0.0), ("garbage", 1.0)] {
            set_env(ENV_SHADOW_SAMPLE_RATE, raw);
            let cfg = ShadowConfig::from_env();
            assert!((cfg.sample_rate - expected).abs() < f64::EPSILON);
        }

        clear_env(ENV_SHADOW);
        clear_env(ENV_SHADOW_SAMPLE_RATE);
    }
}