// mnemo_core/anomaly/outlier.rs

1use serde::{Deserialize, Serialize};
2
3use crate::model::embedding_baseline::{EmbeddingBaseline, MIN_BASELINE_SAMPLES};
4use crate::model::memory::MemoryRecord;
5
/// Floor added to every per-dimension variance to stop the z-score from
/// exploding on degenerate dimensions (constant values produce variance
/// 0, which would otherwise divide by zero). It is chosen an order of
/// magnitude below the smallest variance observed across OpenAI + ONNX +
/// MiniLM embeddings in the mnemo test corpus.
const VARIANCE_FLOOR: f32 = 1e-6;
12
/// Result of scoring one record against a baseline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutlierScore {
    /// Normalised Mahalanobis-proxy: `sqrt(mean over dims of ((x - mu)^2 / var))`.
    /// Normalisation by dimension count keeps the score comparable across
    /// embedding sizes.
    pub z_score: f32,
    /// Threshold the caller supplied; `is_outlier = z_score >= threshold`.
    pub threshold: f32,
    /// Convenience flag so callers don't have to re-derive the comparison.
    pub is_outlier: bool,
    /// Count of dimensions whose individual squared z-score exceeded 9.0
    /// (i.e. >=3 standard deviations). A high count on long vectors is a
    /// useful tie-breaker: a single rogue dimension can lift the mean
    /// Mahalanobis without representing a real distribution shift.
    pub dims_flagged: u32,
    /// Baseline sample count — surfaced so callers can reason about
    /// reliability (below `MIN_BASELINE_SAMPLES` the score is still
    /// computed but `is_outlier` is pinned to `false`).
    pub baseline_n: u64,
}
34
35impl OutlierScore {
36    pub fn no_baseline(threshold: f32) -> Self {
37        Self {
38            z_score: 0.0,
39            threshold,
40            is_outlier: false,
41            dims_flagged: 0,
42            baseline_n: 0,
43        }
44    }
45}
46
47/// Score a single record's embedding against a trained baseline.
48///
49/// Returns a no-op score when:
50/// * the record has no embedding,
51/// * the baseline's dimensionality disagrees with the record's, or
52/// * the baseline holds fewer than [`MIN_BASELINE_SAMPLES`] samples.
53///
54/// Otherwise computes the mean per-dimension squared z-score and returns
55/// its square root, which is the standard normalised Mahalanobis proxy
56/// used in outlier-detection literature when only the diagonal is
57/// available.
58pub fn score_embedding_outlier(
59    record: &MemoryRecord,
60    baseline: &EmbeddingBaseline,
61    threshold: f32,
62) -> OutlierScore {
63    let Some(embedding) = record.embedding.as_ref() else {
64        return OutlierScore::no_baseline(threshold);
65    };
66    if embedding.len() != baseline.mu.len() || embedding.len() != baseline.cov_diag.len() {
67        return OutlierScore::no_baseline(threshold);
68    }
69    if baseline.n < MIN_BASELINE_SAMPLES {
70        return OutlierScore {
71            z_score: 0.0,
72            threshold,
73            is_outlier: false,
74            dims_flagged: 0,
75            baseline_n: baseline.n,
76        };
77    }
78
79    let d = embedding.len() as f32;
80    let mut sum_sq = 0.0f32;
81    let mut dims_flagged: u32 = 0;
82    for (i, &x) in embedding.iter().enumerate() {
83        let diff = x - baseline.mu[i];
84        let var = baseline.cov_diag[i].max(VARIANCE_FLOOR);
85        let sq_z = (diff * diff) / var;
86        if sq_z >= 9.0 {
87            dims_flagged += 1;
88        }
89        sum_sq += sq_z;
90    }
91    let z_score = (sum_sq / d).sqrt();
92    OutlierScore {
93        z_score,
94        threshold,
95        is_outlier: z_score >= threshold,
96        dims_flagged,
97        baseline_n: baseline.n,
98    }
99}
100
101/// Compute a fresh baseline from a slice of records. Records without
102/// embeddings are skipped; if fewer than 2 survive the function returns
103/// `None` — a baseline of 1 sample has zero variance everywhere and
104/// would pin `is_outlier` to `false` on every subsequent record anyway.
105///
106/// Variance is computed with Welford's online algorithm in one pass;
107/// although we don't need the online property here it's numerically
108/// stabler than the naive two-pass form on large batches.
109pub fn train_baseline(agent_id: &str, records: &[MemoryRecord]) -> Option<EmbeddingBaseline> {
110    let mut records_with_emb = records
111        .iter()
112        .filter_map(|r| r.embedding.as_ref().map(|e| (r, e)));
113
114    let (_first_record, first_emb) = records_with_emb.next()?;
115    let d = first_emb.len();
116    if d == 0 {
117        return None;
118    }
119    let mut count: u64 = 1;
120    let mut mean: Vec<f32> = first_emb.clone();
121    let mut m2: Vec<f32> = vec![0.0; d];
122
123    for (_r, emb) in records_with_emb {
124        if emb.len() != d {
125            continue; // skip dim-mismatched records silently
126        }
127        count += 1;
128        let n = count as f32;
129        for i in 0..d {
130            let x = emb[i];
131            let delta = x - mean[i];
132            mean[i] += delta / n;
133            let delta2 = x - mean[i];
134            m2[i] += delta * delta2;
135        }
136    }
137
138    if count < 2 {
139        return None;
140    }
141
142    let divisor = (count - 1) as f32;
143    let cov_diag: Vec<f32> = m2.iter().map(|v| v / divisor).collect();
144
145    Some(EmbeddingBaseline {
146        agent_id: agent_id.to_string(),
147        mu: mean,
148        cov_diag,
149        n: count,
150        updated_at: chrono::Utc::now().to_rfc3339(),
151    })
152}
153
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::memory::MemoryRecord;

    /// Build a record carrying the given embedding vector.
    fn record_with_embedding(embedding: Vec<f32>) -> MemoryRecord {
        let mut record = MemoryRecord::new("test-agent".to_string(), "x".to_string());
        record.embedding = Some(embedding);
        record
    }

    /// Deterministic pseudo-normal draw: alternating-sign offsets around
    /// `mean` scaled by `stddev`, plus a small per-dimension ramp —
    /// avoids pulling a full PRNG dep for a unit test.
    fn make_records(mean: f32, stddev: f32, n: usize, d: usize) -> Vec<MemoryRecord> {
        (0..n)
            .map(|i| {
                let sign = if i % 2 == 0 { 1.0 } else { -1.0 };
                let magnitude = stddev * ((i as f32 / n as f32).sin().abs() + 0.5);
                let embedding: Vec<f32> = (0..d)
                    .map(|k| mean + sign * magnitude + k as f32 * 0.001)
                    .collect();
                record_with_embedding(embedding)
            })
            .collect()
    }

    #[test]
    fn trains_baseline_from_records() {
        let corpus = make_records(0.1, 0.05, 40, 8);
        let baseline = train_baseline("test-agent", &corpus).expect("baseline");
        assert_eq!(baseline.mu.len(), 8);
        assert_eq!(baseline.cov_diag.len(), 8);
        assert_eq!(baseline.n, 40);
        assert_eq!(baseline.agent_id, "test-agent");
    }

    #[test]
    fn returns_none_on_no_embeddings() {
        let mut record = record_with_embedding(vec![0.1; 4]);
        record.embedding = None;
        assert!(train_baseline("a", &[record]).is_none());
    }

    #[test]
    fn in_distribution_not_flagged() {
        let corpus = make_records(0.1, 0.05, 60, 16);
        let baseline = train_baseline("a", &corpus).unwrap();
        // A training record itself must never be flagged.
        let score = score_embedding_outlier(&corpus[5], &baseline, 3.0);
        assert!(
            !score.is_outlier,
            "in-distribution record flagged: z={} dims_flagged={}",
            score.z_score, score.dims_flagged
        );
    }

    #[test]
    fn far_out_of_distribution_flagged() {
        let corpus = make_records(0.1, 0.05, 60, 16);
        let baseline = train_baseline("a", &corpus).unwrap();
        // Push every dimension 50 stddevs away from the learned mean.
        let mu0 = baseline.mu[0];
        let stddev0 = baseline.cov_diag[0].sqrt();
        let push = mu0 + 50.0 * stddev0.max(0.01);
        let mut attacker = corpus[0].clone();
        attacker.embedding = Some(vec![push; 16]);
        let score = score_embedding_outlier(&attacker, &baseline, 3.0);
        assert!(
            score.is_outlier,
            "far-OOD record not flagged: z={} threshold={}",
            score.z_score, score.threshold
        );
    }

    #[test]
    fn noisy_baseline_pins_is_outlier_false() {
        // Deliberately train on too few samples.
        let corpus = make_records(0.1, 0.05, 5, 8);
        let baseline = train_baseline("a", &corpus).unwrap();
        let score = score_embedding_outlier(&corpus[0], &baseline, 3.0);
        assert!(
            !score.is_outlier,
            "noisy baseline should pin is_outlier=false"
        );
        assert!(score.baseline_n < MIN_BASELINE_SAMPLES);
    }

    #[test]
    fn dim_mismatch_returns_no_op() {
        let corpus = make_records(0.1, 0.05, 40, 8);
        let baseline = train_baseline("a", &corpus).unwrap();
        // 16-dim embedding scored against an 8-dim baseline.
        let mut mismatched = corpus[0].clone();
        mismatched.embedding = Some(vec![0.1; 16]);
        let score = score_embedding_outlier(&mismatched, &baseline, 3.0);
        assert_eq!(score.z_score, 0.0);
        assert!(!score.is_outlier);
    }
}