Skip to main content

datasynth_eval/ml/
detectability.rs

1//! Detectability profile of a planted-anomaly population (engine feature A6).
2//!
3//! Each planted anomaly carries an *observability class* (A4): which detection arm can, in
4//! principle, surface it — per-JE **density**, relational **account-flow graph**, **temporal**, or
5//! cross-period **memory-only**. This analyzer summarizes how the planted population distributes
6//! across those arms, so a benchmark can report detection against the *per-signal ceiling* rather
7//! than one pooled score, and can surface the residual-faint **memory-only** tail — the family that
8//! defeats label-free residual detectors and motivates carry-forward memory (FINDINGS §12 / §40).
9//!
10//! Scope: this is the observability *ceiling* map computed from the ground-truth labels. The actual
11//! per-class detection ROC requires running the inverse-audit detector (the AuditDetector library /
12//! showcase); this report tells you what is *in principle* catchable by which arm.
13
14use crate::error::EvalResult;
15use datasynth_core::models::{LabeledAnomaly, ObservabilityClass};
16use serde::{Deserialize, Serialize};
17use std::collections::BTreeMap;
18
19/// The observability profile of a planted-anomaly population.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct DetectabilityReport {
22    /// Total planted anomalies considered.
23    pub total: usize,
24    /// Count per observability class (all four classes always present; 0 when absent).
25    pub by_class: BTreeMap<String, usize>,
26    /// Fraction of the population per observability class.
27    pub fraction_by_class: BTreeMap<String, f64>,
28    /// Fraction observable only with cross-period / cross-entity memory — the residual-faint tail
29    /// that label-free residual arms miss (FINDINGS §40-41).
30    pub memory_only_fraction: f64,
31    /// Fraction observable by a single-entry per-JE density residual.
32    pub per_je_density_fraction: f64,
33    /// Fraction observable in the relational account-flow graph.
34    pub relational_graph_fraction: f64,
35    /// Fraction observable only in the cross-period time series.
36    pub temporal_fraction: f64,
37    /// Counts cross-tabulated by observability class → anomaly category (Fraud/Error/…).
38    pub by_class_and_category: BTreeMap<String, BTreeMap<String, usize>>,
39}
40
/// Field-less analyzer that turns a slice of labeled anomalies into a [`DetectabilityReport`].
///
/// The type carries no configuration; it exists as a handle for the `analyze` method.
#[derive(Debug, Clone, Default)]
pub struct DetectabilityAnalyzer;
44
45impl DetectabilityAnalyzer {
46    /// New analyzer.
47    pub fn new() -> Self {
48        Self
49    }
50
51    /// Summarize the observability profile of `labels`.
52    pub fn analyze(&self, labels: &[LabeledAnomaly]) -> EvalResult<DetectabilityReport> {
53        let total = labels.len();
54        let mut by_class: BTreeMap<String, usize> = BTreeMap::new();
55        let mut by_class_and_category: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
56
57        // Seed all four classes so the schema is stable regardless of the population.
58        for c in [
59            ObservabilityClass::PerJeDensity,
60            ObservabilityClass::RelationalGraph,
61            ObservabilityClass::Temporal,
62            ObservabilityClass::MemoryOnly,
63        ] {
64            by_class.insert(c.as_str().to_string(), 0);
65        }
66
67        for l in labels {
68            let cls = l.observability.as_str().to_string();
69            *by_class.entry(cls.clone()).or_default() += 1;
70            *by_class_and_category
71                .entry(cls)
72                .or_default()
73                .entry(l.anomaly_type.category().to_string())
74                .or_default() += 1;
75        }
76
77        let denom = total.max(1) as f64;
78        let fraction_by_class: BTreeMap<String, f64> = by_class
79            .iter()
80            .map(|(k, v)| (k.clone(), *v as f64 / denom))
81            .collect();
82        let frac = |c: ObservabilityClass| *by_class.get(c.as_str()).unwrap_or(&0) as f64 / denom;
83
84        Ok(DetectabilityReport {
85            total,
86            memory_only_fraction: frac(ObservabilityClass::MemoryOnly),
87            per_je_density_fraction: frac(ObservabilityClass::PerJeDensity),
88            relational_graph_fraction: frac(ObservabilityClass::RelationalGraph),
89            temporal_fraction: frac(ObservabilityClass::Temporal),
90            by_class,
91            fraction_by_class,
92            by_class_and_category,
93        })
94    }
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::{AnomalyType, ErrorType, FraudType, RelationalAnomalyType};

    /// Absolute tolerance used when comparing fractions.
    const EPS: f64 = 1e-9;

    /// Build a labeled anomaly of type `at`; every other constructor argument is a fixed
    /// placeholder, since only the anomaly type matters to these tests.
    fn label(at: AnomalyType) -> LabeledAnomaly {
        let date = NaiveDate::from_ymd_opt(2026, 1, 1).unwrap();
        LabeledAnomaly::new(
            "A".to_string(),
            at,
            "JE".to_string(),
            "JE".to_string(),
            "1000".to_string(),
            date,
        )
    }

    #[test]
    fn profiles_population_across_observability_arms() {
        // One anomaly per arm, so every class count is 1 and every fraction is 1/4.
        let per_je_density = label(AnomalyType::Fraud(FraudType::RoundDollarManipulation));
        let memory_only = label(AnomalyType::Fraud(FraudType::DuplicatePayment));
        let relational_graph = label(AnomalyType::Relational(
            RelationalAnomalyType::CircularTransaction,
        ));
        let temporal = label(AnomalyType::Error(ErrorType::WrongPeriod));
        let labels = vec![per_je_density, memory_only, relational_graph, temporal];

        let report = DetectabilityAnalyzer::new().analyze(&labels).unwrap();

        assert_eq!(report.total, 4);
        for class in ["per_je_density", "memory_only", "relational_graph", "temporal"] {
            assert_eq!(report.by_class[class], 1);
        }
        assert!((report.memory_only_fraction - 0.25).abs() < EPS);
        assert!((report.per_je_density_fraction - 0.25).abs() < EPS);
        // The single memory-only anomaly is filed under the Fraud category in the cross-tab.
        assert_eq!(report.by_class_and_category["memory_only"]["Fraud"], 1);
    }

    #[test]
    fn empty_population_has_stable_zeroed_schema() {
        let no_labels: &[LabeledAnomaly] = &[];
        let report = DetectabilityAnalyzer::new().analyze(no_labels).unwrap();

        assert_eq!(report.total, 0);
        // The four classes are still keyed with zero counts, and the fraction is exactly 0.0
        // because the divisor is clamped to 1.
        assert_eq!(report.by_class.len(), 4);
        assert_eq!(report.memory_only_fraction, 0.0);
        assert_eq!(report.by_class["relational_graph"], 0);
    }
}