Skip to main content

anomalyx_detect/
lib.rs

1//! # ax-detect — the detector engine
2//!
3//! A [`Detector`] is a contract: given a normalized [`RecordSet`], it either
4//! *runs* and emits [`Finding`]s, or it declares honest [`Absence`] (e.g. "no
5//! numeric columns"). It never fabricates a clean result for data it couldn't
6//! assess. The [`Registry`] runs a set of detectors deterministically and
7//! collects everything into one [`Report`], which the CLI turns into a `tq1`
8//! envelope.
9//!
10//! All math routes through [`ax_core::det`], so every detector inherits
11//! order-independent, reproducible reductions.
12
13use ax_core::envelope::Absence;
14use ax_core::{AnomalyClass, Finding, RecordSet};
15
16pub mod cadence;
17pub mod calibrate;
18pub mod coll;
19pub mod config;
20pub mod ctx;
21pub mod dist;
22pub mod fdr;
23pub mod linalg;
24pub mod mv;
25pub mod point;
26pub mod robustz;
27pub mod structural;
28
29pub use cadence::CadenceDetector;
30pub use coll::CusumDetector;
31pub use config::DetectConfig;
32pub use ctx::SeasonalDetector;
33pub use dist::{Chi2Detector, KsDetector, PsiDetector};
34pub use mv::MahalanobisDetector;
35pub use point::PointDetector;
36pub use structural::SchemaDetector;
37
38/// The corpus (or pair of corpora) under assessment.
39///
40/// Single-corpus detectors (point, structural shape checks) read `current`.
41/// Drift detectors (distributional, schema-diff) require `baseline`; when it is
42/// `None` they declare honest [`Absence`] rather than inventing a comparison.
43#[derive(Debug, Clone, Copy)]
44pub struct ScanContext<'a> {
45    pub current: &'a RecordSet,
46    pub baseline: Option<&'a RecordSet>,
47}
48
49impl<'a> ScanContext<'a> {
50    /// A single-corpus context (no baseline).
51    pub fn single(current: &'a RecordSet) -> Self {
52        ScanContext {
53            current,
54            baseline: None,
55        }
56    }
57
58    /// A baseline-vs-current context.
59    pub fn compared(baseline: &'a RecordSet, current: &'a RecordSet) -> Self {
60        ScanContext {
61            current,
62            baseline: Some(baseline),
63        }
64    }
65}
66
67/// What a detector emits into the shared report. Detectors push findings and,
68/// when they cannot meaningfully run, mark themselves absent with a reason.
69#[derive(Debug, Default)]
70pub struct Report {
71    pub findings: Vec<Finding>,
72    pub absent: Vec<Absence>,
73}
74
75impl Report {
76    pub fn new() -> Self {
77        Report::default()
78    }
79
80    pub fn push(&mut self, f: Finding) {
81        self.findings.push(f);
82    }
83
84    /// Records that `detector` declined to run, with a machine-readable reason.
85    pub fn mark_absent(&mut self, detector: &str, reason: impl Into<String>) {
86        self.absent.push(Absence {
87            detector: detector.to_string(),
88            reason: reason.into(),
89        });
90    }
91
92    pub fn is_clean(&self) -> bool {
93        self.findings.is_empty()
94    }
95}
96
97/// A single anomaly-detection contract.
98pub trait Detector {
99    /// Stable, machine-readable identifier (appears in every finding).
100    fn id(&self) -> &'static str;
101
102    /// The taxonomy class this detector produces.
103    fn class(&self) -> AnomalyClass;
104
105    /// Assess `ctx`, pushing findings and/or an absence into `out`.
106    fn detect(&self, ctx: &ScanContext, cfg: &DetectConfig, out: &mut Report);
107}
108
109/// An ordered set of detectors. Order is fixed at registration, so output is
110/// deterministic; the envelope re-sorts findings, but absence order follows
111/// registration for a stable contract.
112pub struct Registry {
113    detectors: Vec<Box<dyn Detector>>,
114}
115
116impl Registry {
117    pub fn new() -> Self {
118        Registry {
119            detectors: Vec::new(),
120        }
121    }
122
123    /// The default detector set for this protocol version. Single-corpus
124    /// detectors run always; drift detectors run when a baseline is present and
125    /// otherwise report honest absence.
126    pub fn default_set() -> Self {
127        let mut r = Registry::new();
128        r.register(Box::new(PointDetector));
129        r.register(Box::new(SchemaDetector));
130        r.register(Box::new(KsDetector));
131        r.register(Box::new(PsiDetector));
132        r.register(Box::new(Chi2Detector));
133        r.register(Box::new(MahalanobisDetector));
134        r.register(Box::new(SeasonalDetector));
135        r.register(Box::new(CusumDetector));
136        r.register(Box::new(CadenceDetector));
137        r
138    }
139
140    pub fn register(&mut self, d: Box<dyn Detector>) -> &mut Self {
141        self.detectors.push(d);
142        self
143    }
144
145    pub fn ids(&self) -> Vec<&'static str> {
146        self.detectors.iter().map(|d| d.id()).collect()
147    }
148
149    /// Runs every detector against `ctx` and returns the merged report.
150    pub fn run(&self, ctx: &ScanContext, cfg: &DetectConfig) -> Report {
151        let mut out = Report::new();
152        for d in &self.detectors {
153            d.detect(ctx, cfg, &mut out);
154        }
155        out
156    }
157}
158
159impl Default for Registry {
160    fn default() -> Self {
161        Registry::default_set()
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::{Column, Value};

    /// Twelve integer cells cycling through 10, 11, 12.
    fn steady_cells() -> Vec<Value> {
        (0..12).map(|i| Value::Int(10 + i % 3)).collect()
    }

    /// Wraps `cells` as the only column, named "x", of a fresh record set.
    fn one_column_corpus(cells: Vec<Value>) -> RecordSet {
        RecordSet::new("-", "test", vec![Column::new("x", cells)])
    }

    /// Runs the default detector set over `corpus` with no baseline and the
    /// default configuration.
    fn scan_single(corpus: &RecordSet) -> Report {
        let registry = Registry::default_set();
        registry.run(&ScanContext::single(corpus), &DetectConfig::default())
    }

    #[test]
    fn report_is_clean_only_without_findings() {
        let mut report = Report::new();
        assert!(report.is_clean());

        let finding = Finding::new(
            "d",
            AnomalyClass::Point,
            ax_core::Handle::Column { name: "x".into() },
            0.9,
            1.0,
            "r",
        );
        report.push(finding);
        assert!(!report.is_clean());
    }

    #[test]
    fn registry_registers_the_default_detector_set() {
        let expected = [
            "point.modz",
            "struct.schema",
            "dist.ks",
            "dist.psi",
            "dist.chi2",
            "mv.mahalanobis",
            "ctx.seasonal",
            "coll.cusum",
            "cad.regularity",
        ];
        let registry = Registry::default_set();
        assert_eq!(registry.ids(), expected);
    }

    #[test]
    fn single_corpus_clean_numeric_has_no_point_findings() {
        let corpus = one_column_corpus(steady_cells());
        let report = scan_single(&corpus);
        // Nothing is flagged on steady data, and with no baseline the drift
        // detectors must declare themselves absent.
        assert!(report.findings.is_empty());
        assert!(report.absent.iter().any(|a| a.detector == "dist.ks"));
    }

    #[test]
    fn registry_run_surfaces_point_finding() {
        let mut cells = steady_cells();
        cells.push(Value::Int(100_000)); // unmistakable outlier
        let corpus = one_column_corpus(cells);
        let report = scan_single(&corpus);
        assert!(report.findings.iter().any(|f| f.detector == "point.modz"));
    }
}