Skip to main content

anomalyx_detect/
config.rs

//! Detector configuration and the config-version fingerprint.
//!
//! The fingerprint goes into the envelope: *same input + same fingerprint ⇒
//! same bytes*. Any change to a threshold that could change output also changes
//! the fingerprint, so an agent can tell "the data changed" from "the tool's
//! configuration changed."
7
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct DetectConfig {
12    /// Modified z-score threshold for the point detector (Iglewicz–Hoaglin
13    /// default is 3.5).
14    pub point_threshold: f64,
15    /// Minimum count of finite numeric values a column needs before the point
16    /// detector will assess it. Below this, statistics are unreliable.
17    pub point_min_n: usize,
18    /// When set, detectors consult each column's [`Role`](ax_core::Role) and skip
19    /// columns where their statistic is meaningless (e.g. the point detector
20    /// skips identifier/categorical/sequence columns). Roles are always reported
21    /// in the envelope; this only governs whether they affect detection.
22    pub column_roles: bool,
23    /// Optional false-discovery-rate (FDR) level for the point detector. When
24    /// set, the per-cell modified-z threshold is replaced by Benjamini–Hochberg
25    /// control at this level, applied within each column: a cell is flagged only
26    /// if its two-sided p-value survives BH, bounding the expected proportion of
27    /// false flags at `q`. `None` keeps the fixed `point_threshold` behavior.
28    pub point_fdr_q: Option<f64>,
29
30    /// Significance level for the KS and chi-square drift tests. A column is
31    /// flagged when the test's p-value falls below this.
32    pub dist_alpha: f64,
33    /// Population Stability Index threshold; PSI above this signals drift
34    /// (0.1 ≈ moderate, 0.2 ≈ significant by convention).
35    pub psi_threshold: f64,
36    /// Number of (baseline-quantile) bins used for PSI.
37    pub psi_bins: usize,
38    /// Minimum sample size (per side) before a distributional test runs.
39    pub dist_min_n: usize,
40
41    /// Null fraction above which the structural detector flags a column.
42    pub struct_null_rate: f64,
43
44    /// Significance level for the Mahalanobis multivariate test (per row).
45    /// Smaller than the per-column α because every row is tested.
46    pub mv_alpha: f64,
47    /// Minimum number of complete (no-missing) rows before the multivariate
48    /// detector will estimate a covariance and run.
49    pub mv_min_n: usize,
50    /// Relative ridge added to the covariance diagonal for numerical stability
51    /// (handles collinear / zero-variance columns). Scaled by the mean variance.
52    pub mv_ridge: f64,
53
54    /// Seasonal period for the contextual detector. `0` (or `1`) disables it —
55    /// seasonality is never guessed, so without a declared period the detector
56    /// reports honest absence.
57    pub ctx_period: usize,
58    /// Modified z-score threshold within a seasonal subseries.
59    pub ctx_threshold: f64,
60    /// Minimum finite values a phase needs before it is assessed.
61    pub ctx_min_per_phase: usize,
62
63    /// Minimum length of an ordered numeric column before the collective
64    /// (change-point) detector will run.
65    pub coll_min_n: usize,
66    /// Standardized mean-shift threshold for the collective detector. Set
67    /// conservatively because the change point is chosen by maximization.
68    pub coll_threshold: f64,
69
70    /// Column to assess for metronomic cadence (interpreted as event times).
71    /// `None` disables the cadence detector — which timestamps mean "time" is
72    /// never guessed, so without this it reports honest absence.
73    pub cadence_column: Option<String>,
74    /// Coefficient-of-variation threshold below which inter-arrival intervals
75    /// are flagged as suspiciously regular (automated).
76    pub cad_max_cv: f64,
77    /// Minimum number of intervals before cadence is assessed.
78    pub cad_min_n: usize,
79}
80
81impl Default for DetectConfig {
82    fn default() -> Self {
83        DetectConfig {
84            point_threshold: 3.5,
85            point_min_n: 8,
86            column_roles: true,
87            point_fdr_q: None,
88            dist_alpha: 0.05,
89            psi_threshold: 0.2,
90            psi_bins: 10,
91            dist_min_n: 20,
92            struct_null_rate: 0.5,
93            mv_alpha: 0.001,
94            mv_min_n: 20,
95            mv_ridge: 1e-9,
96            ctx_period: 0,
97            ctx_threshold: 3.5,
98            ctx_min_per_phase: 4,
99            coll_min_n: 20,
100            coll_threshold: 5.0,
101            cadence_column: None,
102            cad_max_cv: 0.05,
103            cad_min_n: 20,
104        }
105    }
106}
107
108impl DetectConfig {
109    /// A stable, human-legible fingerprint of the settings that affect output.
110    /// Deterministic: no wall-clock, no environment.
111    pub fn version(&self) -> String {
112        format!(
113            "anomalyx-cfg/9;pt={:.4};ptn={};cr={};pfdr={};da={:.4};psi={:.4};psib={};dmn={};snr={:.4};mva={:.5};mvn={};mvr={:e};cxp={};cxt={:.4};cxm={};cln={};clt={:.4};cdc={};cdcv={:.4};cdn={}",
114            self.point_threshold,
115            self.point_min_n,
116            self.column_roles,
117            self.point_fdr_q.map(|q| format!("{q:.4}")).unwrap_or_default(),
118            self.dist_alpha,
119            self.psi_threshold,
120            self.psi_bins,
121            self.dist_min_n,
122            self.struct_null_rate,
123            self.mv_alpha,
124            self.mv_min_n,
125            self.mv_ridge,
126            self.ctx_period,
127            self.ctx_threshold,
128            self.ctx_min_per_phase,
129            self.coll_min_n,
130            self.coll_threshold,
131            self.cadence_column.as_deref().unwrap_or(""),
132            self.cad_max_cv,
133            self.cad_min_n,
134        )
135    }
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// The fingerprint is reproducible, and each output-affecting setting
    /// exercised here is visible in it.
    #[test]
    fn version_is_stable_and_reflects_changes() {
        let baseline = DetectConfig::default().version();

        // Two independently built defaults fingerprint identically.
        assert_eq!(baseline, DetectConfig::default().version());

        // Raising the point threshold yields a different fingerprint.
        let raised_threshold = DetectConfig {
            point_threshold: 4.0,
            ..Default::default()
        }
        .version();
        assert_ne!(baseline, raised_threshold);

        // Turning FDR control on changes the fingerprint; while it is off the
        // value is rendered empty (`pfdr=;`).
        let with_fdr = DetectConfig {
            point_fdr_q: Some(0.05),
            ..Default::default()
        }
        .version();
        assert_ne!(baseline, with_fdr);
        assert!(baseline.contains(";pfdr=;"));
        assert!(with_fdr.contains(";pfdr=0.0500;"));

        // Switching off column-role skipping changes the fingerprint.
        let roles_off = DetectConfig {
            column_roles: false,
            ..Default::default()
        }
        .version();
        assert_ne!(baseline, roles_off);
        assert!(baseline.contains(";cr=true;"));
        assert!(roles_off.contains(";cr=false;"));
    }
}