anomalyx_detect/config.rs
1//! Detector configuration and the config-version fingerprint.
2//!
3//! The fingerprint goes into the envelope: *same input + same fingerprint ⇒
4//! same bytes*. Any change to a threshold that could change output also changes
5//! the fingerprint, so an agent can tell "the data changed" from "the tool's
6//! configuration changed."
7
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct DetectConfig {
12 /// Modified z-score threshold for the point detector (Iglewicz–Hoaglin
13 /// default is 3.5).
14 pub point_threshold: f64,
15 /// Minimum count of finite numeric values a column needs before the point
16 /// detector will assess it. Below this, statistics are unreliable.
17 pub point_min_n: usize,
18 /// When set, detectors consult each column's [`Role`](ax_core::Role) and skip
19 /// columns where their statistic is meaningless (e.g. the point detector
20 /// skips identifier/categorical/sequence columns). Roles are always reported
21 /// in the envelope; this only governs whether they affect detection.
22 pub column_roles: bool,
23 /// Optional false-discovery-rate (FDR) level for the point detector. When
24 /// set, the per-cell modified-z threshold is replaced by Benjamini–Hochberg
25 /// control at this level, applied within each column: a cell is flagged only
26 /// if its two-sided p-value survives BH, bounding the expected proportion of
27 /// false flags at `q`. `None` keeps the fixed `point_threshold` behavior.
28 pub point_fdr_q: Option<f64>,
29
30 /// Significance level for the KS and chi-square drift tests. A column is
31 /// flagged when the test's p-value falls below this.
32 pub dist_alpha: f64,
33 /// Population Stability Index threshold; PSI above this signals drift
34 /// (0.1 ≈ moderate, 0.2 ≈ significant by convention).
35 pub psi_threshold: f64,
36 /// Number of (baseline-quantile) bins used for PSI.
37 pub psi_bins: usize,
38 /// Minimum sample size (per side) before a distributional test runs.
39 pub dist_min_n: usize,
40
41 /// Null fraction above which the structural detector flags a column.
42 pub struct_null_rate: f64,
43
44 /// Significance level for the Mahalanobis multivariate test (per row).
45 /// Smaller than the per-column α because every row is tested.
46 pub mv_alpha: f64,
47 /// Minimum number of complete (no-missing) rows before the multivariate
48 /// detector will estimate a covariance and run.
49 pub mv_min_n: usize,
50 /// Relative ridge added to the covariance diagonal for numerical stability
51 /// (handles collinear / zero-variance columns). Scaled by the mean variance.
52 pub mv_ridge: f64,
53
54 /// Seasonal period for the contextual detector. `0` (or `1`) disables it —
55 /// seasonality is never guessed, so without a declared period the detector
56 /// reports honest absence.
57 pub ctx_period: usize,
58 /// Modified z-score threshold within a seasonal subseries.
59 pub ctx_threshold: f64,
60 /// Minimum finite values a phase needs before it is assessed.
61 pub ctx_min_per_phase: usize,
62
63 /// Minimum length of an ordered numeric column before the collective
64 /// (change-point) detector will run.
65 pub coll_min_n: usize,
66 /// Standardized mean-shift threshold for the collective detector. Set
67 /// conservatively because the change point is chosen by maximization.
68 pub coll_threshold: f64,
69
70 /// Column to assess for metronomic cadence (interpreted as event times).
71 /// `None` disables the cadence detector — which timestamps mean "time" is
72 /// never guessed, so without this it reports honest absence.
73 pub cadence_column: Option<String>,
74 /// Coefficient-of-variation threshold below which inter-arrival intervals
75 /// are flagged as suspiciously regular (automated).
76 pub cad_max_cv: f64,
77 /// Minimum number of intervals before cadence is assessed.
78 pub cad_min_n: usize,
79}
80
81impl Default for DetectConfig {
82 fn default() -> Self {
83 DetectConfig {
84 point_threshold: 3.5,
85 point_min_n: 8,
86 column_roles: true,
87 point_fdr_q: None,
88 dist_alpha: 0.05,
89 psi_threshold: 0.2,
90 psi_bins: 10,
91 dist_min_n: 20,
92 struct_null_rate: 0.5,
93 mv_alpha: 0.001,
94 mv_min_n: 20,
95 mv_ridge: 1e-9,
96 ctx_period: 0,
97 ctx_threshold: 3.5,
98 ctx_min_per_phase: 4,
99 coll_min_n: 20,
100 coll_threshold: 5.0,
101 cadence_column: None,
102 cad_max_cv: 0.05,
103 cad_min_n: 20,
104 }
105 }
106}
107
108impl DetectConfig {
109 /// A stable, human-legible fingerprint of the settings that affect output.
110 /// Deterministic: no wall-clock, no environment.
111 pub fn version(&self) -> String {
112 format!(
113 "anomalyx-cfg/9;pt={:.4};ptn={};cr={};pfdr={};da={:.4};psi={:.4};psib={};dmn={};snr={:.4};mva={:.5};mvn={};mvr={:e};cxp={};cxt={:.4};cxm={};cln={};clt={:.4};cdc={};cdcv={:.4};cdn={}",
114 self.point_threshold,
115 self.point_min_n,
116 self.column_roles,
117 self.point_fdr_q.map(|q| format!("{q:.4}")).unwrap_or_default(),
118 self.dist_alpha,
119 self.psi_threshold,
120 self.psi_bins,
121 self.dist_min_n,
122 self.struct_null_rate,
123 self.mv_alpha,
124 self.mv_min_n,
125 self.mv_ridge,
126 self.ctx_period,
127 self.ctx_threshold,
128 self.ctx_min_per_phase,
129 self.coll_min_n,
130 self.coll_threshold,
131 self.cadence_column.as_deref().unwrap_or(""),
132 self.cad_max_cv,
133 self.cad_min_n,
134 )
135 }
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// The fingerprint is reproducible for equal configs and moves whenever a
    /// setting that is part of it is changed.
    #[test]
    fn version_is_stable_and_reflects_changes() {
        // Two independently built defaults fingerprint identically.
        let baseline = DetectConfig::default().version();
        let again = DetectConfig::default().version();
        assert_eq!(baseline, again);

        // A different point threshold yields a different fingerprint.
        let raised_threshold = DetectConfig {
            point_threshold: 4.0,
            ..DetectConfig::default()
        }
        .version();
        assert_ne!(baseline, raised_threshold);

        // Enabling FDR control changes the fingerprint (and the empty default
        // renders as no value, so `pfdr=;` for the off case).
        let with_fdr = DetectConfig {
            point_fdr_q: Some(0.05),
            ..DetectConfig::default()
        }
        .version();
        assert_ne!(baseline, with_fdr);
        assert!(baseline.contains(";pfdr=;"));
        assert!(with_fdr.contains(";pfdr=0.0500;"));

        // Toggling column-role skipping changes the fingerprint.
        let roles_off = DetectConfig {
            column_roles: false,
            ..DetectConfig::default()
        }
        .version();
        assert_ne!(baseline, roles_off);
        assert!(baseline.contains(";cr=true;"));
        assert!(roles_off.contains(";cr=false;"));
    }
}