// anomalyx_core/finding.rs

//! The anomaly taxonomy, findings, and evidence handles.
//!
//! No Rust crate ships a coherent *taxonomy* of anomaly kinds wired to an
//! explainable, ensembled output — that classification is the product. Every
//! detector, whatever math it runs, lands its output in one [`AnomalyClass`]
//! and emits [`Finding`]s carrying a stable [`Handle`] for `explain` to resolve.
7
8use crate::value::ColType;
9use serde::{Deserialize, Serialize};
10
11/// The top-level anomaly taxonomy. A detector declares which class it produces;
12/// the CLI groups and reports findings by class so an agent can reason about
13/// *kind* of deviation, not just "something is off."
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15#[serde(rename_all = "snake_case")]
16pub enum AnomalyClass {
17    /// A single value far from its column's distribution (z-score, MAD, IQR, ESD).
18    Point,
19    /// A value anomalous only in context (seasonal/time-of-day deviation).
20    Contextual,
21    /// A subsequence or group that is jointly anomalous (change-point, runs).
22    Collective,
23    /// The distribution itself shifted vs. a baseline (KS, PSI, KL, chi²).
24    Distributional,
25    /// Schema/type/shape violation (type drift, missing field, cardinality blowup).
26    Structural,
27    /// A multivariate point isolated in feature space (isolation forest, LOF, DBSCAN).
28    Multivariate,
29    /// Suspiciously regular timing (metronomic cadence).
30    Cadence,
31}
32
33impl AnomalyClass {
34    /// Stable machine token, also used as the dictionary key in the envelope.
35    pub fn token(self) -> &'static str {
36        match self {
37            AnomalyClass::Point => "point",
38            AnomalyClass::Contextual => "contextual",
39            AnomalyClass::Collective => "collective",
40            AnomalyClass::Distributional => "distributional",
41            AnomalyClass::Structural => "structural",
42            AnomalyClass::Multivariate => "multivariate",
43            AnomalyClass::Cadence => "cadence",
44        }
45    }
46
47    /// Every class, in stable order — the basis for `describe` output and for
48    /// deterministic grouping.
49    pub const ALL: [AnomalyClass; 7] = [
50        AnomalyClass::Point,
51        AnomalyClass::Contextual,
52        AnomalyClass::Collective,
53        AnomalyClass::Distributional,
54        AnomalyClass::Structural,
55        AnomalyClass::Multivariate,
56        AnomalyClass::Cadence,
57    ];
58}
59
60/// Severity buckets derived from confidence, used for the process exit code and
61/// for at-a-glance triage. Ordered so `max` is meaningful.
62#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
63#[serde(rename_all = "lowercase")]
64pub enum Severity {
65    Info,
66    Low,
67    Medium,
68    High,
69    Critical,
70}
71
72impl Severity {
73    /// Maps a calibrated confidence in `[0, 1]` to a severity bucket.
74    pub fn from_confidence(c: f64) -> Severity {
75        match c {
76            c if c >= 0.95 => Severity::Critical,
77            c if c >= 0.85 => Severity::High,
78            c if c >= 0.65 => Severity::Medium,
79            c if c >= 0.40 => Severity::Low,
80            _ => Severity::Info,
81        }
82    }
83}
84
85/// A stable, drill-able pointer to the evidence behind a finding.
86///
87/// Handles are the article's "handle-based evidence navigation": the `scan`
88/// summary stays compact, and `explain <handle>` resolves one back to the
89/// underlying record/column/cell. Their string form is canonical and stable
90/// across runs so an agent can cache and re-query them.
91#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92#[serde(tag = "kind", rename_all = "snake_case")]
93pub enum Handle {
94    /// A whole column, by name.
95    Column { name: String },
96    /// A single cell at `(column, row)`.
97    Cell { column: String, row: usize },
98    /// A contiguous row range `[start, end)` within a column.
99    Range {
100        column: String,
101        start: usize,
102        end: usize,
103    },
104    /// A distribution-level finding comparing `column` against a baseline.
105    Dist { column: String },
106    /// A whole row, by index — for multivariate findings that span columns.
107    Row { row: usize },
108}
109
110impl Handle {
111    /// Canonical wire form, e.g. `cell:amount:42` or `col:status`.
112    pub fn canonical(&self) -> String {
113        match self {
114            Handle::Column { name } => format!("col:{name}"),
115            Handle::Cell { column, row } => format!("cell:{column}:{row}"),
116            Handle::Range { column, start, end } => format!("range:{column}:{start}:{end}"),
117            Handle::Dist { column } => format!("dist:{column}"),
118            Handle::Row { row } => format!("row:{row}"),
119        }
120    }
121
122    /// Parses the canonical form. Returns `None` on any malformed handle so the
123    /// CLI can fail cleanly rather than guess (honest absence).
124    pub fn parse(s: &str) -> Option<Handle> {
125        let (kind, rest) = s.split_once(':')?;
126        match kind {
127            "col" => Some(Handle::Column {
128                name: rest.to_string(),
129            }),
130            "dist" => Some(Handle::Dist {
131                column: rest.to_string(),
132            }),
133            "row" => Some(Handle::Row {
134                row: rest.parse().ok()?,
135            }),
136            "cell" => {
137                let (column, row) = rest.rsplit_once(':')?;
138                Some(Handle::Cell {
139                    column: column.to_string(),
140                    row: row.parse().ok()?,
141                })
142            }
143            "range" => {
144                let mut it = rest.rsplitn(3, ':');
145                let end = it.next()?.parse().ok()?;
146                let start = it.next()?.parse().ok()?;
147                let column = it.next()?.to_string();
148                Some(Handle::Range { column, start, end })
149            }
150            _ => None,
151        }
152    }
153}
154
155/// One detected anomaly with everything an agent needs to act or drill in.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct Finding {
158    /// Detector that produced this finding (stable id).
159    pub detector: String,
160    pub class: AnomalyClass,
161    pub handle: Handle,
162    /// Calibrated confidence in `[0, 1]`.
163    pub confidence: f64,
164    pub severity: Severity,
165    /// Raw detector score, before calibration (e.g. a z-score). Interpretation
166    /// is detector-specific; kept for evidence, not comparison across detectors.
167    pub score: f64,
168    /// Short, human/agent-readable reason. No prose padding.
169    pub reason: String,
170    /// Optional column type context, for structural findings.
171    #[serde(skip_serializing_if = "Option::is_none")]
172    pub col_type: Option<ColType>,
173}
174
175impl Finding {
176    /// Builds a finding, deriving severity from `confidence`.
177    pub fn new(
178        detector: impl Into<String>,
179        class: AnomalyClass,
180        handle: Handle,
181        confidence: f64,
182        score: f64,
183        reason: impl Into<String>,
184    ) -> Self {
185        let confidence = confidence.clamp(0.0, 1.0);
186        Finding {
187            detector: detector.into(),
188            class,
189            handle,
190            confidence,
191            severity: Severity::from_confidence(confidence),
192            score,
193            reason: reason.into(),
194            col_type: None,
195        }
196    }
197
198    pub fn with_col_type(mut self, ty: ColType) -> Self {
199        self.col_type = Some(ty);
200        self
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Every handle shape must survive `canonical` -> `parse` unchanged,
    /// including column names that themselves contain the `:` separator.
    #[test]
    fn handle_roundtrips() {
        let cases = [
            Handle::Column {
                name: "status".into(),
            },
            Handle::Column {
                name: "ns:status".into(),
            },
            Handle::Cell {
                column: "amount".into(),
                row: 42,
            },
            Handle::Cell {
                column: "ns:amount".into(),
                row: 0,
            },
            Handle::Range {
                column: "ts".into(),
                start: 3,
                end: 9,
            },
            Handle::Range {
                column: "ns:ts".into(),
                start: 0,
                end: 1,
            },
            Handle::Dist {
                column: "score".into(),
            },
            Handle::Row { row: 7 },
        ];
        for h in cases {
            let s = h.canonical();
            assert_eq!(Handle::parse(&s), Some(h), "roundtrip failed for {s}");
        }
    }

    /// Unknown kinds, missing parts, and non-numeric indices all yield `None`.
    #[test]
    fn handle_rejects_garbage() {
        let malformed = [
            "",
            "nope",
            "bogus:x",
            "cell:amount:notanumber",
            "cell:42",
            "row:",
            "row:x",
            "row:-1",
            "range:ts:3",
            "range:ts:a:9",
            "range:ts:3:b",
        ];
        for s in malformed {
            assert_eq!(Handle::parse(s), None, "should reject {s:?}");
        }
    }

    #[test]
    fn class_tokens_are_exact() {
        assert_eq!(AnomalyClass::Point.token(), "point");
        assert_eq!(AnomalyClass::Distributional.token(), "distributional");
        assert_eq!(AnomalyClass::Cadence.token(), "cadence");
        // every class has a distinct, non-empty token
        let mut seen = HashSet::new();
        for c in AnomalyClass::ALL {
            assert!(!c.token().is_empty());
            assert!(seen.insert(c.token()), "duplicate token {}", c.token());
        }
        assert_eq!(seen.len(), AnomalyClass::ALL.len());
    }

    /// Pins each threshold from both sides: the boundary value itself and a
    /// value just below it, so a shifted threshold cannot slip through.
    #[test]
    fn severity_buckets_are_exact_at_boundaries() {
        let cases = [
            (0.96, Severity::Critical),
            (0.95, Severity::Critical),
            (0.9499, Severity::High),
            (0.90, Severity::High),
            (0.85, Severity::High),
            (0.8499, Severity::Medium),
            (0.70, Severity::Medium),
            (0.65, Severity::Medium),
            (0.6499, Severity::Low),
            (0.50, Severity::Low),
            (0.40, Severity::Low),
            (0.3999, Severity::Info),
            (0.30, Severity::Info),
            (0.0, Severity::Info),
        ];
        for (c, want) in cases {
            assert_eq!(Severity::from_confidence(c), want, "confidence {c}");
        }
    }

    /// Inputs outside `[0, 1]` land in the nearest end bucket; NaN compares
    /// false against every threshold and so lands in `Info`.
    #[test]
    fn severity_handles_out_of_range_inputs() {
        assert_eq!(Severity::from_confidence(1.5), Severity::Critical);
        assert_eq!(Severity::from_confidence(-0.1), Severity::Info);
        assert_eq!(Severity::from_confidence(f64::NAN), Severity::Info);
    }

    #[test]
    fn severity_is_monotonic_in_confidence() {
        let mut prev = Severity::Info;
        for c in [0.0, 0.4, 0.65, 0.85, 0.95, 1.0] {
            let s = Severity::from_confidence(c);
            assert!(s >= prev, "severity dropped at confidence {c}");
            prev = s;
        }
    }

    #[test]
    fn confidence_is_clamped() {
        let above = Finding::new(
            "d",
            AnomalyClass::Point,
            Handle::Column { name: "x".into() },
            5.0,
            9.0,
            "r",
        );
        assert_eq!(above.confidence, 1.0);
        assert_eq!(above.severity, Severity::Critical);

        let below = Finding::new(
            "d",
            AnomalyClass::Point,
            Handle::Column { name: "x".into() },
            -3.0,
            9.0,
            "r",
        );
        assert_eq!(below.confidence, 0.0);
        assert_eq!(below.severity, Severity::Info);
    }
}