Skip to main content

anomstream_core/
score_ci.rs

1//! Anomaly score with confidence interval.
2//!
3//! The bare [`crate::RandomCutForest::score`] returns a single
4//! [`crate::AnomalyScore`] — the mean of per-tree scores. SOC
5//! threshold tuning benefits from also knowing how tightly the
6//! trees agree: a score of `2.1 ± 0.05` is qualitatively different
7//! from `2.1 ± 0.8` even though both produce the same alert under
8//! a fixed `> 1.5` threshold. [`ScoreWithConfidence`] packages the
9//! mean plus a symmetric Gaussian CI derived from the per-tree
10//! standard error (`sqrt(var / n)`).
11//!
12//! # Default confidence level
13//!
14//! The out-of-the-box factor is `z = 1.96` — the classical 95 %
15//! normal-approximation CI. Callers that want a different level
16//! (99 % → `z = 2.576`; 90 % → `z = 1.645`) can call
17//! [`ScoreWithConfidence::ci`] with the desired `z`.
18//!
19//! # Statistical caveat
20//!
21//! Per-tree scores are IID under the RCF sampling contract, but
22//! the Gaussian approximation leans on the CLT — at `num_trees ≤
23//! 30` the intervals widen slightly vs. a bootstrap estimate.
24//! Good enough for SOC tuning, not for publication-grade error
25//! bars.
26
27#[cfg(not(feature = "std"))]
28#[allow(unused_imports)]
29use num_traits::Float;
30
31use crate::domain::AnomalyScore;
32
/// Default `z` factor for a 95 % normal-approximation CI.
///
/// [`ScoreWithConfidence::ci95`] passes this value to
/// [`ScoreWithConfidence::ci`].
pub const DEFAULT_Z_FACTOR: f64 = 1.96;
35
/// Scored point plus the per-tree sample statistics behind it.
///
/// The statistics let a caller judge how tightly the trees agree on
/// the mean; [`Self::ci`] turns them into a confidence interval.
///
/// Serialisable under the `serde` feature so SIEM / audit sinks
/// can emit the full confidence context alongside the raw score.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ScoreWithConfidence {
    /// Mean anomaly score — identical to
    /// [`crate::RandomCutForest::score`]'s output.
    pub score: AnomalyScore,
    /// Number of trees that contributed (= ensemble size minus any
    /// empty trees).
    pub trees_evaluated: usize,
    /// Unbiased sample standard deviation across per-tree scores.
    /// `0.0` when only one tree contributed.
    pub stddev: f64,
    /// Standard error of the mean: `stddev / sqrt(n)`.
    ///
    /// [`Self::ci`] multiplies this by `z` to obtain the interval's
    /// half-width, so the width of the `z = 1.96` confidence interval
    /// is `2 · z · stderr` — or less when the lower bound is clamped
    /// at zero.
    pub stderr: f64,
}
56
57impl ScoreWithConfidence {
58    /// Symmetric confidence interval `(lower, upper)` at factor `z`.
59    /// The interval is clamped at zero on the lower side — anomaly
60    /// scores are non-negative by construction.
61    #[must_use]
62    pub fn ci(&self, z: f64) -> (f64, f64) {
63        let mean = f64::from(self.score);
64        let half = z * self.stderr;
65        ((mean - half).max(0.0), mean + half)
66    }
67
68    /// 95 % CI — convenience wrapper around [`Self::ci`] with
69    /// [`DEFAULT_Z_FACTOR`].
70    #[must_use]
71    pub fn ci95(&self) -> (f64, f64) {
72        self.ci(DEFAULT_Z_FACTOR)
73    }
74
75    /// Relative stderr — `stderr / max(|mean|, ε)`. Mirrors the
76    /// metric used by [`crate::RandomCutForest::score_early_term`]
77    /// so callers can compare the two paths.
78    #[must_use]
79    pub fn relative_stderr(&self) -> f64 {
80        let denom = f64::from(self.score).abs().max(f64::EPSILON);
81        self.stderr / denom
82    }
83}
84
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::panic, clippy::float_cmp)]
mod tests {
    use super::*;

    /// Builds a fixture with `stderr = stddev / sqrt(n)`, or `0.0`
    /// when `n == 0` so the division is never by zero.
    fn mk(score: f64, n: usize, stddev: f64) -> ScoreWithConfidence {
        let stderr = if n > 0 {
            #[allow(clippy::cast_precision_loss)]
            let sqrt_n = (n as f64).sqrt();
            stddev / sqrt_n
        } else {
            0.0
        };
        ScoreWithConfidence {
            score: AnomalyScore::new(score).unwrap(),
            trees_evaluated: n,
            stddev,
            stderr,
        }
    }

    #[test]
    fn ci95_width_matches_1_96_stderr() {
        let s = mk(2.0, 100, 0.5);
        let (lo, hi) = s.ci95();
        let width = hi - lo;
        let expected = 2.0 * DEFAULT_Z_FACTOR * s.stderr;
        assert!((width - expected).abs() < 1e-9);
        // Far from the zero clamp the interval is centred on the mean.
        let mean = f64::from(s.score);
        assert!(((hi - mean) - (mean - lo)).abs() < 1e-9);
    }

    #[test]
    fn ci_lower_bound_clamps_to_zero() {
        // Mean 0.1, huge stderr → raw lower < 0; clamped to exactly 0.
        let s = mk(0.1, 4, 5.0);
        let (lo, hi) = s.ci95();
        assert_eq!(lo, 0.0);
        // The clamp must leave the upper bound untouched.
        let expected_hi = f64::from(s.score) + DEFAULT_Z_FACTOR * s.stderr;
        assert!((hi - expected_hi).abs() < 1e-9);
    }

    #[test]
    fn relative_stderr_equals_stderr_over_mean() {
        let s = mk(2.0, 100, 0.5);
        let rel = s.relative_stderr();
        assert!((rel - s.stderr / 2.0).abs() < 1e-9);
    }

    #[test]
    fn custom_z_factor() {
        let s = mk(2.0, 100, 0.5);
        let (lo99, hi99) = s.ci(2.576); // 99% CI
        let (lo95, hi95) = s.ci95();
        // 99% CI strictly wider than 95% CI on both sides; a
        // non-strict comparison would pass even if `z` were ignored.
        assert!(lo99 < lo95);
        assert!(hi99 > hi95);
    }
}
139}