anomstream_core/score_ci.rs
1//! Anomaly score with confidence interval.
2//!
3//! The bare [`crate::RandomCutForest::score`] returns a single
4//! [`crate::AnomalyScore`] — the mean of per-tree scores. SOC
5//! threshold tuning benefits from also knowing how tightly the
6//! trees agree: a score of `2.1 ± 0.05` is qualitatively different
7//! from `2.1 ± 0.8` even though both produce the same alert under
8//! a fixed `> 1.5` threshold. [`ScoreWithConfidence`] packages the
9//! mean plus a symmetric Gaussian CI derived from the per-tree
10//! standard error (`sqrt(var / n)`).
11//!
12//! # Default confidence level
13//!
14//! The out-of-the-box factor is `z = 1.96` — the classical 95 %
15//! normal-approximation CI. Callers that want a different level
16//! (99 % → `z = 2.576`; 90 % → `z = 1.645`) can call
17//! [`ScoreWithConfidence::ci`] with the desired `z`.
18//!
19//! # Statistical caveat
20//!
21//! Per-tree scores are IID under the RCF sampling contract, but
22//! the Gaussian approximation leans on the CLT — at `num_trees ≤
23//! 30` the intervals widen slightly vs. a bootstrap estimate.
24//! Good enough for SOC tuning, not for publication-grade error
25//! bars.
26
27#[cfg(not(feature = "std"))]
28#[allow(unused_imports)]
29use num_traits::Float;
30
31use crate::domain::AnomalyScore;
32
/// Standard-normal multiplier used for the default two-sided 95 %
/// confidence interval (normal approximation).
pub const DEFAULT_Z_FACTOR: f64 = 1.96;
35
36/// Scored point plus the per-tree sample statistics behind it.
37///
38/// Serialisable under the `serde` feature so SIEM / audit sinks
39/// can emit the full confidence context alongside the raw score.
40#[derive(Debug, Clone, Copy, PartialEq)]
41#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
42pub struct ScoreWithConfidence {
43 /// Mean anomaly score — identical to
44 /// [`crate::RandomCutForest::score`]'s output.
45 pub score: AnomalyScore,
46 /// Number of trees that contributed (= ensemble size minus any
47 /// empty trees).
48 pub trees_evaluated: usize,
49 /// Unbiased sample standard deviation across per-tree scores.
50 /// `0.0` when only one tree contributed.
51 pub stddev: f64,
52 /// Standard error of the mean: `stddev / sqrt(n)`. The width of
53 /// the `z = 1.96` confidence interval is `2 · z · stderr`.
54 pub stderr: f64,
55}
56
57impl ScoreWithConfidence {
58 /// Symmetric confidence interval `(lower, upper)` at factor `z`.
59 /// The interval is clamped at zero on the lower side — anomaly
60 /// scores are non-negative by construction.
61 #[must_use]
62 pub fn ci(&self, z: f64) -> (f64, f64) {
63 let mean = f64::from(self.score);
64 let half = z * self.stderr;
65 ((mean - half).max(0.0), mean + half)
66 }
67
68 /// 95 % CI — convenience wrapper around [`Self::ci`] with
69 /// [`DEFAULT_Z_FACTOR`].
70 #[must_use]
71 pub fn ci95(&self) -> (f64, f64) {
72 self.ci(DEFAULT_Z_FACTOR)
73 }
74
75 /// Relative stderr — `stderr / max(|mean|, ε)`. Mirrors the
76 /// metric used by [`crate::RandomCutForest::score_early_term`]
77 /// so callers can compare the two paths.
78 #[must_use]
79 pub fn relative_stderr(&self) -> f64 {
80 let denom = f64::from(self.score).abs().max(f64::EPSILON);
81 self.stderr / denom
82 }
83}
84
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::panic, clippy::float_cmp)]
mod tests {
    use super::*;

    /// Build a fixture from a mean score, a tree count and a per-tree
    /// standard deviation, deriving `stderr = stddev / sqrt(n)`
    /// (`0.0` when `n == 0`).
    fn mk(score: f64, n: usize, stddev: f64) -> ScoreWithConfidence {
        let stderr = if n > 0 {
            #[allow(clippy::cast_precision_loss)]
            let root_n = (n as f64).sqrt();
            stddev / root_n
        } else {
            0.0
        };
        ScoreWithConfidence {
            score: AnomalyScore::new(score).unwrap(),
            trees_evaluated: n,
            stddev,
            stderr,
        }
    }

    #[test]
    fn ci95_width_matches_1_96_stderr() {
        let s = mk(2.0, 100, 0.5);
        let (lo, hi) = s.ci95();
        let width = hi - lo;
        let expected = 2.0 * DEFAULT_Z_FACTOR * s.stderr;
        assert!((width - expected).abs() < 1e-9);
    }

    #[test]
    fn ci_lower_bound_clamps_to_zero() {
        // Mean 0.1, huge stderr → raw lower < 0; clamped to exactly 0
        // while the upper bound is left untouched.
        let s = mk(0.1, 4, 5.0);
        let (lo, hi) = s.ci95();
        assert_eq!(lo, 0.0);
        assert!((hi - (0.1 + DEFAULT_Z_FACTOR * s.stderr)).abs() < 1e-9);
    }

    #[test]
    fn zero_spread_collapses_interval_to_the_mean() {
        // A single contributing tree has no spread, so the interval
        // degenerates to the point estimate.
        let s = mk(2.0, 1, 0.0);
        assert_eq!(s.stderr, 0.0);
        assert_eq!(s.ci95(), (2.0, 2.0));
    }

    #[test]
    fn relative_stderr_equals_stderr_over_mean() {
        let s = mk(2.0, 100, 0.5);
        let rel = s.relative_stderr();
        assert!((rel - s.stderr / 2.0).abs() < 1e-9);
    }

    #[test]
    fn custom_z_factor() {
        let s = mk(2.0, 100, 0.5);
        let (lo99, hi99) = s.ci(2.576); // 99% CI
        let (lo95, hi95) = s.ci95();
        // With a non-zero stderr and no clamping in play, the 99% CI
        // must be strictly wider than the 95% CI on both sides.
        assert!(lo99 < lo95);
        assert!(hi99 > hi95);
    }
}
139}