anomstream_core/early_term.rs
1//! Early-termination scoring — stop traversing trees once the
2//! running per-tree mean has converged tightly enough to be
3//! actionable.
4//!
5//! The classic [`crate::RandomCutForest::score`] always walks every
6//! tree, averaging the result. On most traffic — where the point
7//! sits cleanly inside or outside the baseline — the first ~20 of
8//! 100 trees already agree so closely that the remaining 80
9//! traversals only refine the last digit of the score. Stopping
10//! early cuts inline detection latency by 30-50 % on "obvious"
11//! cases without losing alerting signal.
12//!
13//! [`crate::RandomCutForest::score_early_term`] walks trees sequentially,
14//! maintains a Welford (1962) running mean / variance, and breaks
15//! when the standard error of the mean falls below
16//! [`EarlyTermConfig::confidence_threshold`] times the absolute
17//! mean. The returned [`EarlyTermScore`] reports how many trees
18//! were actually evaluated so callers can meter latency savings.
19//!
20//! The parallel [`crate::RandomCutForest::score`] path is unchanged — use
21//! it when you do not care about tail latency and want the full
22//! ensemble answer.
23
24use alloc::format;
25
26use crate::error::{RcfError, RcfResult};
27
/// Default minimum tree count before the early-term check kicks
/// in — picked so the running stderr estimate is stable enough to
/// trust.
///
/// This is the value [`EarlyTermConfig::default`] assigns to
/// [`EarlyTermConfig::min_trees`].
pub const DEFAULT_MIN_TREES: usize = 16;
32
/// Default relative standard-error threshold — a running
/// `stderr / |mean|` below `0.05` (5 %) is narrow enough to stop.
///
/// This is the value [`EarlyTermConfig::default`] assigns to
/// [`EarlyTermConfig::confidence_threshold`]; it lies inside the
/// `(0, 1]` range accepted by [`EarlyTermConfig::validate`].
pub const DEFAULT_CONFIDENCE_THRESHOLD: f64 = 0.05;
36
/// Validated early-term configuration.
///
/// Both fields are public, so a value can be built with a plain
/// struct literal; call [`EarlyTermConfig::validate`] to check that
/// the fields are in range before relying on them. The [`Default`]
/// impl fills the fields from [`DEFAULT_MIN_TREES`] and
/// [`DEFAULT_CONFIDENCE_THRESHOLD`].
///
/// With the `serde` feature enabled the type also derives
/// `Serialize` / `Deserialize`.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EarlyTermConfig {
    /// Minimum tree count to evaluate before the early-term check
    /// is even tried. The convergence test needs enough samples to
    /// produce a non-degenerate stderr estimate.
    ///
    /// Must be greater than zero to pass
    /// [`EarlyTermConfig::validate`].
    pub min_trees: usize,
    /// Relative standard-error threshold — stop as soon as
    /// `stderr / max(|mean|, ε)` drops below this value.
    ///
    /// Must be finite and inside `(0, 1]` to pass
    /// [`EarlyTermConfig::validate`].
    pub confidence_threshold: f64,
}
49
50impl Default for EarlyTermConfig {
51 fn default() -> Self {
52 Self {
53 min_trees: DEFAULT_MIN_TREES,
54 confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD,
55 }
56 }
57}
58
59impl EarlyTermConfig {
60 /// Validate every field.
61 ///
62 /// # Errors
63 ///
64 /// Returns [`RcfError::InvalidConfig`] when `min_trees == 0`,
65 /// when `confidence_threshold` is non-finite, or when it is
66 /// outside `(0, 1]`.
67 pub fn validate(&self) -> RcfResult<()> {
68 if self.min_trees == 0 {
69 return Err(RcfError::InvalidConfig(
70 "EarlyTermConfig::min_trees must be > 0".into(),
71 ));
72 }
73 if !self.confidence_threshold.is_finite()
74 || self.confidence_threshold <= 0.0
75 || self.confidence_threshold > 1.0
76 {
77 return Err(RcfError::InvalidConfig(
78 format!(
79 "EarlyTermConfig::confidence_threshold must be in (0, 1], got {}",
80 self.confidence_threshold
81 )
82 .into(),
83 ));
84 }
85 Ok(())
86 }
87}
88
/// Outcome of a [`crate::RandomCutForest::score_early_term`] call.
///
/// Bundles the score itself with the bookkeeping a caller needs to
/// judge how much work was skipped (`trees_evaluated` versus
/// `trees_available`) and how tight the estimate was (`stderr`).
///
/// With the `serde` feature enabled the type also derives
/// `Serialize` / `Deserialize`.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EarlyTermScore {
    /// Final scalar anomaly score — running mean at break time,
    /// identical in shape to the full-ensemble score.
    pub score: crate::domain::AnomalyScore,
    /// Number of trees that actually contributed before the
    /// detector broke out of the loop.
    pub trees_evaluated: usize,
    /// Total trees available in the forest — use with
    /// `trees_evaluated` to compute the latency savings.
    pub trees_available: usize,
    /// Standard error of the per-tree score mean at break time
    /// (`sqrt(var / n)`). Useful for caller-side confidence
    /// diagnostics.
    pub stderr: f64,
    /// `true` when the loop exited before every tree was walked,
    /// `false` when the full ensemble was traversed (low
    /// confidence, too few leaves in the forest, etc.).
    pub early_stopped: bool,
}