anomstream-core 2026.4.1

Core streaming anomaly detectors + companion primitives (Random Cut Forest, per-feature EWMA / CUSUM, drift detectors, streaming stats) — part of the anomstream toolkit
Documentation
//! Early-termination scoring — stop traversing trees once the
//! running per-tree mean has converged tightly enough to be
//! actionable.
//!
//! The classic [`crate::RandomCutForest::score`] always walks every
//! tree, averaging the result. On most traffic — where the point
//! sits cleanly inside or outside the baseline — the first ~20 of
//! 100 trees already agree so closely that the remaining 80
//! traversals only refine the last digit of the score. Stopping
//! early cuts inline detection latency by 30-50 % on "obvious"
//! cases without losing alerting signal.
//!
//! [`crate::RandomCutForest::score_early_term`] walks trees sequentially,
//! maintains a Welford (1962) running mean / variance, and breaks
//! when the standard error of the mean falls below
//! [`EarlyTermConfig::confidence_threshold`] times the absolute
//! mean. The returned [`EarlyTermScore`] reports how many trees
//! were actually evaluated so callers can meter latency savings.
//!
//! The parallel [`crate::RandomCutForest::score`] path is unchanged — use
//! it when you do not care about tail latency and want the full
//! ensemble answer.

use alloc::format;

use crate::error::{RcfError, RcfResult};

/// Default minimum tree count before the early-term check kicks
/// in — picked so the running stderr estimate is stable enough to
/// trust.
pub const DEFAULT_MIN_TREES: usize = 16;

/// Default relative standard-error threshold — a running
/// `stderr / |mean|` below `0.05` (5 %) is narrow enough to stop.
pub const DEFAULT_CONFIDENCE_THRESHOLD: f64 = 0.05;

/// Validated early-term configuration.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EarlyTermConfig {
    /// Minimum tree count to evaluate before the early-term check
    /// is even tried. The convergence test needs enough samples to
    /// produce a non-degenerate stderr estimate.
    pub min_trees: usize,
    /// Relative standard-error threshold — stop as soon as
    /// `stderr / max(|mean|, ε)` drops below this value.
    pub confidence_threshold: f64,
}

impl Default for EarlyTermConfig {
    fn default() -> Self {
        Self {
            min_trees: DEFAULT_MIN_TREES,
            confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD,
        }
    }
}

impl EarlyTermConfig {
    /// Validate every field.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::InvalidConfig`] when `min_trees == 0`,
    /// when `confidence_threshold` is non-finite, or when it is
    /// outside `(0, 1]`.
    pub fn validate(&self) -> RcfResult<()> {
        if self.min_trees == 0 {
            return Err(RcfError::InvalidConfig(
                "EarlyTermConfig::min_trees must be > 0".into(),
            ));
        }
        if !self.confidence_threshold.is_finite()
            || self.confidence_threshold <= 0.0
            || self.confidence_threshold > 1.0
        {
            return Err(RcfError::InvalidConfig(
                format!(
                    "EarlyTermConfig::confidence_threshold must be in (0, 1], got {}",
                    self.confidence_threshold
                )
                .into(),
            ));
        }
        Ok(())
    }
}

/// Outcome of a [`crate::RandomCutForest::score_early_term`] call.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EarlyTermScore {
    /// Final scalar anomaly score — running mean at break time,
    /// identical in shape to the full-ensemble score.
    pub score: crate::domain::AnomalyScore,
    /// Number of trees that actually contributed before the
    /// detector broke out of the loop.
    pub trees_evaluated: usize,
    /// Total trees available in the forest — use with
    /// `trees_evaluated` to compute the latency savings.
    pub trees_available: usize,
    /// Standard error of the per-tree score mean at break time
    /// (`sqrt(var / n)`). Useful for caller-side confidence
    /// diagnostics.
    pub stderr: f64,
    /// `true` when the loop exited before every tree was walked,
    /// `false` when the full ensemble was traversed (low
    /// confidence, too few leaves in the forest, etc.).
    pub early_stopped: bool,
}