Skip to main content

anomstream_core/
dynamic_forest.rs

//! Runtime-dim wrapper for `RandomCutForest` — unblocks
//! heterogeneous multi-tenant deployments where every tenant
//! ships its own feature-vector width (MSSP pools, per-tenant
//! feature extractors).
//!
//! The bare `RandomCutForest<D>` is const-generic on `D` — every
//! distinct dim needs its own monomorphisation. A
//! `TenantForestPool<K, D>` then has a single `D` across every
//! tenant, forcing operators to whitelist dim values at compile
//! time. [`DynamicForest<MAX_D>`] sidesteps the constraint by
//! picking a maximum dim at compile time and zero-padding every
//! caller-supplied point that is shorter than `MAX_D`. The forest
//! internally scores the zero-padded vector; the padded dims
//! (`active_dim..MAX_D`) contribute no range and therefore no
//! attribution.
//!
//! # What this buys
//!
//! - One monomorphisation for every tenant whose dim is `≤ MAX_D`.
//! - No API break for callers who already have `[f64; D]` —
//!   [`DynamicForest::update`] / [`DynamicForest::score`] take
//!   `&[f64]` of runtime length.
//!
//! # What this costs
//!
//! - Zero-padding adds `MAX_D − active_dim` dimensions with
//!   permanent range `0.0` per tenant. RCF's range-weighted cut
//!   sampling naturally skips these dims (zero range = never cut
//!   on); there is no AUC impact, but reservoir memory is paid at
//!   `MAX_D` always.
//! - Callers mixing narrow and wide tenants in the same pool pay
//!   the `MAX_D` memory for every tenant; size `MAX_D` to the
//!   widest expected tenant.
//!
//! # Not a replacement for the const-generic path
//!
//! Hot-path callers with a fixed, known `D` should keep using
//! [`crate::RandomCutForest<D>`] — the const-generic path is
//! faster (fewer runtime checks, better inlining) and more
//! idiomatic. [`DynamicForest`] is the escape hatch for MSSP /
//! heterogeneous-tenant deployments where compile-time `D` is a
//! dealbreaker.
42
43#![cfg(feature = "std")]
44
45use crate::config::ForestBuilder;
46use crate::domain::{AnomalyScore, DiVector};
47use crate::error::{RcfError, RcfResult};
48use crate::forest::RandomCutForest;
49
50/// Runtime-dim facade over `RandomCutForest<MAX_D>`. `MAX_D` must
51/// be at compile time ≥ the widest `active_dim` the caller will
52/// ever feed.
53#[derive(Debug)]
54pub struct DynamicForest<const MAX_D: usize> {
55    /// Wrapped const-generic forest.
56    forest: RandomCutForest<MAX_D>,
57    /// Caller-declared dim count — every incoming point must have
58    /// exactly `active_dim` finite components; the remaining
59    /// `MAX_D − active_dim` slots are zero-padded.
60    active_dim: usize,
61}
62
63impl<const MAX_D: usize> DynamicForest<MAX_D> {
64    /// Build from a prepared [`ForestBuilder<MAX_D>`].
65    ///
66    /// # Errors
67    ///
68    /// - [`RcfError::InvalidConfig`] when `active_dim == 0` or
69    ///   `active_dim > MAX_D`.
70    /// - Propagates [`ForestBuilder::build`] failures.
71    pub fn new(builder: ForestBuilder<MAX_D>, active_dim: usize) -> RcfResult<Self> {
72        if active_dim == 0 {
73            return Err(RcfError::InvalidConfig(
74                "DynamicForest: active_dim must be > 0".into(),
75            ));
76        }
77        if active_dim > MAX_D {
78            return Err(RcfError::InvalidConfig(
79                format!("DynamicForest: active_dim {active_dim} exceeds MAX_D {MAX_D}").into(),
80            ));
81        }
82        let forest = builder.build()?;
83        Ok(Self { forest, active_dim })
84    }
85
86    /// Active dim of this facade — every input slice must have
87    /// this length.
88    #[must_use]
89    pub fn active_dim(&self) -> usize {
90        self.active_dim
91    }
92
93    /// Maximum dim the underlying const-generic forest supports.
94    #[must_use]
95    pub const fn max_dim(&self) -> usize {
96        MAX_D
97    }
98
99    /// Read-only handle to the underlying const-generic forest —
100    /// useful for inspecting metrics / persistence state.
101    #[must_use]
102    pub fn forest(&self) -> &RandomCutForest<MAX_D> {
103        &self.forest
104    }
105
106    /// Score a runtime-sized `point`. Returns
107    /// [`RcfError::DimensionMismatch`] when `point.len() != active_dim`.
108    ///
109    /// # Errors
110    ///
111    /// - [`RcfError::DimensionMismatch`] on length mismatch.
112    /// - [`RcfError::NaNValue`] on non-finite components.
113    /// - Propagates [`RandomCutForest::score`] failures.
114    pub fn score(&self, point: &[f64]) -> RcfResult<AnomalyScore> {
115        let padded = self.pad(point)?;
116        self.forest.score(&padded)
117    }
118
119    /// Fold a runtime-sized `point` into the forest.
120    ///
121    /// # Errors
122    ///
123    /// Same as [`Self::score`] plus [`RandomCutForest::update`]
124    /// failures.
125    pub fn update(&mut self, point: &[f64]) -> RcfResult<()> {
126        let padded = self.pad(point)?;
127        self.forest.update(padded)
128    }
129
130    /// Attribution for a runtime-sized `point`. Returns a
131    /// [`DiVector`] of `active_dim` entries (the zero-padded tail
132    /// is truncated from the output).
133    ///
134    /// # Errors
135    ///
136    /// Same as [`Self::score`] plus [`RandomCutForest::attribution`]
137    /// failures.
138    pub fn attribution(&self, point: &[f64]) -> RcfResult<DiVector> {
139        let padded = self.pad(point)?;
140        let di_full = self.forest.attribution(&padded)?;
141        // Truncate to active_dim — callers care only about their
142        // own feature-vector dims.
143        let mut di = DiVector::zeros(self.active_dim);
144        for d in 0..self.active_dim {
145            let _ = di.add_high(d, di_full.high()[d]);
146            let _ = di.add_low(d, di_full.low()[d]);
147        }
148        Ok(di)
149    }
150
151    /// Pad `point` to `[f64; MAX_D]`, validating length and
152    /// finite-ness.
153    fn pad(&self, point: &[f64]) -> RcfResult<[f64; MAX_D]> {
154        if point.len() != self.active_dim {
155            return Err(RcfError::DimensionMismatch {
156                expected: self.active_dim,
157                got: point.len(),
158            });
159        }
160        if !point.iter().all(|v| v.is_finite()) {
161            return Err(RcfError::NaNValue);
162        }
163        let mut padded = [0.0_f64; MAX_D];
164        padded[..self.active_dim].copy_from_slice(point);
165        Ok(padded)
166    }
167}
168
#[cfg(test)]
// Test-only relaxations: `unwrap` / panicking asserts are the
// failure mechanism here, and the numeric lints only fire on
// fixture arithmetic.
#[allow(
    clippy::unwrap_used,
    clippy::panic,
    clippy::float_cmp,
    clippy::cast_precision_loss
)]
mod tests {
    use super::*;

    /// Small deterministic forest configuration shared by every test.
    fn builder() -> ForestBuilder<16> {
        ForestBuilder::<16>::new()
            .num_trees(50)
            .sample_size(64)
            .seed(2026)
    }

    #[test]
    fn new_rejects_zero_active_dim() {
        let err = DynamicForest::<16>::new(builder(), 0).unwrap_err();
        assert!(matches!(err, RcfError::InvalidConfig(_)));
    }

    #[test]
    fn new_rejects_active_dim_above_max() {
        let err = DynamicForest::<16>::new(builder(), 32).unwrap_err();
        assert!(matches!(err, RcfError::InvalidConfig(_)));
        // One past the limit is the tightest rejected value.
        let err = DynamicForest::<16>::new(builder(), 17).unwrap_err();
        assert!(matches!(err, RcfError::InvalidConfig(_)));
    }

    #[test]
    fn accessors_report_configured_dims() {
        let f = DynamicForest::<16>::new(builder(), 4).unwrap();
        assert_eq!(f.active_dim(), 4);
        assert_eq!(f.max_dim(), 16);
    }

    #[test]
    fn active_dim_equal_to_max_is_accepted() {
        // Boundary case: no padding at all, the whole array is
        // overwritten by the caller's point.
        let mut f = DynamicForest::<16>::new(builder(), 16).unwrap();
        assert_eq!(f.active_dim(), f.max_dim());
        for i in 0..200 {
            let v = f64::from(i) * 0.01;
            f.update(&[v; 16]).unwrap();
        }
        let s = f.score(&[10.0; 16]).unwrap();
        let raw: f64 = s.into();
        assert!(raw.is_finite());
    }

    #[test]
    fn update_then_score_preserves_dim_contract() {
        let mut f = DynamicForest::<16>::new(builder(), 4).unwrap();
        for i in 0..200 {
            let v = f64::from(i) * 0.01;
            f.update(&[v, v + 0.5, v * 2.0, v - 0.1]).unwrap();
        }
        let s = f.score(&[10.0, 10.0, 10.0, 10.0]).unwrap();
        let raw: f64 = s.into();
        assert!(raw.is_finite());
        assert!(raw > 0.0);
    }

    #[test]
    fn length_mismatch_rejected() {
        let mut f = DynamicForest::<16>::new(builder(), 4).unwrap();
        for _ in 0..50 {
            f.update(&[0.1, 0.2, 0.3, 0.4]).unwrap();
        }
        // Too short, too long, and empty — the error carries both
        // the expected and the observed length.
        assert!(matches!(
            f.score(&[0.1, 0.2, 0.3]).unwrap_err(),
            RcfError::DimensionMismatch {
                expected: 4,
                got: 3
            }
        ));
        assert!(matches!(
            f.score(&[0.1, 0.2, 0.3, 0.4, 0.5]).unwrap_err(),
            RcfError::DimensionMismatch {
                expected: 4,
                got: 5
            }
        ));
        assert!(matches!(
            f.score(&[]).unwrap_err(),
            RcfError::DimensionMismatch {
                expected: 4,
                got: 0
            }
        ));
        // The same validation guards the other entry points.
        assert!(matches!(
            f.update(&[0.1, 0.2, 0.3]),
            Err(RcfError::DimensionMismatch { .. })
        ));
        assert!(matches!(
            f.attribution(&[0.1, 0.2, 0.3]),
            Err(RcfError::DimensionMismatch { .. })
        ));
    }

    #[test]
    fn non_finite_rejected() {
        let mut f = DynamicForest::<16>::new(builder(), 4).unwrap();
        // NaN and both infinities are all "non-finite", in any slot.
        for bad in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
            assert!(matches!(
                f.update(&[bad, 0.0, 0.0, 0.0]).unwrap_err(),
                RcfError::NaNValue
            ));
            assert!(matches!(
                f.update(&[0.0, 0.0, 0.0, bad]).unwrap_err(),
                RcfError::NaNValue
            ));
            assert!(matches!(
                f.score(&[0.0, bad, 0.0, 0.0]).unwrap_err(),
                RcfError::NaNValue
            ));
            assert!(matches!(
                f.attribution(&[0.0, 0.0, bad, 0.0]),
                Err(RcfError::NaNValue)
            ));
        }
    }

    #[test]
    fn attribution_truncated_to_active_dim() {
        let mut f = DynamicForest::<16>::new(builder(), 3).unwrap();
        for i in 0..200 {
            let v = f64::from(i) * 0.01;
            f.update(&[v, v + 0.5, v * 2.0]).unwrap();
        }
        let di = f.attribution(&[10.0, 10.0, 10.0]).unwrap();
        assert_eq!(di.dim(), 3);
    }
}