anomstream_core/dynamic_forest.rs
1//! Runtime-dim wrapper for `RandomCutForest` — unblocks
2//! heterogeneous multi-tenant deployments where every tenant
3//! ships its own feature-vector width (MSSP pools, per-tenant
4//! feature extractors).
5//!
6//! The bare `RandomCutForest<D>` is const-generic on `D` — every
7//! distinct dim needs its own monomorphisation. A
8//! `TenantForestPool<K, D>` then has a single `D` across every
9//! tenant, forcing operators to whitelist dim values at compile
10//! time. [`DynamicForest<MAX_D>`] sidesteps the constraint by
11//! picking a maximum dim at compile time and zero-padding every
12//! caller-supplied point that is shorter than `MAX_D`. The forest
13//! internally scores the zero-padded vector; dims above the
14//! caller's `active_dim` contribute no range and therefore no
15//! attribution.
16//!
17//! # What this buys
18//!
19//! - One monomorphisation for every tenant whose dim `≤ MAX_D`.
20//! - No API break for callers who already have `[f64; D]` —
21//! [`DynamicForest::update`] / [`DynamicForest::score`] take
22//! `&[f64]` of runtime length.
23//!
24//! # What this costs
25//!
//! - Zero-padding adds `MAX_D − active_dim` dimensions with permanent
//! range `0.0` per tenant. RCF's range-weighted cut sampling naturally
//! skips these dims (zero-range = never cut on); there is no AUC
//! impact, but reservoir memory is paid at `MAX_D` always.
30//! - Callers mixing narrow and wide tenants in the same pool pay
31//! the `MAX_D` memory for every tenant; size `MAX_D` to the
32//! widest expected tenant.
33//!
34//! # Not a replacement for the const-generic path
35//!
36//! Hot-path callers with a fixed known `D` should keep using
37//! [`crate::RandomCutForest<D>`] — the const-generic path is
38//! faster (fewer runtime checks, better inlining) and idiomatic.
39//! [`DynamicForest`] is the escape hatch for MSSP /
40//! heterogeneous-tenant deployments where compile-time `D` is a
41//! dealbreaker.
42
43#![cfg(feature = "std")]
44
45use crate::config::ForestBuilder;
46use crate::domain::{AnomalyScore, DiVector};
47use crate::error::{RcfError, RcfResult};
48use crate::forest::RandomCutForest;
49
50/// Runtime-dim facade over `RandomCutForest<MAX_D>`. `MAX_D` must
51/// be at compile time ≥ the widest `active_dim` the caller will
52/// ever feed.
53#[derive(Debug)]
54pub struct DynamicForest<const MAX_D: usize> {
55 /// Wrapped const-generic forest.
56 forest: RandomCutForest<MAX_D>,
57 /// Caller-declared dim count — every incoming point must have
58 /// exactly `active_dim` finite components; the remaining
59 /// `MAX_D − active_dim` slots are zero-padded.
60 active_dim: usize,
61}
62
63impl<const MAX_D: usize> DynamicForest<MAX_D> {
64 /// Build from a prepared [`ForestBuilder<MAX_D>`].
65 ///
66 /// # Errors
67 ///
68 /// - [`RcfError::InvalidConfig`] when `active_dim == 0` or
69 /// `active_dim > MAX_D`.
70 /// - Propagates [`ForestBuilder::build`] failures.
71 pub fn new(builder: ForestBuilder<MAX_D>, active_dim: usize) -> RcfResult<Self> {
72 if active_dim == 0 {
73 return Err(RcfError::InvalidConfig(
74 "DynamicForest: active_dim must be > 0".into(),
75 ));
76 }
77 if active_dim > MAX_D {
78 return Err(RcfError::InvalidConfig(
79 format!("DynamicForest: active_dim {active_dim} exceeds MAX_D {MAX_D}").into(),
80 ));
81 }
82 let forest = builder.build()?;
83 Ok(Self { forest, active_dim })
84 }
85
86 /// Active dim of this facade — every input slice must have
87 /// this length.
88 #[must_use]
89 pub fn active_dim(&self) -> usize {
90 self.active_dim
91 }
92
93 /// Maximum dim the underlying const-generic forest supports.
94 #[must_use]
95 pub const fn max_dim(&self) -> usize {
96 MAX_D
97 }
98
99 /// Read-only handle to the underlying const-generic forest —
100 /// useful for inspecting metrics / persistence state.
101 #[must_use]
102 pub fn forest(&self) -> &RandomCutForest<MAX_D> {
103 &self.forest
104 }
105
106 /// Score a runtime-sized `point`. Returns
107 /// [`RcfError::DimensionMismatch`] when `point.len() != active_dim`.
108 ///
109 /// # Errors
110 ///
111 /// - [`RcfError::DimensionMismatch`] on length mismatch.
112 /// - [`RcfError::NaNValue`] on non-finite components.
113 /// - Propagates [`RandomCutForest::score`] failures.
114 pub fn score(&self, point: &[f64]) -> RcfResult<AnomalyScore> {
115 let padded = self.pad(point)?;
116 self.forest.score(&padded)
117 }
118
119 /// Fold a runtime-sized `point` into the forest.
120 ///
121 /// # Errors
122 ///
123 /// Same as [`Self::score`] plus [`RandomCutForest::update`]
124 /// failures.
125 pub fn update(&mut self, point: &[f64]) -> RcfResult<()> {
126 let padded = self.pad(point)?;
127 self.forest.update(padded)
128 }
129
130 /// Attribution for a runtime-sized `point`. Returns a
131 /// [`DiVector`] of `active_dim` entries (the zero-padded tail
132 /// is truncated from the output).
133 ///
134 /// # Errors
135 ///
136 /// Same as [`Self::score`] plus [`RandomCutForest::attribution`]
137 /// failures.
138 pub fn attribution(&self, point: &[f64]) -> RcfResult<DiVector> {
139 let padded = self.pad(point)?;
140 let di_full = self.forest.attribution(&padded)?;
141 // Truncate to active_dim — callers care only about their
142 // own feature-vector dims.
143 let mut di = DiVector::zeros(self.active_dim);
144 for d in 0..self.active_dim {
145 let _ = di.add_high(d, di_full.high()[d]);
146 let _ = di.add_low(d, di_full.low()[d]);
147 }
148 Ok(di)
149 }
150
151 /// Pad `point` to `[f64; MAX_D]`, validating length and
152 /// finite-ness.
153 fn pad(&self, point: &[f64]) -> RcfResult<[f64; MAX_D]> {
154 if point.len() != self.active_dim {
155 return Err(RcfError::DimensionMismatch {
156 expected: self.active_dim,
157 got: point.len(),
158 });
159 }
160 if !point.iter().all(|v| v.is_finite()) {
161 return Err(RcfError::NaNValue);
162 }
163 let mut padded = [0.0_f64; MAX_D];
164 padded[..self.active_dim].copy_from_slice(point);
165 Ok(padded)
166 }
167}
168
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::panic,
    clippy::float_cmp,
    clippy::cast_precision_loss
)]
mod tests {
    use super::*;

    /// Seeded builder for a 16-wide forest, shared by every test.
    fn builder() -> ForestBuilder<16> {
        let base = ForestBuilder::<16>::new();
        base.num_trees(50).sample_size(64).seed(2026)
    }

    /// `active_dim == 0` must be refused at construction.
    #[test]
    fn new_rejects_zero_active_dim() {
        let outcome = DynamicForest::<16>::new(builder(), 0);
        assert!(matches!(outcome, Err(RcfError::InvalidConfig(_))));
    }

    /// `active_dim` wider than `MAX_D` must be refused at construction.
    #[test]
    fn new_rejects_active_dim_above_max() {
        let outcome = DynamicForest::<16>::new(builder(), 32);
        assert!(matches!(outcome, Err(RcfError::InvalidConfig(_))));
    }

    /// Feeding 4-wide points and then scoring a 4-wide outlier yields
    /// a finite, strictly positive raw score.
    #[test]
    fn update_then_score_preserves_dim_contract() {
        let mut forest = DynamicForest::<16>::new(builder(), 4).unwrap();
        for step in 0..200 {
            let base = f64::from(step) * 0.01;
            let sample = [base, base + 0.5, base * 2.0, base - 0.1];
            forest.update(&sample).unwrap();
        }
        let score = forest.score(&[10.0, 10.0, 10.0, 10.0]).unwrap();
        let raw: f64 = score.into();
        assert!(raw.is_finite());
        assert!(raw > 0.0);
    }

    /// Slices shorter or longer than `active_dim` are rejected.
    #[test]
    fn length_mismatch_rejected() {
        let mut forest = DynamicForest::<16>::new(builder(), 4).unwrap();
        let sample = [0.1, 0.2, 0.3, 0.4];
        for _ in 0..50 {
            forest.update(&sample).unwrap();
        }
        let too_short = [0.1, 0.2, 0.3];
        let too_long = [0.1, 0.2, 0.3, 0.4, 0.5];
        for bad in [&too_short[..], &too_long[..]] {
            assert!(matches!(
                forest.score(bad),
                Err(RcfError::DimensionMismatch { .. })
            ));
        }
    }

    /// A NaN component is rejected on update.
    #[test]
    fn non_finite_rejected() {
        let mut forest = DynamicForest::<16>::new(builder(), 4).unwrap();
        let outcome = forest.update(&[f64::NAN, 0.0, 0.0, 0.0]);
        assert!(matches!(outcome, Err(RcfError::NaNValue)));
    }

    /// Attribution output has `active_dim` entries.
    #[test]
    fn attribution_truncated_to_active_dim() {
        let mut forest = DynamicForest::<16>::new(builder(), 3).unwrap();
        for step in 0..200 {
            let base = f64::from(step) * 0.01;
            forest.update(&[base, base + 0.5, base * 2.0]).unwrap();
        }
        let attribution = forest.attribution(&[10.0, 10.0, 10.0]).unwrap();
        assert_eq!(attribution.dim(), 3);
    }
}