Skip to main content

anomstream_core/tree/
random_cut_tree.rs

1//! Incrementally-maintained random cut binary partition.
2//!
3//! [`RandomCutTree`] sits on top of [`crate::tree::NodeStore`] and
4//! provides three operations:
5//!
6//! - [`add`](RandomCutTree::add) — insert a point following Guha
7//!   et al. (2016) §2: at every visited node, sample a cut over the
8//!   augmented bounding box; if the cut isolates the new point from
9//!   the existing subtree, splice a new internal node here, otherwise
10//!   descend along the existing cut.
11//! - [`delete`](RandomCutTree::delete) — remove a leaf, merge its
12//!   sibling up into the parent's slot, and recompute ancestor masses
13//!   and bounding boxes.
14//! - [`traverse`](RandomCutTree::traverse) — walk root→leaf along the
15//!   stored cuts, dispatching per-node callbacks to a
16//!   [`crate::visitor::Visitor`].
17//!
18//! The tree never owns point coordinates — callers (the forest
19//! layer) hand a [`PointAccessor`] in for any operation that needs
20//! to know a leaf's location.
21
22use alloc::borrow::Cow;
23use alloc::format;
24use alloc::vec::Vec;
25
26#[cfg(not(feature = "std"))]
27#[allow(unused_imports)]
28use num_traits::Float;
29use rand::Rng;
30
31use crate::domain::{BoundingBox, Cut, ensure_dim, ensure_finite};
32use crate::error::{RcfError, RcfResult};
33use crate::tree::node::{NodeRef, NodeView, NodeViewMut};
34use crate::tree::node_store::NodeStore;
35use crate::visitor::Visitor;
36
37/// Borrow points by index. Implemented by the forest layer and by
38/// `Vec<[f64; D]>` for in-tree tests.
39///
40/// `D` matches the tree's compile-time dimensionality so the returned
41/// reference is a fixed-size array rather than a slice — callers can
42/// always pass it to [`BoundingBox::from_point`] without paying for a
43/// runtime length check.
44///
45/// # Examples
46///
47/// ```
48/// use anomstream_core::tree::PointAccessor;
49///
50/// let v: Vec<[f64; 2]> = vec![[1.0, 2.0]];
51/// let p: &[f64; 2] = <Vec<[f64; 2]> as PointAccessor<2>>::point(&v, 0).unwrap();
52/// assert_eq!(p, &[1.0, 2.0]);
53/// ```
54pub trait PointAccessor<const D: usize> {
55    /// Borrow the point stored at `idx`, or return `None` when it
56    /// does not exist.
57    fn point(&self, idx: usize) -> Option<&[f64; D]>;
58}
59
60impl<const D: usize> PointAccessor<D> for Vec<[f64; D]> {
61    fn point(&self, idx: usize) -> Option<&[f64; D]> {
62        self.get(idx)
63    }
64}
65
66impl<const D: usize> PointAccessor<D> for [[f64; D]] {
67    fn point(&self, idx: usize) -> Option<&[f64; D]> {
68        self.get(idx)
69    }
70}
71
72/// Incrementally-maintained random cut tree over up to `capacity`
73/// distinct `D`-dimensional points.
74///
75/// # Examples
76///
77/// ```
78/// use rand::SeedableRng;
79/// use rand_chacha::ChaCha8Rng;
80/// use anomstream_core::RandomCutTree;
81///
82/// let mut tree = RandomCutTree::<2>::new(8).unwrap();
83/// let p = [1.0_f64, 2.0];
84/// let points = vec![p];
85/// let mut rng = ChaCha8Rng::seed_from_u64(42);
86/// tree.add(0, &p, &points, &mut rng).unwrap();
87/// assert!(tree.root().unwrap().is_leaf());
88/// ```
89#[derive(Debug)]
90#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
91pub struct RandomCutTree<const D: usize> {
92    /// Root reference; `None` when the tree holds no live leaves.
93    root: Option<NodeRef>,
94    /// Backing storage for all live nodes.
95    store: NodeStore<D>,
96    /// Reverse index: `point_idx → leaf NodeRef` for `O(1)` deletions.
97    /// Backed by a sparse `Vec<Option<NodeRef>>` indexed by `point_idx`
98    /// — the forest's `PointStore` reuses freed slots so the indices
99    /// stay dense in steady state. Maintained alongside
100    /// `distinct_count` so `distinct_point_count` stays `O(1)`.
101    leaf_index: Vec<Option<NodeRef>>,
102    /// Cached count of `Some` entries in `leaf_index`.
103    distinct_count: usize,
104}
105
106impl<const D: usize> RandomCutTree<D> {
107    /// Build an empty tree with room for `capacity` distinct points.
108    ///
109    /// # Errors
110    ///
111    /// Returns [`RcfError::InvalidConfig`] when `D == 0`, `capacity ==
112    /// 0`, or `capacity` exceeds the store limit.
113    pub fn new(capacity: u32) -> RcfResult<Self> {
114        if D == 0 {
115            return Err(RcfError::InvalidConfig(
116                "RandomCutTree dimension must be > 0".into(),
117            ));
118        }
119        Ok(Self {
120            root: None,
121            store: NodeStore::<D>::new(capacity)?,
122            leaf_index: Vec::new(),
123            distinct_count: 0,
124        })
125    }
126
127    /// Insert (or overwrite) `leaf_index[idx] = Some(node)`,
128    /// updating [`distinct_count`](Self::distinct_point_count). Grows
129    /// the backing `Vec` on demand.
130    #[inline]
131    fn leaf_index_set(&mut self, idx: usize, node: NodeRef) {
132        if idx >= self.leaf_index.len() {
133            self.leaf_index.resize(idx + 1, None);
134        }
135        if self.leaf_index[idx].is_none() {
136            self.distinct_count += 1;
137        }
138        self.leaf_index[idx] = Some(node);
139    }
140
141    /// Clear `leaf_index[idx]` if present, decrementing
142    /// [`distinct_count`](Self::distinct_point_count). No-op when
143    /// `idx` is out of range or already `None`.
144    #[inline]
145    fn leaf_index_clear(&mut self, idx: usize) {
146        if let Some(slot) = self.leaf_index.get_mut(idx)
147            && slot.is_some()
148        {
149            *slot = None;
150            self.distinct_count -= 1;
151        }
152    }
153
154    /// `Some(node_ref)` when `point_idx` is currently mapped.
155    #[inline]
156    #[must_use]
157    fn leaf_index_get(&self, idx: usize) -> Option<NodeRef> {
158        self.leaf_index.get(idx).copied().flatten()
159    }
160
161    /// Whether `point_idx` is currently mapped — `O(1)` `Vec` index.
162    #[inline]
163    #[must_use]
164    fn leaf_index_contains(&self, idx: usize) -> bool {
165        matches!(self.leaf_index.get(idx), Some(Some(_)))
166    }
167
168    /// Root node reference, or `None` when the tree is empty.
169    #[must_use]
170    pub fn root(&self) -> Option<NodeRef> {
171        self.root
172    }
173
174    /// Configured dimensionality (compile-time `D`).
175    #[must_use]
176    pub const fn dimension(&self) -> usize {
177        D
178    }
179
180    /// Number of distinct points currently stored (each leaf counts
181    /// once regardless of its mass).
182    #[must_use]
183    #[inline]
184    pub fn distinct_point_count(&self) -> usize {
185        self.distinct_count
186    }
187
188    /// Borrow the underlying node store. Used by tests and
189    /// persistence.
190    #[must_use]
191    pub fn store(&self) -> &NodeStore<D> {
192        &self.store
193    }
194
195    /// Whether the tree currently stores `point_idx`.
196    #[must_use]
197    #[inline]
198    pub fn contains(&self, point_idx: usize) -> bool {
199        self.leaf_index_contains(point_idx)
200    }
201
202    /// `NodeRef` of the leaf currently mapped to `point_idx`, or
203    /// `None` when the reservoir has evicted it (or never admitted
204    /// it). Exposed so forest-level scoring paths that need to walk
205    /// the leaf's ancestor chain (e.g. `codisp`-style probe scoring)
206    /// can locate the leaf without re-traversing from the root.
207    #[must_use]
208    #[inline]
209    pub fn leaf_of(&self, point_idx: usize) -> Option<NodeRef> {
210        self.leaf_index_get(point_idx)
211    }
212
213    /// Maximum depth from the root to any leaf, or `None` when the
214    /// tree is empty. Used by tests and diagnostics to verify the
215    /// expected `O(log n)` depth bound under uniform-random inserts.
216    #[must_use]
217    pub fn max_depth(&self) -> Option<usize> {
218        let root = self.root?;
219        Some(self.subtree_depth(root, 0).unwrap_or(0))
220    }
221
222    /// Recursive helper for [`max_depth`](Self::max_depth).
223    fn subtree_depth(&self, n: NodeRef, depth: usize) -> RcfResult<usize> {
224        match self.store.view(n)? {
225            NodeView::Leaf(_) => Ok(depth),
226            NodeView::Internal(i) => {
227                let l = self.subtree_depth(i.left, depth + 1)?;
228                let r = self.subtree_depth(i.right, depth + 1)?;
229                Ok(l.max(r))
230            }
231        }
232    }
233
234    /// Insert `point` (registered under `point_idx` in the caller's
235    /// point store) into the tree.
236    ///
237    /// When an identical point is already present, the existing leaf's
238    /// mass is incremented and `point_idx` is mapped to that same
239    /// leaf — duplicate-point handling per Guha 2016.
240    ///
241    /// # Errors
242    ///
243    /// - [`RcfError::DimensionMismatch`] when `point.len() != self.dimension()`.
244    /// - [`RcfError::NaNValue`] when `point` contains a non-finite component.
245    /// - [`RcfError::InvalidConfig`] when `point_idx` is already present.
246    /// - [`RcfError::InvalidConfig`] when the underlying store runs out
247    ///   of internal or leaf slots.
248    pub fn add<R, P>(
249        &mut self,
250        point_idx: usize,
251        point: &[f64],
252        points: &P,
253        rng: &mut R,
254    ) -> RcfResult<()>
255    where
256        R: Rng + ?Sized,
257        P: PointAccessor<D> + ?Sized,
258    {
259        ensure_dim(point, D)?;
260        ensure_finite(point)?;
261        if self.leaf_index_contains(point_idx) {
262            return Err(RcfError::InvalidConfig(
263                format!("RandomCutTree::add: point_idx {point_idx} already present").into(),
264            ));
265        }
266
267        let Some(root) = self.root else {
268            let leaf = self.store.add_leaf(point_idx, None, 1)?;
269            self.leaf_index_set(point_idx, leaf);
270            self.root = Some(leaf);
271            return Ok(());
272        };
273
274        self.insert_at(root, point_idx, point, points, rng)?;
275        Ok(())
276    }
277
278    /// Recursive insertion. Returns the (possibly new) `NodeRef` that
279    /// occupies the slot previously held by `n`.
280    // `drop(n_bbox)` releases the borrow of `self.store` held by the
281    // `Cow<BoundingBox<D>>` before we call `&mut self` helpers below.
282    // With `[f64; D]` storage `Cow<BoundingBox<D>>` no longer
283    // implements `Drop` directly, so clippy flags the explicit drops
284    // as redundant — they are not, they terminate the borrow.
285    #[allow(clippy::drop_non_drop)]
286    fn insert_at<R, P>(
287        &mut self,
288        n: NodeRef,
289        point_idx: usize,
290        point: &[f64],
291        points: &P,
292        rng: &mut R,
293    ) -> RcfResult<NodeRef>
294    where
295        R: Rng + ?Sized,
296        P: PointAccessor<D> + ?Sized,
297    {
298        let n_bbox = self.bbox_of(n, points)?;
299
300        // Sample over the *virtual* augmented bbox — no allocation
301        // unless we end up materialising the cached bbox for the
302        // splice path below.
303        if n_bbox.augmented_range_sum(point) <= 0.0 {
304            // Coincident with an existing leaf — bump its mass.
305            drop(n_bbox);
306            return self.absorb_duplicate(n, point_idx);
307        }
308
309        let cut = n_bbox.augmented_random_cut(point, rng)?;
310        let isolates = isolates_point(&cut, point, &n_bbox);
311
312        if isolates {
313            // Materialise the augmented bbox once — it becomes the
314            // cached bbox of the new internal node we are about to
315            // splice in (so the allocation is unavoidable here).
316            let mut augmented: BoundingBox<D> = (*n_bbox).clone();
317            drop(n_bbox);
318            augmented.extend(point)?;
319            self.splice_new_internal(n, point_idx, point, cut, augmented)
320        } else {
321            drop(n_bbox);
322            self.descend_or_split(n, point_idx, point, points, rng)
323        }
324    }
325
326    /// Absorb a duplicate point into an existing leaf and propagate
327    /// the mass increment up to the root.
328    fn absorb_duplicate(&mut self, n: NodeRef, point_idx: usize) -> RcfResult<NodeRef> {
329        if !n.is_leaf() {
330            return Err(RcfError::InvalidConfig(
331                "RandomCutTree::absorb_duplicate called on internal node".into(),
332            ));
333        }
334        let mass = self.store.view(n)?.mass();
335        self.store.set_mass(n, mass + 1)?;
336        self.leaf_index_set(point_idx, n);
337        let mut cur = n;
338        while let Some(parent) = self.store.parent(cur)? {
339            let m = self.store.view(parent)?.mass();
340            self.store.set_mass(parent, m + 1)?;
341            cur = parent;
342        }
343        Ok(n)
344    }
345
346    /// Splice a new internal node at the position currently held by
347    /// `n`, using `cut` as the partition over `bbox`.
348    fn splice_new_internal(
349        &mut self,
350        n: NodeRef,
351        point_idx: usize,
352        point: &[f64],
353        cut: Cut,
354        bbox: BoundingBox<D>,
355    ) -> RcfResult<NodeRef> {
356        let new_leaf = self.store.add_leaf(point_idx, None, 1)?;
357        self.leaf_index_set(point_idx, new_leaf);
358
359        let parent_of_n = self.store.parent(n)?;
360        let n_mass = self.store.view(n)?.mass();
361        let new_mass = n_mass + 1;
362
363        let (left, right) = if cut.left_of(point) {
364            (new_leaf, n)
365        } else {
366            (n, new_leaf)
367        };
368
369        let new_internal =
370            self.store
371                .add_internal(cut, bbox, left, right, parent_of_n, new_mass)?;
372
373        self.store.set_parent(new_leaf, Some(new_internal))?;
374        self.store.set_parent(n, Some(new_internal))?;
375
376        self.replace_in_parent(parent_of_n, n, new_internal)?;
377
378        if let Some(parent) = parent_of_n {
379            self.update_ancestors_after_insert(parent, point)?;
380        }
381
382        Ok(new_internal)
383    }
384
385    /// Cut did not isolate — descend into the matching subtree along
386    /// the existing internal cut, updating mass and bbox on the way
387    /// back up.
388    fn descend_or_split<R, P>(
389        &mut self,
390        n: NodeRef,
391        point_idx: usize,
392        point: &[f64],
393        points: &P,
394        rng: &mut R,
395    ) -> RcfResult<NodeRef>
396    where
397        R: Rng + ?Sized,
398        P: PointAccessor<D> + ?Sized,
399    {
400        let (existing_cut, left, right) = match self.store.view(n)? {
401            NodeView::Internal(i) => (i.cut, i.left, i.right),
402            NodeView::Leaf(_) => {
403                // Cut over a non-degenerate augmented bbox always
404                // isolates one of two distinct points; reaching here
405                // would indicate a bug elsewhere.
406                return Err(RcfError::InvalidConfig(
407                    "RandomCutTree::descend_or_split: leaf reached without isolation".into(),
408                ));
409            }
410        };
411
412        let go_left = existing_cut.left_of(point);
413        let next = if go_left { left } else { right };
414        let new_child = self.insert_at(next, point_idx, point, points, rng)?;
415
416        // Update child pointer in case the descendant call replaced its slot.
417        if new_child.raw() != next.raw() {
418            if go_left {
419                self.store.set_internal_children(n, new_child, right)?;
420            } else {
421                self.store.set_internal_children(n, left, new_child)?;
422            }
423            self.store.set_parent(new_child, Some(n))?;
424        }
425
426        // Mass + bbox already updated by insert_at via the
427        // splice/absorb helpers' own ancestor walks.
428        Ok(n)
429    }
430
431    /// Update the parent's child pointer after `old` is replaced by
432    /// `new`, or update `self.root` when there is no parent.
433    fn replace_in_parent(
434        &mut self,
435        parent_opt: Option<NodeRef>,
436        old: NodeRef,
437        new: NodeRef,
438    ) -> RcfResult<()> {
439        let Some(parent) = parent_opt else {
440            self.root = Some(new);
441            return Ok(());
442        };
443        let (l, r) = match self.store.view(parent)? {
444            NodeView::Internal(i) => (i.left, i.right),
445            NodeView::Leaf(_) => {
446                return Err(RcfError::InvalidConfig(
447                    "RandomCutTree::replace_in_parent: parent is leaf".into(),
448                ));
449            }
450        };
451        if l.raw() == old.raw() {
452            self.store.set_internal_children(parent, new, r)?;
453        } else if r.raw() == old.raw() {
454            self.store.set_internal_children(parent, l, new)?;
455        } else {
456            return Err(RcfError::InvalidConfig(
457                "RandomCutTree::replace_in_parent: orphan".into(),
458            ));
459        }
460        Ok(())
461    }
462
463    /// Walk from `start` up to the root incrementing mass and
464    /// extending each cached internal bounding box by `point` —
465    /// in-place extend via [`NodeStore::view_mut`] avoids the
466    /// `bbox.clone() + set_internal_bbox` round trip on every level.
467    fn update_ancestors_after_insert(&mut self, start: NodeRef, point: &[f64]) -> RcfResult<()> {
468        let mut cur = Some(start);
469        while let Some(node) = cur {
470            let parent = match self.store.view_mut(node)? {
471                NodeViewMut::Internal(i) => {
472                    i.mass += 1;
473                    i.bbox.extend(point)?;
474                    i.parent
475                }
476                NodeViewMut::Leaf(l) => {
477                    l.mass += 1;
478                    l.parent
479                }
480            };
481            cur = parent;
482        }
483        Ok(())
484    }
485
486    /// Remove the leaf currently mapped to `point_idx`. When the leaf
487    /// has mass `> 1` (duplicate point), only the mass is decremented
488    /// and the leaf is preserved.
489    ///
490    /// # Errors
491    ///
492    /// - [`RcfError::InvalidConfig`] when `point_idx` is not present.
493    /// - [`RcfError::OutOfBounds`] when the underlying store cannot
494    ///   look up a referenced point.
495    pub fn delete<P>(&mut self, point_idx: usize, points: &P) -> RcfResult<()>
496    where
497        P: PointAccessor<D> + ?Sized,
498    {
499        let leaf = self.leaf_index_get(point_idx).ok_or_else(|| {
500            RcfError::InvalidConfig(
501                format!("RandomCutTree::delete: point_idx {point_idx} not present").into(),
502            )
503        })?;
504
505        let leaf_mass = self.store.view(leaf)?.mass();
506        if leaf_mass > 1 {
507            self.store.set_mass(leaf, leaf_mass - 1)?;
508            let mut cur = leaf;
509            while let Some(parent) = self.store.parent(cur)? {
510                let m = self.store.view(parent)?.mass();
511                self.store.set_mass(parent, m - 1)?;
512                cur = parent;
513            }
514            // Drop this idx from the reverse index — the leaf still
515            // represents the other copies of the point under their own
516            // point_idx, but `point_idx` itself is gone.
517            self.leaf_index_clear(point_idx);
518            return Ok(());
519        }
520
521        // mass == 1: remove the leaf entirely.
522        let parent_of_leaf = self.store.parent(leaf)?;
523        self.leaf_index_clear(point_idx);
524        self.store.delete(leaf)?;
525
526        let Some(parent) = parent_of_leaf else {
527            self.root = None;
528            return Ok(());
529        };
530
531        let sibling = match self.store.view(parent)? {
532            NodeView::Internal(i) => {
533                if i.left.raw() == leaf.raw() {
534                    i.right
535                } else {
536                    i.left
537                }
538            }
539            NodeView::Leaf(_) => {
540                return Err(RcfError::InvalidConfig(
541                    "RandomCutTree::delete: parent is leaf".into(),
542                ));
543            }
544        };
545
546        let grandparent = self.store.parent(parent)?;
547        self.store.set_parent(sibling, grandparent)?;
548        self.store.delete(parent)?;
549
550        match grandparent {
551            None => {
552                self.root = Some(sibling);
553            }
554            Some(gp) => {
555                let (l, r) = match self.store.view(gp)? {
556                    NodeView::Internal(i) => (i.left, i.right),
557                    NodeView::Leaf(_) => {
558                        return Err(RcfError::InvalidConfig(
559                            "RandomCutTree::delete: grandparent is leaf".into(),
560                        ));
561                    }
562                };
563                if l.raw() == parent.raw() {
564                    self.store.set_internal_children(gp, sibling, r)?;
565                } else if r.raw() == parent.raw() {
566                    self.store.set_internal_children(gp, l, sibling)?;
567                } else {
568                    return Err(RcfError::InvalidConfig(
569                        "RandomCutTree::delete: parent not registered with grandparent".into(),
570                    ));
571                }
572                self.recompute_ancestors(gp, points)?;
573            }
574        }
575
576        Ok(())
577    }
578
579    /// Walk from `start` up to the root decrementing mass and
580    /// recomputing each internal bounding box from its (post-merge)
581    /// children.
582    fn recompute_ancestors<P>(&mut self, start: NodeRef, points: &P) -> RcfResult<()>
583    where
584        P: PointAccessor<D> + ?Sized,
585    {
586        let mut cur = Some(start);
587        while let Some(node) = cur {
588            let m = self.store.view(node)?.mass();
589            self.store.set_mass(node, m - 1)?;
590            if node.is_internal() {
591                let new_bbox = self.compute_internal_bbox(node, points)?;
592                self.store.set_internal_bbox(node, new_bbox)?;
593            }
594            cur = self.store.parent(node)?;
595        }
596        Ok(())
597    }
598
599    /// Compute the bounding box of an internal node from its children.
600    fn compute_internal_bbox<P>(&self, n: NodeRef, points: &P) -> RcfResult<BoundingBox<D>>
601    where
602        P: PointAccessor<D> + ?Sized,
603    {
604        let (left, right) = match self.store.view(n)? {
605            NodeView::Internal(i) => (i.left, i.right),
606            NodeView::Leaf(_) => {
607                return Err(RcfError::InvalidConfig(
608                    "RandomCutTree::compute_internal_bbox called on leaf".into(),
609                ));
610            }
611        };
612        let lb = self.bbox_of(left, points)?;
613        let rb = self.bbox_of(right, points)?;
614        Ok(lb.merged(&rb))
615    }
616
617    /// Borrow or build the bounding box of any node (internal: cached
618    /// — borrowed via [`Cow::Borrowed`] to skip an allocation; leaf:
619    /// built on the fly from the point store entry as
620    /// [`Cow::Owned`]).
621    fn bbox_of<'a, P>(&'a self, n: NodeRef, points: &'a P) -> RcfResult<Cow<'a, BoundingBox<D>>>
622    where
623        P: PointAccessor<D> + ?Sized,
624    {
625        match self.store.view(n)? {
626            NodeView::Internal(i) => Ok(Cow::Borrowed(&i.bbox)),
627            NodeView::Leaf(l) => {
628                let p = points.point(l.point_idx).ok_or(RcfError::OutOfBounds {
629                    index: l.point_idx,
630                    len: 0,
631                })?;
632                Ok(Cow::Owned(BoundingBox::<D>::from_point(p)?))
633            }
634        }
635    }
636
637    /// Non-mutating codisp estimate — walks root → leaf following
638    /// the stored cuts and accumulates the maximum per-depth ratio
639    /// `sibling_mass / subtree_mass` across the descent path.
640    ///
641    /// Matches the shape of the mutating [`crate::RandomCutForest::score_codisp`]
642    /// walk but **without** inserting the probe into the reservoir.
643    /// The classical `codisp` promises a frozen baseline (AWS /
644    /// rrcf); the mutating path's insert+delete cycle leaves
645    /// reservoir points evicted permanently, eroding that baseline
646    /// across long eval streams (observed on NAB `rogue_hold`:
647    /// score drifts from 0.69 to 0.20 after ~5 k probes).
648    ///
649    /// This path preserves the frozen-baseline promise exactly,
650    /// takes `&self` so it parallelises across trees, and costs
651    /// `O(depth · D)` per call — typically cheaper than the
652    /// mutating walk since there is no reservoir housekeeping.
653    ///
654    /// # Errors
655    ///
656    /// - [`RcfError::EmptyForest`] when the tree is empty.
657    /// - [`RcfError::DimensionMismatch`] when `point.len() != D`.
658    /// - [`RcfError::NaNValue`] when `point` contains a non-finite
659    ///   component.
660    pub fn codisp_stateless(&self, point: &[f64]) -> RcfResult<f64> {
661        ensure_dim(point, D)?;
662        ensure_finite(point)?;
663        let Some(root) = self.root else {
664            return Err(RcfError::EmptyForest);
665        };
666        let mut cur = root;
667        let mut max_disp = 0.0_f64;
668        loop {
669            match self.store.view(cur)? {
670                crate::tree::NodeView::Leaf(_) => return Ok(max_disp),
671                crate::tree::NodeView::Internal(i) => {
672                    let (next, sibling) = if i.cut.left_of(point) {
673                        (i.left, i.right)
674                    } else {
675                        (i.right, i.left)
676                    };
677                    let next_mass = self.store.view(next)?.mass();
678                    let sibling_mass = self.store.view(sibling)?.mass();
679                    if next_mass > 0 {
680                        #[allow(clippy::cast_precision_loss)]
681                        let disp = sibling_mass as f64 / next_mass as f64;
682                        if disp > max_disp {
683                            max_disp = disp;
684                        }
685                    }
686                    cur = next;
687                }
688            }
689        }
690    }
691
692    /// Walk from the root to the leaf matching `point` along the
693    /// stored cuts, dispatching callbacks to `visitor`. Returns the
694    /// visitor's final output.
695    ///
696    /// # Errors
697    ///
698    /// - [`RcfError::EmptyForest`] when the tree is empty.
699    /// - [`RcfError::DimensionMismatch`] when `point.len() != self.dimension()`.
700    /// - [`RcfError::NaNValue`] when `point` contains a non-finite component.
701    pub fn traverse<V: Visitor<D>>(&self, point: &[f64], mut visitor: V) -> RcfResult<V::Output> {
702        ensure_dim(point, D)?;
703        ensure_finite(point)?;
704        let Some(root) = self.root else {
705            return Err(RcfError::EmptyForest);
706        };
707        self.traverse_inner(root, point, 0, &mut visitor)?;
708        Ok(visitor.result())
709    }
710
711    /// Recursive helper for [`traverse`](Self::traverse).
712    fn traverse_inner<V: Visitor<D>>(
713        &self,
714        n: NodeRef,
715        point: &[f64],
716        depth: usize,
717        visitor: &mut V,
718    ) -> RcfResult<()> {
719        match self.store.view(n)? {
720            NodeView::Leaf(l) => {
721                visitor.accept_leaf(depth, l.mass, l.point_idx);
722                Ok(())
723            }
724            NodeView::Internal(i) => {
725                if visitor.needs_per_dim_prob() {
726                    let (prob, per_dim) = i.bbox.probability_of_cut(point)?;
727                    visitor.accept_internal(depth, i.mass, &i.cut, &i.bbox, prob, &per_dim);
728                } else {
729                    let prob = i.bbox.total_probability_of_cut(point)?;
730                    visitor.accept_internal(depth, i.mass, &i.cut, &i.bbox, prob, &[]);
731                }
732                let next = if i.cut.left_of(point) {
733                    i.left
734                } else {
735                    i.right
736                };
737                self.traverse_inner(next, point, depth + 1, visitor)
738            }
739        }
740    }
741}
742
743/// Whether `cut` strictly isolates `point` from `n_bbox` (i.e. `point`
744/// ends up alone on one side of the hyperplane).
745#[inline]
746fn isolates_point<const D: usize>(cut: &Cut, point: &[f64], n_bbox: &BoundingBox<D>) -> bool {
747    let d = cut.dim();
748    let v = cut.value();
749    let p_d = point[d];
750    let n_min = n_bbox.min()[d];
751    let n_max = n_bbox.max()[d];
752    // Case 1: point alone on the left (`p_d <= v < n_min`).
753    if p_d <= v && v < n_min {
754        return true;
755    }
756    // Case 2: point alone on the right (`n_max <= v < p_d`).
757    if n_max <= v && v < p_d {
758        return true;
759    }
760    false
761}
762
763#[cfg(test)]
764#[allow(clippy::float_cmp)] // Tests assert exact equality on bounding-box bounds.
765mod tests {
766    use super::*;
767    use rand::SeedableRng;
768    use rand_chacha::ChaCha8Rng;
769
770    use crate::visitor::Visitor;
771
772    /// Visitor that records the path it observed. Generic over `D` so
773    /// the trait impl matches every test tree dimensionality.
774    struct PathRecorder {
775        depths: Vec<usize>,
776        leaf_idx: Option<usize>,
777    }
778    impl PathRecorder {
779        fn new() -> Self {
780            Self {
781                depths: Vec::new(),
782                leaf_idx: None,
783            }
784        }
785    }
786    impl<const D: usize> Visitor<D> for PathRecorder {
787        type Output = (Vec<usize>, Option<usize>);
788        fn accept_internal(
789            &mut self,
790            depth: usize,
791            _mass: u64,
792            _cut: &Cut,
793            _bbox: &BoundingBox<D>,
794            _prob: f64,
795            _per_dim: &[f64],
796        ) {
797            self.depths.push(depth);
798        }
799        fn accept_leaf(&mut self, depth: usize, _mass: u64, point_idx: usize) {
800            self.depths.push(depth);
801            self.leaf_idx = Some(point_idx);
802        }
803        fn result(self) -> Self::Output {
804            (self.depths, self.leaf_idx)
805        }
806    }
807
808    fn fresh_rng(seed: u64) -> ChaCha8Rng {
809        ChaCha8Rng::seed_from_u64(seed)
810    }
811
812    #[test]
813    fn new_rejects_zero_dimension() {
814        assert!(matches!(
815            RandomCutTree::<0>::new(8).unwrap_err(),
816            RcfError::InvalidConfig(_)
817        ));
818    }
819
820    #[test]
821    fn new_rejects_zero_capacity() {
822        assert!(matches!(
823            RandomCutTree::<2>::new(0).unwrap_err(),
824            RcfError::InvalidConfig(_)
825        ));
826    }
827
828    #[test]
829    fn empty_tree_root_is_none() {
830        let t = RandomCutTree::<2>::new(8).unwrap();
831        assert!(t.root().is_none());
832        assert_eq!(t.distinct_point_count(), 0);
833        assert_eq!(t.dimension(), 2);
834    }
835
836    #[test]
837    fn add_first_point_creates_root_leaf() {
838        let mut t = RandomCutTree::<2>::new(8).unwrap();
839        let points: Vec<[f64; 2]> = vec![[1.0, 2.0]];
840        let mut rng = fresh_rng(42);
841        t.add(0, &points[0], &points, &mut rng).unwrap();
842        let root = t.root().unwrap();
843        assert!(root.is_leaf());
844        assert_eq!(t.distinct_point_count(), 1);
845    }
846
847    #[test]
848    fn add_two_points_creates_internal_root() {
849        let mut t = RandomCutTree::<2>::new(8).unwrap();
850        let p0 = [0.0_f64, 0.0];
851        let p1 = [1.0_f64, 1.0];
852        let points = vec![p0, p1];
853        let mut rng = fresh_rng(7);
854        t.add(0, &p0, &points, &mut rng).unwrap();
855        t.add(1, &p1, &points, &mut rng).unwrap();
856        let root = t.root().unwrap();
857        assert!(root.is_internal());
858        assert_eq!(t.distinct_point_count(), 2);
859        assert_eq!(t.store().view(root).unwrap().mass(), 2);
860    }
861
862    #[test]
863    fn add_rejects_dimension_mismatch() {
864        let mut t = RandomCutTree::<3>::new(8).unwrap();
865        let points: Vec<[f64; 3]> = vec![];
866        let mut rng = fresh_rng(1);
867        assert!(matches!(
868            t.add(0, &[1.0, 2.0], &points, &mut rng).unwrap_err(),
869            RcfError::DimensionMismatch { .. }
870        ));
871    }
872
873    #[test]
874    fn add_rejects_non_finite() {
875        let mut t = RandomCutTree::<2>::new(8).unwrap();
876        let points: Vec<[f64; 2]> = vec![];
877        let mut rng = fresh_rng(1);
878        assert!(matches!(
879            t.add(0, &[1.0, f64::NAN], &points, &mut rng).unwrap_err(),
880            RcfError::NaNValue
881        ));
882    }
883
884    #[test]
885    fn add_rejects_duplicate_point_idx() {
886        let mut t = RandomCutTree::<2>::new(8).unwrap();
887        let p = [0.0_f64, 0.0];
888        let points = vec![p];
889        let mut rng = fresh_rng(1);
890        t.add(0, &p, &points, &mut rng).unwrap();
891        assert!(matches!(
892            t.add(0, &p, &points, &mut rng).unwrap_err(),
893            RcfError::InvalidConfig(_)
894        ));
895    }
896
897    #[test]
898    fn duplicate_coordinate_increments_leaf_mass() {
899        let mut t = RandomCutTree::<2>::new(8).unwrap();
900        let p = [3.0_f64, 4.0];
901        let mut points = vec![p];
902        let mut rng = fresh_rng(1);
903        t.add(0, &p, &points, &mut rng).unwrap();
904        points.push(p);
905        t.add(1, &p, &points, &mut rng).unwrap();
906        let root = t.root().unwrap();
907        assert!(root.is_leaf(), "single-point tree stays a leaf");
908        assert_eq!(t.store().view(root).unwrap().mass(), 2);
909        assert_eq!(t.distinct_point_count(), 2);
910    }
911
912    #[test]
913    fn add_many_distinct_points_keeps_mass_invariant() {
914        let mut t = RandomCutTree::<4>::new(64).unwrap();
915        let mut rng = fresh_rng(99);
916        let mut points: Vec<[f64; 4]> = Vec::new();
917        for i in 0_u32..32 {
918            let f = f64::from(i);
919            let p = [f, f * 2.0, f * 0.5, -f];
920            points.push(p);
921            t.add(i as usize, &p, &points, &mut rng).unwrap();
922        }
923        let root = t.root().unwrap();
924        assert!(root.is_internal());
925        assert_eq!(t.store().view(root).unwrap().mass(), 32);
926        assert_eq!(t.distinct_point_count(), 32);
927    }
928
929    #[test]
930    fn delete_unknown_point_idx_is_err() {
931        let mut t = RandomCutTree::<2>::new(8).unwrap();
932        let points: Vec<[f64; 2]> = vec![];
933        assert!(matches!(
934            t.delete(99, &points).unwrap_err(),
935            RcfError::InvalidConfig(_)
936        ));
937    }
938
939    #[test]
940    fn delete_root_leaf_clears_tree() {
941        let mut t = RandomCutTree::<2>::new(8).unwrap();
942        let p = [1.0_f64, 2.0];
943        let points = vec![p];
944        let mut rng = fresh_rng(1);
945        t.add(0, &p, &points, &mut rng).unwrap();
946        t.delete(0, &points).unwrap();
947        assert!(t.root().is_none());
948        assert_eq!(t.distinct_point_count(), 0);
949        assert_eq!(t.store().live_count(), 0);
950    }
951
952    #[test]
953    fn delete_one_of_two_points_leaves_sibling_as_root() {
954        let mut t = RandomCutTree::<2>::new(8).unwrap();
955        let p0 = [0.0_f64, 0.0];
956        let p1 = [1.0_f64, 1.0];
957        let points = vec![p0, p1];
958        let mut rng = fresh_rng(7);
959        t.add(0, &p0, &points, &mut rng).unwrap();
960        t.add(1, &p1, &points, &mut rng).unwrap();
961        t.delete(0, &points).unwrap();
962        let root = t.root().unwrap();
963        assert!(root.is_leaf());
964        let leaf = t.store().leaf(root).unwrap();
965        assert_eq!(leaf.point_idx, 1);
966        assert_eq!(leaf.mass, 1);
967        assert_eq!(t.store().live_count(), 1);
968    }
969
970    #[test]
971    fn delete_duplicate_decrements_mass_only() {
972        let mut t = RandomCutTree::<2>::new(8).unwrap();
973        let p = [1.0_f64, 1.0];
974        let mut points = vec![p, p];
975        let mut rng = fresh_rng(1);
976        t.add(0, &p, &points, &mut rng).unwrap();
977        t.add(1, &p, &points, &mut rng).unwrap();
978        let root = t.root().unwrap();
979        assert_eq!(t.store().view(root).unwrap().mass(), 2);
980        t.delete(1, &points).unwrap();
981        assert_eq!(t.store().view(root).unwrap().mass(), 1);
982        assert!(t.root().unwrap().is_leaf());
983        assert!(!t.contains(1));
984        assert!(t.contains(0));
985        assert_eq!(t.store().view(root).unwrap().mass(), 1);
986        points.pop();
987    }
988
989    #[test]
990    fn delete_then_re_add_keeps_capacity_bounded() {
991        let mut t = RandomCutTree::<2>::new(4).unwrap();
992        let mut rng = fresh_rng(11);
993        let mut points: Vec<[f64; 2]> = Vec::new();
994        let mut live_idxs: Vec<usize> = Vec::new();
995        for i in 0_u32..4 {
996            let f = f64::from(i);
997            let p = [f, f + 1.0];
998            points.push(p);
999            let idx = i as usize;
1000            t.add(idx, &p, &points, &mut rng).unwrap();
1001            live_idxs.push(idx);
1002        }
1003        for _ in 0..10 {
1004            let old = live_idxs.remove(0);
1005            t.delete(old, &points).unwrap();
1006            let new_idx = points.len();
1007            let p = points[old];
1008            points.push(p);
1009            t.add(new_idx, &p, &points, &mut rng).unwrap();
1010            live_idxs.push(new_idx);
1011        }
1012        assert_eq!(t.distinct_point_count(), 4);
1013    }
1014
1015    #[test]
1016    fn traverse_empty_tree_is_err() {
1017        let t = RandomCutTree::<2>::new(4).unwrap();
1018        let v = PathRecorder::new();
1019        assert!(matches!(
1020            t.traverse(&[1.0, 2.0], v).unwrap_err(),
1021            RcfError::EmptyForest
1022        ));
1023    }
1024
1025    #[test]
1026    fn traverse_single_leaf_visits_only_leaf() {
1027        let mut t = RandomCutTree::<2>::new(4).unwrap();
1028        let p = [1.0_f64, 2.0];
1029        let points = vec![p];
1030        let mut rng = fresh_rng(0);
1031        t.add(0, &p, &points, &mut rng).unwrap();
1032        let v = PathRecorder::new();
1033        let (depths, leaf_idx) = t.traverse(&p, v).unwrap();
1034        assert_eq!(depths, vec![0]);
1035        assert_eq!(leaf_idx, Some(0));
1036    }
1037
1038    #[test]
1039    fn traverse_visits_in_root_to_leaf_order() {
1040        let mut t = RandomCutTree::<2>::new(8).unwrap();
1041        let p0 = [0.0_f64, 0.0];
1042        let p1 = [10.0_f64, 10.0];
1043        let points = vec![p0, p1];
1044        let mut rng = fresh_rng(123);
1045        t.add(0, &p0, &points, &mut rng).unwrap();
1046        t.add(1, &p1, &points, &mut rng).unwrap();
1047        let v = PathRecorder::new();
1048        let (depths, leaf_idx) = t.traverse(&p1, v).unwrap();
1049        assert!(depths.array_windows::<2>().all(|[a, b]| a < b));
1050        assert!(leaf_idx == Some(0) || leaf_idx == Some(1));
1051    }
1052
1053    #[test]
1054    fn traverse_rejects_dim_mismatch_and_nan() {
1055        let mut t = RandomCutTree::<2>::new(4).unwrap();
1056        let p = [1.0_f64, 2.0];
1057        let points = vec![p];
1058        let mut rng = fresh_rng(0);
1059        t.add(0, &p, &points, &mut rng).unwrap();
1060        assert!(matches!(
1061            t.traverse(&[1.0], PathRecorder::new()).unwrap_err(),
1062            RcfError::DimensionMismatch { .. }
1063        ));
1064        assert!(matches!(
1065            t.traverse(&[f64::NAN, 0.0], PathRecorder::new())
1066                .unwrap_err(),
1067            RcfError::NaNValue
1068        ));
1069    }
1070
1071    #[test]
1072    fn deterministic_tree_under_fixed_seed() {
1073        fn build(seed: u64) -> RandomCutTree<3> {
1074            let mut t = RandomCutTree::<3>::new(64).unwrap();
1075            let mut rng = fresh_rng(seed);
1076            let mut points: Vec<[f64; 3]> = Vec::new();
1077            for i in 0_u32..16 {
1078                let f = f64::from(i);
1079                let p = [f, f * 2.0, f * 0.25 + 1.0];
1080                points.push(p);
1081                t.add(i as usize, &p, &points, &mut rng).unwrap();
1082            }
1083            t
1084        }
1085        let t1 = build(42);
1086        let t2 = build(42);
1087        assert_eq!(t1.store().live_count(), t2.store().live_count());
1088        assert_eq!(t1.distinct_point_count(), t2.distinct_point_count());
1089        assert_eq!(
1090            t1.store().live_internal_count(),
1091            t2.store().live_internal_count()
1092        );
1093        assert_eq!(t1.store().live_leaf_count(), t2.store().live_leaf_count());
1094    }
1095
1096    #[test]
1097    fn isolates_point_above_separated() {
1098        let mut bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
1099        bbox.extend(&[1.0, 1.0]).unwrap();
1100        assert!(isolates_point(&Cut::new(0, 5.0), &[10.0, 0.5], &bbox));
1101        assert!(!isolates_point(&Cut::new(0, 0.5), &[10.0, 0.5], &bbox));
1102    }
1103
1104    #[test]
1105    fn isolates_point_below_separated() {
1106        let mut bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
1107        bbox.extend(&[1.0, 1.0]).unwrap();
1108        assert!(isolates_point(&Cut::new(1, -2.0), &[0.5, -10.0], &bbox));
1109    }
1110
1111    #[test]
1112    fn isolates_point_inside_never() {
1113        let mut bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
1114        bbox.extend(&[10.0, 10.0]).unwrap();
1115        assert!(!isolates_point(&Cut::new(0, 5.0), &[5.0, 5.0], &bbox));
1116    }
1117
1118    // Property test: under uniform-random insertions, the tree depth
1119    // stays within `4 · ⌈log₂ N⌉ + 4` — the "expected `O(log n)`"
1120    // bound from Guha 2016 §2 with a generous constant to absorb
1121    // the natural variance of random cuts.
1122    proptest::proptest! {
1123        #![proptest_config(proptest::test_runner::Config { cases: 32, ..proptest::test_runner::Config::default() })]
1124        #[test]
1125        fn depth_bounded_under_uniform_inserts(seed in 0_u64..10_000) {
1126            const N: usize = 64;
1127            const D: usize = 4;
1128            #[allow(clippy::cast_possible_truncation)]
1129            let mut t = RandomCutTree::<D>::new(N as u32).unwrap();
1130            let mut rng = ChaCha8Rng::seed_from_u64(seed);
1131            let mut points: Vec<[f64; D]> = Vec::with_capacity(N);
1132
1133            for i in 0..N {
1134                let mut p = [0.0_f64; D];
1135                for slot in &mut p {
1136                    *slot = <ChaCha8Rng as rand::RngExt>::random::<f64>(&mut rng) * 100.0;
1137                }
1138                points.push(p);
1139                t.add(i, &p, &points, &mut rng).unwrap();
1140            }
1141
1142            #[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation, clippy::cast_sign_loss)]
1143            let log2_n = (N as f64).log2().ceil() as usize;
1144            let bound = 4 * log2_n + 4;
1145            let depth = t.max_depth().expect("non-empty tree");
1146            proptest::prop_assert!(
1147                depth <= bound,
1148                "depth = {} exceeds bound 4·⌈log₂ {}⌉ + 4 = {} (seed={})",
1149                depth, N, bound, seed,
1150            );
1151        }
1152    }
1153}