Skip to main content

anomstream_core/domain/
cut.rs

1//! A single random cut: a hyperplane perpendicular to one axis at a
2//! given coordinate.
3//!
4//! [`Cut::random_cut`] picks the dimension proportionally to the
5//! per-dimension range of the [`BoundingBox`] (Guha et al. 2016, §2)
6//! then samples the cut value uniformly in `[min_d, max_d]`. Callers
7//! pass any [`rand::Rng`] so reproducibility is purely a function
8//! of the seed they own.
9//!
10//! When the bounding box is fully degenerate (every dimension has
11//! zero range) [`Cut::random_cut`] returns [`RcfError::EmptyBoundingBox`]
12//! — there is no meaningful cut.
13
14use rand::{Rng, RngExt};
15
16use crate::domain::bounding_box::BoundingBox;
17use crate::error::{RcfError, RcfResult};
18
19/// A random cut along one dimension at a given coordinate.
20#[derive(Debug, Clone, Copy, PartialEq)]
21#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22pub struct Cut {
23    /// Dimension that the cut is perpendicular to.
24    dim: usize,
25    /// Cut coordinate in `[bbox.min[dim], bbox.max[dim]]`.
26    value: f64,
27}
28
29impl Cut {
30    /// Build a cut explicitly. Used by tests and the persistence layer.
31    #[must_use]
32    pub fn new(dim: usize, value: f64) -> Self {
33        Self { dim, value }
34    }
35
36    /// Cut dimension.
37    #[must_use]
38    #[inline]
39    pub fn dim(&self) -> usize {
40        self.dim
41    }
42
43    /// Cut coordinate.
44    #[must_use]
45    #[inline]
46    pub fn value(&self) -> f64 {
47        self.value
48    }
49
50    /// Decide which side of the cut a `point` lies on.
51    ///
52    /// Returns `true` when `point[dim] <= self.value`. The exact-tie
53    /// behaviour is consistent with the AWS reference (left subtree
54    /// gets the equal-or-below half).
55    ///
56    /// # Panics
57    ///
58    /// Panics when `self.dim() >= point.len()` — call sites always
59    /// size-check the point first via
60    /// [`crate::domain::ensure_dim`].
61    #[must_use]
62    #[inline]
63    pub fn left_of(&self, point: &[f64]) -> bool {
64        point[self.dim] <= self.value
65    }
66
67    /// Sample a random cut over `bbox` with the dimension chosen
68    /// proportionally to its range and the value uniform in
69    /// `[min_d, max_d]`.
70    ///
71    /// # Errors
72    ///
73    /// Returns [`RcfError::EmptyBoundingBox`] when every per-dimension
74    /// range is zero (no cut is meaningful).
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use rand::SeedableRng;
80    /// use rand_chacha::ChaCha8Rng;
81    /// use anomstream_core::domain::{BoundingBox, Cut};
82    ///
83    /// let mut rng = ChaCha8Rng::seed_from_u64(42);
84    /// let mut bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
85    /// bbox.extend(&[1.0, 4.0]).unwrap(); // dim 1 has 4× the range of dim 0
86    ///
87    /// let cut = Cut::random_cut(&bbox, &mut rng).unwrap();
88    /// assert!(cut.dim() < bbox.dim());
89    /// let lo = bbox.min()[cut.dim()];
90    /// let hi = bbox.max()[cut.dim()];
91    /// assert!(cut.value() >= lo && cut.value() <= hi);
92    /// ```
93    pub fn random_cut<const D: usize, R: Rng + ?Sized>(
94        bbox: &BoundingBox<D>,
95        rng: &mut R,
96    ) -> RcfResult<Self> {
97        let total = bbox.range_sum();
98        if total <= 0.0 {
99            return Err(RcfError::EmptyBoundingBox);
100        }
101
102        let mut target = rng.random::<f64>() * total;
103        let mut chosen = 0_usize;
104        for d in 0..bbox.dim() {
105            let r = bbox.range_at(d);
106            if target < r {
107                chosen = d;
108                break;
109            }
110            target -= r;
111            chosen = d;
112        }
113
114        let lo = bbox.min()[chosen];
115        let hi = bbox.max()[chosen];
116        let value = if (hi - lo).abs() < f64::EPSILON {
117            // Edge case: chosen dim was zero-range (only possible
118            // through floating accumulation drift). Fall back to the
119            // axis coordinate so the cut is degenerate but well-formed.
120            lo
121        } else {
122            lo + rng.random::<f64>() * (hi - lo)
123        };
124
125        Ok(Self { dim: chosen, value })
126    }
127}
128
129#[cfg(test)]
130mod tests {
131    use super::*;
132    use rand::SeedableRng;
133    use rand_chacha::ChaCha8Rng;
134
135    fn unit_box<const D: usize>() -> BoundingBox<D> {
136        let mut b = BoundingBox::<D>::from_point(&vec![0.0; D]).unwrap();
137        b.extend(&vec![1.0; D]).unwrap();
138        b
139    }
140
141    #[test]
142    fn left_of_strictly_below_is_left() {
143        let cut = Cut::new(0, 0.5);
144        assert!(cut.left_of(&[0.4, 9.9]));
145    }
146
147    #[test]
148    fn left_of_at_value_is_left() {
149        let cut = Cut::new(1, 2.0);
150        assert!(cut.left_of(&[1.0, 2.0]));
151    }
152
153    #[test]
154    fn left_of_strictly_above_is_right() {
155        let cut = Cut::new(0, 0.5);
156        assert!(!cut.left_of(&[0.6, 9.9]));
157    }
158
159    #[test]
160    fn random_cut_is_in_range() {
161        let mut rng = ChaCha8Rng::seed_from_u64(1);
162        let bbox: BoundingBox<3> = unit_box();
163        for _ in 0..100 {
164            let cut = Cut::random_cut(&bbox, &mut rng).unwrap();
165            assert!(cut.dim() < bbox.dim());
166            assert!(cut.value() >= bbox.min()[cut.dim()]);
167            assert!(cut.value() <= bbox.max()[cut.dim()]);
168        }
169    }
170
171    #[test]
172    fn random_cut_degenerate_box_fails() {
173        let mut rng = ChaCha8Rng::seed_from_u64(1);
174        let bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
175        let err = Cut::random_cut(&bbox, &mut rng).unwrap_err();
176        assert!(matches!(err, RcfError::EmptyBoundingBox));
177    }
178
179    #[test]
180    fn random_cut_dim_distribution_proportional_to_range() {
181        let mut bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
182        bbox.extend(&[1.0, 9.0]).unwrap();
183
184        let mut rng = ChaCha8Rng::seed_from_u64(7);
185        let mut counts = [0_u32; 2];
186        let trials = 5000;
187        for _ in 0..trials {
188            let cut = Cut::random_cut(&bbox, &mut rng).unwrap();
189            counts[cut.dim()] += 1;
190        }
191        let p1 = f64::from(counts[1]) / f64::from(trials);
192        assert!(
193            (0.87..=0.93).contains(&p1),
194            "dim-1 share = {p1} outside [0.87, 0.93]"
195        );
196    }
197
198    #[test]
199    fn random_cut_deterministic_for_same_seed() {
200        let bbox: BoundingBox<4> = unit_box();
201        let mut rng_a = ChaCha8Rng::seed_from_u64(42);
202        let mut rng_b = ChaCha8Rng::seed_from_u64(42);
203        for _ in 0..20 {
204            let a = Cut::random_cut(&bbox, &mut rng_a).unwrap();
205            let b = Cut::random_cut(&bbox, &mut rng_b).unwrap();
206            assert_eq!(a, b);
207        }
208    }
209
210    #[test]
211    fn cut_constructor_accessors() {
212        let c = Cut::new(7, 1.25);
213        assert_eq!(c.dim(), 7);
214        assert!((c.value() - 1.25).abs() < f64::EPSILON);
215    }
216}