anomstream_core/domain/
cut.rs1use rand::{Rng, RngExt};
15
16use crate::domain::bounding_box::BoundingBox;
17use crate::error::{RcfError, RcfResult};
18
19#[derive(Debug, Clone, Copy, PartialEq)]
21#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22pub struct Cut {
23 dim: usize,
25 value: f64,
27}
28
29impl Cut {
30 #[must_use]
32 pub fn new(dim: usize, value: f64) -> Self {
33 Self { dim, value }
34 }
35
36 #[must_use]
38 #[inline]
39 pub fn dim(&self) -> usize {
40 self.dim
41 }
42
43 #[must_use]
45 #[inline]
46 pub fn value(&self) -> f64 {
47 self.value
48 }
49
50 #[must_use]
62 #[inline]
63 pub fn left_of(&self, point: &[f64]) -> bool {
64 point[self.dim] <= self.value
65 }
66
67 pub fn random_cut<const D: usize, R: Rng + ?Sized>(
94 bbox: &BoundingBox<D>,
95 rng: &mut R,
96 ) -> RcfResult<Self> {
97 let total = bbox.range_sum();
98 if total <= 0.0 {
99 return Err(RcfError::EmptyBoundingBox);
100 }
101
102 let mut target = rng.random::<f64>() * total;
103 let mut chosen = 0_usize;
104 for d in 0..bbox.dim() {
105 let r = bbox.range_at(d);
106 if target < r {
107 chosen = d;
108 break;
109 }
110 target -= r;
111 chosen = d;
112 }
113
114 let lo = bbox.min()[chosen];
115 let hi = bbox.max()[chosen];
116 let value = if (hi - lo).abs() < f64::EPSILON {
117 lo
121 } else {
122 lo + rng.random::<f64>() * (hi - lo)
123 };
124
125 Ok(Self { dim: chosen, value })
126 }
127}
128
129#[cfg(test)]
130mod tests {
131 use super::*;
132 use rand::SeedableRng;
133 use rand_chacha::ChaCha8Rng;
134
135 fn unit_box<const D: usize>() -> BoundingBox<D> {
136 let mut b = BoundingBox::<D>::from_point(&vec![0.0; D]).unwrap();
137 b.extend(&vec![1.0; D]).unwrap();
138 b
139 }
140
141 #[test]
142 fn left_of_strictly_below_is_left() {
143 let cut = Cut::new(0, 0.5);
144 assert!(cut.left_of(&[0.4, 9.9]));
145 }
146
147 #[test]
148 fn left_of_at_value_is_left() {
149 let cut = Cut::new(1, 2.0);
150 assert!(cut.left_of(&[1.0, 2.0]));
151 }
152
153 #[test]
154 fn left_of_strictly_above_is_right() {
155 let cut = Cut::new(0, 0.5);
156 assert!(!cut.left_of(&[0.6, 9.9]));
157 }
158
159 #[test]
160 fn random_cut_is_in_range() {
161 let mut rng = ChaCha8Rng::seed_from_u64(1);
162 let bbox: BoundingBox<3> = unit_box();
163 for _ in 0..100 {
164 let cut = Cut::random_cut(&bbox, &mut rng).unwrap();
165 assert!(cut.dim() < bbox.dim());
166 assert!(cut.value() >= bbox.min()[cut.dim()]);
167 assert!(cut.value() <= bbox.max()[cut.dim()]);
168 }
169 }
170
171 #[test]
172 fn random_cut_degenerate_box_fails() {
173 let mut rng = ChaCha8Rng::seed_from_u64(1);
174 let bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
175 let err = Cut::random_cut(&bbox, &mut rng).unwrap_err();
176 assert!(matches!(err, RcfError::EmptyBoundingBox));
177 }
178
179 #[test]
180 fn random_cut_dim_distribution_proportional_to_range() {
181 let mut bbox = BoundingBox::<2>::from_point(&[0.0, 0.0]).unwrap();
182 bbox.extend(&[1.0, 9.0]).unwrap();
183
184 let mut rng = ChaCha8Rng::seed_from_u64(7);
185 let mut counts = [0_u32; 2];
186 let trials = 5000;
187 for _ in 0..trials {
188 let cut = Cut::random_cut(&bbox, &mut rng).unwrap();
189 counts[cut.dim()] += 1;
190 }
191 let p1 = f64::from(counts[1]) / f64::from(trials);
192 assert!(
193 (0.87..=0.93).contains(&p1),
194 "dim-1 share = {p1} outside [0.87, 0.93]"
195 );
196 }
197
198 #[test]
199 fn random_cut_deterministic_for_same_seed() {
200 let bbox: BoundingBox<4> = unit_box();
201 let mut rng_a = ChaCha8Rng::seed_from_u64(42);
202 let mut rng_b = ChaCha8Rng::seed_from_u64(42);
203 for _ in 0..20 {
204 let a = Cut::random_cut(&bbox, &mut rng_a).unwrap();
205 let b = Cut::random_cut(&bbox, &mut rng_b).unwrap();
206 assert_eq!(a, b);
207 }
208 }
209
210 #[test]
211 fn cut_constructor_accessors() {
212 let c = Cut::new(7, 1.25);
213 assert_eq!(c.dim(), 7);
214 assert!((c.value() - 1.25).abs() < f64::EPSILON);
215 }
216}