Skip to main content

irithyll_core/histogram/
uniform.rs

1//! Equal-width (uniform) binning strategy.
2//!
3//! The simplest binning approach: observe the data range (min, max), then
4//! divide it into `n_bins` equal-width intervals. Fast and deterministic,
5//! but can produce poor splits when data is heavily skewed. Serves as the
6//! baseline strategy and fallback when quantile/k-means binning is unnecessary.
7
8use alloc::boxed::Box;
9use alloc::vec;
10use alloc::vec::Vec;
11
12use crate::histogram::{BinEdges, BinningStrategy};
13
14/// Equal-width binning: observes min/max, divides range into equal bins.
15///
16/// # Example
17///
18/// ```ignore
19/// let mut binner = UniformBinning::new();
20/// for &v in &[1.0, 3.0, 5.0, 7.0, 9.0] {
21///     binner.observe(v);
22/// }
23/// let edges = binner.compute_edges(4);
24/// // edges.edges == [3.0, 5.0, 7.0]  (dividing [1.0, 9.0] into 4 bins)
25/// ```
26#[derive(Debug, Clone)]
27pub struct UniformBinning {
28    min: f64,
29    max: f64,
30    count: u64,
31}
32
33impl UniformBinning {
34    /// Create a new `UniformBinning` in its initial (empty) state.
35    pub fn new() -> Self {
36        Self {
37            min: f64::MAX,
38            max: f64::MIN,
39            count: 0,
40        }
41    }
42}
43
44impl Default for UniformBinning {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl BinningStrategy for UniformBinning {
51    /// Observe a single value, updating the running min/max.
52    fn observe(&mut self, value: f64) {
53        if value < self.min {
54            self.min = value;
55        }
56        if value > self.max {
57            self.max = value;
58        }
59        self.count += 1;
60    }
61
62    /// Compute bin edges by dividing `[min, max]` into `n_bins` equal-width intervals.
63    ///
64    /// Returns `n_bins - 1` internal boundary values. If fewer than 2 values have
65    /// been observed or all values are identical, returns a single edge at min
66    /// (producing 2 bins with all data in one of them), which is the degenerate
67    /// but safe fallback.
68    fn compute_edges(&self, n_bins: usize) -> BinEdges {
69        // Degenerate cases: not enough data to define a range
70        if self.count < 2 || (self.max - self.min).abs() < f64::EPSILON {
71            return BinEdges {
72                edges: vec![self.min],
73            };
74        }
75
76        let range = self.max - self.min;
77        let bin_width = range / n_bins as f64;
78        let n_edges = n_bins.saturating_sub(1);
79
80        let edges: Vec<f64> = (1..=n_edges)
81            .map(|i| self.min + bin_width * i as f64)
82            .collect();
83
84        BinEdges { edges }
85    }
86
87    /// Reset to the initial empty state.
88    fn reset(&mut self) {
89        self.min = f64::MAX;
90        self.max = f64::MIN;
91        self.count = 0;
92    }
93
94    /// Create a fresh (empty) instance.
95    fn clone_fresh(&self) -> Box<dyn BinningStrategy> {
96        Box::new(UniformBinning::new())
97    }
98}
99
100#[cfg(test)]
101mod tests {
102    use super::*;
103
104    #[test]
105    fn new_starts_empty() {
106        let b = UniformBinning::new();
107        assert_eq!(b.count, 0);
108        assert_eq!(b.min, f64::MAX);
109        assert_eq!(b.max, f64::MIN);
110    }
111
112    #[test]
113    fn observe_updates_min_max() {
114        let mut b = UniformBinning::new();
115        b.observe(5.0);
116        assert_eq!(b.min, 5.0);
117        assert_eq!(b.max, 5.0);
118        assert_eq!(b.count, 1);
119
120        b.observe(2.0);
121        assert_eq!(b.min, 2.0);
122        assert_eq!(b.max, 5.0);
123
124        b.observe(8.0);
125        assert_eq!(b.min, 2.0);
126        assert_eq!(b.max, 8.0);
127        assert_eq!(b.count, 3);
128    }
129
130    #[test]
131    fn edges_evenly_spaced() {
132        let mut b = UniformBinning::new();
133        for &v in &[0.0, 2.0, 4.0, 6.0, 8.0, 10.0] {
134            b.observe(v);
135        }
136
137        // 5 bins over [0.0, 10.0] -> width = 2.0, edges at 2.0, 4.0, 6.0, 8.0
138        let edges = b.compute_edges(5);
139        assert_eq!(edges.edges.len(), 4);
140        assert_eq!(edges.n_bins(), 5);
141
142        let expected = [2.0, 4.0, 6.0, 8.0];
143        for (got, want) in edges.edges.iter().zip(expected.iter()) {
144            assert!((got - want).abs() < 1e-12, "edge {got} != expected {want}");
145        }
146
147        // Verify equal spacing
148        let width = edges.edges[1] - edges.edges[0];
149        for i in 1..edges.edges.len() {
150            let gap = edges.edges[i] - edges.edges[i - 1];
151            assert!(
152                (gap - width).abs() < 1e-12,
153                "uneven spacing: gap {gap} != width {width}"
154            );
155        }
156    }
157
158    #[test]
159    fn edges_two_bins() {
160        let mut b = UniformBinning::new();
161        b.observe(0.0);
162        b.observe(10.0);
163
164        // 2 bins -> 1 edge at midpoint
165        let edges = b.compute_edges(2);
166        assert_eq!(edges.edges.len(), 1);
167        assert!((edges.edges[0] - 5.0).abs() < 1e-12);
168    }
169
170    #[test]
171    fn edges_single_value_degenerate() {
172        let mut b = UniformBinning::new();
173        b.observe(42.0);
174        b.observe(42.0);
175        b.observe(42.0);
176
177        let edges = b.compute_edges(4);
178        // All values identical -> degenerate fallback: single edge at min
179        assert_eq!(edges.edges.len(), 1);
180        assert!((edges.edges[0] - 42.0).abs() < 1e-12);
181        assert_eq!(edges.n_bins(), 2);
182    }
183
184    #[test]
185    fn edges_too_few_observations() {
186        let mut b = UniformBinning::new();
187        b.observe(5.0);
188
189        // Only 1 observation -> degenerate
190        let edges = b.compute_edges(4);
191        assert_eq!(edges.edges.len(), 1);
192        assert!((edges.edges[0] - 5.0).abs() < 1e-12);
193    }
194
195    #[test]
196    fn edges_empty_state() {
197        let b = UniformBinning::new();
198        let edges = b.compute_edges(4);
199        // count == 0 -> degenerate, single edge at f64::MAX
200        assert_eq!(edges.edges.len(), 1);
201        assert_eq!(edges.n_bins(), 2);
202    }
203
204    #[test]
205    fn reset_clears_state() {
206        let mut b = UniformBinning::new();
207        b.observe(1.0);
208        b.observe(10.0);
209        assert_eq!(b.count, 2);
210
211        b.reset();
212        assert_eq!(b.count, 0);
213        assert_eq!(b.min, f64::MAX);
214        assert_eq!(b.max, f64::MIN);
215    }
216
217    #[test]
218    fn clone_fresh_returns_empty() {
219        let mut b = UniformBinning::new();
220        b.observe(1.0);
221        b.observe(10.0);
222
223        let fresh = b.clone_fresh();
224        // Fresh should produce degenerate edges (empty state)
225        let edges = fresh.compute_edges(4);
226        assert_eq!(edges.edges.len(), 1);
227    }
228
229    #[test]
230    fn many_bins_fine_grained() {
231        let mut b = UniformBinning::new();
232        b.observe(0.0);
233        b.observe(100.0);
234
235        let edges = b.compute_edges(100);
236        assert_eq!(edges.edges.len(), 99);
237        assert_eq!(edges.n_bins(), 100);
238
239        // Each edge should be at 1.0, 2.0, ..., 99.0
240        for (i, &e) in edges.edges.iter().enumerate() {
241            let expected = (i + 1) as f64;
242            assert!(
243                (e - expected).abs() < 1e-10,
244                "edge[{i}] = {e}, expected {expected}"
245            );
246        }
247    }
248
249    #[test]
250    fn negative_range() {
251        let mut b = UniformBinning::new();
252        b.observe(-10.0);
253        b.observe(-2.0);
254
255        // 4 bins over [-10.0, -2.0], width = 2.0, edges at -8.0, -6.0, -4.0
256        let edges = b.compute_edges(4);
257        assert_eq!(edges.edges.len(), 3);
258
259        let expected = [-8.0, -6.0, -4.0];
260        for (got, want) in edges.edges.iter().zip(expected.iter()) {
261            assert!((got - want).abs() < 1e-12, "edge {got} != expected {want}");
262        }
263    }
264
265    #[test]
266    fn single_bin_no_edges() {
267        let mut b = UniformBinning::new();
268        b.observe(0.0);
269        b.observe(10.0);
270
271        // 1 bin -> 0 internal edges
272        let edges = b.compute_edges(1);
273        assert_eq!(edges.edges.len(), 0);
274        assert_eq!(edges.n_bins(), 1);
275    }
276
277    #[test]
278    fn find_bin_with_uniform_edges() {
279        let mut b = UniformBinning::new();
280        for &v in &[0.0, 10.0] {
281            b.observe(v);
282        }
283        let edges = b.compute_edges(5);
284        // edges at 2.0, 4.0, 6.0, 8.0
285
286        assert_eq!(edges.find_bin(0.0), 0); // below first edge
287        assert_eq!(edges.find_bin(1.9), 0); // still in first bin
288        assert_eq!(edges.find_bin(2.0), 1); // exact edge -> next bin
289        assert_eq!(edges.find_bin(3.0), 1); // between 2.0 and 4.0
290        assert_eq!(edges.find_bin(6.0), 3); // exact edge at 6.0 -> bin 3
291        assert_eq!(edges.find_bin(9.0), 4); // above last edge
292        assert_eq!(edges.find_bin(10.0), 4); // at max
293    }
294}