1#[derive(Debug, Clone, PartialEq)]
7pub struct Bin<T> {
8 pub x0: f64,
10 pub x1: f64,
12 pub values: Vec<T>,
14}
15
16impl<T> Bin<T> {
17 pub fn len(&self) -> usize {
19 self.values.len()
20 }
21
22 pub fn is_empty(&self) -> bool {
24 self.values.is_empty()
25 }
26}
27
28pub struct BinGenerator<T> {
30 value: Option<Box<dyn Fn(&T) -> f64 + Send + Sync>>,
31 domain: Option<(f64, f64)>,
32 thresholds: ThresholdStrategy,
33}
34
35impl<T> std::fmt::Debug for BinGenerator<T> {
36 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37 f.debug_struct("BinGenerator")
38 .field("value", &self.value.is_some())
39 .field("domain", &self.domain)
40 .field("thresholds", &self.thresholds)
41 .finish()
42 }
43}
44
45#[derive(Debug, Clone, Default)]
47pub enum ThresholdStrategy {
48 Count(usize),
50 Values(Vec<f64>),
52 #[default]
54 Sturges,
55 FreedmanDiaconis,
57 Scott,
59}
60
61impl<T: Clone> Default for BinGenerator<T> {
62 fn default() -> Self {
63 Self::new()
64 }
65}
66
67impl<T: Clone> BinGenerator<T> {
68 pub fn new() -> Self {
70 Self {
71 value: None,
72 domain: None,
73 thresholds: ThresholdStrategy::Sturges,
74 }
75 }
76
77 pub fn value<F>(mut self, f: F) -> Self
79 where
80 F: Fn(&T) -> f64 + Send + Sync + 'static,
81 {
82 self.value = Some(Box::new(f));
83 self
84 }
85
86 pub fn domain(mut self, min: f64, max: f64) -> Self {
88 self.domain = Some((min, max));
89 self
90 }
91
92 pub fn thresholds_count(mut self, count: usize) -> Self {
94 self.thresholds = ThresholdStrategy::Count(count);
95 self
96 }
97
98 pub fn thresholds(mut self, values: Vec<f64>) -> Self {
100 self.thresholds = ThresholdStrategy::Values(values);
101 self
102 }
103
104 pub fn thresholds_sturges(mut self) -> Self {
106 self.thresholds = ThresholdStrategy::Sturges;
107 self
108 }
109
110 pub fn thresholds_freedman_diaconis(mut self) -> Self {
112 self.thresholds = ThresholdStrategy::FreedmanDiaconis;
113 self
114 }
115
116 pub fn thresholds_scott(mut self) -> Self {
118 self.thresholds = ThresholdStrategy::Scott;
119 self
120 }
121
122 pub fn generate(&self, data: &[T]) -> Vec<Bin<T>> {
124 if data.is_empty() {
125 return vec![];
126 }
127
128 let values: Vec<f64> = data
130 .iter()
131 .map(|x| {
132 if let Some(ref accessor) = self.value {
133 accessor(x)
134 } else {
135 0.0
138 }
139 })
140 .collect();
141
142 let (min, max) = self.domain.unwrap_or_else(|| {
144 let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
145 let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
146 (min, max)
147 });
148
149 if min == max {
150 return vec![Bin {
152 x0: min,
153 x1: max,
154 values: data.to_vec(),
155 }];
156 }
157
158 let thresholds = self.compute_thresholds(&values, min, max);
160
161 let mut bins: Vec<Bin<T>> = Vec::with_capacity(thresholds.len() - 1);
163 for i in 0..thresholds.len() - 1 {
164 bins.push(Bin {
165 x0: thresholds[i],
166 x1: thresholds[i + 1],
167 values: Vec::new(),
168 });
169 }
170
171 for (value, item) in values.iter().zip(data.iter()) {
173 let idx = self.find_bin_index(&thresholds, *value);
174 if idx < bins.len() {
175 bins[idx].values.push(item.clone());
176 }
177 }
178
179 bins
180 }
181
182 fn compute_thresholds(&self, values: &[f64], min: f64, max: f64) -> Vec<f64> {
183 let n = values.len();
184 let range = max - min;
185
186 let count = match &self.thresholds {
187 ThresholdStrategy::Count(c) => *c,
188 ThresholdStrategy::Values(v) => return v.clone(),
189 ThresholdStrategy::Sturges => {
190 ((n as f64).log2() + 1.0).ceil() as usize
192 }
193 ThresholdStrategy::FreedmanDiaconis => {
194 let mut sorted = values.to_vec();
196 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
197 let q1 = sorted[n / 4];
198 let q3 = sorted[3 * n / 4];
199 let iqr = q3 - q1;
200 if iqr > 0.0 {
201 let bin_width = 2.0 * iqr / (n as f64).powf(1.0 / 3.0);
202 (range / bin_width).ceil() as usize
203 } else {
204 ((n as f64).log2() + 1.0).ceil() as usize
205 }
206 }
207 ThresholdStrategy::Scott => {
208 let mean: f64 = values.iter().sum::<f64>() / n as f64;
210 let variance: f64 =
211 values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
212 let std = variance.sqrt();
213 if std > 0.0 {
214 let bin_width = 3.5 * std / (n as f64).powf(1.0 / 3.0);
215 (range / bin_width).ceil() as usize
216 } else {
217 ((n as f64).log2() + 1.0).ceil() as usize
218 }
219 }
220 };
221
222 let step = range / count as f64;
224 (0..=count).map(|i| min + i as f64 * step).collect()
225 }
226
227 fn find_bin_index(&self, thresholds: &[f64], value: f64) -> usize {
228 let mut lo = 0;
230 let mut hi = thresholds.len() - 1;
231 while lo < hi {
232 let mid = lo + (hi - lo) / 2;
233 if thresholds[mid + 1] <= value {
234 lo = mid + 1;
235 } else {
236 hi = mid;
237 }
238 }
239 lo.min(thresholds.len().saturating_sub(2))
241 }
242}
243
244pub fn bin(data: &[f64], count: usize) -> Vec<Bin<f64>> {
258 BinGenerator::new()
259 .value(|x: &f64| *x)
260 .thresholds_count(count)
261 .generate(data)
262}
263
264pub fn threshold_sturges(n: usize) -> usize {
266 ((n as f64).log2() + 1.0).ceil() as usize
267}
268
269pub fn nice_bin_edges(min: f64, max: f64, count: usize) -> Vec<f64> {
281 if min == max || count == 0 {
282 return vec![min, max];
283 }
284
285 let range = max - min;
286 let rough_step = range / count as f64;
287
288 let exp = rough_step.log10().floor();
290 let fraction = rough_step / 10_f64.powf(exp);
291 let nice_fraction = if fraction <= 1.0 {
292 1.0
293 } else if fraction <= 2.0 {
294 2.0
295 } else if fraction <= 5.0 {
296 5.0
297 } else {
298 10.0
299 };
300 let step = nice_fraction * 10_f64.powf(exp);
301
302 let nice_min = (min / step).floor() * step;
304 let nice_max = (max / step).ceil() * step;
305
306 let mut edges = Vec::new();
308 let mut edge = nice_min;
309 while edge <= nice_max + step * 0.5 {
310 edges.push(edge);
311 edge += step;
312 }
313
314 edges
315}
316
317#[cfg(test)]
318mod tests {
319 use super::*;
320
321 #[test]
322 fn test_bin_basic() {
323 let data = vec![1.0, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 6.0, 7.0, 8.0, 9.0];
324 let bins = bin(&data, 4);
325
326 assert_eq!(bins.len(), 4);
327 assert!(bins[0].x0 < bins[0].x1);
328
329 let total: usize = bins.iter().map(|b| b.values.len()).sum();
331 assert_eq!(total, data.len());
332 }
333
334 #[test]
335 fn test_bin_generator() {
336 #[derive(Clone)]
337 struct Point {
338 x: f64,
339 }
340
341 let data: Vec<Point> = (0..100).map(|i| Point { x: i as f64 }).collect();
342
343 let bins = BinGenerator::new()
344 .value(|p: &Point| p.x)
345 .thresholds_count(10)
346 .generate(&data);
347
348 assert_eq!(bins.len(), 10);
349 }
350
351 #[test]
352 fn test_nice_bin_edges() {
353 let edges = nice_bin_edges(0.3, 9.7, 5);
354 assert!(edges[0] <= 0.3);
355 assert!(*edges.last().unwrap() >= 9.7);
356 }
357
358 #[test]
359 fn test_threshold_sturges() {
360 assert_eq!(threshold_sturges(10), 5);
361 assert_eq!(threshold_sturges(100), 8);
362 assert_eq!(threshold_sturges(1000), 11);
363 }
364}