1#![warn(missing_docs, clippy::all, clippy::pedantic)]
48
49use crate::{
50 histogram::{errors::BinsBuildError, Bins, Edges},
51 quantile::{interpolate::Nearest, Quantile1dExt, QuantileExt},
52};
53use ndarray::prelude::*;
54use noisy_float::types::n64;
55use num_traits::{FromPrimitive, NumOps, Zero};
56
57pub trait BinsBuildingStrategy {
67 #[allow(missing_docs)]
68 type Elem: Ord;
69 fn from_array(array: &ArrayRef<Self::Elem, Ix1>) -> Result<Self, BinsBuildError>
79 where
80 Self: std::marker::Sized;
81
82 fn build(&self) -> Bins<Self::Elem>;
86
87 fn n_bins(&self) -> usize;
89}
90
91#[derive(Debug)]
92struct EquiSpaced<T> {
93 bin_width: T,
94 min: T,
95 max: T,
96}
97
98#[derive(Debug)]
112pub struct Sqrt<T> {
113 builder: EquiSpaced<T>,
114}
115
116#[derive(Debug)]
133pub struct Rice<T> {
134 builder: EquiSpaced<T>,
135}
136
137#[derive(Debug)]
153pub struct Sturges<T> {
154 builder: EquiSpaced<T>,
155}
156
157#[derive(Debug)]
179pub struct FreedmanDiaconis<T> {
180 builder: EquiSpaced<T>,
181}
182
183#[derive(Debug)]
184enum SturgesOrFD<T> {
185 Sturges(Sturges<T>),
186 FreedmanDiaconis(FreedmanDiaconis<T>),
187}
188
189#[derive(Debug)]
209pub struct Auto<T> {
210 builder: SturgesOrFD<T>,
211}
212
213impl<T> EquiSpaced<T>
214where
215 T: Ord + Clone + FromPrimitive + NumOps + Zero,
216{
217 fn new(bin_width: T, min: T, max: T) -> Result<Self, BinsBuildError> {
220 if (bin_width <= T::zero()) || (min >= max) {
221 Err(BinsBuildError::Strategy)
222 } else {
223 Ok(Self {
224 bin_width,
225 min,
226 max,
227 })
228 }
229 }
230
231 fn build(&self) -> Bins<T> {
232 let n_bins = self.n_bins();
233 let mut edges: Vec<T> = vec![];
234 for i in 0..=n_bins {
235 let edge = self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone();
236 edges.push(edge);
237 }
238 Bins::new(Edges::from(edges))
239 }
240
241 fn n_bins(&self) -> usize {
242 let mut max_edge = self.min.clone();
243 let mut n_bins = 0;
244 while max_edge <= self.max {
245 max_edge = max_edge + self.bin_width.clone();
246 n_bins += 1;
247 }
248 n_bins
249 }
250
251 fn bin_width(&self) -> T {
252 self.bin_width.clone()
253 }
254}
255
256impl<T> BinsBuildingStrategy for Sqrt<T>
257where
258 T: Ord + Clone + FromPrimitive + NumOps + Zero,
259{
260 type Elem = T;
261
262 fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
266 let n_elems = a.len();
267 #[allow(clippy::cast_precision_loss)]
270 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
272 let n_bins = (n_elems as f64).sqrt().round() as usize;
273 let min = a.min()?;
274 let max = a.max()?;
275 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
276 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
277 Ok(Self { builder })
278 }
279
280 fn build(&self) -> Bins<T> {
281 self.builder.build()
282 }
283
284 fn n_bins(&self) -> usize {
285 self.builder.n_bins()
286 }
287}
288
289impl<T> Sqrt<T>
290where
291 T: Ord + Clone + FromPrimitive + NumOps + Zero,
292{
293 pub fn bin_width(&self) -> T {
295 self.builder.bin_width()
296 }
297}
298
299impl<T> BinsBuildingStrategy for Rice<T>
300where
301 T: Ord + Clone + FromPrimitive + NumOps + Zero,
302{
303 type Elem = T;
304
305 fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
309 let n_elems = a.len();
310 #[allow(clippy::cast_precision_loss)]
313 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
315 let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize;
316 let min = a.min()?;
317 let max = a.max()?;
318 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
319 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
320 Ok(Self { builder })
321 }
322
323 fn build(&self) -> Bins<T> {
324 self.builder.build()
325 }
326
327 fn n_bins(&self) -> usize {
328 self.builder.n_bins()
329 }
330}
331
332impl<T> Rice<T>
333where
334 T: Ord + Clone + FromPrimitive + NumOps + Zero,
335{
336 pub fn bin_width(&self) -> T {
338 self.builder.bin_width()
339 }
340}
341
342impl<T> BinsBuildingStrategy for Sturges<T>
343where
344 T: Ord + Clone + FromPrimitive + NumOps + Zero,
345{
346 type Elem = T;
347
348 fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
352 let n_elems = a.len();
353 #[allow(clippy::cast_precision_loss)]
356 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
358 let n_bins = (n_elems as f64).log2().round() as usize + 1;
359 let min = a.min()?;
360 let max = a.max()?;
361 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
362 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
363 Ok(Self { builder })
364 }
365
366 fn build(&self) -> Bins<T> {
367 self.builder.build()
368 }
369
370 fn n_bins(&self) -> usize {
371 self.builder.n_bins()
372 }
373}
374
375impl<T> Sturges<T>
376where
377 T: Ord + Clone + FromPrimitive + NumOps + Zero,
378{
379 pub fn bin_width(&self) -> T {
381 self.builder.bin_width()
382 }
383}
384
385impl<T> BinsBuildingStrategy for FreedmanDiaconis<T>
386where
387 T: Ord + Clone + FromPrimitive + NumOps + Zero,
388{
389 type Elem = T;
390
391 fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
395 let n_points = a.len();
396 if n_points == 0 {
397 return Err(BinsBuildError::EmptyInput);
398 }
399
400 let mut a_copy = a.to_owned();
401 let first_quartile = a_copy.quantile_mut(n64(0.25), &Nearest).unwrap();
402 let third_quartile = a_copy.quantile_mut(n64(0.75), &Nearest).unwrap();
403 let iqr = third_quartile - first_quartile;
404
405 let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr);
406 let min = a.min()?;
407 let max = a.max()?;
408 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
409 Ok(Self { builder })
410 }
411
412 fn build(&self) -> Bins<T> {
413 self.builder.build()
414 }
415
416 fn n_bins(&self) -> usize {
417 self.builder.n_bins()
418 }
419}
420
421impl<T> FreedmanDiaconis<T>
422where
423 T: Ord + Clone + FromPrimitive + NumOps + Zero,
424{
425 fn compute_bin_width(n_bins: usize, iqr: T) -> T {
426 #[allow(clippy::cast_precision_loss)]
429 let denominator = (n_bins as f64).powf(1. / 3.);
430 T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap()
431 }
432
433 pub fn bin_width(&self) -> T {
435 self.builder.bin_width()
436 }
437}
438
439impl<T> BinsBuildingStrategy for Auto<T>
440where
441 T: Ord + Clone + FromPrimitive + NumOps + Zero,
442{
443 type Elem = T;
444
445 fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
449 let fd_builder = FreedmanDiaconis::from_array(&a);
450 let sturges_builder = Sturges::from_array(&a);
451 match (fd_builder, sturges_builder) {
452 (Err(_), Ok(sturges_builder)) => {
453 let builder = SturgesOrFD::Sturges(sturges_builder);
454 Ok(Self { builder })
455 }
456 (Ok(fd_builder), Err(_)) => {
457 let builder = SturgesOrFD::FreedmanDiaconis(fd_builder);
458 Ok(Self { builder })
459 }
460 (Ok(fd_builder), Ok(sturges_builder)) => {
461 let builder = if fd_builder.bin_width() > sturges_builder.bin_width() {
462 SturgesOrFD::Sturges(sturges_builder)
463 } else {
464 SturgesOrFD::FreedmanDiaconis(fd_builder)
465 };
466 Ok(Self { builder })
467 }
468 (Err(err), Err(_)) => Err(err),
469 }
470 }
471
472 fn build(&self) -> Bins<T> {
473 match &self.builder {
475 SturgesOrFD::FreedmanDiaconis(b) => b.build(),
476 SturgesOrFD::Sturges(b) => b.build(),
477 }
478 }
479
480 fn n_bins(&self) -> usize {
481 match &self.builder {
483 SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(),
484 SturgesOrFD::Sturges(b) => b.n_bins(),
485 }
486 }
487}
488
489impl<T> Auto<T>
490where
491 T: Ord + Clone + FromPrimitive + NumOps + Zero,
492{
493 pub fn bin_width(&self) -> T {
495 match &self.builder {
497 SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(),
498 SturgesOrFD::Sturges(b) => b.bin_width(),
499 }
500 }
501}
502
503fn compute_bin_width<T>(min: T, max: T, n_bins: usize) -> T
510where
511 T: Ord + Clone + FromPrimitive + NumOps + Zero,
512{
513 let range = max - min;
514 range / T::from_usize(n_bins).unwrap()
515}
516
517#[cfg(test)]
518mod equispaced_tests {
519 use super::EquiSpaced;
520
521 #[test]
522 fn bin_width_has_to_be_positive() {
523 assert!(EquiSpaced::new(0, 0, 200).is_err());
524 }
525
526 #[test]
527 fn min_has_to_be_strictly_smaller_than_max() {
528 assert!(EquiSpaced::new(10, 0, 0).is_err());
529 }
530}
531
532#[cfg(test)]
533mod sqrt_tests {
534 use super::{BinsBuildingStrategy, Sqrt};
535 use ndarray::array;
536
537 #[test]
538 fn constant_array_are_bad() {
539 assert!(Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1])
540 .unwrap_err()
541 .is_strategy());
542 }
543
544 #[test]
545 fn empty_arrays_are_bad() {
546 assert!(Sqrt::<usize>::from_array(&array![])
547 .unwrap_err()
548 .is_empty_input());
549 }
550}
551
552#[cfg(test)]
553mod rice_tests {
554 use super::{BinsBuildingStrategy, Rice};
555 use ndarray::array;
556
557 #[test]
558 fn constant_array_are_bad() {
559 assert!(Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1])
560 .unwrap_err()
561 .is_strategy());
562 }
563
564 #[test]
565 fn empty_arrays_are_bad() {
566 assert!(Rice::<usize>::from_array(&array![])
567 .unwrap_err()
568 .is_empty_input());
569 }
570}
571
572#[cfg(test)]
573mod sturges_tests {
574 use super::{BinsBuildingStrategy, Sturges};
575 use ndarray::array;
576
577 #[test]
578 fn constant_array_are_bad() {
579 assert!(Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1])
580 .unwrap_err()
581 .is_strategy());
582 }
583
584 #[test]
585 fn empty_arrays_are_bad() {
586 assert!(Sturges::<usize>::from_array(&array![])
587 .unwrap_err()
588 .is_empty_input());
589 }
590}
591
592#[cfg(test)]
593mod fd_tests {
594 use super::{BinsBuildingStrategy, FreedmanDiaconis};
595 use ndarray::array;
596
597 #[test]
598 fn constant_array_are_bad() {
599 assert!(FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1])
600 .unwrap_err()
601 .is_strategy());
602 }
603
604 #[test]
605 fn zero_iqr_is_bad() {
606 assert!(
607 FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])
608 .unwrap_err()
609 .is_strategy()
610 );
611 }
612
613 #[test]
614 fn empty_arrays_are_bad() {
615 assert!(FreedmanDiaconis::<usize>::from_array(&array![])
616 .unwrap_err()
617 .is_empty_input());
618 }
619}
620
621#[cfg(test)]
622mod auto_tests {
623 use super::{Auto, BinsBuildingStrategy};
624 use ndarray::array;
625
626 #[test]
627 fn constant_array_are_bad() {
628 assert!(Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1])
629 .unwrap_err()
630 .is_strategy());
631 }
632
633 #[test]
634 fn zero_iqr_is_handled_by_sturged() {
635 assert!(Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]).is_ok());
636 }
637
638 #[test]
639 fn empty_arrays_are_bad() {
640 assert!(Auto::<usize>::from_array(&array![])
641 .unwrap_err()
642 .is_empty_input());
643 }
644}