1use crate::{
49 histogram::{Bins, Edges, errors::BinsBuildError},
50 quantile::{Quantile1dExt, QuantileExt, interpolate::Nearest},
51};
52use ndarray::{Data, prelude::*};
53use num_traits::{FromPrimitive, NumOps, ToPrimitive, Zero};
54
55pub trait BinsBuildingStrategy {
65 #[allow(missing_docs)]
66 type Elem: Ord + Send;
67 fn from_array<S>(array: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
80 where
81 S: Data<Elem = Self::Elem>,
82 Self: std::marker::Sized,
83 {
84 Self::from_array_with_max(array, u16::MAX.into())
85 }
86
87 fn from_array_with_max<S>(
98 array: &ArrayBase<S, Ix1>,
99 max_n_bins: usize,
100 ) -> Result<Self, BinsBuildError>
101 where
102 S: Data<Elem = Self::Elem>,
103 Self: std::marker::Sized;
104
105 fn build(&self) -> Bins<Self::Elem>;
109
110 fn n_bins(&self) -> usize;
112}
113
114#[derive(Debug)]
115struct EquiSpaced<T> {
116 bin_width: T,
117 min: T,
118 max: T,
119}
120
121#[derive(Debug)]
135pub struct Sqrt<T> {
136 builder: EquiSpaced<T>,
137}
138
139#[derive(Debug)]
156pub struct Rice<T> {
157 builder: EquiSpaced<T>,
158}
159
160#[derive(Debug)]
176pub struct Sturges<T> {
177 builder: EquiSpaced<T>,
178}
179
180#[derive(Debug)]
208pub struct FreedmanDiaconis<T> {
209 builder: EquiSpaced<T>,
210}
211
212#[derive(Debug)]
213enum SturgesOrFD<T> {
214 Sturges(Sturges<T>),
215 FreedmanDiaconis(FreedmanDiaconis<T>),
216}
217
218#[derive(Debug)]
238pub struct Auto<T> {
239 builder: SturgesOrFD<T>,
240}
241
242impl<T> EquiSpaced<T>
243where
244 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
245{
246 fn new(bin_width: T, min: T, max: T) -> Result<Self, BinsBuildError> {
249 if (bin_width <= T::zero()) || (min >= max) {
250 Err(BinsBuildError::Strategy)
251 } else {
252 Ok(Self {
253 bin_width,
254 min,
255 max,
256 })
257 }
258 }
259
260 fn build(&self) -> Bins<T> {
261 let n_bins = self.n_bins();
262 let mut edges: Vec<T> = vec![];
263 for i in 0..=n_bins {
264 let edge = self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone();
265 edges.push(edge);
266 }
267 Bins::new(Edges::from(edges))
268 }
269
270 fn n_bins(&self) -> usize {
271 let min = self.min.to_f64().unwrap();
272 let max = self.max.to_f64().unwrap();
273 let bin_width = self.bin_width.to_f64().unwrap();
274 usize::from_f64(((max - min) / bin_width + 0.5).ceil()).unwrap_or(usize::MAX)
275 }
276
277 fn bin_width(&self) -> T {
278 self.bin_width.clone()
279 }
280}
281
282impl<T> BinsBuildingStrategy for Sqrt<T>
283where
284 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
285{
286 type Elem = T;
287
288 fn from_array_with_max<S>(
292 a: &ArrayBase<S, Ix1>,
293 max_n_bins: usize,
294 ) -> Result<Self, BinsBuildError>
295 where
296 S: Data<Elem = Self::Elem>,
297 {
298 let n_elems = a.len();
299 #[allow(clippy::cast_precision_loss)]
302 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
304 let n_bins = (n_elems as f64).sqrt().round() as usize;
305 let min = a.min()?;
306 let max = a.max()?;
307 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
308 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
309 if builder.n_bins() > max_n_bins {
310 Err(BinsBuildError::Strategy)
311 } else {
312 Ok(Self { builder })
313 }
314 }
315
316 fn build(&self) -> Bins<T> {
317 self.builder.build()
318 }
319
320 fn n_bins(&self) -> usize {
321 self.builder.n_bins()
322 }
323}
324
325impl<T> Sqrt<T>
326where
327 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
328{
329 pub fn bin_width(&self) -> T {
331 self.builder.bin_width()
332 }
333}
334
335impl<T> BinsBuildingStrategy for Rice<T>
336where
337 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
338{
339 type Elem = T;
340
341 fn from_array_with_max<S>(
345 a: &ArrayBase<S, Ix1>,
346 max_n_bins: usize,
347 ) -> Result<Self, BinsBuildError>
348 where
349 S: Data<Elem = Self::Elem>,
350 {
351 let n_elems = a.len();
352 #[allow(clippy::cast_precision_loss)]
355 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
357 let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize;
358 let min = a.min()?;
359 let max = a.max()?;
360 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
361 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
362 if builder.n_bins() > max_n_bins {
363 Err(BinsBuildError::Strategy)
364 } else {
365 Ok(Self { builder })
366 }
367 }
368
369 fn build(&self) -> Bins<T> {
370 self.builder.build()
371 }
372
373 fn n_bins(&self) -> usize {
374 self.builder.n_bins()
375 }
376}
377
378impl<T> Rice<T>
379where
380 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
381{
382 pub fn bin_width(&self) -> T {
384 self.builder.bin_width()
385 }
386}
387
388impl<T> BinsBuildingStrategy for Sturges<T>
389where
390 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
391{
392 type Elem = T;
393
394 fn from_array_with_max<S>(
398 a: &ArrayBase<S, Ix1>,
399 max_n_bins: usize,
400 ) -> Result<Self, BinsBuildError>
401 where
402 S: Data<Elem = Self::Elem>,
403 {
404 let n_elems = a.len();
405 #[allow(clippy::cast_precision_loss)]
408 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
410 let n_bins = (n_elems as f64).log2().round() as usize + 1;
411 let min = a.min()?;
412 let max = a.max()?;
413 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
414 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
415 if builder.n_bins() > max_n_bins {
416 Err(BinsBuildError::Strategy)
417 } else {
418 Ok(Self { builder })
419 }
420 }
421
422 fn build(&self) -> Bins<T> {
423 self.builder.build()
424 }
425
426 fn n_bins(&self) -> usize {
427 self.builder.n_bins()
428 }
429}
430
431impl<T> Sturges<T>
432where
433 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
434{
435 pub fn bin_width(&self) -> T {
437 self.builder.bin_width()
438 }
439}
440
441impl<T> BinsBuildingStrategy for FreedmanDiaconis<T>
442where
443 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
444{
445 type Elem = T;
446
447 fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
451 where
452 S: Data<Elem = Self::Elem>,
453 {
454 Self::from_array_with_max(a, u16::MAX.into())
455 }
456
457 fn from_array_with_max<S>(
461 a: &ArrayBase<S, Ix1>,
462 max_n_bins: usize,
463 ) -> Result<Self, BinsBuildError>
464 where
465 S: Data<Elem = Self::Elem>,
466 {
467 let n_points = a.len();
468 if n_points == 0 {
469 return Err(BinsBuildError::EmptyInput);
470 }
471
472 let n_cbrt = f64::from_usize(n_points).unwrap().powf(1. / 3.);
473 let min = a.min()?;
474 let max = a.max()?;
475 let mut a_copy = a.to_owned();
476 let mut at = 0.5;
481 while at >= 1. / 512. {
482 at *= 0.5;
483 let first_quartile = a_copy.quantile_mut(at, &Nearest).unwrap();
484 let third_quartile = a_copy.quantile_mut(1. - at, &Nearest).unwrap();
485 let iqr = third_quartile - first_quartile;
486 let denom = T::from_f64((1. - 2. * at) * n_cbrt).unwrap();
487 if denom == T::zero() {
488 continue;
489 }
490 let bin_width = iqr.clone() / denom;
491 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
492 if builder.n_bins() > max_n_bins {
493 continue;
494 }
495 return Ok(Self { builder });
496 }
497 let m = a.iter().cloned().fold(T::zero(), |s, v| s + v) / T::from_usize(n_points).unwrap();
500 let s = a
501 .iter()
502 .cloned()
503 .map(|v| (v.clone() - m.clone()) * (v - m.clone()))
504 .fold(T::zero(), |s, v| s + v);
505 let s = (s / T::from_usize(n_points - 1).unwrap())
506 .to_f64()
507 .unwrap()
508 .sqrt();
509 let bin_width = T::from_f64(3.49 * s).unwrap() / T::from_f64(n_cbrt).unwrap();
510 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
511 if builder.n_bins() > max_n_bins {
512 return Err(BinsBuildError::Strategy);
513 }
514 Ok(Self { builder })
515 }
516
517 fn build(&self) -> Bins<T> {
518 self.builder.build()
519 }
520
521 fn n_bins(&self) -> usize {
522 self.builder.n_bins()
523 }
524}
525
526impl<T> FreedmanDiaconis<T>
527where
528 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
529{
530 pub fn bin_width(&self) -> T {
532 self.builder.bin_width()
533 }
534}
535
536impl<T> BinsBuildingStrategy for Auto<T>
537where
538 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
539{
540 type Elem = T;
541
542 fn from_array_with_max<S>(
546 a: &ArrayBase<S, Ix1>,
547 max_n_bins: usize,
548 ) -> Result<Self, BinsBuildError>
549 where
550 S: Data<Elem = Self::Elem>,
551 {
552 let fd_builder = FreedmanDiaconis::from_array_with_max(a, max_n_bins);
553 let sturges_builder = Sturges::from_array_with_max(a, max_n_bins);
554 match (fd_builder, sturges_builder) {
555 (Err(_), Ok(sturges_builder)) => {
556 let builder = SturgesOrFD::Sturges(sturges_builder);
557 Ok(Self { builder })
558 }
559 (Ok(fd_builder), Err(_)) => {
560 let builder = SturgesOrFD::FreedmanDiaconis(fd_builder);
561 Ok(Self { builder })
562 }
563 (Ok(fd_builder), Ok(sturges_builder)) => {
564 let builder = if fd_builder.bin_width() > sturges_builder.bin_width() {
565 SturgesOrFD::Sturges(sturges_builder)
566 } else {
567 SturgesOrFD::FreedmanDiaconis(fd_builder)
568 };
569 Ok(Self { builder })
570 }
571 (Err(err), Err(_)) => Err(err),
572 }
573 }
574
575 fn build(&self) -> Bins<T> {
576 match &self.builder {
578 SturgesOrFD::FreedmanDiaconis(b) => b.build(),
579 SturgesOrFD::Sturges(b) => b.build(),
580 }
581 }
582
583 fn n_bins(&self) -> usize {
584 match &self.builder {
586 SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(),
587 SturgesOrFD::Sturges(b) => b.n_bins(),
588 }
589 }
590}
591
592impl<T> Auto<T>
593where
594 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
595{
596 pub fn bin_width(&self) -> T {
598 match &self.builder {
600 SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(),
601 SturgesOrFD::Sturges(b) => b.bin_width(),
602 }
603 }
604}
605
606fn compute_bin_width<T>(min: T, max: T, n_bins: usize) -> T
613where
614 T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
615{
616 let range = max - min;
617 range / T::from_usize(n_bins).unwrap()
618}
619
620#[cfg(test)]
621mod equispaced_tests {
622 use super::EquiSpaced;
623
624 #[test]
625 fn bin_width_has_to_be_positive() {
626 assert!(EquiSpaced::new(0, 0, 200).is_err());
627 }
628
629 #[test]
630 fn min_has_to_be_strictly_smaller_than_max() {
631 assert!(EquiSpaced::new(10, 0, 0).is_err());
632 }
633}
634
635#[cfg(test)]
636mod sqrt_tests {
637 use super::{BinsBuildingStrategy, Sqrt};
638 use ndarray::array;
639
640 #[test]
641 fn constant_array_are_bad() {
642 assert!(
643 Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1])
644 .unwrap_err()
645 .is_strategy()
646 );
647 }
648
649 #[test]
650 fn empty_arrays_are_bad() {
651 assert!(
652 Sqrt::<usize>::from_array(&array![])
653 .unwrap_err()
654 .is_empty_input()
655 );
656 }
657}
658
659#[cfg(test)]
660mod rice_tests {
661 use super::{BinsBuildingStrategy, Rice};
662 use ndarray::array;
663
664 #[test]
665 fn constant_array_are_bad() {
666 assert!(
667 Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1])
668 .unwrap_err()
669 .is_strategy()
670 );
671 }
672
673 #[test]
674 fn empty_arrays_are_bad() {
675 assert!(
676 Rice::<usize>::from_array(&array![])
677 .unwrap_err()
678 .is_empty_input()
679 );
680 }
681}
682
683#[cfg(test)]
684mod sturges_tests {
685 use super::{BinsBuildingStrategy, Sturges};
686 use ndarray::array;
687
688 #[test]
689 fn constant_array_are_bad() {
690 assert!(
691 Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1])
692 .unwrap_err()
693 .is_strategy()
694 );
695 }
696
697 #[test]
698 fn empty_arrays_are_bad() {
699 assert!(
700 Sturges::<usize>::from_array(&array![])
701 .unwrap_err()
702 .is_empty_input()
703 );
704 }
705}
706
707#[cfg(test)]
708mod fd_tests {
709 use super::{BinsBuildingStrategy, FreedmanDiaconis};
710 use ndarray::array;
711
712 #[test]
713 fn constant_array_are_bad() {
714 assert!(
715 FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1])
716 .unwrap_err()
717 .is_strategy()
718 );
719 }
720
721 #[test]
722 fn zero_iqr_is_bad() {
723 assert!(
724 FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])
725 .unwrap_err()
726 .is_strategy()
727 );
728 }
729
730 #[test]
731 fn empty_arrays_are_bad() {
732 assert!(
733 FreedmanDiaconis::<usize>::from_array(&array![])
734 .unwrap_err()
735 .is_empty_input()
736 );
737 }
738}
739
740#[cfg(test)]
741mod auto_tests {
742 use super::{Auto, BinsBuildingStrategy};
743 use ndarray::array;
744
745 #[test]
746 fn constant_array_are_bad() {
747 assert!(
748 Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1])
749 .unwrap_err()
750 .is_strategy()
751 );
752 }
753
754 #[test]
755 fn zero_iqr_is_handled_by_sturged() {
756 assert!(Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]).is_ok());
757 }
758
759 #[test]
760 fn empty_arrays_are_bad() {
761 assert!(
762 Auto::<usize>::from_array(&array![])
763 .unwrap_err()
764 .is_empty_input()
765 );
766 }
767}