1#![warn(missing_docs, clippy::all, clippy::pedantic)]
48
49use crate::{
50 histogram::{errors::BinsBuildError, Bins, Edges},
51 quantile::{interpolate::Nearest, Quantile1dExt, QuantileExt},
52};
53use ndarray::{prelude::*, Data};
54use noisy_float::types::n64;
55use num_traits::{FromPrimitive, NumOps, Zero};
56
57pub trait BinsBuildingStrategy {
67 #[allow(missing_docs)]
68 type Elem: Ord;
69 fn from_array<S>(array: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
79 where
80 S: Data<Elem = Self::Elem>,
81 Self: std::marker::Sized;
82
83 fn build(&self) -> Bins<Self::Elem>;
87
88 fn n_bins(&self) -> usize;
90}
91
92#[derive(Debug)]
93struct EquiSpaced<T> {
94 bin_width: T,
95 min: T,
96 max: T,
97}
98
99#[derive(Debug)]
113pub struct Sqrt<T> {
114 builder: EquiSpaced<T>,
115}
116
117#[derive(Debug)]
134pub struct Rice<T> {
135 builder: EquiSpaced<T>,
136}
137
138#[derive(Debug)]
154pub struct Sturges<T> {
155 builder: EquiSpaced<T>,
156}
157
158#[derive(Debug)]
180pub struct FreedmanDiaconis<T> {
181 builder: EquiSpaced<T>,
182}
183
184#[derive(Debug)]
185enum SturgesOrFD<T> {
186 Sturges(Sturges<T>),
187 FreedmanDiaconis(FreedmanDiaconis<T>),
188}
189
190#[derive(Debug)]
210pub struct Auto<T> {
211 builder: SturgesOrFD<T>,
212}
213
214impl<T> EquiSpaced<T>
215where
216 T: Ord + Clone + FromPrimitive + NumOps + Zero,
217{
218 fn new(bin_width: T, min: T, max: T) -> Result<Self, BinsBuildError> {
221 if (bin_width <= T::zero()) || (min >= max) {
222 Err(BinsBuildError::Strategy)
223 } else {
224 Ok(Self {
225 bin_width,
226 min,
227 max,
228 })
229 }
230 }
231
232 fn build(&self) -> Bins<T> {
233 let n_bins = self.n_bins();
234 let mut edges: Vec<T> = vec![];
235 for i in 0..=n_bins {
236 let edge = self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone();
237 edges.push(edge);
238 }
239 Bins::new(Edges::from(edges))
240 }
241
242 fn n_bins(&self) -> usize {
243 let mut max_edge = self.min.clone();
244 let mut n_bins = 0;
245 while max_edge <= self.max {
246 max_edge = max_edge + self.bin_width.clone();
247 n_bins += 1;
248 }
249 n_bins
250 }
251
252 fn bin_width(&self) -> T {
253 self.bin_width.clone()
254 }
255}
256
257impl<T> BinsBuildingStrategy for Sqrt<T>
258where
259 T: Ord + Clone + FromPrimitive + NumOps + Zero,
260{
261 type Elem = T;
262
263 fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
267 where
268 S: Data<Elem = Self::Elem>,
269 {
270 let n_elems = a.len();
271 #[allow(clippy::cast_precision_loss)]
274 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
276 let n_bins = (n_elems as f64).sqrt().round() as usize;
277 let min = a.min()?;
278 let max = a.max()?;
279 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
280 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
281 Ok(Self { builder })
282 }
283
284 fn build(&self) -> Bins<T> {
285 self.builder.build()
286 }
287
288 fn n_bins(&self) -> usize {
289 self.builder.n_bins()
290 }
291}
292
293impl<T> Sqrt<T>
294where
295 T: Ord + Clone + FromPrimitive + NumOps + Zero,
296{
297 pub fn bin_width(&self) -> T {
299 self.builder.bin_width()
300 }
301}
302
303impl<T> BinsBuildingStrategy for Rice<T>
304where
305 T: Ord + Clone + FromPrimitive + NumOps + Zero,
306{
307 type Elem = T;
308
309 fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
313 where
314 S: Data<Elem = Self::Elem>,
315 {
316 let n_elems = a.len();
317 #[allow(clippy::cast_precision_loss)]
320 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
322 let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize;
323 let min = a.min()?;
324 let max = a.max()?;
325 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
326 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
327 Ok(Self { builder })
328 }
329
330 fn build(&self) -> Bins<T> {
331 self.builder.build()
332 }
333
334 fn n_bins(&self) -> usize {
335 self.builder.n_bins()
336 }
337}
338
339impl<T> Rice<T>
340where
341 T: Ord + Clone + FromPrimitive + NumOps + Zero,
342{
343 pub fn bin_width(&self) -> T {
345 self.builder.bin_width()
346 }
347}
348
349impl<T> BinsBuildingStrategy for Sturges<T>
350where
351 T: Ord + Clone + FromPrimitive + NumOps + Zero,
352{
353 type Elem = T;
354
355 fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
359 where
360 S: Data<Elem = Self::Elem>,
361 {
362 let n_elems = a.len();
363 #[allow(clippy::cast_precision_loss)]
366 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
368 let n_bins = (n_elems as f64).log2().round() as usize + 1;
369 let min = a.min()?;
370 let max = a.max()?;
371 let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
372 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
373 Ok(Self { builder })
374 }
375
376 fn build(&self) -> Bins<T> {
377 self.builder.build()
378 }
379
380 fn n_bins(&self) -> usize {
381 self.builder.n_bins()
382 }
383}
384
385impl<T> Sturges<T>
386where
387 T: Ord + Clone + FromPrimitive + NumOps + Zero,
388{
389 pub fn bin_width(&self) -> T {
391 self.builder.bin_width()
392 }
393}
394
395impl<T> BinsBuildingStrategy for FreedmanDiaconis<T>
396where
397 T: Ord + Clone + FromPrimitive + NumOps + Zero,
398{
399 type Elem = T;
400
401 fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
405 where
406 S: Data<Elem = Self::Elem>,
407 {
408 let n_points = a.len();
409 if n_points == 0 {
410 return Err(BinsBuildError::EmptyInput);
411 }
412
413 let mut a_copy = a.to_owned();
414 let first_quartile = a_copy.quantile_mut(n64(0.25), &Nearest).unwrap();
415 let third_quartile = a_copy.quantile_mut(n64(0.75), &Nearest).unwrap();
416 let iqr = third_quartile - first_quartile;
417
418 let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr);
419 let min = a.min()?;
420 let max = a.max()?;
421 let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
422 Ok(Self { builder })
423 }
424
425 fn build(&self) -> Bins<T> {
426 self.builder.build()
427 }
428
429 fn n_bins(&self) -> usize {
430 self.builder.n_bins()
431 }
432}
433
434impl<T> FreedmanDiaconis<T>
435where
436 T: Ord + Clone + FromPrimitive + NumOps + Zero,
437{
438 fn compute_bin_width(n_bins: usize, iqr: T) -> T {
439 #[allow(clippy::cast_precision_loss)]
442 let denominator = (n_bins as f64).powf(1. / 3.);
443 T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap()
444 }
445
446 pub fn bin_width(&self) -> T {
448 self.builder.bin_width()
449 }
450}
451
452impl<T> BinsBuildingStrategy for Auto<T>
453where
454 T: Ord + Clone + FromPrimitive + NumOps + Zero,
455{
456 type Elem = T;
457
458 fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
462 where
463 S: Data<Elem = Self::Elem>,
464 {
465 let fd_builder = FreedmanDiaconis::from_array(&a);
466 let sturges_builder = Sturges::from_array(&a);
467 match (fd_builder, sturges_builder) {
468 (Err(_), Ok(sturges_builder)) => {
469 let builder = SturgesOrFD::Sturges(sturges_builder);
470 Ok(Self { builder })
471 }
472 (Ok(fd_builder), Err(_)) => {
473 let builder = SturgesOrFD::FreedmanDiaconis(fd_builder);
474 Ok(Self { builder })
475 }
476 (Ok(fd_builder), Ok(sturges_builder)) => {
477 let builder = if fd_builder.bin_width() > sturges_builder.bin_width() {
478 SturgesOrFD::Sturges(sturges_builder)
479 } else {
480 SturgesOrFD::FreedmanDiaconis(fd_builder)
481 };
482 Ok(Self { builder })
483 }
484 (Err(err), Err(_)) => Err(err),
485 }
486 }
487
488 fn build(&self) -> Bins<T> {
489 match &self.builder {
491 SturgesOrFD::FreedmanDiaconis(b) => b.build(),
492 SturgesOrFD::Sturges(b) => b.build(),
493 }
494 }
495
496 fn n_bins(&self) -> usize {
497 match &self.builder {
499 SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(),
500 SturgesOrFD::Sturges(b) => b.n_bins(),
501 }
502 }
503}
504
505impl<T> Auto<T>
506where
507 T: Ord + Clone + FromPrimitive + NumOps + Zero,
508{
509 pub fn bin_width(&self) -> T {
511 match &self.builder {
513 SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(),
514 SturgesOrFD::Sturges(b) => b.bin_width(),
515 }
516 }
517}
518
519fn compute_bin_width<T>(min: T, max: T, n_bins: usize) -> T
526where
527 T: Ord + Clone + FromPrimitive + NumOps + Zero,
528{
529 let range = max - min;
530 range / T::from_usize(n_bins).unwrap()
531}
532
533#[cfg(test)]
534mod equispaced_tests {
535 use super::EquiSpaced;
536
537 #[test]
538 fn bin_width_has_to_be_positive() {
539 assert!(EquiSpaced::new(0, 0, 200).is_err());
540 }
541
542 #[test]
543 fn min_has_to_be_strictly_smaller_than_max() {
544 assert!(EquiSpaced::new(10, 0, 0).is_err());
545 }
546}
547
548#[cfg(test)]
549mod sqrt_tests {
550 use super::{BinsBuildingStrategy, Sqrt};
551 use ndarray::array;
552
553 #[test]
554 fn constant_array_are_bad() {
555 assert!(Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1])
556 .unwrap_err()
557 .is_strategy());
558 }
559
560 #[test]
561 fn empty_arrays_are_bad() {
562 assert!(Sqrt::<usize>::from_array(&array![])
563 .unwrap_err()
564 .is_empty_input());
565 }
566}
567
568#[cfg(test)]
569mod rice_tests {
570 use super::{BinsBuildingStrategy, Rice};
571 use ndarray::array;
572
573 #[test]
574 fn constant_array_are_bad() {
575 assert!(Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1])
576 .unwrap_err()
577 .is_strategy());
578 }
579
580 #[test]
581 fn empty_arrays_are_bad() {
582 assert!(Rice::<usize>::from_array(&array![])
583 .unwrap_err()
584 .is_empty_input());
585 }
586}
587
588#[cfg(test)]
589mod sturges_tests {
590 use super::{BinsBuildingStrategy, Sturges};
591 use ndarray::array;
592
593 #[test]
594 fn constant_array_are_bad() {
595 assert!(Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1])
596 .unwrap_err()
597 .is_strategy());
598 }
599
600 #[test]
601 fn empty_arrays_are_bad() {
602 assert!(Sturges::<usize>::from_array(&array![])
603 .unwrap_err()
604 .is_empty_input());
605 }
606}
607
608#[cfg(test)]
609mod fd_tests {
610 use super::{BinsBuildingStrategy, FreedmanDiaconis};
611 use ndarray::array;
612
613 #[test]
614 fn constant_array_are_bad() {
615 assert!(FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1])
616 .unwrap_err()
617 .is_strategy());
618 }
619
620 #[test]
621 fn zero_iqr_is_bad() {
622 assert!(
623 FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])
624 .unwrap_err()
625 .is_strategy()
626 );
627 }
628
629 #[test]
630 fn empty_arrays_are_bad() {
631 assert!(FreedmanDiaconis::<usize>::from_array(&array![])
632 .unwrap_err()
633 .is_empty_input());
634 }
635}
636
637#[cfg(test)]
638mod auto_tests {
639 use super::{Auto, BinsBuildingStrategy};
640 use ndarray::array;
641
642 #[test]
643 fn constant_array_are_bad() {
644 assert!(Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1])
645 .unwrap_err()
646 .is_strategy());
647 }
648
649 #[test]
650 fn zero_iqr_is_handled_by_sturged() {
651 assert!(Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]).is_ok());
652 }
653
654 #[test]
655 fn empty_arrays_are_bad() {
656 assert!(Auto::<usize>::from_array(&array![])
657 .unwrap_err()
658 .is_empty_input());
659 }
660}