use crate::{
histogram::{Bins, Edges, errors::BinsBuildError},
quantile::{Quantile1dExt, QuantileExt, interpolate::Nearest},
};
use ndarray::{Data, prelude::*};
use num_traits::{FromPrimitive, NumOps, ToPrimitive, Zero};
pub trait BinsBuildingStrategy {
#[allow(missing_docs)]
type Elem: Ord + Send;
fn from_array<S>(array: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
Self: std::marker::Sized,
{
Self::from_array_with_max(array, u16::MAX.into())
}
fn from_array_with_max<S>(
array: &ArrayBase<S, Ix1>,
max_n_bins: usize,
) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
Self: std::marker::Sized;
fn build(&self) -> Bins<Self::Elem>;
fn n_bins(&self) -> usize;
}
#[derive(Debug)]
struct EquiSpaced<T> {
bin_width: T,
min: T,
max: T,
}
#[derive(Debug)]
pub struct Sqrt<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
pub struct Rice<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
pub struct Sturges<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
pub struct FreedmanDiaconis<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
enum SturgesOrFD<T> {
Sturges(Sturges<T>),
FreedmanDiaconis(FreedmanDiaconis<T>),
}
#[derive(Debug)]
pub struct Auto<T> {
builder: SturgesOrFD<T>,
}
impl<T> EquiSpaced<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
fn new(bin_width: T, min: T, max: T) -> Result<Self, BinsBuildError> {
if (bin_width <= T::zero()) || (min >= max) {
Err(BinsBuildError::Strategy)
} else {
Ok(Self {
bin_width,
min,
max,
})
}
}
fn build(&self) -> Bins<T> {
let n_bins = self.n_bins();
let mut edges: Vec<T> = vec![];
for i in 0..=n_bins {
let edge = self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone();
edges.push(edge);
}
Bins::new(Edges::from(edges))
}
fn n_bins(&self) -> usize {
let min = self.min.to_f64().unwrap();
let max = self.max.to_f64().unwrap();
let bin_width = self.bin_width.to_f64().unwrap();
usize::from_f64(((max - min) / bin_width + 0.5).ceil()).unwrap_or(usize::MAX)
}
fn bin_width(&self) -> T {
self.bin_width.clone()
}
}
impl<T> BinsBuildingStrategy for Sqrt<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array_with_max<S>(
a: &ArrayBase<S, Ix1>,
max_n_bins: usize,
) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_elems = a.len();
#[allow(clippy::cast_precision_loss)]
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
let n_bins = (n_elems as f64).sqrt().round() as usize;
let min = a.min()?;
let max = a.max()?;
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
if builder.n_bins() > max_n_bins {
Err(BinsBuildError::Strategy)
} else {
Ok(Self { builder })
}
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Sqrt<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Rice<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array_with_max<S>(
a: &ArrayBase<S, Ix1>,
max_n_bins: usize,
) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_elems = a.len();
#[allow(clippy::cast_precision_loss)]
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize;
let min = a.min()?;
let max = a.max()?;
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
if builder.n_bins() > max_n_bins {
Err(BinsBuildError::Strategy)
} else {
Ok(Self { builder })
}
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Rice<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Sturges<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array_with_max<S>(
a: &ArrayBase<S, Ix1>,
max_n_bins: usize,
) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_elems = a.len();
#[allow(clippy::cast_precision_loss)]
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
let n_bins = (n_elems as f64).log2().round() as usize + 1;
let min = a.min()?;
let max = a.max()?;
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
if builder.n_bins() > max_n_bins {
Err(BinsBuildError::Strategy)
} else {
Ok(Self { builder })
}
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Sturges<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for FreedmanDiaconis<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
Self::from_array_with_max(a, u16::MAX.into())
}
fn from_array_with_max<S>(
a: &ArrayBase<S, Ix1>,
max_n_bins: usize,
) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_points = a.len();
if n_points == 0 {
return Err(BinsBuildError::EmptyInput);
}
let n_cbrt = f64::from_usize(n_points).unwrap().powf(1. / 3.);
let min = a.min()?;
let max = a.max()?;
let mut a_copy = a.to_owned();
let mut at = 0.5;
while at >= 1. / 512. {
at *= 0.5;
let first_quartile = a_copy.quantile_mut(at, &Nearest).unwrap();
let third_quartile = a_copy.quantile_mut(1. - at, &Nearest).unwrap();
let iqr = third_quartile - first_quartile;
let denom = T::from_f64((1. - 2. * at) * n_cbrt).unwrap();
if denom == T::zero() {
continue;
}
let bin_width = iqr.clone() / denom;
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
if builder.n_bins() > max_n_bins {
continue;
}
return Ok(Self { builder });
}
let m = a.iter().cloned().fold(T::zero(), |s, v| s + v) / T::from_usize(n_points).unwrap();
let s = a
.iter()
.cloned()
.map(|v| (v.clone() - m.clone()) * (v - m.clone()))
.fold(T::zero(), |s, v| s + v);
let s = (s / T::from_usize(n_points - 1).unwrap())
.to_f64()
.unwrap()
.sqrt();
let bin_width = T::from_f64(3.49 * s).unwrap() / T::from_f64(n_cbrt).unwrap();
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
if builder.n_bins() > max_n_bins {
return Err(BinsBuildError::Strategy);
}
Ok(Self { builder })
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> FreedmanDiaconis<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Auto<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array_with_max<S>(
a: &ArrayBase<S, Ix1>,
max_n_bins: usize,
) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let fd_builder = FreedmanDiaconis::from_array_with_max(a, max_n_bins);
let sturges_builder = Sturges::from_array_with_max(a, max_n_bins);
match (fd_builder, sturges_builder) {
(Err(_), Ok(sturges_builder)) => {
let builder = SturgesOrFD::Sturges(sturges_builder);
Ok(Self { builder })
}
(Ok(fd_builder), Err(_)) => {
let builder = SturgesOrFD::FreedmanDiaconis(fd_builder);
Ok(Self { builder })
}
(Ok(fd_builder), Ok(sturges_builder)) => {
let builder = if fd_builder.bin_width() > sturges_builder.bin_width() {
SturgesOrFD::Sturges(sturges_builder)
} else {
SturgesOrFD::FreedmanDiaconis(fd_builder)
};
Ok(Self { builder })
}
(Err(err), Err(_)) => Err(err),
}
}
fn build(&self) -> Bins<T> {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.build(),
SturgesOrFD::Sturges(b) => b.build(),
}
}
fn n_bins(&self) -> usize {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(),
SturgesOrFD::Sturges(b) => b.n_bins(),
}
}
}
impl<T> Auto<T>
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(),
SturgesOrFD::Sturges(b) => b.bin_width(),
}
}
}
fn compute_bin_width<T>(min: T, max: T, n_bins: usize) -> T
where
T: Ord + Send + Clone + FromPrimitive + ToPrimitive + NumOps + Zero,
{
let range = max - min;
range / T::from_usize(n_bins).unwrap()
}
#[cfg(test)]
mod equispaced_tests {
use super::EquiSpaced;
#[test]
fn bin_width_has_to_be_positive() {
assert!(EquiSpaced::new(0, 0, 200).is_err());
}
#[test]
fn min_has_to_be_strictly_smaller_than_max() {
assert!(EquiSpaced::new(10, 0, 0).is_err());
}
}
#[cfg(test)]
mod sqrt_tests {
use super::{BinsBuildingStrategy, Sqrt};
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(
Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn empty_arrays_are_bad() {
assert!(
Sqrt::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input()
);
}
}
#[cfg(test)]
mod rice_tests {
use super::{BinsBuildingStrategy, Rice};
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(
Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn empty_arrays_are_bad() {
assert!(
Rice::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input()
);
}
}
#[cfg(test)]
mod sturges_tests {
use super::{BinsBuildingStrategy, Sturges};
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(
Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn empty_arrays_are_bad() {
assert!(
Sturges::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input()
);
}
}
#[cfg(test)]
mod fd_tests {
use super::{BinsBuildingStrategy, FreedmanDiaconis};
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(
FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn zero_iqr_is_bad() {
assert!(
FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn empty_arrays_are_bad() {
assert!(
FreedmanDiaconis::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input()
);
}
}
#[cfg(test)]
mod auto_tests {
use super::{Auto, BinsBuildingStrategy};
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(
Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn zero_iqr_is_handled_by_sturged() {
assert!(Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]).is_ok());
}
#[test]
fn empty_arrays_are_bad() {
assert!(
Auto::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input()
);
}
}