use ndarray::prelude::*;
use ndarray::Data;
use num_traits::{FromPrimitive, NumOps, Zero};
use super::super::{QuantileExt, Quantile1dExt};
use super::super::interpolate::Nearest;
use super::{Edges, Bins};
pub trait BinsBuildingStrategy
{
type Elem: Ord;
fn from_array<S>(array: &ArrayBase<S, Ix1>) -> Self
where
S: Data<Elem=Self::Elem>;
fn build(&self) -> Bins<Self::Elem>;
fn n_bins(&self) -> usize;
}
struct EquiSpaced<T> {
bin_width: T,
min: T,
max: T,
}
pub struct Sqrt<T> {
builder: EquiSpaced<T>,
}
pub struct Rice<T> {
builder: EquiSpaced<T>,
}
pub struct Sturges<T> {
builder: EquiSpaced<T>,
}
pub struct FreedmanDiaconis<T> {
builder: EquiSpaced<T>,
}
enum SturgesOrFD<T> {
Sturges(Sturges<T>),
FreedmanDiaconis(FreedmanDiaconis<T>),
}
pub struct Auto<T> {
builder: SturgesOrFD<T>,
}
impl<T> EquiSpaced<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
fn new(bin_width: T, min: T, max: T) -> Self
{
assert!(bin_width > T::zero());
Self { bin_width, min, max }
}
fn build(&self) -> Bins<T> {
let n_bins = self.n_bins();
let mut edges: Vec<T> = vec![];
for i in 0..(n_bins+1) {
let edge = self.min.clone() + T::from_usize(i).unwrap()*self.bin_width.clone();
edges.push(edge);
}
Bins::new(Edges::from(edges))
}
fn n_bins(&self) -> usize {
let mut max_edge = self.min.clone();
let mut n_bins = 0;
while max_edge <= self.max {
max_edge = max_edge + self.bin_width.clone();
n_bins += 1;
}
return n_bins
}
fn bin_width(&self) -> T {
self.bin_width.clone()
}
}
impl<T> BinsBuildingStrategy for Sqrt<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Self
where
S: Data<Elem=Self::Elem>
{
let n_elems = a.len();
let n_bins = (n_elems as f64).sqrt().round() as usize;
let min = a.min().unwrap().clone();
let max = a.max().unwrap().clone();
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min, max);
Self { builder }
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Sqrt<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Rice<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Self
where
S: Data<Elem=Self::Elem>
{
let n_elems = a.len();
let n_bins = (2. * (n_elems as f64).powf(1./3.)).round() as usize;
let min = a.min().unwrap().clone();
let max = a.max().unwrap().clone();
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min, max);
Self { builder }
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Rice<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Sturges<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Self
where
S: Data<Elem=Self::Elem>
{
let n_elems = a.len();
let n_bins = (n_elems as f64).log2().round() as usize + 1;
let min = a.min().unwrap().clone();
let max = a.max().unwrap().clone();
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min, max);
Self { builder }
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Sturges<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for FreedmanDiaconis<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Self
where
S: Data<Elem=Self::Elem>
{
let n_points = a.len();
let mut a_copy = a.to_owned();
let first_quartile = a_copy.quantile_mut::<Nearest>(0.25).unwrap();
let third_quartile = a_copy.quantile_mut::<Nearest>(0.75).unwrap();
let iqr = third_quartile - first_quartile;
let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr);
let min = a_copy.min().unwrap().clone();
let max = a_copy.max().unwrap().clone();
let builder = EquiSpaced::new(bin_width, min, max);
Self { builder }
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> FreedmanDiaconis<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
fn compute_bin_width(n_bins: usize, iqr: T) -> T
{
let denominator = (n_bins as f64).powf(1. / 3.);
let bin_width = T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap();
bin_width
}
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Auto<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Self
where
S: Data<Elem=Self::Elem>
{
let fd_builder = FreedmanDiaconis::from_array(&a);
let sturges_builder = Sturges::from_array(&a);
let builder = {
if fd_builder.bin_width() > sturges_builder.bin_width() {
SturgesOrFD::Sturges(sturges_builder)
} else {
SturgesOrFD::FreedmanDiaconis(fd_builder)
}
};
Self { builder }
}
fn build(&self) -> Bins<T> {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.build(),
SturgesOrFD::Sturges(b) => b.build(),
}
}
fn n_bins(&self) -> usize {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(),
SturgesOrFD::Sturges(b) => b.n_bins(),
}
}
}
impl<T> Auto<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero
{
pub fn bin_width(&self) -> T {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(),
SturgesOrFD::Sturges(b) => b.bin_width(),
}
}
}
fn compute_bin_width<T>(min: T, max: T, n_bins: usize) -> T
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
let range = max.clone() - min.clone();
let bin_width = range / T::from_usize(n_bins).unwrap();
bin_width
}
#[cfg(test)]
mod equispaced_tests {
use super::*;
#[should_panic]
#[test]
fn bin_width_has_to_be_positive() {
EquiSpaced::new(0, 0, 200);
}
}
#[cfg(test)]
mod sqrt_tests {
use super::*;
#[should_panic]
#[test]
fn constant_array_are_bad() {
Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1]);
}
#[should_panic]
#[test]
fn empty_arrays_cause_panic() {
let _: Sqrt<usize> = Sqrt::from_array(&array![]);
}
}
#[cfg(test)]
mod rice_tests {
use super::*;
#[should_panic]
#[test]
fn constant_array_are_bad() {
Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1]);
}
#[should_panic]
#[test]
fn empty_arrays_cause_panic() {
let _: Rice<usize> = Rice::from_array(&array![]);
}
}
#[cfg(test)]
mod sturges_tests {
use super::*;
#[should_panic]
#[test]
fn constant_array_are_bad() {
Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1]);
}
#[should_panic]
#[test]
fn empty_arrays_cause_panic() {
let _: Sturges<usize> = Sturges::from_array(&array![]);
}
}
#[cfg(test)]
mod fd_tests {
use super::*;
#[should_panic]
#[test]
fn constant_array_are_bad() {
FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1]);
}
#[should_panic]
#[test]
fn zero_iqr_causes_panic() {
FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]);
}
#[should_panic]
#[test]
fn empty_arrays_cause_panic() {
let _: FreedmanDiaconis<usize> = FreedmanDiaconis::from_array(&array![]);
}
}
#[cfg(test)]
mod auto_tests {
use super::*;
#[should_panic]
#[test]
fn constant_array_are_bad() {
Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1]);
}
#[should_panic]
#[test]
fn zero_iqr_causes_panic() {
Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]);
}
#[should_panic]
#[test]
fn empty_arrays_cause_panic() {
let _: Auto<usize> = Auto::from_array(&array![]);
}
}