#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss)]
/// Rule used to decide how many histogram bins to create for a data set.
///
/// Every rule reports a single bin for empty input (see `bin_count`).
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum BinMethod {
/// Sturges' rule: `ceil(1 + log2(n))` bins for `n` values. This is the default.
#[default]
Sturges,
/// Freedman–Diaconis rule: bin width `2 * IQR * n^(-1/3)`, giving
/// `ceil(range / width)` bins. Falls back to Sturges when the IQR is not positive.
FreedmanDiaconis,
/// Scott's rule as implemented here: bin width `3.5 * std * n^(-1/3)` using the
/// population standard deviation. Falls back to Sturges when the deviation is not positive.
Scott,
/// A caller-chosen bin count, returned as-is for non-empty data.
Count(usize),
/// A caller-chosen bin width: `ceil(range / width)` bins, or a single bin when
/// the width is not greater than zero.
Width(f64),
}
impl BinMethod {
    /// Returns the number of histogram bins this method suggests for `data`.
    ///
    /// Empty input always yields `1`. For non-empty input:
    ///
    /// * `Count(k)` returns `k` unchanged.
    /// * `Sturges` returns `ceil(1 + log2(n))`.
    /// * `FreedmanDiaconis` and `Scott` derive a bin width from the data's
    ///   spread and return `ceil(range / width)`; both defer to Sturges when
    ///   their spread estimate (IQR or standard deviation) is `<= 0`.
    /// * `Width(w)` returns `ceil(range / w)` when `w > 0`, otherwise `1`.
    pub fn bin_count(&self, data: &[f64]) -> usize {
        if data.is_empty() {
            return 1;
        }
        let samples = data.len() as f64;

        // Max minus min of the data; evaluated only by the rules that need it.
        let spread = || {
            let hi = data.iter().copied().fold(f64::NEG_INFINITY, f64::max);
            let lo = data.iter().copied().fold(f64::INFINITY, f64::min);
            hi - lo
        };
        // Sturges' rule, shared by the direct variant and both fallbacks.
        let sturges = || (1.0 + samples.log2()).ceil() as usize;

        match *self {
            Self::Count(bins) => bins,
            Self::Sturges => sturges(),
            Self::Width(step) if step > 0.0 => (spread() / step).ceil() as usize,
            Self::Width(_) => 1,
            Self::FreedmanDiaconis => {
                let iqr = percentile(data, 0.75) - percentile(data, 0.25);
                if iqr <= 0.0 {
                    sturges()
                } else {
                    let bin_width = 2.0 * iqr * samples.powf(-1.0 / 3.0);
                    (spread() / bin_width).ceil() as usize
                }
            }
            Self::Scott => {
                let mean = data.iter().sum::<f64>() / samples;
                // Population variance: divides by n, not n - 1.
                let variance =
                    data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / samples;
                let sigma = variance.sqrt();
                if sigma <= 0.0 {
                    sturges()
                } else {
                    let bin_width = 3.5 * sigma * samples.powf(-1.0 / 3.0);
                    (spread() / bin_width).ceil() as usize
                }
            }
        }
    }
}
/// One histogram bin: an interval together with the number of values tallied into it.
#[derive(Debug, Clone, PartialEq)]
pub struct Bin {
/// Lower edge of the bin.
pub left: f64,
/// Upper edge of the bin.
pub right: f64,
/// Number of data values assigned to this bin.
pub count: usize,
}
/// Histogram transform that turns a slice of values into a list of `Bin`s.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct BinTransform {
/// Rule used to choose how many bins `compute` produces.
pub method: BinMethod,
}
impl BinTransform {
    /// Creates a transform that bins data according to `method`.
    #[must_use]
    pub fn new(method: BinMethod) -> Self {
        Self { method }
    }

    /// Builds equal-width histogram bins over `data`.
    ///
    /// * Empty `data` yields one empty bin spanning `0.0..1.0`.
    /// * When the minimum and maximum differ by less than `f64::EPSILON`, a
    ///   single bin spanning `min - 0.5 .. max + 0.5` holds every value.
    /// * Otherwise the range `min..max` is split into `k` contiguous bins,
    ///   where `k` is the configured method's bin count raised to at least
    ///   one. Each value lands in bin `floor((v - min) / width)`; the maximum
    ///   is clamped into the last bin.
    ///
    /// # Panics
    ///
    /// Allocation may panic if the method reports an enormous bin count (for
    /// example a minuscule `Width`).
    #[must_use]
    pub fn compute(&self, data: &[f64]) -> Vec<Bin> {
        if data.is_empty() {
            return vec![Bin {
                left: 0.0,
                right: 1.0,
                count: 0,
            }];
        }

        let dmin = data.iter().copied().fold(f64::INFINITY, f64::min);
        let dmax = data.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        if (dmax - dmin).abs() < f64::EPSILON {
            return vec![Bin {
                left: dmin - 0.5,
                right: dmax + 0.5,
                count: data.len(),
            }];
        }

        // A method may report zero bins (e.g. `BinMethod::Count(0)`). Without
        // this floor the width would be a division by zero and `k - 1` below
        // would underflow, so always keep at least one bin.
        let k = self.method.bin_count(data).max(1);
        let width = (dmax - dmin) / k as f64;

        let mut bins: Vec<Bin> = (0..k)
            .map(|i| Bin {
                left: dmin + i as f64 * width,
                right: dmin + (i + 1) as f64 * width,
                count: 0,
            })
            .collect();

        for &value in data {
            // The maximum maps to index `k`; clamp it into the last bin.
            let slot = (((value - dmin) / width).floor() as usize).min(k - 1);
            bins[slot].count += 1;
        }
        bins
    }
}
/// Returns the `p`-quantile of `data`, linearly interpolating between the two
/// nearest order statistics.
///
/// `p` is a fraction (`0.5` is the median) and is clamped to `[0.0, 1.0]`, so
/// out-of-range requests return the minimum or maximum instead of indexing
/// past the end or extrapolating. Returns NaN when `data` is empty.
fn percentile(data: &[f64], p: f64) -> f64 {
    if data.is_empty() {
        return f64::NAN;
    }
    let mut sorted = data.to_vec();
    // `total_cmp` is a real total order even when NaN is present; a comparator
    // that is not a total order may make the sort panic.
    sorted.sort_unstable_by(f64::total_cmp);

    let last = sorted.len() - 1;
    let rank = p.clamp(0.0, 1.0) * last as f64;
    let idx = rank.floor() as usize;
    let frac = rank - idx as f64;
    match sorted.get(idx + 1) {
        Some(&upper) => sorted[idx] * (1.0 - frac) + upper * frac,
        // `idx` is already the last element: nothing to interpolate towards.
        None => sorted[idx],
    }
}
#[cfg(test)]
mod tests {
    use super::{Bin, BinMethod, BinTransform, percentile};

    /// Produces `0.0, 1.0, ..., (len - 1)` as floats.
    fn ramp(len: u32) -> Vec<f64> {
        (0..len).map(f64::from).collect()
    }

    /// A fixed count of five over five distinct values gives five bins.
    #[test]
    fn bin_transform_new() {
        let bins = BinTransform::new(BinMethod::Count(5)).compute(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        assert_eq!(bins.len(), 5);
    }

    /// Empty input produces exactly one bin.
    #[test]
    fn bin_transform_empty() {
        let no_data: [f64; 0] = [];
        let bins = BinTransform::new(BinMethod::default()).compute(&no_data);
        assert_eq!(bins.len(), 1);
    }

    /// `Count` reports the requested number of bins.
    #[test]
    fn bin_method_count() {
        assert_eq!(BinMethod::Count(10).bin_count(&[1.0; 100]), 10);
    }

    /// A width of 10 over the range 0..=99 needs ten bins.
    #[test]
    fn bin_method_width() {
        let values = ramp(100);
        assert_eq!(BinMethod::Width(10.0).bin_count(&values), 10);
    }

    /// Freedman–Diaconis yields a positive count on spread-out data.
    #[test]
    fn bin_method_freedman_diaconis() {
        let values = ramp(100);
        assert!(BinMethod::FreedmanDiaconis.bin_count(&values) > 0);
    }

    /// Freedman–Diaconis still yields a positive count when the IQR is zero.
    #[test]
    fn bin_method_freedman_diaconis_equal_iqr() {
        let constant = [1.0, 1.0, 1.0, 1.0, 1.0];
        assert!(BinMethod::FreedmanDiaconis.bin_count(&constant) > 0);
    }

    /// Scott yields a positive count on spread-out data.
    #[test]
    fn bin_method_scott() {
        let values = ramp(100);
        assert!(BinMethod::Scott.bin_count(&values) > 0);
    }

    /// Scott still yields a positive count when the deviation is zero.
    #[test]
    fn bin_method_scott_zero_std() {
        let constant = [5.0, 5.0, 5.0, 5.0, 5.0];
        assert!(BinMethod::Scott.bin_count(&constant) > 0);
    }

    /// A positive width over a zero range reports zero bins.
    #[test]
    fn bin_method_width_zero_range() {
        let constant = [5.0, 5.0, 5.0];
        assert_eq!(BinMethod::Width(1.0).bin_count(&constant), 0);
    }

    /// A negative width falls back to a single bin.
    #[test]
    fn bin_method_width_negative() {
        let values = [1.0, 2.0, 3.0];
        assert_eq!(BinMethod::Width(-1.0).bin_count(&values), 1);
    }

    /// `Bin` fields are stored and read back as given.
    #[test]
    fn bin_values() {
        let first = Bin {
            left: 0.0,
            right: 10.0,
            count: 5,
        };
        let second = Bin {
            left: 10.0,
            right: 20.0,
            count: 3,
        };
        assert_eq!(first.count, 5);
        assert_eq!(second.left, 10.0);
    }

    /// Twenty evenly spaced values in four bins put five values in the first bin.
    #[test]
    fn bin_transform_compute_even_distribution() {
        let bins = BinTransform::new(BinMethod::Count(4)).compute(&ramp(20));
        assert_eq!(bins.len(), 4);
        assert_eq!(bins[0].count, 5);
    }

    /// Constant data is collected into the first bin.
    #[test]
    fn bin_transform_single_value() {
        let bins = BinTransform::new(BinMethod::Count(1)).compute(&[5.0, 5.0, 5.0]);
        assert_eq!(bins[0].count, 3);
    }

    /// The percentile of an empty slice is NaN.
    #[test]
    fn percentile_empty() {
        assert!(percentile(&[], 0.5).is_nan());
    }

    /// The 0.5 percentile of 1..=5 is the middle value.
    #[test]
    fn percentile_median() {
        let median = percentile(&[1.0, 2.0, 3.0, 4.0, 5.0], 0.5);
        assert!((median - 3.0).abs() < 0.01);
    }

    /// The 1.0 percentile of 1..=5 is the largest value.
    #[test]
    fn percentile_100() {
        let top = percentile(&[1.0, 2.0, 3.0, 4.0, 5.0], 1.0);
        assert!((top - 5.0).abs() < 0.01);
    }

    /// Every method reports a single bin for empty input.
    #[test]
    fn bin_count_empty_returns_one() {
        let methods = [
            BinMethod::Sturges,
            BinMethod::FreedmanDiaconis,
            BinMethod::Scott,
            BinMethod::Count(5),
            BinMethod::Width(1.0),
        ];
        for method in methods {
            assert_eq!(method.bin_count(&[]), 1, "{method:?}");
        }
    }
}