#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss)]
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum BinMethod {
#[default]
Sturges,
FreedmanDiaconis,
Scott,
Count(usize),
Width(f64),
}
impl BinMethod {
pub fn bin_count(&self, data: &[f64]) -> usize {
let n = data.len();
if n == 0 {
return 1;
}
match self {
Self::Count(k) => *k,
Self::Sturges => (1.0 + (n as f64).log2()).ceil() as usize,
Self::FreedmanDiaconis => {
let q1 = percentile(data, 0.25);
let q3 = percentile(data, 0.75);
let iqr = q3 - q1;
if iqr <= 0.0 {
return Self::Sturges.bin_count(data);
}
let h = 2.0 * iqr * (n as f64).powf(-1.0 / 3.0);
let range = data.iter().copied().fold(f64::NEG_INFINITY, f64::max)
- data.iter().copied().fold(f64::INFINITY, f64::min);
(range / h).ceil() as usize
}
Self::Scott => {
let mean = data.iter().sum::<f64>() / n as f64;
let var = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n as f64;
let std = var.sqrt();
if std <= 0.0 {
return Self::Sturges.bin_count(data);
}
let h = 3.5 * std * (n as f64).powf(-1.0 / 3.0);
let range = data.iter().copied().fold(f64::NEG_INFINITY, f64::max)
- data.iter().copied().fold(f64::INFINITY, f64::min);
(range / h).ceil() as usize
}
Self::Width(w) => {
let range = data.iter().copied().fold(f64::NEG_INFINITY, f64::max)
- data.iter().copied().fold(f64::INFINITY, f64::min);
if *w > 0.0 {
(range / w).ceil() as usize
} else {
1
}
}
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Bin {
pub left: f64,
pub right: f64,
pub count: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct BinTransform {
pub method: BinMethod,
}
impl BinTransform {
#[must_use]
pub fn new(method: BinMethod) -> Self {
Self { method }
}
#[must_use]
pub fn compute(&self, data: &[f64]) -> Vec<Bin> {
if data.is_empty() {
return vec![Bin {
left: 0.0,
right: 1.0,
count: 0,
}];
}
let k = self.method.bin_count(data);
let dmin = data.iter().copied().fold(f64::INFINITY, f64::min);
let dmax = data.iter().copied().fold(f64::NEG_INFINITY, f64::max);
if (dmax - dmin).abs() < f64::EPSILON {
return vec![Bin {
left: dmin - 0.5,
right: dmax + 0.5,
count: data.len(),
}];
}
let width = (dmax - dmin) / k as f64;
let mut bins = Vec::with_capacity(k);
for i in 0..k {
bins.push(Bin {
left: dmin + i as f64 * width,
right: dmin + (i + 1) as f64 * width,
count: 0,
});
}
for &v in data {
let idx = ((v - dmin) / width).floor() as usize;
let idx = idx.min(k - 1);
bins[idx].count += 1;
}
bins
}
}
#[must_use]
pub fn percentile(data: &[f64], p: f64) -> f64 {
if data.is_empty() {
return f64::NAN;
}
let mut sorted: Vec<f64> = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let idx = (p * (sorted.len() - 1) as f64).floor() as usize;
let frac = p * (sorted.len() - 1) as f64 - idx as f64;
if idx + 1 >= sorted.len() {
return sorted[idx];
}
sorted[idx] * (1.0 - frac) + sorted[idx + 1] * frac
}
#[must_use]
pub fn std_dev(data: &[f64]) -> f64 {
let n = data.len();
if n < 2 {
return 0.0;
}
let mean = data.iter().sum::<f64>() / n as f64;
let var = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n as f64;
var.sqrt()
}
#[must_use]
pub fn silverman_bandwidth(data: &[f64]) -> f64 {
let n = data.len();
if n < 2 {
return 0.0;
}
let sigma = std_dev(data);
let q1 = percentile(data, 0.25);
let q3 = percentile(data, 0.75);
let iqr = q3 - q1;
let spread = if iqr > 0.0 {
sigma.min(iqr / 1.34)
} else {
sigma
};
if spread <= 0.0 {
return 0.0;
}
0.9 * spread * (n as f64).powf(-0.2)
}
#[must_use]
pub fn scott_bandwidth(data: &[f64]) -> f64 {
let n = data.len();
if n < 2 {
return 0.0;
}
let sigma = std_dev(data);
if sigma <= 0.0 {
return 0.0;
}
1.06 * sigma * (n as f64).powf(-0.2)
}
#[derive(Debug, Clone, PartialEq)]
pub struct BoxPlotStats {
pub min: f64,
pub q1: f64,
pub median: f64,
pub q3: f64,
pub max: f64,
pub outliers: Vec<f64>,
}
impl BoxPlotStats {
#[must_use]
pub fn compute(data: &[f64]) -> Self {
let filtered: Vec<f64> = data.iter().copied().filter(|v| !v.is_nan()).collect();
if filtered.is_empty() {
return Self {
min: 0.0,
q1: 0.0,
median: 0.0,
q3: 0.0,
max: 0.0,
outliers: Vec::new(),
};
}
let q1 = percentile(&filtered, 0.25);
let median = percentile(&filtered, 0.50);
let q3 = percentile(&filtered, 0.75);
let iqr = q3 - q1;
let lower_fence = q1 - 1.5 * iqr;
let upper_fence = q3 + 1.5 * iqr;
let mut min = f64::INFINITY;
let mut max = f64::NEG_INFINITY;
let mut outliers = Vec::new();
for &v in &filtered {
if v < lower_fence || v > upper_fence {
outliers.push(v);
} else {
if v < min {
min = v;
}
if v > max {
max = v;
}
}
}
if !min.is_finite() {
min = q1;
max = q3;
}
Self {
min,
q1,
median,
q3,
max,
outliers,
}
}
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum Kernel {
#[default]
Gaussian,
}
#[derive(Clone, Copy, Debug, Default, PartialEq)]
#[non_exhaustive]
pub enum Bandwidth {
#[default]
Silverman,
Scott,
Manual(f64),
}
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Kde {
pub bandwidth: Bandwidth,
pub kernel: Kernel,
}
impl Kde {
#[must_use]
pub fn new(bandwidth: Bandwidth, kernel: Kernel) -> Self {
Self { bandwidth, kernel }
}
#[must_use]
pub fn resolve_bandwidth(&self, data: &[f64]) -> f64 {
match self.bandwidth {
Bandwidth::Silverman => silverman_bandwidth(data),
Bandwidth::Scott => scott_bandwidth(data),
Bandwidth::Manual(h) => h,
}
}
#[must_use]
pub fn evaluate_at(&self, y: f64, data: &[f64]) -> f64 {
if data.is_empty() {
return 0.0;
}
let h = self.resolve_bandwidth(data);
if h <= 0.0 {
return 0.0;
}
let n = data.len() as f64;
let inv_nh = 1.0 / (n * h);
match self.kernel {
Kernel::Gaussian => {
const INV_SQRT_2PI: f64 = 0.398_942_280_401_433;
let mut sum = 0.0;
for &xi in data {
let u = (y - xi) / h;
sum += INV_SQRT_2PI * (-0.5 * u * u).exp();
}
inv_nh * sum
}
}
}
#[must_use]
pub fn evaluate_grid(&self, points: &[f64], data: &[f64]) -> Vec<f64> {
points.iter().map(|&y| self.evaluate_at(y, data)).collect()
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct Polyline {
pub points: Vec<(f64, f64)>,
pub level: f64,
}
#[derive(Clone, Debug)]
pub struct Grid {
pub values: Vec<f64>,
pub nx: usize,
pub ny: usize,
pub x_min: f64,
pub x_max: f64,
pub y_min: f64,
pub y_max: f64,
}
impl Grid {
#[must_use]
pub fn new(
values: Vec<f64>,
nx: usize,
ny: usize,
x_min: f64,
x_max: f64,
y_min: f64,
y_max: f64,
) -> Option<Self> {
if nx < 2 || ny < 2 || values.len() != nx * ny {
return None;
}
Some(Self {
values,
nx,
ny,
x_min,
x_max,
y_min,
y_max,
})
}
#[must_use]
pub fn sample<F: Fn(f64, f64) -> f64>(
nx: usize,
ny: usize,
x_min: f64,
x_max: f64,
y_min: f64,
y_max: f64,
f: F,
) -> Self {
let dx = if nx > 1 {
(x_max - x_min) / (nx - 1) as f64
} else {
0.0
};
let dy = if ny > 1 {
(y_max - y_min) / (ny - 1) as f64
} else {
0.0
};
let mut values = Vec::with_capacity(nx * ny);
for i in 0..ny {
let y = y_min + i as f64 * dy;
for j in 0..nx {
let x = x_min + j as f64 * dx;
values.push(f(x, y));
}
}
Self {
values,
nx,
ny,
x_min,
x_max,
y_min,
y_max,
}
}
fn at(&self, j: usize, i: usize) -> f64 {
self.values[i * self.nx + j]
}
}
pub struct Contour;
impl Contour {
#[must_use]
#[allow(clippy::many_single_char_names)] pub fn compute(grid: &Grid, levels: &[f64]) -> Vec<Polyline> {
if grid.nx < 2 || grid.ny < 2 || levels.is_empty() {
return Vec::new();
}
let dx = (grid.x_max - grid.x_min) / (grid.nx - 1) as f64;
let dy = (grid.y_max - grid.y_min) / (grid.ny - 1) as f64;
let mut out = Vec::new();
for &level in levels {
for cell_i in 0..(grid.ny - 1) {
for cell_j in 0..(grid.nx - 1) {
let v0 = grid.at(cell_j, cell_i); let v1 = grid.at(cell_j + 1, cell_i); let v2 = grid.at(cell_j + 1, cell_i + 1); let v3 = grid.at(cell_j, cell_i + 1); if !(v0.is_finite() && v1.is_finite() && v2.is_finite() && v3.is_finite()) {
continue;
}
let mut mask = 0u8;
if v0 > level {
mask |= 1;
}
if v1 > level {
mask |= 2;
}
if v2 > level {
mask |= 4;
}
if v3 > level {
mask |= 8;
}
if mask == 0 || mask == 15 {
continue;
}
let edge_pt = |e: u8| -> (f64, f64) {
let (a_v, b_v, a_uv, b_uv) = match e {
0 => (v0, v1, (0.0, 0.0), (1.0, 0.0)),
1 => (v1, v2, (1.0, 0.0), (1.0, 1.0)),
2 => (v2, v3, (1.0, 1.0), (0.0, 1.0)),
3 => (v3, v0, (0.0, 1.0), (0.0, 0.0)),
_ => unreachable!(),
};
let denom = b_v - a_v;
let t = if denom.abs() < f64::EPSILON {
0.5
} else {
((level - a_v) / denom).clamp(0.0, 1.0)
};
let u = a_uv.0 + t * (b_uv.0 - a_uv.0);
let v = a_uv.1 + t * (b_uv.1 - a_uv.1);
let x = grid.x_min + (cell_j as f64 + u) * dx;
let y = grid.y_min + (cell_i as f64 + v) * dy;
(x, y)
};
let segments: &[(u8, u8)] = match mask {
1 | 14 => &[(3, 0)],
2 | 13 => &[(0, 1)],
3 | 12 => &[(3, 1)],
4 | 11 => &[(1, 2)],
6 | 9 => &[(0, 2)],
7 | 8 => &[(2, 3)],
5 => {
let avg = (v0 + v1 + v2 + v3) * 0.25;
if avg > level {
&[(0, 1), (2, 3)]
} else {
&[(3, 0), (1, 2)]
}
}
10 => {
let avg = (v0 + v1 + v2 + v3) * 0.25;
if avg > level {
&[(3, 0), (1, 2)]
} else {
&[(0, 1), (2, 3)]
}
}
_ => &[],
};
for &(ea, eb) in segments {
let pa = edge_pt(ea);
let pb = edge_pt(eb);
out.push(Polyline {
points: vec![pa, pb],
level,
});
}
}
}
}
out
}
}
#[cfg(test)]
mod tests {
use super::{
Bandwidth, Bin, BinMethod, BinTransform, BoxPlotStats, Contour, Grid, Kde, Kernel,
percentile, scott_bandwidth, silverman_bandwidth, std_dev,
};
#[test]
fn bin_transform_new() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let transform = BinTransform::new(BinMethod::Count(5));
let bins = transform.compute(&data);
assert_eq!(bins.len(), 5);
}
#[test]
fn bin_transform_empty() {
let data = vec![];
let transform = BinTransform::new(BinMethod::default());
let bins = transform.compute(&data);
assert_eq!(bins.len(), 1);
}
#[test]
fn bin_method_count() {
let data = vec![1.0; 100];
assert_eq!(BinMethod::Count(10).bin_count(&data), 10);
}
#[test]
fn bin_method_width() {
let data: Vec<f64> = (0..100).map(f64::from).collect();
let method = BinMethod::Width(10.0);
let count = method.bin_count(&data);
assert_eq!(count, 10);
}
#[test]
fn bin_method_freedman_diaconis() {
let data: Vec<f64> = (0..100).map(f64::from).collect();
let method = BinMethod::FreedmanDiaconis;
let count = method.bin_count(&data);
assert!(count > 0);
}
#[test]
fn bin_method_freedman_diaconis_equal_iqr() {
let data = vec![1.0, 1.0, 1.0, 1.0, 1.0];
let method = BinMethod::FreedmanDiaconis;
let count = method.bin_count(&data);
assert!(count > 0);
}
#[test]
fn bin_method_scott() {
let data: Vec<f64> = (0..100).map(f64::from).collect();
let method = BinMethod::Scott;
let count = method.bin_count(&data);
assert!(count > 0);
}
#[test]
fn bin_method_scott_zero_std() {
let data = vec![5.0, 5.0, 5.0, 5.0, 5.0];
let method = BinMethod::Scott;
let count = method.bin_count(&data);
assert!(count > 0);
}
#[test]
fn bin_method_width_zero_range() {
let data = vec![5.0, 5.0, 5.0];
let method = BinMethod::Width(1.0);
let count = method.bin_count(&data);
assert_eq!(count, 0);
}
#[test]
fn bin_method_width_negative() {
let data = vec![1.0, 2.0, 3.0];
let method = BinMethod::Width(-1.0);
let count = method.bin_count(&data);
assert_eq!(count, 1);
}
#[test]
fn bin_values() {
let bins = [
Bin {
left: 0.0,
right: 10.0,
count: 5,
},
Bin {
left: 10.0,
right: 20.0,
count: 3,
},
];
assert_eq!(bins[0].count, 5);
assert_eq!(bins[1].left, 10.0);
}
#[test]
fn bin_transform_compute_even_distribution() {
let data: Vec<f64> = (0..20).map(f64::from).collect();
let transform = BinTransform::new(BinMethod::Count(4));
let bins = transform.compute(&data);
assert_eq!(bins.len(), 4);
assert_eq!(bins[0].count, 5);
}
#[test]
fn bin_transform_single_value() {
let data = vec![5.0, 5.0, 5.0];
let transform = BinTransform::new(BinMethod::Count(1));
let bins = transform.compute(&data);
assert_eq!(bins[0].count, 3);
}
#[test]
fn percentile_empty() {
let result = percentile(&[], 0.5);
assert!(result.is_nan());
}
#[test]
fn percentile_median() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let p = percentile(&data, 0.5);
assert!((p - 3.0).abs() < 0.01);
}
#[test]
fn percentile_100() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let p = percentile(&data, 1.0);
assert!((p - 5.0).abs() < 0.01);
}
#[test]
fn bin_count_empty_returns_one() {
for method in [
BinMethod::Sturges,
BinMethod::FreedmanDiaconis,
BinMethod::Scott,
BinMethod::Count(5),
BinMethod::Width(1.0),
] {
assert_eq!(method.bin_count(&[]), 1, "{method:?}");
}
}
#[test]
fn std_dev_empty_or_single_is_zero() {
assert_eq!(std_dev(&[]), 0.0);
assert_eq!(std_dev(&[42.0]), 0.0);
}
#[test]
fn std_dev_known_sample() {
let s = std_dev(&[1.0, 2.0, 3.0, 4.0, 5.0]);
assert!((s - 2.0_f64.sqrt()).abs() < 1e-12);
}
#[test]
fn silverman_bandwidth_constant_data_returns_zero() {
assert_eq!(silverman_bandwidth(&[5.0; 10]), 0.0);
}
#[test]
fn silverman_bandwidth_matches_reference() {
let data: Vec<f64> = (1..=10).map(f64::from).collect();
let h = silverman_bandwidth(&data);
assert!((h - 1.631_2).abs() < 1e-3);
}
#[test]
fn scott_bandwidth_matches_reference() {
let data: Vec<f64> = (1..=10).map(f64::from).collect();
let h = scott_bandwidth(&data);
assert!((h - 1.921_2).abs() < 1e-3);
}
#[test]
fn boxplot_stats_textbook_example() {
let stats = BoxPlotStats::compute(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 20.0]);
assert!((stats.q1 - 3.25).abs() < 1e-9);
assert!((stats.median - 5.5).abs() < 1e-9);
assert!((stats.q3 - 7.75).abs() < 1e-9);
assert!((stats.min - 1.0).abs() < 1e-9);
assert!((stats.max - 9.0).abs() < 1e-9);
assert_eq!(stats.outliers, vec![20.0]);
}
#[test]
fn boxplot_stats_empty_is_degenerate() {
let stats = BoxPlotStats::compute(&[]);
assert_eq!(stats.q1, 0.0);
assert_eq!(stats.median, 0.0);
assert_eq!(stats.q3, 0.0);
assert!(stats.outliers.is_empty());
}
#[test]
fn boxplot_stats_filters_nan() {
let stats = BoxPlotStats::compute(&[1.0, f64::NAN, 2.0, 3.0]);
assert!((stats.median - 2.0).abs() < 1e-9);
assert!(stats.outliers.is_empty());
}
#[test]
fn kde_evaluate_at_constant_data_returns_zero() {
let kde = Kde::new(Bandwidth::Silverman, Kernel::Gaussian);
assert_eq!(kde.evaluate_at(1.0, &[1.0; 5]), 0.0);
}
#[test]
fn kde_evaluate_at_known_density() {
let data = vec![-2.0, -1.0, 0.0, 1.0, 2.0];
let kde = Kde::new(Bandwidth::Silverman, Kernel::Gaussian);
let center = kde.evaluate_at(0.0, &data);
let edge = kde.evaluate_at(2.0, &data);
assert!(center > edge, "center {center} should exceed edge {edge}");
}
#[test]
fn kde_evaluate_grid_matches_pointwise() {
let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
let kde = Kde::new(Bandwidth::Manual(0.5), Kernel::Gaussian);
let points = vec![-1.0, 0.5, 2.0, 3.5];
let grid = kde.evaluate_grid(&points, &data);
for (g, &y) in grid.iter().zip(&points) {
assert!((*g - kde.evaluate_at(y, &data)).abs() < 1e-12);
}
}
#[test]
fn grid_new_rejects_size_mismatch() {
assert!(Grid::new(vec![0.0; 3], 2, 2, 0.0, 1.0, 0.0, 1.0).is_none());
}
#[test]
fn grid_new_rejects_too_small() {
assert!(Grid::new(vec![0.0; 2], 1, 2, 0.0, 1.0, 0.0, 1.0).is_none());
assert!(Grid::new(vec![0.0; 2], 2, 1, 0.0, 1.0, 0.0, 1.0).is_none());
}
#[test]
fn grid_sample_evaluates_at_corners() {
let g = Grid::sample(2, 2, 0.0, 1.0, 0.0, 1.0, |x, _| x);
assert_eq!(g.values, vec![0.0, 1.0, 0.0, 1.0]);
}
#[test]
fn contour_empty_levels_returns_empty() {
let g = Grid::sample(4, 4, 0.0, 1.0, 0.0, 1.0, |x, y| x + y);
assert!(Contour::compute(&g, &[]).is_empty());
}
#[test]
fn contour_plane_x_at_level_05_cuts_vertically() {
let g = Grid::sample(5, 5, 0.0, 1.0, 0.0, 1.0, |x, _| x);
let polys = Contour::compute(&g, &[0.5]);
assert!(!polys.is_empty(), "expected contour segments at level 0.5");
for p in &polys {
assert_eq!(p.points.len(), 2);
assert_eq!(p.level, 0.5);
for &(x, _) in &p.points {
assert!(
(0.25..=0.75).contains(&x),
"segment x out of expected band: {x}"
);
}
}
}
#[test]
fn contour_saddle_emits_two_segments_per_saddle_cell() {
let values = vec![1.5, 0.0, 0.0, 1.0]; let g = Grid::new(values, 2, 2, 0.0, 1.0, 0.0, 1.0).unwrap();
let polys = Contour::compute(&g, &[0.5]);
assert_eq!(polys.len(), 2);
for p in &polys {
assert_eq!(p.points.len(), 2);
assert_eq!(p.level, 0.5);
}
}
#[test]
fn contour_skips_cells_with_nan() {
let mut values = vec![0.0; 9];
values[4] = f64::NAN; let g = Grid::new(values, 3, 3, 0.0, 1.0, 0.0, 1.0).unwrap();
let polys = Contour::compute(&g, &[10.0]);
assert!(polys.is_empty());
}
#[test]
fn contour_uniform_grid_emits_nothing() {
let g = Grid::sample(5, 5, 0.0, 1.0, 0.0, 1.0, |_, _| 1.0);
assert!(Contour::compute(&g, &[0.5]).is_empty());
}
}