#![allow(clippy::needless_range_loop)]
use std::collections::VecDeque;
use super::types::{NormalDistribution, PcaResult, StatRng};
pub fn mean(data: &[f64]) -> f64 {
if data.is_empty() {
return 0.0;
}
data.iter().sum::<f64>() / data.len() as f64
}
pub fn variance(data: &[f64]) -> f64 {
if data.len() < 2 {
return 0.0;
}
let m = mean(data);
data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / (data.len() as f64 - 1.0)
}
pub fn std_dev(data: &[f64]) -> f64 {
variance(data).sqrt()
}
pub fn median(data: &[f64]) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
if n % 2 == 1 {
sorted[n / 2]
} else {
(sorted[n / 2 - 1] + sorted[n / 2]) / 2.0
}
}
pub fn percentile(data: &[f64], p: f64) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
if n == 1 {
return sorted[0];
}
let rank = (p / 100.0) * (n as f64 - 1.0);
let lo = rank.floor() as usize;
let hi = (lo + 1).min(n - 1);
let frac = rank - lo as f64;
sorted[lo] + frac * (sorted[hi] - sorted[lo])
}
pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
assert_eq!(
x.len(),
y.len(),
"covariance: x and y must have the same length"
);
let n = x.len();
if n < 2 {
return 0.0;
}
let mx = mean(x);
let my = mean(y);
x.iter()
.zip(y.iter())
.map(|(xi, yi)| (xi - mx) * (yi - my))
.sum::<f64>()
/ (n as f64 - 1.0)
}
pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
let sx = std_dev(x);
let sy = std_dev(y);
if sx == 0.0 || sy == 0.0 {
return 0.0;
}
covariance(x, y) / (sx * sy)
}
pub fn skewness(data: &[f64]) -> f64 {
let n = data.len();
if n < 3 {
return 0.0;
}
let m = mean(data);
let s = std_dev(data);
if s == 0.0 {
return 0.0;
}
let nf = n as f64;
let third_moment = data.iter().map(|x| ((x - m) / s).powi(3)).sum::<f64>() / nf;
third_moment * (nf * (nf - 1.0)).sqrt() / (nf - 2.0)
}
pub fn kurtosis(data: &[f64]) -> f64 {
let n = data.len();
if n < 4 {
return 0.0;
}
let m = mean(data);
let s = std_dev(data);
if s == 0.0 {
return 0.0;
}
let nf = n as f64;
let fourth = data.iter().map(|x| ((x - m) / s).powi(4)).sum::<f64>() / nf;
fourth - 3.0
}
pub fn histogram(data: &[f64], n_bins: usize) -> Vec<(f64, f64, usize)> {
if data.is_empty() || n_bins == 0 {
return vec![];
}
let min = data.iter().cloned().fold(f64::INFINITY, f64::min);
let max = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let width = if (max - min).abs() < f64::EPSILON {
1.0
} else {
(max - min) / n_bins as f64
};
let mut counts = vec![0usize; n_bins];
for &v in data {
let idx = ((v - min) / width) as usize;
let idx = idx.min(n_bins - 1);
counts[idx] += 1;
}
(0..n_bins)
.map(|i| {
let lo = min + i as f64 * width;
let hi = lo + width;
(lo, hi, counts[i])
})
.collect()
}
pub fn t_test_one_sample(data: &[f64], mu0: f64) -> (f64, f64) {
let n = data.len();
if n < 2 {
return (0.0, 1.0);
}
let m = mean(data);
let s = std_dev(data);
if s == 0.0 {
return (0.0, 1.0);
}
let t = (m - mu0) / (s / (n as f64).sqrt());
let p = 2.0 * (1.0 - NormalDistribution::new(0.0, 1.0).cdf(t.abs()));
(t, p)
}
pub fn chi_squared_test(observed: &[f64], expected: &[f64]) -> f64 {
assert_eq!(
observed.len(),
expected.len(),
"chi_squared_test: slices must have the same length"
);
observed
.iter()
.zip(expected.iter())
.filter(|&(_, &e)| e > 0.0)
.map(|(&o, &e)| {
let diff = o - e;
diff * diff / e
})
.sum()
}
pub fn ks_statistic(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len() as f64;
sorted
.iter()
.enumerate()
.map(|(i, &x)| {
let fn_hi = (i as f64 + 1.0) / n;
let fn_lo = i as f64 / n;
let fx = cdf(x);
(fn_hi - fx).abs().max((fx - fn_lo).abs())
})
.fold(0.0_f64, f64::max)
}
pub fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
correlation(x, y)
}
pub fn running_average(window: usize) -> impl FnMut(f64) -> f64 {
let mut buf: VecDeque<f64> = VecDeque::with_capacity(window.max(1));
let mut sum = 0.0_f64;
move |value: f64| -> f64 {
if window == 0 {
return 0.0;
}
if buf.len() == window
&& let Some(old) = buf.pop_front()
{
sum -= old;
}
buf.push_back(value);
sum += value;
sum / buf.len() as f64
}
}
pub fn block_average(data: &[f64], block_size: usize) -> (f64, f64) {
if data.is_empty() || block_size == 0 {
return (0.0, 0.0);
}
let block_means: Vec<f64> = data.chunks_exact(block_size).map(mean).collect();
let n_blocks = block_means.len();
if n_blocks < 2 {
return (mean(data), 0.0);
}
let grand_mean = mean(&block_means);
let sem = std_dev(&block_means) / (n_blocks as f64).sqrt();
(grand_mean, sem)
}
pub fn autocorrelation_time(data: &[f64], max_lag: usize) -> f64 {
let n = data.len();
if n < 2 || max_lag == 0 {
return 1.0;
}
let m = mean(data);
let c0: f64 = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / n as f64;
if c0 == 0.0 {
return 1.0;
}
let mut tau = 1.0_f64;
for t in 1..=max_lag.min(n - 1) {
let ct: f64 = data[..n - t]
.iter()
.zip(data[t..].iter())
.map(|(a, b)| (a - m) * (b - m))
.sum::<f64>()
/ n as f64;
let rho = ct / c0;
if rho <= 0.0 {
break;
}
tau += 2.0 * rho;
}
tau
}
pub fn maxwell_boltzmann_speed(mass: f64, temp: f64, kb: f64) -> f64 {
(2.0 * kb * temp / mass).sqrt()
}
pub fn boltzmann_factor(energy: f64, temp: f64, kb: f64) -> f64 {
(-energy / (kb * temp)).exp()
}
pub fn partition_function(energies: &[f64], temp: f64, kb: f64) -> f64 {
energies
.iter()
.map(|&e| boltzmann_factor(e, temp, kb))
.sum()
}
pub fn free_energy_from_partition(z: f64, temp: f64, kb: f64) -> f64 {
-kb * temp * z.ln()
}
pub fn correlation_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
if data.is_empty() {
return Vec::new();
}
let d = data[0].len();
if d == 0 {
return Vec::new();
}
let col = |j: usize| -> Vec<f64> { data.iter().map(|row| row[j]).collect() };
let mut mat = vec![vec![0.0f64; d]; d];
for i in 0..d {
for j in i..d {
let ci = col(i);
let cj = col(j);
let r = if i == j { 1.0 } else { correlation(&ci, &cj) };
mat[i][j] = r;
mat[j][i] = r;
}
}
mat
}
pub fn covariance_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
if data.is_empty() {
return Vec::new();
}
let d = data[0].len();
if d == 0 {
return Vec::new();
}
let col = |j: usize| -> Vec<f64> { data.iter().map(|row| row[j]).collect() };
let mut mat = vec![vec![0.0f64; d]; d];
for i in 0..d {
let ci = col(i);
for j in i..d {
let cj = col(j);
let cov = covariance(&ci, &cj);
mat[i][j] = cov;
mat[j][i] = cov;
}
}
mat
}
pub fn pca(data: &[Vec<f64>], n_components: usize) -> Option<PcaResult> {
if data.is_empty() {
return None;
}
let n = data.len();
let d = data[0].len();
if d == 0 || n < 2 {
return None;
}
let n_comp = n_components.min(d);
let mean_vec: Vec<f64> = (0..d)
.map(|j| data.iter().map(|row| row[j]).sum::<f64>() / n as f64)
.collect();
let centred: Vec<Vec<f64>> = data
.iter()
.map(|row| row.iter().zip(&mean_vec).map(|(x, m)| x - m).collect())
.collect();
let cov: Vec<Vec<f64>> = (0..d)
.map(|i| {
(0..d)
.map(|j| centred.iter().map(|row| row[i] * row[j]).sum::<f64>() / (n as f64 - 1.0))
.collect()
})
.collect();
let mut components = Vec::with_capacity(n_comp);
let mut variances = Vec::with_capacity(n_comp);
let mut deflated = cov.clone();
let mat_vec = |m: &Vec<Vec<f64>>, v: &Vec<f64>| -> Vec<f64> {
(0..d)
.map(|i| m[i].iter().zip(v.iter()).map(|(a, b)| a * b).sum::<f64>())
.collect()
};
let norm_vec = |v: &Vec<f64>| -> f64 { v.iter().map(|x| x * x).sum::<f64>().sqrt() };
for _ in 0..n_comp {
let mut q: Vec<f64> = (0..d).map(|i| if i == 0 { 1.0 } else { 0.0 }).collect();
for _ in 0..200 {
let aq = mat_vec(&deflated, &q);
let nrm = norm_vec(&aq);
if nrm < 1e-30 {
break;
}
q = aq.iter().map(|x| x / nrm).collect();
}
let aq = mat_vec(&deflated, &q);
let eigenvalue: f64 = q.iter().zip(aq.iter()).map(|(qi, aqj)| qi * aqj).sum();
for i in 0..d {
for j in 0..d {
deflated[i][j] -= eigenvalue * q[i] * q[j];
}
}
components.push(q);
variances.push(eigenvalue);
}
Some(PcaResult {
mean: mean_vec,
components,
explained_variance: variances,
})
}
pub fn pca_transform(x: &[f64], result: &PcaResult) -> Vec<f64> {
let centred: Vec<f64> = x.iter().zip(&result.mean).map(|(xi, mi)| xi - mi).collect();
result
.components
.iter()
.map(|pc| pc.iter().zip(¢red).map(|(pci, ci)| pci * ci).sum())
.collect()
}
pub fn acf(data: &[f64], max_lag: usize) -> Vec<f64> {
let n = data.len();
if n < 2 {
return vec![1.0];
}
let m = mean(data);
let c0: f64 = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / n as f64;
if c0 < f64::EPSILON {
return vec![1.0; max_lag + 1];
}
(0..=max_lag.min(n - 1))
.map(|lag| {
if lag == 0 {
1.0
} else {
let ct: f64 = data[..n - lag]
.iter()
.zip(data[lag..].iter())
.map(|(a, b)| (a - m) * (b - m))
.sum::<f64>()
/ n as f64;
ct / c0
}
})
.collect()
}
pub fn pacf(data: &[f64], max_lag: usize) -> Vec<f64> {
let rho = acf(data, max_lag);
let m = rho.len();
if m < 2 {
return vec![1.0];
}
let mut phi = vec![1.0_f64];
for k in 1..m {
let prev_order = phi.len() - 1;
if prev_order == 0 {
phi.push(rho[k]);
continue;
}
let prev_phi: Vec<f64> = phi[1..].to_vec();
let num: f64 = rho[k]
- prev_phi
.iter()
.zip(1..=prev_order)
.map(|(p, j)| p * rho[k - j])
.sum::<f64>();
let den: f64 = 1.0
- prev_phi
.iter()
.zip(1..=prev_order)
.map(|(p, j)| p * rho[j])
.sum::<f64>();
let phi_kk = if den.abs() < 1e-30 { 0.0 } else { num / den };
phi.push(phi_kk);
}
phi
}
pub fn bootstrap_ci(
data: &[f64],
statistic: impl Fn(&[f64]) -> f64,
n_boot: usize,
alpha: f64,
rng: &mut StatRng,
) -> (f64, f64) {
let n = data.len();
if n == 0 {
return (0.0, 0.0);
}
let mut boot_stats: Vec<f64> = Vec::with_capacity(n_boot);
for _ in 0..n_boot {
let resample: Vec<f64> = (0..n)
.map(|_| {
let idx = (rng.next_f64() * n as f64) as usize;
data[idx.min(n - 1)]
})
.collect();
boot_stats.push(statistic(&resample));
}
boot_stats.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let lo_idx = ((alpha / 2.0) * n_boot as f64) as usize;
let hi_idx = ((1.0 - alpha / 2.0) * n_boot as f64) as usize;
(
boot_stats[lo_idx.min(n_boot.saturating_sub(1))],
boot_stats[hi_idx.min(n_boot.saturating_sub(1))],
)
}
pub fn bootstrap_se(
data: &[f64],
statistic: impl Fn(&[f64]) -> f64,
n_boot: usize,
rng: &mut StatRng,
) -> f64 {
let n = data.len();
if n == 0 {
return 0.0;
}
let boot_stats: Vec<f64> = (0..n_boot)
.map(|_| {
let resample: Vec<f64> = (0..n)
.map(|_| {
let idx = (rng.next_f64() * n as f64) as usize;
data[idx.min(n - 1)]
})
.collect();
statistic(&resample)
})
.collect();
std_dev(&boot_stats)
}
pub fn t_test_two_sample(x: &[f64], y: &[f64]) -> (f64, f64) {
if x.len() < 2 || y.len() < 2 {
return (0.0, 1.0);
}
let mx = mean(x);
let my = mean(y);
let sx2 = variance(x);
let sy2 = variance(y);
let nx = x.len() as f64;
let ny = y.len() as f64;
let se = (sx2 / nx + sy2 / ny).sqrt();
if se < f64::EPSILON {
return (0.0, 1.0);
}
let t = (mx - my) / se;
let p = 2.0 * (1.0 - NormalDistribution::new(0.0, 1.0).cdf(t.abs()));
(t, p)
}
pub fn mann_whitney_u(x: &[f64], y: &[f64]) -> f64 {
let nx = x.len() as f64;
let ny = y.len() as f64;
if x.is_empty() || y.is_empty() {
return 0.0;
}
let u: f64 = x
.iter()
.map(|&xi| {
y.iter()
.map(|&yj| {
if xi > yj {
1.0
} else if (xi - yj).abs() < f64::EPSILON {
0.5
} else {
0.0
}
})
.sum::<f64>()
})
.sum();
let _ = (nx, ny);
u
}
pub fn wilcoxon_signed_rank(x: &[f64], y: &[f64]) -> f64 {
assert_eq!(
x.len(),
y.len(),
"wilcoxon_signed_rank: x and y must have same length"
);
let diffs: Vec<f64> = x.iter().zip(y.iter()).map(|(xi, yi)| xi - yi).collect();
let nonzero: Vec<f64> = diffs
.into_iter()
.filter(|&d| d.abs() > f64::EPSILON)
.collect();
if nonzero.is_empty() {
return 0.0;
}
let mut indexed: Vec<(f64, f64)> = nonzero.iter().map(|&d| (d.abs(), d.signum())).collect();
indexed.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
let w_plus: f64 = indexed
.iter()
.enumerate()
.filter(|(_, (_, sign))| *sign > 0.0)
.map(|(rank, _)| rank as f64 + 1.0)
.sum();
w_plus
}
pub fn shapiro_wilk_w(data: &[f64]) -> f64 {
let n = data.len();
if n < 3 {
return 1.0;
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let m = mean(&sorted);
let s2: f64 = sorted.iter().map(|x| (x - m).powi(2)).sum::<f64>();
if s2 < f64::EPSILON {
return 1.0;
}
let half = n / 2;
let mut b_sum = 0.0f64;
for i in 0..half {
let rank = n - 1 - i;
let p = (rank as f64 + 1.0 - 0.375) / (n as f64 + 0.25);
let a_i = normal_quantile_approx(p);
b_sum += a_i * (sorted[rank] - sorted[i]);
}
(b_sum * b_sum) / s2
}
pub(super) fn normal_quantile_approx(p: f64) -> f64 {
if p <= 0.0 || p >= 1.0 {
return 0.0;
}
let pp = if p < 0.5 { p } else { 1.0 - p };
let t = (-2.0 * pp.ln()).sqrt();
let c0 = 2.515517_f64;
let c1 = 0.802853_f64;
let c2 = 0.010328_f64;
let d1 = 1.432788_f64;
let d2 = 0.189269_f64;
let d3 = 0.001308_f64;
let x = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
if p < 0.5 { -x } else { x }
}
pub(super) fn erf_approx(x: f64) -> f64 {
pub(super) const P: f64 = 0.3275911;
pub(super) const A1: f64 = 0.254829592;
pub(super) const A2: f64 = -0.284496736;
pub(super) const A3: f64 = 1.421413741;
pub(super) const A4: f64 = -1.453152027;
pub(super) const A5: f64 = 1.061405429;
let sign = if x < 0.0 { -1.0 } else { 1.0 };
let x = x.abs();
let t = 1.0 / (1.0 + P * x);
let poly = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5))));
sign * (1.0 - poly * (-x * x).exp())
}
pub(super) fn log_gamma_stirling(z: f64) -> f64 {
if z < 7.0 {
return log_gamma_stirling(z + 1.0) - z.ln();
}
let z = z - 1.0;
0.5 * (std::f64::consts::TAU / z).ln()
+ z * ((z + 1.0 / (12.0 * z) - 1.0 / (360.0 * z.powi(3))) / std::f64::consts::E).ln()
}
pub(super) fn log_factorial(k: u64) -> f64 {
if k == 0 || k == 1 {
return 0.0;
}
if k <= 20 {
(1..=k).map(|i| (i as f64).ln()).sum()
} else {
log_gamma_stirling(k as f64 + 1.0)
}
}
pub fn quartiles(data: &[f64]) -> (f64, f64, f64) {
if data.is_empty() {
return (0.0, 0.0, 0.0);
}
(
percentile(data, 25.0),
percentile(data, 50.0),
percentile(data, 75.0),
)
}
pub fn iqr(data: &[f64]) -> f64 {
let (q1, _, q3) = quartiles(data);
q3 - q1
}
pub fn spearman_correlation(x: &[f64], y: &[f64]) -> f64 {
assert_eq!(
x.len(),
y.len(),
"spearman_correlation: x and y must be same length"
);
let n = x.len();
if n < 2 {
return 0.0;
}
let rank = |data: &[f64]| -> Vec<f64> {
let mut indexed: Vec<(usize, f64)> = data.iter().cloned().enumerate().collect();
indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let mut ranks = vec![0.0_f64; data.len()];
let mut i = 0;
while i < n {
let mut j = i;
while j + 1 < n && (indexed[j + 1].1 - indexed[i].1).abs() < f64::EPSILON {
j += 1;
}
let avg_rank = (i + j) as f64 / 2.0 + 1.0;
for k in i..=j {
ranks[indexed[k].0] = avg_rank;
}
i = j + 1;
}
ranks
};
let rx = rank(x);
let ry = rank(y);
correlation(&rx, &ry)
}
pub fn linear_regression(x: &[f64], y: &[f64]) -> (f64, f64, f64) {
assert_eq!(
x.len(),
y.len(),
"linear_regression: x and y must be same length"
);
let n = x.len();
if n < 2 {
return (0.0, 0.0, 0.0);
}
let mx = mean(x);
let my = mean(y);
let sxx: f64 = x.iter().map(|xi| (xi - mx).powi(2)).sum();
let sxy: f64 = x
.iter()
.zip(y.iter())
.map(|(xi, yi)| (xi - mx) * (yi - my))
.sum();
if sxx.abs() < f64::EPSILON {
return (0.0, my, 0.0);
}
let slope = sxy / sxx;
let intercept = my - slope * mx;
let ss_res: f64 = x
.iter()
.zip(y.iter())
.map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
.sum();
let ss_tot: f64 = y.iter().map(|yi| (yi - my).powi(2)).sum();
let r_squared = if ss_tot < f64::EPSILON {
1.0
} else {
1.0 - ss_res / ss_tot
};
(slope, intercept, r_squared)
}
pub fn ks_test_one_sample(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
ks_statistic(data, cdf)
}
pub fn ks_test_two_sample(x: &[f64], y: &[f64]) -> f64 {
if x.is_empty() || y.is_empty() {
return 0.0;
}
let mut xs = x.to_vec();
let mut ys = y.to_vec();
xs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
ys.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let nx = xs.len() as f64;
let ny = ys.len() as f64;
let mut all: Vec<f64> = xs.iter().chain(ys.iter()).cloned().collect();
all.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
all.dedup_by(|a, b| (*a - *b).abs() < f64::EPSILON);
let mut d_max = 0.0_f64;
for &t in &all {
let fx = xs.partition_point(|&v| v <= t) as f64 / nx;
let fy = ys.partition_point(|&v| v <= t) as f64 / ny;
let d = (fx - fy).abs();
if d > d_max {
d_max = d;
}
}
d_max
}
pub fn chi_squared_statistic(observed: &[u64], expected: &[f64]) -> f64 {
assert_eq!(
observed.len(),
expected.len(),
"chi_squared_statistic: length mismatch"
);
observed
.iter()
.zip(expected.iter())
.filter(|&(_, e)| *e > 1e-10)
.map(|(o, e)| {
let diff = *o as f64 - *e;
diff * diff / *e
})
.sum()
}
pub fn sample_skewness(data: &[f64]) -> f64 {
skewness(data)
}
pub fn sample_kurtosis(data: &[f64]) -> f64 {
kurtosis(data)
}
pub fn pearson_r(x: &[f64], y: &[f64]) -> f64 {
correlation(x, y)
}
pub fn t_test_one_sample_full(data: &[f64], mu0: f64) -> (f64, f64) {
t_test_one_sample(data, mu0)
}
pub fn welch_t_test(x: &[f64], y: &[f64]) -> (f64, f64) {
t_test_two_sample(x, y)
}
pub fn chi_squared_gof(observed: &[f64], expected: &[f64]) -> (f64, usize) {
let chi2 = chi_squared_test(observed, expected);
let df = observed.len().saturating_sub(1);
(chi2, df)
}
#[allow(dead_code)]
pub fn mad(data: &[f64]) -> f64 {
if data.is_empty() {
return 0.0;
}
let m = median(data);
let mut deviations: Vec<f64> = data.iter().map(|&x| (x - m).abs()).collect();
deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = deviations.len();
if n % 2 == 1 {
deviations[n / 2]
} else {
(deviations[n / 2 - 1] + deviations[n / 2]) / 2.0
}
}
#[allow(dead_code)]
pub fn huber_m_estimator(data: &[f64], k: f64, max_iter: usize) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut mu = median(data);
let sigma_scale = mad(data) * 1.4826;
let sigma = if sigma_scale < 1e-10 {
1.0
} else {
sigma_scale
};
for _ in 0..max_iter {
let weights: Vec<f64> = data
.iter()
.map(|&x| {
let r = (x - mu) / sigma;
if r.abs() <= k { 1.0 } else { k / r.abs() }
})
.collect();
let sum_w: f64 = weights.iter().sum();
if sum_w < 1e-30 {
break;
}
let new_mu: f64 = data
.iter()
.zip(weights.iter())
.map(|(&x, &w)| x * w)
.sum::<f64>()
/ sum_w;
if (new_mu - mu).abs() < 1e-8 {
mu = new_mu;
break;
}
mu = new_mu;
}
mu
}
#[allow(dead_code)]
pub fn tukey_biweight_estimator(data: &[f64], c: f64, max_iter: usize) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut mu = median(data);
let sigma_scale = mad(data) * 1.4826;
let sigma = if sigma_scale < 1e-10 {
1.0
} else {
sigma_scale
};
for _ in 0..max_iter {
let weights: Vec<f64> = data
.iter()
.map(|&x| {
let u = (x - mu) / (c * sigma);
if u.abs() >= 1.0 {
0.0
} else {
let t = 1.0 - u * u;
t * t
}
})
.collect();
let sum_w: f64 = weights.iter().sum();
if sum_w < 1e-30 {
break;
}
let new_mu = data
.iter()
.zip(weights.iter())
.map(|(&x, &w)| x * w)
.sum::<f64>()
/ sum_w;
if (new_mu - mu).abs() < 1e-8 {
mu = new_mu;
break;
}
mu = new_mu;
}
mu
}
#[allow(dead_code)]
pub fn empirical_cdf(data: &[f64]) -> Vec<(f64, f64)> {
if data.is_empty() {
return vec![];
}
let n = data.len() as f64;
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
sorted
.iter()
.enumerate()
.map(|(i, &x)| (x, (i as f64 + 1.0) / n))
.collect()
}
#[allow(dead_code)]
pub fn ecdf_at(data: &[f64], x: f64) -> f64 {
if data.is_empty() {
return 0.0;
}
let count = data.iter().filter(|&&v| v <= x).count();
count as f64 / data.len() as f64
}
#[allow(dead_code)]
pub fn anderson_darling_statistic(data: &[f64], cdf: impl Fn(f64) -> f64) -> f64 {
let n = data.len();
if n < 2 {
return 0.0;
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let nf = n as f64;
let s: f64 = sorted
.iter()
.enumerate()
.map(|(i, &x)| {
let fi = cdf(x).clamp(1e-15, 1.0 - 1e-15);
let fn_i = cdf(*sorted
.iter()
.rev()
.nth(i)
.expect("sorted has enough elements"))
.clamp(1e-15, 1.0 - 1e-15);
(2.0 * (i as f64 + 1.0) - 1.0) * (fi.ln() + fn_i.ln())
})
.sum();
-nf - s / nf
}
#[allow(dead_code)]
pub fn kruskal_wallis_h(groups: &[&[f64]]) -> f64 {
let k = groups.len();
if k < 2 {
return 0.0;
}
let mut all: Vec<(f64, usize)> = groups
.iter()
.enumerate()
.flat_map(|(g, &data)| data.iter().map(move |&x| (x, g)))
.collect();
all.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
let n = all.len() as f64;
let mut ranks = vec![0.0_f64; all.len()];
let mut i = 0;
while i < all.len() {
let mut j = i;
while j + 1 < all.len() && (all[j + 1].0 - all[i].0).abs() < f64::EPSILON {
j += 1;
}
let avg_rank = (i + j) as f64 / 2.0 + 1.0;
for r in &mut ranks[i..=j] {
*r = avg_rank;
}
i = j + 1;
}
let mut rank_sums = vec![0.0_f64; k];
let mut group_sizes = vec![0usize; k];
for (r, (_, g)) in ranks.iter().zip(all.iter()) {
rank_sums[*g] += *r;
group_sizes[*g] += 1;
}
let h_numerator: f64 = rank_sums
.iter()
.zip(group_sizes.iter())
.filter(|&(_, &sz)| sz > 0)
.map(|(&rs, &sz)| rs * rs / sz as f64)
.sum();
12.0 / (n * (n + 1.0)) * h_numerator - 3.0 * (n + 1.0)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ExponentialDistribution;
use crate::KernelDensityEstimate;
use crate::KernelDensityEstimate2D;
use crate::PoissonDistribution;
use crate::SlidingWindowStats;
use crate::WelfordOnline;
#[test]
fn test_mean_known() {
let data = [2.0, 4.0, 6.0, 8.0];
assert!((mean(&data) - 5.0).abs() < 1e-12);
}
#[test]
fn test_variance_known() {
let data = [1.0, 3.0, 5.0];
assert!((variance(&data) - 4.0).abs() < 1e-12);
}
#[test]
fn test_median_odd() {
let data = [3.0, 1.0, 4.0, 1.0, 5.0];
assert!((median(&data) - 3.0).abs() < 1e-12);
}
#[test]
fn test_median_even() {
let data = [1.0, 2.0, 3.0, 4.0];
assert!((median(&data) - 2.5).abs() < 1e-12);
}
#[test]
fn test_normal_pdf_at_mean() {
let nd = NormalDistribution::new(0.0, 1.0);
assert!(nd.pdf(0.0) > 0.0);
assert!((nd.pdf(0.0) - 0.3989422802).abs() < 1e-8);
}
#[test]
fn test_normal_cdf_at_mean() {
let nd = NormalDistribution::new(0.0, 1.0);
assert!((nd.cdf(0.0) - 0.5).abs() < 1e-6);
}
#[test]
fn test_exponential_sample_mean() {
let dist = ExponentialDistribution::new(2.0);
let mut rng = StatRng::new(42);
let samples: Vec<f64> = (0..10_000).map(|_| dist.sample(&mut rng)).collect();
let m = mean(&samples);
assert!(
(m - 0.5).abs() < 0.05,
"exponential mean {m} not close to 0.5"
);
}
#[test]
fn test_poisson_pmf_sums_to_one() {
let dist = PoissonDistribution::new(3.0);
let total: f64 = (0u64..50).map(|k| dist.pmf(k)).sum();
assert!((total - 1.0).abs() < 1e-6);
}
#[test]
fn test_kde_positive_at_data_point() {
let data = vec![1.0, 2.0, 3.0];
let kde = KernelDensityEstimate::new(data, 0.5);
assert!(kde.evaluate(2.0) > 0.0);
}
#[test]
fn test_histogram_counts_sum() {
let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
let bins = histogram(&data, 10);
let total: usize = bins.iter().map(|(_, _, c)| c).sum();
assert_eq!(total, data.len());
}
#[test]
fn test_correlation_perfect() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.0, 4.0, 6.0, 8.0, 10.0];
assert!((correlation(&x, &y) - 1.0).abs() < 1e-12);
}
#[test]
fn test_mean_empty() {
assert_eq!(mean(&[]), 0.0);
}
#[test]
fn test_variance_single() {
assert_eq!(variance(&[42.0]), 0.0);
}
#[test]
fn test_std_dev() {
let data = [1.0, 3.0, 5.0];
assert!((std_dev(&data) - 2.0).abs() < 1e-12);
}
#[test]
fn test_covariance_perfect() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.0, 4.0, 6.0, 8.0, 10.0];
assert!((covariance(&x, &y) - 5.0).abs() < 1e-12);
}
#[test]
fn test_pearson_constant() {
let x = [1.0, 1.0, 1.0];
let y = [1.0, 2.0, 3.0];
assert_eq!(pearson_correlation(&x, &y), 0.0);
}
#[test]
fn test_histogram_empty() {
assert!(histogram(&[], 5).is_empty());
}
#[test]
fn test_histogram_zero_bins() {
let data = [1.0, 2.0, 3.0];
assert!(histogram(&data, 0).is_empty());
}
#[test]
fn test_running_average() {
let mut avg = running_average(3);
assert!((avg(1.0) - 1.0).abs() < 1e-12);
assert!((avg(2.0) - 1.5).abs() < 1e-12);
assert!((avg(3.0) - 2.0).abs() < 1e-12);
assert!((avg(4.0) - 3.0).abs() < 1e-12);
}
#[test]
fn test_running_average_zero_window() {
let mut avg = running_average(0);
assert_eq!(avg(5.0), 0.0);
}
#[test]
fn test_block_average_consistent_with_mean() {
let data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
let (grand_mean, _) = block_average(&data, 4);
assert!((grand_mean - mean(&data)).abs() < 1e-10);
}
#[test]
fn test_block_average_empty() {
assert_eq!(block_average(&[], 4), (0.0, 0.0));
}
#[test]
fn test_autocorrelation_time_iid() {
let data: Vec<f64> = (0..200)
.map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
.collect();
let tau = autocorrelation_time(&data, 10);
assert!((tau - 1.0).abs() < 1e-10);
}
#[test]
fn test_boltzmann_factor_infinite_temp() {
let result = boltzmann_factor(100.0, 1e300, 1.0);
assert!((result - 1.0).abs() < 1e-6);
}
#[test]
fn test_boltzmann_factor_zero_energy() {
assert!((boltzmann_factor(0.0, 300.0, 1.38e-23) - 1.0).abs() < 1e-12);
}
#[test]
fn test_partition_function() {
let energies = [0.0, 1.0];
let z = partition_function(&energies, 1.0, 1.0);
let expected = 1.0 + (-1.0_f64).exp();
assert!((z - expected).abs() < 1e-12);
}
#[test]
fn test_free_energy_from_partition() {
let z = 1.0_f64.exp();
let f = free_energy_from_partition(z, 1.0, 1.0);
assert!((f - (-1.0)).abs() < 1e-12);
}
#[test]
fn test_maxwell_boltzmann_speed() {
assert!((maxwell_boltzmann_speed(2.0, 1.0, 1.0) - 1.0).abs() < 1e-12);
}
#[test]
fn test_correlation_matrix_identity_inputs() {
let data: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64, i as f64]).collect();
let cm = correlation_matrix(&data);
assert_eq!(cm.len(), 2);
assert!(
(cm[0][1] - 1.0).abs() < 1e-8,
"identical variables should correlate 1"
);
}
#[test]
fn test_correlation_matrix_orthogonal() {
let data: Vec<Vec<f64>> = (0..5)
.map(|i| vec![i as f64, if i % 2 == 0 { 1.0 } else { -1.0 }])
.collect();
let cm = correlation_matrix(&data);
assert!(cm[0][0] - 1.0 < 1e-10);
assert!(cm[1][1] - 1.0 < 1e-10);
}
#[test]
fn test_covariance_matrix_diagonal() {
let data: Vec<Vec<f64>> = vec![vec![1.0, 10.0], vec![2.0, 10.0], vec![3.0, 10.0]];
let cov = covariance_matrix(&data);
assert!(cov[0][0] > 0.0, "variance of first var should be positive");
assert!(
(cov[1][1]).abs() < 1e-10,
"constant second var should have zero variance"
);
assert!(
(cov[0][1]).abs() < 1e-10,
"covariance with constant should be 0"
);
}
#[test]
fn test_pca_returns_components() {
let data: Vec<Vec<f64>> = (0..20).map(|i| vec![i as f64, 2.0 * i as f64]).collect();
let result = pca(&data, 2).expect("PCA should succeed");
assert_eq!(result.components.len(), 2);
assert_eq!(result.mean.len(), 2);
assert_eq!(result.explained_variance.len(), 2);
assert!(result.explained_variance[0] >= result.explained_variance[1] - 1e-6);
}
#[test]
fn test_pca_transform_shape() {
let data: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64, -i as f64, 0.0]).collect();
let result = pca(&data, 2).unwrap();
let scores = pca_transform(&[1.0, 2.0, 3.0], &result);
assert_eq!(scores.len(), 2);
}
#[test]
fn test_acf_lag0_is_one() {
let data: Vec<f64> = (0..20).map(|i| i as f64).collect();
let a = acf(&data, 5);
assert!((a[0] - 1.0).abs() < 1e-12, "ACF lag 0 should be 1");
}
#[test]
fn test_acf_iid_near_zero_high_lag() {
let data: Vec<f64> = (0..100)
.map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
.collect();
let a = acf(&data, 2);
assert_eq!(a.len(), 3);
assert!(
a[1] < -0.9,
"ACF lag 1 for alternating series should be near -1, got {}",
a[1]
);
}
#[test]
fn test_acf_constant_returns_ones() {
let data = vec![5.0; 10];
let a = acf(&data, 3);
for v in &a {
assert!((*v - 1.0).abs() < 1e-10 || *v == 1.0);
}
}
#[test]
fn test_pacf_lag0_is_one() {
let data: Vec<f64> = (0..20).map(|i| i as f64).collect();
let p = pacf(&data, 3);
assert!((p[0] - 1.0).abs() < 1e-12, "PACF lag 0 should be 1");
}
#[test]
fn test_pacf_length() {
let data: Vec<f64> = (0..30).map(|i| i as f64).collect();
let p = pacf(&data, 5);
assert_eq!(p.len(), 6, "PACF should have max_lag+1 entries");
}
#[test]
fn test_bootstrap_ci_contains_true_mean() {
let data: Vec<f64> = (0..50).map(|i| i as f64).collect();
let true_mean = mean(&data);
let mut rng = StatRng::new(42);
let (lo, hi) = bootstrap_ci(&data, mean, 1000, 0.05, &mut rng);
assert!(
lo <= true_mean && true_mean <= hi,
"bootstrap CI [{lo}, {hi}] should contain true mean {true_mean}"
);
}
#[test]
fn test_bootstrap_se_positive() {
let data: Vec<f64> = (0..30).map(|i| i as f64).collect();
let mut rng = StatRng::new(123);
let se = bootstrap_se(&data, mean, 500, &mut rng);
assert!(se > 0.0, "bootstrap SE should be positive");
}
#[test]
fn test_bootstrap_ci_empty() {
let mut rng = StatRng::new(1);
let (lo, hi) = bootstrap_ci(&[], mean, 100, 0.05, &mut rng);
assert_eq!((lo, hi), (0.0, 0.0));
}
#[test]
fn test_kde2d_positive_at_data_point() {
let data = vec![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]];
let kde = KernelDensityEstimate2D::new(data, 0.5, 0.5);
assert!(
kde.evaluate(1.0, 1.0) > 0.0,
"KDE should be positive at data point"
);
}
#[test]
fn test_kde2d_optimal_bandwidths() {
let data: Vec<[f64; 2]> = (0..20).map(|i| [i as f64, i as f64 * 2.0]).collect();
let (bwx, bwy) = KernelDensityEstimate2D::optimal_bandwidths(&data);
assert!(bwx > 0.0 && bwy > 0.0, "bandwidths should be positive");
}
#[test]
fn test_kde2d_empty_returns_zero() {
let kde = KernelDensityEstimate2D::new(vec![], 0.5, 0.5);
assert_eq!(kde.evaluate(0.0, 0.0), 0.0);
}
#[test]
fn test_t_test_two_sample_equal_means() {
let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let (t, p) = t_test_two_sample(&x, &y);
assert!((t).abs() < 1e-10, "t should be 0 for equal samples");
assert!(p > 0.9, "p should be high for equal samples, got {p}");
}
#[test]
fn test_t_test_two_sample_different_means() {
let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
let y: Vec<f64> = (100..120).map(|i| i as f64).collect();
let (t, p) = t_test_two_sample(&x, &y);
assert!(
t < -1.0,
"t should be very negative for well-separated samples"
);
assert!(p < 0.01, "p should be very small, got {p}");
}
#[test]
fn test_mann_whitney_u_symmetric() {
let x = vec![1.0, 2.0, 3.0];
let y = vec![4.0, 5.0, 6.0];
let u_xy = mann_whitney_u(&x, &y);
let u_yx = mann_whitney_u(&y, &x);
assert!(
(u_xy + u_yx - 9.0).abs() < 1e-10,
"U_xy + U_yx should equal nx*ny=9"
);
}
#[test]
fn test_wilcoxon_signed_rank_positive_diffs() {
let x = vec![2.0, 3.0, 4.0];
let y = vec![1.0, 1.0, 1.0];
let w = wilcoxon_signed_rank(&x, &y);
assert!(
(w - 6.0).abs() < 1e-10,
"W+ = 6 for all positive diffs, got {w}"
);
}
#[test]
fn test_shapiro_wilk_w_normal_data() {
let data = vec![-1.5, -1.2, -0.8, -0.4, -0.1, 0.1, 0.4, 0.8, 1.2, 1.5];
let w = shapiro_wilk_w(&data);
assert!(w > 0.0, "W should be positive for any data, got {w}");
assert!(w.is_finite(), "W should be finite, got {w}");
}
#[test]
fn test_shapiro_wilk_w_constant_data() {
let data = vec![3.0; 5];
let w = shapiro_wilk_w(&data);
assert_eq!(w, 1.0);
}
#[test]
fn test_quartiles_known() {
let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
let (q1, q2, q3) = quartiles(&data);
assert!((q2 - 4.0).abs() < 1e-10, "median should be 4, got {q2}");
assert!(q1 < q2, "Q1 < Q2");
assert!(q2 < q3, "Q2 < Q3");
}
#[test]
fn test_quartiles_empty() {
assert_eq!(quartiles(&[]), (0.0, 0.0, 0.0));
}
#[test]
fn test_iqr_known() {
let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
let v = iqr(&data);
assert!(v > 0.0, "IQR should be positive");
}
#[test]
fn test_iqr_constant() {
let data = vec![5.0; 10];
assert!((iqr(&data)).abs() < 1e-10);
}
#[test]
fn test_spearman_perfect_positive() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.0, 4.0, 6.0, 8.0, 10.0];
assert!((spearman_correlation(&x, &y) - 1.0).abs() < 1e-10);
}
#[test]
fn test_spearman_perfect_negative() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [5.0, 4.0, 3.0, 2.0, 1.0];
assert!((spearman_correlation(&x, &y) + 1.0).abs() < 1e-10);
}
#[test]
fn test_spearman_with_ties() {
let x = [1.0, 1.0, 2.0, 3.0];
let y = [1.0, 2.0, 3.0, 4.0];
let r = spearman_correlation(&x, &y);
assert!(
r > 0.0 && r <= 1.0,
"Spearman with ties should be in (0,1], got {r}"
);
}
#[test]
fn test_linear_regression_perfect_fit() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.0, 4.0, 6.0, 8.0, 10.0];
let (slope, intercept, r2) = linear_regression(&x, &y);
assert!(
(slope - 2.0).abs() < 1e-10,
"slope should be 2, got {slope}"
);
assert!(
intercept.abs() < 1e-10,
"intercept should be 0, got {intercept}"
);
assert!((r2 - 1.0).abs() < 1e-10, "R2 should be 1, got {r2}");
}
#[test]
fn test_linear_regression_horizontal() {
let x = [1.0, 2.0, 3.0];
let y = [5.0, 5.0, 5.0];
let (slope, intercept, _r2) = linear_regression(&x, &y);
assert!(slope.abs() < 1e-10, "slope should be 0 for constant y");
assert!((intercept - 5.0).abs() < 1e-10, "intercept should be 5");
}
#[test]
fn test_linear_regression_r2_nonnegative() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [1.0, 4.0, 9.0, 16.0, 25.0];
let (_, _, r2) = linear_regression(&x, &y);
assert!((0.0..=1.0).contains(&r2), "R2 should be in [0,1], got {r2}");
}
#[test]
fn test_ks_one_sample_uniform() {
let data: Vec<f64> = (0..=10).map(|i| i as f64 / 10.0).collect();
let d = ks_test_one_sample(&data, |x| x.clamp(0.0, 1.0));
assert!(
d < 0.2,
"KS stat for uniform sample vs uniform CDF should be small, got {d}"
);
}
#[test]
fn test_ks_two_sample_identical() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let d = ks_test_two_sample(&data, &data);
assert!(d < 1e-10, "KS2 of identical samples should be 0, got {d}");
}
#[test]
fn test_ks_two_sample_different() {
let x = vec![0.0, 0.1, 0.2, 0.3, 0.4];
let y = vec![0.6, 0.7, 0.8, 0.9, 1.0];
let d = ks_test_two_sample(&x, &y);
assert!(
d > 0.5,
"KS2 for well-separated samples should be large, got {d}"
);
}
#[test]
fn test_chi_squared_statistic_uniform() {
let observed = [10u64; 5];
let expected = [10.0_f64; 5];
let chi2 = chi_squared_statistic(&observed, &expected);
assert!(
chi2.abs() < 1e-10,
"chi2 should be 0 for perfect fit, got {chi2}"
);
}
#[test]
fn test_chi_squared_statistic_deviation() {
let observed = [20u64, 5];
let expected = [12.5_f64, 12.5];
let chi2 = chi_squared_statistic(&observed, &expected);
assert!(chi2 > 0.0, "chi2 should be positive for deviating counts");
}
#[test]
fn test_chi_squared_gof_df() {
let observed = [10.0, 20.0, 30.0];
let expected = [20.0, 20.0, 20.0];
let (_chi2, df) = chi_squared_gof(&observed, &expected);
assert_eq!(df, 2, "df = n - 1 = 2");
}
#[test]
fn test_welford_online_mean() {
let data = [1.0, 2.0, 3.0, 4.0, 5.0];
let mut w = WelfordOnline::new();
for &x in &data {
w.update(x);
}
assert!(
(w.mean - 3.0).abs() < 1e-12,
"mean should be 3, got {}",
w.mean
);
}
#[test]
fn test_welford_online_variance() {
let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
let mut w = WelfordOnline::new();
for &x in &data {
w.update(x);
}
let expected_var = variance(&data);
assert!(
(w.variance() - expected_var).abs() < 1e-10,
"Welford variance {:.6} should match batch variance {:.6}",
w.variance(),
expected_var
);
}
#[test]
fn test_welford_online_single_observation() {
let mut w = WelfordOnline::new();
w.update(42.0);
assert!((w.mean - 42.0).abs() < 1e-12);
assert!(
w.variance().abs() < 1e-12,
"variance of single obs should be 0"
);
}
#[test]
fn test_welford_online_population_variance() {
let data = [1.0, 2.0, 3.0];
let mut w = WelfordOnline::new();
for &x in &data {
w.update(x);
}
let pop_var = w.population_variance();
assert!(
(pop_var - 2.0 / 3.0).abs() < 1e-10,
"pop var should be 2/3, got {pop_var}"
);
}
#[test]
fn test_welford_online_empty() {
let w = WelfordOnline::new();
assert_eq!(w.n, 0);
assert_eq!(w.variance(), 0.0);
assert_eq!(w.population_variance(), 0.0);
}
#[test]
fn test_sliding_window_mean() {
let mut sw = SlidingWindowStats::new(3);
sw.push(1.0);
sw.push(2.0);
sw.push(3.0);
assert!((sw.mean() - 2.0).abs() < 1e-12);
sw.push(4.0);
assert!((sw.mean() - 3.0).abs() < 1e-12);
}
#[test]
fn test_sliding_window_variance() {
let mut sw = SlidingWindowStats::new(4);
for x in [2.0, 4.0, 6.0, 8.0] {
sw.push(x);
}
let expected = variance(&[2.0, 4.0, 6.0, 8.0]);
assert!((sw.variance() - expected).abs() < 1e-10);
}
#[test]
fn test_sliding_window_empty() {
let sw = SlidingWindowStats::new(5);
assert!(sw.is_empty());
assert_eq!(sw.mean(), 0.0);
}
#[test]
fn test_pearson_r_alias() {
let x = [1.0, 2.0, 3.0];
let y = [3.0, 2.0, 1.0];
assert!((pearson_r(&x, &y) + 1.0).abs() < 1e-10);
}
#[test]
fn test_sample_skewness_symmetric() {
let data = [-2.0, -1.0, 0.0, 1.0, 2.0];
let s = sample_skewness(&data);
assert!(
s.abs() < 1e-10,
"symmetric data should have ~0 skewness, got {s}"
);
}
#[test]
fn test_sample_kurtosis_normal_approx() {
let data: Vec<f64> = (0..50).map(|i| i as f64 - 25.0).collect();
let k = sample_kurtosis(&data);
assert!(k.is_finite(), "kurtosis should be finite");
}
#[test]
fn test_welch_t_test_same_distribution() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [1.0, 2.0, 3.0, 4.0, 5.0];
let (t, _p) = welch_t_test(&x, &y);
assert!(t.abs() < 1e-10, "t should be ~0 for identical samples");
}
}