use statrs::distribution::Binomial;
use statrs::distribution::Continuous;
use statrs::distribution::ContinuousCDF;
use statrs::distribution::Discrete;
use statrs::distribution::DiscreteCDF;
use statrs::distribution::Normal;
use statrs::distribution::Uniform;
use statrs::statistics::Data;
use statrs::statistics::Distribution;
use statrs::statistics::Max;
use statrs::statistics::Min;
use statrs::statistics::OrderStatistics;
/// Arithmetic mean of `data`. Returns 0.0 for an empty slice.
#[must_use]
pub fn mean(data: &[f64]) -> f64 {
    match data.len() {
        0 => 0.0,
        n => {
            let total: f64 = data.iter().sum();
            total / n as f64
        }
    }
}
/// Population variance of `data` (sum of squared deviations divided by n,
/// not n - 1). Returns 0.0 for an empty slice.
#[must_use]
pub fn variance(data: &[f64]) -> f64 {
    let n = data.len();
    if n == 0 {
        return 0.0;
    }
    // Mean computed inline; identical to the module-level `mean` for
    // non-empty input.
    let avg = data.iter().sum::<f64>() / n as f64;
    let sum_sq: f64 = data.iter().map(|&v| (v - avg).powi(2)).sum();
    sum_sq / n as f64
}
#[must_use]
pub fn std_dev(data: &[f64]) -> f64 {
let data_vec: Vec<f64> = data.to_vec();
let _data_container = Data::new(data_vec);
let data_vec: Vec<f64> = data.to_vec();
let data_container = Data::new(data_vec);
data_container.std_dev().unwrap_or(f64::NAN)
}
/// Median of `data` via statrs' order statistics (may reorder the slice).
pub fn median(data: &mut [f64]) -> f64 {
    Data::new(data).median()
}
/// `p`-th percentile of `data` (may reorder the slice).
///
/// Returns NaN when `p` is outside `[0, 100]` or NaN — the old code only
/// rejected negative values, so `p > 100` (and NaN, which truncated to 0)
/// slipped through to statrs unchecked. Fractional percentiles are
/// truncated to the nearest lower integer, as before.
pub fn percentile(
    data: &mut [f64],
    p: f64,
) -> f64 {
    if !(0.0..=100.0).contains(&p) {
        return f64::NAN;
    }
    let mut data_container = Data::new(data);
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    data_container.percentile(p as usize)
}
/// Sample covariance (n - 1 denominator) of two paired slices.
///
/// Returns NaN when the slices differ in length (the old code silently
/// truncated the longer one via `zip`) or hold fewer than two pairs (the
/// n - 1 denominator is otherwise zero or negative). Also drops the two
/// pointless `to_vec()` clones the old code made just to call `mean`.
#[must_use]
pub fn covariance(
    data1: &[f64],
    data2: &[f64],
) -> f64 {
    let len = data1.len();
    if len != data2.len() || len < 2 {
        return f64::NAN;
    }
    let n = len as f64;
    let mean1 = data1.iter().sum::<f64>() / n;
    let mean2 = data2.iter().sum::<f64>() / n;
    data1
        .iter()
        .zip(data2.iter())
        .map(|(&x, &y)| (x - mean1) * (y - mean2))
        .sum::<f64>()
        / (n - 1.0)
}
/// Pearson correlation coefficient: covariance scaled by the product of
/// the two standard deviations (NaN propagates when either is zero/NaN).
#[must_use]
pub fn correlation(
    data1: &[f64],
    data2: &[f64],
) -> f64 {
    let scale = std_dev(data1) * std_dev(data2);
    covariance(data1, data2) / scale
}
pub struct NormalDist(Normal);
impl NormalDist {
#[must_use]
pub fn pdf(
&self,
x: f64,
) -> f64 {
self.0.pdf(x)
}
#[must_use]
pub fn cdf(
&self,
x: f64,
) -> f64 {
self.0.cdf(x)
}
}
/// Wrapper around a statrs continuous uniform distribution.
pub struct UniformDist(Uniform);
impl UniformDist {
    /// Builds a uniform distribution over `min`..`max`; statrs validation
    /// errors are surfaced as strings.
    pub fn new(
        min: f64,
        max: f64,
    ) -> Result<Self, String> {
        match Uniform::new(min, max) {
            Ok(dist) => Ok(UniformDist(dist)),
            Err(err) => Err(err.to_string()),
        }
    }
    /// Probability density at `x`.
    #[must_use]
    pub fn pdf(
        &self,
        x: f64,
    ) -> f64 {
        self.0.pdf(x)
    }
    /// Cumulative probability P(X <= x).
    #[must_use]
    pub fn cdf(
        &self,
        x: f64,
    ) -> f64 {
        self.0.cdf(x)
    }
}
/// Wrapper around a statrs binomial distribution.
pub struct BinomialDist(Binomial);
impl BinomialDist {
    /// `n` independent trials with per-trial success probability `p`.
    /// Note: statrs takes its arguments in the order (p, n).
    pub fn new(
        n: u64,
        p: f64,
    ) -> Result<Self, String> {
        match Binomial::new(p, n) {
            Ok(dist) => Ok(BinomialDist(dist)),
            Err(err) => Err(err.to_string()),
        }
    }
    /// Probability of observing exactly `k` successes.
    #[must_use]
    pub fn pmf(
        &self,
        k: u64,
    ) -> f64 {
        self.0.pmf(k)
    }
    /// Probability of observing at most `k` successes.
    #[must_use]
    pub fn cdf(
        &self,
        k: u64,
    ) -> f64 {
        self.0.cdf(k)
    }
}
/// Ordinary least-squares fit of y = b0 + b1*x over `(x, y)` pairs,
/// returned as `(slope, intercept)`.
///
/// Returns `(NaN, NaN)` for empty input and `(NaN, mean_y)` when all x
/// values coincide (zero denominator).
#[must_use]
pub fn simple_linear_regression(data: &[(f64, f64)]) -> (f64, f64) {
    if data.is_empty() {
        return (f64::NAN, f64::NAN);
    }
    let n = data.len() as f64;
    // First pass: component means (inline, slice is known non-empty).
    let mut sum_x = 0.0;
    let mut sum_y = 0.0;
    for &(x, y) in data {
        sum_x += x;
        sum_y += y;
    }
    let mean_x = sum_x / n;
    let mean_y = sum_y / n;
    // Second pass: cross- and x-deviation sums.
    let mut numerator = 0.0;
    let mut denominator = 0.0;
    for &(x, y) in data {
        numerator += (x - mean_x) * (y - mean_y);
        denominator += (x - mean_x).powi(2);
    }
    if denominator == 0.0 {
        return (f64::NAN, mean_y);
    }
    let slope = numerator / denominator;
    let intercept = slope.mul_add(-mean_x, mean_y);
    (slope, intercept)
}
/// Smallest value in `data` as reported by statrs' `Min`.
pub fn min(data: &mut [f64]) -> f64 {
    Data::new(data).min()
}
/// Largest value in `data` as reported by statrs' `Max`.
pub fn max(data: &mut [f64]) -> f64 {
    Data::new(data).max()
}
/// Skewness of `data` via statrs; NaN when statrs reports it undefined.
pub fn skewness(data: &mut [f64]) -> f64 {
    Data::new(data).skewness().unwrap_or(f64::NAN)
}
/// Bias-corrected sample excess kurtosis (the definition used by Excel's
/// KURT and pandas). Returns NaN for fewer than four observations and 0.0
/// when all values are identical (preserving the previous convention).
///
/// Bug fix: the old code computed `term1 * term2`, which multiplied the
/// -3(n-1)^2/((n-2)(n-3)) adjustment by the leading (n^2-1)/((n-2)(n-3))
/// factor as well. For [1, 2, 3, 4, 5] it returned -25.2 instead of the
/// correct -1.2. Correct formula:
///   G2 = (n-1)/((n-2)(n-3)) * ((n+1)*g2 + 6),  g2 = m4/m2^2 - 3.
pub fn kurtosis(data: &mut [f64]) -> f64 {
    let n = data.len() as f64;
    if n < 4.0 {
        return f64::NAN;
    }
    // Mean computed inline; identical to the module-level `mean` here.
    let avg = data.iter().sum::<f64>() / n;
    let m2 = data.iter().map(|&x| (x - avg).powi(2)).sum::<f64>() / n;
    let m4 = data.iter().map(|&x| (x - avg).powi(4)).sum::<f64>() / n;
    if m2 == 0.0 {
        return 0.0;
    }
    // Population excess kurtosis.
    let g2 = m4 / m2.powi(2) - 3.0;
    ((n - 1.0) / ((n - 2.0) * (n - 3.0))) * (n + 1.0).mul_add(g2, 6.0)
}
/// Wrapper around a statrs Poisson distribution.
pub struct PoissonDist(statrs::distribution::Poisson);
impl PoissonDist {
    /// Builds a Poisson distribution with the given `rate`; statrs
    /// validation errors are surfaced as strings.
    pub fn new(rate: f64) -> Result<Self, String> {
        match statrs::distribution::Poisson::new(rate) {
            Ok(dist) => Ok(PoissonDist(dist)),
            Err(err) => Err(err.to_string()),
        }
    }
    /// Probability mass at count `k`.
    #[must_use]
    pub fn pmf(
        &self,
        k: u64,
    ) -> f64 {
        self.0.pmf(k)
    }
    /// Cumulative probability P(X <= k).
    #[must_use]
    pub fn cdf(
        &self,
        k: u64,
    ) -> f64 {
        self.0.cdf(k)
    }
}
/// Wrapper around a statrs exponential distribution.
pub struct ExponentialDist(statrs::distribution::Exp);
impl ExponentialDist {
    /// Builds an exponential distribution with the given `rate`; statrs
    /// validation errors are surfaced as strings.
    pub fn new(rate: f64) -> Result<Self, String> {
        match statrs::distribution::Exp::new(rate) {
            Ok(dist) => Ok(ExponentialDist(dist)),
            Err(err) => Err(err.to_string()),
        }
    }
    /// Probability density at `x`.
    #[must_use]
    pub fn pdf(
        &self,
        x: f64,
    ) -> f64 {
        self.0.pdf(x)
    }
    /// Cumulative probability P(X <= x).
    #[must_use]
    pub fn cdf(
        &self,
        x: f64,
    ) -> f64 {
        self.0.cdf(x)
    }
}
/// Wrapper around a statrs gamma distribution.
pub struct GammaDist(statrs::distribution::Gamma);
impl GammaDist {
    /// Builds a gamma distribution from `shape` and `rate`; statrs
    /// validation errors are surfaced as strings.
    pub fn new(
        shape: f64,
        rate: f64,
    ) -> Result<Self, String> {
        match statrs::distribution::Gamma::new(shape, rate) {
            Ok(dist) => Ok(GammaDist(dist)),
            Err(err) => Err(err.to_string()),
        }
    }
    /// Probability density at `x`.
    #[must_use]
    pub fn pdf(
        &self,
        x: f64,
    ) -> f64 {
        self.0.pdf(x)
    }
    /// Cumulative probability P(X <= x).
    #[must_use]
    pub fn cdf(
        &self,
        x: f64,
    ) -> f64 {
        self.0.cdf(x)
    }
}
/// One-way ANOVA across `groups`, returning `(F statistic, p-value)`.
/// Returns a NaN pair with fewer than two groups or when the F
/// distribution cannot be constructed; `(inf, 0.0)` when the within-group
/// mean square is zero.
///
/// Cleanup: the old code ran two loops, computing every group mean twice,
/// and used `iter_mut()` without mutating anything. The merged single pass
/// keeps each accumulator's summation order unchanged, so results are
/// bit-identical.
pub fn one_way_anova(groups: &mut [&mut [f64]]) -> (f64, f64) {
    let k = groups.len() as f64;
    if k < 2.0 {
        return (f64::NAN, f64::NAN);
    }
    let all_data: Vec<f64> = groups.iter().flat_map(|g| g.iter()).copied().collect();
    let n_total = all_data.len() as f64;
    let grand_mean = mean(&all_data);
    // One pass per group: each group mean is computed once and reused for
    // both the between-group and within-group sums of squares.
    let mut ss_between = 0.0;
    let mut ss_within = 0.0;
    for group in groups.iter() {
        let n_group = group.len() as f64;
        let mean_group = mean(group);
        ss_between += n_group * (mean_group - grand_mean).powi(2);
        ss_within += group.iter().map(|&x| (x - mean_group).powi(2)).sum::<f64>();
    }
    let df_between = k - 1.0;
    let df_within = n_total - k;
    let ms_between = ss_between / df_between;
    let ms_within = ss_within / df_within;
    if ms_within == 0.0 {
        return (f64::INFINITY, 0.0);
    }
    let f_stat = ms_between / ms_within;
    let f_dist = match statrs::distribution::FisherSnedecor::new(df_between, df_within) {
        | Ok(dist) => dist,
        | Err(_) => return (f64::NAN, f64::NAN),
    };
    let p_value = 1.0 - f_dist.cdf(f_stat);
    (f_stat, p_value)
}
/// Pooled-variance (Student's) two-sample t-test assuming equal variances.
/// Returns `(t statistic, two-sided p-value)`, or a NaN pair when either
/// sample has fewer than two observations or the t distribution cannot be
/// built.
///
/// Bug fix: `variance` in this module is the *population* variance (÷ n),
/// but the pooled-variance formula requires *sample* variances (÷ n - 1).
/// `welch_t_test` already performs this rescaling (s^2 = var * n/(n-1));
/// this function previously did not, biasing the pooled variance low.
/// Also removes the two pointless `to_vec()` clones of the inputs.
#[must_use]
pub fn two_sample_t_test(
    sample1: &[f64],
    sample2: &[f64],
) -> (f64, f64) {
    let n1 = sample1.len() as f64;
    let n2 = sample2.len() as f64;
    if n1 < 2.0 || n2 < 2.0 {
        return (f64::NAN, f64::NAN);
    }
    let mean1 = mean(sample1);
    let mean2 = mean(sample2);
    // Convert population variances (÷ n) to sample variances (÷ n - 1).
    let s1_sq = variance(sample1) * n1 / (n1 - 1.0);
    let s2_sq = variance(sample2) * n2 / (n2 - 1.0);
    let s_p_sq = (n1 - 1.0).mul_add(s1_sq, (n2 - 1.0) * s2_sq) / (n1 + n2 - 2.0);
    let t_stat = (mean1 - mean2) / (s_p_sq * (1.0 / n1 + 1.0 / n2)).sqrt();
    let df = n1 + n2 - 2.0;
    let t_dist = match statrs::distribution::StudentsT::new(0.0, 1.0, df) {
        | Ok(dist) => dist,
        | Err(_) => return (f64::NAN, f64::NAN),
    };
    let p_value = 2.0 * (1.0 - t_dist.cdf(t_stat.abs()));
    (t_stat, p_value)
}
/// Shannon entropy in bits over `probabilities`; entries that are zero or
/// negative contribute nothing (0 * log 0 is treated as 0).
#[must_use]
pub fn shannon_entropy(probabilities: &[f64]) -> f64 {
    let mut bits = 0.0;
    for &p in probabilities {
        if p > 0.0 {
            bits -= p * p.log2();
        }
    }
    bits
}
/// Geometric mean: exp of the mean of logarithms. NaN for empty input
/// (and, via `ln`, for any negative entries).
#[must_use]
pub fn geometric_mean(data: &[f64]) -> f64 {
    if data.is_empty() {
        return f64::NAN;
    }
    let mut log_sum = 0.0;
    for &x in data {
        log_sum += x.ln();
    }
    (log_sum / data.len() as f64).exp()
}
/// Harmonic mean: n divided by the sum of reciprocals. NaN for empty
/// input; a zero entry drives the result to 0 via an infinite reciprocal.
#[must_use]
pub fn harmonic_mean(data: &[f64]) -> f64 {
    if data.is_empty() {
        return f64::NAN;
    }
    let reciprocal_sum: f64 = data.iter().map(|&x| x.recip()).sum();
    data.len() as f64 / reciprocal_sum
}
/// Spread of `data`: maximum minus minimum. NaN for an empty slice.
#[must_use]
pub fn range(data: &[f64]) -> f64 {
    if data.is_empty() {
        return f64::NAN;
    }
    let mut lo = f64::INFINITY;
    let mut hi = f64::NEG_INFINITY;
    for &x in data {
        lo = lo.min(x);
        hi = hi.max(x);
    }
    hi - lo
}
/// Interquartile range Q3 - Q1 (may reorder the slice).
/// NaN with fewer than four observations.
pub fn iqr(data: &mut [f64]) -> f64 {
    if data.len() < 4 {
        return f64::NAN;
    }
    let mut container = Data::new(data);
    let q1 = container.percentile(25);
    let q3 = container.percentile(75);
    q3 - q1
}
/// Standard scores `(x - mean) / std_dev` for every element.
/// Empty input yields an empty vector; a zero or NaN standard deviation
/// yields all zeros.
#[must_use]
pub fn z_scores(data: &[f64]) -> Vec<f64> {
    if data.is_empty() {
        return Vec::new();
    }
    let center = mean(data);
    let spread = std_dev(data);
    if spread == 0.0 || spread.is_nan() {
        return vec![0.0; data.len()];
    }
    data.iter().map(|&x| (x - center) / spread).collect()
}
/// Most frequent value in `data` after rounding to `decimal_places`.
/// Returns None for empty input or when every value occurs exactly once.
///
/// Bug fix: on a tie for the highest count, the old code returned whichever
/// entry `HashMap` iteration happened to visit first — nondeterministic
/// across runs. Ties are now broken deterministically by taking the
/// smallest tied value.
#[must_use]
pub fn mode(
    data: &[f64],
    decimal_places: u32,
) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    let factor = 10_f64.powi(decimal_places as i32);
    let mut counts = std::collections::HashMap::new();
    for &val in data {
        #[allow(clippy::cast_possible_truncation)]
        let rounded = (val * factor).round() as i64;
        *counts.entry(rounded).or_insert(0) += 1;
    }
    let max_count = *counts.values().max()?;
    if max_count == 1 {
        return None;
    }
    // `min()` over the tied keys makes the result deterministic.
    let mode_key = counts
        .into_iter()
        .filter(|&(_, count)| count == max_count)
        .map(|(key, _)| key)
        .min()?;
    #[allow(clippy::cast_precision_loss)]
    Some(mode_key as f64 / factor)
}
/// Welch's unequal-variances two-sample t-test, returning
/// `(t statistic, two-sided p-value)`. Returns a NaN pair when either
/// sample has fewer than two points, the standard error is zero, or the
/// t distribution cannot be built.
#[must_use]
pub fn welch_t_test(
    sample1: &[f64],
    sample2: &[f64],
) -> (f64, f64) {
    let n1 = sample1.len() as f64;
    let n2 = sample2.len() as f64;
    if n1 < 2.0 || n2 < 2.0 {
        return (f64::NAN, f64::NAN);
    }
    // Convert population variances (÷ n) to sample variances (÷ n - 1).
    let s1_sq = variance(sample1) * n1 / (n1 - 1.0);
    let s2_sq = variance(sample2) * n2 / (n2 - 1.0);
    let se = (s1_sq / n1 + s2_sq / n2).sqrt();
    if se == 0.0 {
        return (f64::NAN, f64::NAN);
    }
    let t_stat = (mean(sample1) - mean(sample2)) / se;
    // Welch–Satterthwaite approximation for the degrees of freedom.
    let df_num = (s1_sq / n1 + s2_sq / n2).powi(2);
    let df_den = (s1_sq / n1).powi(2) / (n1 - 1.0) + (s2_sq / n2).powi(2) / (n2 - 1.0);
    let df = df_num / df_den;
    let t_dist = match statrs::distribution::StudentsT::new(0.0, 1.0, df) {
        | Ok(dist) => dist,
        | Err(_) => return (f64::NAN, f64::NAN),
    };
    let p_value = 2.0 * (1.0 - t_dist.cdf(t_stat.abs()));
    (t_stat, p_value)
}
/// Pearson chi-squared goodness-of-fit test, returning
/// `(statistic, p-value)`. A NaN pair is returned for empty or
/// mismatched-length inputs; cells with an expected count of zero
/// contribute nothing to the statistic.
#[must_use]
pub fn chi_squared_test(
    observed: &[f64],
    expected: &[f64],
) -> (f64, f64) {
    if observed.is_empty() || observed.len() != expected.len() {
        return (f64::NAN, f64::NAN);
    }
    let mut chi_sq = 0.0;
    for (&o, &e) in observed.iter().zip(expected.iter()) {
        if e != 0.0 {
            chi_sq += (o - e).powi(2) / e;
        }
    }
    let df = (observed.len() - 1) as f64;
    if df <= 0.0 {
        return (chi_sq, f64::NAN);
    }
    let chi_dist = match statrs::distribution::ChiSquared::new(df) {
        | Ok(dist) => dist,
        | Err(_) => return (chi_sq, f64::NAN),
    };
    (chi_sq, 1.0 - chi_dist.cdf(chi_sq))
}
/// Coefficient of variation: standard deviation divided by the mean.
/// NaN when the mean is zero.
#[must_use]
pub fn coefficient_of_variation(data: &[f64]) -> f64 {
    let m = mean(data);
    if m == 0.0 { f64::NAN } else { std_dev(data) / m }
}
/// Standard error of the mean: std_dev / sqrt(n). NaN for empty input.
#[must_use]
pub fn standard_error(data: &[f64]) -> f64 {
    match data.len() {
        0 => f64::NAN,
        n => std_dev(data) / (n as f64).sqrt(),
    }
}