use crate::distributions;
use crate::error::{StatsError, StatsResult};
use crate::tests::ttest::{Alternative, TTestResult};
use crate::{mean, std};
use scirs2_core::ndarray::ArrayView1;
use scirs2_core::numeric::{Float, NumCast};
pub mod anova;
pub mod chi2_test;
pub mod homogeneity;
#[cfg(test)]
mod homogeneity_tests;
pub mod nonparametric;
#[cfg(test)]
mod nonparametric_tests;
pub mod normality;
#[cfg(test)]
mod normality_tests;
pub mod ttest;
pub use anova::{one_way_anova, tukey_hsd};
pub use chi2_test::{chi2_gof, chi2_independence, chi2_yates};
pub use homogeneity::{bartlett, brown_forsythe, levene};
pub use nonparametric::{friedman, kruskal_wallis, mann_whitney, wilcoxon};
pub use normality::{anderson_darling, dagostino_k2, ks_2samp, shapiro_wilk};
pub use ttest::{
ttest_1samp as enhanced_ttest_1samp, ttest_ind as enhanced_ttest_ind, ttest_ind_from_stats,
ttest_rel as enhanced_ttest_rel,
};
#[allow(dead_code)]
pub fn ttest_1samp<F>(
x: &ArrayView1<F>,
popmean: F,
alternative: Alternative,
nan_policy: &str,
) -> StatsResult<TTestResult<F>>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ NumCast
+ std::marker::Send
+ std::marker::Sync
+ std::fmt::Display
+ 'static
+ scirs2_core::simd_ops::SimdUnifiedOps,
{
crate::tests::ttest::ttest_1samp(x, popmean, alternative, nan_policy)
}
#[allow(dead_code)]
pub fn ttest_ind<F>(
x: &ArrayView1<F>,
y: &ArrayView1<F>,
equal_var: bool,
_alternative: Alternative,
nan_policy: &str,
) -> StatsResult<TTestResult<F>>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ NumCast
+ std::marker::Send
+ std::marker::Sync
+ std::fmt::Display
+ 'static
+ scirs2_core::simd_ops::SimdUnifiedOps,
{
if x.is_empty() || y.is_empty() {
return Err(StatsError::InvalidArgument(
"Input arrays cannot be empty".to_string(),
));
}
let mean_x = mean(x)?;
let mean_y = mean(y)?;
let n_x = F::from(x.len()).expect("Test operation failed");
let n_y = F::from(y.len()).expect("Test operation failed");
let std_x = std(x, 1, None)?;
let std_y = std(y, 1, None)?;
let t_stat: F;
let df: F;
if equal_var {
let pooled_var = ((n_x - F::one()) * std_x * std_x + (n_y - F::one()) * std_y * std_y)
/ (n_x + n_y - F::from(2.0).expect("Failed to convert constant to float"));
let se = (pooled_var * (F::one() / n_x + F::one() / n_y)).sqrt();
t_stat = (mean_x - mean_y) / se;
df = n_x + n_y - F::from(2.0).expect("Failed to convert constant to float");
} else {
let var_x_over_n_x = (std_x * std_x) / n_x;
let var_y_over_n_y = (std_y * std_y) / n_y;
let se = (var_x_over_n_x + var_y_over_n_y).sqrt();
t_stat = (mean_x - mean_y) / se;
let numerator = (var_x_over_n_x + var_y_over_n_y).powi(2);
let denominator = (var_x_over_n_x.powi(2) / (n_x - F::one()))
+ (var_y_over_n_y.powi(2) / (n_y - F::one()));
df = numerator / denominator;
}
let t_dist = distributions::t(df, F::zero(), F::one())?;
let abs_t = t_stat.abs();
let p_value =
F::from(2.0).expect("Failed to convert constant to float") * (F::one() - t_dist.cdf(abs_t));
Ok(TTestResult {
statistic: t_stat,
pvalue: p_value,
df,
alternative: Alternative::TwoSided,
info: None,
})
}
#[allow(dead_code)]
pub fn ttest_rel<F>(
x: &ArrayView1<F>,
y: &ArrayView1<F>,
_alternative: Alternative,
nan_policy: &str,
) -> StatsResult<TTestResult<F>>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ NumCast
+ std::marker::Send
+ std::marker::Sync
+ std::fmt::Display
+ 'static
+ scirs2_core::simd_ops::SimdUnifiedOps,
{
if x.is_empty() || y.is_empty() {
return Err(StatsError::InvalidArgument(
"Input arrays cannot be empty".to_string(),
));
}
if x.len() != y.len() {
return Err(StatsError::DimensionMismatch(
"Input arrays must have the same length for paired t-test".to_string(),
));
}
let mut differences = Vec::with_capacity(x.len());
for i in 0..x.len() {
differences.push(x[i] - y[i]);
}
let diff_array = scirs2_core::ndarray::Array1::from(differences);
ttest_1samp(
&diff_array.view(),
F::zero(),
Alternative::TwoSided,
"propagate",
)
}
#[allow(dead_code)]
pub fn kstest<F, G>(x: &ArrayView1<F>, cdf: G) -> StatsResult<(F, F)>
where
F: Float + std::iter::Sum<F> + std::ops::Div<Output = F> + NumCast,
G: Fn(F) -> F,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Input array cannot be empty".to_string(),
));
}
let mut data = Vec::with_capacity(x.len());
for &value in x.iter() {
data.push(value);
}
data.sort_by(|a, b| a.partial_cmp(b).expect("Test operation failed"));
let n = F::from(data.len()).expect("Test operation failed");
let mut d_plus = F::zero();
let mut d_minus = F::zero();
for (i, &value) in data.iter().enumerate() {
let i_f = F::from(i + 1).expect("Failed to convert to float");
let ecdf = i_f / n;
let tcdf = cdf(value);
let current_d_plus = ecdf - tcdf;
if current_d_plus > d_plus {
d_plus = current_d_plus;
}
let current_d_minus = tcdf - (i_f - F::one()) / n;
if current_d_minus > d_minus {
d_minus = current_d_minus;
}
}
let ks_stat = if d_plus > d_minus { d_plus } else { d_minus };
let p_value = calculate_ks_p_value(ks_stat, n);
Ok((ks_stat, p_value))
}
#[allow(dead_code)]
fn calculate_ks_p_value<F: Float + NumCast>(_ksstat: F, n: F) -> F {
let n_effective = n;
let z = _ksstat * n_effective.sqrt();
if z < F::from(0.27).expect("Failed to convert constant to float") {
F::one()
} else if z < F::one() {
let z2 = z * z;
let z3 = z2 * z;
let z4 = z3 * z;
let z5 = z4 * z;
let z6 = z5 * z;
let p = F::one()
- F::from(2.506628275).expect("Failed to convert constant to float")
* (F::one() / z - F::from(1.0 / 3.0).expect("Failed to convert to float")
+ F::from(7.0 / 90.0).expect("Failed to convert to float") * z2
- F::from(2.0 / 105.0).expect("Failed to convert to float") * z3
+ F::from(2.0 / 1575.0).expect("Failed to convert to float") * z4
- F::from(2.0 / 14175.0).expect("Failed to convert to float") * z5
+ F::from(2.0 / 467775.0).expect("Failed to convert to float") * z6);
p.max(F::zero())
} else {
let z2 = z * z;
let two = F::from(2.0).expect("Failed to convert constant to float");
let mut p = two * (-z2).exp();
p = p.min(F::one()).max(F::zero());
p
}
}
#[allow(dead_code)]
pub fn shapiro<F>(x: &ArrayView1<F>) -> StatsResult<(F, F)>
where
F: Float + std::iter::Sum<F> + std::ops::Div<Output = F> + NumCast + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Input array cannot be empty".to_string(),
));
}
if x.len() < 3 {
return Err(StatsError::InvalidArgument(
"Sample size must be at least 3 for the Shapiro-Wilk test".to_string(),
));
}
if x.len() > 5000 {
return Err(StatsError::InvalidArgument(
"Sample too large for Shapiro-Wilk test (max size is 5000)".to_string(),
));
}
let mut data = Vec::with_capacity(x.len());
for &value in x.iter() {
data.push(value);
}
data.sort_by(|a, b| a.partial_cmp(b).expect("Test operation failed"));
let mean =
data.iter().cloned().sum::<F>() / F::from(data.len()).expect("Test operation failed");
let ssq = data.iter().map(|&x| (x - mean).powi(2)).sum::<F>();
let n = data.len();
let a = get_shapiro_coefficients(n)?;
let mut numerator = F::zero();
for i in 0..n / 2 {
let a_i = F::from(a[i]).expect("Failed to convert to float");
numerator = numerator + a_i * (data[n - 1 - i] - data[i]);
}
let w = numerator.powi(2) / ssq;
let p = calculate_shapiro_p_value(w, n);
Ok((w, p))
}
#[allow(dead_code)]
fn get_shapiro_coefficients(n: usize) -> StatsResult<Vec<f64>> {
if n > 50 {
return Err(StatsError::InvalidArgument(
"Simplified Shapiro-Wilk test only implemented for n <= 50".to_string(),
));
}
let mut coeffs = vec![0.0; n / 2];
let n_f = n as f64;
for (i, coef) in coeffs.iter_mut().enumerate().take(n / 2) {
let i_f = (i + 1) as f64;
let m = (i_f - 0.375) / (n_f + 0.25);
let inverse_normal = if m <= 0.5 {
let t = (m * m).sqrt();
-0.322232431088 * t
+ 0.9982 * t.powi(2)
+ -0.342242088547 * t.powi(3)
+ 0.0038560700634 * t.powi(4)
} else {
let t = ((1.0 - m) * (1.0 - m)).sqrt();
0.322232431088 * t - 0.9982 * t.powi(2) + 0.342242088547 * t.powi(3)
- 0.0038560700634 * t.powi(4)
};
*coef = inverse_normal;
}
let sum_sq = coeffs.iter().map(|&c| c * c).sum::<f64>().sqrt();
for coef in &mut coeffs {
*coef /= sum_sq;
}
Ok(coeffs)
}
#[allow(dead_code)]
fn calculate_shapiro_p_value<F: Float + NumCast>(w: F, n: usize) -> F {
let n_f = F::from(n as f64).expect("Failed to convert to float");
let w_f = <f64 as NumCast>::from(w).expect("Test operation failed");
let y = (1.0 - w_f).ln();
let _gamma = F::from(0.459).expect("Failed to convert constant to float")
* n_f.powf(F::from(-2.0).expect("Failed to convert constant to float"))
- F::from(2.273).expect("Failed to convert constant to float")
* n_f.powf(F::from(-1.0).expect("Failed to convert constant to float"));
let mu = F::from(-0.0006714).expect("Failed to convert constant to float")
* n_f.powf(F::from(3.0).expect("Failed to convert constant to float"))
+ F::from(0.025054).expect("Failed to convert constant to float")
* n_f.powf(F::from(2.0).expect("Failed to convert constant to float"))
- F::from(0.39978).expect("Failed to convert constant to float") * n_f
+ F::from(0.5440).expect("Failed to convert constant to float");
let sigma = (F::from(-0.0020322).expect("Failed to convert constant to float")
* n_f.powf(F::from(2.0).expect("Failed to convert constant to float"))
+ F::from(0.1348).expect("Failed to convert constant to float") * n_f
+ F::from(0.029184).expect("Failed to convert constant to float"))
.exp();
let z = (F::from(y).expect("Failed to convert to float") - mu) / sigma;
let z_f64 = <f64 as NumCast>::from(z).expect("Test operation failed");
let p_value = if z_f64 < 0.0 {
let abs_z = -z_f64;
let t = 1.0 / (1.0 + 0.2316419 * abs_z);
let poly = t
* (0.319381530
+ t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
let cdf = 1.0 - 0.39894228 * (-0.5 * abs_z * abs_z).exp() * poly;
F::from(cdf).expect("Failed to convert to float")
} else {
let t = 1.0 / (1.0 + 0.2316419 * z_f64);
let poly = t
* (0.319381530
+ t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
let cdf = 0.39894228 * (-0.5 * z_f64 * z_f64).exp() * poly;
F::from(1.0 - cdf).expect("Failed to convert to float")
};
p_value.min(F::one()).max(F::zero())
}
#[allow(dead_code)]
fn calculate_mann_whitney_p_value<F: Float + NumCast>(u: F, n1: usize, n2: usize) -> F {
let n1_f = F::from(n1).expect("Failed to convert to float");
let n2_f = F::from(n2).expect("Failed to convert to float");
let mu_u = n1_f * n2_f / F::from(2.0).expect("Failed to convert constant to float");
let sigma_u = (n1_f * n2_f * (n1_f + n2_f + F::one())
/ F::from(12.0).expect("Failed to convert constant to float"))
.sqrt();
let z = (u - mu_u).abs() / sigma_u;
let z_f64 = <f64 as NumCast>::from(z).expect("Test operation failed");
let cdf = if z_f64 < 0.0 {
0.5
} else {
let t = 1.0 / (1.0 + 0.2316419 * z_f64);
let poly = t
* (0.319381530
+ t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
1.0 - 0.5 * 0.39894228 * (-0.5 * z_f64 * z_f64).exp() * poly
};
let p_value = F::from(2.0 * (1.0 - cdf)).expect("Test operation failed");
p_value.min(F::one()).max(F::zero())
}