use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::ArrayView1;
use scirs2_core::numeric::{Float, NumCast};
#[derive(Debug, Clone)]
pub struct EffectSizeResult<F: Float> {
pub estimate: F,
pub measure: String,
pub ci_lower: Option<F>,
pub ci_upper: Option<F>,
pub confidence_level: Option<F>,
}
fn normal_quantile(p: f64) -> f64 {
if p <= 0.0 {
return f64::NEG_INFINITY;
}
if p >= 1.0 {
return f64::INFINITY;
}
if (p - 0.5).abs() < 1e-15 {
return 0.0;
}
let a = [
-3.969683028665376e+01,
2.209460984245205e+02,
-2.759285104469687e+02,
1.383577518672690e+02,
-3.066479806614716e+01,
2.506628277459239e+00,
];
let b = [
-5.447609879822406e+01,
1.615858368580409e+02,
-1.556989798598866e+02,
6.680131188771972e+01,
-1.328068155288572e+01,
];
let c = [
-7.784894002430293e-03,
-3.223964580411365e-01,
-2.400758277161838e+00,
-2.549732539343734e+00,
4.374664141464968e+00,
2.938163982698783e+00,
];
let d = [
7.784695709041462e-03,
3.224671290700398e-01,
2.445134137142996e+00,
3.754408661907416e+00,
];
let p_low = 0.02425;
let p_high = 1.0 - p_low;
if p < p_low {
let q = (-2.0 * p.ln()).sqrt();
(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
} else if p <= p_high {
let q = p - 0.5;
let r = q * q;
(((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
/ (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1.0)
} else {
let q = (-2.0 * (1.0 - p).ln()).sqrt();
-(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
}
}
fn normal_cdf_f64(x: f64) -> f64 {
if x < -8.0 {
return 0.0;
}
if x > 8.0 {
return 1.0;
}
let abs_x = x.abs();
let t = 1.0 / (1.0 + 0.2316419 * abs_x);
let d = 0.39894228040143268 * (-0.5 * x * x).exp();
let p = t
* (0.319381530
+ t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
if x >= 0.0 {
1.0 - d * p
} else {
d * p
}
}
pub fn cohens_d<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
if x.len() < 2 || y.len() < 2 {
return Err(StatsError::InvalidArgument(
"Both samples must have at least 2 observations for Cohen's d".to_string(),
));
}
let (mean_x, var_x, n1) = sample_stats(x)?;
let (mean_y, var_y, n2) = sample_stats(y)?;
let pooled_var = ((n1 - 1.0) * var_x + (n2 - 1.0) * var_y) / (n1 + n2 - 2.0);
let pooled_sd = pooled_var.sqrt();
if pooled_sd < 1e-30 {
return Err(StatsError::ComputationError(
"Pooled standard deviation is zero; Cohen's d is undefined".to_string(),
));
}
let d = (mean_x - mean_y) / pooled_sd;
Ok(EffectSizeResult {
estimate: F::from(d).unwrap_or(F::zero()),
measure: "Cohen's d".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn hedges_g<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
if x.len() < 2 || y.len() < 2 {
return Err(StatsError::InvalidArgument(
"Both samples must have at least 2 observations for Hedges' g".to_string(),
));
}
let d_result = cohens_d(x, y)?;
let d = <f64 as NumCast>::from(d_result.estimate).unwrap_or(0.0);
let df = (x.len() + y.len() - 2) as f64;
let j = 1.0 - 3.0 / (4.0 * df - 1.0);
let g = d * j;
Ok(EffectSizeResult {
estimate: F::from(g).unwrap_or(F::zero()),
measure: "Hedges' g".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn glass_delta<F>(
treatment: &ArrayView1<F>,
control: &ArrayView1<F>,
) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
if treatment.len() < 2 || control.len() < 2 {
return Err(StatsError::InvalidArgument(
"Both samples must have at least 2 observations for Glass's delta".to_string(),
));
}
let (mean_t, _, _) = sample_stats(treatment)?;
let (mean_c, var_c, _) = sample_stats(control)?;
let sd_c = var_c.sqrt();
if sd_c < 1e-30 {
return Err(StatsError::ComputationError(
"Control group standard deviation is zero; Glass's delta is undefined".to_string(),
));
}
let delta = (mean_t - mean_c) / sd_c;
Ok(EffectSizeResult {
estimate: F::from(delta).unwrap_or(F::zero()),
measure: "Glass's delta".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn point_biserial_correlation<F>(
binary: &ArrayView1<F>,
continuous: &ArrayView1<F>,
) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
if binary.len() != continuous.len() {
return Err(StatsError::DimensionMismatch(
"Binary and continuous arrays must have equal length".to_string(),
));
}
if binary.len() < 3 {
return Err(StatsError::InvalidArgument(
"At least 3 observations required for point-biserial correlation".to_string(),
));
}
let n = binary.len();
let nf = n as f64;
let mut group0: Vec<f64> = Vec::new();
let mut group1: Vec<f64> = Vec::new();
for i in 0..n {
let b = <f64 as NumCast>::from(binary[i]).unwrap_or(0.0);
let c = <f64 as NumCast>::from(continuous[i]).unwrap_or(0.0);
if (b - 0.0).abs() < 1e-10 {
group0.push(c);
} else if (b - 1.0).abs() < 1e-10 {
group1.push(c);
} else {
return Err(StatsError::InvalidArgument(format!(
"Binary variable must contain only 0 and 1, got {} at index {}",
b, i
)));
}
}
if group0.is_empty() || group1.is_empty() {
return Err(StatsError::InvalidArgument(
"Both groups must have at least one observation".to_string(),
));
}
let n0 = group0.len() as f64;
let n1 = group1.len() as f64;
let mean0: f64 = group0.iter().sum::<f64>() / n0;
let mean1: f64 = group1.iter().sum::<f64>() / n1;
let overall_mean: f64 = continuous
.iter()
.map(|&v| <f64 as NumCast>::from(v).unwrap_or(0.0))
.sum::<f64>()
/ nf;
let overall_var: f64 = continuous
.iter()
.map(|&v| {
let vf = <f64 as NumCast>::from(v).unwrap_or(0.0);
(vf - overall_mean) * (vf - overall_mean)
})
.sum::<f64>()
/ (nf - 1.0);
let overall_sd = overall_var.sqrt();
if overall_sd < 1e-30 {
return Err(StatsError::ComputationError(
"Standard deviation is zero; point-biserial correlation is undefined".to_string(),
));
}
let rpb = (mean1 - mean0) / overall_sd * (n0 * n1).sqrt() / nf;
Ok(EffectSizeResult {
estimate: F::from(rpb).unwrap_or(F::zero()),
measure: "Point-biserial correlation".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn eta_squared<F>(groups: &[ArrayView1<F>]) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
if groups.len() < 2 {
return Err(StatsError::InvalidArgument(
"At least 2 groups required for eta-squared".to_string(),
));
}
for (i, g) in groups.iter().enumerate() {
if g.is_empty() {
return Err(StatsError::InvalidArgument(format!("Group {} is empty", i)));
}
}
let (ss_between, ss_total) = compute_ss(groups)?;
if ss_total < 1e-30 {
return Err(StatsError::ComputationError(
"Total sum of squares is zero; eta-squared is undefined".to_string(),
));
}
let eta2 = ss_between / ss_total;
Ok(EffectSizeResult {
estimate: F::from(eta2).unwrap_or(F::zero()),
measure: "Eta-squared".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn partial_eta_squared<F>(ss_effect: F, ss_error: F) -> StatsResult<EffectSizeResult<F>>
where
F: Float + NumCast + std::fmt::Display,
{
let ef = <f64 as NumCast>::from(ss_effect).unwrap_or(0.0);
let er = <f64 as NumCast>::from(ss_error).unwrap_or(0.0);
if ef < 0.0 || er < 0.0 {
return Err(StatsError::InvalidArgument(
"Sums of squares must be non-negative".to_string(),
));
}
let denom = ef + er;
if denom < 1e-30 {
return Err(StatsError::ComputationError(
"SS_effect + SS_error is zero; partial eta-squared is undefined".to_string(),
));
}
let partial_eta2 = ef / denom;
Ok(EffectSizeResult {
estimate: F::from(partial_eta2).unwrap_or(F::zero()),
measure: "Partial eta-squared".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn omega_squared<F>(groups: &[ArrayView1<F>]) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
if groups.len() < 2 {
return Err(StatsError::InvalidArgument(
"At least 2 groups required for omega-squared".to_string(),
));
}
for (i, g) in groups.iter().enumerate() {
if g.is_empty() {
return Err(StatsError::InvalidArgument(format!("Group {} is empty", i)));
}
}
let k = groups.len();
let n_total: usize = groups.iter().map(|g| g.len()).sum();
let (ss_between, ss_total) = compute_ss(groups)?;
let ss_within = ss_total - ss_between;
let df_between = (k - 1) as f64;
let df_within = (n_total - k) as f64;
if df_within < 1.0 {
return Err(StatsError::InvalidArgument(
"Insufficient degrees of freedom for omega-squared".to_string(),
));
}
let ms_error = ss_within / df_within;
let denom = ss_total + ms_error;
if denom < 1e-30 {
return Err(StatsError::ComputationError(
"Denominator is zero; omega-squared is undefined".to_string(),
));
}
let omega2 = (ss_between - df_between * ms_error) / denom;
let omega2 = omega2.max(0.0);
Ok(EffectSizeResult {
estimate: F::from(omega2).unwrap_or(F::zero()),
measure: "Omega-squared".to_string(),
ci_lower: None,
ci_upper: None,
confidence_level: None,
})
}
pub fn cohens_d_ci<F>(
x: &ArrayView1<F>,
y: &ArrayView1<F>,
confidence_level: F,
) -> StatsResult<EffectSizeResult<F>>
where
F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
{
let cl = <f64 as NumCast>::from(confidence_level).unwrap_or(0.95);
if cl <= 0.0 || cl >= 1.0 {
return Err(StatsError::InvalidArgument(format!(
"confidence_level must be in (0, 1), got {}",
cl
)));
}
let d_result = cohens_d(x, y)?;
let d = <f64 as NumCast>::from(d_result.estimate).unwrap_or(0.0);
let n1 = x.len() as f64;
let n2 = y.len() as f64;
let se = (1.0 / n1 + 1.0 / n2 + d * d / (2.0 * (n1 + n2))).sqrt();
let alpha = 1.0 - cl;
let z = normal_quantile(1.0 - alpha / 2.0);
let lower = d - z * se;
let upper = d + z * se;
Ok(EffectSizeResult {
estimate: F::from(d).unwrap_or(F::zero()),
measure: "Cohen's d".to_string(),
ci_lower: Some(F::from(lower).unwrap_or(F::zero())),
ci_upper: Some(F::from(upper).unwrap_or(F::zero())),
confidence_level: Some(confidence_level),
})
}
pub fn correlation_ci<F>(r: F, n: usize, confidence_level: F) -> StatsResult<EffectSizeResult<F>>
where
F: Float + NumCast + std::fmt::Display,
{
let rf = <f64 as NumCast>::from(r).unwrap_or(0.0);
let cl = <f64 as NumCast>::from(confidence_level).unwrap_or(0.95);
if rf < -1.0 || rf > 1.0 {
return Err(StatsError::InvalidArgument(format!(
"Correlation must be in [-1, 1], got {}",
rf
)));
}
if n < 4 {
return Err(StatsError::InvalidArgument(
"Sample size must be at least 4 for correlation CI".to_string(),
));
}
if cl <= 0.0 || cl >= 1.0 {
return Err(StatsError::InvalidArgument(format!(
"confidence_level must be in (0, 1), got {}",
cl
)));
}
let z = 0.5 * ((1.0 + rf) / (1.0 - rf)).ln();
let se_z = 1.0 / ((n as f64 - 3.0).sqrt());
let alpha = 1.0 - cl;
let z_crit = normal_quantile(1.0 - alpha / 2.0);
let z_lower = z - z_crit * se_z;
let z_upper = z + z_crit * se_z;
let r_lower = z_lower.tanh();
let r_upper = z_upper.tanh();
Ok(EffectSizeResult {
estimate: r,
measure: "Pearson correlation".to_string(),
ci_lower: Some(F::from(r_lower).unwrap_or(F::zero())),
ci_upper: Some(F::from(r_upper).unwrap_or(F::zero())),
confidence_level: Some(confidence_level),
})
}
pub fn d_to_r(d: f64, n1: usize, n2: usize) -> f64 {
let n1f = n1 as f64;
let n2f = n2 as f64;
let a = (n1f + n2f) * (n1f + n2f) / (n1f * n2f);
d / (d * d + a).sqrt()
}
pub fn r_to_d(r: f64) -> f64 {
if (1.0 - r * r) < 1e-15 {
if r > 0.0 {
f64::INFINITY
} else {
f64::NEG_INFINITY
}
} else {
2.0 * r / (1.0 - r * r).sqrt()
}
}
fn sample_stats<F>(x: &ArrayView1<F>) -> StatsResult<(f64, f64, f64)>
where
F: Float + std::iter::Sum<F> + NumCast,
{
let n = x.len();
if n == 0 {
return Err(StatsError::InvalidArgument(
"Input array cannot be empty".to_string(),
));
}
let nf = n as f64;
let mean: f64 = x
.iter()
.map(|&v| <f64 as NumCast>::from(v).unwrap_or(0.0))
.sum::<f64>()
/ nf;
let var: f64 = x
.iter()
.map(|&v| {
let vf = <f64 as NumCast>::from(v).unwrap_or(0.0);
(vf - mean) * (vf - mean)
})
.sum::<f64>()
/ (nf - 1.0).max(1.0);
Ok((mean, var, nf))
}
fn compute_ss<F>(groups: &[ArrayView1<F>]) -> StatsResult<(f64, f64)>
where
F: Float + std::iter::Sum<F> + NumCast,
{
let mut grand_sum = 0.0_f64;
let mut n_total = 0_usize;
for g in groups {
for &v in g.iter() {
grand_sum += <f64 as NumCast>::from(v).unwrap_or(0.0);
n_total += 1;
}
}
if n_total == 0 {
return Err(StatsError::InvalidArgument(
"All groups are empty".to_string(),
));
}
let grand_mean = grand_sum / n_total as f64;
let mut ss_total = 0.0_f64;
for g in groups {
for &v in g.iter() {
let vf = <f64 as NumCast>::from(v).unwrap_or(0.0);
ss_total += (vf - grand_mean) * (vf - grand_mean);
}
}
let mut ss_between = 0.0_f64;
for g in groups {
let ni = g.len() as f64;
let group_mean: f64 = g
.iter()
.map(|&v| <f64 as NumCast>::from(v).unwrap_or(0.0))
.sum::<f64>()
/ ni;
ss_between += ni * (group_mean - grand_mean) * (group_mean - grand_mean);
}
Ok((ss_between, ss_total))
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::array;
#[test]
fn test_cohens_d_basic() {
let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
let result = cohens_d(&x.view(), &y.view()).expect("Cohen's d failed");
assert!(result.estimate > 1.0);
assert!(result.estimate < 2.0);
}
#[test]
fn test_cohens_d_equal_means() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
let y = array![1.0, 2.0, 3.0, 4.0, 5.0];
let result = cohens_d(&x.view(), &y.view()).expect("Cohen's d failed");
assert!(result.estimate.abs() < 1e-10);
}
#[test]
fn test_cohens_d_too_small() {
let x = array![1.0];
let y = array![2.0, 3.0];
assert!(cohens_d(&x.view(), &y.view()).is_err());
}
#[test]
fn test_hedges_g_correction() {
let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
let d = cohens_d(&x.view(), &y.view()).expect("d failed");
let g = hedges_g(&x.view(), &y.view()).expect("g failed");
assert!(g.estimate.abs() < d.estimate.abs() + 1e-10);
assert!(g.estimate.abs() > 0.0);
}
#[test]
fn test_glass_delta_basic() {
let treatment = array![6.0, 7.0, 8.0, 9.0, 10.0];
let control = array![1.0, 2.0, 3.0, 4.0, 5.0];
let result = glass_delta(&treatment.view(), &control.view()).expect("Glass's delta failed");
assert!(result.estimate > 2.0);
}
#[test]
fn test_glass_delta_symmetric() {
let a = array![1.0, 2.0, 3.0, 4.0, 5.0];
let b = array![3.0, 4.0, 5.0, 6.0, 7.0];
let delta_ab = glass_delta(&a.view(), &b.view()).expect("delta AB failed");
let delta_ba = glass_delta(&b.view(), &a.view()).expect("delta BA failed");
assert!(delta_ab.estimate < 0.0);
assert!(delta_ba.estimate > 0.0);
}
#[test]
fn test_point_biserial_basic() {
let binary = array![0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
let continuous = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let result =
point_biserial_correlation(&binary.view(), &continuous.view()).expect("rpb failed");
assert!(result.estimate > 0.0);
assert!(result.estimate <= 1.0);
}
#[test]
fn test_point_biserial_no_correlation() {
let binary = array![0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0];
let continuous = array![5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0];
let result = point_biserial_correlation(&binary.view(), &continuous.view());
assert!(result.is_err());
}
#[test]
fn test_point_biserial_invalid_binary() {
let binary = array![0.0, 1.0, 2.0];
let continuous = array![1.0, 2.0, 3.0];
let result = point_biserial_correlation(&binary.view(), &continuous.view());
assert!(result.is_err());
}
#[test]
fn test_eta_squared_basic() {
let g1 = array![1.0, 2.0, 3.0];
let g2 = array![4.0, 5.0, 6.0];
let g3 = array![7.0, 8.0, 9.0];
let groups = vec![g1.view(), g2.view(), g3.view()];
let result = eta_squared(&groups).expect("eta2 failed");
assert!(result.estimate > 0.0);
assert!(result.estimate <= 1.0);
assert!(result.estimate > 0.8);
}
#[test]
fn test_eta_squared_no_effect() {
let g1 = array![5.0, 5.0, 5.0];
let g2 = array![5.0, 5.0, 5.0];
let groups = vec![g1.view(), g2.view()];
let result = eta_squared(&groups);
assert!(result.is_err());
}
#[test]
fn test_partial_eta_squared() {
let result = partial_eta_squared(10.0_f64, 40.0_f64).expect("partial eta2 failed");
assert!((result.estimate - 0.2).abs() < 1e-10); }
#[test]
fn test_partial_eta_squared_zero() {
let result = partial_eta_squared(0.0_f64, 0.0_f64);
assert!(result.is_err());
}
#[test]
fn test_omega_squared_basic() {
let g1 = array![1.0, 2.0, 3.0, 4.0, 5.0];
let g2 = array![6.0, 7.0, 8.0, 9.0, 10.0];
let groups = vec![g1.view(), g2.view()];
let result = omega_squared(&groups).expect("omega2 failed");
assert!(result.estimate > 0.0);
assert!(result.estimate <= 1.0);
let eta2_result = eta_squared(&groups).expect("eta2 failed");
assert!(result.estimate <= eta2_result.estimate + 1e-10);
}
#[test]
fn test_cohens_d_ci() {
let x = array![5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0];
let y = array![3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let result = cohens_d_ci(&x.view(), &y.view(), 0.95).expect("CI failed");
assert!(result.ci_lower.is_some());
assert!(result.ci_upper.is_some());
let lower = result.ci_lower.expect("no lower bound");
let upper = result.ci_upper.expect("no upper bound");
assert!(lower < result.estimate);
assert!(upper > result.estimate);
assert!((result.confidence_level.expect("no CL") - 0.95).abs() < 1e-10);
}
#[test]
fn test_correlation_ci() {
let result = correlation_ci(0.5_f64, 100, 0.95).expect("corr CI failed");
let lower = result.ci_lower.expect("no lower");
let upper = result.ci_upper.expect("no upper");
assert!(lower < 0.5);
assert!(upper > 0.5);
assert!(lower > 0.0); }
#[test]
fn test_correlation_ci_invalid() {
assert!(correlation_ci(1.5_f64, 100, 0.95).is_err());
assert!(correlation_ci(0.5_f64, 2, 0.95).is_err());
assert!(correlation_ci(0.5_f64, 100, 0.0).is_err());
}
#[test]
fn test_d_to_r_and_back() {
let d = 0.8;
let r = d_to_r(d, 50, 50);
assert!(r > 0.0 && r < 1.0);
let d_back = r_to_d(r);
assert!((d_back - d).abs() < 0.1); }
#[test]
fn test_r_to_d_extremes() {
assert!(r_to_d(0.0).abs() < 1e-10);
assert!(r_to_d(0.999) > 10.0);
assert!(r_to_d(-0.999) < -10.0);
}
#[test]
fn test_normal_quantile() {
assert!((normal_quantile(0.5) - 0.0).abs() < 1e-10);
assert!((normal_quantile(0.975) - 1.96).abs() < 0.01);
assert!((normal_quantile(0.025) + 1.96).abs() < 0.01);
}
#[test]
fn test_effect_size_result_fields() {
let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
let result = cohens_d(&x.view(), &y.view()).expect("d failed");
assert_eq!(result.measure, "Cohen's d");
assert!(result.ci_lower.is_none());
assert!(result.ci_upper.is_none());
}
}