use ferray_core::error::{FerrayError, FerrayResult};
use ferray_core::{Array, Dimension, Element, Ix2};
fn betainc(a: f64, b: f64, x: f64) -> f64 {
if !(0.0..=1.0).contains(&x) || a <= 0.0 || b <= 0.0 {
return f64::NAN;
}
if x == 0.0 {
return 0.0;
}
if x == 1.0 {
return 1.0;
}
let bt = (ln_gamma(a + b) - ln_gamma(a) - ln_gamma(b) + a * x.ln() + b * (1.0 - x).ln()).exp();
if x < (a + 1.0) / (a + b + 2.0) {
bt * betacf(a, b, x) / a
} else {
1.0 - bt * betacf(b, a, 1.0 - x) / b
}
}
fn ln_gamma(x: f64) -> f64 {
const G: f64 = 5.0;
const COEFFS: [f64; 7] = [
1.000_000_000_190_015,
76.180_091_729_471_46,
-86.505_320_329_416_77,
24.014_098_240_830_91,
-1.231_739_572_450_155,
0.001_208_650_973_866_179,
-0.000_005_395_239_384_953,
];
let mut sum = COEFFS[0];
for (i, &c) in COEFFS.iter().enumerate().skip(1) {
sum += c / (x + i as f64);
}
let half_log_2pi = 0.918_938_533_204_672_8;
half_log_2pi + (x + 0.5) * (x + G + 0.5).ln() - (x + G + 0.5) + (sum / x).ln()
}
fn betacf(a: f64, b: f64, x: f64) -> f64 {
const MAX_ITER: usize = 200;
const EPS: f64 = 3e-16;
let qab = a + b;
let qap = a + 1.0;
let qam = a - 1.0;
let mut c = 1.0;
let mut d = 1.0 - qab * x / qap;
if d.abs() < 1e-30 {
d = 1e-30;
}
d = 1.0 / d;
let mut h = d;
for m in 1..=MAX_ITER {
let mf = m as f64;
let m2 = 2.0 * mf;
let aa = mf * (b - mf) * x / ((qam + m2) * (a + m2));
d = 1.0 + aa * d;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = 1.0 + aa / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
h *= d * c;
let aa = -(a + mf) * (qab + mf) * x / ((a + m2) * (qap + m2));
d = 1.0 + aa * d;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = 1.0 + aa / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
let del = d * c;
h *= del;
if (del - 1.0).abs() < EPS {
return h;
}
}
h
}
fn t_two_sided_p(t: f64, df: f64) -> f64 {
if !t.is_finite() || df <= 0.0 {
return f64::NAN;
}
betainc(df / 2.0, 0.5, df / (df + t * t))
}
pub fn pearsonr<T, D>(x: &Array<T, D>, y: &Array<T, D>) -> FerrayResult<(f64, f64)>
where
T: Element + Copy + Into<f64>,
D: Dimension,
{
let xs: Vec<f64> = x.iter().copied().map(Into::into).collect();
let ys: Vec<f64> = y.iter().copied().map(Into::into).collect();
if xs.len() != ys.len() {
return Err(FerrayError::shape_mismatch(format!(
"pearsonr: arrays have different lengths ({} vs {})",
xs.len(),
ys.len()
)));
}
let n = xs.len();
if n < 2 {
return Err(FerrayError::invalid_value(
"pearsonr: at least 2 observations required",
));
}
let nf = n as f64;
let mx = xs.iter().sum::<f64>() / nf;
let my = ys.iter().sum::<f64>() / nf;
let mut sxx = 0.0_f64;
let mut syy = 0.0_f64;
let mut sxy = 0.0_f64;
for (a, b) in xs.iter().zip(ys.iter()) {
let dx = a - mx;
let dy = b - my;
sxx += dx * dx;
syy += dy * dy;
sxy += dx * dy;
}
let denom = (sxx * syy).sqrt();
if denom == 0.0 {
return Ok((f64::NAN, f64::NAN));
}
let r = (sxy / denom).clamp(-1.0, 1.0);
let df = nf - 2.0;
let t = r * (df / (1.0 - r * r).max(f64::MIN_POSITIVE)).sqrt();
let p = t_two_sided_p(t, df);
Ok((r, p))
}
pub fn spearmanr<T, D>(x: &Array<T, D>, y: &Array<T, D>) -> FerrayResult<(f64, f64)>
where
T: Element + Copy + PartialOrd + Into<f64>,
D: Dimension,
{
let xs: Vec<f64> = x.iter().copied().map(Into::into).collect();
let ys: Vec<f64> = y.iter().copied().map(Into::into).collect();
if xs.len() != ys.len() {
return Err(FerrayError::shape_mismatch(format!(
"spearmanr: arrays have different lengths ({} vs {})",
xs.len(),
ys.len()
)));
}
if xs.len() < 2 {
return Err(FerrayError::invalid_value(
"spearmanr: at least 2 observations required",
));
}
let rx = average_ranks(&xs);
let ry = average_ranks(&ys);
pearsonr_f64_slices(&rx, &ry)
}
fn pearsonr_f64_slices(xs: &[f64], ys: &[f64]) -> FerrayResult<(f64, f64)> {
let n = xs.len();
if n < 2 {
return Err(FerrayError::invalid_value(
"pearsonr: at least 2 observations required",
));
}
let nf = n as f64;
let mx = xs.iter().sum::<f64>() / nf;
let my = ys.iter().sum::<f64>() / nf;
let mut sxx = 0.0_f64;
let mut syy = 0.0_f64;
let mut sxy = 0.0_f64;
for (a, b) in xs.iter().zip(ys.iter()) {
let dx = a - mx;
let dy = b - my;
sxx += dx * dx;
syy += dy * dy;
sxy += dx * dy;
}
let denom = (sxx * syy).sqrt();
if denom == 0.0 {
return Ok((f64::NAN, f64::NAN));
}
let r = (sxy / denom).clamp(-1.0, 1.0);
let df = nf - 2.0;
let t = r * (df / (1.0 - r * r).max(f64::MIN_POSITIVE)).sqrt();
let p = t_two_sided_p(t, df);
Ok((r, p))
}
fn average_ranks(xs: &[f64]) -> Vec<f64> {
let n = xs.len();
let mut idx: Vec<usize> = (0..n).collect();
idx.sort_by(|&a, &b| {
xs[a]
.partial_cmp(&xs[b])
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut ranks = vec![0.0_f64; n];
let mut i = 0;
while i < n {
let mut j = i;
while j + 1 < n && xs[idx[j + 1]] == xs[idx[i]] {
j += 1;
}
let avg = (i as f64 + j as f64) / 2.0 + 1.0;
for k in i..=j {
ranks[idx[k]] = avg;
}
i = j + 1;
}
ranks
}
pub fn ttest_1samp<T, D>(a: &Array<T, D>, popmean: f64) -> FerrayResult<(f64, f64)>
where
T: Element + Copy + Into<f64>,
D: Dimension,
{
let xs: Vec<f64> = a.iter().copied().map(Into::into).collect();
let n = xs.len();
if n < 2 {
return Err(FerrayError::invalid_value(
"ttest_1samp: at least 2 observations required",
));
}
let nf = n as f64;
let mean = xs.iter().sum::<f64>() / nf;
let var = xs.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (nf - 1.0);
if var == 0.0 {
return Ok((f64::NAN, f64::NAN));
}
let std = var.sqrt();
let se = std / nf.sqrt();
let t = (mean - popmean) / se;
let p = t_two_sided_p(t, nf - 1.0);
Ok((t, p))
}
pub fn ttest_ind<T, D>(a: &Array<T, D>, b: &Array<T, D>) -> FerrayResult<(f64, f64)>
where
T: Element + Copy + Into<f64>,
D: Dimension,
{
let xs: Vec<f64> = a.iter().copied().map(Into::into).collect();
let ys: Vec<f64> = b.iter().copied().map(Into::into).collect();
if xs.len() < 2 || ys.len() < 2 {
return Err(FerrayError::invalid_value(
"ttest_ind: each sample needs at least 2 observations",
));
}
let n1 = xs.len() as f64;
let n2 = ys.len() as f64;
let m1 = xs.iter().sum::<f64>() / n1;
let m2 = ys.iter().sum::<f64>() / n2;
let v1 = xs.iter().map(|x| (x - m1).powi(2)).sum::<f64>() / (n1 - 1.0);
let v2 = ys.iter().map(|x| (x - m2).powi(2)).sum::<f64>() / (n2 - 1.0);
if v1 == 0.0 && v2 == 0.0 {
return Ok((f64::NAN, f64::NAN));
}
let se = (v1 / n1 + v2 / n2).sqrt();
let t = (m1 - m2) / se;
let num = (v1 / n1 + v2 / n2).powi(2);
let denom = (v1 / n1).powi(2) / (n1 - 1.0) + (v2 / n2).powi(2) / (n2 - 1.0);
let df = num / denom;
let p = t_two_sided_p(t, df);
Ok((t, p))
}
#[derive(Debug, Clone)]
pub struct Chi2ContingencyResult {
pub statistic: f64,
pub p_value: f64,
pub dof: usize,
pub expected: Array<f64, Ix2>,
}
pub fn chi2_contingency<T>(observed: &Array<T, Ix2>) -> FerrayResult<Chi2ContingencyResult>
where
T: Element + Copy + Into<f64>,
{
let shape = observed.shape();
let nr = shape[0];
let nc = shape[1];
if nr == 0 || nc == 0 {
return Err(FerrayError::invalid_value(
"chi2_contingency: table must be non-empty along both axes",
));
}
let data: Vec<f64> = observed.iter().copied().map(Into::into).collect();
let mut row_sums = vec![0.0_f64; nr];
let mut col_sums = vec![0.0_f64; nc];
let mut total = 0.0_f64;
for i in 0..nr {
for j in 0..nc {
let v = data[i * nc + j];
row_sums[i] += v;
col_sums[j] += v;
total += v;
}
}
if total == 0.0 {
return Err(FerrayError::invalid_value(
"chi2_contingency: total frequency is zero",
));
}
if row_sums.contains(&0.0) || col_sums.contains(&0.0) {
return Err(FerrayError::invalid_value(
"chi2_contingency: a row or column has zero total — independence model is degenerate",
));
}
let mut expected = vec![0.0_f64; nr * nc];
let mut chi2 = 0.0_f64;
for i in 0..nr {
for j in 0..nc {
let e = row_sums[i] * col_sums[j] / total;
expected[i * nc + j] = e;
let o = data[i * nc + j];
let d = o - e;
chi2 += d * d / e;
}
}
let dof = (nr - 1) * (nc - 1);
let p = chi2_sf(chi2, dof as f64);
Ok(Chi2ContingencyResult {
statistic: chi2,
p_value: p,
dof,
expected: Array::<f64, Ix2>::from_vec(Ix2::new([nr, nc]), expected)?,
})
}
fn chi2_sf(x: f64, df: f64) -> f64 {
if !x.is_finite() || x < 0.0 || df <= 0.0 {
return f64::NAN;
}
if x == 0.0 {
return 1.0;
}
gamma_q(df / 2.0, x / 2.0)
}
fn gamma_q(a: f64, x: f64) -> f64 {
if a <= 0.0 || x < 0.0 || !x.is_finite() {
return f64::NAN;
}
if x == 0.0 {
return 1.0;
}
if x < a + 1.0 {
1.0 - gamma_p_series(a, x)
} else {
gamma_q_cf(a, x)
}
}
fn gamma_p_series(a: f64, x: f64) -> f64 {
const MAX_ITER: usize = 200;
const EPS: f64 = 3e-16;
let mut ap = a;
let mut sum = 1.0_f64 / a;
let mut term = sum;
for _ in 0..MAX_ITER {
ap += 1.0;
term *= x / ap;
sum += term;
if term.abs() < sum.abs() * EPS {
break;
}
}
sum * (-x + a * x.ln() - ln_gamma(a)).exp()
}
fn gamma_q_cf(a: f64, x: f64) -> f64 {
const MAX_ITER: usize = 200;
const EPS: f64 = 3e-16;
let mut b = x + 1.0 - a;
let mut c = 1.0 / 1e-30;
let mut d = 1.0 / b;
let mut h = d;
for i in 1..=MAX_ITER {
let an = -(i as f64) * (i as f64 - a);
b += 2.0;
d = an * d + b;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = b + an / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
let del = d * c;
h *= del;
if (del - 1.0).abs() < EPS {
break;
}
}
h * (-x + a * x.ln() - ln_gamma(a)).exp()
}
pub fn ks_2samp<T, D>(a: &Array<T, D>, b: &Array<T, D>) -> FerrayResult<(f64, f64)>
where
T: Element + Copy + PartialOrd + Into<f64>,
D: Dimension,
{
let mut xs: Vec<f64> = a.iter().copied().map(Into::into).collect();
let mut ys: Vec<f64> = b.iter().copied().map(Into::into).collect();
if xs.is_empty() || ys.is_empty() {
return Err(FerrayError::invalid_value(
"ks_2samp: both samples must be non-empty",
));
}
xs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
ys.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n1 = xs.len() as f64;
let n2 = ys.len() as f64;
let mut union: Vec<f64> = Vec::with_capacity(xs.len() + ys.len());
union.extend_from_slice(&xs);
union.extend_from_slice(&ys);
union.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mut d = 0.0_f64;
for &v in &union {
let i = xs.partition_point(|x| *x <= v);
let j = ys.partition_point(|y| *y <= v);
let here = (i as f64 / n1 - j as f64 / n2).abs();
if here > d {
d = here;
}
}
let en = (n1 * n2 / (n1 + n2)).sqrt();
let lambda = (en + 0.12 + 0.11 / en) * d;
let p = ks_p_kolmogorov(lambda).clamp(0.0, 1.0);
Ok((d, p))
}
fn ks_p_kolmogorov(lambda: f64) -> f64 {
if lambda <= 0.0 {
return 1.0;
}
let mut sum = 0.0_f64;
let mut sign = 1.0_f64;
let mut prev = 0.0_f64;
for k in 1..=200 {
let kf = k as f64;
let term = sign * (-2.0 * kf * kf * lambda * lambda).exp();
sum += term;
if (sum - prev).abs() < 1e-15 {
break;
}
prev = sum;
sign = -sign;
}
2.0 * sum
}
#[cfg(test)]
mod tests {
use super::*;
use ferray_core::Ix1;
fn arr1(v: Vec<f64>) -> Array<f64, Ix1> {
let n = v.len();
Array::from_vec(Ix1::new([n]), v).unwrap()
}
#[test]
fn pearsonr_perfect_positive() {
let x = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let y = arr1(vec![2.0, 4.0, 6.0, 8.0, 10.0]);
let (r, p) = pearsonr(&x, &y).unwrap();
assert!((r - 1.0).abs() < 1e-12);
assert!(p < 1e-6);
}
#[test]
fn pearsonr_perfect_negative() {
let x = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let y = arr1(vec![5.0, 4.0, 3.0, 2.0, 1.0]);
let (r, p) = pearsonr(&x, &y).unwrap();
assert!((r + 1.0).abs() < 1e-12);
assert!(p < 1e-6);
}
#[test]
fn pearsonr_uncorrelated_high_p() {
let x = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
let y = arr1(vec![7.5, -1.0, 2.0, 5.5, -3.0, 6.0, 0.5, 1.5]);
let (r, p) = pearsonr(&x, &y).unwrap();
assert!(r.abs() < 1.0);
assert!(p > 0.0 && p <= 1.0);
}
#[test]
fn pearsonr_constant_returns_nan() {
let x = arr1(vec![3.0, 3.0, 3.0, 3.0]);
let y = arr1(vec![1.0, 2.0, 3.0, 4.0]);
let (r, p) = pearsonr(&x, &y).unwrap();
assert!(r.is_nan());
assert!(p.is_nan());
}
#[test]
fn pearsonr_length_mismatch_errors() {
let x = arr1(vec![1.0, 2.0]);
let y = arr1(vec![1.0, 2.0, 3.0]);
assert!(pearsonr(&x, &y).is_err());
}
#[test]
fn spearmanr_monotonic_nonlinear_is_one() {
let x = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let y = arr1(vec![1.0, 4.0, 9.0, 16.0, 25.0]);
let (rho, _) = spearmanr(&x, &y).unwrap();
assert!((rho - 1.0).abs() < 1e-12);
}
#[test]
fn ttest_1samp_known() {
let a = arr1(vec![4.0, 5.0, 6.0]);
let (t, p) = ttest_1samp(&a, 5.0).unwrap();
assert!(t.abs() < 1e-12);
assert!((p - 1.0).abs() < 1e-12);
}
#[test]
fn ttest_1samp_strong_signal() {
let a = arr1(vec![10.0, 11.0, 9.0, 10.5, 9.5, 10.0]);
let (_, p) = ttest_1samp(&a, 0.0).unwrap();
assert!(p < 1e-6);
}
#[test]
fn ttest_ind_identical_samples_t_zero() {
let a = arr1(vec![1.0, 2.0, 3.0, 4.0]);
let b = arr1(vec![1.0, 2.0, 3.0, 4.0]);
let (t, p) = ttest_ind(&a, &b).unwrap();
assert!(t.abs() < 1e-12);
assert!((p - 1.0).abs() < 1e-12);
}
#[test]
fn ttest_ind_strong_difference() {
let a = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let b = arr1(vec![10.0, 11.0, 12.0, 13.0, 14.0]);
let (_, p) = ttest_ind(&a, &b).unwrap();
assert!(p < 1e-3, "p too large: {p}");
}
#[test]
fn ks_2samp_identical_samples_d_zero() {
let a = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let b = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let (d, p) = ks_2samp(&a, &b).unwrap();
assert!(d < 1e-12);
assert!(p > 0.99);
}
#[test]
fn ks_2samp_clearly_different_distributions() {
let a = arr1(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]);
let b = arr1(vec![
10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0,
]);
let (d, p) = ks_2samp(&a, &b).unwrap();
assert!((d - 1.0).abs() < 1e-12);
assert!(p < 0.01);
}
#[test]
fn ks_2samp_partial_overlap() {
let a = arr1(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let b = arr1(vec![3.0, 4.0, 5.0, 6.0, 7.0]);
let (d, _p) = ks_2samp(&a, &b).unwrap();
assert!(d > 0.0 && d < 1.0);
}
#[test]
fn empty_inputs_error() {
let empty = arr1(vec![]);
let one = arr1(vec![1.0]);
let two = arr1(vec![1.0, 2.0]);
assert!(pearsonr(&one, &one).is_err());
assert!(spearmanr(&one, &one).is_err());
assert!(ttest_1samp(&one, 0.0).is_err());
assert!(ttest_ind(&one, &two).is_err());
assert!(ks_2samp(&empty, &two).is_err());
}
#[test]
fn chi2_independent_table_yields_high_p() {
let obs = Array::<f64, ferray_core::Ix2>::from_vec(
ferray_core::Ix2::new([2, 2]),
vec![10.0, 10.0, 20.0, 20.0],
)
.unwrap();
let r = chi2_contingency(&obs).unwrap();
assert!(r.statistic.abs() < 1e-12);
assert!((r.p_value - 1.0).abs() < 1e-12);
assert_eq!(r.dof, 1);
}
#[test]
fn chi2_strong_dependence_low_p() {
let obs = Array::<f64, ferray_core::Ix2>::from_vec(
ferray_core::Ix2::new([2, 2]),
vec![100.0, 0.0, 0.0, 100.0],
)
.unwrap();
let r = chi2_contingency(&obs).unwrap();
assert!(r.statistic > 100.0);
assert!(r.p_value < 1e-6);
assert_eq!(r.dof, 1);
}
#[test]
fn chi2_known_value_2x3() {
let obs = Array::<f64, ferray_core::Ix2>::from_vec(
ferray_core::Ix2::new([2, 3]),
vec![10.0, 20.0, 30.0, 40.0, 50.0, 60.0],
)
.unwrap();
let r = chi2_contingency(&obs).unwrap();
assert_eq!(r.dof, 2);
assert!(r.p_value > 0.0 && r.p_value <= 1.0);
let e00 = r.expected.as_slice().unwrap()[0];
assert!((e00 - 60.0 * 50.0 / 210.0).abs() < 1e-10);
}
#[test]
fn chi2_zero_row_total_errors() {
let obs = Array::<f64, ferray_core::Ix2>::from_vec(
ferray_core::Ix2::new([2, 2]),
vec![0.0, 0.0, 1.0, 1.0],
)
.unwrap();
assert!(chi2_contingency(&obs).is_err());
}
#[test]
fn chi2_empty_table_errors() {
let obs = Array::<f64, ferray_core::Ix2>::from_vec(ferray_core::Ix2::new([0, 0]), vec![])
.unwrap();
assert!(chi2_contingency(&obs).is_err());
}
#[test]
fn gamma_q_known_values() {
assert!((gamma_q(1.0, 1.0) - (-1.0_f64).exp()).abs() < 1e-12);
assert!((gamma_q(1.0, 2.0) - (-2.0_f64).exp()).abs() < 1e-12);
assert!((gamma_q(2.5, 0.0) - 1.0).abs() < 1e-12);
}
#[test]
fn betainc_known_values() {
assert!((betainc(1.0, 1.0, 0.5) - 0.5).abs() < 1e-10);
assert_eq!(betainc(2.0, 3.0, 0.0), 0.0);
assert_eq!(betainc(2.0, 3.0, 1.0), 1.0);
}
}