use std::fmt;
use num::Float;
use statrs::distribution::{
ContinuousCDF,
Normal,
};
use statrs::statistics::Statistics;
pub fn pearson_r<X, Y>(
x: &[X],
y: &[Y],
) -> f64
where
X: num::ToPrimitive
+ num::FromPrimitive
+ PartialOrd
+ Copy
+ Clone
+ num::traits::NumOps
+ fmt::Debug,
Y: num::ToPrimitive
+ num::FromPrimitive
+ PartialOrd
+ Copy
+ Clone
+ num::traits::NumOps
+ fmt::Debug, {
if x.len() != y.len() {
return 0.0;
}
if x.is_empty() {
return 0.0;
}
let x_f64 = x.iter().map(|x| x.to_f64().unwrap()).collect::<Vec<_>>();
let y_f64 = y.iter().map(|y| y.to_f64().unwrap()).collect::<Vec<_>>();
let x_mean = x_f64.iter().mean();
let y_mean = y_f64.iter().mean();
let numerator = x_f64
.iter()
.zip(y_f64.iter())
.map(|(valx, valy)| (valx - x_mean) * (valy - y_mean))
.sum::<f64>();
let denominator = {
let x_dev: f64 = x_f64.iter().map(|valx| (valx - x_mean).powi(2)).sum();
let y_dev: f64 = y_f64.iter().map(|valy| (valy - y_mean).powi(2)).sum();
(x_dev * y_dev).sqrt()
};
if denominator == 0.0 {
return 0.0;
}
let r = numerator / denominator;
r
}
#[derive(Debug)]
struct Observation<F: Float> {
value: F,
group: usize,
rank: f64,
}
pub fn mann_whitney_u<F: Float>(
group1: &[F],
group2: &[F],
) -> (f64, f64) {
let n1 = F::from(group1.len()).unwrap();
let n2 = F::from(group2.len()).unwrap();
let n_total = n1 + n2;
if group1.is_empty() || group2.is_empty() {
return (0.0, 1.0);
}
let mut observations: Vec<Observation<F>> = group1
.iter()
.map(|&v| {
Observation {
value: v,
group: 0,
rank: 0.0,
}
})
.chain(group2.iter().map(|&v| {
Observation {
value: v,
group: 1,
rank: 0.0,
}
}))
.collect();
observations.sort_by(|a, b| {
a.value
.partial_cmp(&b.value)
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut tie_groups: Vec<usize> = Vec::new();
let mut i = 0;
while i < observations.len() {
let start = i;
let mut end = i + 1;
#[allow(unsafe_code)]
unsafe {
while end < observations.len()
&& (observations.get_unchecked(end).value
- observations.get_unchecked(start).value)
.abs()
< F::from(1e-6).unwrap()
{
end += 1;
}
}
let count = end - start;
let avg_rank = (start as f64 + 1.0 + end as f64) / 2.0;
for val in observations.iter_mut().take(end).skip(start) {
val.rank = avg_rank;
}
if count > 1 {
tie_groups.push(count);
}
i = end;
}
let r1: F = F::from(
observations
.iter()
.filter(|obs| obs.group == 0)
.map(|obs| obs.rank)
.sum::<f64>(),
)
.unwrap();
let u1 = r1 - n1 * (n1 + F::from(1.0).unwrap()) / F::from(2.0).unwrap();
let u2 = n1 * n2 - u1;
let u_stat = u1.min(u2);
let mean_u = n1 * n2 / F::from(2.0).unwrap();
let tie_sum = F::from(
tie_groups
.iter()
.map(|&t| (t * t * t - t) as f64)
.sum::<f64>(),
)
.unwrap();
let variance_u = n1 * n2 / F::from(12.0).unwrap()
* ((n_total + F::from(1.0).unwrap())
- tie_sum / (n_total * (n_total - F::from(1.0).unwrap())));
let z = if variance_u > F::from(0.0).unwrap() {
(u_stat - mean_u + F::from(0.5).unwrap()) / variance_u.sqrt()
}
else {
F::from(0.0).unwrap()
};
let z = z.to_f64().unwrap();
let normal = Normal::new(0.0, 1.0).unwrap();
let p_value = 2.0 * normal.cdf(-z.abs());
let p_value = p_value.min(1.0);
(u_stat.to_f64().unwrap(), p_value)
}
#[cfg(test)]
mod tests {
use assert_approx_eq::assert_approx_eq;
use crate::utils::{
mann_whitney_u,
pearson_r,
};
#[test]
fn test_utest() {
let group1 = vec![1.5, 2.3, 3.1, 4.8, 5.7, 5.6];
let group2 = vec![2.0, 3.5, 3.8, 4.0, 6.2, 3.5];
let (u, p) = mann_whitney_u(&group1, &group2);
let (_uleft, pleft) = mann_whitney_u(&group2, &group1);
assert_approx_eq!(pleft, p);
println!("Mann–Whitney U statistic: {:.3}", u);
println!("Two-tailed p–value: {:.5}", p);
}
#[test]
fn pearson_r_test() {
let x = vec![1, 2, 3, 4, 5, 6];
let y = vec![6, 7, 8, 9, 10, 11];
assert_eq!(pearson_r(&x, &y), 1f64);
}
}