pub mod intraclass;
use crate::error::StatsResult;
use crate::error_standardization::ErrorMessages;
use crate::{mean, std};
use scirs2_core::ndarray::{s, Array1, Array2, ArrayBase, Data, Dimension, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::parallel_ops::*;
#[inline(always)]
fn const_f64<F: Float + NumCast>(value: f64) -> F {
F::from(value).expect("Failed to convert constant to target float type")
}
#[allow(dead_code)]
pub fn pearson_r<F, D>(x: &ArrayBase<D, Ix1>, y: &ArrayBase<D, Ix1>) -> StatsResult<F>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ scirs2_core::simd_ops::SimdUnifiedOps
+ 'static,
D: Data<Elem = F>,
Ix1: Dimension,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if y.is_empty() {
return Err(ErrorMessages::empty_array("y"));
}
if x.len() != y.len() {
return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
}
let mean_x = mean(&x.view())?;
let mean_y = mean(&y.view())?;
let sum_xy: F;
let sum_x2: F;
let sum_y2: F;
use std::any::TypeId;
let n = x.len();
if TypeId::of::<F>() == TypeId::of::<f64>() && n >= 64 {
let x_dev: Vec<f64> = x
.iter()
.map(|&val| {
let val_f64: f64 = unsafe { std::mem::transmute_copy(&val) };
let mean_f64: f64 = unsafe { std::mem::transmute_copy(&mean_x) };
val_f64 - mean_f64
})
.collect();
let y_dev: Vec<f64> = y
.iter()
.map(|&val| {
let val_f64: f64 = unsafe { std::mem::transmute_copy(&val) };
let mean_f64: f64 = unsafe { std::mem::transmute_copy(&mean_y) };
val_f64 - mean_f64
})
.collect();
let sum_xy_f64 = scirs2_core::simd_ops::simd_dot_product_f64(&x_dev, &y_dev);
let sum_x2_f64 = scirs2_core::simd_ops::simd_dot_product_f64(&x_dev, &x_dev);
let sum_y2_f64 = scirs2_core::simd_ops::simd_dot_product_f64(&y_dev, &y_dev);
sum_xy = unsafe { std::mem::transmute_copy(&sum_xy_f64) };
sum_x2 = unsafe { std::mem::transmute_copy(&sum_x2_f64) };
sum_y2 = unsafe { std::mem::transmute_copy(&sum_y2_f64) };
} else if TypeId::of::<F>() == TypeId::of::<f32>() && n >= 64 {
let x_dev: Vec<f32> = x
.iter()
.map(|&val| {
let val_f32: f32 = unsafe { std::mem::transmute_copy(&val) };
let mean_f32: f32 = unsafe { std::mem::transmute_copy(&mean_x) };
val_f32 - mean_f32
})
.collect();
let y_dev: Vec<f32> = y
.iter()
.map(|&val| {
let val_f32: f32 = unsafe { std::mem::transmute_copy(&val) };
let mean_f32: f32 = unsafe { std::mem::transmute_copy(&mean_y) };
val_f32 - mean_f32
})
.collect();
let sum_xy_f32 = scirs2_core::simd_ops::simd_dot_product_f32(&x_dev, &y_dev);
let sum_x2_f32 = scirs2_core::simd_ops::simd_dot_product_f32(&x_dev, &x_dev);
let sum_y2_f32 = scirs2_core::simd_ops::simd_dot_product_f32(&y_dev, &y_dev);
sum_xy = unsafe { std::mem::transmute_copy(&sum_xy_f32) };
sum_x2 = unsafe { std::mem::transmute_copy(&sum_x2_f32) };
sum_y2 = unsafe { std::mem::transmute_copy(&sum_y2_f32) };
} else {
let mut sum_xy_tmp = F::zero();
let mut sum_x2_tmp = F::zero();
let mut sum_y2_tmp = F::zero();
for i in 0..n {
let x_dev = x[i] - mean_x;
let y_dev = y[i] - mean_y;
sum_xy_tmp = sum_xy_tmp + x_dev * y_dev;
sum_x2_tmp = sum_x2_tmp + x_dev * x_dev;
sum_y2_tmp = sum_y2_tmp + y_dev * y_dev;
}
sum_xy = sum_xy_tmp;
sum_x2 = sum_x2_tmp;
sum_y2 = sum_y2_tmp;
}
if sum_x2 <= F::epsilon() || sum_y2 <= F::epsilon() {
return Err(ErrorMessages::numerical_instability(
"correlation calculation",
"Cannot compute correlation when one or both variables have zero variance. Check that your data has sufficient variation."
));
}
let corr = sum_xy / (sum_x2 * sum_y2).sqrt();
let corr = if corr > F::one() {
F::one()
} else if corr < -F::one() {
-F::one()
} else {
corr
};
Ok(corr)
}
#[allow(dead_code)]
pub fn spearman_r<F, D>(x: &ArrayBase<D, Ix1>, y: &ArrayBase<D, Ix1>) -> StatsResult<F>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ scirs2_core::simd_ops::SimdUnifiedOps
+ 'static,
D: Data<Elem = F>,
Ix1: Dimension,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if y.is_empty() {
return Err(ErrorMessages::empty_array("y"));
}
if x.len() != y.len() {
return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
}
let mut x_idx = vec![];
let mut y_idx = vec![];
for (i, (&x_val, &y_val)) in x.iter().zip(y.iter()).enumerate() {
x_idx.push((x_val, i));
y_idx.push((y_val, i));
}
x_idx.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
y_idx.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
let mut x_ranks = vec![F::zero(); x.len()];
let mut y_ranks = vec![F::zero(); y.len()];
assign_ranks(&x_idx, &mut x_ranks)?;
assign_ranks(&y_idx, &mut y_ranks)?;
let x_ranks = scirs2_core::ndarray::Array1::from(x_ranks);
let y_ranks = scirs2_core::ndarray::Array1::from(y_ranks);
pearson_r::<F, _>(&x_ranks.view(), &y_ranks.view())
}
#[allow(dead_code)]
fn assign_ranks<F: Float>(sorteddata: &[(F, usize)], ranks: &mut [F]) -> StatsResult<()> {
let n = sorteddata.len();
let mut i = 0;
while i < n {
let current_val = sorteddata[i].0;
let mut j = i;
while j < n - 1 && sorteddata[j + 1].0 == current_val {
j += 1;
}
let avg_rank = F::from((i + j) as f64 / 2.0 + 1.0).expect("Test/example failed");
for item in sorteddata.iter().take(j + 1).skip(i) {
let original_idx = item.1;
ranks[original_idx] = avg_rank;
}
i = j + 1;
}
Ok(())
}
#[allow(dead_code)]
pub fn kendall_tau<F, D>(
x: &ArrayBase<D, Ix1>,
y: &ArrayBase<D, Ix1>,
method: &str,
) -> StatsResult<F>
where
F: Float + std::fmt::Debug + NumCast + std::iter::Sum<F> + std::fmt::Display,
D: Data<Elem = F>,
Ix1: Dimension,
{
if method != "b" && method != "c" {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Method must be 'b' or 'c', got '{}'. Use 'b' for Kendall tau-b or 'c' for Kendall tau-c.",
method
)));
}
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if y.is_empty() {
return Err(ErrorMessages::empty_array("y"));
}
if x.len() != y.len() {
return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
}
let mut concordant = 0;
let mut discordant = 0;
let mut ties_x = 0;
let mut ties_y = 0;
let mut _ties_xy = 0;
for i in 0..x.len() {
for j in (i + 1)..x.len() {
let x_diff = x[j] - x[i];
let y_diff = y[j] - y[i];
if x_diff.is_zero() && y_diff.is_zero() {
_ties_xy += 1;
} else if x_diff.is_zero() {
ties_x += 1;
} else if y_diff.is_zero() {
ties_y += 1;
} else if (x_diff > F::zero() && y_diff > F::zero())
|| (x_diff < F::zero() && y_diff < F::zero())
{
concordant += 1;
} else {
discordant += 1;
}
}
}
let n = x.len();
let _n0 = F::from(n * (n - 1) / 2).expect("Test/example failed");
let tau = match method {
"b" => {
let n1 = F::from(concordant + discordant + ties_x).expect("Failed to convert to float");
let n2 = F::from(concordant + discordant + ties_y).expect("Failed to convert to float");
if n1 == F::zero() || n2 == F::zero() {
return Err(crate::error::StatsError::InvalidArgument(
"Cannot compute Kendall's tau-b when all values are tied in one variable. Ensure both variables have some variation.".to_string(),
));
}
F::from(concordant - discordant).expect("Failed to convert to float") / (n1 * n2).sqrt()
}
"c" => {
let m = F::from(n.min(2)).expect("Test/example failed");
(const_f64::<F>(2.0)
* m
* F::from(concordant - discordant).expect("Failed to convert to float"))
/ (F::from(n).expect("Failed to convert to float").powi(2) * (m - F::one()))
}
_ => unreachable!(), };
Ok(tau)
}
#[allow(dead_code)]
pub fn partial_corr<F, D1, D2>(
x: &ArrayBase<D1, Ix1>,
y: &ArrayBase<D1, Ix1>,
z: &ArrayBase<D2, Ix2>,
) -> StatsResult<F>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ 'static
+ scirs2_core::simd_ops::SimdUnifiedOps,
D1: Data<Elem = F>,
D2: Data<Elem = F>,
Ix1: Dimension,
Ix2: Dimension,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if y.is_empty() {
return Err(ErrorMessages::empty_array("y"));
}
if x.len() != y.len() {
return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
}
if x.len() != z.shape()[0] {
return Err(ErrorMessages::length_mismatch(
"x/y",
x.len(),
"z rows",
z.shape()[0],
));
}
let x_resid = compute_residuals(x, z)?;
let y_resid = compute_residuals(y, z)?;
pearson_r::<F, _>(&x_resid.view(), &y_resid.view())
}
#[allow(dead_code)]
pub fn partial_corrr<F, D1, D2>(
x: &ArrayBase<D1, Ix1>,
y: &ArrayBase<D1, Ix1>,
z: &ArrayBase<D2, Ix2>,
alternative: &str,
) -> StatsResult<(F, F)>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ 'static
+ scirs2_core::simd_ops::SimdUnifiedOps,
D1: Data<Elem = F>,
D2: Data<Elem = F>,
{
let pr = partial_corr::<F, D1, D2>(x, y, z)?;
let n = x.len();
let p = z.shape()[1];
let df = F::from(n - 2 - p).expect("Failed to convert to float");
if df <= const_f64::<F>(2.0) {
return Ok((pr, F::one()));
}
match alternative {
"two-sided" | "less" | "greater" => {}
_ => {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Invalid alternative parameter: '{}'. Use 'two-sided', 'less', or 'greater'.",
alternative
)));
}
}
let t_stat = if pr.abs() >= F::one() {
if pr > F::zero() {
const_f64::<F>(1e6) } else {
const_f64::<F>(-1e6) }
} else {
pr * (df / (F::one() - pr * pr)).sqrt()
};
let p_value = match alternative {
"less" => {
if pr >= F::zero() {
F::one() } else {
student_t_cdf(t_stat, df)
}
}
"greater" => {
if pr <= F::zero() {
F::one() } else {
F::one() - student_t_cdf(t_stat, df)
}
}
_ => {
const_f64::<F>(2.0) * (F::one() - student_t_cdf(t_stat.abs(), df))
}
};
Ok((pr, p_value))
}
#[allow(dead_code)]
fn compute_residuals<F, D1, D2>(
y: &ArrayBase<D1, Ix1>,
x: &ArrayBase<D2, Ix2>,
) -> StatsResult<scirs2_core::ndarray::Array1<F>>
where
F: Float + std::fmt::Debug + NumCast + std::iter::Sum<F> + 'static,
D1: Data<Elem = F>,
D2: Data<Elem = F>,
{
let n = y.len();
let p = x.shape()[1];
let mut x_with_const = Array2::<F>::ones((n, p + 1));
for i in 0..n {
for j in 0..p {
x_with_const[[i, j + 1]] = x[[i, j]];
}
}
let xtx = x_with_const.t().dot(&x_with_const);
let mut xty = Array1::<F>::zeros(p + 1);
for j in 0..p + 1 {
for i in 0..n {
xty[j] = xty[j] + x_with_const[[i, j]] * y[i];
}
}
let beta = simple_linear_solve(&xtx, &xty)?;
let mut y_hat = Array1::<F>::zeros(n);
for i in 0..n {
let mut pred = F::zero();
for j in 0..p + 1 {
pred = pred + x_with_const[[i, j]] * beta[j];
}
y_hat[i] = pred;
}
let mut residuals = Array1::<F>::zeros(n);
for i in 0..n {
residuals[i] = y[i] - y_hat[i];
}
Ok(residuals)
}
#[allow(dead_code)]
fn simple_linear_solve<F>(
a: &scirs2_core::ndarray::Array2<F>,
b: &scirs2_core::ndarray::Array1<F>,
) -> StatsResult<scirs2_core::ndarray::Array1<F>>
where
F: Float + std::fmt::Debug + NumCast + 'static,
{
let n = a.shape()[0];
if a.shape()[0] != a.shape()[1] {
return Err(crate::error::StatsError::InvalidArgument(
"Coefficient matrix must be square for linear system solving.".to_string(),
));
}
if a.shape()[0] != b.len() {
return Err(ErrorMessages::dimension_mismatch(
&format!("{}x{} matrix", a.shape()[0], a.shape()[1]),
&format!("vector of length {}", b.len()),
));
}
let mut aug = Array2::<F>::zeros((n, n + 1));
for i in 0..n {
for j in 0..n {
aug[[i, j]] = a[[i, j]];
}
aug[[i, n]] = b[i];
}
for i in 0..n {
let mut max_row = i;
for j in (i + 1)..n {
if aug[[j, i]].abs() > aug[[max_row, i]].abs() {
max_row = j;
}
}
if max_row != i {
for j in 0..=n {
let temp = aug[[i, j]];
aug[[i, j]] = aug[[max_row, j]];
aug[[max_row, j]] = temp;
}
}
if aug[[i, i]].abs() <= F::epsilon() {
return Err(crate::error::StatsError::InvalidArgument(
"Coefficient matrix is singular and cannot be solved. Try regularization or check for linear dependencies.".to_string(),
));
}
for j in (i + 1)..n {
let factor = aug[[j, i]] / aug[[i, i]];
for k in i..=n {
aug[[j, k]] = aug[[j, k]] - factor * aug[[i, k]];
}
}
}
let mut x = scirs2_core::ndarray::Array1::zeros(n);
for i in (0..n).rev() {
let mut sum = F::zero();
for j in (i + 1)..n {
sum = sum + aug[[i, j]] * x[j];
}
x[i] = (aug[[i, n]] - sum) / aug[[i, i]];
}
Ok(x)
}
#[allow(dead_code)]
pub fn point_biserial<F, D>(
binary: &ArrayBase<D, Ix1>,
continuous: &ArrayBase<D, Ix1>,
) -> StatsResult<F>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ Send
+ Sync
+ scirs2_core::simd_ops::SimdUnifiedOps,
D: Data<Elem = F>,
Ix1: Dimension,
{
if binary.len() != continuous.len() {
return Err(ErrorMessages::length_mismatch(
"binary",
binary.len(),
"continuous",
continuous.len(),
));
}
if binary.is_empty() {
return Err(ErrorMessages::empty_array("binary"));
}
for &val in binary.iter() {
if val != F::zero() && val != F::one() {
return Err(crate::error::StatsError::InvalidArgument(
"Binary variable must contain only 0 and 1 values for point-biserial correlation."
.to_string(),
));
}
}
let mut n1 = 0;
let mut n0 = 0;
for &val in binary.iter() {
if val == F::one() {
n1 += 1;
} else {
n0 += 1;
}
}
if n1 == 0 || n0 == 0 {
return Err(crate::error::StatsError::InvalidArgument(
"Binary variable must have at least one 0 and one 1 for meaningful correlation."
.to_string(),
));
}
let mut mean1 = F::zero();
let mut mean0 = F::zero();
for i in 0..binary.len() {
if binary[i] == F::one() {
mean1 = mean1 + continuous[i];
} else {
mean0 = mean0 + continuous[i];
}
}
mean1 = mean1 / F::from(n1).expect("Failed to convert to float");
mean0 = mean0 / F::from(n0).expect("Failed to convert to float");
let std_y = std(&continuous.view(), 0, None)?;
let n = F::from(binary.len()).expect("Test/example failed");
let n1_f = F::from(n1).expect("Failed to convert to float");
let n0_f = F::from(n0).expect("Failed to convert to float");
let corr = ((mean1 - mean0) / std_y) * (n1_f * n0_f / (n * n)).sqrt();
Ok(corr)
}
#[allow(dead_code)]
pub fn point_biserialr<F, D>(
binary: &ArrayBase<D, Ix1>,
continuous: &ArrayBase<D, Ix1>,
alternative: &str,
) -> StatsResult<(F, F)>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ Send
+ Sync
+ scirs2_core::simd_ops::SimdUnifiedOps,
D: Data<Elem = F>,
{
let rpb = point_biserial::<F, D>(binary, continuous)?;
let n = binary.len();
if n <= 3 {
return Ok((rpb, F::one()));
}
match alternative {
"two-sided" | "less" | "greater" => {}
_ => {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Invalid alternative parameter: '{}'. Use 'two-sided', 'less', or 'greater'.",
alternative
)));
}
}
let df = F::from(n - 2).expect("Failed to convert to float");
let t_stat = if rpb.abs() >= F::one() {
if rpb > F::zero() {
const_f64::<F>(1e6) } else {
const_f64::<F>(-1e6) }
} else {
rpb * (df / (F::one() - rpb * rpb)).sqrt()
};
let p_value = match alternative {
"less" => {
if rpb >= F::zero() {
F::one() } else {
student_t_cdf(t_stat, df)
}
}
"greater" => {
if rpb <= F::zero() {
F::one() } else {
F::one() - student_t_cdf(t_stat, df)
}
}
_ => {
const_f64::<F>(2.0) * (F::one() - student_t_cdf(t_stat.abs(), df))
}
};
Ok((rpb, p_value))
}
#[allow(dead_code)]
pub fn corrcoef<F, D>(
data: &ArrayBase<D, Ix2>,
method: &str,
) -> StatsResult<scirs2_core::ndarray::Array2<F>>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ std::marker::Send
+ scirs2_core::simd_ops::SimdUnifiedOps
+ 'static,
D: Data<Elem = F> + Sync,
Ix2: Dimension,
{
match method {
"pearson" | "spearman" | "kendall" => {}
_ => {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Method must be 'pearson', 'spearman', or 'kendall', got '{}'.",
method
)))
}
}
let (n, p) = (data.shape()[0], data.shape()[1]);
if n == 0 || p == 0 {
return Err(ErrorMessages::empty_array("data"));
}
let mut corr_mat = Array2::<F>::zeros((p, p));
for i in 0..p {
corr_mat[[i, i]] = F::one();
}
let mut pairs = Vec::new();
for i in 0..p {
for j in (i + 1)..p {
pairs.push((i, j));
}
}
let correlations: StatsResult<Vec<((usize, usize), F)>> = if pairs.len() > 50 {
pairs
.par_iter()
.map(|&(i, j)| {
let var_i = data.slice(s![.., i]);
let var_j = data.slice(s![.., j]);
let corr = match method {
"pearson" => pearson_r::<F, _>(&var_i, &var_j)?,
"spearman" => spearman_r::<F, _>(&var_i, &var_j)?,
"kendall" => kendall_tau::<F, _>(&var_i, &var_j, "b")?,
_ => unreachable!(),
};
Ok(((i, j), corr))
})
.collect()
} else {
pairs
.iter()
.map(|&(i, j)| {
let var_i = data.slice(s![.., i]);
let var_j = data.slice(s![.., j]);
let corr = match method {
"pearson" => pearson_r::<F, _>(&var_i, &var_j)?,
"spearman" => spearman_r::<F, _>(&var_i, &var_j)?,
"kendall" => kendall_tau::<F, _>(&var_i, &var_j, "b")?,
_ => unreachable!(),
};
Ok(((i, j), corr))
})
.collect()
};
for ((i, j), corr) in correlations? {
corr_mat[[i, j]] = corr;
corr_mat[[j, i]] = corr;
}
Ok(corr_mat)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_pearson_correlation() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
let y = array![2.0, 4.0, 6.0, 8.0, 10.0];
let corr = pearson_r(&x.view(), &y.view()).expect("Test/example failed");
assert_abs_diff_eq!(corr, 1.0, epsilon = 1e-10);
let y = array![10.0, 8.0, 6.0, 4.0, 2.0];
let corr = pearson_r(&x.view(), &y.view()).expect("Test/example failed");
assert_abs_diff_eq!(corr, -1.0, epsilon = 1e-10);
let y = array![5.0, 2.0, 8.0, 1.0, 9.0];
let corr = pearson_r(&x.view(), &y.view()).expect("Test/example failed");
assert!(corr.abs() < 0.5); }
#[test]
fn test_spearman_correlation() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
let y = array![1.0, 4.0, 9.0, 16.0, 25.0];
let corr = spearman_r(&x.view(), &y.view()).expect("Test/example failed");
assert_abs_diff_eq!(corr, 1.0, epsilon = 1e-10);
let y = array![25.0, 16.0, 9.0, 4.0, 1.0];
let corr = spearman_r(&x.view(), &y.view()).expect("Test/example failed");
assert_abs_diff_eq!(corr, -1.0, epsilon = 1e-10);
}
#[test]
fn test_kendall_tau() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
let y = array![10.0, 11.0, 12.0, 13.0, 14.0];
let corr = kendall_tau(&x.view(), &y.view(), "b").expect("Test/example failed");
assert_abs_diff_eq!(corr, 1.0, epsilon = 1e-10);
let y = array![14.0, 13.0, 12.0, 11.0, 10.0];
let corr = kendall_tau(&x.view(), &y.view(), "b").expect("Test/example failed");
assert_abs_diff_eq!(corr, -1.0, epsilon = 1e-10);
let x = array![1.0, 2.0, 3.0, 3.0, 5.0];
let y = array![10.0, 11.0, 12.0, 12.0, 14.0];
let corr = kendall_tau(&x.view(), &y.view(), "b").expect("Test/example failed");
assert!(corr > 0.9); }
#[test]
fn test_correlation_matrix() {
let data = array![
[1.0, 5.0, 10.0],
[2.0, 4.0, 9.0],
[3.0, 3.0, 8.0],
[4.0, 2.0, 7.0],
[5.0, 1.0, 6.0]
];
let corr_mat = corrcoef(&data.view(), "pearson").expect("Test/example failed");
assert_abs_diff_eq!(corr_mat[[0, 0]], 1.0, epsilon = 1e-10);
assert_abs_diff_eq!(corr_mat[[1, 1]], 1.0, epsilon = 1e-10);
assert_abs_diff_eq!(corr_mat[[2, 2]], 1.0, epsilon = 1e-10);
assert_abs_diff_eq!(corr_mat[[0, 1]], corr_mat[[1, 0]], epsilon = 1e-10);
assert_abs_diff_eq!(corr_mat[[0, 2]], corr_mat[[2, 0]], epsilon = 1e-10);
assert_abs_diff_eq!(corr_mat[[1, 2]], corr_mat[[2, 1]], epsilon = 1e-10);
assert_abs_diff_eq!(corr_mat[[0, 1]], -1.0, epsilon = 1e-10); assert_abs_diff_eq!(corr_mat[[0, 2]], -1.0, epsilon = 1e-10); }
#[test]
fn test_pearsonr_with_pvalue() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
let y = array![2.0, 4.0, 6.0, 8.0, 10.0];
let (r, p) = pearsonr(&x.view(), &y.view(), "two-sided").expect("Test/example failed");
assert_abs_diff_eq!(r, 1.0, epsilon = 1e-10);
assert!(p < 0.05);
let x_small = array![1.0, 2.0];
let y_small = array![2.0, 4.0];
let (_, p) =
pearsonr(&x_small.view(), &y_small.view(), "two-sided").expect("Test/example failed");
assert_abs_diff_eq!(p, 1.0, epsilon = 1e-10);
}
}
#[allow(dead_code)]
pub fn pearsonr<F, D>(
x: &ArrayBase<D, Ix1>,
y: &ArrayBase<D, Ix1>,
alternative: &str,
) -> StatsResult<(F, F)>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ scirs2_core::simd_ops::SimdUnifiedOps
+ 'static,
D: Data<Elem = F>,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if y.is_empty() {
return Err(ErrorMessages::empty_array("y"));
}
if x.len() != y.len() {
return Err(ErrorMessages::length_mismatch("x", x.len(), "y", y.len()));
}
let n = x.len();
if n < 2 {
return Err(ErrorMessages::insufficientdata(
"correlation analysis",
2,
n,
));
}
match alternative {
"two-sided" | "less" | "greater" => {}
_ => {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Invalid alternative parameter: '{}'. Use 'two-sided', 'less', or 'greater'.",
alternative
)));
}
}
let r = pearson_r::<F, D>(x, y)?;
if n == 2 {
return Ok((r, F::one()));
}
let r_abs = r.abs();
let df = F::from(n - 2).expect("Failed to convert to float");
let t_stat = r_abs * (df / (F::one() - r_abs * r_abs)).sqrt();
let p_value = match alternative {
"less" => {
if r >= F::zero() {
F::one() } else {
student_t_cdf(t_stat, df)
}
}
"greater" => {
if r <= F::zero() {
F::one() } else {
F::one() - student_t_cdf(t_stat, df)
}
}
_ => {
const_f64::<F>(2.0) * (F::one() - student_t_cdf(t_stat, df))
}
};
Ok((r, p_value))
}
#[allow(dead_code)]
fn student_t_cdf<F: Float + NumCast>(t: F, df: F) -> F {
let t_f64 = <f64 as NumCast>::from(t).expect("Test/example failed");
let df_f64 = <f64 as NumCast>::from(df).expect("Test/example failed");
let x = df_f64 / (df_f64 + t_f64 * t_f64);
let p = if t_f64 <= 0.0 {
0.5 * beta_cdf(x, df_f64 / 2.0, 0.5)
} else {
1.0 - 0.5 * beta_cdf(x, df_f64 / 2.0, 0.5)
};
F::from(p).expect("Failed to convert to float")
}
#[allow(dead_code)]
fn beta_cdf(x: f64, a: f64, b: f64) -> f64 {
if x <= 0.0 {
return 0.0;
}
if x >= 1.0 {
return 1.0;
}
if x <= (a / (a + b)) {
let beta_x = beta_incomplete(a, b, x);
let beta_full = beta_function(a, b);
beta_x / beta_full
} else {
let beta_x = beta_incomplete(b, a, 1.0 - x);
let beta_full = beta_function(a, b);
1.0 - beta_x / beta_full
}
}
#[allow(dead_code)]
fn beta_incomplete(a: f64, b: f64, x: f64) -> f64 {
if x <= 0.0 {
return 0.0;
}
if x >= 1.0 {
return beta_function(a, b);
}
if x < (a + 1.0) / (a + b + 2.0) {
let bt = beta_continued_fraction(a, b, x);
bt * x.powf(a) * (1.0 - x).powf(b) / a
} else {
let bt = beta_continued_fraction(b, a, 1.0 - x);
beta_function(a, b) - bt * (1.0 - x).powf(b) * x.powf(a) / b
}
}
#[allow(dead_code)]
fn beta_continued_fraction(a: f64, b: f64, x: f64) -> f64 {
let max_iter = 100;
let epsilon = 1e-10;
let qab = a + b;
let qap = a + 1.0;
let qam = a - 1.0;
let mut c = 1.0;
let mut d = 1.0 - qab * x / qap;
if d.abs() < epsilon {
d = epsilon;
}
d = 1.0 / d;
let mut h = d;
for m in 1..max_iter {
let m2 = 2 * m;
let aa = m as f64 * (b - m as f64) * x / ((qam + m2 as f64) * (a + m2 as f64));
d = 1.0 + aa * d;
if d.abs() < epsilon {
d = epsilon;
}
c = 1.0 + aa / c;
if c.abs() < epsilon {
c = epsilon;
}
d = 1.0 / d;
h *= d * c;
let aa = -(a + m as f64) * (qab + m as f64) * x / ((a + m2 as f64) * (qap + m2 as f64));
d = 1.0 + aa * d;
if d.abs() < epsilon {
d = epsilon;
}
c = 1.0 + aa / c;
if c.abs() < epsilon {
c = epsilon;
}
d = 1.0 / d;
h *= d * c;
if (d * c - 1.0).abs() < epsilon {
break;
}
}
h
}
#[allow(dead_code)]
fn beta_function(a: f64, b: f64) -> f64 {
gamma_function(a) * gamma_function(b) / gamma_function(a + b)
}
#[allow(dead_code)]
fn gamma_function(x: f64) -> f64 {
if x <= 0.0 {
panic!("Gamma function not defined for non-positive values");
}
if x < 0.5 {
return std::f64::consts::PI / ((std::f64::consts::PI * x).sin() * gamma_function(1.0 - x));
}
let p = [
676.5203681218851,
-1259.1392167224028,
771.323428777653,
-176.61502916214,
12.507343278687,
-0.1385710952657,
9.984369578019e-6,
1.50563273515e-7,
];
let z = x - 1.0;
let mut result = 0.9999999999998;
for (i, &value) in p.iter().enumerate() {
result += value / (z + (i + 1) as f64);
}
let t = z + p.len() as f64 - 0.5;
2.506628274631 * t.powf(z + 0.5) * (-t).exp() * result
}
#[allow(dead_code)]
pub fn spearmanr<F, D>(
x: &ArrayBase<D, Ix1>,
y: &ArrayBase<D, Ix1>,
alternative: &str,
) -> StatsResult<(F, F)>
where
F: Float
+ std::fmt::Debug
+ NumCast
+ std::iter::Sum<F>
+ std::fmt::Display
+ scirs2_core::simd_ops::SimdUnifiedOps
+ 'static,
D: Data<Elem = F>,
{
let rho = spearman_r::<F, D>(x, y)?;
let n = x.len();
if n <= 3 {
return Ok((rho, F::one()));
}
match alternative {
"two-sided" | "less" | "greater" => {}
_ => {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Invalid alternative parameter: '{}'. Use 'two-sided', 'less', or 'greater'.",
alternative
)));
}
}
let rho_abs = rho.abs();
let df = F::from(n - 2).expect("Failed to convert to float");
let t_stat = if rho_abs >= F::one() {
df.sqrt() * const_f64::<F>(1e6) } else {
rho * (df / (F::one() - rho * rho)).sqrt()
};
let p_value = match alternative {
"less" => {
if rho >= F::zero() {
F::one() } else {
student_t_cdf(t_stat, df)
}
}
"greater" => {
if rho <= F::zero() {
F::one() } else {
F::one() - student_t_cdf(t_stat, df)
}
}
_ => {
const_f64::<F>(2.0) * (F::one() - student_t_cdf(t_stat.abs(), df))
}
};
Ok((rho, p_value))
}
#[allow(dead_code)]
pub fn kendalltau<F, D>(
x: &ArrayBase<D, Ix1>,
y: &ArrayBase<D, Ix1>,
method: &str,
alternative: &str,
) -> StatsResult<(F, F)>
where
F: Float + std::fmt::Debug + NumCast + std::iter::Sum<F> + std::fmt::Display,
D: Data<Elem = F>,
{
let tau = kendall_tau::<F, D>(x, y, method)?;
let n = x.len();
if n <= 3 {
return Ok((tau, F::one()));
}
match alternative {
"two-sided" | "less" | "greater" => {}
_ => {
return Err(crate::error::StatsError::InvalidArgument(format!(
"Invalid alternative parameter: '{}'. Use 'two-sided', 'less', or 'greater'.",
alternative
)));
}
}
let n_f = F::from(n).expect("Failed to convert to float");
let mut concordant = 0;
let mut discordant = 0;
let mut ties_x = 0;
let mut ties_y = 0;
let mut _ties_xy = 0;
for i in 0..n {
for j in (i + 1)..n {
let x_diff = x[j] - x[i];
let y_diff = y[j] - y[i];
if x_diff.is_zero() && y_diff.is_zero() {
_ties_xy += 1;
} else if x_diff.is_zero() {
ties_x += 1;
} else if y_diff.is_zero() {
ties_y += 1;
} else if (x_diff > F::zero() && y_diff > F::zero())
|| (x_diff < F::zero() && y_diff < F::zero())
{
concordant += 1;
} else {
discordant += 1;
}
}
}
let n0 = n * (n - 1) / 2;
let n1 = concordant + discordant + ties_x;
let n2 = concordant + discordant + ties_y;
let var_tau = if method == "b" {
let n1_f = F::from(n1).expect("Failed to convert to float");
let n2_f = F::from(n2).expect("Failed to convert to float");
let n0_f = F::from(n0).expect("Failed to convert to float");
if n1 == 0 || n2 == 0 {
return Ok((tau, F::one()));
}
let v0 =
F::from(n * (n - 1) * (2 * n + 5)).expect("Operation failed") / const_f64::<F>(18.0);
let v1 = F::from(ties_x * (ties_x - 1) * (2 * ties_x + 5)).expect("Operation failed")
/ const_f64::<F>(18.0);
let v2 = F::from(ties_y * (ties_y - 1) * (2 * ties_y + 5)).expect("Operation failed")
/ const_f64::<F>(18.0);
let v = (v0 - v1 - v2) / (n1_f * n2_f).sqrt();
v / n0_f
} else {
let m = n.min(2);
let m_f = F::from(m).expect("Failed to convert to float");
(const_f64::<F>(2.0) * (const_f64::<F>(2.0) * m_f + const_f64::<F>(1.0)))
/ (const_f64::<F>(9.0) * m_f * n_f * (n_f - F::one()))
};
let z = tau / var_tau.sqrt();
let p_value = match alternative {
"less" => {
if tau >= F::zero() {
F::one() } else {
normal_cdf(z)
}
}
"greater" => {
if tau <= F::zero() {
F::one() } else {
F::one() - normal_cdf(z)
}
}
_ => {
const_f64::<F>(2.0) * F::min(normal_cdf(z.abs()), F::one() - normal_cdf(z.abs()))
}
};
Ok((tau, p_value))
}
#[allow(dead_code)]
fn normal_cdf<F: Float + NumCast>(z: F) -> F {
let z_f64 = <f64 as NumCast>::from(z).expect("Test/example failed");
let abs_z = z_f64.abs();
let t = 1.0 / (1.0 + 0.2316419 * abs_z);
let poly = t
* (0.319381530
+ t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
let p = if z_f64 >= 0.0 {
1.0 - (1.0 / (2.0 * std::f64::consts::PI).sqrt()) * (-0.5 * z_f64 * z_f64).exp() * poly
} else {
(1.0 / (2.0 * std::f64::consts::PI).sqrt()) * (-0.5 * z_f64 * z_f64).exp() * poly
};
F::from(p).expect("Failed to convert to float")
}