use crate::descriptive_simd::mean_simd;
use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::{AutoOptimizer, SimdUnifiedOps};
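/// Computes the covariance matrix of `data`, using SIMD kernels where the
/// `AutoOptimizer` deems them worthwhile and a scalar loop otherwise.
///
/// When `rowvar` is `true`, each row is a variable and each column an
/// observation; when `false`, the roles are swapped. `ddof` is the delta
/// degrees of freedom: the divisor is `n_obs - ddof`, so pass 1 for the
/// unbiased sample covariance and 0 for the population covariance.
///
/// A minimal usage sketch (marked `ignore` because the surrounding crate
/// paths are assumptions and may differ in your tree):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// // Two variables (columns) observed four times (rows).
/// let data = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
/// let cov = covariance_matrix_simd(&data.view(), false, 1)?;
/// assert_eq!(cov.shape(), &[2, 2]);
/// ```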
pub fn covariance_matrix_simd<F, D>(
data: &ArrayBase<D, Ix2>,
rowvar: bool,
ddof: usize,
) -> StatsResult<Array2<F>>
where
F: Float + NumCast + SimdUnifiedOps,
D: Data<Elem = F>,
{
let (n_vars, n_obs) = if rowvar {
(data.nrows(), data.ncols())
} else {
(data.ncols(), data.nrows())
};
if n_obs <= ddof {
return Err(StatsError::invalid_argument(
"Not enough observations for the given degrees of freedom",
));
}
let optimizer = AutoOptimizer::new();
let mut cov_matrix = Array2::zeros((n_vars, n_vars));
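    // Compute each variable's mean once; they are reused for every pair below.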
let means: Vec<F> = (0..n_vars)
.map(|i| {
let var = if rowvar {
data.slice(s![i, ..])
} else {
data.slice(s![.., i])
};
            // mean_simd is expected to fail only on empty input, which the
            // n_obs > ddof check above already rules out; fall back to zero
            // defensively rather than propagate from inside the closure.
            mean_simd(&var).unwrap_or_else(|_| F::zero())
})
.collect();
for i in 0..n_vars {
for j in i..n_vars {
let var_i = if rowvar {
data.slice(s![i, ..])
} else {
data.slice(s![.., i])
};
let var_j = if rowvar {
data.slice(s![j, ..])
} else {
data.slice(s![.., j])
};
let cov = if optimizer.should_use_simd(n_obs) {
let mean_i_array = Array1::from_elem(n_obs, means[i]);
let mean_j_array = Array1::from_elem(n_obs, means[j]);
let dev_i = F::simd_sub(&var_i, &mean_i_array.view());
let dev_j = F::simd_sub(&var_j, &mean_j_array.view());
let products = F::simd_mul(&dev_i.view(), &dev_j.view());
let sum_products = F::simd_sum(&products.view());
sum_products / F::from(n_obs - ddof).unwrap_or_else(|| F::one())
} else {
let mut sum = F::zero();
for k in 0..n_obs {
let dev_i = var_i[k] - means[i];
let dev_j = var_j[k] - means[j];
sum = sum + dev_i * dev_j;
}
sum / F::from(n_obs - ddof).unwrap_or_else(|| F::one())
};
cov_matrix[(i, j)] = cov;
if i != j {
                cov_matrix[(j, i)] = cov;
            }
}
}
Ok(cov_matrix)
}
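/// Computes Spearman's rank correlation coefficient between `x` and `y` by
/// ranking both inputs (ties receive their average rank) and applying the
/// Pearson formula to the ranks. This handles ties correctly, unlike the
/// classic `1 - 6 * sum(d^2) / (n * (n^2 - 1))` shortcut, which assumes all
/// ranks are distinct.
///
/// Illustrative call (`ignore`d since the surrounding paths are assumptions):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![1.0, 2.0, 3.0];
/// let y = array![9.0, 4.0, 1.0]; // strictly decreasing in x
/// let rho: f64 = spearman_r_simd(&x.view(), &y.view())?;
/// assert!((rho + 1.0).abs() < 1e-10); // perfect negative rank correlation
/// ```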
pub fn spearman_r_simd<F, D>(x: &ArrayBase<D, Ix1>, y: &ArrayBase<D, Ix1>) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps,
D: Data<Elem = F>,
{
if x.len() != y.len() {
return Err(StatsError::dimension_mismatch(
"Arrays must have the same length",
));
}
if x.is_empty() {
return Err(StatsError::invalid_argument("Arrays cannot be empty"));
}
let n = x.len();
let rank_x = compute_ranks(x);
let rank_y = compute_ranks(y);
let mean_rx = mean_simd(&rank_x.view())?;
let mean_ry = mean_simd(&rank_y.view())?;
let optimizer = AutoOptimizer::new();
if optimizer.should_use_simd(n) {
let mean_rx_array = Array1::from_elem(n, mean_rx);
let mean_ry_array = Array1::from_elem(n, mean_ry);
let rx_dev = F::simd_sub(&rank_x.view(), &mean_rx_array.view());
let ry_dev = F::simd_sub(&rank_y.view(), &mean_ry_array.view());
let xy_dev = F::simd_mul(&rx_dev.view(), &ry_dev.view());
let rx_dev_sq = F::simd_mul(&rx_dev.view(), &rx_dev.view());
let ry_dev_sq = F::simd_mul(&ry_dev.view(), &ry_dev.view());
let sum_xy = F::simd_sum(&xy_dev.view());
let sum_rx2 = F::simd_sum(&rx_dev_sq.view());
let sum_ry2 = F::simd_sum(&ry_dev_sq.view());
if sum_rx2 <= F::epsilon() || sum_ry2 <= F::epsilon() {
return Err(StatsError::invalid_argument(
"Cannot compute correlation when one or both variables have zero variance",
));
}
let corr = sum_xy / (sum_rx2 * sum_ry2).sqrt();
Ok(corr.max(-F::one()).min(F::one()))
} else {
let mut sum_xy = F::zero();
let mut sum_rx2 = F::zero();
let mut sum_ry2 = F::zero();
for i in 0..n {
let rx_dev = rank_x[i] - mean_rx;
let ry_dev = rank_y[i] - mean_ry;
sum_xy = sum_xy + rx_dev * ry_dev;
sum_rx2 = sum_rx2 + rx_dev * rx_dev;
sum_ry2 = sum_ry2 + ry_dev * ry_dev;
}
if sum_rx2 <= F::epsilon() || sum_ry2 <= F::epsilon() {
return Err(StatsError::invalid_argument(
"Cannot compute correlation when one or both variables have zero variance",
));
}
let corr = sum_xy / (sum_rx2 * sum_ry2).sqrt();
Ok(corr.max(-F::one()).min(F::one()))
}
}
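/// Assigns 1-based ranks to `data`, giving values that are tied (equal to
/// within `F::epsilon()`) the average of the ranks they would occupy: a tie
/// group at sorted positions `i..j` covers ranks `i+1..=j`, whose average is
/// `(i + j + 1) / 2`. NaNs are ordered via the `partial_cmp` fallback to
/// `Equal`, so inputs are assumed NaN-free.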
fn compute_ranks<F, D>(data: &ArrayBase<D, Ix1>) -> Array1<F>
where
F: Float + NumCast,
D: Data<Elem = F>,
{
let n = data.len();
let mut indexed: Vec<(usize, F)> = data.iter().copied().enumerate().collect();
indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let mut ranks = Array1::zeros(n);
let mut i = 0;
while i < n {
let mut j = i;
while j < n && (indexed[j].1 - indexed[i].1).abs() < F::epsilon() {
j += 1;
}
let avg_rank = F::from((i + j + 1) as f64 / 2.0).unwrap_or_else(|| F::zero());
for k in i..j {
ranks[indexed[k].0] = avg_rank;
}
i = j;
}
ranks
}
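/// Computes the first-order partial correlation of `x` and `y` controlling
/// for a single variable `z`, via the standard identity
/// `r_xy.z = (r_xy - r_xz * r_yz) / sqrt((1 - r_xz^2) * (1 - r_yz^2))`.
/// Multiple controlling variables (a `z` with more than one column) are not
/// yet supported and return `StatsError::not_implemented`.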
pub fn partial_correlation_simd<F>(
x: &ArrayView1<F>,
y: &ArrayView1<F>,
z: &ArrayView2<F>,
) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps,
{
if x.len() != y.len() || x.len() != z.nrows() {
return Err(StatsError::dimension_mismatch(
"All arrays must have compatible dimensions",
));
}
use crate::correlation_simd::pearson_r_simd;
if z.ncols() == 1 {
let z_col = z.column(0);
let rxy = pearson_r_simd(x, y)?;
let rxz = pearson_r_simd(x, &z_col)?;
let ryz = pearson_r_simd(y, &z_col)?;
let numerator = rxy - rxz * ryz;
let denominator = ((F::one() - rxz * rxz) * (F::one() - ryz * ryz)).sqrt();
if denominator <= F::epsilon() {
return Err(StatsError::invalid_argument(
"Cannot compute partial correlation - controlling variable perfectly predicts x or y",
));
}
Ok(numerator / denominator)
} else {
Err(StatsError::not_implemented(
"Partial correlation with multiple controlling variables not yet implemented",
))
}
}
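/// Computes the Pearson correlation of `x` and `y` over every contiguous
/// window of `window_size` elements, producing `n - window_size + 1` values.
///
/// A sketch of the expected output shape (`ignore`d; paths are assumptions):
///
/// ```ignore
/// let corr = rolling_correlation_simd(&x.view(), &y.view(), 3)?;
/// assert_eq!(corr.len(), x.len() - 3 + 1);
/// ```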
pub fn rolling_correlation_simd<F, D>(
x: &ArrayBase<D, Ix1>,
y: &ArrayBase<D, Ix1>,
window_size: usize,
) -> StatsResult<Array1<F>>
where
F: Float + NumCast + SimdUnifiedOps,
D: Data<Elem = F>,
{
if x.len() != y.len() {
return Err(StatsError::dimension_mismatch(
"Arrays must have the same length",
));
}
if window_size < 2 {
return Err(StatsError::invalid_argument(
"Window size must be at least 2",
));
}
let n = x.len();
if n < window_size {
return Err(StatsError::invalid_argument(
"Array length must be at least window size",
));
}
let n_windows = n - window_size + 1;
let mut result = Array1::zeros(n_windows);
use crate::correlation_simd::pearson_r_simd;
for i in 0..n_windows {
let x_window = x.slice(s![i..i + window_size]);
let y_window = y.slice(s![i..i + window_size]);
result[i] = pearson_r_simd(&x_window, &y_window)?;
}
Ok(result)
}
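/// Computes the full Pearson correlation matrix of `data` (observations in
/// rows, variables in columns). Means and standard deviations are computed
/// once per column, the strict upper-triangle pairs are evaluated in
/// parallel via `scirs2_core::parallel_ops`, and the results are mirrored
/// into a symmetric matrix with a unit diagonal. Zero-variance columns are
/// rejected up front, since Pearson r is undefined for them.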
pub fn simd_pearson_correlation_matrix(data: &ArrayView2<f64>) -> StatsResult<Array2<f64>> {
let n_obs = data.nrows();
let n_vars = data.ncols();
if n_obs < 2 {
return Err(StatsError::invalid_argument(
"At least 2 observations are required for Pearson correlation",
));
}
if n_vars < 2 {
return Err(StatsError::invalid_argument(
"At least 2 variables are required for a correlation matrix",
));
}
use scirs2_core::parallel_ops::*;
let optimizer = AutoOptimizer::new();
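    // First pass: per-column means (SIMD when the optimizer says it pays off).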
let means: Vec<f64> = (0..n_vars)
.map(|j| {
let col = data.column(j);
if optimizer.should_use_simd(n_obs) {
let arr = col.to_owned();
F64::simd_sum(&arr.view()) / n_obs as f64
} else {
col.iter().sum::<f64>() / n_obs as f64
}
})
.collect();
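    // Second pass: per-column population standard deviations.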
let stds: Vec<f64> = (0..n_vars)
.map(|j| {
let col = data.column(j);
let m = means[j];
let var: f64 = if optimizer.should_use_simd(n_obs) {
let arr = col.to_owned();
let mean_arr = Array1::from_elem(n_obs, m);
let dev = F64::simd_sub(&arr.view(), &mean_arr.view());
let sq = F64::simd_mul(&dev.view(), &dev.view());
F64::simd_sum(&sq.view()) / n_obs as f64
} else {
col.iter().map(|&x| (x - m).powi(2)).sum::<f64>() / n_obs as f64
};
var.sqrt()
})
.collect();
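    // Reject zero-variance columns before doing any pairwise work.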
for (j, &sd) in stds.iter().enumerate() {
if sd <= f64::EPSILON {
return Err(StatsError::invalid_argument(&format!(
"Variable {j} has zero variance; Pearson r is undefined"
)));
}
}
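    // Enumerate the strict upper triangle and evaluate each pair in parallel.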
let pairs: Vec<(usize, usize)> = (0..n_vars)
.flat_map(|i| ((i + 1)..n_vars).map(move |j| (i, j)))
.collect();
    let corrs: Result<Vec<((usize, usize), f64)>, StatsError> = pairs
.par_iter()
.map(|&(i, j)| {
let col_i = data.column(i).to_owned();
let col_j = data.column(j).to_owned();
let mi = means[i];
let mj = means[j];
let si = stds[i];
let sj = stds[j];
            let r: f64 = if optimizer.should_use_simd(n_obs) {
let mean_i_arr = Array1::from_elem(n_obs, mi);
let mean_j_arr = Array1::from_elem(n_obs, mj);
let dev_i = F64::simd_sub(&col_i.view(), &mean_i_arr.view());
let dev_j = F64::simd_sub(&col_j.view(), &mean_j_arr.view());
let products = F64::simd_mul(&dev_i.view(), &dev_j.view());
let cov = F64::simd_sum(&products.view()) / n_obs as f64;
cov / (si * sj)
} else {
let cov: f64 = col_i
.iter()
.zip(col_j.iter())
.map(|(&xi, &xj)| (xi - mi) * (xj - mj))
.sum::<f64>()
/ n_obs as f64;
cov / (si * sj)
};
Ok(((i, j), r.clamp(-1.0, 1.0)))
})
.collect();
let corrs = corrs?;
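    // Mirror the upper-triangle results into a symmetric matrix with a unit diagonal.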
let mut out = Array2::<f64>::zeros((n_vars, n_vars));
for j in 0..n_vars {
out[(j, j)] = 1.0;
}
for ((i, j), r) in corrs {
out[(i, j)] = r;
out[(j, i)] = r;
}
Ok(out)
}
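/// Monomorphic shim over `<f64 as SimdUnifiedOps>` so the `f64`-only
/// routines above can call the SIMD kernels without spelling out the fully
/// qualified trait syntax at every call site.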
struct F64;
impl F64 {
#[inline]
fn simd_sub(
a: &scirs2_core::ndarray::ArrayView1<f64>,
b: &scirs2_core::ndarray::ArrayView1<f64>,
) -> Array1<f64> {
<f64 as SimdUnifiedOps>::simd_sub(a, b)
}
#[inline]
fn simd_mul(
a: &scirs2_core::ndarray::ArrayView1<f64>,
b: &scirs2_core::ndarray::ArrayView1<f64>,
) -> Array1<f64> {
<f64 as SimdUnifiedOps>::simd_mul(a, b)
}
#[inline]
fn simd_sum(a: &scirs2_core::ndarray::ArrayView1<f64>) -> f64 {
<f64 as SimdUnifiedOps>::simd_sum(a)
}
}
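/// Computes Spearman's rho between a reference vector `x` and every column
/// of `ys`. The ranks of `x` are computed once and reused; the columns of
/// `ys` are ranked and correlated in parallel.
///
/// Illustrative call (`ignore`d; the crate paths are assumptions):
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Array2};
///
/// let x = array![1.0, 2.0, 3.0, 4.0];
/// // Every column equals [1, 2, 3, 4], so each rho should be 1.
/// let ys = Array2::from_shape_fn((4, 2), |(i, _)| (i + 1) as f64);
/// let rhos = simd_spearman_correlation_batch(&x.view(), &ys.view())?;
/// assert_eq!(rhos.len(), 2); // one rho per column of `ys`
/// ```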
pub fn simd_spearman_correlation_batch(
x: &ArrayView1<f64>,
ys: &ArrayView2<f64>,
) -> StatsResult<Array1<f64>> {
let n = x.len();
if n == 0 {
return Err(StatsError::invalid_argument(
"Reference vector x must not be empty",
));
}
if ys.nrows() != n {
return Err(StatsError::dimension_mismatch(
"Number of rows in ys must equal the length of x",
));
}
let m = ys.ncols();
if m == 0 {
return Err(StatsError::invalid_argument(
"ys must have at least one column",
));
}
use scirs2_core::parallel_ops::*;
let x_owned = x.to_owned();
let rank_x = compute_ranks(&x_owned.view());
let results: Result<Vec<f64>, StatsError> = (0..m)
.into_par_iter()
.map(|j| {
let col = ys.column(j).to_owned();
let rank_y = compute_ranks(&col.view());
spearman_r_simd_on_ranks(&rank_x.view(), &rank_y.view(), n)
})
.collect();
let results = results?;
Ok(Array1::from_vec(results))
}
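/// Pearson correlation of two pre-computed rank vectors; the core of the
/// batch Spearman routine above. Errors when either rank vector has zero
/// variance (all values tied), and clamps the result to [-1, 1] to absorb
/// floating-point round-off.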
fn spearman_r_simd_on_ranks(
rank_x: &ArrayView1<f64>,
rank_y: &ArrayView1<f64>,
n: usize,
) -> StatsResult<f64> {
let optimizer = AutoOptimizer::new();
let mean_rx: f64 = if optimizer.should_use_simd(n) {
F64::simd_sum(rank_x) / n as f64
} else {
rank_x.iter().sum::<f64>() / n as f64
};
let mean_ry: f64 = if optimizer.should_use_simd(n) {
F64::simd_sum(rank_y) / n as f64
} else {
rank_y.iter().sum::<f64>() / n as f64
};
let (sum_xy, sum_rx2, sum_ry2) = if optimizer.should_use_simd(n) {
let mrx = Array1::from_elem(n, mean_rx);
let mry = Array1::from_elem(n, mean_ry);
let rx_dev = F64::simd_sub(rank_x, &mrx.view());
let ry_dev = F64::simd_sub(rank_y, &mry.view());
let xy = F64::simd_mul(&rx_dev.view(), &ry_dev.view());
let rx2 = F64::simd_mul(&rx_dev.view(), &rx_dev.view());
let ry2 = F64::simd_mul(&ry_dev.view(), &ry_dev.view());
(
F64::simd_sum(&xy.view()),
F64::simd_sum(&rx2.view()),
F64::simd_sum(&ry2.view()),
)
} else {
let mut sxy = 0.0_f64;
let mut srx2 = 0.0_f64;
let mut sry2 = 0.0_f64;
for i in 0..n {
let dx = rank_x[i] - mean_rx;
let dy = rank_y[i] - mean_ry;
sxy += dx * dy;
srx2 += dx * dx;
sry2 += dy * dy;
}
(sxy, srx2, sry2)
};
if sum_rx2 <= f64::EPSILON || sum_ry2 <= f64::EPSILON {
return Err(StatsError::invalid_argument(
"Cannot compute Spearman r: one or both rank vectors have zero variance",
));
}
let corr = (sum_xy / (sum_rx2 * sum_ry2).sqrt()).clamp(-1.0, 1.0);
Ok(corr)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_covariance_matrix_simd() {
let data = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
        let cov = covariance_matrix_simd(&data.view(), false, 1).expect("covariance should succeed");
assert_abs_diff_eq!(cov[(0, 1)], cov[(1, 0)], epsilon = 1e-10);
assert!(cov[(0, 0)] > 0.0);
assert!(cov[(1, 1)] > 0.0);
}
#[test]
fn test_spearman_r_simd() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
let y = array![5.0, 4.0, 3.0, 2.0, 1.0];
        let rho = spearman_r_simd(&x.view(), &y.view()).expect("spearman should succeed");
assert_abs_diff_eq!(rho, -1.0, epsilon = 1e-10);
}
#[test]
fn test_rolling_correlation_simd() {
let x = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let y = array![10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
        let rolling_corr = rolling_correlation_simd(&x.view(), &y.view(), 3)
            .expect("rolling correlation should succeed");
assert_eq!(rolling_corr.len(), 8);
}
#[test]
fn test_simd_pearson_correlation_matrix_diagonal() {
let data = array![
[1.0_f64, 3.0, 7.0],
[2.0_f64, 2.0, 5.0],
[3.0_f64, 1.0, 3.0],
[4.0_f64, 4.0, 9.0],
];
let r = simd_pearson_correlation_matrix(&data.view()).expect("should succeed");
assert_eq!(r.shape(), &[3, 3]);
for i in 0..3 {
assert_abs_diff_eq!(r[(i, i)], 1.0_f64, epsilon = 1e-10);
}
}
#[test]
fn test_simd_pearson_correlation_matrix_symmetry() {
let data = array![
[1.0_f64, 4.0, 9.0],
[2.0_f64, 3.0, 6.0],
[3.0_f64, 2.0, 3.0],
[4.0_f64, 1.0, 0.0],
];
let r = simd_pearson_correlation_matrix(&data.view()).expect("should succeed");
for i in 0..3 {
for j in 0..3 {
assert_abs_diff_eq!(r[(i, j)], r[(j, i)], epsilon = 1e-12);
}
}
}
#[test]
fn test_simd_pearson_correlation_matrix_perfect_negative() {
let data = array![
[1.0_f64, -1.0_f64],
[2.0_f64, -2.0_f64],
[3.0_f64, -3.0_f64],
[4.0_f64, -4.0_f64],
];
let r = simd_pearson_correlation_matrix(&data.view()).expect("should succeed");
assert_abs_diff_eq!(r[(0, 1)], -1.0_f64, epsilon = 1e-10);
}
#[test]
fn test_simd_pearson_correlation_matrix_perfect_positive() {
let data = array![
[1.0_f64, 2.0_f64],
[2.0_f64, 4.0_f64],
[3.0_f64, 6.0_f64],
[4.0_f64, 8.0_f64],
];
let r = simd_pearson_correlation_matrix(&data.view()).expect("should succeed");
assert_abs_diff_eq!(r[(0, 1)], 1.0_f64, epsilon = 1e-10);
}
#[test]
fn test_simd_pearson_correlation_matrix_rejects_few_obs() {
        let data = array![[1.0_f64, 2.0_f64]];
        assert!(simd_pearson_correlation_matrix(&data.view()).is_err());
}
#[test]
fn test_simd_pearson_correlation_matrix_rejects_single_var() {
let data = array![[1.0_f64], [2.0_f64], [3.0_f64]];
assert!(simd_pearson_correlation_matrix(&data.view()).is_err());
}
#[test]
fn test_simd_pearson_correlation_matrix_values_in_range() {
use scirs2_core::ndarray::Array2;
let n_obs = 20;
let n_vars = 5;
let data = Array2::from_shape_fn((n_obs, n_vars), |(i, j)| {
((i * n_vars + j) as f64).sin() * 3.7 + 1.2
});
let r = simd_pearson_correlation_matrix(&data.view()).expect("should succeed");
for &val in r.iter() {
assert!(
val >= -1.0 - 1e-12 && val <= 1.0 + 1e-12,
"r={val} outside [-1,1]"
);
}
}
#[test]
fn test_simd_spearman_correlation_batch_perfect_positive() {
let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
use scirs2_core::ndarray::Array2;
let ys = Array2::from_shape_fn((5, 2), |(i, _j)| (i + 1) as f64);
let rhos = simd_spearman_correlation_batch(&x.view(), &ys.view()).expect("should succeed");
assert_eq!(rhos.len(), 2);
assert_abs_diff_eq!(rhos[0], 1.0_f64, epsilon = 1e-10);
assert_abs_diff_eq!(rhos[1], 1.0_f64, epsilon = 1e-10);
}
#[test]
fn test_simd_spearman_correlation_batch_perfect_negative() {
let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
use scirs2_core::ndarray::Array2;
let ys = Array2::from_shape_fn((5, 1), |(i, _j)| (5 - i) as f64);
let rhos = simd_spearman_correlation_batch(&x.view(), &ys.view()).expect("should succeed");
assert_abs_diff_eq!(rhos[0], -1.0_f64, epsilon = 1e-10);
}
#[test]
fn test_simd_spearman_correlation_batch_mixed() {
let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
use scirs2_core::ndarray::Array2;
let ys = Array2::from_shape_fn((5, 2), |(i, j)| {
if j == 0 {
(i + 1) as f64
} else {
(5 - i) as f64
}
});
let rhos = simd_spearman_correlation_batch(&x.view(), &ys.view()).expect("should succeed");
assert_abs_diff_eq!(rhos[0], 1.0_f64, epsilon = 1e-10);
assert_abs_diff_eq!(rhos[1], -1.0_f64, epsilon = 1e-10);
}
#[test]
fn test_simd_spearman_correlation_batch_output_length() {
let x = array![1.0_f64, 2.0, 3.0, 4.0];
use scirs2_core::ndarray::Array2;
let ys = Array2::from_shape_fn((4, 7), |(i, j)| (i * 7 + j) as f64);
let rhos = simd_spearman_correlation_batch(&x.view(), &ys.view()).expect("should succeed");
assert_eq!(rhos.len(), 7);
}
#[test]
fn test_simd_spearman_correlation_batch_values_in_range() {
let x = array![3.0_f64, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
use scirs2_core::ndarray::Array2;
let n_cols = 10;
let ys =
Array2::from_shape_fn((8, n_cols), |(i, j)| ((i + j * 3) as f64).cos() * 5.0 + 2.0);
let rhos = simd_spearman_correlation_batch(&x.view(), &ys.view()).expect("should succeed");
for &r in rhos.iter() {
assert!(
r >= -1.0 - 1e-12 && r <= 1.0 + 1e-12,
"rho={r} outside [-1,1]"
);
}
}
#[test]
fn test_simd_spearman_correlation_batch_rejects_empty_x() {
use scirs2_core::ndarray::{Array1, Array2};
let x: Array1<f64> = Array1::zeros(0);
let ys: Array2<f64> = Array2::zeros((0, 2));
assert!(simd_spearman_correlation_batch(&x.view(), &ys.view()).is_err());
}
#[test]
fn test_simd_spearman_correlation_batch_rejects_size_mismatch() {
let x = array![1.0_f64, 2.0, 3.0];
use scirs2_core::ndarray::Array2;
        let ys = Array2::from_shape_fn((5, 2), |(i, j)| (i + j) as f64);
        assert!(simd_spearman_correlation_batch(&x.view(), &ys.view()).is_err());
}
}