/// Computes the Pearson correlation coefficient between two equally sized samples.
///
/// # Returns
///
/// The coefficient `cov / (norm_x * norm_y)`, where the covariance term and the
/// norms are built from deviations around each slice's arithmetic mean.
///
/// Returns `0.0` instead when:
/// - the slices have different lengths,
/// - the slices are empty, or
/// - the square root of either slice's summed squared deviations is below `1e-10`
///   (the sample is effectively constant, so the ratio would be ill-defined).
pub(super) fn correlation(x: &[f32], y: &[f32]) -> f32 {
    // Nothing meaningful can be computed for empty or mismatched samples.
    if x.is_empty() || x.len() != y.len() {
        return 0.0;
    }

    let count = x.len() as f32;
    let mean_x = x.iter().sum::<f32>() / count;
    let mean_y = y.iter().sum::<f32>() / count;

    // One pass accumulating the cross term and both sums of squared deviations.
    let (cross, sq_x, sq_y) = x.iter().zip(y).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(cross, sq_x, sq_y), (&xi, &yi)| {
            let dx = xi - mean_x;
            let dy = yi - mean_y;
            (cross + dx * dy, sq_x + dx * dx, sq_y + dy * dy)
        },
    );

    let norm_x = sq_x.sqrt();
    let norm_y = sq_y.sqrt();

    // The common 1/n factors cancel between numerator and denominator, so the
    // unnormalized sums can be used directly.
    if norm_x < 1e-10 || norm_y < 1e-10 {
        0.0
    } else {
        cross / (norm_x * norm_y)
    }
}
/// Computes the pairwise Pearson correlation matrix of the features in `data`.
///
/// `data` is read as a row-major table: the value of feature `f` in row `r` is
/// `data[r * num_features + f]`.
///
/// # Returns
///
/// A row-major `num_features * num_features` vector. The matrix is symmetric and
/// every diagonal entry is exactly `1.0`. Means, variances and covariances use
/// the population form (divide by `num_rows`). Each standard deviation is floored
/// at `1e-10`, so a constant feature yields a correlation of `0.0` with every
/// other feature instead of dividing by zero.
///
/// When `num_rows` is `0` there is nothing to correlate; the identity matrix is
/// returned (off-diagonal entries `0.0`) rather than NaN values produced by
/// dividing by a zero row count.
///
/// # Panics
///
/// Panics with an out-of-bounds index if `num_rows` and `num_features` are both
/// non-zero and `data` holds fewer than `num_rows * num_features` values.
pub(super) fn compute_correlation_matrix(
    data: &[f32],
    num_features: usize,
    num_rows: usize,
) -> Vec<f32> {
    let mut correlations = vec![0.0f32; num_features * num_features];

    // Every feature is perfectly correlated with itself.
    for i in 0..num_features {
        correlations[i * num_features + i] = 1.0;
    }

    // Without rows every statistic below would be 0.0 / 0.0 = NaN.
    if num_rows == 0 {
        return correlations;
    }

    let n = num_rows as f32;
    let value = |row: usize, feature: usize| data[row * num_features + feature];

    let means: Vec<f32> = (0..num_features)
        .map(|f| (0..num_rows).map(|r| value(r, f)).sum::<f32>() / n)
        .collect();

    let stds: Vec<f32> = (0..num_features)
        .map(|f| {
            let variance = (0..num_rows)
                .map(|r| {
                    let diff = value(r, f) - means[f];
                    diff * diff
                })
                .sum::<f32>()
                / n;
            // Floor the deviation so constant features never cause a division by zero.
            variance.sqrt().max(1e-10)
        })
        .collect();

    // Only the upper triangle is computed; each value is mirrored below the diagonal.
    for i in 0..num_features {
        for j in (i + 1)..num_features {
            let covar = (0..num_rows)
                .map(|r| (value(r, i) - means[i]) * (value(r, j) - means[j]))
                .sum::<f32>()
                / n;
            let corr = covar / (stds[i] * stds[j]);
            correlations[i * num_features + j] = corr;
            correlations[j * num_features + i] = corr;
        }
    }

    correlations
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for every floating-point comparison below.
    const TOLERANCE: f32 = 1e-6;

    /// Fails the calling test unless `actual` is strictly within `TOLERANCE` of `expected`.
    #[track_caller]
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    /// `y = 2x` must give a coefficient of +1.
    #[test]
    fn test_correlation_perfect_positive() {
        let xs = [1.0f32, 2.0, 3.0, 4.0];
        let ys = [2.0f32, 4.0, 6.0, 8.0];
        assert_close(correlation(&xs, &ys), 1.0);
    }

    /// A strictly decreasing linear relation must give a coefficient of -1.
    #[test]
    fn test_correlation_perfect_negative() {
        let xs = [1.0f32, 2.0, 3.0, 4.0];
        let ys = [8.0f32, 6.0, 4.0, 2.0];
        assert_close(correlation(&xs, &ys), -1.0);
    }

    /// A constant sample has no spread, so the coefficient is reported as 0.
    #[test]
    fn test_correlation_zero_variance() {
        let constant = [5.0f32, 5.0, 5.0, 5.0];
        let ramp = [1.0f32, 2.0, 3.0, 4.0];
        assert_close(correlation(&constant, &ramp), 0.0);
    }

    /// Two features over three rows where the second is twice the first:
    /// every entry of the 2x2 matrix must be 1.
    #[test]
    fn test_correlation_matrix() {
        let table = [
            1.0f32, 2.0, // row 0
            2.0, 4.0, // row 1
            3.0, 6.0, // row 2
        ];
        let matrix = compute_correlation_matrix(&table, 2, 3);
        for index in 0..4 {
            assert_close(matrix[index], 1.0);
        }
    }

    /// Two features over four rows that alternate in opposite phase.
    ///
    /// Despite the test's name, these columns are perfectly anti-correlated,
    /// which is why the off-diagonal entry is expected to be -1.
    #[test]
    fn test_correlation_matrix_uncorrelated() {
        let table = [
            1.0f32, 0.0, // row 0
            0.0, 1.0, // row 1
            1.0, 0.0, // row 2
            0.0, 1.0, // row 3
        ];
        let matrix = compute_correlation_matrix(&table, 2, 4);
        assert_close(matrix[0], 1.0);
        assert_close(matrix[3], 1.0);
        assert_close(matrix[1], -1.0);
    }
}