scirs2_core/ndarray_ext/stats/
correlation.rs

1//! Correlation functions for ndarray arrays
2//!
3//! This module provides functions for calculating correlation coefficients
4//! and covariance matrices.
5
6use ::ndarray::{Array, ArrayView, Ix1, Ix2};
7use num_traits::{Float, FromPrimitive};
8
9/// Calculate correlation coefficient between two 1D arrays
10///
11/// # Arguments
12///
13/// * `x` - First input array
14/// * `y` - Second input array (must have same shape as x)
15///
16/// # Returns
17///
18/// Pearson's correlation coefficient
19///
20/// # Examples
21///
22/// ```
23/// use ::ndarray::array;
24/// use scirs2_core::ndarray_ext::stats::corrcoef;
25///
26/// let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
27/// let y = array![5.0, 4.0, 3.0, 2.0, 1.0];
28/// let corr = corrcoef(x.view(), y.view()).unwrap();
29/// assert!((corr + 1.0_f64).abs() < 1e-10); // Perfect negative correlation (-1.0)
30/// ```
31///
32/// This function is similar to ``NumPy``'s `np.corrcoef` function but returns a single value.
33#[allow(dead_code)]
34pub fn corrcoef<T>(x: ArrayView<T, Ix1>, y: ArrayView<T, Ix1>) -> Result<T, &'static str>
35where
36    T: Clone + Float + FromPrimitive,
37{
38    if x.is_empty() || y.is_empty() {
39        return Err("Cannot compute correlation of empty arrays");
40    }
41
42    if x.len() != y.len() {
43        return Err("Arrays must have the same length");
44    }
45
46    // Calculate means
47    let n = T::from_usize(x.len()).unwrap();
48    let mut sum_x = T::zero();
49    let mut sum_y = T::zero();
50
51    for (&x_val, &y_val) in x.iter().zip(y.iter()) {
52        sum_x = sum_x + x_val;
53        sum_y = sum_y + y_val;
54    }
55
56    let mean_x = sum_x / n;
57    let mean_y = sum_y / n;
58
59    // Calculate covariance and variances
60    let mut cov_xy = T::zero();
61    let mut var_x = T::zero();
62    let mut var_y = T::zero();
63
64    for (&x_val, &y_val) in x.iter().zip(y.iter()) {
65        let dx = x_val - mean_x;
66        let dy = y_val - mean_y;
67        cov_xy = cov_xy + dx * dy;
68        var_x = var_x + dx * dx;
69        var_y = var_y + dy * dy;
70    }
71
72    // Calculate correlation coefficient
73    if var_x.is_zero() || var_y.is_zero() {
74        return Err("Correlation coefficient is not defined when either array has zero variance");
75    }
76
77    Ok(cov_xy / (var_x * var_y).sqrt())
78}
79
80/// Calculate the covariance matrix of a 2D array
81///
82/// # Arguments
83///
84/// * `array` - The input 2D array where rows are observations and columns are variables
85/// * `ddof` - Delta degrees of freedom (default 1)
86///
87/// # Returns
88///
89/// The covariance matrix
90///
91/// # Examples
92///
93/// ```
94/// use ::ndarray::array;
95/// use scirs2_core::ndarray_ext::stats::cov;
96///
97/// let data = array![
98///     [1.0, 2.0, 3.0],
99///     [4.0, 5.0, 6.0],
100///     [7.0, 8.0, 9.0],
101///     [10.0, 11.0, 12.0]
102/// ];
103/// let covmatrix = cov(data.view(), 1).unwrap();
104/// assert_eq!(covmatrix.shape(), &[3, 3]);
105/// ```
106///
107/// This function is equivalent to ``NumPy``'s `np.cov` function.
108#[allow(dead_code)]
109pub fn cov<T>(array: ArrayView<T, Ix2>, ddof: usize) -> Result<Array<T, Ix2>, &'static str>
110where
111    T: Clone + Float + FromPrimitive,
112{
113    if array.is_empty() {
114        return Err("Cannot compute covariance of an empty array");
115    }
116
117    let (n_samples, n_features) = (array.shape()[0], array.shape()[1]);
118
119    if n_samples <= ddof {
120        return Err("Not enough data points for covariance calculation with given ddof");
121    }
122
123    // Calculate means for each feature
124    let mut feature_means = Array::<T, Ix1>::zeros(n_features);
125
126    for j in 0..n_features {
127        let mut sum = T::zero();
128        for i in 0..n_samples {
129            sum = sum + array[[i, j]];
130        }
131        feature_means[j] = sum / T::from_usize(n_samples).unwrap();
132    }
133
134    // Calculate covariance matrix
135    let mut covmatrix = Array::<T, Ix2>::zeros((n_features, n_features));
136    let scale = T::from_usize(n_samples - ddof).unwrap();
137
138    for i in 0..n_features {
139        for j in 0..=i {
140            let mut cov_ij = T::zero();
141
142            for k in 0..n_samples {
143                let dev_i = array[[k, i]] - feature_means[i];
144                let dev_j = array[[k, j]] - feature_means[j];
145                cov_ij = cov_ij + dev_i * dev_j;
146            }
147
148            cov_ij = cov_ij / scale;
149            covmatrix[[0, j]] = cov_ij;
150
151            // Fill symmetric part
152            if 0 != j {
153                covmatrix[[j, 0]] = cov_ij;
154            }
155        }
156    }
157
158    Ok(covmatrix)
159}