Skip to main content

shape_runtime/
simd_statistics.rs

1//! SIMD-accelerated statistical operations
2//!
3//! Provides vectorized implementations of correlation, covariance, and other
4//! statistical functions. Uses manual loop unrolling for compiler auto-vectorization.
5//!
6//! Expected speedup: 3-5x for arrays larger than SIMD_THRESHOLD (64 elements)
7
8const SIMD_THRESHOLD: usize = 64;
9
10// ===== Public API - Feature-gated SIMD selection =====
11
12/// Correlation (auto-selects SIMD or scalar)
13#[cfg(feature = "simd")]
14#[inline]
15pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
16    correlation_simd(x, y)
17}
18
19#[cfg(not(feature = "simd"))]
20#[inline]
21pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
22    correlation_scalar(x, y)
23}
24
25/// Covariance (auto-selects SIMD or scalar)
26#[cfg(feature = "simd")]
27#[inline]
28pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
29    covariance_simd(x, y)
30}
31
32#[cfg(not(feature = "simd"))]
33#[inline]
34pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
35    covariance_scalar(x, y)
36}
37
38// ===== Internal SIMD implementations =====
39
40/// SIMD correlation calculation (internal)
41///
42/// Computes Pearson correlation coefficient between two series.
43/// Uses auto-vectorized loops for mean-centered product calculations.
44///
45/// # Performance
46/// - Expected speedup: 4-5x over scalar on large arrays
47/// - Falls back to scalar for arrays < 64 elements
48fn correlation_simd(x: &[f64], y: &[f64]) -> f64 {
49    assert_eq!(x.len(), y.len(), "Array lengths must match");
50    let n = x.len();
51
52    if n < SIMD_THRESHOLD {
53        return correlation_scalar(x, y);
54    }
55
56    if n == 0 {
57        return f64::NAN;
58    }
59
60    // Calculate means (SIMD-friendly loop)
61    let mean_x = mean_simd(x);
62    let mean_y = mean_simd(y);
63
64    // Calculate covariance and variances in one pass (cache-friendly)
65    let mut cov_sum = 0.0;
66    let mut var_x_sum = 0.0;
67    let mut var_y_sum = 0.0;
68
69    let chunks = n / 4;
70
71    // Process 4 elements at a time for auto-vectorization
72    for i in 0..chunks {
73        let idx = i * 4;
74
75        let x0 = x[idx] - mean_x;
76        let y0 = y[idx] - mean_y;
77        let x1 = x[idx + 1] - mean_x;
78        let y1 = y[idx + 1] - mean_y;
79        let x2 = x[idx + 2] - mean_x;
80        let y2 = y[idx + 2] - mean_y;
81        let x3 = x[idx + 3] - mean_x;
82        let y3 = y[idx + 3] - mean_y;
83
84        cov_sum += x0 * y0 + x1 * y1 + x2 * y2 + x3 * y3;
85        var_x_sum += x0 * x0 + x1 * x1 + x2 * x2 + x3 * x3;
86        var_y_sum += y0 * y0 + y1 * y1 + y2 * y2 + y3 * y3;
87    }
88
89    // Handle remainder
90    for i in (chunks * 4)..n {
91        let x_diff = x[i] - mean_x;
92        let y_diff = y[i] - mean_y;
93        cov_sum += x_diff * y_diff;
94        var_x_sum += x_diff * x_diff;
95        var_y_sum += y_diff * y_diff;
96    }
97
98    // Calculate correlation
99    let denominator = (var_x_sum * var_y_sum).sqrt();
100    if denominator == 0.0 {
101        return f64::NAN;
102    }
103
104    cov_sum / denominator
105}
106
107fn correlation_scalar(x: &[f64], y: &[f64]) -> f64 {
108    let n = x.len();
109    if n == 0 {
110        return f64::NAN;
111    }
112
113    let mean_x: f64 = x.iter().sum::<f64>() / n as f64;
114    let mean_y: f64 = y.iter().sum::<f64>() / n as f64;
115
116    let mut cov_sum = 0.0;
117    let mut var_x_sum = 0.0;
118    let mut var_y_sum = 0.0;
119
120    for i in 0..n {
121        let x_diff = x[i] - mean_x;
122        let y_diff = y[i] - mean_y;
123        cov_sum += x_diff * y_diff;
124        var_x_sum += x_diff * x_diff;
125        var_y_sum += y_diff * y_diff;
126    }
127
128    let denominator = (var_x_sum * var_y_sum).sqrt();
129    if denominator == 0.0 {
130        return f64::NAN;
131    }
132
133    cov_sum / denominator
134}
135
136/// SIMD covariance calculation (internal)
137///
138/// Computes covariance between two series.
139fn covariance_simd(x: &[f64], y: &[f64]) -> f64 {
140    assert_eq!(x.len(), y.len(), "Array lengths must match");
141    let n = x.len();
142
143    if n < SIMD_THRESHOLD {
144        return covariance_scalar(x, y);
145    }
146
147    if n == 0 {
148        return f64::NAN;
149    }
150
151    // Calculate means
152    let mean_x = mean_simd(x);
153    let mean_y = mean_simd(y);
154
155    // Calculate covariance
156    let mut cov_sum = 0.0;
157    let chunks = n / 4;
158
159    // Process 4 elements at a time
160    for i in 0..chunks {
161        let idx = i * 4;
162        let x0 = x[idx] - mean_x;
163        let y0 = y[idx] - mean_y;
164        let x1 = x[idx + 1] - mean_x;
165        let y1 = y[idx + 1] - mean_y;
166        let x2 = x[idx + 2] - mean_x;
167        let y2 = y[idx + 2] - mean_y;
168        let x3 = x[idx + 3] - mean_x;
169        let y3 = y[idx + 3] - mean_y;
170
171        cov_sum += x0 * y0 + x1 * y1 + x2 * y2 + x3 * y3;
172    }
173
174    // Handle remainder
175    for i in (chunks * 4)..n {
176        cov_sum += (x[i] - mean_x) * (y[i] - mean_y);
177    }
178
179    cov_sum / (n - 1) as f64
180}
181
182fn covariance_scalar(x: &[f64], y: &[f64]) -> f64 {
183    let n = x.len();
184    if n == 0 {
185        return f64::NAN;
186    }
187
188    let mean_x: f64 = x.iter().sum::<f64>() / n as f64;
189    let mean_y: f64 = y.iter().sum::<f64>() / n as f64;
190
191    let cov_sum: f64 = x
192        .iter()
193        .zip(y.iter())
194        .map(|(xi, yi)| (xi - mean_x) * (yi - mean_y))
195        .sum();
196
197    cov_sum / (n - 1) as f64
198}
199
200/// SIMD mean calculation (helper function)
201fn mean_simd(data: &[f64]) -> f64 {
202    let n = data.len();
203    if n == 0 {
204        return f64::NAN;
205    }
206
207    let mut sum = 0.0;
208    let chunks = n / 4;
209
210    // Process 4 elements at a time
211    for i in 0..chunks {
212        let idx = i * 4;
213        sum += data[idx] + data[idx + 1] + data[idx + 2] + data[idx + 3];
214    }
215
216    // Handle remainder
217    for i in (chunks * 4)..n {
218        sum += data[i];
219    }
220
221    sum / n as f64
222}
223
224#[cfg(test)]
225mod tests {
226    use super::*;
227
228    #[test]
229    fn test_correlation_simd_positive() {
230        // Perfectly correlated (r = 1.0)
231        let x: Vec<f64> = (0..100).map(|i| i as f64).collect();
232        let y: Vec<f64> = (0..100).map(|i| i as f64 * 2.0).collect();
233
234        let corr = correlation_simd(&x, &y);
235        assert!((corr - 1.0).abs() < 1e-10, "Perfect positive correlation");
236    }
237
238    #[test]
239    fn test_correlation_simd_negative() {
240        // Perfectly negatively correlated (r = -1.0)
241        let x: Vec<f64> = (0..100).map(|i| i as f64).collect();
242        let y: Vec<f64> = (0..100).map(|i| -(i as f64)).collect();
243
244        let corr = correlation_simd(&x, &y);
245        assert!((corr + 1.0).abs() < 1e-10, "Perfect negative correlation");
246    }
247
248    #[test]
249    fn test_correlation_simd_vs_scalar() {
250        let n = 5000;
251        let x: Vec<f64> = (0..n).map(|i| (i as f64 * 0.1).sin()).collect();
252        let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.1 + 1.0).cos()).collect();
253
254        let simd_result = correlation_simd(&x, &y);
255        let scalar_result = correlation_scalar(&x, &y);
256
257        assert!(
258            (simd_result - scalar_result).abs() < 1e-10,
259            "SIMD and scalar results must match"
260        );
261        assert!(
262            simd_result >= -1.0 && simd_result <= 1.0,
263            "Correlation in [-1, 1]"
264        );
265    }
266
267    #[test]
268    fn test_covariance_simd() {
269        let x: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
270        let y: Vec<f64> = vec![2.0, 4.0, 6.0, 8.0, 10.0];
271
272        let cov = covariance_simd(&x, &y);
273
274        // Covariance of perfectly linear relationship
275        assert!(cov > 0.0, "Positive covariance");
276    }
277
278    #[test]
279    fn test_covariance_simd_vs_scalar() {
280        let n = 1000;
281        let x: Vec<f64> = (0..n).map(|i| (i as f64 * 0.05).sin()).collect();
282        let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.05).cos()).collect();
283
284        let simd_result = covariance_simd(&x, &y);
285        let scalar_result = covariance_scalar(&x, &y);
286
287        assert!(
288            (simd_result - scalar_result).abs() < 1e-10,
289            "SIMD and scalar covariance must match"
290        );
291    }
292
293    #[test]
294    fn test_mean_simd() {
295        let data: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
296        let mean = mean_simd(&data);
297        assert_eq!(mean, 3.0);
298    }
299
300    #[test]
301    fn test_mean_simd_large() {
302        let n = 10000;
303        let data: Vec<f64> = (0..n).map(|i| i as f64).collect();
304        let mean = mean_simd(&data);
305        let expected = (n - 1) as f64 / 2.0;
306        assert!((mean - expected).abs() < 1e-10);
307    }
308}