shape_runtime/simd_rolling/
window.rs

1//! Rolling window operations (sum, mean)
2
3use wide::f64x4;
4
5use super::SIMD_THRESHOLD;
6
7// ===== Public API - Feature-gated SIMD selection =====
8
9/// Rolling sum (auto-selects SIMD or scalar)
10#[cfg(feature = "simd")]
11#[inline]
12pub fn rolling_sum(data: &[f64], window: usize) -> Vec<f64> {
13    rolling_sum_simd(data, window)
14}
15
16#[cfg(not(feature = "simd"))]
17#[inline]
18pub fn rolling_sum(data: &[f64], window: usize) -> Vec<f64> {
19    rolling_sum_scalar(data, window)
20}
21
22/// Rolling mean (auto-selects SIMD or scalar)
23#[cfg(feature = "simd")]
24#[inline]
25pub fn rolling_mean(data: &[f64], window: usize) -> Vec<f64> {
26    rolling_mean_simd(data, window)
27}
28
29#[cfg(not(feature = "simd"))]
30#[inline]
31pub fn rolling_mean(data: &[f64], window: usize) -> Vec<f64> {
32    rolling_mean_scalar(data, window)
33}
34
35// ===== Internal SIMD implementations =====
36
37/// SIMD-accelerated rolling sum (internal)
38///
39/// Uses f64x4 vectors to process 4 elements at a time.
40/// Falls back to scalar for small arrays (<64 elements).
41///
42/// # Performance
43/// - Expected speedup: 2-4x over scalar on large arrays
44/// - Complexity: O(n)
45fn rolling_sum_simd(data: &[f64], window: usize) -> Vec<f64> {
46    let n = data.len();
47
48    if window == 0 || window > n {
49        return vec![f64::NAN; n];
50    }
51
52    // Use scalar for small arrays
53    if n < SIMD_THRESHOLD {
54        return rolling_sum_scalar(data, window);
55    }
56
57    let mut result = vec![f64::NAN; n];
58
59    // Initial window sum (scalar)
60    let mut sum: f64 = data[..window].iter().sum();
61    result[window - 1] = sum;
62
63    // SIMD sliding window
64    // Each iteration: sum = sum - data[i-w] + data[i]
65    let remaining = n - window;
66    let simd_chunks = remaining / 4;
67
68    for chunk_idx in 0..simd_chunks {
69        let base = window + chunk_idx * 4;
70
71        // Load 4 values to add (new values entering window)
72        let add_vals = f64x4::new([data[base], data[base + 1], data[base + 2], data[base + 3]]);
73
74        // Load 4 values to subtract (old values leaving window)
75        let sub_vals = f64x4::new([
76            data[base - window],
77            data[base - window + 1],
78            data[base - window + 2],
79            data[base - window + 3],
80        ]);
81
82        // Compute deltas
83        let deltas = add_vals - sub_vals;
84        let delta_arr = deltas.to_array();
85
86        // Apply deltas sequentially (dependency chain prevents full vectorization)
87        for j in 0..4 {
88            sum += delta_arr[j];
89            result[base + j] = sum;
90        }
91    }
92
93    // Handle remaining elements
94    let processed = window + simd_chunks * 4;
95    for i in processed..n {
96        sum = sum - data[i - window] + data[i];
97        result[i] = sum;
98    }
99
100    result
101}
102
103/// Scalar fallback for rolling sum
104fn rolling_sum_scalar(data: &[f64], window: usize) -> Vec<f64> {
105    let n = data.len();
106    let mut result = vec![f64::NAN; n];
107
108    if window == 0 || window > n {
109        return result;
110    }
111
112    let mut sum: f64 = data[..window].iter().sum();
113    result[window - 1] = sum;
114
115    for i in window..n {
116        sum = sum - data[i - window] + data[i];
117        result[i] = sum;
118    }
119
120    result
121}
122
123#[cfg(not(feature = "simd"))]
124fn rolling_mean_scalar(data: &[f64], window: usize) -> Vec<f64> {
125    let sums = rolling_sum_scalar(data, window);
126    sums.iter()
127        .map(|&s| {
128            if s.is_nan() {
129                f64::NAN
130            } else {
131                s / window as f64
132            }
133        })
134        .collect()
135}
136
137/// SIMD-accelerated rolling mean
138///
139/// Reuses rolling_sum_simd and divides by window size.
140///
141/// # Performance
142/// - Expected speedup: 2-4x over scalar
143/// - Complexity: O(n)
144fn rolling_mean_simd(data: &[f64], window: usize) -> Vec<f64> {
145    let sums = rolling_sum_simd(data, window);
146    let window_f64 = window as f64;
147
148    // Vectorize the division
149    let mut result = vec![f64::NAN; sums.len()];
150    let chunks = sums.len() / 4;
151
152    for chunk in 0..chunks {
153        let i = chunk * 4;
154        let sum_vec = f64x4::new([sums[i], sums[i + 1], sums[i + 2], sums[i + 3]]);
155        let window_vec = f64x4::splat(window_f64);
156        let mean = sum_vec / window_vec;
157        let arr = mean.to_array();
158        result[i..i + 4].copy_from_slice(&arr);
159    }
160
161    // Remainder
162    for i in (chunks * 4)..sums.len() {
163        result[i] = sums[i] / window_f64;
164    }
165
166    result
167}
168
169#[cfg(test)]
170mod tests {
171    use super::*;
172
173    #[test]
174    fn test_rolling_sum_simd() {
175        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
176        let result = rolling_sum_simd(&data, 3);
177
178        // First two are NaN (not enough data)
179        assert!(result[0].is_nan());
180        assert!(result[1].is_nan());
181
182        // Rolling sums
183        assert_eq!(result[2], 6.0); // 1+2+3
184        assert_eq!(result[3], 9.0); // 2+3+4
185        assert_eq!(result[4], 12.0); // 3+4+5
186        assert_eq!(result[5], 15.0); // 4+5+6
187        assert_eq!(result[6], 18.0); // 5+6+7
188        assert_eq!(result[7], 21.0); // 6+7+8
189    }
190
191    #[test]
192    fn test_rolling_mean_simd() {
193        let data = vec![2.0, 4.0, 6.0, 8.0, 10.0];
194        let result = rolling_mean_simd(&data, 3);
195
196        assert!(result[0].is_nan());
197        assert!(result[1].is_nan());
198        assert_eq!(result[2], 4.0); // (2+4+6)/3
199        assert_eq!(result[3], 6.0); // (4+6+8)/3
200        assert_eq!(result[4], 8.0); // (6+8+10)/3
201    }
202}
shape_runtime/simd_rolling/window.rs

shape_runtime/simd_rolling/
window.rs