Skip to main content

shape_runtime/
simd_i64.rs

1//! SIMD-style i64 operations for integer-typed arrays.
2//!
3//! Provides vectorized accumulation patterns for i64 data, avoiding the
4//! cost of i64->f64 conversion and preserving integer type fidelity.
5//! Uses chunks-of-4 manual unrolling for compiler auto-vectorization.
6//!
7//! Operations that are inherently floating-point (mean, correlation,
8//! std dev, pct_change) remain f64-only.
9
10use std::collections::VecDeque;
11
12// ============================================================================
13// Aggregation: sum, min, max
14// ============================================================================
15
16/// SIMD-style i64 sum using 4-wide accumulator lanes.
17///
18/// Uses `wrapping_add` to avoid panic on overflow (matches V8 BigInt semantics).
19/// For arrays up to ~2^62 elements of bounded values, no overflow occurs.
20///
21/// # Performance
22/// - Expected speedup: 2-4x over scalar for large arrays
23/// - The 4-lane accumulator enables auto-vectorization on x86-64 and aarch64
24#[inline]
25pub fn simd_sum_i64(data: &[i64]) -> i64 {
26    let mut acc = [0i64; 4];
27    let chunks = data.chunks_exact(4);
28    let remainder = chunks.remainder();
29    for chunk in chunks {
30        acc[0] = acc[0].wrapping_add(chunk[0]);
31        acc[1] = acc[1].wrapping_add(chunk[1]);
32        acc[2] = acc[2].wrapping_add(chunk[2]);
33        acc[3] = acc[3].wrapping_add(chunk[3]);
34    }
35    let mut total = acc[0]
36        .wrapping_add(acc[1])
37        .wrapping_add(acc[2])
38        .wrapping_add(acc[3]);
39    for &v in remainder {
40        total = total.wrapping_add(v);
41    }
42    total
43}
44
45/// SIMD-style i64 minimum.
46///
47/// Returns `None` for empty slices. Uses 4-wide lane reduction.
48#[inline]
49pub fn simd_min_i64(data: &[i64]) -> Option<i64> {
50    if data.is_empty() {
51        return None;
52    }
53    let mut mins = [i64::MAX; 4];
54    let chunks = data.chunks_exact(4);
55    let remainder = chunks.remainder();
56    for chunk in chunks {
57        if chunk[0] < mins[0] {
58            mins[0] = chunk[0];
59        }
60        if chunk[1] < mins[1] {
61            mins[1] = chunk[1];
62        }
63        if chunk[2] < mins[2] {
64            mins[2] = chunk[2];
65        }
66        if chunk[3] < mins[3] {
67            mins[3] = chunk[3];
68        }
69    }
70    let mut result = mins[0].min(mins[1]).min(mins[2]).min(mins[3]);
71    for &v in remainder {
72        if v < result {
73            result = v;
74        }
75    }
76    Some(result)
77}
78
79/// SIMD-style i64 maximum.
80///
81/// Returns `None` for empty slices. Uses 4-wide lane reduction.
82#[inline]
83pub fn simd_max_i64(data: &[i64]) -> Option<i64> {
84    if data.is_empty() {
85        return None;
86    }
87    let mut maxs = [i64::MIN; 4];
88    let chunks = data.chunks_exact(4);
89    let remainder = chunks.remainder();
90    for chunk in chunks {
91        if chunk[0] > maxs[0] {
92            maxs[0] = chunk[0];
93        }
94        if chunk[1] > maxs[1] {
95            maxs[1] = chunk[1];
96        }
97        if chunk[2] > maxs[2] {
98            maxs[2] = chunk[2];
99        }
100        if chunk[3] > maxs[3] {
101            maxs[3] = chunk[3];
102        }
103    }
104    let mut result = maxs[0].max(maxs[1]).max(maxs[2]).max(maxs[3]);
105    for &v in remainder {
106        if v > result {
107            result = v;
108        }
109    }
110    Some(result)
111}
112
113// ============================================================================
114// Rolling window: sum, min, max
115// ============================================================================
116
117/// Rolling sum over i64 data with O(n) sliding window.
118///
119/// Positions before the window is full are set to `None` (caller maps to NaN
120/// or whatever sentinel is appropriate). Uses wrapping arithmetic.
121///
122/// Returns a Vec of length `data.len()`. Elements `0..window-1` are `None`.
123pub fn rolling_sum_i64(data: &[i64], window: usize) -> Vec<Option<i64>> {
124    let n = data.len();
125    if window == 0 || window > n {
126        return vec![None; n];
127    }
128
129    let mut result = vec![None; n];
130
131    // Initial window sum
132    let mut sum: i64 = data[..window].iter().copied().fold(0i64, i64::wrapping_add);
133    result[window - 1] = Some(sum);
134
135    // Slide: subtract leaving element, add entering element
136    for i in window..n {
137        sum = sum.wrapping_sub(data[i - window]).wrapping_add(data[i]);
138        result[i] = Some(sum);
139    }
140
141    result
142}
143
144/// Rolling minimum over i64 data using monotonic deque. O(n) amortized.
145///
146/// Elements before the window is full are `None`.
147pub fn rolling_min_i64(data: &[i64], window: usize) -> Vec<Option<i64>> {
148    let n = data.len();
149    if window == 0 || window > n {
150        return vec![None; n];
151    }
152
153    let mut result = vec![None; n];
154    let mut deque: VecDeque<(usize, i64)> = VecDeque::with_capacity(window);
155
156    for i in 0..n {
157        // Remove elements outside the window
158        while let Some(&(idx, _)) = deque.front() {
159            if idx <= i.saturating_sub(window) {
160                deque.pop_front();
161            } else {
162                break;
163            }
164        }
165
166        // Maintain ascending order: remove elements >= current
167        while let Some(&(_, val)) = deque.back() {
168            if val >= data[i] {
169                deque.pop_back();
170            } else {
171                break;
172            }
173        }
174
175        deque.push_back((i, data[i]));
176
177        if i >= window - 1 {
178            result[i] = Some(deque.front().unwrap().1);
179        }
180    }
181
182    result
183}
184
185/// Rolling maximum over i64 data using monotonic deque. O(n) amortized.
186///
187/// Elements before the window is full are `None`.
188pub fn rolling_max_i64(data: &[i64], window: usize) -> Vec<Option<i64>> {
189    let n = data.len();
190    if window == 0 || window > n {
191        return vec![None; n];
192    }
193
194    let mut result = vec![None; n];
195    let mut deque: VecDeque<(usize, i64)> = VecDeque::with_capacity(window);
196
197    for i in 0..n {
198        // Remove elements outside the window
199        while let Some(&(idx, _)) = deque.front() {
200            if idx <= i.saturating_sub(window) {
201                deque.pop_front();
202            } else {
203                break;
204            }
205        }
206
207        // Maintain descending order: remove elements <= current
208        while let Some(&(_, val)) = deque.back() {
209            if val <= data[i] {
210                deque.pop_back();
211            } else {
212                break;
213            }
214        }
215
216        deque.push_back((i, data[i]));
217
218        if i >= window - 1 {
219            result[i] = Some(deque.front().unwrap().1);
220        }
221    }
222
223    result
224}
225
226// ============================================================================
227// Cumulative: cumsum, diff
228// ============================================================================
229
230/// Cumulative sum over i64 data. Uses wrapping arithmetic.
231#[inline]
232pub fn cumsum_i64(data: &[i64]) -> Vec<i64> {
233    let mut result = Vec::with_capacity(data.len());
234    let mut acc: i64 = 0;
235    for &v in data {
236        acc = acc.wrapping_add(v);
237        result.push(acc);
238    }
239    result
240}
241
242/// First-difference over i64 data.
243///
244/// `result[0]` is `None` (no predecessor). `result[i] = data[i] - data[i-1]`.
245#[inline]
246pub fn diff_i64(data: &[i64], period: usize) -> Vec<Option<i64>> {
247    let mut result = Vec::with_capacity(data.len());
248    for i in 0..data.len() {
249        if i < period {
250            result.push(None);
251        } else {
252            result.push(Some(data[i].wrapping_sub(data[i - period])));
253        }
254    }
255    result
256}
257
258// ============================================================================
259// Tests
260// ============================================================================
261
262#[cfg(test)]
263mod tests {
264    use super::*;
265
266    // --- simd_sum_i64 ---
267
268    #[test]
269    fn test_sum_i64_empty() {
270        assert_eq!(simd_sum_i64(&[]), 0);
271    }
272
273    #[test]
274    fn test_sum_i64_single() {
275        assert_eq!(simd_sum_i64(&[42]), 42);
276    }
277
278    #[test]
279    fn test_sum_i64_small() {
280        assert_eq!(simd_sum_i64(&[1, 2, 3, 4, 5]), 15);
281    }
282
283    #[test]
284    fn test_sum_i64_exact_chunk() {
285        // Exactly 4 elements (one full chunk, no remainder)
286        assert_eq!(simd_sum_i64(&[10, 20, 30, 40]), 100);
287    }
288
289    #[test]
290    fn test_sum_i64_large() {
291        let data: Vec<i64> = (1..=1000).collect();
292        let expected: i64 = 1000 * 1001 / 2;
293        assert_eq!(simd_sum_i64(&data), expected);
294    }
295
296    #[test]
297    fn test_sum_i64_negative() {
298        assert_eq!(simd_sum_i64(&[-5, -3, 10, -2]), 0);
299    }
300
301    #[test]
302    fn test_sum_i64_matches_naive() {
303        let data: Vec<i64> = (0..513).map(|i| i * 7 - 200).collect();
304        let naive: i64 = data.iter().sum();
305        assert_eq!(simd_sum_i64(&data), naive);
306    }
307
308    // --- simd_min_i64 ---
309
310    #[test]
311    fn test_min_i64_empty() {
312        assert_eq!(simd_min_i64(&[]), None);
313    }
314
315    #[test]
316    fn test_min_i64_single() {
317        assert_eq!(simd_min_i64(&[99]), Some(99));
318    }
319
320    #[test]
321    fn test_min_i64_all_same() {
322        assert_eq!(simd_min_i64(&[7, 7, 7, 7, 7]), Some(7));
323    }
324
325    #[test]
326    fn test_min_i64_typical() {
327        assert_eq!(simd_min_i64(&[5, 3, 7, 2, 9, 1, 8]), Some(1));
328    }
329
330    #[test]
331    fn test_min_i64_negative() {
332        assert_eq!(simd_min_i64(&[-10, -20, 5, 0]), Some(-20));
333    }
334
335    #[test]
336    fn test_min_i64_large() {
337        let data: Vec<i64> = (0..1000).collect();
338        assert_eq!(simd_min_i64(&data), Some(0));
339    }
340
341    // --- simd_max_i64 ---
342
343    #[test]
344    fn test_max_i64_empty() {
345        assert_eq!(simd_max_i64(&[]), None);
346    }
347
348    #[test]
349    fn test_max_i64_single() {
350        assert_eq!(simd_max_i64(&[99]), Some(99));
351    }
352
353    #[test]
354    fn test_max_i64_all_same() {
355        assert_eq!(simd_max_i64(&[7, 7, 7, 7, 7]), Some(7));
356    }
357
358    #[test]
359    fn test_max_i64_typical() {
360        assert_eq!(simd_max_i64(&[5, 3, 7, 2, 9, 1, 8]), Some(9));
361    }
362
363    #[test]
364    fn test_max_i64_negative() {
365        assert_eq!(simd_max_i64(&[-10, -20, 5, 0]), Some(5));
366    }
367
368    #[test]
369    fn test_max_i64_large() {
370        let data: Vec<i64> = (0..1000).collect();
371        assert_eq!(simd_max_i64(&data), Some(999));
372    }
373
374    // --- rolling_sum_i64 ---
375
376    #[test]
377    fn test_rolling_sum_i64_basic() {
378        let data = vec![1, 2, 3, 4, 5, 6, 7, 8];
379        let result = rolling_sum_i64(&data, 3);
380        assert_eq!(result[0], None);
381        assert_eq!(result[1], None);
382        assert_eq!(result[2], Some(6)); // 1+2+3
383        assert_eq!(result[3], Some(9)); // 2+3+4
384        assert_eq!(result[4], Some(12)); // 3+4+5
385        assert_eq!(result[5], Some(15)); // 4+5+6
386        assert_eq!(result[6], Some(18)); // 5+6+7
387        assert_eq!(result[7], Some(21)); // 6+7+8
388    }
389
390    #[test]
391    fn test_rolling_sum_i64_window_equals_len() {
392        let data = vec![1, 2, 3];
393        let result = rolling_sum_i64(&data, 3);
394        assert_eq!(result, vec![None, None, Some(6)]);
395    }
396
397    #[test]
398    fn test_rolling_sum_i64_window_too_large() {
399        let data = vec![1, 2, 3];
400        let result = rolling_sum_i64(&data, 5);
401        assert_eq!(result, vec![None, None, None]);
402    }
403
404    #[test]
405    fn test_rolling_sum_i64_window_zero() {
406        let data = vec![1, 2, 3];
407        let result = rolling_sum_i64(&data, 0);
408        assert_eq!(result, vec![None, None, None]);
409    }
410
411    #[test]
412    fn test_rolling_sum_i64_window_one() {
413        let data = vec![10, 20, 30];
414        let result = rolling_sum_i64(&data, 1);
415        assert_eq!(result, vec![Some(10), Some(20), Some(30)]);
416    }
417
418    // --- rolling_min_i64 ---
419
420    #[test]
421    fn test_rolling_min_i64_basic() {
422        let data = vec![5, 3, 7, 2, 9, 1, 8];
423        let result = rolling_min_i64(&data, 3);
424        assert_eq!(result[0], None);
425        assert_eq!(result[1], None);
426        assert_eq!(result[2], Some(3)); // min(5,3,7)
427        assert_eq!(result[3], Some(2)); // min(3,7,2)
428        assert_eq!(result[4], Some(2)); // min(7,2,9)
429        assert_eq!(result[5], Some(1)); // min(2,9,1)
430        assert_eq!(result[6], Some(1)); // min(9,1,8)
431    }
432
433    // --- rolling_max_i64 ---
434
435    #[test]
436    fn test_rolling_max_i64_basic() {
437        let data = vec![5, 3, 7, 2, 9, 1, 8];
438        let result = rolling_max_i64(&data, 3);
439        assert_eq!(result[0], None);
440        assert_eq!(result[1], None);
441        assert_eq!(result[2], Some(7)); // max(5,3,7)
442        assert_eq!(result[3], Some(7)); // max(3,7,2)
443        assert_eq!(result[4], Some(9)); // max(7,2,9)
444        assert_eq!(result[5], Some(9)); // max(2,9,1)
445        assert_eq!(result[6], Some(9)); // max(9,1,8)
446    }
447
448    // --- cumsum_i64 ---
449
450    #[test]
451    fn test_cumsum_i64_basic() {
452        assert_eq!(cumsum_i64(&[1, 2, 3, 4, 5]), vec![1, 3, 6, 10, 15]);
453    }
454
455    #[test]
456    fn test_cumsum_i64_empty() {
457        assert_eq!(cumsum_i64(&[]), Vec::<i64>::new());
458    }
459
460    #[test]
461    fn test_cumsum_i64_negative() {
462        assert_eq!(cumsum_i64(&[-1, 2, -3, 4]), vec![-1, 1, -2, 2]);
463    }
464
465    // --- diff_i64 ---
466
467    #[test]
468    fn test_diff_i64_period_1() {
469        let result = diff_i64(&[10, 15, 12, 20], 1);
470        assert_eq!(result, vec![None, Some(5), Some(-3), Some(8)]);
471    }
472
473    #[test]
474    fn test_diff_i64_period_2() {
475        let result = diff_i64(&[10, 15, 12, 20], 2);
476        assert_eq!(result, vec![None, None, Some(2), Some(5)]);
477    }
478
479    #[test]
480    fn test_diff_i64_empty() {
481        assert_eq!(diff_i64(&[], 1), Vec::<Option<i64>>::new());
482    }
483
484    // --- Cross-validation: i64 sum matches f64 sum for same values ---
485
486    #[test]
487    fn test_sum_i64_matches_f64_sum() {
488        let i64_data: Vec<i64> = (1..=500).collect();
489        let f64_data: Vec<f64> = i64_data.iter().map(|&v| v as f64).collect();
490        let i64_sum = simd_sum_i64(&i64_data);
491        let f64_sum: f64 = f64_data.iter().sum();
492        assert_eq!(i64_sum as f64, f64_sum);
493    }
494
495    #[test]
496    fn test_min_max_i64_matches_f64() {
497        let i64_data: Vec<i64> = vec![100, -50, 200, 0, -100, 75];
498        let i64_min = simd_min_i64(&i64_data).unwrap();
499        let i64_max = simd_max_i64(&i64_data).unwrap();
500        assert_eq!(i64_min, -100);
501        assert_eq!(i64_max, 200);
502    }
503
504    // --- Edge case: very large values ---
505
506    #[test]
507    fn test_sum_i64_large_values() {
508        let data = vec![i64::MAX / 4, i64::MAX / 4, i64::MAX / 4, i64::MAX / 4];
509        let result = simd_sum_i64(&data);
510        // 4 * (MAX/4) should be close to MAX (off by rounding in integer division)
511        let expected = (i64::MAX / 4) * 4;
512        assert_eq!(result, expected);
513    }
514
515    #[test]
516    fn test_rolling_sum_i64_negative_values() {
517        let data = vec![-1, -2, -3, -4, -5];
518        let result = rolling_sum_i64(&data, 2);
519        assert_eq!(result[0], None);
520        assert_eq!(result[1], Some(-3)); // -1 + -2
521        assert_eq!(result[2], Some(-5)); // -2 + -3
522        assert_eq!(result[3], Some(-7)); // -3 + -4
523        assert_eq!(result[4], Some(-9)); // -4 + -5
524    }
525}