avila_parallel/
simd.rs

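//! SIMD-friendly numeric reductions
//!
//! This module provides sum and dot-product helpers for numeric slices, plus thin
//! wrappers that delegate to the crate's parallel executor. The helpers are plain
//! iterator reductions: no explicit SIMD intrinsics are used, so any vectorization
//! is left to the compiler.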
5
/// Adds up every element of an `i32` slice.
///
/// The reduction is an ordinary iterator sum; no hand-written SIMD intrinsics
/// are involved, so any vectorization is up to the compiler.
///
/// Returns `0` for an empty slice. Overflow is handled exactly as
/// `Iterator::sum` handles it for `i32`.
#[inline]
pub fn simd_sum_i32(slice: &[i32]) -> i32 {
    // Copy the elements out of the slice and let the standard reduction do the work.
    slice.iter().copied().sum::<i32>()
}
12
/// Adds up every element of an `f32` slice, front to back.
///
/// Elements are accumulated in slice order through the standard iterator sum,
/// so the rounding behavior is that of a plain sequential reduction.
/// An empty slice yields a zero value.
#[inline]
pub fn simd_sum_f32(slice: &[f32]) -> f32 {
    slice.iter().copied().sum::<f32>()
}
18
/// Adds up every element of an `f64` slice, front to back.
///
/// Elements are accumulated in slice order through the standard iterator sum,
/// so the rounding behavior is that of a plain sequential reduction.
/// An empty slice yields a zero value.
#[inline]
pub fn simd_sum_f64(slice: &[f64]) -> f64 {
    slice.iter().copied().sum::<f64>()
}
24
/// Computes the dot product of two `f32` slices.
///
/// Elements are paired by index and the products are summed in order. When the
/// slices differ in length, only the first `min(a.len(), b.len())` pairs
/// contribute; the surplus tail of the longer slice is ignored rather than
/// treated as an error. Two empty slices give a zero value.
#[inline]
pub fn simd_dot_f32(a: &[f32], b: &[f32]) -> f32 {
    // `zip` stops at the shorter input, which is what bounds the pairing.
    a.iter()
        .zip(b.iter())
        .map(|(&lhs, &rhs)| lhs * rhs)
        .sum::<f32>()
}
31
/// Computes the dot product of two `f64` slices.
///
/// Elements are paired by index and the products are summed in order. When the
/// slices differ in length, only the first `min(a.len(), b.len())` pairs
/// contribute; the surplus tail of the longer slice is ignored rather than
/// treated as an error. Two empty slices give a zero value.
#[inline]
pub fn simd_dot_f64(a: &[f64], b: &[f64]) -> f64 {
    // `zip` stops at the shorter input, which is what bounds the pairing.
    a.iter()
        .zip(b.iter())
        .map(|(&lhs, &rhs)| lhs * rhs)
        .sum::<f64>()
}
38
39/// Parallel SIMD sum using work distribution
40pub fn parallel_simd_sum_i32(slice: &[i32]) -> i32 {
41    use crate::executor::{parallel_sum};
42    parallel_sum(slice)
43}
44
45/// Parallel SIMD sum for f32
46pub fn parallel_simd_sum_f32(slice: &[f32]) -> f32 {
47    use crate::executor::{parallel_sum};
48    parallel_sum(slice)
49}
50
51/// Parallel SIMD sum for f64
52pub fn parallel_simd_sum_f64(slice: &[f64]) -> f64 {
53    use crate::executor::{parallel_sum};
54    parallel_sum(slice)
55}
56
/// Unit tests for the reduction helpers in this module.
///
/// Float inputs are small integer-valued numbers so every partial sum is
/// exactly representable and `assert_eq!` is safe regardless of summation order.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simd_sum_i32() {
        let data = vec![1, 2, 3, 4, 5];
        assert_eq!(simd_sum_i32(&data), 15);
    }

    #[test]
    fn test_simd_sum_f32() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(simd_sum_f32(&data), 15.0);
    }

    #[test]
    fn test_simd_sum_f64() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(simd_sum_f64(&data), 15.0);
    }

    #[test]
    fn test_simd_sum_empty() {
        assert_eq!(simd_sum_i32(&[]), 0);
        assert_eq!(simd_sum_f32(&[]), 0.0);
        assert_eq!(simd_sum_f64(&[]), 0.0);
    }

    #[test]
    fn test_simd_dot_f32() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];
        assert_eq!(simd_dot_f32(&a, &b), 32.0); // 1*4 + 2*5 + 3*6 = 32
    }

    #[test]
    fn test_simd_dot_f64() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];
        assert_eq!(simd_dot_f64(&a, &b), 32.0); // 1*4 + 2*5 + 3*6 = 32
    }

    #[test]
    fn test_simd_dot_mismatched_lengths() {
        // Only the overlapping prefix contributes: 1*4 + 2*5 = 14.
        assert_eq!(simd_dot_f32(&[1.0, 2.0, 3.0], &[4.0, 5.0]), 14.0);
        assert_eq!(simd_dot_f64(&[4.0, 5.0], &[1.0, 2.0, 3.0]), 14.0);
    }

    #[test]
    fn test_simd_dot_empty() {
        assert_eq!(simd_dot_f32(&[], &[1.0, 2.0]), 0.0);
        assert_eq!(simd_dot_f64(&[], &[]), 0.0);
    }

    #[test]
    fn test_parallel_simd_sum() {
        let data: Vec<i32> = (1..=1000).collect();
        let expected = (1000 * 1001) / 2;
        assert_eq!(parallel_simd_sum_i32(&data), expected);
    }

    #[test]
    fn test_parallel_simd_sum_f32() {
        // 500_500 is below 2^24, so every partial sum is exact in f32.
        let data: Vec<f32> = (1..=1000).map(|v| v as f32).collect();
        assert_eq!(parallel_simd_sum_f32(&data), 500_500.0);
    }

    #[test]
    fn test_parallel_simd_sum_f64() {
        let data: Vec<f64> = (1..=1000).map(f64::from).collect();
        assert_eq!(parallel_simd_sum_f64(&data), 500_500.0);
    }
}