parallel_simd/
parallel_simd.rs1#![feature(portable_simd)]
13
14use std::hint::black_box;
15use std::simd::num::{SimdFloat, SimdInt};
16use std::simd::{LaneCount, Simd, SupportedLaneCount};
17use std::time::Instant;
18
19use minarrow::{Buffer, Vec64};
20#[cfg(feature = "parallel_proc")]
21use rayon::iter::ParallelIterator;
22#[cfg(feature = "parallel_proc")]
23use rayon::slice::ParallelSlice;
24
/// Number of elements generated for each benchmark input.
///
/// With 8-byte elements (`i64` / `f64`) this is roughly 8 GB of data per input.
const N: usize = 1_000_000_000;
/// Lane count passed to the `simd_sum_*` kernels by the Rayon wrappers
/// (4 lanes of 64-bit elements, i.e. 256-bit vectors).
const SIMD_LANES: usize = 4;
27
28#[inline(always)]
30fn simd_sum_i64<const LANES: usize>(data: &[i64]) -> i64
31where
32 LaneCount<LANES>: SupportedLaneCount
33{
34 let n = data.len();
35 let simd_width = LANES;
36 let simd_chunks = n / simd_width;
37
38 let mut acc_simd = Simd::<i64, LANES>::splat(0);
39 for i in 0..simd_chunks {
40 let v = Simd::<i64, LANES>::from_slice(&data[i * simd_width..][..simd_width]);
41 acc_simd += v;
42 }
43 let mut result = acc_simd.reduce_sum();
44 for i in (simd_chunks * simd_width)..n {
45 result += data[i];
46 }
47 result
48}
49
50#[inline(always)]
52fn simd_sum_f64<const LANES: usize>(data: &[f64]) -> f64
53where
54 LaneCount<LANES>: SupportedLaneCount
55{
56 let n = data.len();
57 let simd_width = LANES;
58 let simd_chunks = n / simd_width;
59
60 let mut acc_simd = Simd::<f64, LANES>::splat(0.0);
61 for i in 0..simd_chunks {
62 let v = Simd::<f64, LANES>::from_slice(&data[i * simd_width..][..simd_width]);
63 acc_simd += v;
64 }
65 let mut result = acc_simd.reduce_sum();
66 for i in (simd_chunks * simd_width)..n {
67 result += data[i];
68 }
69 result
70}
71
/// Sums all `i64` values in `buffer` in parallel.
///
/// The buffer's slice is split into chunks of `1 << 20` elements. Each chunk
/// is summed with `simd_sum_i64::<SIMD_LANES>` and Rayon adds the per-chunk
/// results together.
#[cfg(feature = "parallel_proc")]
fn rayon_simd_sum_i64(buffer: &Buffer<i64>) -> i64 {
    // Elements handed to the SIMD kernel per parallel chunk.
    const ELEMS_PER_CHUNK: usize = 1 << 20;

    buffer
        .as_slice()
        .par_chunks(ELEMS_PER_CHUNK)
        .map(simd_sum_i64::<SIMD_LANES>)
        .sum()
}
79
/// Sums all `f64` values in `buffer` in parallel.
///
/// The buffer's slice is split into chunks of `1 << 20` elements. Each chunk
/// is summed with `simd_sum_f64::<SIMD_LANES>` and Rayon adds the per-chunk
/// results together.
///
/// NOTE(review): Rayon's documentation says the reduction order of `sum` is
/// unspecified, so the floating-point result may differ slightly between runs.
#[cfg(feature = "parallel_proc")]
fn rayon_simd_sum_f64(buffer: &Buffer<f64>) -> f64 {
    // Elements handed to the SIMD kernel per parallel chunk.
    const ELEMS_PER_CHUNK: usize = 1 << 20;

    buffer
        .as_slice()
        .par_chunks(ELEMS_PER_CHUNK)
        .map(simd_sum_f64::<SIMD_LANES>)
        .sum()
}
/// Runs the SIMD + Rayon sum benchmark for `i64` and then for `f64`.
///
/// For each element type it builds a `Buffer` holding the values `0..N`,
/// times one parallel SIMD sum over it, and prints the sum together with the
/// elapsed time. Only the summation is timed; input generation is not.
///
/// Each benchmark runs in its own scope. Shadowing a variable does not drop
/// the previous value, so without the scopes the `i64` buffer would stay
/// alive until the end of the function while the `f64` buffer is built and
/// summed. With `N` 8-byte elements per buffer that would roughly double the
/// peak memory use.
#[cfg(feature = "parallel_proc")]
fn run_benchmark() {
    println!("--- SIMD + Rayon Benchmark, N = {} ---", N);

    {
        let data: Vec64<i64> = (0..N as i64).collect();
        let buffer = Buffer::from(data);

        let start = Instant::now();
        // `black_box` keeps the optimizer from discarding the computed sum.
        let sum = black_box(rayon_simd_sum_i64(&buffer));
        let dur = start.elapsed();
        println!("SIMD + Rayon IntegerArray<i64>: sum = {}, time = {:?}", sum, dur);
    } // the `i64` buffer is dropped here, before the `f64` input is built

    {
        let data: Vec64<f64> = (0..N as i64).map(|x| x as f64).collect();
        let buffer = Buffer::from(data);

        let start = Instant::now();
        let sum = black_box(rayon_simd_sum_f64(&buffer));
        let dur = start.elapsed();
        println!("SIMD + Rayon FloatArray<f64>: sum = {}, time = {:?}", sum, dur);
    }
}
109
/// Entry point of the example.
///
/// Runs the benchmark when the `parallel_proc` feature is enabled; otherwise
/// prints a hint that the feature is required.
fn main() {
    // Exactly one of the two statements below survives conditional compilation.
    #[cfg(feature = "parallel_proc")]
    run_benchmark();

    #[cfg(not(feature = "parallel_proc"))]
    println!("The parallel_simd example requires enabling the `parallel_proc` feature.");
}