#![feature(portable_simd)]
#[cfg(feature = "parallel_proc")]
mod parallel_simd {
use std::hint::black_box;
use std::simd::Simd;
use std::simd::num::{SimdFloat, SimdInt};
use std::time::Instant;
use minarrow::{Buffer, Vec64};
use rayon::iter::ParallelIterator;
use rayon::slice::ParallelSlice;
/// Number of elements generated for each benchmark run.
///
/// Both element types used by the benchmark (`i64`, `f64`) are 8 bytes wide,
/// so one input of `N` elements is roughly 8 GB of data.
const N: usize = 1_000_000_000;
/// Vector width (lane count) passed as `LANES` to the SIMD sum kernels.
const SIMD_LANES: usize = 4;
/// Sums `data` with `LANES`-wide SIMD accumulation.
///
/// Every full group of `LANES` elements is added lane-wise into a vector
/// accumulator, which is then reduced horizontally. Elements left over after
/// the last full group are added one at a time, in order.
#[inline(always)]
fn simd_sum_i64<const LANES: usize>(data: &[i64]) -> i64 {
    let mut groups = data.chunks_exact(LANES);

    // Vector part: one lane-wise add per full group.
    let mut lanes = Simd::<i64, LANES>::splat(0);
    for group in groups.by_ref() {
        lanes += Simd::<i64, LANES>::from_slice(group);
    }

    // Scalar tail: fewer than LANES elements remain.
    groups
        .remainder()
        .iter()
        .fold(lanes.reduce_sum(), |total, &value| total + value)
}
/// Sums `data` with `LANES`-wide SIMD accumulation.
///
/// Every full group of `LANES` elements is added lane-wise into a vector
/// accumulator, which is then reduced horizontally. Elements left over after
/// the last full group are added to that reduced value one at a time, in
/// order, so the floating-point addition order is fixed for a given `LANES`.
#[inline(always)]
fn simd_sum_f64<const LANES: usize>(data: &[f64]) -> f64 {
    let mut groups = data.chunks_exact(LANES);

    // Vector part: one lane-wise add per full group.
    let mut lanes = Simd::<f64, LANES>::splat(0.0);
    for group in groups.by_ref() {
        lanes += Simd::<f64, LANES>::from_slice(group);
    }

    // Scalar tail: fewer than LANES elements remain.
    groups
        .remainder()
        .iter()
        .fold(lanes.reduce_sum(), |total, &value| total + value)
}
fn rayon_simd_sum_i64(buffer: &Buffer<i64>) -> i64 {
let slice = buffer.as_slice();
let chunk_size = 1 << 20; slice
.par_chunks(chunk_size)
.map(|chunk| simd_sum_i64::<SIMD_LANES>(chunk))
.sum()
}
fn rayon_simd_sum_f64(buffer: &Buffer<f64>) -> f64 {
let slice = buffer.as_slice();
let chunk_size = 1 << 20; slice
.par_chunks(chunk_size)
.map(|chunk| simd_sum_f64::<SIMD_LANES>(chunk))
.sum()
}
/// Runs the `i64` and `f64` parallel SIMD sum benchmarks and prints each sum
/// together with the wall-clock time of the summation.
///
/// Only the summation itself is timed; generating the input data happens
/// before the timer starts.
///
/// Each benchmark lives in its own block so that its input buffer is dropped
/// before the next one is built. Merely shadowing `buffer` would keep the
/// first input (`N` 8-byte elements) alive until the function returns, which
/// doubles the peak memory footprint.
pub fn run_benchmark() {
    println!("--- SIMD + Rayon Benchmark, N = {} ---", N);

    // Integer benchmark.
    {
        let data: Vec64<i64> = (0..N as i64).collect();
        let buffer = Buffer::from(data);

        let start = Instant::now();
        // black_box on both input and output keeps the optimiser from
        // specialising on the input or discarding the computation.
        let sum = black_box(rayon_simd_sum_i64(black_box(&buffer)));
        let dur = start.elapsed();

        println!(
            "SIMD + Rayon IntegerArray<i64>: sum = {}, time = {:?}",
            sum, dur
        );
    } // i64 buffer is released here, before the f64 input is allocated.

    // Floating-point benchmark.
    {
        let data: Vec64<f64> = (0..N as i64).map(|x| x as f64).collect();
        let buffer = Buffer::from(data);

        let start = Instant::now();
        let sum = black_box(rayon_simd_sum_f64(black_box(&buffer)));
        let dur = start.elapsed();

        println!(
            "SIMD + Rayon FloatArray<f64>: sum = {}, time = {:?}",
            sum, dur
        );
    }
}
}
/// Entry point: runs the benchmark when the `parallel_proc` feature is
/// enabled, otherwise prints a hint explaining which feature is required.
fn main() {
    // Exactly one of the two statements below survives conditional compilation.
    #[cfg(feature = "parallel_proc")]
    parallel_simd::run_benchmark();

    #[cfg(not(feature = "parallel_proc"))]
    println!("The parallel_simd example requires enabling the `parallel_proc` feature.");
}