/// Namespace for element-wise and reduction kernels over numeric slices.
///
/// No explicit SIMD intrinsics are used. The kernels are written as plain
/// iterator loops and any vectorization is left to the optimizer.
pub struct SimdOps;

impl SimdOps {
    /// Stores `a[i] + b[i]` into `result[i]`.
    ///
    /// Only the first `min(a.len(), b.len(), result.len())` elements are
    /// processed; any further elements of `result` are left untouched.
    #[inline]
    pub fn add_f64_array(a: &[f64], b: &[f64], result: &mut [f64]) {
        // `zip` stops at the shortest input, which is the same truncation the
        // explicit `min` of the three lengths would give.
        for ((out, &x), &y) in result.iter_mut().zip(a).zip(b) {
            *out = x + y;
        }
    }

    /// Stores `a[i] * b[i]` into `result[i]`.
    ///
    /// Only the first `min(a.len(), b.len(), result.len())` elements are
    /// processed; any further elements of `result` are left untouched.
    #[inline]
    pub fn mul_f64_array(a: &[f64], b: &[f64], result: &mut [f64]) {
        for ((out, &x), &y) in result.iter_mut().zip(a).zip(b) {
            *out = x * y;
        }
    }

    /// Stores `a[i] + b[i]` into `result[i]` for `i32` slices.
    ///
    /// Only the first `min(a.len(), b.len(), result.len())` elements are
    /// processed; any further elements of `result` are left untouched.
    ///
    /// The addition uses the built-in `+` operator, so overflow follows the
    /// build's overflow-check setting (panic when checks are enabled, wrap
    /// otherwise).
    #[inline]
    pub fn add_i32_array(a: &[i32], b: &[i32], result: &mut [i32]) {
        for ((out, &x), &y) in result.iter_mut().zip(a).zip(b) {
            *out = x + y;
        }
    }

    /// Evaluates a polynomial at `x` using Horner's scheme.
    ///
    /// `coefficients[i]` is the coefficient of `x^i` (ascending powers).
    /// Returns `0.0` for an empty coefficient slice.
    #[inline]
    pub fn evaluate_polynomial_simd(coefficients: &[f64], x: f64) -> f64 {
        match coefficients.split_last() {
            None => 0.0,
            // Seed with the highest-order coefficient, then fold in the
            // remaining ones from high to low order.
            Some((&leading, lower)) => lower
                .iter()
                .rev()
                .fold(leading, |acc, &coeff| acc * x + coeff),
        }
    }

    /// Returns the dot product of the first `min(a.len(), b.len())` elements
    /// of `a` and `b`; `0.0` when either slice is empty.
    ///
    /// Products are accumulated in groups of four (each group is summed
    /// left to right and then added to the running total), followed by the
    /// leftover elements one at a time. The grouping is kept deliberately:
    /// changing it would change floating-point rounding.
    #[inline]
    pub fn dot_product_f64(a: &[f64], b: &[f64]) -> f64 {
        // Trim both inputs to the common length so their groups line up.
        let len = a.len().min(b.len());
        let (a, b) = (&a[..len], &b[..len]);

        let (a_groups, b_groups) = (a.chunks_exact(4), b.chunks_exact(4));
        let (a_tail, b_tail) = (a_groups.remainder(), b_groups.remainder());

        let mut sum = 0.0;
        for (x, y) in a_groups.zip(b_groups) {
            sum += x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3];
        }
        for (&x, &y) in a_tail.iter().zip(b_tail) {
            sum += x * y;
        }
        sum
    }
}
/// Namespace for bulk numeric reductions.
pub struct SimdOptimized;

impl SimdOptimized {
    /// Returns the sum of all `values`; `0.0` for an empty slice.
    ///
    /// Elements are consumed in groups of four: each group is summed left to
    /// right and then added to the running total. Any leftover elements are
    /// added one at a time afterwards.
    pub fn bulk_add_numeric(values: &[f64]) -> f64 {
        let groups = values.chunks_exact(4);
        let tail = groups.remainder();

        // Sum each full group first, then add that partial sum to the total.
        let grouped_total = groups.fold(0.0, |acc, g| acc + (g[0] + g[1] + g[2] + g[3]));

        tail.iter().fold(grouped_total, |acc, &v| acc + v)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Five elements: one full group of four plus a one-element remainder.
    #[test]
    fn test_simd_f64_addition() {
        let a = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let b = vec![5.0, 4.0, 3.0, 2.0, 1.0];
        let mut result = vec![0.0; 5];
        SimdOps::add_f64_array(&a, &b, &mut result);
        assert_eq!(result, vec![6.0, 6.0, 6.0, 6.0, 6.0]);
    }

    /// Only the common prefix is written when the slice lengths differ.
    #[test]
    fn test_simd_f64_addition_mismatched_lengths() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 1.0];
        let mut result = vec![-1.0; 4];
        SimdOps::add_f64_array(&a, &b, &mut result);
        assert_eq!(result, vec![2.0, 3.0, -1.0, -1.0]);
    }

    /// Element-wise multiplication, including a remainder element.
    #[test]
    fn test_simd_f64_multiplication() {
        let a = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let b = vec![2.0, 2.0, 2.0, 2.0, 0.5];
        let mut result = vec![0.0; 5];
        SimdOps::mul_f64_array(&a, &b, &mut result);
        assert_eq!(result, vec![2.0, 4.0, 6.0, 8.0, 2.5]);
    }

    /// Ten elements: one full group of eight plus a two-element remainder.
    #[test]
    fn test_simd_i32_addition() {
        let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        let b = vec![10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
        let mut result = vec![0; 10];
        SimdOps::add_i32_array(&a, &b, &mut result);
        assert_eq!(result, vec![11, 11, 11, 11, 11, 11, 11, 11, 11, 11]);
    }

    /// 1 + 3x + 2x^2 at x = 2 is 15; an empty polynomial evaluates to 0.
    #[test]
    fn test_polynomial_evaluation() {
        let coefficients = vec![1.0, 3.0, 2.0];
        let x = 2.0;
        let result = SimdOps::evaluate_polynomial_simd(&coefficients, x);
        assert_eq!(result, 15.0);
        assert_eq!(SimdOps::evaluate_polynomial_simd(&[], x), 0.0);
    }

    #[test]
    fn test_dot_product() {
        let a = vec![1.0, 2.0, 3.0, 4.0];
        let b = vec![4.0, 3.0, 2.0, 1.0];
        let result = SimdOps::dot_product_f64(&a, &b);
        assert_eq!(result, 20.0);
    }

    /// Times the kernel against a plain scalar loop and checks that both
    /// produce the same values.
    #[test]
    fn test_simd_benefits() {
        use std::time::Instant;

        let size = 10000;
        let a: Vec<f64> = (0..size).map(|i| i as f64).collect();
        let b: Vec<f64> = (0..size).map(|i| (size - i) as f64).collect();

        let mut result = vec![0.0; size];
        let start = Instant::now();
        SimdOps::add_f64_array(&a, &b, &mut result);
        let simd_duration = start.elapsed();

        // The scalar baseline writes to its own buffer so the kernel's
        // output is still available for comparison afterwards.
        let mut expected = vec![0.0; size];
        let start = Instant::now();
        for ((out, &x), &y) in expected.iter_mut().zip(&a).zip(&b) {
            *out = x + y;
        }
        let scalar_duration = start.elapsed();

        println!(
            "SIMD-like: {:?}, Scalar: {:?}",
            simd_duration, scalar_duration
        );
        println!("SIMD performance test completed successfully");

        // Correctness is the primary check: i + (size - i) == size for all i.
        assert_eq!(result, expected);
        assert!(result.iter().all(|&v| v == size as f64));

        // Very loose sanity bound only; timings are not compared because
        // they depend on the machine and build profile.
        assert!(simd_duration.as_nanos() < 1_000_000_000);
    }
}