#![cfg(feature = "simd")]
use crate::VeloxxError;
#[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
use wide::{f64x4, i32x4};
/// Element-wise arithmetic and reductions over a slice of `T`.
///
/// This file implements the trait for `[f64]` and `[i32]`, using the four-lane
/// vector types from the `wide` crate where they apply. Every method returns a
/// `Result` so that implementations can report invalid input through
/// `VeloxxError` instead of panicking.
pub trait StdSimdOps<T> {
/// Returns a new vector holding `self[i] + other[i]` for every index.
///
/// The implementations in this file return `VeloxxError::InvalidOperation`
/// when `self` and `other` have different lengths.
fn std_simd_add(&self, other: &[T]) -> Result<Vec<T>, VeloxxError>;
/// Returns a new vector holding `self[i] - other[i]` for every index.
///
/// The implementations in this file return `VeloxxError::InvalidOperation`
/// when `self` and `other` have different lengths.
fn std_simd_sub(&self, other: &[T]) -> Result<Vec<T>, VeloxxError>;
/// Returns a new vector holding `self[i] * other[i]` for every index.
///
/// The implementations in this file return `VeloxxError::InvalidOperation`
/// when `self` and `other` have different lengths.
fn std_simd_mul(&self, other: &[T]) -> Result<Vec<T>, VeloxxError>;
/// Returns a new vector holding `self[i] / other[i]` for every index.
///
/// The implementations in this file return `VeloxxError::InvalidOperation`
/// when `self` and `other` have different lengths.
fn std_simd_div(&self, other: &[T]) -> Result<Vec<T>, VeloxxError>;
/// Returns the sum of all elements of `self`.
fn std_simd_sum(&self) -> Result<T, VeloxxError>;
/// Returns the arithmetic mean of `self` as an `f64`.
///
/// The implementations in this file return `Ok(None)` for an empty slice,
/// because a mean over zero elements is undefined.
fn std_simd_mean(&self) -> Result<Option<f64>, VeloxxError>;
}
impl StdSimdOps<f64> for [f64] {
fn std_simd_add(&self, other: &[f64]) -> Result<Vec<f64>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = vec![0.0; len];
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = f64x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = f64x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a + b;
let res_array = res.to_array();
result[i * 4..i * 4 + 4].copy_from_slice(&res_array);
}
for i in (len - remainder)..len {
result[i] = self[i] + other[i];
}
Ok(result)
}
fn std_simd_sub(&self, other: &[f64]) -> Result<Vec<f64>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = f64x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = f64x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a - b;
let res_array = res.to_array();
result.extend_from_slice(&res_array);
}
for i in (len - remainder)..len {
result.push(self[i] - other[i]);
}
Ok(result)
}
fn std_simd_mul(&self, other: &[f64]) -> Result<Vec<f64>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = f64x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = f64x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a * b;
let res_array = res.to_array();
result.extend_from_slice(&res_array);
}
for i in (len - remainder)..len {
result.push(self[i] * other[i]);
}
Ok(result)
}
fn std_simd_div(&self, other: &[f64]) -> Result<Vec<f64>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = f64x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = f64x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a / b;
let res_array = res.to_array();
result.extend_from_slice(&res_array);
}
for i in (len - remainder)..len {
result.push(self[i] / other[i]);
}
Ok(result)
}
fn std_simd_sum(&self) -> Result<f64, VeloxxError> {
optimized::std_simd_sum_optimized(self)
}
fn std_simd_mean(&self) -> Result<Option<f64>, VeloxxError> {
if self.is_empty() {
Ok(None)
} else {
Ok(Some(self.std_simd_sum()? / self.len() as f64))
}
}
}
impl StdSimdOps<i32> for [i32] {
fn std_simd_add(&self, other: &[i32]) -> Result<Vec<i32>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = i32x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = i32x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a + b;
let res_array = res.to_array();
result.extend_from_slice(&res_array);
}
for i in (len - remainder)..len {
result.push(self[i] + other[i]);
}
Ok(result)
}
fn std_simd_sub(&self, other: &[i32]) -> Result<Vec<i32>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = i32x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = i32x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a - b;
let res_array = res.to_array();
result.extend_from_slice(&res_array);
}
for i in (len - remainder)..len {
result.push(self[i] - other[i]);
}
Ok(result)
}
fn std_simd_mul(&self, other: &[i32]) -> Result<Vec<i32>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = i32x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
let b = i32x4::from([
other[i * 4],
other[i * 4 + 1],
other[i * 4 + 2],
other[i * 4 + 3],
]);
let res = a * b;
let res_array = res.to_array();
result.extend_from_slice(&res_array);
}
for i in (len - remainder)..len {
result.push(self[i] * other[i]);
}
Ok(result)
}
fn std_simd_div(&self, other: &[i32]) -> Result<Vec<i32>, VeloxxError> {
if self.len() != other.len() {
return Err(VeloxxError::InvalidOperation(
"Arrays must have same length for SIMD operations".to_string(),
));
}
let len = self.len();
let mut result = Vec::with_capacity(len);
let simd_len = len / 4;
let remainder = len % 4;
for i in 0..simd_len {
let a = [
self[i * 4] as f64,
self[i * 4 + 1] as f64,
self[i * 4 + 2] as f64,
self[i * 4 + 3] as f64,
];
let b = [
other[i * 4] as f64,
other[i * 4 + 1] as f64,
other[i * 4 + 2] as f64,
other[i * 4 + 3] as f64,
];
let res = [
(a[0] / b[0]) as i32,
(a[1] / b[1]) as i32,
(a[2] / b[2]) as i32,
(a[3] / b[3]) as i32,
];
result.extend_from_slice(&res);
}
for i in (len - remainder)..len {
result.push(self[i] / other[i]);
}
Ok(result)
}
fn std_simd_sum(&self) -> Result<i32, VeloxxError> {
let len = self.len();
let simd_len = len / 4;
let remainder = len % 4;
let mut simd_sum = i32x4::ZERO;
for i in 0..simd_len {
let a = i32x4::from([
self[i * 4],
self[i * 4 + 1],
self[i * 4 + 2],
self[i * 4 + 3],
]);
simd_sum += a;
}
let mut sum = simd_sum.reduce_add();
sum += self
.iter()
.skip(len - remainder)
.take(remainder)
.copied()
.sum::<i32>();
Ok(sum)
}
fn std_simd_mean(&self) -> Result<Option<f64>, VeloxxError> {
if self.is_empty() {
Ok(None)
} else {
let sum = self.std_simd_sum()? as f64;
Ok(Some(sum / self.len() as f64))
}
}
}
/// Reduction kernels built on the `wide` vector types.
pub mod optimized {
    use crate::VeloxxError;
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    use wide::f64x4;

    /// Sums a slice of `f64` values.
    ///
    /// Slices shorter than four elements are summed with a plain iterator sum.
    /// Longer slices are accumulated four values at a time into an `f64x4`,
    /// which is then reduced to a scalar, and the up-to-three leftover values
    /// are added last. Because elements are accumulated per lane, rounding may
    /// differ from a strict left-to-right sum.
    ///
    /// The function never returns `Err`; the `Result` matches the signature of
    /// the trait method that calls it.
    pub fn std_simd_sum_optimized(values: &[f64]) -> Result<f64, VeloxxError> {
        // Not even one full group of four: skip the vector path entirely.
        if values.len() < 4 {
            return Ok(values.iter().sum());
        }

        let mut quads = values.chunks_exact(4);
        let lane_totals = quads.by_ref().fold(f64x4::ZERO, |acc, quad| {
            acc + f64x4::from([quad[0], quad[1], quad[2], quad[3]])
        });
        let tail_total: f64 = quads.remainder().iter().sum();
        Ok(lane_totals.reduce_add() + tail_total)
    }
}
#[cfg(all(test, feature = "simd"))]
mod tests {
    use super::*;

    // The five-element inputs below cover one full group of four plus a
    // single leftover element; the four-element inputs cover exactly one group.

    /// `f64` addition across a full group and a one-element tail.
    #[test]
    fn test_std_simd_add_f64() {
        let lhs = [1.0, 2.0, 3.0, 4.0, 5.0];
        let ones = [1.0; 5];
        let sums = lhs.std_simd_add(&ones).unwrap();
        assert_eq!(sums, vec![2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    /// `f64` sum across a full group and a one-element tail.
    #[test]
    fn test_std_simd_sum_f64() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        let total = values.std_simd_sum().unwrap();
        assert_eq!(total, 15.0);
    }

    /// `f64` mean of a non-empty slice is `Some(sum / len)`.
    #[test]
    fn test_std_simd_mean_f64() {
        let values = [2.0, 4.0, 6.0, 8.0];
        let mean = values.std_simd_mean().unwrap().unwrap();
        assert_eq!(mean, 5.0);
    }

    /// `i32` addition across a full group and a one-element tail.
    #[test]
    fn test_std_simd_add_i32() {
        let lhs = [1, 2, 3, 4, 5];
        let ones = [1; 5];
        let sums = lhs.std_simd_add(&ones).unwrap();
        assert_eq!(sums, vec![2, 3, 4, 5, 6]);
    }

    /// `i32` sum across a full group and a one-element tail.
    #[test]
    fn test_std_simd_sum_i32() {
        let values = [1, 2, 3, 4, 5];
        let total = values.std_simd_sum().unwrap();
        assert_eq!(total, 15);
    }

    /// `i32` mean is reported as `f64`.
    #[test]
    fn test_std_simd_mean_i32() {
        let values = [2, 4, 6, 8];
        let mean = values.std_simd_mean().unwrap().unwrap();
        assert_eq!(mean, 5.0);
    }
}