use super::config::SIMDConfig;
/// Returns the sum of every value in `data`.
///
/// On x86_64 the work is handed to the AVX2 kernel when the CPU reports AVX2
/// at runtime, otherwise to the SSE2 kernel when SSE2 is reported. On other
/// targets, or when neither feature is detected, a plain iterator sum is used.
/// The vector kernels accumulate lanes separately, so the rounding of the
/// result may differ slightly from a strict left-to-right sum.
pub fn simd_sum_f64(data: &[f64]) -> f64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { simd_sum_f64_avx2(data) };
        }
        if is_x86_feature_detected!("sse2") {
            // SAFETY: SSE2 support was confirmed by the runtime check above.
            return unsafe { simd_sum_f64_sse2(data) };
        }
    }

    // Portable fallback.
    data.iter().copied().sum()
}
/// Returns the arithmetic mean of `data`, computed as `simd_sum_f64(data)`
/// divided by the element count.
///
/// An empty slice yields `0.0` instead of dividing by zero.
pub fn simd_mean_f64(data: &[f64]) -> f64 {
    match data.len() {
        0 => 0.0,
        count => simd_sum_f64(data) / count as f64,
    }
}
/// Returns the smallest value in `data`.
///
/// On x86_64 the AVX2 kernel is used when AVX2 is detected at runtime,
/// otherwise the SSE2 kernel when SSE2 is detected. On other targets, or when
/// neither feature is detected, a scalar fold over `f64::min` is used.
///
/// An empty slice yields `f64::INFINITY` on every path.
pub fn simd_min_f64(data: &[f64]) -> f64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { simd_min_f64_avx2(data) };
        }
        if is_x86_feature_detected!("sse2") {
            // SAFETY: SSE2 support was confirmed by the runtime check above.
            return unsafe { simd_min_f64_sse2(data) };
        }
    }

    // Portable fallback.
    data.iter()
        .fold(f64::INFINITY, |lowest, &value| lowest.min(value))
}
/// Returns the largest value in `data`.
///
/// On x86_64 the AVX2 kernel is used when AVX2 is detected at runtime,
/// otherwise the SSE2 kernel when SSE2 is detected. On other targets, or when
/// neither feature is detected, a scalar fold over `f64::max` is used.
///
/// An empty slice yields `f64::NEG_INFINITY` on every path.
pub fn simd_max_f64(data: &[f64]) -> f64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { simd_max_f64_avx2(data) };
        }
        if is_x86_feature_detected!("sse2") {
            // SAFETY: SSE2 support was confirmed by the runtime check above.
            return unsafe { simd_max_f64_sse2(data) };
        }
    }

    // Portable fallback.
    data.iter()
        .fold(f64::NEG_INFINITY, |highest, &value| highest.max(value))
}
/// Returns the sum of every value in `data`.
///
/// On x86_64 the AVX2 kernel is used when AVX2 is detected at runtime,
/// otherwise the SSE2 kernel when SSE2 is detected. On other targets, or when
/// neither feature is detected, a plain iterator sum is used. Overflow
/// handling is whatever the selected path implements.
pub fn simd_sum_i64(data: &[i64]) -> i64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { simd_sum_i64_avx2(data) };
        }
        if is_x86_feature_detected!("sse2") {
            // SAFETY: SSE2 support was confirmed by the runtime check above.
            return unsafe { simd_sum_i64_sse2(data) };
        }
    }

    // Portable fallback.
    data.iter().copied().sum()
}
/// Returns the integer mean of `data`: `simd_sum_i64(data)` divided by the
/// element count using integer division, so any fractional part is discarded.
///
/// An empty slice yields `0` instead of dividing by zero.
pub fn simd_mean_i64(data: &[i64]) -> i64 {
    match data.len() {
        0 => 0,
        count => simd_sum_i64(data) / count as i64,
    }
}
/// Returns the smallest value in `data`, or `i64::MAX` for an empty slice.
///
/// On x86_64 the call is routed to the AVX2 or SSE2 helper depending on which
/// feature is detected at runtime; otherwise the slice is scanned with the
/// standard iterator `min`.
pub fn simd_min_i64(data: &[i64]) -> i64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { simd_min_i64_avx2(data) };
        }
        if is_x86_feature_detected!("sse2") {
            // SAFETY: SSE2 support was confirmed by the runtime check above.
            return unsafe { simd_min_i64_sse2(data) };
        }
    }

    // Portable fallback.
    data.iter().min().copied().unwrap_or(i64::MAX)
}
/// Returns the largest value in `data`, or `i64::MIN` for an empty slice.
///
/// On x86_64 the call is routed to the AVX2 or SSE2 helper depending on which
/// feature is detected at runtime; otherwise the slice is scanned with the
/// standard iterator `max`.
pub fn simd_max_i64(data: &[i64]) -> i64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { simd_max_i64_avx2(data) };
        }
        if is_x86_feature_detected!("sse2") {
            // SAFETY: SSE2 support was confirmed by the runtime check above.
            return unsafe { simd_max_i64_sse2(data) };
        }
    }

    // Portable fallback.
    data.iter().max().copied().unwrap_or(i64::MIN)
}
/// Sums `data` four lanes at a time with 256-bit loads and adds.
///
/// Full groups of four values are accumulated lane-wise; the four lane totals
/// are then added together, followed by any leftover tail elements. An empty
/// slice sums to `0.0`.
///
/// The function is compiled with the `avx2` target feature enabled so the
/// vector intrinsics can be inlined into the loop instead of being emitted as
/// out-of-line calls.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2, for example by
/// checking `is_x86_feature_detected!("avx2")` first.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd_sum_f64_avx2(data: &[f64]) -> f64 {
    use std::arch::x86_64::*;

    let chunks = data.chunks_exact(4);
    let tail = chunks.remainder();

    let mut acc = _mm256_setzero_pd();
    for chunk in chunks {
        // SAFETY: `chunks_exact(4)` only yields slices of exactly four `f64`s,
        // so the unaligned 256-bit load stays inside the slice.
        let lanes = unsafe { _mm256_loadu_pd(chunk.as_ptr()) };
        acc = _mm256_add_pd(acc, lanes);
    }

    let mut partial = [0.0f64; 4];
    // SAFETY: `partial` holds four `f64`s, exactly the 32 bytes being stored.
    unsafe { _mm256_storeu_pd(partial.as_mut_ptr(), acc) };

    let lane_total = partial[0] + partial[1] + partial[2] + partial[3];
    tail.iter().fold(lane_total, |total, &value| total + value)
}
/// Returns the smallest value in `data` using 256-bit vector minimums.
///
/// NaN elements are ignored, matching the scalar `f64::min` fold used when no
/// vector path is available. An empty or all-NaN slice yields `f64::INFINITY`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2, for example by
/// checking `is_x86_feature_detected!("avx2")` first.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd_min_f64_avx2(data: &[f64]) -> f64 {
    use std::arch::x86_64::*;

    let chunks = data.chunks_exact(4);
    let tail = chunks.remainder();

    let mut lowest = _mm256_set1_pd(f64::INFINITY);
    for chunk in chunks {
        // SAFETY: `chunks_exact(4)` only yields slices of exactly four `f64`s,
        // so the unaligned 256-bit load stays inside the slice.
        let lanes = unsafe { _mm256_loadu_pd(chunk.as_ptr()) };
        // Operand order matters: the instruction returns its SECOND operand
        // whenever either input is NaN. Keeping the accumulator second means a
        // NaN element leaves the running minimum untouched instead of
        // replacing it (and then being overwritten by the next chunk).
        lowest = _mm256_min_pd(lanes, lowest);
    }

    let mut partial = [f64::INFINITY; 4];
    // SAFETY: `partial` holds four `f64`s, exactly the 32 bytes being stored.
    unsafe { _mm256_storeu_pd(partial.as_mut_ptr(), lowest) };

    // `f64::min` also ignores NaN, so the tail follows the same rule.
    partial
        .iter()
        .chain(tail)
        .copied()
        .fold(f64::INFINITY, f64::min)
}
/// Returns the largest value in `data` using 256-bit vector maximums.
///
/// NaN elements are ignored, matching the scalar `f64::max` fold used when no
/// vector path is available. An empty or all-NaN slice yields
/// `f64::NEG_INFINITY`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2, for example by
/// checking `is_x86_feature_detected!("avx2")` first.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd_max_f64_avx2(data: &[f64]) -> f64 {
    use std::arch::x86_64::*;

    let chunks = data.chunks_exact(4);
    let tail = chunks.remainder();

    let mut highest = _mm256_set1_pd(f64::NEG_INFINITY);
    for chunk in chunks {
        // SAFETY: `chunks_exact(4)` only yields slices of exactly four `f64`s,
        // so the unaligned 256-bit load stays inside the slice.
        let lanes = unsafe { _mm256_loadu_pd(chunk.as_ptr()) };
        // Operand order matters: the instruction returns its SECOND operand
        // whenever either input is NaN. Keeping the accumulator second means a
        // NaN element leaves the running maximum untouched instead of
        // replacing it (and then being overwritten by the next chunk).
        highest = _mm256_max_pd(lanes, highest);
    }

    let mut partial = [f64::NEG_INFINITY; 4];
    // SAFETY: `partial` holds four `f64`s, exactly the 32 bytes being stored.
    unsafe { _mm256_storeu_pd(partial.as_mut_ptr(), highest) };

    // `f64::max` also ignores NaN, so the tail follows the same rule.
    partial
        .iter()
        .chain(tail)
        .copied()
        .fold(f64::NEG_INFINITY, f64::max)
}
/// Sums `data` two lanes at a time with 128-bit loads and adds.
///
/// Full pairs are accumulated lane-wise; the two lane totals are then added
/// together, followed by the leftover element if the length is odd. An empty
/// slice sums to `0.0`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_sum_f64_sse2(data: &[f64]) -> f64 {
    use std::arch::x86_64::*;

    let pairs = data.chunks_exact(2);
    let tail = pairs.remainder();

    let mut acc = _mm_setzero_pd();
    for pair in pairs {
        // Each `pair` is exactly two `f64`s, so the 128-bit load is in bounds.
        acc = _mm_add_pd(acc, _mm_loadu_pd(pair.as_ptr()));
    }

    let mut lanes = [0.0f64; 2];
    _mm_storeu_pd(lanes.as_mut_ptr(), acc);

    tail.iter()
        .fold(lanes[0] + lanes[1], |total, &value| total + value)
}
/// Returns the smallest value in `data` using 128-bit vector minimums.
///
/// NaN elements are ignored, matching the scalar `f64::min` fold used when no
/// vector path is available. An empty or all-NaN slice yields `f64::INFINITY`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_min_f64_sse2(data: &[f64]) -> f64 {
    use std::arch::x86_64::*;

    let pairs = data.chunks_exact(2);
    let tail = pairs.remainder();

    let mut lowest = _mm_set1_pd(f64::INFINITY);
    for pair in pairs {
        // SAFETY: `chunks_exact(2)` only yields slices of exactly two `f64`s,
        // so the unaligned 128-bit load stays inside the slice.
        let lanes = unsafe { _mm_loadu_pd(pair.as_ptr()) };
        // Operand order matters: the instruction returns its SECOND operand
        // whenever either input is NaN. Keeping the accumulator second means a
        // NaN element leaves the running minimum untouched instead of
        // replacing it (and then being overwritten by the next pair).
        lowest = _mm_min_pd(lanes, lowest);
    }

    let mut partial = [f64::INFINITY; 2];
    // SAFETY: `partial` holds two `f64`s, exactly the 16 bytes being stored.
    unsafe { _mm_storeu_pd(partial.as_mut_ptr(), lowest) };

    // `f64::min` also ignores NaN, so the tail follows the same rule.
    partial
        .iter()
        .chain(tail)
        .copied()
        .fold(f64::INFINITY, f64::min)
}
/// Returns the largest value in `data` using 128-bit vector maximums.
///
/// NaN elements are ignored, matching the scalar `f64::max` fold used when no
/// vector path is available. An empty or all-NaN slice yields
/// `f64::NEG_INFINITY`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_max_f64_sse2(data: &[f64]) -> f64 {
    use std::arch::x86_64::*;

    let pairs = data.chunks_exact(2);
    let tail = pairs.remainder();

    let mut highest = _mm_set1_pd(f64::NEG_INFINITY);
    for pair in pairs {
        // SAFETY: `chunks_exact(2)` only yields slices of exactly two `f64`s,
        // so the unaligned 128-bit load stays inside the slice.
        let lanes = unsafe { _mm_loadu_pd(pair.as_ptr()) };
        // Operand order matters: the instruction returns its SECOND operand
        // whenever either input is NaN. Keeping the accumulator second means a
        // NaN element leaves the running maximum untouched instead of
        // replacing it (and then being overwritten by the next pair).
        highest = _mm_max_pd(lanes, highest);
    }

    let mut partial = [f64::NEG_INFINITY; 2];
    // SAFETY: `partial` holds two `f64`s, exactly the 16 bytes being stored.
    unsafe { _mm_storeu_pd(partial.as_mut_ptr(), highest) };

    // `f64::max` also ignores NaN, so the tail follows the same rule.
    partial
        .iter()
        .chain(tail)
        .copied()
        .fold(f64::NEG_INFINITY, f64::max)
}
/// Sums `data` four lanes at a time with 256-bit integer loads and adds.
///
/// All additions use two's-complement wrapping arithmetic. The vector add
/// already wraps per lane, so the lane reduction and tail use `wrapping_add`
/// as well: the result is then exact whenever the true sum fits in `i64`,
/// regardless of intermediate lane overflow, and wraps otherwise. Previously
/// the final additions used checked `+`, which could panic in builds with
/// overflow checks even though the true sum was representable.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2, for example by
/// checking `is_x86_feature_detected!("avx2")` first.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd_sum_i64_avx2(data: &[i64]) -> i64 {
    use std::arch::x86_64::*;

    let chunks = data.chunks_exact(4);
    let tail = chunks.remainder();

    let mut acc = _mm256_setzero_si256();
    for chunk in chunks {
        // SAFETY: `chunks_exact(4)` only yields slices of exactly four `i64`s
        // (32 bytes), so the unaligned 256-bit load stays inside the slice.
        let lanes = unsafe { _mm256_loadu_si256(chunk.as_ptr() as *const __m256i) };
        acc = _mm256_add_epi64(acc, lanes);
    }

    let mut partial = [0i64; 4];
    // SAFETY: `partial` holds four `i64`s, exactly the 32 bytes being stored.
    unsafe { _mm256_storeu_si256(partial.as_mut_ptr() as *mut __m256i, acc) };

    partial
        .iter()
        .chain(tail)
        .fold(0i64, |total, &value| total.wrapping_add(value))
}
/// Returns the smallest value in `data`, or `i64::MAX` for an empty slice.
///
/// Despite the name this is a plain scalar scan; no vector intrinsics are
/// used in the body.
///
/// # Safety
///
/// The body performs no unsafe operations; the `unsafe` marker only keeps the
/// signature parallel to the other AVX2 helpers.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_min_i64_avx2(data: &[i64]) -> i64 {
    match data.iter().min() {
        Some(&smallest) => smallest,
        None => i64::MAX,
    }
}
/// Returns the largest value in `data`, or `i64::MIN` for an empty slice.
///
/// Despite the name this is a plain scalar scan; no vector intrinsics are
/// used in the body.
///
/// # Safety
///
/// The body performs no unsafe operations; the `unsafe` marker only keeps the
/// signature parallel to the other AVX2 helpers.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_max_i64_avx2(data: &[i64]) -> i64 {
    match data.iter().max() {
        Some(&largest) => largest,
        None => i64::MIN,
    }
}
/// Sums `data` two lanes at a time with 128-bit integer loads and adds.
///
/// All additions use two's-complement wrapping arithmetic. The vector add
/// already wraps per lane, so the lane reduction and tail use `wrapping_add`
/// as well: the result is then exact whenever the true sum fits in `i64`,
/// regardless of intermediate lane overflow, and wraps otherwise. Previously
/// the final additions used checked `+`, which could panic in builds with
/// overflow checks even though the true sum was representable.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_sum_i64_sse2(data: &[i64]) -> i64 {
    use std::arch::x86_64::*;

    let pairs = data.chunks_exact(2);
    let tail = pairs.remainder();

    let mut acc = _mm_setzero_si128();
    for pair in pairs {
        // SAFETY: `chunks_exact(2)` only yields slices of exactly two `i64`s
        // (16 bytes), so the unaligned 128-bit load stays inside the slice.
        let lanes = unsafe { _mm_loadu_si128(pair.as_ptr() as *const __m128i) };
        acc = _mm_add_epi64(acc, lanes);
    }

    let mut partial = [0i64; 2];
    // SAFETY: `partial` holds two `i64`s, exactly the 16 bytes being stored.
    unsafe { _mm_storeu_si128(partial.as_mut_ptr() as *mut __m128i, acc) };

    partial
        .iter()
        .chain(tail)
        .fold(0i64, |total, &value| total.wrapping_add(value))
}
/// Returns the smallest value in `data`, or `i64::MAX` for an empty slice.
///
/// Despite the name this is a plain scalar scan; no vector intrinsics are
/// used in the body.
///
/// # Safety
///
/// The body performs no unsafe operations; the `unsafe` marker only keeps the
/// signature parallel to the other SSE2 helpers.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_min_i64_sse2(data: &[i64]) -> i64 {
    match data.iter().min() {
        Some(&smallest) => smallest,
        None => i64::MAX,
    }
}
/// Returns the largest value in `data`, or `i64::MIN` for an empty slice.
///
/// Despite the name this is a plain scalar scan; no vector intrinsics are
/// used in the body.
///
/// # Safety
///
/// The body performs no unsafe operations; the `unsafe` marker only keeps the
/// signature parallel to the other SSE2 helpers.
#[cfg(target_arch = "x86_64")]
unsafe fn simd_max_i64_sse2(data: &[i64]) -> i64 {
    match data.iter().max() {
        Some(&largest) => largest,
        None => i64::MIN,
    }
}
/// Reports whether a vector path is usable on this machine.
///
/// On x86_64 this is the runtime SSE2 check; on every other target it is
/// always `false`.
pub fn simd_available() -> bool {
    #[cfg(target_arch = "x86_64")]
    let has_simd = is_x86_feature_detected!("sse2");

    #[cfg(not(target_arch = "x86_64"))]
    let has_simd = false;

    has_simd
}
/// Reports whether AVX2 is usable on this machine.
///
/// On x86_64 this is the runtime AVX2 check; on every other target it is
/// always `false`.
pub fn avx2_available() -> bool {
    #[cfg(target_arch = "x86_64")]
    let has_avx2 = is_x86_feature_detected!("avx2");

    #[cfg(not(target_arch = "x86_64"))]
    let has_avx2 = false;

    has_avx2
}
/// Describes the x86 vector extensions detected at runtime.
///
/// The result is a `", "`-separated list in the fixed order AVX2, SSE4.2,
/// SSE4.1, SSSE3, SSE3, SSE2, SSE, containing only the features that were
/// detected. When nothing is detected, or on a non-x86_64 target, the string
/// `"None"` is returned.
pub fn simd_capabilities() -> String {
    #[cfg(target_arch = "x86_64")]
    {
        // One (detected, label) entry per feature, in reporting order.
        let probes = [
            (is_x86_feature_detected!("avx2"), "AVX2"),
            (is_x86_feature_detected!("sse4.2"), "SSE4.2"),
            (is_x86_feature_detected!("sse4.1"), "SSE4.1"),
            (is_x86_feature_detected!("ssse3"), "SSSE3"),
            (is_x86_feature_detected!("sse3"), "SSE3"),
            (is_x86_feature_detected!("sse2"), "SSE2"),
            (is_x86_feature_detected!("sse"), "SSE"),
        ];

        let detected: Vec<&str> = probes
            .iter()
            .filter(|&&(present, _)| present)
            .map(|&(_, label)| label)
            .collect();

        if detected.is_empty() {
            "None".to_string()
        } else {
            detected.join(", ")
        }
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        "None".to_string()
    }
}
/// Unit tests for the public reduction and capability helpers.
///
/// Inputs use small integral values so every expected result is exact on all
/// dispatch paths.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simd_sum_f64() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let expected = 36.0;
        let result = simd_sum_f64(&data);
        assert!((result - expected).abs() < 1e-10);
    }

    #[test]
    fn test_simd_mean_f64() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected = 3.0;
        let result = simd_mean_f64(&data);
        assert!((result - expected).abs() < 1e-10);
    }

    #[test]
    fn test_simd_min_max_f64() {
        let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0];
        let min_result = simd_min_f64(&data);
        let max_result = simd_max_f64(&data);
        assert_eq!(min_result, 1.0);
        assert_eq!(max_result, 9.0);
    }

    #[test]
    fn test_simd_sum_i64() {
        let data = vec![1i64, 2, 3, 4, 5, 6, 7, 8];
        let expected = 36i64;
        let result = simd_sum_i64(&data);
        assert_eq!(result, expected);
    }

    /// Lengths that are not a multiple of the vector width exercise the
    /// leftover-element handling of the vector kernels.
    #[test]
    fn test_simd_sum_with_remainder() {
        let data_f64 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
        assert_eq!(simd_sum_f64(&data_f64), 28.0);
        assert_eq!(simd_sum_f64(&data_f64[..1]), 1.0);

        let data_i64 = [1i64, 2, 3, 4, 5, 6, 7];
        assert_eq!(simd_sum_i64(&data_i64), 28);
        assert_eq!(simd_sum_i64(&data_i64[..1]), 1);
    }

    #[test]
    fn test_simd_mean_i64() {
        assert_eq!(simd_mean_i64(&[2, 4, 6, 8, 10]), 6);
        // Integer division discards the fractional part.
        assert_eq!(simd_mean_i64(&[1, 2]), 1);
    }

    #[test]
    fn test_simd_min_max_i64() {
        let data = [3i64, -1, 4, 1, -5, 9, 2];
        assert_eq!(simd_min_i64(&data), -5);
        assert_eq!(simd_max_i64(&data), 9);
    }

    #[test]
    fn test_simd_capabilities() {
        let caps = simd_capabilities();
        println!("SIMD capabilities: {}", caps);
        assert!(!caps.is_empty());
    }

    /// `avx2_available` and `simd_capabilities` must agree about AVX2.
    #[test]
    fn test_capability_helpers_agree() {
        assert_eq!(avx2_available(), simd_capabilities().contains("AVX2"));
        assert_eq!(simd_available(), simd_capabilities().contains("SSE2"));
    }

    #[test]
    fn test_empty_arrays() {
        let empty_f64: Vec<f64> = vec![];
        let empty_i64: Vec<i64> = vec![];
        assert_eq!(simd_sum_f64(&empty_f64), 0.0);
        assert_eq!(simd_mean_f64(&empty_f64), 0.0);
        assert_eq!(simd_min_f64(&empty_f64), f64::INFINITY);
        assert_eq!(simd_max_f64(&empty_f64), f64::NEG_INFINITY);
        assert_eq!(simd_sum_i64(&empty_i64), 0);
        assert_eq!(simd_mean_i64(&empty_i64), 0);
        assert_eq!(simd_min_i64(&empty_i64), i64::MAX);
        assert_eq!(simd_max_i64(&empty_i64), i64::MIN);
    }
}