#[cfg(feature = "simd")]
pub use wide::*;
#[cfg(not(feature = "std"))]
use spin::Once;
#[cfg(feature = "std")]
use std::sync::Once;
/// Guards the one-time CPU feature probe run by [`init_simd_features`].
static INIT: Once = Once::new();

/// Memory ordering used for every flag access. `INIT` already orders the
/// probe's writes before any read that follows `call_once`, so the flags
/// themselves need no stronger ordering.
const FLAG_ORDERING: std::sync::atomic::Ordering = std::sync::atomic::Ordering::Relaxed;

/// Set by the probe when AVX2 is detected; only ever written on x86_64 builds.
static HAS_AVX2: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
/// Set by the probe when AVX-512F is detected; only ever written on x86_64 builds.
static HAS_AVX512: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
/// Set by the probe on aarch64 builds; no runtime check is performed there.
static HAS_NEON: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);

/// Probes the CPU at most once and records the outcome in the `HAS_*` flags.
///
/// The flags are atomics rather than `static mut`, so neither the probe nor
/// the public accessors need any `unsafe` code.
fn init_simd_features() {
    INIT.call_once(|| {
        #[cfg(target_arch = "x86_64")]
        {
            HAS_AVX2.store(is_x86_feature_detected!("avx2"), FLAG_ORDERING);
            HAS_AVX512.store(is_x86_feature_detected!("avx512f"), FLAG_ORDERING);
        }
        #[cfg(target_arch = "aarch64")]
        {
            // aarch64 builds report NEON unconditionally.
            HAS_NEON.store(true, FLAG_ORDERING);
        }
    });
}

/// Makes sure the probe has run, then reads `flag`.
fn probed(flag: &std::sync::atomic::AtomicBool) -> bool {
    init_simd_features();
    flag.load(FLAG_ORDERING)
}

/// Returns `true` when the running CPU reports AVX2 (always `false` off x86_64).
pub fn has_avx2() -> bool {
    probed(&HAS_AVX2)
}

/// Returns `true` when the running CPU reports AVX-512F (always `false` off x86_64).
pub fn has_avx512() -> bool {
    probed(&HAS_AVX512)
}

/// Returns `true` on aarch64 builds and `false` everywhere else.
pub fn has_neon() -> bool {
    probed(&HAS_NEON)
}
/// Decides whether a SIMD code path should be taken for an input of `len` elements.
///
/// Inputs shorter than 16 elements are rejected without querying the CPU at all.
/// Longer inputs qualify when any of AVX2, AVX-512 or NEON is reported.
pub fn should_use_simd(len: usize) -> bool {
    // Smallest input length for which SIMD is considered.
    const MIN_SIMD_LEN: usize = 16;

    if len < MIN_SIMD_LEN {
        return false;
    }
    has_avx2() || has_avx512() || has_neon()
}
/// Returns how many values of type `T` fit in the widest SIMD register reported
/// for this CPU: 64 bytes with AVX-512, 32 bytes with AVX2, 16 bytes with NEON.
///
/// The result is always at least 1, so it is safe to use as a chunk length:
/// - with no SIMD tier reported the answer is 1 (scalar processing);
/// - zero-sized `T` yields 1 instead of dividing by zero;
/// - a `T` wider than the register yields 1 instead of rounding down to 0.
pub fn optimal_simd_chunk_size<T>() -> usize {
    let register_bytes: usize = if has_avx512() {
        64
    } else if has_avx2() {
        32
    } else if has_neon() {
        16
    } else {
        // No SIMD support reported: one element at a time.
        return 1;
    };

    match std::mem::size_of::<T>() {
        // Zero-sized types would otherwise trigger a division by zero.
        0 => 1,
        // Oversized types would otherwise produce a chunk length of 0.
        element_bytes => (register_bytes / element_bytes).max(1),
    }
}
/// Lane counts per SIMD register for the element types used by this module.
///
/// Each constant is the register width in bytes divided by the element size.
pub mod vector_width {
    // Register widths in bytes for each instruction-set tier.
    const AVX512_BYTES: usize = 64;
    const AVX2_BYTES: usize = 32;
    const NEON_BYTES: usize = 16;

    /// `f32` lanes in an AVX-512 register.
    pub const F32_AVX512: usize = AVX512_BYTES / std::mem::size_of::<f32>();
    /// `f32` lanes in an AVX2 register.
    pub const F32_AVX2: usize = AVX2_BYTES / std::mem::size_of::<f32>();
    /// `f32` lanes in a NEON register.
    pub const F32_NEON: usize = NEON_BYTES / std::mem::size_of::<f32>();

    /// `f64` lanes in an AVX-512 register.
    pub const F64_AVX512: usize = AVX512_BYTES / std::mem::size_of::<f64>();
    /// `f64` lanes in an AVX2 register.
    pub const F64_AVX2: usize = AVX2_BYTES / std::mem::size_of::<f64>();
    /// `f64` lanes in a NEON register.
    pub const F64_NEON: usize = NEON_BYTES / std::mem::size_of::<f64>();

    /// `i32` lanes in an AVX-512 register.
    pub const I32_AVX512: usize = AVX512_BYTES / std::mem::size_of::<i32>();
    /// `i32` lanes in an AVX2 register.
    pub const I32_AVX2: usize = AVX2_BYTES / std::mem::size_of::<i32>();
    /// `i32` lanes in a NEON register.
    pub const I32_NEON: usize = NEON_BYTES / std::mem::size_of::<i32>();
}
/// Number of `f32` lanes in the widest SIMD register reported for this CPU.
///
/// Tiers are checked in the order AVX-512, AVX2, NEON; the first one reported
/// decides the answer. With none reported the width is 1 (scalar).
pub fn f32_vector_width() -> usize {
    if has_avx512() {
        return vector_width::F32_AVX512;
    }
    if has_avx2() {
        return vector_width::F32_AVX2;
    }
    if has_neon() {
        return vector_width::F32_NEON;
    }
    1
}
/// Number of `f64` lanes in the widest SIMD register reported for this CPU.
///
/// Tiers are checked in the order AVX-512, AVX2, NEON; the first one reported
/// decides the answer. With none reported the width is 1 (scalar).
pub fn f64_vector_width() -> usize {
    if has_avx512() {
        return vector_width::F64_AVX512;
    }
    if has_avx2() {
        return vector_width::F64_AVX2;
    }
    if has_neon() {
        return vector_width::F64_NEON;
    }
    1
}
/// Number of `i32` lanes in the widest SIMD register reported for this CPU.
///
/// Tiers are checked in the order AVX-512, AVX2, NEON; the first one reported
/// decides the answer. With none reported the width is 1 (scalar).
pub fn i32_vector_width() -> usize {
    if has_avx512() {
        return vector_width::I32_AVX512;
    }
    if has_avx2() {
        return vector_width::I32_AVX2;
    }
    if has_neon() {
        return vector_width::I32_NEON;
    }
    1
}
pub mod alignment {
pub fn is_aligned<T>(ptr: *const T, alignment: usize) -> bool {
(ptr as usize).is_multiple_of(alignment)
}
pub fn simd_alignment() -> usize {
if crate::cpu::simd::core::has_avx512() {
64 } else if crate::cpu::simd::core::has_avx2() {
32 } else {
16 }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Feature queries are cached and consistent with the build target.
    #[test]
    fn test_feature_detection() {
        let (avx2, avx512, neon) = (has_avx2(), has_avx512(), has_neon());

        // Repeated queries must return the cached answer.
        assert_eq!(avx2, has_avx2());
        assert_eq!(avx512, has_avx512());
        assert_eq!(neon, has_neon());

        // NEON is reported exactly on aarch64 builds.
        assert_eq!(neon, cfg!(target_arch = "aarch64"));
        // The AVX flags are only ever set on x86_64 builds.
        if !cfg!(target_arch = "x86_64") {
            assert!(!avx2);
            assert!(!avx512);
        }
    }

    /// Short inputs never use SIMD; longer ones follow the reported features.
    #[test]
    fn test_should_use_simd() {
        assert!(!should_use_simd(0));
        assert!(!should_use_simd(4));
        assert!(!should_use_simd(8));
        assert!(!should_use_simd(15));

        let any_simd = has_avx2() || has_avx512() || has_neon();
        assert_eq!(should_use_simd(16), any_simd);
        assert_eq!(should_use_simd(usize::MAX), any_simd);
    }

    /// Chunk sizes are positive and agree with the per-type vector widths.
    #[test]
    fn test_optimal_chunk_size() {
        let f32_chunk = optimal_simd_chunk_size::<f32>();
        let f64_chunk = optimal_simd_chunk_size::<f64>();
        assert!(f32_chunk >= 1);
        assert!(f64_chunk >= 1);
        assert!(f32_chunk >= f64_chunk);
        assert_eq!(f32_chunk, f32_vector_width());
        assert_eq!(f64_chunk, f64_vector_width());
    }

    /// Vector widths are positive and ordered by element size.
    #[test]
    fn test_vector_widths() {
        let f32_width = f32_vector_width();
        let f64_width = f64_vector_width();
        let i32_width = i32_vector_width();
        assert!(f32_width >= 1);
        assert!(f64_width >= 1);
        assert!(i32_width >= 1);
        // f32 and i32 are both four bytes wide, so they share a lane count.
        assert_eq!(f32_width, i32_width);
        assert!(f32_width >= f64_width);
    }

    /// Alignment checks are exercised on a buffer whose alignment is known.
    #[test]
    fn test_alignment() {
        // Force a 64-byte boundary so every assertion below is deterministic.
        #[repr(align(64))]
        struct Aligned([f32; 16]);

        let buffer = Aligned([1.0f32; 16]);
        let ptr = buffer.0.as_ptr();
        assert!(alignment::is_aligned(ptr, 16));
        assert!(alignment::is_aligned(ptr, 32));
        assert!(alignment::is_aligned(ptr, 64));

        // One `f32` past the boundary is four bytes in: 4-aligned, not 16-aligned.
        let second = ptr.wrapping_add(1);
        assert!(alignment::is_aligned(second, 4));
        assert!(!alignment::is_aligned(second, 16));

        let req = alignment::simd_alignment();
        assert!(req == 16 || req == 32 || req == 64);
    }
}