use std::sync::OnceLock;
/// Instruction-set features detected on the running CPU.
///
/// [`get_cpu_features`] fills this in once per process. Flags that belong to
/// an architecture other than the compilation target are always `false`.
/// The derived [`Default`] value reports every feature as absent.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CpuFeatures {
    /// x86: the `avx512f` feature was detected.
    pub has_avx512f: bool,
    /// x86: the `avx2` feature was detected.
    pub has_avx2: bool,
    /// x86: the `sse` feature was detected.
    pub has_sse: bool,
    /// x86: the `fma` feature was detected.
    pub has_fma: bool,
    /// AArch64: the `neon` feature was detected.
    pub has_neon: bool,
    /// AArch64: the `sve` feature was detected.
    pub has_sve: bool,
    /// AArch64: the `sve2` feature was detected.
    pub has_sve2: bool,
    /// AArch64: the `dotprod` feature was detected.
    pub has_dotprod: bool,
    /// AArch64: the `bf16` feature was detected.
    pub has_bf16: bool,
}
/// Cache for the detected CPU features; initialised by the first call to
/// `get_cpu_features` and read by every later call.
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
/// Returns the CPU features of the machine the program is running on.
///
/// Detection runs only on the first call; the result is stored in
/// `CPU_FEATURES` and every later call returns the same reference.
///
/// * `x86` / `x86_64`: probes `avx512f`, `avx2`, `sse` and `fma`.
/// * `aarch64`: probes `neon`, `sve`, `sve2`, `dotprod` and `bf16`.
/// * Any other architecture: every flag is `false`.
pub fn get_cpu_features() -> &'static CpuFeatures {
    CPU_FEATURES.get_or_init(|| {
        // Start from "nothing available"; each target overrides only the
        // flags that exist on its own architecture.
        let none = CpuFeatures {
            has_avx512f: false,
            has_avx2: false,
            has_sse: false,
            has_fma: false,
            has_neon: false,
            has_sve: false,
            has_sve2: false,
            has_dotprod: false,
            has_bf16: false,
        };

        // `is_x86_feature_detected!` is provided for 32-bit x86 as well as
        // x86_64, so both targets share this branch.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            CpuFeatures {
                has_avx512f: std::arch::is_x86_feature_detected!("avx512f"),
                has_avx2: std::arch::is_x86_feature_detected!("avx2"),
                has_sse: std::arch::is_x86_feature_detected!("sse"),
                has_fma: std::arch::is_x86_feature_detected!("fma"),
                ..none
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            CpuFeatures {
                has_neon: std::arch::is_aarch64_feature_detected!("neon"),
                has_sve: std::arch::is_aarch64_feature_detected!("sve"),
                has_sve2: std::arch::is_aarch64_feature_detected!("sve2"),
                has_dotprod: std::arch::is_aarch64_feature_detected!("dotprod"),
                has_bf16: std::arch::is_aarch64_feature_detected!("bf16"),
                ..none
            }
        }
        #[cfg(not(any(
            target_arch = "x86",
            target_arch = "x86_64",
            target_arch = "aarch64"
        )))]
        {
            none
        }
    })
}
/// SIMD feature flags plus tuning parameters derived from them.
///
/// The `Default` implementation fills this in from [`get_cpu_features`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimdCapabilities {
    /// AVX2 is available.
    pub has_avx2: bool,
    /// AVX-512 is available (set from the `avx512f` detection result).
    pub has_avx512: bool,
    /// FMA is available.
    pub has_fma: bool,
    /// SSE4.2 is available.
    pub has_sse42: bool,
    /// BMI2 is available.
    pub has_bmi2: bool,
    /// NEON is available.
    pub has_neon: bool,
    /// SVE is available.
    pub has_sve: bool,
    /// SVE2 is available.
    pub has_sve2: bool,
    /// The AArch64 `dotprod` feature is available.
    pub has_dotprod: bool,
    /// The AArch64 `bf16` feature is available.
    pub has_bf16: bool,
    /// Number of `f32` values per SIMD vector (1 when no SIMD is detected).
    pub vector_width_f32: usize,
    /// Number of `f64` values per SIMD vector (1 when no SIMD is detected).
    pub vector_width_f64: usize,
    /// Cache line size; presumably in bytes — TODO confirm.
    pub cache_line_size: usize,
    /// L1 cache size; presumably in bytes — TODO confirm.
    pub l1_cache_size: usize,
    /// L2 cache size; presumably in bytes — TODO confirm.
    pub l2_cache_size: usize,
    /// Prefetch look-ahead distance; unit is not evident here — TODO confirm.
    pub prefetch_distance: usize,
}
impl Default for SimdCapabilities {
    /// Builds the capabilities for the running CPU.
    ///
    /// Feature flags come from [`get_cpu_features`], except SSE4.2, which is
    /// probed directly. Vector widths are picked from the widest detected
    /// extension (AVX-512, then AVX2, then SSE/NEON, else scalar). The cache
    /// sizes and prefetch distance are fixed constants, not measured values.
    fn default() -> Self {
        let cpu = get_cpu_features();

        // SSE4.2 needs its own probe: `cpu.has_sse` only records baseline SSE,
        // which does not imply SSE4.2 support.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        let has_sse42 = std::arch::is_x86_feature_detected!("sse4.2");
        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
        let has_sse42 = false;

        // (f32 lanes, f64 lanes) for the widest extension that was detected.
        let (vector_width_f32, vector_width_f64) = if cpu.has_avx512f {
            (16, 8)
        } else if cpu.has_avx2 {
            (8, 4)
        } else if cpu.has_sse || cpu.has_neon {
            (4, 2)
        } else {
            (1, 1)
        };

        Self {
            has_avx2: cpu.has_avx2,
            has_avx512: cpu.has_avx512f,
            has_fma: cpu.has_fma,
            has_sse42,
            // NOTE(review): BMI2 is never probed and is always reported as
            // unavailable; confirm whether that is intentional before enabling.
            has_bmi2: false,
            has_neon: cpu.has_neon,
            has_sve: cpu.has_sve,
            has_sve2: cpu.has_sve2,
            has_dotprod: cpu.has_dotprod,
            has_bf16: cpu.has_bf16,
            vector_width_f32,
            vector_width_f64,
            // Fixed assumptions rather than queried values.
            cache_line_size: 64,
            l1_cache_size: 32768,
            l2_cache_size: 262144,
            prefetch_distance: 16,
        }
    }
}
#[allow(dead_code)]
pub fn detect_simd_capabilities() -> SimdCapabilities {
SimdCapabilities::default()
}