use crate::backend::native::types::CpuProfile;
use std::sync::atomic::{AtomicUsize, Ordering};
/// Process-wide cache for the result of `detect_cpu_profile`.
///
/// Holds `usize::MAX` while no detection result has been stored; otherwise it
/// holds the integer encoding produced by `profile_to_usize`.
static CACHED_CPU_PROFILE: AtomicUsize = AtomicUsize::new(usize::MAX);
/// Encodes a [`CpuProfile`] as a small integer so it can live in an atomic.
///
/// The mapping is the inverse of `usize_to_profile` for the codes `0..=4`.
/// The match is deliberately exhaustive (no wildcard) so that adding a new
/// variant forces this table to be updated.
#[inline]
fn profile_to_usize(profile: CpuProfile) -> usize {
    match profile {
        CpuProfile::Generic => 0,
        CpuProfile::Auto => 1,
        CpuProfile::X86Zen4 => 2,
        CpuProfile::X86Avx2 => 3,
        CpuProfile::X86Avx512 => 4,
    }
}
/// Decodes an integer produced by `profile_to_usize` back into a [`CpuProfile`].
///
/// Codes `1..=4` map to their specific profiles. Every other value, including
/// `0` and anything out of range, decodes to [`CpuProfile::Generic`].
#[inline]
fn usize_to_profile(value: usize) -> CpuProfile {
    match value {
        1 => CpuProfile::Auto,
        2 => CpuProfile::X86Zen4,
        3 => CpuProfile::X86Avx2,
        4 => CpuProfile::X86Avx512,
        // `0` is the code for Generic; unknown codes share the same fallback.
        _ => CpuProfile::Generic,
    }
}
/// Detects the CPU profile of the running machine, caching the answer.
///
/// The first call probes the hardware (x86_64 and aarch64 have dedicated
/// probes, every other architecture is reported as [`CpuProfile::Generic`])
/// and stores the result in `CACHED_CPU_PROFILE`. Later calls return the
/// cached value without probing again.
pub fn detect_cpu_profile() -> CpuProfile {
    let slot = CACHED_CPU_PROFILE.load(Ordering::Relaxed);

    if slot == usize::MAX {
        // Cache miss: probe for the architecture this binary was compiled for.
        let fresh = if cfg!(target_arch = "x86_64") {
            detect_x86_64_profile()
        } else if cfg!(target_arch = "aarch64") {
            detect_aarch64_profile()
        } else {
            CpuProfile::Generic
        };

        CACHED_CPU_PROFILE.store(profile_to_usize(fresh), Ordering::Relaxed);
        fresh
    } else {
        usize_to_profile(slot)
    }
}
/// Picks the best profile for an x86_64 CPU from the runtime feature probes.
///
/// Priority order: AVX-512, then (with AVX2 present) the Zen 4 heuristic,
/// then plain AVX2, and finally [`CpuProfile::Generic`].
///
/// Because AVX-512 is checked first, a CPU for which `has_avx512_support()`
/// is true is never reported as [`CpuProfile::X86Zen4`].
#[inline]
fn detect_x86_64_profile() -> CpuProfile {
    if has_avx512_support() {
        CpuProfile::X86Avx512
    } else if !has_avx2_support() {
        CpuProfile::Generic
    } else if is_zen4_cpu() {
        CpuProfile::X86Zen4
    } else {
        CpuProfile::X86Avx2
    }
}
/// Picks the profile for an aarch64 CPU.
///
/// No aarch64-specific profile is distinguished here, so this always yields
/// [`CpuProfile::Generic`].
#[inline]
fn detect_aarch64_profile() -> CpuProfile {
    CpuProfile::Generic
}
/// Reports whether the running CPU provides the AVX2 tier used by this module.
///
/// The tier requires AVX2, FMA and BMI2 together. Always `false` when the
/// binary is not compiled for x86_64.
#[inline]
fn has_avx2_support() -> bool {
    #[cfg(target_arch = "x86_64")]
    {
        let required = [
            std::arch::is_x86_feature_detected!("avx2"),
            std::arch::is_x86_feature_detected!("fma"),
            std::arch::is_x86_feature_detected!("bmi2"),
        ];
        required.iter().all(|&present| present)
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        false
    }
}
/// Reports whether the running CPU provides the AVX-512 tier used by this module.
///
/// The tier requires AVX-512F, AVX-512VL and AVX-512DQ together. Always
/// `false` when the binary is not compiled for x86_64.
#[inline]
fn has_avx512_support() -> bool {
    #[cfg(target_arch = "x86_64")]
    {
        let required = [
            std::arch::is_x86_feature_detected!("avx512f"),
            std::arch::is_x86_feature_detected!("avx512vl"),
            std::arch::is_x86_feature_detected!("avx512dq"),
        ];
        required.iter().all(|&present| present)
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        false
    }
}
/// Heuristic used to select [`CpuProfile::X86Zen4`].
///
/// Returns `true` when the AVX2 tier (`has_avx2_support`) is present and the
/// CPU additionally reports ADX and SHA. Always `false` when the binary is
/// not compiled for x86_64.
///
/// NOTE(review): this is a feature-flag heuristic, not a vendor/family check;
/// ADX and SHA are also reported by CPUs other than Zen 4 — confirm whether a
/// CPUID-based identification is wanted.
#[inline]
fn is_zen4_cpu() -> bool {
    #[cfg(target_arch = "x86_64")]
    {
        // The AVX2 tier already covers avx2, fma and bmi2, so only the two
        // extra features need to be probed once it is confirmed.
        if !has_avx2_support() {
            return false;
        }
        std::arch::is_x86_feature_detected!("adx") && std::arch::is_x86_feature_detected!("sha")
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        false
    }
}
/// Turns a requested profile into one the running CPU can actually execute.
///
/// * `Auto` resolves to whatever `detect_cpu_profile` reports.
/// * `Generic` is always honoured.
/// * `X86Zen4` is honoured when the Zen 4 heuristic passes; otherwise the
///   detected profile is used instead.
/// * `X86Avx2` is honoured when the AVX2 tier is available, else `Generic`.
/// * `X86Avx512` is honoured when the AVX-512 tier is available, else it
///   degrades to `X86Avx2`, and finally to `Generic`.
pub fn resolve_cpu_profile(profile: CpuProfile) -> CpuProfile {
    match profile {
        CpuProfile::Generic => CpuProfile::Generic,
        CpuProfile::Auto => detect_cpu_profile(),

        CpuProfile::X86Zen4
            if cfg!(target_arch = "x86_64") && has_avx2_support() && is_zen4_cpu() =>
        {
            CpuProfile::X86Zen4
        }
        CpuProfile::X86Zen4 => detect_cpu_profile(),

        CpuProfile::X86Avx2 if has_avx2_support() => CpuProfile::X86Avx2,
        CpuProfile::X86Avx2 => CpuProfile::Generic,

        CpuProfile::X86Avx512 if has_avx512_support() => CpuProfile::X86Avx512,
        CpuProfile::X86Avx512 if has_avx2_support() => CpuProfile::X86Avx2,
        CpuProfile::X86Avx512 => CpuProfile::Generic,
    }
}
/// Reports whether `feature` is usable under the given profile on this machine.
///
/// The profile is first passed through `resolve_cpu_profile`, so a request
/// the hardware cannot satisfy is answered for the profile it degrades to.
/// Feature names are matched case-insensitively.
///
/// * `Generic` (and an unresolved `Auto`) guarantee no features: `false`.
/// * `X86Zen4` / `X86Avx2`: `"avx2"`, `"fma"`, `"bmi2"` follow the AVX2-tier
///   probe; the AVX-512 names follow the AVX-512-tier probe.
/// * `X86Avx512`: every recognised name follows the AVX-512-tier probe.
///
/// Unrecognised feature names always yield `false`.
pub fn has_feature(profile: CpuProfile, feature: &str) -> bool {
    match resolve_cpu_profile(profile) {
        // `Auto` is not expected after resolution. Answer conservatively
        // instead of recursing on the same arguments, which could never
        // terminate.
        CpuProfile::Generic | CpuProfile::Auto => false,
        CpuProfile::X86Zen4 | CpuProfile::X86Avx2 => match feature.to_lowercase().as_str() {
            "avx2" | "fma" | "bmi2" => has_avx2_support(),
            // "avx512dq" is part of the AVX-512 tier probe, so it is accepted
            // here just like the other AVX-512 names.
            "avx512" | "avx512f" | "avx512vl" | "avx512dq" => has_avx512_support(),
            _ => false,
        },
        CpuProfile::X86Avx512 => match feature.to_lowercase().as_str() {
            "avx2" | "fma" | "bmi2" | "avx512" | "avx512f" | "avx512vl" | "avx512dq" => {
                has_avx512_support()
            }
            _ => false,
        },
    }
}
/// Returns tuning hints for the given profile as
/// `(cache_line, vector_width, branch_friendly)`.
///
/// The profile is first passed through `resolve_cpu_profile`, so the hints
/// describe what the running CPU can actually use:
///
/// * `Generic`: `(64, 0, false)`
/// * `X86Zen4` / `X86Avx2`: `(64, 256, true)`
/// * `X86Avx512`: `(64, 512, true)`
///
/// NOTE(review): presumably `cache_line` is in bytes and `vector_width` in
/// bits — confirm against the consumers of these hints.
pub fn get_optimization_hints(profile: CpuProfile) -> (usize, usize, bool) {
    match resolve_cpu_profile(profile) {
        // `Auto` is not expected after resolution. Fall back to the generic
        // hints instead of recursing on the same argument, which could never
        // terminate.
        CpuProfile::Generic | CpuProfile::Auto => (64, 0, false),
        CpuProfile::X86Zen4 | CpuProfile::X86Avx2 => (64, 256, true),
        CpuProfile::X86Avx512 => (64, 512, true),
    }
}
/// Test-only helper that clears the cached detection result, so the next
/// `detect_cpu_profile` call probes the hardware again.
#[cfg(test)]
pub fn reset_cpu_profile_cache() {
    // Sentinel meaning "nothing cached yet".
    const NOT_DETECTED: usize = usize::MAX;
    CACHED_CPU_PROFILE.store(NOT_DETECTED, Ordering::Relaxed);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection must always produce a concrete profile, never the `Auto`
    /// placeholder.
    #[test]
    fn test_detect_cpu_profile() {
        let profile = detect_cpu_profile();
        assert_ne!(profile, CpuProfile::Auto);
    }

    /// Resolution never leaves `Auto` in place and only degrades explicit
    /// requests to profiles at or below the requested tier.
    #[test]
    fn test_resolve_cpu_profile() {
        let auto_resolved = resolve_cpu_profile(CpuProfile::Auto);
        assert_ne!(auto_resolved, CpuProfile::Auto);

        let generic_resolved = resolve_cpu_profile(CpuProfile::Generic);
        assert_eq!(generic_resolved, CpuProfile::Generic);

        let avx2_resolved = resolve_cpu_profile(CpuProfile::X86Avx2);
        assert!(matches!(
            avx2_resolved,
            CpuProfile::X86Avx2 | CpuProfile::Generic
        ));

        let avx512_resolved = resolve_cpu_profile(CpuProfile::X86Avx512);
        assert!(matches!(
            avx512_resolved,
            CpuProfile::X86Avx512 | CpuProfile::X86Avx2 | CpuProfile::Generic
        ));
    }

    /// The generic profile exposes no features; the AVX2 profile answers
    /// feature queries case-insensitively when the hardware supports it.
    #[test]
    fn test_has_feature() {
        assert!(!has_feature(CpuProfile::Generic, "avx2"));
        assert!(!has_feature(CpuProfile::Generic, "avx512"));
        assert!(!has_feature(CpuProfile::Generic, "invalid"));

        // The positive checks only make sense on hardware with the AVX2 tier.
        if has_avx2_support() {
            let profile = CpuProfile::X86Avx2;
            assert!(has_feature(profile, "AVX2"));
            assert!(has_feature(profile, "avx2"));
            assert!(has_feature(profile, "Avx2"));
            assert!(has_feature(profile, "fma"));
            assert!(has_feature(profile, "bmi2"));
            assert!(!has_feature(profile, "invalid"));
        }
    }

    /// Hints follow the resolved profile, so the AVX2 expectation depends on
    /// whether the host can actually run the AVX2 tier.
    #[test]
    fn test_get_optimization_hints() {
        let (cache_line, vector_width, branch_friendly) =
            get_optimization_hints(CpuProfile::Generic);
        assert_eq!(cache_line, 64);
        assert_eq!(vector_width, 0);
        assert!(!branch_friendly);

        // Without AVX2 the request degrades to Generic and yields its hints.
        let expected = if has_avx2_support() {
            (64, 256, true)
        } else {
            (64, 0, false)
        };
        assert_eq!(get_optimization_hints(CpuProfile::X86Avx2), expected);
    }

    /// A fresh detection and the subsequent cached lookup must agree.
    #[test]
    fn test_caching() {
        reset_cpu_profile_cache();
        let profile1 = detect_cpu_profile();
        let profile2 = detect_cpu_profile();
        assert_eq!(profile1, profile2);
    }

    /// The integer encoding round-trips and unknown codes decode to Generic.
    #[test]
    fn test_profile_conversions() {
        let table = [
            (0usize, CpuProfile::Generic),
            (1, CpuProfile::Auto),
            (2, CpuProfile::X86Zen4),
            (3, CpuProfile::X86Avx2),
            (4, CpuProfile::X86Avx512),
        ];
        for &(code, profile) in table.iter() {
            assert_eq!(profile_to_usize(profile), code);
            assert_eq!(usize_to_profile(code), profile);
        }

        assert_eq!(usize_to_profile(999), CpuProfile::Generic);
        assert_eq!(usize_to_profile(usize::MAX), CpuProfile::Generic);
    }
}