#[cfg(target_arch = "x86")]
use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
use crate::support::feature_detect::{Flags, get_or_init_flags_cache, unique_masks};
pub mod cpu_flags {
use super::unique_masks;
unique_masks! {
u32,
SSE3,
F16C,
SSE,
SSE2,
ERMSB,
MOVRS,
FMA,
FMA4,
AVX512FP16,
AVX512BF16,
}
}
pub fn get_cpu_features() -> Flags {
use core::sync::atomic::AtomicU32;
static CACHE: AtomicU32 = AtomicU32::new(0);
get_or_init_flags_cache(&CACHE, load_x86_features)
}
fn load_x86_features() -> Flags {
let mut value = Flags::empty();
if cfg!(target_env = "sgx") {
return Flags::empty();
}
let mut vendor_id = [0u8; 12];
let max_basic_leaf;
unsafe {
let CpuidResult { eax, ebx, ecx, edx } = __cpuid(0);
max_basic_leaf = eax;
vendor_id[0..4].copy_from_slice(&ebx.to_ne_bytes());
vendor_id[4..8].copy_from_slice(&edx.to_ne_bytes());
vendor_id[8..12].copy_from_slice(&ecx.to_ne_bytes());
}
if max_basic_leaf < 1 {
return value;
}
let CpuidResult { ecx, edx, .. } = unsafe { __cpuid(0x0000_0001_u32) };
let proc_info_ecx = Flags::from_bits(ecx);
let proc_info_edx = Flags::from_bits(edx);
let mut extended_features_ebx = Flags::empty();
let mut extended_features_edx = Flags::empty();
let mut extended_features_eax_leaf_1 = Flags::empty();
if max_basic_leaf >= 7 {
let CpuidResult { ebx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
extended_features_ebx = Flags::from_bits(ebx);
extended_features_edx = Flags::from_bits(edx);
let CpuidResult { eax, .. } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) };
extended_features_eax_leaf_1 = Flags::from_bits(eax)
}
let extended_max_basic_leaf = unsafe { __cpuid(0x8000_0000_u32) }.eax;
let mut extended_proc_info_ecx = Flags::empty();
if extended_max_basic_leaf >= 1 {
let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
extended_proc_info_ecx = Flags::from_bits(ecx);
}
let mut enable = |regflags: Flags, regbit, flag| {
if regflags.test_nth(regbit) {
value.insert(flag);
}
};
enable(proc_info_ecx, 0, cpu_flags::SSE3);
enable(proc_info_ecx, 29, cpu_flags::F16C);
enable(proc_info_edx, 25, cpu_flags::SSE);
enable(proc_info_edx, 26, cpu_flags::SSE2);
enable(extended_features_ebx, 9, cpu_flags::ERMSB);
enable(extended_features_eax_leaf_1, 31, cpu_flags::MOVRS);
let cpu_xsave = proc_info_ecx.test_nth(26);
if cpu_xsave {
let cpu_osxsave = proc_info_ecx.test_nth(27);
if cpu_osxsave {
let xcr0 = unsafe { _xgetbv(0) };
let os_avx_support = xcr0 & 6 == 6;
let os_avx512_support = xcr0 & 0xe0 == 0xe0;
if os_avx_support {
enable(proc_info_ecx, 12, cpu_flags::FMA);
if os_avx512_support {
enable(extended_features_edx, 23, cpu_flags::AVX512FP16);
enable(extended_features_eax_leaf_1, 5, cpu_flags::AVX512BF16);
}
}
}
}
if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
enable(extended_proc_info_ecx, 16, cpu_flags::FMA4);
}
value
}
#[cfg(test)]
mod tests {
extern crate std;
use std::is_x86_feature_detected;
use super::*;
#[test]
fn check_matches_std() {
let features = get_cpu_features();
for i in 0..cpu_flags::ALL.len() {
let flag = cpu_flags::ALL[i];
let name = cpu_flags::NAMES[i];
let std_detected = match flag {
cpu_flags::SSE3 => is_x86_feature_detected!("sse3"),
cpu_flags::F16C => is_x86_feature_detected!("f16c"),
cpu_flags::SSE => is_x86_feature_detected!("sse"),
cpu_flags::SSE2 => is_x86_feature_detected!("sse2"),
cpu_flags::ERMSB => is_x86_feature_detected!("ermsb"),
cpu_flags::MOVRS => continue, cpu_flags::FMA => is_x86_feature_detected!("fma"),
cpu_flags::FMA4 => continue, cpu_flags::AVX512FP16 => is_x86_feature_detected!("avx512fp16"),
cpu_flags::AVX512BF16 => is_x86_feature_detected!("avx512bf16"),
_ => panic!("untested CPU flag {name}"),
};
assert_eq!(
std_detected,
features.contains(flag),
"different flag {name}. flags: {features:?}"
);
}
}
}