use super::{
cache::CacheInfo,
features::CpuFeatures,
microarchitecture::{ArmMicroarchitecture, X86Microarchitecture},
optimization::MicroarchOptimization,
};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::sync::OnceLock;
/// Process-wide cache of the detected CPU description.
///
/// Filled on the first call to `CpuInfo::get` and shared by every later call.
static CPU_INFO: OnceLock<CpuInfo> = OnceLock::new();
/// Description of the host CPU: feature flags, cache geometry, a best-effort
/// microarchitecture guess and the tuning profile derived from it.
///
/// Obtain the shared instance through `CpuInfo::get`.
#[derive(Debug, Clone)]
pub struct CpuInfo {
/// Instruction-set feature flags filled in by the architecture-specific detection routines.
pub features: CpuFeatures,
/// Cache sizes, line sizes and associativities.
pub cache: CacheInfo,
/// x86 microarchitecture guess; stays `None` unless the x86_64 detection path runs.
pub x86_microarch: Option<X86Microarchitecture>,
/// ARM microarchitecture guess; stays `None` unless the aarch64 detection path runs.
pub arm_microarch: Option<ArmMicroarchitecture>,
/// Tuning parameters selected from the detected microarchitecture.
pub optimization: MicroarchOptimization,
/// Vendor string (CPUID vendor id on x86_64, a fixed label on aarch64, `"Unknown"` by default).
pub vendor: String,
/// Model name; not assigned by any detection routine visible in this file, so it keeps its default.
pub model_name: String,
/// Estimated number of physical cores (a heuristic, see `detect_topology`).
pub physical_cores: usize,
/// Number of logical CPUs as reported by `std::thread::available_parallelism`.
pub logical_cores: usize,
/// Nominal base clock — presumably in MHz, TODO confirm; filled from hard-coded tables, not measured.
pub base_frequency: f64,
/// Nominal maximum clock — presumably in MHz, TODO confirm; filled from hard-coded tables, not measured.
pub max_frequency: f64,
}
impl CpuInfo {
/// Returns the shared CPU description, running detection the first time it is requested.
pub fn get() -> &'static CpuInfo {
    CPU_INFO.get_or_init(|| Self::detect())
}
/// Builds a `CpuInfo` by starting from the defaults and running every
/// detection stage available for the compilation target.
///
/// The architecture-specific stages run first (features, then
/// microarchitecture, then caches); topology and the optimization profile
/// follow because they read what the earlier stages stored.
fn detect() -> Self {
    let mut detected = Self::default();

    #[cfg(target_arch = "x86_64")]
    {
        detected.detect_x86_features();
        detected.detect_x86_microarchitecture();
        detected.detect_x86_cache_info();
    }

    #[cfg(target_arch = "aarch64")]
    {
        detected.detect_arm_features();
        detected.detect_arm_microarchitecture();
        detected.detect_arm_cache_info();
    }

    detected.detect_topology();
    detected.create_optimization_profile();

    detected
}
/// Fills `self.features` and `self.vendor` from the CPUID instruction.
///
/// Leaves that the processor does not report as supported are treated as
/// all-zero registers, so the corresponding flags end up `false` instead of
/// being decoded from unrelated data.
///
/// NOTE(review): the flags describe what the CPU advertises; whether the
/// operating system has enabled the AVX/AVX-512 register state
/// (OSXSAVE + XGETBV) is not checked here.
#[cfg(target_arch = "x86_64")]
fn detect_x86_features(&mut self) {
    if !has_cpuid() {
        return;
    }

    // Tests bit `n` of a CPUID output register.
    let bit = |reg: u32, n: u32| (reg & (1u32 << n)) != 0;
    // Queries `leaf`/`sub_leaf`, returning zeros when the leaf is above `max_leaf`.
    let query = |leaf: u32, sub_leaf: u32, max_leaf: u32| {
        if leaf <= max_leaf {
            let r = __cpuid_count(leaf, sub_leaf);
            (r.eax, r.ebx, r.ecx, r.edx)
        } else {
            (0, 0, 0, 0)
        }
    };

    // Leaf 0: highest basic leaf in EAX, vendor id spread over EBX, EDX, ECX.
    let leaf0 = __cpuid(0);
    let max_basic = leaf0.eax;
    // Leaf 0x8000_0000: highest extended leaf in EAX.
    let max_extended = __cpuid(0x8000_0000).eax;

    let mut vendor_bytes = [0u8; 12];
    vendor_bytes[0..4].copy_from_slice(&leaf0.ebx.to_le_bytes());
    vendor_bytes[4..8].copy_from_slice(&leaf0.edx.to_le_bytes());
    vendor_bytes[8..12].copy_from_slice(&leaf0.ecx.to_le_bytes());
    self.vendor = String::from_utf8_lossy(&vendor_bytes)
        .trim_end_matches('\0')
        .to_string();

    // Leaf 1: baseline feature bits.
    let (_, _, ecx1, edx1) = query(1, 0, max_basic);
    self.features.sse = bit(edx1, 25);
    self.features.sse2 = bit(edx1, 26);
    self.features.sse3 = bit(ecx1, 0);
    self.features.pclmul = bit(ecx1, 1);
    self.features.ssse3 = bit(ecx1, 9);
    self.features.fma = bit(ecx1, 12);
    self.features.sse4_1 = bit(ecx1, 19);
    self.features.sse4_2 = bit(ecx1, 20);
    self.features.movbe = bit(ecx1, 22);
    self.features.popcnt = bit(ecx1, 23);
    self.features.aes = bit(ecx1, 25);
    self.features.xsave = bit(ecx1, 26);
    self.features.avx = bit(ecx1, 28);
    self.features.f16c = bit(ecx1, 29);
    self.features.rdrand = bit(ecx1, 30);

    // Leaf 7, sub-leaf 0: structured extended features. EAX holds the highest sub-leaf.
    let (max_sub7, ebx7, ecx7, _) = query(7, 0, max_basic);
    self.features.bmi1 = bit(ebx7, 3);
    self.features.hle = bit(ebx7, 4);
    self.features.avx2 = bit(ebx7, 5);
    self.features.bmi2 = bit(ebx7, 8);
    self.features.rtm = bit(ebx7, 11);
    self.features.avx512f = bit(ebx7, 16);
    self.features.avx512dq = bit(ebx7, 17);
    self.features.rdseed = bit(ebx7, 18);
    self.features.adx = bit(ebx7, 19);
    self.features.avx512cd = bit(ebx7, 28);
    // SHA lives in EBX bit 29 (ECX bit 29 is a different feature).
    self.features.sha = bit(ebx7, 29);
    self.features.avx512bw = bit(ebx7, 30);
    self.features.avx512vl = bit(ebx7, 31);
    self.features.avx512vnni = bit(ecx7, 11);

    // AVX512_BF16 is reported in leaf 7, sub-leaf 1, EAX bit 5.
    let eax7_1 = if max_sub7 >= 1 {
        __cpuid_count(7, 1).eax
    } else {
        0
    };
    self.features.avx512bf16 = bit(eax7_1, 5);

    // Extended leaf 0x8000_0001.
    let (_, _, ecx_ext, edx_ext) = query(0x8000_0001, 0, max_extended);
    self.features.lzcnt = bit(ecx_ext, 5);
    // PREFETCHW is ECX bit 8 of this leaf (leaf 7 ECX bit 0 is PREFETCHWT1).
    self.features.prefetchw = bit(ecx_ext, 8);
    self.features.fma4 = bit(ecx_ext, 16);
    self.features.rdtscp = bit(edx_ext, 27);
}
/// Guesses the x86 microarchitecture from the vendor string and the feature
/// flags already stored in `self.features`, and records it in
/// `self.x86_microarch`.
///
/// This is a coarse heuristic: it picks one representative generation per
/// feature level rather than decoding family/model.
#[cfg(target_arch = "x86_64")]
fn detect_x86_microarchitecture(&mut self) {
    let flags = &self.features;

    let guess = if self.vendor.contains("Intel") {
        match (flags.avx512f, flags.avx512vnni, flags.avx2, flags.avx) {
            (true, true, _, _) => X86Microarchitecture::IceLake,
            (true, false, _, _) => X86Microarchitecture::Skylake,
            (false, _, true, _) => X86Microarchitecture::Haswell,
            (false, _, false, true) => X86Microarchitecture::SandyBridge,
            (false, _, false, false) => X86Microarchitecture::Nehalem,
        }
    } else if self.vendor.contains("AMD") {
        match (flags.avx2, flags.avx) {
            (true, _) => X86Microarchitecture::Zen2,
            (false, true) => X86Microarchitecture::Bulldozer,
            (false, false) => X86Microarchitecture::K10,
        }
    } else {
        X86Microarchitecture::Unknown
    };

    self.x86_microarch = Some(guess);
}
/// Reads cache geometry from the CPUID cache-parameters leaf and stores it in `self.cache`.
///
/// Intel exposes the data on leaf 4; AMD exposes the same register layout on
/// leaf 0x8000_001D, which is selected when `self.vendor` contains `"AMD"`
/// (so this relies on the vendor string having been detected beforehand).
/// Nothing is written when the chosen leaf is not supported.
#[cfg(target_arch = "x86_64")]
fn detect_x86_cache_info(&mut self) {
    if !has_cpuid() {
        return;
    }

    // Safety cap on enumeration in case a type-0 terminator is never reported.
    const MAX_SUB_LEAVES: u32 = 32;

    let (leaf, max_leaf) = if self.vendor.contains("AMD") {
        (0x8000_001D_u32, __cpuid(0x8000_0000).eax)
    } else {
        (4_u32, __cpuid(0).eax)
    };
    if max_leaf < leaf {
        return;
    }

    for sub_leaf in 0..MAX_SUB_LEAVES {
        let regs = __cpuid_count(leaf, sub_leaf);

        // EAX[4:0]: 0 = no more caches, 1 = data, 2 = instruction, 3 = unified.
        let cache_type = regs.eax & 0x1F;
        if cache_type == 0 {
            break;
        }
        let cache_level = (regs.eax >> 5) & 0x7;

        // Every geometry field is stored minus one.
        let line_size = ((regs.ebx & 0xFFF) + 1) as usize;
        let partitions = (((regs.ebx >> 12) & 0x3FF) + 1) as usize;
        let ways = (((regs.ebx >> 22) & 0x3FF) + 1) as usize;
        // Widen before adding so an all-ones ECX cannot overflow.
        let sets = regs.ecx as usize + 1;
        let size = ways * partitions * line_size * sets;

        match (cache_level, cache_type) {
            (1, 1) => {
                self.cache.l1d_size = size;
                self.cache.l1_line_size = line_size;
                self.cache.l1_associativity = ways;
            }
            (1, 2) => {
                self.cache.l1i_size = size;
            }
            (2, 3) => {
                self.cache.l2_size = size;
                self.cache.l2_line_size = line_size;
                self.cache.l2_associativity = ways;
            }
            (3, 3) => {
                self.cache.l3_size = size;
                self.cache.l3_line_size = line_size;
                self.cache.l3_associativity = ways;
            }
            _ => {}
        }
    }
}
/// Fills the AArch64 feature flags in `self.features`.
///
/// NEON / FP / Advanced SIMD are always reported as present. CRC32, AES and
/// SHA are always reported on macOS (as before) and are queried at run time
/// through `is_aarch64_feature_detected!` on every other OS.
#[cfg(target_arch = "aarch64")]
fn detect_arm_features(&mut self) {
    self.features.neon = true;
    self.features.fp = true;
    self.features.asimd = true;

    let on_macos = cfg!(target_os = "macos");

    self.features.crc32 = on_macos || std::arch::is_aarch64_feature_detected!("crc");
    self.features.aes_arm = on_macos || std::arch::is_aarch64_feature_detected!("aes");

    // Rust's "sha2" feature covers both the SHA-1 and SHA-256 instructions.
    let has_sha = on_macos || std::arch::is_aarch64_feature_detected!("sha2");
    self.features.sha1 = has_sha;
    self.features.sha256 = has_sha;
}
/// Records the ARM microarchitecture, vendor label and (on macOS) a nominal
/// clock for the detected chip.
///
/// On macOS the chip comes from `detect_apple_silicon_chip` and both
/// frequency fields receive the same table value. On every other OS the
/// microarchitecture is fixed to `CortexA76` and the vendor to `"ARM"`.
#[cfg(target_arch = "aarch64")]
fn detect_arm_microarchitecture(&mut self) {
    #[cfg(target_os = "macos")]
    {
        let chip = Self::detect_apple_silicon_chip();
        // Table values; unit presumably MHz — TODO confirm.
        let clock = match chip {
            Some(ArmMicroarchitecture::M1) => 3200.0,
            Some(ArmMicroarchitecture::M2) => 3500.0,
            Some(ArmMicroarchitecture::M3) => 4000.0,
            _ => 3000.0,
        };

        self.arm_microarch = chip;
        self.vendor = "Apple".to_string();
        self.base_frequency = clock;
        self.max_frequency = clock;
    }

    #[cfg(not(target_os = "macos"))]
    {
        self.vendor = "ARM".to_string();
        self.arm_microarch = Some(ArmMicroarchitecture::CortexA76);
    }
}
/// Identifies the Apple Silicon generation by shelling out to `sysctl`.
///
/// First the CPU brand string is searched for "M3", "M2" or "M1" (in that
/// order). If none matches, the value of `hw.perflevel0.physicalcpu` is
/// mapped to a generation. When neither query yields an answer the result
/// is `M1`, so the function always returns `Some`.
#[cfg(all(target_arch = "aarch64", target_os = "macos"))]
fn detect_apple_silicon_chip() -> Option<ArmMicroarchitecture> {
    use std::process::Command;

    // Runs `sysctl -n <key>` and hands back the raw stdout bytes, if the process could be run.
    let sysctl = |key: &str| {
        Command::new("sysctl")
            .arg("-n")
            .arg(key)
            .output()
            .ok()
            .map(|out| out.stdout)
    };

    if let Some(raw) = sysctl("machdep.cpu.brand_string") {
        let brand = String::from_utf8_lossy(&raw);
        let known = [
            ("M3", ArmMicroarchitecture::M3),
            ("M2", ArmMicroarchitecture::M2),
            ("M1", ArmMicroarchitecture::M1),
        ];
        for (tag, chip) in known {
            if brand.contains(tag) {
                return Some(chip);
            }
        }
    }

    let perf_cores = sysctl("hw.perflevel0.physicalcpu")
        .and_then(|raw| String::from_utf8(raw).ok())
        .and_then(|text| text.trim().parse::<i32>().ok());

    Some(match perf_cores {
        Some(12) => ArmMicroarchitecture::M3,
        Some(4) => ArmMicroarchitecture::M1,
        // 8 cores and every other count map to M2.
        Some(_) => ArmMicroarchitecture::M2,
        // No usable core count at all.
        None => ArmMicroarchitecture::M1,
    })
}
/// Fills `self.cache` for AArch64 from fixed per-chip tables (nothing is
/// queried from the hardware).
///
/// L1 data and instruction caches always get the same size and every line
/// size is 64. On macOS the L2 size and associativity depend on
/// `self.arm_microarch`; unrecognized chips use the M1 values. Other
/// operating systems get a single generic set of values.
#[cfg(target_arch = "aarch64")]
fn detect_arm_cache_info(&mut self) {
    const KIB: usize = 1024;
    const MIB: usize = 1024 * 1024;

    // (L1 size, L2 size, L1 associativity, L2 associativity)
    #[cfg(target_os = "macos")]
    let (l1_bytes, l2_bytes, l1_ways, l2_ways) = match self.arm_microarch {
        Some(ArmMicroarchitecture::M2) => (128 * KIB, 16 * MIB, 8, 16),
        Some(ArmMicroarchitecture::M3) => (128 * KIB, 18 * MIB, 8, 18),
        // M1 and anything unrecognized.
        _ => (128 * KIB, 12 * MIB, 8, 12),
    };
    #[cfg(not(target_os = "macos"))]
    let (l1_bytes, l2_bytes, l1_ways, l2_ways) = (64 * KIB, MIB, 4, 8);

    let cache = &mut self.cache;
    cache.l1d_size = l1_bytes;
    cache.l1i_size = l1_bytes;
    cache.l2_size = l2_bytes;
    cache.l1_line_size = 64;
    cache.l2_line_size = 64;
    cache.l1_associativity = l1_ways;
    cache.l2_associativity = l2_ways;
}
/// Fills the core counts and, where no better value exists, nominal clock figures.
///
/// `logical_cores` comes from `std::thread::available_parallelism` (4 if
/// that fails). `physical_cores` is a heuristic: when the SSE flag is set
/// the CPU is assumed to run two hardware threads per core, so the logical
/// count is halved — but never below one.
///
/// The placeholder frequencies are skipped on Apple Silicon builds, where
/// the microarchitecture stage has already stored per-chip values that must
/// not be overwritten.
fn detect_topology(&mut self) {
    self.logical_cores = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4);

    self.physical_cores = if self.features.sse {
        // A single logical CPU must still count as one physical core.
        (self.logical_cores / 2).max(1)
    } else {
        self.logical_cores
    };

    #[cfg(not(all(target_arch = "aarch64", target_os = "macos")))]
    {
        self.base_frequency = 2400.0;
        self.max_frequency = 3600.0;
    }
}
/// Selects `self.optimization` from the detected microarchitecture.
///
/// x86 entries are checked before ARM entries. Any microarchitecture
/// without a dedicated row falls back to `MicroarchOptimization::default()`.
/// Every dedicated row enables `branch_friendly`, `prefer_fma` and
/// `cache_blocking`; the remaining fields come from the table below.
fn create_optimization_profile(&mut self) {
    // Row layout:
    // (optimal_vector_width, unroll_factor, matrix_block_size, prefetch_distance,
    //  software_prefetch, memory_alignment, parallel_chunk_size, ht_aware, numa_aware)
    let row = match (self.x86_microarch, self.arm_microarch) {
        (Some(X86Microarchitecture::Skylake | X86Microarchitecture::IceLake), _) => {
            Some((64, 8, 128, 12, true, 64, 2048, true, true))
        }
        (Some(X86Microarchitecture::Haswell | X86Microarchitecture::Broadwell), _) => {
            Some((32, 4, 64, 8, true, 32, 1024, true, false))
        }
        (Some(X86Microarchitecture::Zen2 | X86Microarchitecture::Zen3), _) => {
            Some((32, 6, 96, 10, true, 32, 1536, false, true))
        }
        (_, Some(ArmMicroarchitecture::M1)) => {
            Some((16, 4, 96, 16, false, 16, 1024, false, false))
        }
        (_, Some(ArmMicroarchitecture::M2)) => {
            Some((16, 6, 128, 20, false, 16, 1536, false, false))
        }
        (_, Some(ArmMicroarchitecture::M3)) => {
            Some((16, 8, 144, 24, false, 16, 2048, false, false))
        }
        (
            _,
            Some(
                ArmMicroarchitecture::CortexA76
                | ArmMicroarchitecture::CortexA77
                | ArmMicroarchitecture::CortexA78,
            ),
        ) => Some((16, 4, 64, 8, true, 16, 512, false, true)),
        _ => None,
    };

    self.optimization = match row {
        Some((width, unroll, block, distance, sw_prefetch, align, chunk, ht, numa)) => {
            MicroarchOptimization {
                optimal_vector_width: width,
                unroll_factor: unroll,
                matrix_block_size: block,
                prefetch_distance: distance,
                branch_friendly: true,
                prefer_fma: true,
                cache_blocking: true,
                software_prefetch: sw_prefetch,
                memory_alignment: align,
                parallel_chunk_size: chunk,
                ht_aware: ht,
                numa_aware: numa,
            }
        }
        None => MicroarchOptimization::default(),
    };
}
}
impl Default for CpuInfo {
fn default() -> Self {
Self {
features: CpuFeatures::default(),
cache: CacheInfo::default(),
x86_microarch: None,
arm_microarch: None,
optimization: MicroarchOptimization::default(),
vendor: "Unknown".to_string(),
model_name: "Unknown".to_string(),
physical_cores: 4,
logical_cores: 4,
base_frequency: 2000.0,
max_frequency: 3000.0,
}
}
}
/// Reports whether CPUID may be used: `true` when compiled for x86_64,
/// `false` for every other target architecture.
fn has_cpuid() -> bool {
    cfg!(target_arch = "x86_64")
}
/// Identifies the x86 microarchitecture from the CPUID vendor id and the
/// family/model signature.
///
/// Returns `None` on non-x86_64 builds and for vendors other than Intel or
/// AMD; returns `Some(Unknown)` when CPUID leaf 1 is unavailable or the
/// family/model pair is not in the tables below.
pub fn detect_x86_microarchitecture() -> Option<X86Microarchitecture> {
    #[cfg(target_arch = "x86_64")]
    {
        if !has_cpuid() {
            return Some(X86Microarchitecture::Unknown);
        }
        let cpuid = __cpuid(0);
        if cpuid.eax < 1 {
            return Some(X86Microarchitecture::Unknown);
        }

        // Leaf 1 EAX: stepping[3:0], model[7:4], family[11:8],
        // extended model[19:16], extended family[27:20].
        let signature = __cpuid(1).eax;
        let base_family = (signature >> 8) & 0xF;
        let base_model = (signature >> 4) & 0xF;
        // The extended family only applies when the base family is 0xF.
        let family = if base_family == 0xF {
            base_family + ((signature >> 20) & 0xFF)
        } else {
            base_family
        };
        // The extended model only applies to base families 0x6 and 0xF.
        let model = if base_family == 0x6 || base_family == 0xF {
            base_model | (((signature >> 16) & 0xF) << 4)
        } else {
            base_model
        };

        // "GenuineIntel" as little-endian words in EBX, EDX, ECX.
        if cpuid.ebx == 0x756e6547 && cpuid.edx == 0x49656e69 && cpuid.ecx == 0x6c65746e {
            return Some(match (family, model) {
                (6, 0x1A | 0x1E | 0x1F | 0x2E) => X86Microarchitecture::Nehalem,
                // Westmere (the Nehalem shrink) is grouped with Nehalem; no
                // dedicated variant is referenced in this file.
                (6, 0x25 | 0x2C | 0x2F) => X86Microarchitecture::Nehalem,
                (6, 0x2A | 0x2D) => X86Microarchitecture::SandyBridge,
                // 0x3E is the server/HEDT Ivy Bridge part, not Haswell.
                (6, 0x3A | 0x3E) => X86Microarchitecture::IvyBridge,
                (6, 0x3C | 0x3F | 0x45 | 0x46) => X86Microarchitecture::Haswell,
                (6, 0x3D | 0x47 | 0x4F | 0x56) => X86Microarchitecture::Broadwell,
                // Skylake client/server plus the Kaby Lake models.
                (6, 0x4E | 0x5E | 0x55 | 0x8E | 0x9E) => X86Microarchitecture::Skylake,
                (6, 0x97 | 0x9A) => X86Microarchitecture::AlderLake,
                _ => X86Microarchitecture::Unknown,
            });
        }

        // "AuthenticAMD" as little-endian words in EBX, EDX, ECX.
        if cpuid.ebx == 0x68747541 && cpuid.edx == 0x69746e65 && cpuid.ecx == 0x444d4163 {
            return Some(match (family, model) {
                // Family 17h covers Zen, Zen+ and Zen 2; these model ranges are Zen 2.
                (0x17, 0x30..=0x4F | 0x60..=0x7F | 0x90..=0x91 | 0xA0..=0xAF) => {
                    X86Microarchitecture::Zen2
                }
                (0x17, _) => X86Microarchitecture::Zen,
                (0x19, _) => X86Microarchitecture::Zen3,
                _ => X86Microarchitecture::Unknown,
            });
        }
    }
    None
}
pub fn detect_arm_microarchitecture() -> Option<ArmMicroarchitecture> {
#[cfg(target_arch = "aarch64")]
{
if cfg!(target_os = "macos") {
Some(ArmMicroarchitecture::M1) } else {
Some(ArmMicroarchitecture::CortexA55) }
}
#[cfg(not(target_arch = "aarch64"))]
{
None
}
}