use std::sync::OnceLock;
use std::collections::HashMap;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CpuFeature {
SSE2,
SSE3,
SSSE3,
SSE4_1,
SSE4_2,
AVX,
AVX2,
BMI1,
BMI2,
POPCNT,
LZCNT,
TZCNT,
PREFETCHW,
PCLMULQDQ,
AES,
RDRAND,
RDSEED,
AVX512F, AVX512DQ, AVX512CD, AVX512BW, AVX512VL, AVX512VPOPCNTDQ, AVX512VBMI, AVX512IFMA,
NEON,
CRC32,
AesArm,
SHA1,
SHA2,
SHA3,
Crypto,
SVE, SVE2,
UnalignedAccess,
Prefetch,
ClflushOpt,
Clwb,
}
#[derive(Debug, Clone)]
pub struct CpuFeatures {
pub has_sse41: bool,
pub has_sse42: bool,
pub has_avx: bool,
pub has_avx2: bool,
pub has_avx512f: bool,
pub has_avx512vl: bool,
pub has_avx512bw: bool,
pub has_avx512vpopcntdq: bool,
pub has_bmi1: bool,
pub has_bmi2: bool,
pub has_popcnt: bool,
pub has_lzcnt: bool,
pub has_tzcnt: bool,
pub has_prefetchw: bool,
pub has_neon: bool,
pub has_crc32: bool,
pub has_crypto: bool,
pub has_sve: bool,
pub has_sve2: bool,
pub l1_cache_size: usize,
pub l2_cache_size: usize,
pub l3_cache_size: usize,
pub cache_line_size: usize,
pub logical_cores: usize,
pub physical_cores: usize,
pub vendor: String,
pub model: String,
pub optimization_tier: u8,
pub simd_tier: u8,
}
#[derive(Debug, Clone)]
pub struct CpuFeatureSet {
pub features: HashMap<CpuFeature, bool>,
pub vendor: String,
pub model: String,
pub logical_cores: usize,
pub physical_cores: usize,
pub cache_line_size: usize,
pub l1_cache_size: usize,
pub l2_cache_size: usize,
pub l3_cache_size: usize,
pub simd_tier: u8,
}
impl CpuFeatures {
pub fn new() -> Self {
Self {
has_sse41: false,
has_sse42: false,
has_avx: false,
has_avx2: false,
has_avx512f: false,
has_avx512vl: false,
has_avx512bw: false,
has_avx512vpopcntdq: false,
has_bmi1: false,
has_bmi2: false,
has_popcnt: false,
has_lzcnt: false,
has_tzcnt: false,
has_prefetchw: false,
has_neon: false,
has_crc32: false,
has_crypto: false,
has_sve: false,
has_sve2: false,
l1_cache_size: 32 * 1024, l2_cache_size: 256 * 1024, l3_cache_size: 8 * 1024 * 1024, cache_line_size: 64,
logical_cores: 1,
physical_cores: 1,
vendor: String::new(),
model: String::new(),
optimization_tier: 0,
simd_tier: 0,
}
}
pub fn detect_and_configure_simd(&mut self) {
self.optimization_tier = self.calculate_optimization_tier();
self.simd_tier = self.calculate_simd_tier();
}
fn calculate_optimization_tier(&self) -> u8 {
if self.has_avx512f && self.has_avx512bw && self.has_avx512vpopcntdq {
5 } else if self.has_avx2 && self.has_bmi2 {
4 } else if self.has_bmi2 {
3 } else if self.has_popcnt || self.has_neon {
2 } else {
1 }
}
fn calculate_simd_tier(&self) -> u8 {
if self.has_avx512f {
4 } else if self.has_bmi2 && self.has_avx2 {
3 } else if self.has_avx2 {
2 } else if self.has_popcnt || self.has_neon {
1 } else {
0 }
}
pub fn optimal_rank_select_variant(&self) -> &'static str {
if self.has_avx512f && self.has_avx512bw && self.has_avx512vpopcntdq {
"avx512_popcnt"
} else if self.has_bmi2 && self.has_avx2 {
"bmi2_avx2"
} else if self.has_avx2 {
"avx2"
} else if self.has_bmi2 {
"bmi2"
} else if self.has_popcnt {
"popcnt"
} else if self.has_neon {
"neon"
} else {
"scalar"
}
}
pub fn optimal_string_search_variant(&self) -> &'static str {
if self.has_sse42 {
"sse42_pcmpestri"
} else if self.has_avx2 {
"avx2_search"
} else if self.has_neon {
"neon_search"
} else {
"scalar"
}
}
pub fn optimal_memcpy_variant(&self) -> &'static str {
if self.has_avx512f {
"avx512_memcpy"
} else if self.has_avx2 {
"avx2_memcpy"
} else if self.has_neon {
"neon_memcpy"
} else {
"scalar_memcpy"
}
}
pub fn optimal_base64_variant(&self) -> &'static str {
if self.has_avx2 {
"avx2"
} else if self.has_sse42 {
"sse42"
} else if self.has_neon {
"neon"
} else {
"scalar"
}
}
pub fn recommended_chunk_size(&self) -> usize {
match self.optimization_tier {
5 => 64 * 1024, 4 => 32 * 1024, 3 => 16 * 1024, 2 => 8 * 1024, _ => 4 * 1024, }
}
pub fn should_use_prefetch(&self) -> bool {
self.has_prefetchw || self.optimization_tier >= 3
}
pub fn has_optimal_memory_access(&self) -> bool {
self.cache_line_size == 64 &&
(cfg!(target_arch = "x86_64") || cfg!(target_arch = "aarch64"))
}
pub fn recommended_alignment(&self) -> usize {
if self.has_avx512f {
64 } else if self.has_avx2 {
32 } else if self.has_neon {
16 } else {
8 }
}
pub fn has_feature(&self, feature: CpuFeature) -> bool {
match feature {
CpuFeature::SSE2 => true, CpuFeature::SSE3 => true, CpuFeature::SSSE3 => true, CpuFeature::SSE4_1 => self.has_sse41,
CpuFeature::SSE4_2 => self.has_sse42,
CpuFeature::AVX => self.has_avx,
CpuFeature::AVX2 => self.has_avx2,
CpuFeature::BMI1 => self.has_bmi1,
CpuFeature::BMI2 => self.has_bmi2,
CpuFeature::POPCNT => self.has_popcnt,
CpuFeature::LZCNT => self.has_lzcnt,
CpuFeature::TZCNT => self.has_tzcnt,
CpuFeature::PREFETCHW => self.has_prefetchw,
CpuFeature::AVX512F => self.has_avx512f,
CpuFeature::AVX512VL => self.has_avx512vl,
CpuFeature::AVX512BW => self.has_avx512bw,
CpuFeature::AVX512VPOPCNTDQ => self.has_avx512vpopcntdq,
CpuFeature::NEON => self.has_neon,
CpuFeature::CRC32 => self.has_crc32,
CpuFeature::Crypto => self.has_crypto,
CpuFeature::SVE => self.has_sve,
CpuFeature::SVE2 => self.has_sve2,
CpuFeature::UnalignedAccess => {
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
{
true }
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
false
}
}
_ => false,
}
}
}
impl Default for CpuFeatures {
fn default() -> Self {
Self::new()
}
}
impl CpuFeatureSet {
pub fn has_feature(&self, feature: CpuFeature) -> bool {
self.features.get(&feature).copied().unwrap_or(false)
}
pub fn optimal_rank_select_variant(&self) -> &'static str {
if self.has_feature(CpuFeature::AVX512F) && self.has_feature(CpuFeature::AVX512BW) {
"avx512"
} else if self.has_feature(CpuFeature::BMI2) && self.has_feature(CpuFeature::AVX2) {
"bmi2_avx2"
} else if self.has_feature(CpuFeature::AVX2) {
"avx2"
} else if self.has_feature(CpuFeature::POPCNT) {
"popcnt"
} else if self.has_feature(CpuFeature::NEON) {
"neon"
} else {
"scalar"
}
}
pub fn optimal_base64_variant(&self) -> &'static str {
if self.has_feature(CpuFeature::AVX2) {
"avx2"
} else if self.has_feature(CpuFeature::SSE4_2) {
"sse42"
} else if self.has_feature(CpuFeature::NEON) {
"neon"
} else {
"scalar"
}
}
pub fn get_simd_tier(&self) -> u8 {
if self.has_feature(CpuFeature::AVX512F) {
4 } else if self.has_feature(CpuFeature::BMI2) && self.has_feature(CpuFeature::AVX2) {
3 } else if self.has_feature(CpuFeature::AVX2) {
2 } else if self.has_feature(CpuFeature::POPCNT) || self.has_feature(CpuFeature::NEON) {
1 } else {
0 }
}
pub fn has_optimal_memory_access(&self) -> bool {
self.cache_line_size == 64 &&
(self.has_feature(CpuFeature::UnalignedAccess) || cfg!(target_arch = "x86_64"))
}
pub fn recommended_alignment(&self) -> usize {
if self.has_feature(CpuFeature::AVX512F) {
64 } else if self.has_feature(CpuFeature::AVX2) {
32 } else if self.has_feature(CpuFeature::SSE2) || self.has_feature(CpuFeature::NEON) {
16 } else {
8 }
}
}
pub struct RuntimeCpuFeatures;
impl RuntimeCpuFeatures {
pub fn new() -> Self {
Self
}
pub fn detect_features(&self) -> CpuFeatures {
let mut features = CpuFeatures::new();
let (vendor, model) = self.get_cpu_info();
features.vendor = vendor;
features.model = model;
let (logical_cores, physical_cores) = self.get_core_count();
features.logical_cores = logical_cores;
features.physical_cores = physical_cores;
let cache_info = self.get_enhanced_cache_info();
features.cache_line_size = cache_info.0;
features.l1_cache_size = cache_info.1;
features.l2_cache_size = cache_info.2;
features.l3_cache_size = cache_info.3;
#[cfg(target_arch = "x86_64")]
{
self.detect_x86_features(&mut features);
}
#[cfg(target_arch = "aarch64")]
{
self.detect_arm_features(&mut features);
}
features.detect_and_configure_simd();
features
}
#[cfg(target_arch = "x86_64")]
fn detect_x86_features(&self, features: &mut CpuFeatures) {
let cpuid = raw_cpuid::CpuId::new();
if let Some(feature_info) = cpuid.get_feature_info() {
features.has_sse41 = feature_info.has_sse41();
features.has_sse42 = feature_info.has_sse42();
features.has_avx = feature_info.has_avx();
features.has_popcnt = feature_info.has_popcnt();
}
if let Some(extended_features) = cpuid.get_extended_feature_info() {
features.has_avx2 = extended_features.has_avx2();
features.has_bmi1 = extended_features.has_bmi1();
features.has_bmi2 = extended_features.has_bmi2();
features.has_prefetchw = false;
features.has_avx512f = extended_features.has_avx512f();
features.has_avx512vl = extended_features.has_avx512vl();
features.has_avx512bw = extended_features.has_avx512bw();
features.has_avx512vpopcntdq = false; }
if let Some(extended_info) = cpuid.get_extended_processor_and_feature_identifiers() {
features.has_lzcnt = extended_info.has_lzcnt();
features.has_tzcnt = features.has_bmi1;
}
}
#[cfg(target_arch = "aarch64")]
fn detect_arm_features(&self, features: &mut CpuFeatures) {
features.has_neon = true;
if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
let cpuinfo_lower = cpuinfo.to_lowercase();
features.has_crc32 = cpuinfo_lower.contains("crc32");
features.has_crypto = cpuinfo_lower.contains("aes") || cpuinfo_lower.contains("crypto");
features.has_sve = cpuinfo_lower.contains("sve");
features.has_sve2 = cpuinfo_lower.contains("sve2");
}
#[cfg(target_os = "linux")]
{
if let Ok(auxval) = self.get_auxval_features() {
features.has_crc32 = (auxval & (1 << 7)) != 0; features.has_crypto = (auxval & (1 << 4)) != 0; features.has_sve = (auxval & (1 << 22)) != 0; }
}
}
fn get_enhanced_cache_info(&self) -> (usize, usize, usize, usize) {
let mut cache_line_size = 64; let l1_size = 32 * 1024; let l2_size = 256 * 1024; let l3_size = 8 * 1024 * 1024;
#[cfg(target_arch = "x86_64")]
{
let cpuid = raw_cpuid::CpuId::new();
if let Some(cache_params) = cpuid.get_cache_parameters() {
for cache in cache_params {
cache_line_size = cache.coherency_line_size() as usize;
let cache_size = cache.associativity() * cache.coherency_line_size()
* cache.physical_line_partitions();
let _cache_size = cache_size; }
}
}
#[cfg(target_arch = "aarch64")]
{
if let Ok(entries) = std::fs::read_dir("/sys/devices/system/cpu/cpu0/cache") {
for entry in entries.flatten() {
if let Ok(coherency_str) = std::fs::read_to_string(entry.path().join("coherency_line_size")) {
if let Ok(coherency) = coherency_str.trim().parse::<usize>() {
cache_line_size = coherency;
}
}
if let Ok(_level_str) = std::fs::read_to_string(entry.path().join("level")) {
if let Ok(_size_str) = std::fs::read_to_string(entry.path().join("size")) {
}
}
}
}
}
(cache_line_size, l1_size, l2_size, l3_size)
}
fn parse_cache_size(&self, size_str: &str) -> Result<usize, std::num::ParseIntError> {
let trimmed = size_str.trim().to_uppercase();
if trimmed.ends_with('K') {
let num = trimmed.trim_end_matches('K').parse::<usize>()?;
Ok(num * 1024)
} else if trimmed.ends_with('M') {
let num = trimmed.trim_end_matches('M').parse::<usize>()?;
Ok(num * 1024 * 1024)
} else {
trimmed.parse::<usize>()
}
}
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
fn get_auxval_features(&self) -> Result<u64, std::io::Error> {
Ok(0)
}
fn get_cpu_info(&self) -> (String, String) {
#[cfg(target_arch = "x86_64")]
{
let cpuid = raw_cpuid::CpuId::new();
let vendor = cpuid.get_vendor_info()
.map(|v| v.as_str().to_string())
.unwrap_or_else(|| "Unknown".to_string());
let model = cpuid.get_processor_brand_string()
.map(|b| b.as_str().to_string())
.unwrap_or_else(|| "Unknown".to_string());
(vendor, model)
}
#[cfg(target_arch = "aarch64")]
{
if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
let mut vendor = "ARM".to_string();
let mut model = "Unknown".to_string();
for line in cpuinfo.lines() {
if line.starts_with("CPU implementer") {
if line.contains("0x41") {
vendor = "ARM".to_string();
} else if line.contains("0x51") {
vendor = "Qualcomm".to_string();
}
} else if line.starts_with("model name") {
if let Some(name) = line.split(':').nth(1) {
model = name.trim().to_string();
}
}
}
return (vendor, model);
}
}
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
("Unknown".to_string(), "Unknown".to_string())
}
}
fn get_core_count(&self) -> (usize, usize) {
let logical_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let physical_cores = logical_cores;
#[cfg(target_arch = "x86_64")]
{
let cpuid = raw_cpuid::CpuId::new();
if let Some(feature_info) = cpuid.get_feature_info() {
if feature_info.has_htt() {
return (logical_cores, logical_cores / 2);
}
}
}
(logical_cores, physical_cores)
}
fn get_cache_info(&self) -> (usize, usize, usize, usize) {
let mut cache_line_size = 64; let mut l1_size = 32 * 1024; let mut l2_size = 256 * 1024; let mut l3_size = 8 * 1024 * 1024;
#[cfg(target_arch = "x86_64")]
{
let cpuid = raw_cpuid::CpuId::new();
if let Some(mut cache_params) = cpuid.get_cache_parameters() {
if let Some(cache) = cache_params.next() {
cache_line_size = cache.coherency_line_size() as usize;
}
}
}
(cache_line_size, l1_size, l2_size, l3_size)
}
}
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
pub fn get_cpu_features() -> &'static CpuFeatures {
CPU_FEATURES.get_or_init(|| {
RuntimeCpuFeatures::new().detect_features()
})
}
pub fn has_cpu_feature(feature: CpuFeature) -> bool {
get_cpu_features().has_feature(feature)
}
pub fn detect_and_configure_simd() -> &'static CpuFeatures {
get_cpu_features()
}
pub fn get_optimal_simd_strategy() -> SimdStrategy {
let features = get_cpu_features();
SimdStrategy {
rank_select_variant: features.optimal_rank_select_variant(),
string_search_variant: features.optimal_string_search_variant(),
memcpy_variant: features.optimal_memcpy_variant(),
chunk_size: features.recommended_chunk_size(),
alignment: features.recommended_alignment(),
use_prefetch: features.should_use_prefetch(),
optimization_tier: features.optimization_tier,
simd_tier: features.simd_tier,
}
}
#[derive(Debug, Clone)]
pub struct SimdStrategy {
pub rank_select_variant: &'static str,
pub string_search_variant: &'static str,
pub memcpy_variant: &'static str,
pub chunk_size: usize,
pub alignment: usize,
pub use_prefetch: bool,
pub optimization_tier: u8,
pub simd_tier: u8,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cpu_feature_detection() {
let features = get_cpu_features();
assert!(features.logical_cores > 0);
assert!(features.physical_cores > 0);
assert!(features.cache_line_size > 0);
assert!(!features.vendor.is_empty());
assert!(features.simd_tier <= 4);
println!("CPU: {} {}", features.vendor, features.model);
println!("Cores: {} logical, {} physical", features.logical_cores, features.physical_cores);
println!("Cache line size: {} bytes", features.cache_line_size);
println!("SIMD tier: {}", features.simd_tier);
println!("Optimal rank/select: {}", features.optimal_rank_select_variant());
println!("Optimal base64: {}", features.optimal_base64_variant());
}
#[test]
fn test_advanced_cpu_feature_detection() {
let advanced_features = get_cpu_features();
assert!(advanced_features.logical_cores > 0);
assert!(advanced_features.physical_cores > 0);
assert!(advanced_features.cache_line_size > 0);
assert!(!advanced_features.vendor.is_empty());
assert!(advanced_features.optimization_tier <= 5);
assert!(advanced_features.simd_tier <= 4);
println!("=== Advanced CPU Features ===");
println!("CPU: {} {}", advanced_features.vendor, advanced_features.model);
println!("Cores: {} logical, {} physical", advanced_features.logical_cores, advanced_features.physical_cores);
println!("Cache: L1={}, L2={}, L3={}, Line={}",
advanced_features.l1_cache_size,
advanced_features.l2_cache_size,
advanced_features.l3_cache_size,
advanced_features.cache_line_size);
println!("x86_64 Features:");
println!(" SSE4.1: {}, SSE4.2: {}", advanced_features.has_sse41, advanced_features.has_sse42);
println!(" AVX: {}, AVX2: {}", advanced_features.has_avx, advanced_features.has_avx2);
println!(" AVX-512F: {}, AVX-512VL: {}, AVX-512BW: {}",
advanced_features.has_avx512f, advanced_features.has_avx512vl, advanced_features.has_avx512bw);
println!(" BMI1: {}, BMI2: {}", advanced_features.has_bmi1, advanced_features.has_bmi2);
println!(" POPCNT: {}, LZCNT: {}, TZCNT: {}",
advanced_features.has_popcnt, advanced_features.has_lzcnt, advanced_features.has_tzcnt);
println!("ARM64 Features:");
println!(" NEON: {}, CRC32: {}", advanced_features.has_neon, advanced_features.has_crc32);
println!(" Crypto: {}, SVE: {}, SVE2: {}",
advanced_features.has_crypto, advanced_features.has_sve, advanced_features.has_sve2);
println!("Optimization: Tier={}, SIMD={}",
advanced_features.optimization_tier, advanced_features.simd_tier);
println!("Optimal rank/select: {}", advanced_features.optimal_rank_select_variant());
println!("Optimal string search: {}", advanced_features.optimal_string_search_variant());
println!("Optimal memcpy: {}", advanced_features.optimal_memcpy_variant());
println!("Recommended chunk size: {} bytes", advanced_features.recommended_chunk_size());
println!("Recommended alignment: {} bytes", advanced_features.recommended_alignment());
println!("Use prefetch: {}", advanced_features.should_use_prefetch());
}
#[test]
fn test_simd_strategy() {
let strategy = get_optimal_simd_strategy();
assert!(!strategy.rank_select_variant.is_empty());
assert!(!strategy.string_search_variant.is_empty());
assert!(!strategy.memcpy_variant.is_empty());
assert!(strategy.chunk_size >= 4096);
assert!(strategy.chunk_size <= 65536);
assert!(strategy.alignment >= 8);
assert!(strategy.alignment <= 64);
assert!(strategy.alignment.is_power_of_two());
assert!(strategy.optimization_tier <= 5);
assert!(strategy.simd_tier <= 4);
println!("=== SIMD Strategy ===");
println!("Rank/Select: {}", strategy.rank_select_variant);
println!("String Search: {}", strategy.string_search_variant);
println!("Memory Copy: {}", strategy.memcpy_variant);
println!("Chunk Size: {} bytes", strategy.chunk_size);
println!("Alignment: {} bytes", strategy.alignment);
println!("Use Prefetch: {}", strategy.use_prefetch);
println!("Optimization Tier: {}", strategy.optimization_tier);
println!("SIMD Tier: {}", strategy.simd_tier);
}
#[test]
fn test_detect_and_configure_simd() {
let features = detect_and_configure_simd();
let global_features = get_cpu_features();
assert_eq!(features.optimization_tier, global_features.optimization_tier);
assert_eq!(features.simd_tier, global_features.simd_tier);
assert!(features.optimization_tier <= 5);
assert!(features.simd_tier <= 4);
println!("SIMD Configuration Complete:");
println!(" Optimization Tier: {}", features.optimization_tier);
println!(" SIMD Tier: {}", features.simd_tier);
}
#[test]
fn test_has_cpu_feature() {
let _has_popcnt = has_cpu_feature(CpuFeature::POPCNT);
let _has_avx2 = has_cpu_feature(CpuFeature::AVX2);
}
#[test]
fn test_feature_set_methods() {
let features = get_cpu_features();
let rank_select_variant = features.optimal_rank_select_variant();
assert!(["scalar", "popcnt", "avx2", "bmi2_avx2", "avx512", "neon"].contains(&rank_select_variant));
let base64_variant = features.optimal_base64_variant();
assert!(["scalar", "sse42", "avx2", "neon"].contains(&base64_variant));
let alignment = features.recommended_alignment();
assert!(alignment >= 8 && alignment <= 64);
assert!(alignment.is_power_of_two());
}
#[test]
fn test_memory_access_patterns() {
let features = get_cpu_features();
let _has_optimal = features.has_optimal_memory_access();
assert!(features.cache_line_size >= 32 && features.cache_line_size <= 128);
}
}