Skip to main content

laminar_core/detect/
cpu.rs

//! # CPU Feature Detection
//!
//! Detects CPU capabilities, including SIMD instruction-set support and the cache line size.
//!
//! ## Usage
//!
//! ```rust,ignore
//! use laminar_core::detect::{CpuFeatures, cache_line_size};
//!
//! let features = CpuFeatures::detect();
//! if features.avx2 {
//!     println!("AVX2 is available!");
//! }
//!
//! let cache_line = cache_line_size();
//! println!("Cache line size: {} bytes", cache_line);
//! ```
18
/// Instruction-set capabilities of the running CPU.
///
/// Every flag starts out `false` (see the `Default` derive). x86 flags are
/// only ever set on `x86`/`x86_64` builds and ARM flags only on `aarch64`
/// builds, so flags belonging to a foreign architecture stay `false`.
// Each flag is an independent capability, so plain bools model this better
// than an enum or state machine would.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[allow(clippy::struct_excessive_bools)]
pub struct CpuFeatures {
    /// x86: SSE4.2 (includes the CRC32 and string-compare instructions).
    pub sse4_2: bool,
    /// x86: AVX2 (256-bit integer SIMD).
    pub avx2: bool,
    /// x86: AVX-512 Foundation (512-bit SIMD).
    pub avx512f: bool,
    /// x86: AVX-512 VBMI2 (byte/word manipulation).
    pub avx512vbmi2: bool,
    /// x86: POPCNT (population count).
    pub popcnt: bool,
    /// x86: AES-NI (hardware AES rounds).
    pub aes_ni: bool,
    /// x86: CLMUL / PCLMULQDQ (carry-less multiply, used for CRC).
    pub clmul: bool,
    /// x86: BMI1 (bit manipulation, set 1).
    pub bmi1: bool,
    /// x86: BMI2 (bit manipulation, set 2).
    pub bmi2: bool,
    /// x86: LZCNT (leading-zero count).
    pub lzcnt: bool,
    /// ARM: NEON (128-bit SIMD).
    pub neon: bool,
    /// ARM: hardware CRC32 instructions.
    pub arm_crc32: bool,
}
50
51impl CpuFeatures {
52    /// Detect CPU features for the current processor.
53    #[must_use]
54    pub fn detect() -> Self {
55        let mut features = Self::default();
56
57        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
58        {
59            features.detect_x86();
60        }
61
62        #[cfg(target_arch = "aarch64")]
63        {
64            features.detect_aarch64();
65        }
66
67        features
68    }
69
70    /// Detect features on x86/x86\_64 using CPUID.
71    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
72    fn detect_x86(&mut self) {
73        // Use std::arch::is_x86_feature_detected! macro for reliable detection
74        self.sse4_2 = std::arch::is_x86_feature_detected!("sse4.2");
75        self.avx2 = std::arch::is_x86_feature_detected!("avx2");
76        self.avx512f = std::arch::is_x86_feature_detected!("avx512f");
77        self.avx512vbmi2 = std::arch::is_x86_feature_detected!("avx512vbmi2");
78        self.popcnt = std::arch::is_x86_feature_detected!("popcnt");
79        self.aes_ni = std::arch::is_x86_feature_detected!("aes");
80        self.clmul = std::arch::is_x86_feature_detected!("pclmulqdq");
81        self.bmi1 = std::arch::is_x86_feature_detected!("bmi1");
82        self.bmi2 = std::arch::is_x86_feature_detected!("bmi2");
83        self.lzcnt = std::arch::is_x86_feature_detected!("lzcnt");
84    }
85
86    /// Detect features on AArch64 (ARM64).
87    #[cfg(target_arch = "aarch64")]
88    fn detect_aarch64(&mut self) {
89        // NEON is mandatory on AArch64
90        self.neon = true;
91        // CRC32 is common but not universal
92        self.arm_crc32 = std::arch::is_aarch64_feature_detected!("crc");
93    }
94
95    /// Check if SIMD acceleration is available.
96    #[must_use]
97    pub fn has_simd(&self) -> bool {
98        self.avx2 || self.avx512f || self.neon
99    }
100
101    /// Check if hardware CRC32 is available.
102    #[must_use]
103    pub fn has_hw_crc32(&self) -> bool {
104        self.sse4_2 || self.arm_crc32
105    }
106
107    /// Check if hardware AES is available.
108    #[must_use]
109    pub fn has_hw_aes(&self) -> bool {
110        self.aes_ni
111    }
112
113    /// Get a summary of SIMD capabilities.
114    #[must_use]
115    pub fn simd_level(&self) -> SimdLevel {
116        if self.avx512f {
117            SimdLevel::Avx512
118        } else if self.avx2 {
119            SimdLevel::Avx2
120        } else if self.sse4_2 {
121            SimdLevel::Sse42
122        } else if self.neon {
123            SimdLevel::Neon
124        } else {
125            SimdLevel::None
126        }
127    }
128
129    /// Get a summary string.
130    #[must_use]
131    pub fn summary(&self) -> String {
132        let mut features = Vec::new();
133
134        if self.avx512f {
135            features.push("AVX-512");
136        } else if self.avx2 {
137            features.push("AVX2");
138        } else if self.sse4_2 {
139            features.push("SSE4.2");
140        }
141
142        if self.neon {
143            features.push("NEON");
144        }
145
146        if self.aes_ni {
147            features.push("AES-NI");
148        }
149
150        if self.popcnt {
151            features.push("POPCNT");
152        }
153
154        if features.is_empty() {
155            "None".to_string()
156        } else {
157            features.join(", ")
158        }
159    }
160}
161
/// Coarse ranking of the SIMD instruction sets this module distinguishes.
///
/// The derived ordering follows declaration order, so variants compare as
/// `None < Neon < Sse42 < Avx2 < Avx512`. Do not reorder the variants without
/// checking code that compares levels.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum SimdLevel {
    /// No SIMD instruction set was reported.
    None,
    /// ARM NEON, 128-bit vectors.
    Neon,
    /// x86 SSE4.2, 128-bit vectors.
    Sse42,
    /// x86 AVX2, 256-bit vectors.
    Avx2,
    /// x86 AVX-512, 512-bit vectors.
    Avx512,
}
176
177impl std::fmt::Display for SimdLevel {
178    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
179        match self {
180            SimdLevel::None => write!(f, "None"),
181            SimdLevel::Neon => write!(f, "NEON"),
182            SimdLevel::Sse42 => write!(f, "SSE4.2"),
183            SimdLevel::Avx2 => write!(f, "AVX2"),
184            SimdLevel::Avx512 => write!(f, "AVX-512"),
185        }
186    }
187}
188
189/// Detect the cache line size.
190///
191/// Returns 64 bytes as the default, which is correct for most modern processors.
192#[must_use]
193pub fn cache_line_size() -> usize {
194    detect_cache_line_size().unwrap_or(64)
195}
196
197/// Attempt to detect the cache line size from system information.
198fn detect_cache_line_size() -> Option<usize> {
199    // Try Linux sysfs first
200    #[cfg(target_os = "linux")]
201    {
202        if let Some(size) = detect_cache_line_sysfs() {
203            return Some(size);
204        }
205    }
206
207    // Try CPUID on x86
208    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
209    {
210        if let Some(size) = detect_cache_line_cpuid() {
211            return Some(size);
212        }
213    }
214
215    None
216}
217
/// Detect cache line size from Linux sysfs.
///
/// Reads `coherency_line_size` for cache indices 0, 1 and 2 of `cpu0`, in that
/// order, and returns the first value that can be read, parses as an integer,
/// and is a power of two. Unreadable files, unparsable content, and
/// implausible values (including `0`) are skipped so the next candidate, or
/// the caller's next detection source, gets a chance.
///
/// Returns `None` when no candidate yields a usable value.
#[cfg(target_os = "linux")]
fn detect_cache_line_sysfs() -> Option<usize> {
    // index0 is tried first; it is typically the L1 data cache.
    const CANDIDATES: [&str; 3] = [
        "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size",
        "/sys/devices/system/cpu/cpu0/cache/index1/coherency_line_size",
        "/sys/devices/system/cpu/cpu0/cache/index2/coherency_line_size",
    ];

    CANDIDATES.iter().find_map(|path| {
        std::fs::read_to_string(path)
            .ok()
            .and_then(|text| text.trim().parse::<usize>().ok())
            // `is_power_of_two` is false for 0, so this also rejects a zero
            // reading, matching the non-zero guard used on the CPUID path.
            .filter(|size| size.is_power_of_two())
    })
}
238
/// Detect cache line size using CPUID.
///
/// Queries extended leaf `0x8000_0006` (AMD-style cache info, which typically
/// works on Intel as well) and returns the low byte of ECX, the L2 cache line
/// size. Returns `None` when the CPU does not report that leaf or the field
/// is zero.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn detect_cache_line_cpuid() -> Option<usize> {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::__cpuid;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::__cpuid;

    /// Leaf whose EAX reports the highest supported extended leaf.
    const EXT_MAX_LEAF: u32 = 0x8000_0000;
    /// Extended leaf carrying the L2 cache description.
    const EXT_L2_CACHE_LEAF: u32 = 0x8000_0006;

    // SAFETY: CPUID is a safe instruction on x86
    let highest_ext_leaf = unsafe { __cpuid(EXT_MAX_LEAF) }.eax;
    if highest_ext_leaf < EXT_L2_CACHE_LEAF {
        return None;
    }

    // SAFETY: CPUID is a safe instruction on x86
    let regs = unsafe { __cpuid(EXT_L2_CACHE_LEAF) };

    // ECX bits 0-7 contain the L2 cache line size; zero means "not reported".
    let line_bytes = (regs.ecx & 0xFF) as usize;
    Some(line_bytes).filter(|&bytes| bytes > 0)
}
266
/// Get the number of logical CPUs.
///
/// Thin wrapper around `num_cpus::get()`; the exact counting rules are those
/// of the `num_cpus` crate.
// NOTE(review): `num_cpus::get()` is documented to take CPU affinity / cgroup
// limits into account on Linux — confirm against the crate version in use.
#[must_use]
pub fn logical_cpu_count() -> usize {
    num_cpus::get()
}
272
/// Get the number of physical CPU cores.
///
/// Thin wrapper around `num_cpus::get_physical()`; the exact counting rules
/// are those of the `num_cpus` crate.
// NOTE(review): `num_cpus` documents a fallback to the logical count on
// platforms where physical-core detection is unsupported — confirm against
// the crate version in use.
#[must_use]
pub fn physical_cpu_count() -> usize {
    num_cpus::get_physical()
}
278
279/// Check if SMT (Hyper-Threading) is enabled.
280#[must_use]
281pub fn is_smt_enabled() -> bool {
282    logical_cpu_count() > physical_cpu_count()
283}
284
// Unit tests for CPU feature detection, cache line size detection, and the
// CPU-count helpers. Tests that call `detect()` or the count helpers depend on
// the host machine, so they only assert loose, host-independent properties.
#[cfg(test)]
mod tests {
    use super::*;

    // Detection should report at least a baseline feature on the host.
    #[test]
    fn test_cpu_features_detect() {
        let features = CpuFeatures::detect();

        // At minimum, we should have some features on x86
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // SSE4.2 and POPCNT are available on any CPU from the last 15+ years.
            // This test should pass on any reasonably modern system.
            assert!(
                features.sse4_2 || features.popcnt || features.avx2,
                "Expected at least one common x86 feature to be available"
            );
        }

        // On ARM64, NEON is mandatory
        #[cfg(target_arch = "aarch64")]
        {
            assert!(features.neon);
        }
    }

    // `simd_level` returns some variant and that variant can be displayed.
    #[test]
    fn test_cpu_features_simd_level() {
        let features = CpuFeatures::detect();
        let level = features.simd_level();

        // Exhaustive match with no wildcard: adding a variant to `SimdLevel`
        // makes this fail to compile until the list is updated.
        match level {
            SimdLevel::None
            | SimdLevel::Neon
            | SimdLevel::Sse42
            | SimdLevel::Avx2
            | SimdLevel::Avx512 => {}
        }

        // Display should produce a non-empty label
        let level_str = format!("{level}");
        assert!(!level_str.is_empty());
    }

    // `summary` never returns an empty string.
    #[test]
    fn test_cpu_features_summary() {
        let features = CpuFeatures::detect();
        let summary = features.summary();

        // Summary should be non-empty (at least "None")
        assert!(!summary.is_empty());
    }

    // The default value has the checked SIMD flags cleared.
    #[test]
    fn test_cpu_features_default() {
        let features = CpuFeatures::default();
        assert!(!features.sse4_2);
        assert!(!features.avx2);
        assert!(!features.avx512f);
        assert!(!features.neon);
    }

    // Whatever the source (detection or the fallback), the size is plausible.
    #[test]
    fn test_cache_line_size() {
        let size = cache_line_size();
        // Cache line size should be a reasonable power of 2
        assert!(size >= 32);
        assert!(size <= 256);
        assert!(size.is_power_of_two());
    }

    // There is always at least one logical CPU.
    #[test]
    fn test_logical_cpu_count() {
        let count = logical_cpu_count();
        assert!(count >= 1);
    }

    // There is always at least one physical core.
    #[test]
    fn test_physical_cpu_count() {
        let count = physical_cpu_count();
        assert!(count >= 1);
        // Physical count should not exceed logical.
        // NOTE(review): this may not hold when the process is restricted by
        // CPU affinity or cgroup limits, if `num_cpus` applies those only to
        // the logical count — confirm before relying on it in constrained CI.
        assert!(count <= logical_cpu_count());
    }

    // Smoke test only: the result depends on the host.
    #[test]
    fn test_is_smt_enabled() {
        // Just ensure this doesn't panic
        let _ = is_smt_enabled();
    }

    // The derived ordering ranks levels None < Neon < Sse42 < Avx2 < Avx512.
    #[test]
    fn test_simd_level_ordering() {
        assert!(SimdLevel::None < SimdLevel::Neon);
        assert!(SimdLevel::Neon < SimdLevel::Sse42);
        assert!(SimdLevel::Sse42 < SimdLevel::Avx2);
        assert!(SimdLevel::Avx2 < SimdLevel::Avx512);
    }

    // `has_simd` is false by default and true once AVX2 or NEON is set.
    #[test]
    fn test_has_simd() {
        let mut features = CpuFeatures::default();
        assert!(!features.has_simd());

        features.avx2 = true;
        assert!(features.has_simd());

        features = CpuFeatures::default();
        features.neon = true;
        assert!(features.has_simd());
    }

    // `has_hw_crc32` is false by default and true for SSE4.2 or ARM CRC32.
    #[test]
    fn test_has_hw_crc32() {
        let mut features = CpuFeatures::default();
        assert!(!features.has_hw_crc32());

        features.sse4_2 = true;
        assert!(features.has_hw_crc32());

        features = CpuFeatures::default();
        features.arm_crc32 = true;
        assert!(features.has_hw_crc32());
    }
}