scirs2_core/simd/
detect.rs

1//! CPU feature detection and SIMD capability management
2//!
3//! This module provides runtime detection of SIMD capabilities and manages
4//! CPU feature information through a cached singleton pattern for optimal performance.
5
6use std::sync::OnceLock;
7
/// SIMD-related CPU feature flags detected at runtime.
///
/// Each field records whether one instruction-set extension is available.
/// The detection results are cached (see [`get_cpu_features`]) so the
/// comparatively expensive runtime probes are not repeated.
///
/// `CpuFeatures::default()` yields every flag set to `false`, which describes
/// a scalar-only baseline. Values can be compared with `==`, which is handy
/// for tests and for checking a snapshot against an expected configuration.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CpuFeatures {
    /// AVX-512F (512-bit SIMD) support
    pub has_avx512f: bool,
    /// AVX2 (256-bit SIMD) support
    pub has_avx2: bool,
    /// SSE (128-bit SIMD) support
    pub has_sse: bool,
    /// FMA (Fused Multiply-Add) support
    pub has_fma: bool,
    /// NEON (ARM SIMD) support
    pub has_neon: bool,
}
25
26static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
27
28/// Get CPU features with lazy initialization
29///
30/// This function returns a static reference to CPU features, initializing
31/// them on first call. Subsequent calls return the cached result.
32///
33/// # Returns
34///
35/// A static reference to `CpuFeatures` containing detected CPU capabilities.
36pub fn get_cpu_features() -> &'static CpuFeatures {
37    CPU_FEATURES.get_or_init(|| {
38        #[cfg(target_arch = "x86_64")]
39        {
40            CpuFeatures {
41                has_avx512f: std::arch::is_x86_feature_detected!("avx512f"),
42                has_avx2: std::arch::is_x86_feature_detected!("avx2"),
43                has_sse: std::arch::is_x86_feature_detected!("sse"),
44                has_fma: std::arch::is_x86_feature_detected!("fma"),
45                has_neon: false,
46            }
47        }
48        #[cfg(target_arch = "aarch64")]
49        {
50            CpuFeatures {
51                has_avx512f: false,
52                has_avx2: false,
53                has_sse: false,
54                has_fma: false, // ARM uses different FMA instructions
55                has_neon: std::arch::is_aarch64_feature_detected!("neon"),
56            }
57        }
58        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
59        {
60            CpuFeatures {
61                has_avx512f: false,
62                has_avx2: false,
63                has_sse: false,
64                has_fma: false,
65                has_neon: false,
66            }
67        }
68    })
69}
70
/// Extended description of the system's SIMD capabilities.
///
/// Combines instruction-set flags with vector widths, cache sizes and a
/// prefetch distance so callers can pick kernels and blocking parameters
/// from a single value. Values can be compared with `==`, e.g. to check a
/// snapshot against an expected configuration in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimdCapabilities {
    /// AVX2 (256-bit SIMD) support
    pub has_avx2: bool,
    /// AVX-512 (512-bit SIMD) support
    pub has_avx512: bool,
    /// FMA (Fused Multiply-Add) support
    pub has_fma: bool,
    /// SSE4.2 support
    pub has_sse42: bool,
    /// BMI2 (Bit Manipulation Instructions 2) support
    pub has_bmi2: bool,
    /// Number of `f32` elements that can be processed in parallel
    pub vector_width_f32: usize,
    /// Number of `f64` elements that can be processed in parallel
    pub vector_width_f64: usize,
    /// CPU cache line size in bytes
    pub cache_line_size: usize,
    /// L1 cache size in bytes
    pub l1_cache_size: usize,
    /// L2 cache size in bytes
    pub l2_cache_size: usize,
    /// Prefetch distance in cache lines
    pub prefetch_distance: usize,
}
100
101impl Default for SimdCapabilities {
102    fn default() -> Self {
103        let cpu_features = get_cpu_features();
104
105        Self {
106            // Use detected CPU features
107            has_avx2: cpu_features.has_avx2,
108            has_avx512: cpu_features.has_avx512f,
109            has_fma: cpu_features.has_fma,
110            has_sse42: cpu_features.has_sse,
111            has_bmi2: false, // Conservative default, would need specific detection
112            vector_width_f32: if cpu_features.has_avx512f {
113                16 // AVX-512 can process 16 f32s
114            } else if cpu_features.has_avx2 {
115                8 // AVX2 can process 8 f32s
116            } else if cpu_features.has_sse || cpu_features.has_neon {
117                4 // SSE/NEON can process 4 f32s
118            } else {
119                1 // Scalar fallback
120            },
121            vector_width_f64: if cpu_features.has_avx512f {
122                8 // AVX-512 can process 8 f64s
123            } else if cpu_features.has_avx2 {
124                4 // AVX2 can process 4 f64s
125            } else if cpu_features.has_sse || cpu_features.has_neon {
126                2 // SSE/NEON can process 2 f64s
127            } else {
128                1 // Scalar fallback
129            },
130            cache_line_size: 64,   // Typical cache line size
131            l1_cache_size: 32768,  // 32KB typical L1 cache
132            l2_cache_size: 262144, // 256KB typical L2 cache
133            prefetch_distance: 16, // Prefetch 16 cache lines ahead
134        }
135    }
136}
137
138/// Detect SIMD capabilities for the current system
139///
140/// This function returns detailed SIMD capabilities including vector widths,
141/// cache information, and supported instruction sets.
142///
143/// # Returns
144///
145/// A `SimdCapabilities` struct containing detailed system capabilities.
146///
147/// # Examples
148///
149/// ```ignore
150/// use scirs2_core::simd::detect::detect_simd_capabilities;
151///
152/// let caps = detect_simd_capabilities();
153/// println!("Vector width for f32: {}", caps.vector_width_f32);
154/// println!("Has AVX2: {}", caps.has_avx2);
155/// ```
156#[allow(dead_code)]
157pub fn detect_simd_capabilities() -> SimdCapabilities {
158    SimdCapabilities::default()
159}