scirs2_core/simd/detect.rs
1//! CPU feature detection and SIMD capability management
2//!
3//! This module provides runtime detection of SIMD capabilities and manages
4//! CPU feature information through a cached singleton pattern for optimal performance.
5
6use std::sync::OnceLock;
7
/// CPU feature flags detected at runtime
///
/// This struct caches the results of CPU feature detection to avoid repeated
/// runtime checks. The process-wide instance is obtained through
/// [`get_cpu_features`], which initializes it once and then hands out the
/// same value on every call.
///
/// Flags that do not belong to the architecture the crate was compiled for
/// are always `false`: the x86 flags are only probed on `x86_64`, and the
/// NEON flag is only probed on `aarch64`.
#[derive(Debug, Clone)]
pub struct CpuFeatures {
    /// AVX-512F (512-bit SIMD) support
    ///
    /// Probed on `x86_64` only; `false` on every other target.
    pub has_avx512f: bool,
    /// AVX2 (256-bit SIMD) support
    ///
    /// Probed on `x86_64` only; `false` on every other target.
    pub has_avx2: bool,
    /// SSE (128-bit SIMD) support
    ///
    /// This is the baseline `"sse"` feature, not any later SSE revision.
    /// Probed on `x86_64` only; `false` on every other target.
    pub has_sse: bool,
    /// FMA (Fused Multiply-Add) support
    ///
    /// Tracks the x86 `"fma"` feature. It is reported as `false` on
    /// `aarch64`, where fused multiply-add uses different instructions.
    pub has_fma: bool,
    /// NEON (ARM SIMD) support
    ///
    /// Probed on `aarch64` only; `false` on every other target.
    pub has_neon: bool,
}
25
/// Process-wide cache backing [`get_cpu_features`].
///
/// Starts empty and is filled by `get_cpu_features` through
/// `OnceLock::get_or_init`, so the detection closure runs at most once.
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
27
28/// Get CPU features with lazy initialization
29///
30/// This function returns a static reference to CPU features, initializing
31/// them on first call. Subsequent calls return the cached result.
32///
33/// # Returns
34///
35/// A static reference to `CpuFeatures` containing detected CPU capabilities.
36pub fn get_cpu_features() -> &'static CpuFeatures {
37 CPU_FEATURES.get_or_init(|| {
38 #[cfg(target_arch = "x86_64")]
39 {
40 CpuFeatures {
41 has_avx512f: std::arch::is_x86_feature_detected!("avx512f"),
42 has_avx2: std::arch::is_x86_feature_detected!("avx2"),
43 has_sse: std::arch::is_x86_feature_detected!("sse"),
44 has_fma: std::arch::is_x86_feature_detected!("fma"),
45 has_neon: false,
46 }
47 }
48 #[cfg(target_arch = "aarch64")]
49 {
50 CpuFeatures {
51 has_avx512f: false,
52 has_avx2: false,
53 has_sse: false,
54 has_fma: false, // ARM uses different FMA instructions
55 has_neon: std::arch::is_aarch64_feature_detected!("neon"),
56 }
57 }
58 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
59 {
60 CpuFeatures {
61 has_avx512f: false,
62 has_avx2: false,
63 has_sse: false,
64 has_fma: false,
65 has_neon: false,
66 }
67 }
68 })
69}
70
/// Extended SIMD capabilities including cache information
///
/// This struct bundles instruction-set flags with the vector widths and
/// memory-hierarchy parameters that SIMD kernels can use for tuning.
///
/// The `Default` implementation builds a value from the cached CPU feature
/// detection. Its cache line size, cache sizes, and prefetch distance are
/// fixed typical constants rather than values read from the hardware.
#[derive(Debug, Clone)]
pub struct SimdCapabilities {
    /// AVX2 (256-bit SIMD) support
    pub has_avx2: bool,
    /// AVX-512 (512-bit SIMD) support
    ///
    /// `Default` fills this from the AVX-512F detection result.
    pub has_avx512: bool,
    /// FMA (Fused Multiply-Add) support
    pub has_fma: bool,
    /// SSE4.2 support
    pub has_sse42: bool,
    /// BMI2 (Bit Manipulation Instructions 2) support
    pub has_bmi2: bool,
    /// Number of f32 elements that can be processed in parallel
    ///
    /// `Default` uses 16 for AVX-512F, 8 for AVX2, 4 for SSE/NEON, and 1 as
    /// the scalar fallback.
    pub vector_width_f32: usize,
    /// Number of f64 elements that can be processed in parallel
    ///
    /// `Default` uses 8 for AVX-512F, 4 for AVX2, 2 for SSE/NEON, and 1 as
    /// the scalar fallback.
    pub vector_width_f64: usize,
    /// CPU cache line size in bytes
    ///
    /// `Default` uses the typical value 64.
    pub cache_line_size: usize,
    /// L1 cache size in bytes
    ///
    /// `Default` uses the typical value 32768 (32 KiB).
    pub l1_cache_size: usize,
    /// L2 cache size in bytes
    ///
    /// `Default` uses the typical value 262144 (256 KiB).
    pub l2_cache_size: usize,
    /// Prefetch distance in cache lines
    ///
    /// `Default` uses 16.
    pub prefetch_distance: usize,
}
100
101impl Default for SimdCapabilities {
102 fn default() -> Self {
103 let cpu_features = get_cpu_features();
104
105 Self {
106 // Use detected CPU features
107 has_avx2: cpu_features.has_avx2,
108 has_avx512: cpu_features.has_avx512f,
109 has_fma: cpu_features.has_fma,
110 has_sse42: cpu_features.has_sse,
111 has_bmi2: false, // Conservative default, would need specific detection
112 vector_width_f32: if cpu_features.has_avx512f {
113 16 // AVX-512 can process 16 f32s
114 } else if cpu_features.has_avx2 {
115 8 // AVX2 can process 8 f32s
116 } else if cpu_features.has_sse || cpu_features.has_neon {
117 4 // SSE/NEON can process 4 f32s
118 } else {
119 1 // Scalar fallback
120 },
121 vector_width_f64: if cpu_features.has_avx512f {
122 8 // AVX-512 can process 8 f64s
123 } else if cpu_features.has_avx2 {
124 4 // AVX2 can process 4 f64s
125 } else if cpu_features.has_sse || cpu_features.has_neon {
126 2 // SSE/NEON can process 2 f64s
127 } else {
128 1 // Scalar fallback
129 },
130 cache_line_size: 64, // Typical cache line size
131 l1_cache_size: 32768, // 32KB typical L1 cache
132 l2_cache_size: 262144, // 256KB typical L2 cache
133 prefetch_distance: 16, // Prefetch 16 cache lines ahead
134 }
135 }
136}
137
138/// Detect SIMD capabilities for the current system
139///
140/// This function returns detailed SIMD capabilities including vector widths,
141/// cache information, and supported instruction sets.
142///
143/// # Returns
144///
145/// A `SimdCapabilities` struct containing detailed system capabilities.
146///
147/// # Examples
148///
149/// ```ignore
150/// use scirs2_core::simd::detect::detect_simd_capabilities;
151///
152/// let caps = detect_simd_capabilities();
153/// println!("Vector width for f32: {}", caps.vector_width_f32);
154/// println!("Has AVX2: {}", caps.has_avx2);
155/// ```
156#[allow(dead_code)]
157pub fn detect_simd_capabilities() -> SimdCapabilities {
158 SimdCapabilities::default()
159}