preemptive_threads/arch/
detection.rs

1//! Architecture detection and runtime optimization.
2//!
3//! This module provides runtime detection of CPU features and capabilities
4//! to enable architecture-specific optimizations.
5
6use portable_atomic::{AtomicBool, Ordering};
7
/// CPU architecture types.
///
/// The value is chosen at compile time by `detect_architecture` from the
/// build's `target_arch` and is stored in `CpuFeatures::arch`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CpuArch {
    /// Built for `target_arch = "x86_64"`.
    X86_64,
    /// Built for `target_arch = "aarch64"`.
    Aarch64,
    /// Built for `target_arch = "riscv64"`.
    RiscV64,
    /// Built for any other target architecture.
    Unknown,
}
16
/// CPU feature flags for different architectures.
///
/// A plain `Copy` snapshot filled in by `perform_detection`. The
/// architecture-specific fields only exist when the matching crate feature
/// (`x86_64`, `arm64`, `riscv64`) is enabled.
#[derive(Debug, Clone, Copy)]
pub struct CpuFeatures {
    /// Architecture reported by `detect_architecture`.
    pub arch: CpuArch,
    /// Cache line size (presumably in bytes; every detector currently reports 64).
    pub cache_line_size: u32,
    /// Number of CPU cores; the current detector always reports 1.
    pub cpu_cores: u32,
    /// Whether floating-point support is reported for `arch`.
    pub supports_fpu: bool,
    /// Whether the architecture's vector extension (SSE / NEON / RISC-V V) is reported.
    pub supports_vector: bool,
    /// Whether atomic compare-and-swap is reported as available.
    pub supports_atomic_cas: bool,
    /// Whether memory-ordering (fence/barrier) instructions are reported as available.
    pub supports_memory_ordering: bool,

    // x86_64-specific features
    /// SSE support, as reported by `detect_x86_64_sse`.
    #[cfg(feature = "x86_64")]
    pub supports_sse: bool,
    /// AVX support, as reported by `detect_x86_64_avx`.
    #[cfg(feature = "x86_64")]
    pub supports_avx: bool,
    /// AVX-512 support, as reported by `detect_x86_64_avx512`.
    #[cfg(feature = "x86_64")]
    pub supports_avx512: bool,

    // ARM64-specific features
    /// NEON support, as reported by `detect_arm64_neon`.
    #[cfg(feature = "arm64")]
    pub supports_neon: bool,
    /// SVE support, as reported by `detect_arm64_sve`.
    #[cfg(feature = "arm64")]
    pub supports_sve: bool,
    /// SVE2 support, as reported by `detect_arm64_sve2`.
    #[cfg(feature = "arm64")]
    pub supports_sve2: bool,

    // RISC-V-specific features
    /// F extension support, as reported by `detect_riscv_f_extension`.
    #[cfg(feature = "riscv64")]
    pub supports_riscv_f: bool,
    /// D extension support, as reported by `detect_riscv_d_extension`.
    #[cfg(feature = "riscv64")]
    pub supports_riscv_d: bool,
    /// V extension support, as reported by `detect_riscv_v_extension`.
    #[cfg(feature = "riscv64")]
    pub supports_riscv_v: bool,
}
52
/// Cached detection result; `None` until `detect_cpu_features` stores one.
static CPU_FEATURES: spin::Mutex<Option<CpuFeatures>> = spin::Mutex::new(None);
/// Set with `Release` after `CPU_FEATURES` has been filled; readers load it with `Acquire`.
static DETECTION_DONE: AtomicBool = AtomicBool::new(false);
55
56/// Detect current CPU architecture and features.
57pub fn detect_cpu_features() -> CpuFeatures {
58    // Fast path - check if already detected
59    if DETECTION_DONE.load(Ordering::Acquire) {
60        let guard = CPU_FEATURES.lock();
61        if let Some(features) = *guard {
62            return features;
63        }
64    }
65    
66    // Slow path - perform detection
67    let features = perform_detection();
68    
69    // Store results
70    {
71        let mut guard = CPU_FEATURES.lock();
72        *guard = Some(features);
73    }
74    DETECTION_DONE.store(true, Ordering::Release);
75    
76    features
77}
78
79/// Get cached CPU features (must call detect_cpu_features first).
80pub fn get_cpu_features() -> Option<CpuFeatures> {
81    if DETECTION_DONE.load(Ordering::Acquire) {
82        let guard = CPU_FEATURES.lock();
83        *guard
84    } else {
85        None
86    }
87}
88
89/// Internal CPU feature detection.
90fn perform_detection() -> CpuFeatures {
91    let arch = detect_architecture();
92    let cache_line_size = detect_cache_line_size(arch);
93    let cpu_cores = detect_cpu_cores();
94    
95    CpuFeatures {
96        arch,
97        cache_line_size,
98        cpu_cores,
99        supports_fpu: detect_fpu_support(arch),
100        supports_vector: detect_vector_support(arch),
101        supports_atomic_cas: detect_atomic_cas_support(arch),
102        supports_memory_ordering: detect_memory_ordering_support(arch),
103        
104        #[cfg(feature = "x86_64")]
105        supports_sse: detect_x86_64_sse(),
106        #[cfg(feature = "x86_64")]
107        supports_avx: detect_x86_64_avx(),
108        #[cfg(feature = "x86_64")]
109        supports_avx512: detect_x86_64_avx512(),
110        
111        #[cfg(feature = "arm64")]
112        supports_neon: detect_arm64_neon(),
113        #[cfg(feature = "arm64")]
114        supports_sve: detect_arm64_sve(),
115        #[cfg(feature = "arm64")]
116        supports_sve2: detect_arm64_sve2(),
117        
118        #[cfg(feature = "riscv64")]
119        supports_riscv_f: detect_riscv_f_extension(),
120        #[cfg(feature = "riscv64")]
121        supports_riscv_d: detect_riscv_d_extension(),
122        #[cfg(feature = "riscv64")]
123        supports_riscv_v: detect_riscv_v_extension(),
124    }
125}
126
127/// Detect the current CPU architecture.
128fn detect_architecture() -> CpuArch {
129    #[cfg(target_arch = "x86_64")]
130    return CpuArch::X86_64;
131    
132    #[cfg(target_arch = "aarch64")]
133    return CpuArch::Aarch64;
134    
135    #[cfg(target_arch = "riscv64")]
136    return CpuArch::RiscV64;
137    
138    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "riscv64")))]
139    return CpuArch::Unknown;
140}
141
142/// Detect cache line size for the current architecture.
143fn detect_cache_line_size(arch: CpuArch) -> u32 {
144    match arch {
145        CpuArch::X86_64 => {
146            #[cfg(feature = "x86_64")]
147            return detect_x86_64_cache_line_size();
148            #[cfg(not(feature = "x86_64"))]
149            return 64;
150        }
151        CpuArch::Aarch64 => {
152            #[cfg(feature = "arm64")]
153            return detect_arm64_cache_line_size();
154            #[cfg(not(feature = "arm64"))]
155            return 64;
156        }
157        CpuArch::RiscV64 => {
158            #[cfg(feature = "riscv64")]
159            return detect_riscv_cache_line_size();
160            #[cfg(not(feature = "riscv64"))]
161            return 64;
162        }
163        CpuArch::Unknown => 64, // Safe default
164    }
165}
166
/// Report the number of CPU cores.
///
/// No system information is queried yet, so this always yields the
/// conservative answer of a single core.
fn detect_cpu_cores() -> u32 {
    // Placeholder until real system information is read.
    const ASSUMED_CORE_COUNT: u32 = 1;
    ASSUMED_CORE_COUNT
}
173
174/// Detect FPU support for the given architecture.
175fn detect_fpu_support(arch: CpuArch) -> bool {
176    match arch {
177        CpuArch::X86_64 => true, // x86_64 always has FPU
178        CpuArch::Aarch64 => true, // ARM64 always has FPU
179        CpuArch::RiscV64 => {
180            #[cfg(feature = "riscv64")]
181            return detect_riscv_f_extension();
182            #[cfg(not(feature = "riscv64"))]
183            return false;
184        }
185        CpuArch::Unknown => false,
186    }
187}
188
189/// Detect vector instruction support.
190fn detect_vector_support(arch: CpuArch) -> bool {
191    match arch {
192        CpuArch::X86_64 => {
193            #[cfg(feature = "x86_64")]
194            return detect_x86_64_sse();
195            #[cfg(not(feature = "x86_64"))]
196            return false;
197        }
198        CpuArch::Aarch64 => {
199            #[cfg(feature = "arm64")]
200            return detect_arm64_neon();
201            #[cfg(not(feature = "arm64"))]
202            return false;
203        }
204        CpuArch::RiscV64 => {
205            #[cfg(feature = "riscv64")]
206            return detect_riscv_v_extension();
207            #[cfg(not(feature = "riscv64"))]
208            return false;
209        }
210        CpuArch::Unknown => false,
211    }
212}
213
214/// Detect atomic compare-and-swap support.
215fn detect_atomic_cas_support(arch: CpuArch) -> bool {
216    match arch {
217        CpuArch::X86_64 => true, // x86_64 always supports CAS
218        CpuArch::Aarch64 => true, // ARM64 always supports CAS
219        CpuArch::RiscV64 => true, // RISC-V with A extension (assumed)
220        CpuArch::Unknown => false,
221    }
222}
223
224/// Detect memory ordering instruction support.
225fn detect_memory_ordering_support(arch: CpuArch) -> bool {
226    match arch {
227        CpuArch::X86_64 => true, // Has mfence, lfence, sfence
228        CpuArch::Aarch64 => true, // Has dsb, dmb, isb
229        CpuArch::RiscV64 => true, // Has fence
230        CpuArch::Unknown => false,
231    }
232}
233
234// x86_64-specific detection functions
/// Report the x86_64 cache line size.
///
/// Placeholder: always returns 64; no CPUID query is performed.
#[cfg(feature = "x86_64")]
fn detect_x86_64_cache_line_size() -> u32 {
    // Could use CPUID to detect; for now return the common size.
    64
}
240
/// Report SSE support on x86_64.
///
/// Placeholder: always returns `true`; no CPUID query is performed.
#[cfg(feature = "x86_64")]
fn detect_x86_64_sse() -> bool {
    // Would use the CPUID instruction to check SSE support.
    // For now, assume true on x86_64.
    true
}
247
/// Report AVX support on x86_64.
///
/// Placeholder: always returns `false`; no CPUID query is performed.
#[cfg(feature = "x86_64")]
fn detect_x86_64_avx() -> bool {
    // Would use CPUID to check AVX support.
    // For now, return false as not all systems have AVX.
    false
}
254
/// Report AVX-512 support on x86_64.
///
/// Placeholder: always returns `false`; no CPUID query is performed.
#[cfg(feature = "x86_64")]
fn detect_x86_64_avx512() -> bool {
    // Would use CPUID to check AVX-512 support.
    // For now, return false as AVX-512 is less common.
    false
}
261
262// ARM64-specific detection functions
/// Report the ARM64 cache line size.
///
/// Always returns 64, whether or not the `arm64` feature is enabled; no
/// system register is read. A real probe would go here, gated on the
/// `arm64` feature.
fn detect_arm64_cache_line_size() -> u32 {
    64
}
273
/// Report NEON support on ARM64.
///
/// Always returns `true`; nothing is probed.
#[cfg(feature = "arm64")]
fn detect_arm64_neon() -> bool {
    // ARM64 always has NEON
    true
}
279
/// Report SVE support on ARM64.
///
/// Placeholder: always returns `false`; no system register is read.
#[cfg(feature = "arm64")]
fn detect_arm64_sve() -> bool {
    // Would check the ID_AA64PFR0_EL1 register for SVE support.
    // For now, return false as not all ARM64 systems have SVE.
    false
}
286
/// Report SVE2 support on ARM64.
///
/// Placeholder: always returns `false`; no system register is read.
#[cfg(feature = "arm64")]
fn detect_arm64_sve2() -> bool {
    // Would check for SVE2 support in system registers.
    false
}
292
293// RISC-V-specific detection functions
/// Report the RISC-V cache line size.
///
/// RISC-V cache line size varies by implementation; this always returns the
/// common default of 64, whether or not the `riscv64` feature is enabled.
fn detect_riscv_cache_line_size() -> u32 {
    64
}
304
/// Report RISC-V F (single-precision float) extension support.
///
/// The `misa` CSR is not read. The answer is `true` only when both the
/// `riscv64` and `riscv-float` crate features are enabled, and `false`
/// otherwise.
fn detect_riscv_f_extension() -> bool {
    cfg!(all(feature = "riscv64", feature = "riscv-float"))
}
316
/// Report RISC-V D extension support.
///
/// Placeholder: mirrors the `riscv-float` crate feature; the `misa` CSR is
/// not read.
// NOTE(review): keyed on the same `riscv-float` feature as the F extension;
// confirm that a single feature is meant to cover both F and D.
#[cfg(feature = "riscv64")]
fn detect_riscv_d_extension() -> bool {
    // Would check the misa CSR for the D extension.
    cfg!(feature = "riscv-float")
}
322
/// Report RISC-V V (vector) extension support.
///
/// Placeholder: mirrors the `riscv-vector` crate feature; nothing is probed
/// at runtime.
#[cfg(feature = "riscv64")]
fn detect_riscv_v_extension() -> bool {
    // Would check for V extension support.
    cfg!(feature = "riscv-vector")
}
328
329/// Runtime optimization controller.
330pub struct RuntimeOptimizer {
331    features: CpuFeatures,
332}
333
334impl RuntimeOptimizer {
335    /// Create a new runtime optimizer with detected CPU features.
336    pub fn new() -> Self {
337        Self {
338            features: detect_cpu_features(),
339        }
340    }
341    
342    /// Get the detected CPU features.
343    pub fn features(&self) -> &CpuFeatures {
344        &self.features
345    }
346    
347    /// Choose optimal memory barrier implementation.
348    pub fn optimal_memory_barrier(&self) -> fn() {
349        match self.features.arch {
350            CpuArch::X86_64 => {
351                #[cfg(feature = "x86_64")]
352                return crate::arch::x86_64::memory_barrier_full;
353                #[cfg(not(feature = "x86_64"))]
354                return || core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
355            }
356            CpuArch::Aarch64 => {
357                #[cfg(feature = "arm64")]
358                return crate::arch::aarch64::memory_barrier_full;
359                #[cfg(not(feature = "arm64"))]
360                return || core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
361            }
362            CpuArch::RiscV64 => {
363                #[cfg(feature = "riscv64")]
364                return crate::arch::riscv::memory_barrier_full;
365                #[cfg(not(feature = "riscv64"))]
366                return || core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst);
367            }
368            CpuArch::Unknown => || core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst),
369        }
370    }
371    
372    /// Get optimal cache line size for alignment.
373    pub fn optimal_cache_line_size(&self) -> usize {
374        self.features.cache_line_size as usize
375    }
376    
377    /// Determine if lock-free algorithms should be preferred.
378    pub fn prefer_lock_free(&self) -> bool {
379        self.features.supports_atomic_cas && self.features.supports_memory_ordering
380    }
381    
382    /// Get recommended number of worker threads.
383    pub fn recommended_worker_threads(&self) -> usize {
384        (self.features.cpu_cores as usize).max(1)
385    }
386}
387
/// `Default` delegates to `RuntimeOptimizer::new`.
impl Default for RuntimeOptimizer {
    /// Build an optimizer exactly as `RuntimeOptimizer::new` does.
    fn default() -> Self {
        Self::new()
    }
}
393
/// Global runtime optimizer instance.
///
/// Starts as `None` and is filled by the first call to `global_optimizer`.
static GLOBAL_OPTIMIZER: spin::Mutex<Option<RuntimeOptimizer>> = spin::Mutex::new(None);
396
397/// Get the global runtime optimizer instance.
398pub fn global_optimizer() -> RuntimeOptimizer {
399    let mut guard = GLOBAL_OPTIMIZER.lock();
400    if let Some(optimizer) = guard.as_ref() {
401        RuntimeOptimizer {
402            features: optimizer.features,
403        }
404    } else {
405        let optimizer = RuntimeOptimizer::new();
406        *guard = Some(RuntimeOptimizer {
407            features: optimizer.features,
408        });
409        optimizer
410    }
411}