Skip to main content

rustalign_simd/
cpu.rs

1//! CPU feature detection
2//!
3//! This module provides runtime CPU feature detection,
4//! matching the C++ ProcessorSupport class and __builtin_cpu_supports.
5
/// Detected CPU features for SIMD dispatch.
///
/// A plain set of boolean flags, normally produced by [`CpuFeatures::detect`].
/// The type is `Copy` and compares by value, so two detection results (or a
/// detection result and a hand-built expectation) can be checked with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CpuFeatures {
    /// SSE2 support
    pub sse2: bool,
    /// SSE4.2 support
    pub sse4_2: bool,
    /// POPCNT instruction support
    pub popcnt: bool,
    /// AVX support
    pub avx: bool,
    /// AVX2 support
    pub avx2: bool,
    /// NEON support (field exists only when compiling for AArch64)
    #[cfg(target_arch = "aarch64")]
    pub neon: bool,
}
23
24impl CpuFeatures {
25    /// Detect CPU features at runtime
26    ///
27    /// This matches the C++ ProcessorSupport::POPCNTenabled() method
28    /// and extends it to detect all SIMD features used by RustAlign.
29    pub fn detect() -> Self {
30        Self {
31            sse2: cfg!(not(target_arch = "aarch64")) && detect_sse2(),
32            sse4_2: cfg!(not(target_arch = "aarch64")) && detect_sse4_2(),
33            popcnt: detect_popcnt(),
34            avx: cfg!(not(target_arch = "aarch64")) && detect_avx(),
35            avx2: cfg!(not(target_arch = "aarch64")) && detect_avx2(),
36            #[cfg(target_arch = "aarch64")]
37            neon: detect_neon(),
38        }
39    }
40
41    /// Check if x86-64-v3 (AVX2) is supported
42    ///
43    /// This matches the C++ __builtin_cpu_supports("x86-64-v3") check.
44    pub fn x86_64_v3(&self) -> bool {
45        self.avx2
46    }
47
48    /// Get the best available SIMD width
49    pub fn simd_width(&self) -> usize {
50        if self.avx2 {
51            32
52        } else if self.sse2 {
53            16
54        } else {
55            0 // Scalar
56        }
57    }
58}
59
60impl Default for CpuFeatures {
61    fn default() -> Self {
62        Self::detect()
63    }
64}
65
66#[cfg(target_arch = "x86_64")]
67#[allow(unused_imports)]
68use std::arch::x86_64::*;
69
/// Runtime probe for SSE2.
///
/// This definition is compiled only for x86_64 targets.
#[cfg(target_arch = "x86_64")]
fn detect_sse2() -> bool {
    std::arch::is_x86_feature_detected!("sse2")
}
75
/// Runtime probe for SSE4.2.
///
/// This definition is compiled only for x86_64 targets.
#[cfg(target_arch = "x86_64")]
fn detect_sse4_2() -> bool {
    std::arch::is_x86_feature_detected!("sse4.2")
}
81
/// Runtime probe for the POPCNT instruction.
///
/// This definition is compiled only for x86_64 targets.
#[cfg(target_arch = "x86_64")]
fn detect_popcnt() -> bool {
    std::arch::is_x86_feature_detected!("popcnt")
}
87
/// Runtime probe for AVX.
///
/// This definition is compiled only for x86_64 targets.
#[cfg(target_arch = "x86_64")]
fn detect_avx() -> bool {
    std::arch::is_x86_feature_detected!("avx")
}
93
/// Runtime probe for AVX2.
///
/// This definition is compiled only for x86_64 targets.
#[cfg(target_arch = "x86_64")]
fn detect_avx2() -> bool {
    std::arch::is_x86_feature_detected!("avx2")
}
99
/// SSE2 fallback for targets other than x86_64: always reports the feature
/// as unavailable.
#[cfg(not(target_arch = "x86_64"))]
fn detect_sse2() -> bool {
    false
}
105
/// SSE4.2 fallback for targets other than x86_64: always reports the feature
/// as unavailable.
#[cfg(not(target_arch = "x86_64"))]
fn detect_sse4_2() -> bool {
    false
}
110
/// POPCNT fallback for targets other than x86_64: unconditionally reports
/// population-count support.
///
/// NOTE(review): no probe is performed here; the value rests on the
/// assumption below — confirm it holds for every non-x86_64 target this
/// crate is built for.
#[cfg(not(target_arch = "x86_64"))]
fn detect_popcnt() -> bool {
    // Most modern CPUs support popcnt
    true
}
115
/// AVX fallback for targets other than x86_64: always reports the feature as
/// unavailable.
#[cfg(not(target_arch = "x86_64"))]
fn detect_avx() -> bool {
    false
}
120
/// AVX2 fallback for targets other than x86_64: always reports the feature as
/// unavailable.
#[cfg(not(target_arch = "x86_64"))]
fn detect_avx2() -> bool {
    false
}
125
/// Report NEON support.
///
/// Compiled only for aarch64, where it returns `true` without probing.
#[cfg(target_arch = "aarch64")]
fn detect_neon() -> bool {
    // NEON is always available on aarch64, so there is nothing to query.
    true
}
132
133/// Convenience functions for individual feature detection
134pub fn has_sse2() -> bool {
135    detect_sse2()
136}
137
138/// Check if SSE4.2 is available
139pub fn has_sse4_2() -> bool {
140    detect_sse4_2()
141}
142
143/// Check if POPCNT is available
144pub fn has_popcnt() -> bool {
145    detect_popcnt()
146}
147
148/// Check if AVX is available (x86_64)
149#[cfg(target_arch = "x86_64")]
150#[allow(dead_code)]
151pub fn has_avx() -> bool {
152    detect_avx()
153}
154
155/// Check if AVX2 is available (x86_64)
156#[cfg(target_arch = "x86_64")]
157pub fn has_avx2() -> bool {
158    detect_avx2()
159}
160
161/// Check if NEON is available (always false on x86_64)
162#[cfg(target_arch = "x86_64")]
163pub fn has_neon() -> bool {
164    false
165}
166
167/// Check if NEON is available (aarch64)
168#[cfg(target_arch = "aarch64")]
169pub fn has_neon() -> bool {
170    detect_neon()
171}
172
173/// Check if AVX is available (always false on aarch64)
174#[cfg(target_arch = "aarch64")]
175#[allow(dead_code)]
176pub fn has_avx() -> bool {
177    false
178}
179
180/// Check if AVX2 is available (always false on aarch64)
181#[cfg(target_arch = "aarch64")]
182pub fn has_avx2() -> bool {
183    false
184}
185
186/// Stub implementations for non-x86_64, non-aarch64 platforms
187#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
188#[allow(dead_code)]
189pub fn has_avx() -> bool {
190    false
191}
192
193/// Check if AVX2 is available (stub for unsupported platforms)
194#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
195pub fn has_avx2() -> bool {
196    false
197}
198
199/// Check if NEON is available (stub for unsupported platforms)
200#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
201pub fn has_neon() -> bool {
202    false
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// The aggregated struct must agree with the individual `has_*` helpers.
    #[test]
    fn test_cpu_features_detect() {
        let features = CpuFeatures::detect();
        assert_eq!(features.sse2, has_sse2());
        assert_eq!(features.sse4_2, has_sse4_2());
        assert_eq!(features.popcnt, has_popcnt());
        assert_eq!(features.avx, has_avx());
        assert_eq!(features.avx2, has_avx2());
    }

    /// x86-64-v3 can never be reported without AVX2.
    #[test]
    fn test_x86_64_v3() {
        let features = CpuFeatures::detect();
        if features.x86_64_v3() {
            assert!(features.avx2);
        }
        if !features.avx2 {
            assert!(!features.x86_64_v3());
        }
    }

    /// The reported width is one of the known values and tracks the x86 flags.
    #[test]
    fn test_simd_width() {
        let features = CpuFeatures::detect();
        let width = features.simd_width();
        // Width should be 0, 16, or 32
        assert!(matches!(width, 0 | 16 | 32));
        if features.avx2 {
            assert_eq!(width, 32);
        } else if features.sse2 {
            assert_eq!(width, 16);
        }
    }
}
232}