Skip to main content

openjph_core/
arch.rs

1//! CPU architecture utilities — port of `ojph_arch.h/cpp`.
2//!
3//! Provides CPU feature detection, alignment constants, and bit-manipulation
4//! helpers that wrap Rust intrinsics for clarity and C++ API compatibility.
5
6// ---------------------------------------------------------------------------
7// Alignment constants
8// ---------------------------------------------------------------------------
9
/// Byte alignment demanded of SIMD working buffers.
///
/// The value is 64 bytes, sized for AVX-512.
pub const BYTE_ALIGNMENT: u32 = 64;

/// Base-2 logarithm of [`BYTE_ALIGNMENT`], computed at compile time from the
/// position of its highest set bit.
pub const LOG_BYTE_ALIGNMENT: u32 = 31 - BYTE_ALIGNMENT.leading_zeros();

/// Byte alignment used for heap-allocated objects.
pub const OBJECT_ALIGNMENT: u32 = 8;
18
19// ---------------------------------------------------------------------------
20// CPU extension levels — x86-64
21// ---------------------------------------------------------------------------
22
/// x86-64 SIMD capability tiers.
///
/// Variants are declared from least to most capable, so the derived
/// `PartialOrd`/`Ord` comparison (`a < b`) reads as "`a` is a weaker tier than
/// `b`". The explicit discriminants are the integer codes handed out when a
/// variant is cast with `as i32`.
#[repr(i32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum CpuExtLevel {
    /// Plain scalar code; no SIMD extension assumed.
    Generic = 0,
    /// MMX — a legacy extension that is rarely targeted.
    Mmx = 1,
    /// SSE.
    Sse = 2,
    /// SSE2.
    Sse2 = 3,
    /// SSE3.
    Sse3 = 4,
    /// SSSE3 (Supplemental SSE3).
    Ssse3 = 5,
    /// SSE4.1.
    Sse41 = 6,
    /// SSE4.2.
    Sse42 = 7,
    /// AVX.
    Avx = 8,
    /// AVX2.
    Avx2 = 9,
    /// AVX2 together with FMA.
    Avx2Fma = 10,
    /// AVX-512 with, at minimum, the F, BW, CD, DQ and VL subsets.
    Avx512 = 11,
}
52
53// ---------------------------------------------------------------------------
54// CPU extension levels — ARM
55// ---------------------------------------------------------------------------
56
/// ARM SIMD capability tiers.
///
/// Variants are declared in ascending discriminant order, which is also the
/// order used by the derived `PartialOrd`/`Ord` comparison. The explicit
/// discriminants are the integer codes produced by an `as i32` cast.
#[repr(i32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ArmCpuExtLevel {
    /// Plain scalar code; no SIMD extension assumed.
    Generic = 0,
    /// NEON (also known as ASIMD).
    Neon = 1,
    /// SVE.
    Sve = 2,
    /// SVE2.
    Sve2 = 3,
}
70
71// ---------------------------------------------------------------------------
72// Runtime CPU feature detection
73// ---------------------------------------------------------------------------
74
75/// Detects the highest supported x86-64 SIMD level at runtime.
76///
77/// On non-x86 targets this always returns `CpuExtLevel::Generic as i32`.
78#[cfg(target_arch = "x86_64")]
79pub fn get_cpu_ext_level() -> i32 {
80    // Probe from highest to lowest.
81    if is_x86_feature_detected!("avx512f")
82        && is_x86_feature_detected!("avx512bw")
83        && is_x86_feature_detected!("avx512cd")
84        && is_x86_feature_detected!("avx512dq")
85        && is_x86_feature_detected!("avx512vl")
86    {
87        return CpuExtLevel::Avx512 as i32;
88    }
89    if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
90        return CpuExtLevel::Avx2Fma as i32;
91    }
92    if is_x86_feature_detected!("avx2") {
93        return CpuExtLevel::Avx2 as i32;
94    }
95    if is_x86_feature_detected!("avx") {
96        return CpuExtLevel::Avx as i32;
97    }
98    if is_x86_feature_detected!("sse4.2") {
99        return CpuExtLevel::Sse42 as i32;
100    }
101    if is_x86_feature_detected!("sse4.1") {
102        return CpuExtLevel::Sse41 as i32;
103    }
104    if is_x86_feature_detected!("ssse3") {
105        return CpuExtLevel::Ssse3 as i32;
106    }
107    if is_x86_feature_detected!("sse3") {
108        return CpuExtLevel::Sse3 as i32;
109    }
110    if is_x86_feature_detected!("sse2") {
111        return CpuExtLevel::Sse2 as i32;
112    }
113    if is_x86_feature_detected!("sse") {
114        return CpuExtLevel::Sse as i32;
115    }
116    CpuExtLevel::Generic as i32
117}
118
/// Reports the ARM SIMD tier for aarch64 builds.
///
/// No runtime probing takes place: the result is always
/// [`ArmCpuExtLevel::Neon`], so the SVE and SVE2 tiers are never returned by
/// this implementation.
///
/// # Returns
///
/// `ArmCpuExtLevel::Neon` cast to `i32`.
#[cfg(target_arch = "aarch64")]
pub fn get_cpu_ext_level() -> i32 {
    // NEON is always available on aarch64, so it is the fixed answer.
    let level = ArmCpuExtLevel::Neon;
    level as i32
}
125
/// Fallback used on architectures that have no dedicated detection routine
/// (anything other than x86-64 and aarch64).
///
/// # Returns
///
/// Always `0`, the discriminant of the `Generic` variant in both
/// [`CpuExtLevel`] and [`ArmCpuExtLevel`].
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
pub fn get_cpu_ext_level() -> i32 {
    const GENERIC_LEVEL: i32 = 0;
    GENERIC_LEVEL
}
131
132// ---------------------------------------------------------------------------
133// Bit-manipulation helpers
134// ---------------------------------------------------------------------------
135
/// Counts the bits that are set to 1 in `val`.
///
/// Thin wrapper over [`u32::count_ones`].
#[inline]
pub const fn population_count(val: u32) -> u32 {
    u32::count_ones(val)
}
141
/// Counts the zero bits above the most significant set bit of a 32-bit value.
///
/// Thin wrapper over [`u32::leading_zeros`]; an input of `0` yields `32`.
#[inline]
pub const fn count_leading_zeros(val: u32) -> u32 {
    u32::leading_zeros(val)
}
147
/// Counts the zero bits above the most significant set bit of a 64-bit value.
///
/// Thin wrapper over [`u64::leading_zeros`]; an input of `0` yields `64`.
#[inline]
pub const fn count_leading_zeros_u64(val: u64) -> u32 {
    u64::leading_zeros(val)
}
153
/// Counts the zero bits below the least significant set bit of a 32-bit value.
///
/// Thin wrapper over [`u32::trailing_zeros`]; an input of `0` yields `32`.
#[inline]
pub const fn count_trailing_zeros(val: u32) -> u32 {
    u32::trailing_zeros(val)
}
159
160// ---------------------------------------------------------------------------
161// Rounding helpers
162// ---------------------------------------------------------------------------
163
/// Rounds `val` to the nearest integer, with halfway cases going away from
/// zero, following the C++ `ojph_round` formula.
///
/// The value is first pushed half a unit away from zero and then converted
/// with an `as i32` cast, which drops the fractional part. As with any Rust
/// float-to-int cast, out-of-range results saturate at the `i32` limits and
/// NaN becomes `0`.
#[inline]
pub fn ojph_round(val: f32) -> i32 {
    let bias: f32 = if val >= 0.0 { 0.5 } else { -0.5 };
    let shifted = val + bias;
    shifted as i32
}
170
/// Converts `val` to an integer by discarding its fractional part (rounding
/// toward zero), the counterpart of the C++ `ojph_trunc`.
///
/// The conversion is a plain `as i32` cast, so out-of-range values saturate
/// at the `i32` limits and NaN becomes `0`.
#[inline]
pub fn ojph_trunc(val: f32) -> i32 {
    let truncated: i32 = val as i32;
    truncated
}
176
177// ---------------------------------------------------------------------------
178// Alignment helpers
179// ---------------------------------------------------------------------------
180
/// Returns the smallest multiple of `alignment` (in bytes) that can hold
/// `count` elements of type `T`.
///
/// This is the Rust equivalent of the C++ `calc_aligned_size<T>()` template.
///
/// The round-up is computed from the remainder of the byte size, so the
/// result is a true multiple of `alignment` for every non-zero alignment,
/// not only for powers of two. For power-of-two alignments the result is the
/// same as the classic `(size + align - 1) & !(align - 1)` mask.
///
/// # Panics
///
/// Panics (or fails constant evaluation when used in a `const` context) if
/// `alignment` is zero, or if either the raw byte size or the rounded-up size
/// does not fit in a `usize`. Overflow is checked explicitly so that release
/// builds cannot silently wrap to an undersized result.
#[inline]
pub const fn calc_aligned_size<T>(count: usize, alignment: u32) -> usize {
    assert!(alignment != 0, "calc_aligned_size: alignment must be non-zero");
    let align = alignment as usize;

    let byte_size = match count.checked_mul(std::mem::size_of::<T>()) {
        Some(bytes) => bytes,
        None => panic!("calc_aligned_size: byte size overflows usize"),
    };

    // Already a multiple of the alignment: nothing to pad.
    let remainder = byte_size % align;
    if remainder == 0 {
        return byte_size;
    }

    match byte_size.checked_add(align - remainder) {
        Some(padded) => padded,
        None => panic!("calc_aligned_size: aligned size overflows usize"),
    }
}
191
192// ---------------------------------------------------------------------------
193// Tests
194// ---------------------------------------------------------------------------
195
#[cfg(test)]
mod tests {
    use super::*;

    /// `population_count` reports the number of set bits.
    #[test]
    fn popcount() {
        let bits_set = population_count(0b1010_1010);
        assert_eq!(bits_set, 4);
    }

    /// Leading-zero counts for the lowest bit, in both the 32- and 64-bit
    /// variants.
    #[test]
    fn leading_zeros() {
        let lowest_bit_32: u32 = 1;
        let lowest_bit_64: u64 = 1;
        assert_eq!(count_leading_zeros(lowest_bit_32), 31);
        assert_eq!(count_leading_zeros_u64(lowest_bit_64), 63);
    }

    /// Trailing-zero count of a single set bit equals its bit index.
    #[test]
    fn trailing_zeros() {
        let bit_three_set: u32 = 8;
        assert_eq!(count_trailing_zeros(bit_three_set), 3);
    }

    /// Rounding goes to the nearest integer; truncation drops the fraction.
    #[test]
    fn round_trunc() {
        let rounding_cases: &[(f32, i32)] = &[(2.3, 2), (2.7, 3), (-2.3, -2)];
        for &(input, expected) in rounding_cases {
            assert_eq!(ojph_round(input), expected);
        }

        let truncation_cases: &[(f32, i32)] = &[(2.9, 2), (-2.9, -2)];
        for &(input, expected) in truncation_cases {
            assert_eq!(ojph_trunc(input), expected);
        }
    }

    /// Byte sizes are rounded up to the next multiple of the alignment.
    #[test]
    fn aligned_size() {
        // (element count, expected size): 10 i32 = 40 bytes -> 64,
        // 20 i32 = 80 bytes -> 128.
        let cases: &[(usize, usize)] = &[(10, 64), (20, 128)];
        for &(count, expected) in cases {
            assert_eq!(calc_aligned_size::<i32>(count, 64), expected);
        }
    }

    /// Detection must complete and report a non-negative level.
    #[test]
    fn cpu_detection_runs() {
        let detected_level = get_cpu_ext_level();
        assert!(detected_level >= 0);
    }
}