Skip to main content

oximedia_codec/simd/
mod.rs

//! SIMD abstraction layer for video codec implementations.
//!
//! This module provides a unified interface for SIMD operations used in
//! video encoding and decoding. It abstracts over different SIMD instruction
//! sets (AVX2, AVX-512, NEON) while providing a scalar fallback for portability.
//!
//! # Architecture
//!
//! The SIMD abstraction consists of:
//!
//! - **Types** (`types.rs`): Vector types like `I16x8`, `I32x4`, `U8x16`
//! - **Traits** (`traits.rs`): `SimdOps` and `SimdOpsExt` for SIMD operations
//! - **Architecture-specific**: x86 (AVX2/AVX-512), ARM (NEON), scalar fallback
//! - **Codec-specific**: AV1 and VP9 optimized operations
//! - **Operations**: Domain-specific modules for codec operations
//!
//! # Usage
//!
//! ```ignore
//! use oximedia_codec::simd::{detect_simd, select_transform_impl};
//!
//! // Detect SIMD capabilities
//! let caps = detect_simd();
//! println!("Best SIMD: {}", caps.best_level());
//!
//! // Use codec-specific SIMD operations
//! use oximedia_codec::simd::av1::TransformSimd;
//! let transform = TransformSimd::new(select_transform_impl());
//! transform.forward_dct_8x8(&input, &mut output);
//! ```
//!
//! # Feature Detection and Dispatch
//!
//! The SIMD implementation is selected at runtime based on CPU capabilities:
//!
//! ```ignore
//! use oximedia_codec::simd::{SimdCapabilities, detect_simd};
//!
//! let caps = detect_simd();
//! if caps.avx512 {
//!     // Use AVX-512 optimized path
//! } else if caps.avx2 {
//!     // Use AVX2 path
//! } else if caps.neon {
//!     // Use ARM NEON path
//! } else {
//!     // Use scalar fallback
//! }
//! ```
50
51#![allow(unsafe_code)]
52
53// Core modules
54pub mod scalar;
55pub mod traits;
56pub mod types;
57
58// Architecture-specific implementations
59pub mod arm;
60pub mod x86;
61
62// Codec-specific SIMD operations
63pub mod av1;
64pub mod vp9;
65
66// Legacy operation modules (preserved for compatibility)
67pub mod blend;
68pub mod dct;
69pub mod filter;
70pub mod sad;
71
72// Pixel format conversion (YUV ↔ RGB, all subsampling modes)
73pub mod pixel_convert;
74
75// Re-exports
76pub use blend::{blend_ops, BlendOps};
77pub use dct::{dct_ops, DctOps};
78pub use filter::{filter_ops, FilterOps};
79pub use sad::{sad_ops, SadOps};
80pub use traits::{SimdOps, SimdOpsExt, SimdSelector};
81pub use types::{I16x16, I16x8, I32x4, I32x8, U8x16, U8x32};
82
83// Architecture-specific re-exports
84pub use arm::NeonSimd;
85pub use scalar::ScalarFallback;
86pub use x86::{Avx2Simd, Avx512Simd};
87
88// Codec-specific re-exports
89pub use av1::{CdefSimd, IntraPredSimd, LoopFilterSimd, MotionCompSimd, TransformSimd};
90pub use vp9::{Vp9DctSimd, Vp9InterpolateSimd, Vp9IntraPredSimd, Vp9LoopFilterSimd};
91
// ============================================================================
// CPU Feature Detection and Dispatch
// ============================================================================
95
/// SIMD instruction sets usable on the current CPU.
///
/// Each field is an independent flag. [`detect_simd`] fills the flags in at
/// runtime; a value with every flag cleared reports `"scalar"` from
/// [`SimdCapabilities::best_level`].
///
/// Values can be compared with `==`, so a detection result can be checked
/// directly against an expected set of flags.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
// Each instruction set is tracked as its own independent flag, so a struct of
// plain `bool`s is the intended representation here.
#[allow(clippy::struct_excessive_bools)]
pub struct SimdCapabilities {
    /// x86 AVX2 support (Intel Haswell 2013+, AMD Excavator 2015+).
    pub avx2: bool,

    /// x86 AVX-512 support (Intel Skylake-X 2017+, Ice Lake 2019+).
    pub avx512: bool,

    /// ARM NEON support (all ARMv8/AArch64, ARMv7-A with NEON).
    pub neon: bool,
}
112
113impl SimdCapabilities {
114    /// Create with all features disabled.
115    #[must_use]
116    pub const fn none() -> Self {
117        Self {
118            avx2: false,
119            avx512: false,
120            neon: false,
121        }
122    }
123
124    /// Check if AVX2 is available.
125    #[inline]
126    #[must_use]
127    pub const fn has_avx2(&self) -> bool {
128        self.avx2
129    }
130
131    /// Check if AVX-512 is available.
132    #[inline]
133    #[must_use]
134    pub const fn has_avx512(&self) -> bool {
135        self.avx512
136    }
137
138    /// Check if NEON is available.
139    #[inline]
140    #[must_use]
141    pub const fn has_neon(&self) -> bool {
142        self.neon
143    }
144
145    /// Get the best available SIMD level name.
146    #[must_use]
147    pub const fn best_level(&self) -> &'static str {
148        if self.avx512 {
149            "avx512"
150        } else if self.avx2 {
151            "avx2"
152        } else if self.neon {
153            "neon"
154        } else {
155            "scalar"
156        }
157    }
158}
159
160/// Detect CPU SIMD capabilities at runtime.
161///
162/// This function uses CPU feature detection to determine which SIMD
163/// instruction sets are available on the current processor.
164///
165/// # Returns
166///
167/// A `SimdCapabilities` struct indicating which SIMD features are available.
168///
169/// # Example
170///
171/// ```ignore
172/// use oximedia_codec::simd::detect_simd;
173///
174/// let caps = detect_simd();
175/// println!("Running on: {}", caps.best_level());
176/// ```
177#[must_use]
178pub fn detect_simd() -> SimdCapabilities {
179    #[cfg(target_arch = "x86_64")]
180    {
181        SimdCapabilities {
182            avx2: is_x86_feature_detected!("avx2"),
183            avx512: is_x86_feature_detected!("avx512f")
184                && is_x86_feature_detected!("avx512bw")
185                && is_x86_feature_detected!("avx512dq"),
186            neon: false,
187        }
188    }
189
190    #[cfg(target_arch = "aarch64")]
191    {
192        // On AArch64, NEON is always available
193        SimdCapabilities {
194            avx2: false,
195            avx512: false,
196            neon: true,
197        }
198    }
199
200    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
201    {
202        SimdCapabilities::none()
203    }
204}
205
/// Identifies which backend performs transform operations.
///
/// One variant exists per SIMD backend, plus `Scalar` for the portable
/// fallback. [`select_transform_impl`] picks a variant for the current CPU.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransformImpl {
    /// Transforms implemented with AVX-512.
    Avx512,
    /// Transforms implemented with AVX2.
    Avx2,
    /// Transforms implemented with ARM NEON.
    Neon,
    /// Portable transforms with no SIMD requirement.
    Scalar,
}
221
222/// Select the best transform implementation for the current CPU.
223///
224/// This function detects CPU capabilities and returns the optimal
225/// transform implementation.
226///
227/// # Returns
228///
229/// The best available `TransformImpl` for the current CPU.
230#[must_use]
231pub fn select_transform_impl() -> TransformImpl {
232    let caps = detect_simd();
233
234    if caps.has_avx512() {
235        TransformImpl::Avx512
236    } else if caps.has_avx2() {
237        TransformImpl::Avx2
238    } else if caps.has_neon() {
239        TransformImpl::Neon
240    } else {
241        TransformImpl::Scalar
242    }
243}
244
245// Static instances for each SIMD implementation
246static SCALAR_INSTANCE: ScalarFallback = ScalarFallback;
247
248#[cfg(target_arch = "x86_64")]
249static AVX2_INSTANCE: Avx2Simd = Avx2Simd;
250
251#[cfg(target_arch = "x86_64")]
252static AVX512_INSTANCE: Avx512Simd = Avx512Simd;
253
254#[cfg(target_arch = "aarch64")]
255static NEON_INSTANCE: NeonSimd = NeonSimd;
256
257/// Get the best SIMD implementation for the current CPU.
258///
259/// Returns a reference to the optimal SIMD implementation based on
260/// detected CPU capabilities. This provides dynamic dispatch to the
261/// fastest available implementation.
262///
263/// # Returns
264///
265/// A static reference to a `SimdOps` implementation.
266#[must_use]
267pub fn get_simd() -> &'static dyn SimdOps {
268    #[cfg(target_arch = "x86_64")]
269    {
270        if Avx512Simd::is_available() {
271            return &AVX512_INSTANCE;
272        } else if Avx2Simd::is_available() {
273            return &AVX2_INSTANCE;
274        }
275    }
276
277    #[cfg(target_arch = "aarch64")]
278    {
279        if NeonSimd::is_available() {
280            return &NEON_INSTANCE;
281        }
282    }
283
284    &SCALAR_INSTANCE
285}
286
287/// Get the best extended SIMD implementation for the current CPU.
288///
289/// Returns a reference to the optimal extended SIMD implementation
290/// (with additional operations like transpose and butterfly).
291///
292/// # Returns
293///
294/// A static reference to a `SimdOpsExt` implementation.
295#[must_use]
296pub fn get_simd_ext() -> &'static dyn SimdOpsExt {
297    #[cfg(target_arch = "x86_64")]
298    {
299        if Avx512Simd::is_available() {
300            return &AVX512_INSTANCE;
301        } else if Avx2Simd::is_available() {
302            return &AVX2_INSTANCE;
303        }
304    }
305
306    #[cfg(target_arch = "aarch64")]
307    {
308        if NeonSimd::is_available() {
309            return &NEON_INSTANCE;
310        }
311    }
312
313    &SCALAR_INSTANCE
314}
315
// ============================================================================
// Legacy Compatibility
// ============================================================================
319
320/// Legacy scalar SIMD accessor (deprecated, use `ScalarFallback` directly).
321#[deprecated(
322    since = "0.1.0",
323    note = "Use &SCALAR_INSTANCE or ScalarFallback directly"
324)]
325#[must_use]
326pub fn scalar_simd() -> &'static ScalarFallback {
327    &SCALAR_INSTANCE
328}
329
330/// Legacy capabilities detection (deprecated, use `detect_simd` instead).
331#[deprecated(since = "0.1.0", note = "Use detect_simd() instead")]
332#[must_use]
333pub fn detect_capabilities() -> SimdCapabilities {
334    detect_simd()
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Runtime detection yields a named level and a usable implementation.
    #[test]
    fn test_detect_simd() {
        let caps = detect_simd();

        // Should return valid capabilities
        let level = caps.best_level();
        assert!(!level.is_empty());

        // Every detection branch sets either x86 flags or the ARM flag,
        // never both.
        assert!(!(caps.has_neon() && (caps.has_avx2() || caps.has_avx512())));

        // At least one implementation should be available
        assert!(get_simd().is_available());
    }

    /// An empty capability set reports nothing and falls back to "scalar".
    #[test]
    fn test_simd_capabilities() {
        let caps = SimdCapabilities::none();
        assert!(!caps.has_avx2());
        assert!(!caps.has_avx512());
        assert!(!caps.has_neon());
        assert_eq!(caps.best_level(), "scalar");

        // The derived `Default` clears every flag, just like `none()`.
        let default_caps = SimdCapabilities::default();
        assert!(!default_caps.has_avx2());
        assert!(!default_caps.has_avx512());
        assert!(!default_caps.has_neon());
        assert_eq!(default_caps.best_level(), "scalar");
    }

    /// The dispatched implementation is available and has a known name.
    #[test]
    fn test_get_simd() {
        let simd = get_simd();
        assert!(simd.is_available());

        // Check that the name matches expected values
        let name = simd.name();
        assert!(
            name == "scalar" || name == "avx2" || name == "avx512" || name == "neon",
            "Unexpected SIMD name: {}",
            name
        );
    }

    /// The extended dispatcher also returns an available implementation.
    #[test]
    fn test_get_simd_ext() {
        let simd = get_simd_ext();
        assert!(simd.is_available());
    }

    /// Transform selection agrees with the detected capabilities.
    #[test]
    fn test_select_transform_impl() {
        let impl_type = select_transform_impl();

        // Should select a valid implementation (exhaustive on purpose: a new
        // variant must be handled here).
        match impl_type {
            TransformImpl::Avx512
            | TransformImpl::Avx2
            | TransformImpl::Neon
            | TransformImpl::Scalar => {}
        }

        // Selection uses the same priority order as `best_level`.
        let expected = match detect_simd().best_level() {
            "avx512" => TransformImpl::Avx512,
            "avx2" => TransformImpl::Avx2,
            "neon" => TransformImpl::Neon,
            _ => TransformImpl::Scalar,
        };
        assert_eq!(impl_type, expected);
    }

    /// Vector types and legacy operation accessors are re-exported.
    #[test]
    fn test_module_reexports() {
        // Test that all reexports work
        let _v = I16x8::zero();
        let _v = I32x4::zero();
        let _v = U8x16::zero();

        let _ops = sad_ops();
        let _ops = blend_ops();
        let _ops = dct_ops();
        let _ops = filter_ops();
    }

    /// Architecture-specific types can be constructed on their own target.
    #[test]
    fn test_architecture_specific() {
        // Test that architecture-specific types are accessible
        let _scalar = ScalarFallback::new();

        #[cfg(target_arch = "x86_64")]
        {
            let _avx2 = Avx2Simd::new();
            let _avx512 = Avx512Simd::new();
        }

        #[cfg(target_arch = "aarch64")]
        {
            let _neon = NeonSimd::new();
        }
    }

    /// Codec-specific wrappers can be built on the scalar implementation.
    #[test]
    fn test_codec_specific_types() {
        // `ScalarFallback` is already in scope through `use super::*`.
        let simd = ScalarFallback::new();

        // AV1
        let _transform = TransformSimd::new(simd);
        let _loop_filter = LoopFilterSimd::new(simd);
        let _cdef = CdefSimd::new(simd);
        let _intra = IntraPredSimd::new(simd);
        let _motion = MotionCompSimd::new(simd);

        // VP9
        let _vp9_dct = Vp9DctSimd::new(simd);
        let _vp9_interp = Vp9InterpolateSimd::new(simd);
        let _vp9_intra = Vp9IntraPredSimd::new(simd);
        let _vp9_lf = Vp9LoopFilterSimd::new(simd);
    }

    /// SAD of two identical 8x8 blocks is zero.
    #[test]
    fn test_integration_sad() {
        let sad = sad_ops();

        // Test basic SAD calculation
        let src = [128u8; 64];
        let ref_block = [128u8; 64];

        let result = sad.sad_8x8(&src, 8, &ref_block, 8);
        assert_eq!(result, 0);
    }

    /// Blending 0 and 255 with weight 128 lands near the midpoint.
    #[test]
    fn test_integration_blend() {
        let blend = blend_ops();

        // Test linear interpolation
        let result = blend.lerp_u8(0, 255, 128);
        assert!(
            (126..=130).contains(&result),
            "lerp result out of range: {}",
            result
        );
    }

    /// Forward then inverse 4x4 DCT reproduces the input within +/-2.
    #[test]
    fn test_integration_dct() {
        let dct = dct_ops();

        // Test DCT round-trip
        let input = [100i16; 16];
        let mut dct_out = [0i16; 16];
        let mut reconstructed = [0i16; 16];

        dct.forward_dct_4x4(&input, &mut dct_out);
        dct.inverse_dct_4x4(&dct_out, &mut reconstructed);

        // Should be close to original
        for (i, (&original, &restored)) in input.iter().zip(&reconstructed).enumerate() {
            let diff = (original - restored).abs();
            assert!(
                diff <= 2,
                "DCT mismatch at {}: {} vs {}",
                i,
                original,
                restored
            );
        }
    }

    /// A 2-tap horizontal filter leaves constant input unchanged.
    #[test]
    fn test_integration_filter() {
        let filter = filter_ops();

        // Test 2-tap filter on constant input
        let src = [128u8; 16];
        let mut dst = [0u8; 15];

        filter.filter_h_2tap(&src, &mut dst, 15);

        for &v in &dst {
            assert_eq!(v, 128);
        }
    }
}