Skip to main content

oximedia_codec/simd/
mod.rs

1//! SIMD abstraction layer for video codec implementations.
2//!
3//! This module provides a unified interface for SIMD operations used in
4//! video encoding and decoding. It abstracts over different SIMD instruction
5//! sets (AVX2, AVX-512, NEON) while providing a scalar fallback for portability.
6//!
7//! # Architecture
8//!
9//! The SIMD abstraction consists of:
10//!
11//! - **Types** (`types.rs`): Vector types like `I16x8`, `I32x4`, `U8x16`
12//! - **Traits** (`traits.rs`): `SimdOps` and `SimdOpsExt` for SIMD operations
13//! - **Architecture-specific**: x86 (AVX2/AVX-512), ARM (NEON), scalar fallback
14//! - **Codec-specific**: AV1 and VP9 optimized operations
15//! - **Operations**: Domain-specific modules for codec operations
16//!
17//! # Usage
18//!
19//! ```ignore
20//! use oximedia_codec::simd::{detect_simd, select_transform_impl};
21//!
22//! // Detect SIMD capabilities
23//! let caps = detect_simd();
24//! println!("Best SIMD: {}", caps.best_level());
25//!
26//! // Use codec-specific SIMD operations
27//! use oximedia_codec::simd::av1::TransformSimd;
28//! let transform = TransformSimd::new(select_transform_impl());
29//! transform.forward_dct_8x8(&input, &mut output);
30//! ```
31//!
32//! # Feature Detection and Dispatch
33//!
34//! The SIMD implementation is selected at runtime based on CPU capabilities:
35//!
36//! ```ignore
37//! use oximedia_codec::simd::{SimdCapabilities, detect_simd};
38//!
39//! let caps = detect_simd();
40//! if caps.avx512 {
41//!     // Use AVX-512 optimized path
42//! } else if caps.avx2 {
43//!     // Use AVX2 path
44//! } else if caps.neon {
45//!     // Use ARM NEON path
46//! } else {
47//!     // Use scalar fallback
48//! }
49//! ```
50
51#![allow(unsafe_code)]
52
53// Core modules
54pub mod scalar;
55pub mod traits;
56pub mod types;
57
58// Architecture-specific implementations
59pub mod arm;
60pub mod x86;
61
62// Codec-specific SIMD operations
63pub mod av1;
64pub mod vp9;
65
66// Legacy operation modules (preserved for compatibility)
67pub mod blend;
68pub mod dct;
69pub mod filter;
70pub mod sad;
71
72// Pixel format conversion (YUV ↔ RGB, all subsampling modes)
73pub mod pixel_convert;
74
75// YUV subsampling format conversion (4:2:0 ↔ 4:2:2 ↔ 4:4:4, NV12 ↔ I420)
76pub mod yuv_convert;
77
78// Re-exports
79pub use blend::{blend_ops, BlendOps};
80pub use dct::{dct_ops, DctOps};
81pub use filter::{filter_ops, FilterOps};
82pub use sad::{sad_ops, SadOps};
83pub use traits::{SimdOps, SimdOpsExt, SimdSelector};
84pub use types::{I16x16, I16x8, I32x4, I32x8, U8x16, U8x32};
85
86// Architecture-specific re-exports
87pub use arm::NeonSimd;
88pub use scalar::ScalarFallback;
89pub use x86::{Avx2Simd, Avx512Simd};
90
91// Codec-specific re-exports
92pub use av1::{CdefSimd, IntraPredSimd, LoopFilterSimd, MotionCompSimd, TransformSimd};
93pub use vp9::{Vp9DctSimd, Vp9InterpolateSimd, Vp9IntraPredSimd, Vp9LoopFilterSimd};
94
95// ============================================================================
96// CPU Feature Detection and Dispatch
97// ============================================================================
98
/// SIMD instruction sets reported for the current CPU.
///
/// Each field is an independent flag; a value with every flag cleared means
/// that only the scalar code path should be used. The type is a small `Copy`
/// value and compares by value, so two detection results can be checked for
/// equality directly.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
// The flags are independent CPU features rather than states of one machine,
// so a set of bools is the natural representation here.
#[allow(clippy::struct_excessive_bools)]
pub struct SimdCapabilities {
    /// x86 AVX2 support (Intel Haswell 2013+, AMD Excavator 2015+).
    pub avx2: bool,

    /// x86 AVX-512 support (Intel Skylake-X 2017+, Ice Lake 2019+).
    pub avx512: bool,

    /// ARM NEON support (all ARMv8/AArch64, ARMv7-A with NEON).
    pub neon: bool,
}
115
116impl SimdCapabilities {
117    /// Create with all features disabled.
118    #[must_use]
119    pub const fn none() -> Self {
120        Self {
121            avx2: false,
122            avx512: false,
123            neon: false,
124        }
125    }
126
127    /// Check if AVX2 is available.
128    #[inline]
129    #[must_use]
130    pub const fn has_avx2(&self) -> bool {
131        self.avx2
132    }
133
134    /// Check if AVX-512 is available.
135    #[inline]
136    #[must_use]
137    pub const fn has_avx512(&self) -> bool {
138        self.avx512
139    }
140
141    /// Check if NEON is available.
142    #[inline]
143    #[must_use]
144    pub const fn has_neon(&self) -> bool {
145        self.neon
146    }
147
148    /// Get the best available SIMD level name.
149    #[must_use]
150    pub const fn best_level(&self) -> &'static str {
151        if self.avx512 {
152            "avx512"
153        } else if self.avx2 {
154            "avx2"
155        } else if self.neon {
156            "neon"
157        } else {
158            "scalar"
159        }
160    }
161}
162
163/// Detect CPU SIMD capabilities at runtime.
164///
165/// This function uses CPU feature detection to determine which SIMD
166/// instruction sets are available on the current processor.
167///
168/// # Returns
169///
170/// A `SimdCapabilities` struct indicating which SIMD features are available.
171///
172/// # Example
173///
174/// ```ignore
175/// use oximedia_codec::simd::detect_simd;
176///
177/// let caps = detect_simd();
178/// println!("Running on: {}", caps.best_level());
179/// ```
180#[must_use]
181pub fn detect_simd() -> SimdCapabilities {
182    #[cfg(target_arch = "x86_64")]
183    {
184        SimdCapabilities {
185            avx2: is_x86_feature_detected!("avx2"),
186            avx512: is_x86_feature_detected!("avx512f")
187                && is_x86_feature_detected!("avx512bw")
188                && is_x86_feature_detected!("avx512dq"),
189            neon: false,
190        }
191    }
192
193    #[cfg(target_arch = "aarch64")]
194    {
195        // On AArch64, NEON is always available
196        SimdCapabilities {
197            avx2: false,
198            avx512: false,
199            neon: true,
200        }
201    }
202
203    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
204    {
205        SimdCapabilities::none()
206    }
207}
208
/// Which backend a transform operation should run on.
///
/// One variant exists per SIMD level known to this module, plus the scalar
/// fallback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransformImpl {
    /// Run the transform with the AVX-512 backend.
    Avx512,
    /// Run the transform with the AVX2 backend.
    Avx2,
    /// Run the transform with the ARM NEON backend.
    Neon,
    /// Run the transform with the scalar fallback.
    Scalar,
}
224
225/// Select the best transform implementation for the current CPU.
226///
227/// This function detects CPU capabilities and returns the optimal
228/// transform implementation.
229///
230/// # Returns
231///
232/// The best available `TransformImpl` for the current CPU.
233#[must_use]
234pub fn select_transform_impl() -> TransformImpl {
235    let caps = detect_simd();
236
237    if caps.has_avx512() {
238        TransformImpl::Avx512
239    } else if caps.has_avx2() {
240        TransformImpl::Avx2
241    } else if caps.has_neon() {
242        TransformImpl::Neon
243    } else {
244        TransformImpl::Scalar
245    }
246}
247
248// Static instances for each SIMD implementation
249static SCALAR_INSTANCE: ScalarFallback = ScalarFallback;
250
251#[cfg(target_arch = "x86_64")]
252static AVX2_INSTANCE: Avx2Simd = Avx2Simd;
253
254#[cfg(target_arch = "x86_64")]
255static AVX512_INSTANCE: Avx512Simd = Avx512Simd;
256
257#[cfg(target_arch = "aarch64")]
258static NEON_INSTANCE: NeonSimd = NeonSimd;
259
260/// Get the best SIMD implementation for the current CPU.
261///
262/// Returns a reference to the optimal SIMD implementation based on
263/// detected CPU capabilities. This provides dynamic dispatch to the
264/// fastest available implementation.
265///
266/// # Returns
267///
268/// A static reference to a `SimdOps` implementation.
269#[must_use]
270pub fn get_simd() -> &'static dyn SimdOps {
271    #[cfg(target_arch = "x86_64")]
272    {
273        if Avx512Simd::is_available() {
274            return &AVX512_INSTANCE;
275        } else if Avx2Simd::is_available() {
276            return &AVX2_INSTANCE;
277        }
278    }
279
280    #[cfg(target_arch = "aarch64")]
281    {
282        if NeonSimd::is_available() {
283            return &NEON_INSTANCE;
284        }
285    }
286
287    &SCALAR_INSTANCE
288}
289
290/// Get the best extended SIMD implementation for the current CPU.
291///
292/// Returns a reference to the optimal extended SIMD implementation
293/// (with additional operations like transpose and butterfly).
294///
295/// # Returns
296///
297/// A static reference to a `SimdOpsExt` implementation.
298#[must_use]
299pub fn get_simd_ext() -> &'static dyn SimdOpsExt {
300    #[cfg(target_arch = "x86_64")]
301    {
302        if Avx512Simd::is_available() {
303            return &AVX512_INSTANCE;
304        } else if Avx2Simd::is_available() {
305            return &AVX2_INSTANCE;
306        }
307    }
308
309    #[cfg(target_arch = "aarch64")]
310    {
311        if NeonSimd::is_available() {
312            return &NEON_INSTANCE;
313        }
314    }
315
316    &SCALAR_INSTANCE
317}
318
319// ============================================================================
320// Legacy Compatibility
321// ============================================================================
322
323/// Legacy scalar SIMD accessor (deprecated, use `ScalarFallback` directly).
324#[deprecated(
325    since = "0.1.0",
326    note = "Use &SCALAR_INSTANCE or ScalarFallback directly"
327)]
328#[must_use]
329pub fn scalar_simd() -> &'static ScalarFallback {
330    &SCALAR_INSTANCE
331}
332
333/// Legacy capabilities detection (deprecated, use `detect_simd` instead).
334#[deprecated(since = "0.1.0", note = "Use detect_simd() instead")]
335#[must_use]
336pub fn detect_capabilities() -> SimdCapabilities {
337    detect_simd()
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection yields a named level and dispatch yields a usable backend.
    #[test]
    fn test_detect_simd() {
        let detected = detect_simd();

        // The level name is never blank, whatever the host CPU is.
        assert!(!detected.best_level().is_empty());

        // Dispatch must resolve to a backend that reports itself usable.
        assert!(get_simd().is_available());
    }

    /// The empty capability set reports nothing and maps to "scalar".
    #[test]
    fn test_simd_capabilities() {
        let empty = SimdCapabilities::none();

        assert!(!empty.has_avx2());
        assert!(!empty.has_avx512());
        assert!(!empty.has_neon());
        assert_eq!(empty.best_level(), "scalar");
    }

    /// The dispatched backend is usable and carries a known name.
    #[test]
    fn test_get_simd() {
        let backend = get_simd();
        assert!(backend.is_available());

        // The name must be one of the four levels this module knows about.
        let name = backend.name();
        let recognised = ["scalar", "avx2", "avx512", "neon"]
            .iter()
            .any(|candidate| name == *candidate);
        assert!(recognised, "Unexpected SIMD name: {}", name);
    }

    /// The extended dispatcher also returns a usable backend.
    #[test]
    fn test_get_simd_ext() {
        assert!(get_simd_ext().is_available());
    }

    /// Selection always lands on one of the declared variants.
    #[test]
    fn test_select_transform_impl() {
        // Exhaustive match without a wildcard: adding a variant breaks this test
        // at compile time.
        match select_transform_impl() {
            TransformImpl::Avx512
            | TransformImpl::Avx2
            | TransformImpl::Neon
            | TransformImpl::Scalar => {}
        }
    }

    /// Vector types and legacy operation accessors are reachable via re-exports.
    #[test]
    fn test_module_reexports() {
        let _i16_lanes = I16x8::zero();
        let _i32_lanes = I32x4::zero();
        let _u8_lanes = U8x16::zero();

        let _sad = sad_ops();
        let _blend = blend_ops();
        let _dct = dct_ops();
        let _filter = filter_ops();
    }

    /// Backend types compiled for this target can be constructed.
    #[test]
    fn test_architecture_specific() {
        let _scalar = ScalarFallback::new();

        #[cfg(target_arch = "x86_64")]
        {
            let _avx2 = Avx2Simd::new();
            let _avx512 = Avx512Simd::new();
        }

        #[cfg(target_arch = "aarch64")]
        {
            let _neon = NeonSimd::new();
        }
    }

    /// Every codec-specific wrapper can be built on the scalar backend.
    #[test]
    fn test_codec_specific_types() {
        use crate::simd::scalar::ScalarFallback;

        let backend = ScalarFallback::new();

        // AV1 wrappers
        let _transform = TransformSimd::new(backend);
        let _loop_filter = LoopFilterSimd::new(backend);
        let _cdef = CdefSimd::new(backend);
        let _intra = IntraPredSimd::new(backend);
        let _motion = MotionCompSimd::new(backend);

        // VP9 wrappers
        let _vp9_dct = Vp9DctSimd::new(backend);
        let _vp9_interp = Vp9InterpolateSimd::new(backend);
        let _vp9_intra = Vp9IntraPredSimd::new(backend);
        let _vp9_lf = Vp9LoopFilterSimd::new(backend);
    }

    /// SAD of two identical 8x8 blocks is zero.
    #[test]
    fn test_integration_sad() {
        let current = [128u8; 64];
        let reference = [128u8; 64];

        let distance = sad_ops().sad_8x8(&current, 8, &reference, 8);
        assert_eq!(distance, 0);
    }

    /// Interpolating between 0 and 255 with weight 128 lands near the midpoint.
    #[test]
    fn test_integration_blend() {
        let mixed = blend_ops().lerp_u8(0, 255, 128);
        assert!((126..=130).contains(&mixed));
    }

    /// Forward then inverse 4x4 DCT reproduces the input within a small tolerance.
    #[test]
    fn test_integration_dct() {
        let ops = dct_ops();

        let original = [100i16; 16];
        let mut coefficients = [0i16; 16];
        let mut restored = [0i16; 16];

        ops.forward_dct_4x4(&original, &mut coefficients);
        ops.inverse_dct_4x4(&coefficients, &mut restored);

        // Each restored sample may differ from the input by at most 2.
        for (idx, (&expected, &actual)) in original.iter().zip(restored.iter()).enumerate() {
            let diff = (expected - actual).abs();
            assert!(
                diff <= 2,
                "DCT mismatch at {}: {} vs {}",
                idx,
                expected,
                actual
            );
        }
    }

    /// A horizontal 2-tap filter leaves a constant signal unchanged.
    #[test]
    fn test_integration_filter() {
        let flat = [128u8; 16];
        let mut filtered = [0u8; 15];

        filter_ops().filter_h_2tap(&flat, &mut filtered, 15);

        assert_eq!(filtered, [128u8; 15]);
    }
}
505        for &v in &dst {
506            assert_eq!(v, 128);
507        }
508    }
509}