oximedia_codec/simd/
mod.rs1#![allow(unsafe_code)]
52
53pub mod scalar;
55pub mod traits;
56pub mod types;
57
58pub mod arm;
60pub mod x86;
61
62pub mod av1;
64pub mod vp9;
65
66pub mod blend;
68pub mod dct;
69pub mod filter;
70pub mod sad;
71
72pub mod pixel_convert;
74
75pub mod yuv_convert;
77
78pub use blend::{blend_ops, BlendOps};
80pub use dct::{dct_ops, DctOps};
81pub use filter::{filter_ops, FilterOps};
82pub use sad::{sad_ops, SadOps};
83pub use traits::{SimdOps, SimdOpsExt, SimdSelector};
84pub use types::{I16x16, I16x8, I32x4, I32x8, U8x16, U8x32};
85
86pub use arm::NeonSimd;
88pub use scalar::ScalarFallback;
89pub use x86::{Avx2Simd, Avx512Simd};
90
91pub use av1::{CdefSimd, IntraPredSimd, LoopFilterSimd, MotionCompSimd, TransformSimd};
93pub use vp9::{Vp9DctSimd, Vp9InterpolateSimd, Vp9IntraPredSimd, Vp9LoopFilterSimd};
94
/// Set of SIMD instruction-set extensions available to the codec.
///
/// [`detect_simd`] builds a populated value for the running CPU, while
/// [`SimdCapabilities::none`] and `Default` both produce a value with every
/// flag cleared. The type is a small `Copy` value that can be compared and
/// hashed, so it is cheap to cache or use as a lookup key.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
// Every flag is an independent capability, so plain booleans are the intended model.
#[allow(clippy::struct_excessive_bools)]
pub struct SimdCapabilities {
    /// AVX2 is available; `detect_simd` sets this from runtime detection on x86_64.
    pub avx2: bool,

    /// AVX-512 is available; `detect_simd` sets this only when the `avx512f`,
    /// `avx512bw` and `avx512dq` features are all detected on x86_64.
    pub avx512: bool,

    /// NEON is available; `detect_simd` sets this on aarch64 builds.
    pub neon: bool,
}
115
116impl SimdCapabilities {
117 #[must_use]
119 pub const fn none() -> Self {
120 Self {
121 avx2: false,
122 avx512: false,
123 neon: false,
124 }
125 }
126
127 #[inline]
129 #[must_use]
130 pub const fn has_avx2(&self) -> bool {
131 self.avx2
132 }
133
134 #[inline]
136 #[must_use]
137 pub const fn has_avx512(&self) -> bool {
138 self.avx512
139 }
140
141 #[inline]
143 #[must_use]
144 pub const fn has_neon(&self) -> bool {
145 self.neon
146 }
147
148 #[must_use]
150 pub const fn best_level(&self) -> &'static str {
151 if self.avx512 {
152 "avx512"
153 } else if self.avx2 {
154 "avx2"
155 } else if self.neon {
156 "neon"
157 } else {
158 "scalar"
159 }
160 }
161}
162
163#[must_use]
181pub fn detect_simd() -> SimdCapabilities {
182 #[cfg(target_arch = "x86_64")]
183 {
184 SimdCapabilities {
185 avx2: is_x86_feature_detected!("avx2"),
186 avx512: is_x86_feature_detected!("avx512f")
187 && is_x86_feature_detected!("avx512bw")
188 && is_x86_feature_detected!("avx512dq"),
189 neon: false,
190 }
191 }
192
193 #[cfg(target_arch = "aarch64")]
194 {
195 SimdCapabilities {
197 avx2: false,
198 avx512: false,
199 neon: true,
200 }
201 }
202
203 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
204 {
205 SimdCapabilities::none()
206 }
207}
208
/// Transform back-end chosen by [`select_transform_impl`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransformImpl {
    /// Chosen when AVX-512 is reported as available.
    Avx512,
    /// Chosen when AVX2, but not AVX-512, is reported as available.
    Avx2,
    /// Chosen when NEON is reported and neither x86 extension is.
    Neon,
    /// Chosen when no SIMD extension is reported.
    Scalar,
}
224
225#[must_use]
234pub fn select_transform_impl() -> TransformImpl {
235 let caps = detect_simd();
236
237 if caps.has_avx512() {
238 TransformImpl::Avx512
239 } else if caps.has_avx2() {
240 TransformImpl::Avx2
241 } else if caps.has_neon() {
242 TransformImpl::Neon
243 } else {
244 TransformImpl::Scalar
245 }
246}
247
248static SCALAR_INSTANCE: ScalarFallback = ScalarFallback;
250
251#[cfg(target_arch = "x86_64")]
252static AVX2_INSTANCE: Avx2Simd = Avx2Simd;
253
254#[cfg(target_arch = "x86_64")]
255static AVX512_INSTANCE: Avx512Simd = Avx512Simd;
256
257#[cfg(target_arch = "aarch64")]
258static NEON_INSTANCE: NeonSimd = NeonSimd;
259
260#[must_use]
270pub fn get_simd() -> &'static dyn SimdOps {
271 #[cfg(target_arch = "x86_64")]
272 {
273 if Avx512Simd::is_available() {
274 return &AVX512_INSTANCE;
275 } else if Avx2Simd::is_available() {
276 return &AVX2_INSTANCE;
277 }
278 }
279
280 #[cfg(target_arch = "aarch64")]
281 {
282 if NeonSimd::is_available() {
283 return &NEON_INSTANCE;
284 }
285 }
286
287 &SCALAR_INSTANCE
288}
289
290#[must_use]
299pub fn get_simd_ext() -> &'static dyn SimdOpsExt {
300 #[cfg(target_arch = "x86_64")]
301 {
302 if Avx512Simd::is_available() {
303 return &AVX512_INSTANCE;
304 } else if Avx2Simd::is_available() {
305 return &AVX2_INSTANCE;
306 }
307 }
308
309 #[cfg(target_arch = "aarch64")]
310 {
311 if NeonSimd::is_available() {
312 return &NEON_INSTANCE;
313 }
314 }
315
316 &SCALAR_INSTANCE
317}
318
319#[deprecated(
325 since = "0.1.0",
326 note = "Use &SCALAR_INSTANCE or ScalarFallback directly"
327)]
328#[must_use]
329pub fn scalar_simd() -> &'static ScalarFallback {
330 &SCALAR_INSTANCE
331}
332
333#[deprecated(since = "0.1.0", note = "Use detect_simd() instead")]
335#[must_use]
336pub fn detect_capabilities() -> SimdCapabilities {
337 detect_simd()
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection yields a named level, and the dispatcher it implies is usable.
    #[test]
    fn test_detect_simd() {
        let caps = detect_simd();

        let level = caps.best_level();
        assert!(!level.is_empty());

        assert!(get_simd().is_available());
    }

    /// The empty capability set reports nothing and maps to the scalar level.
    #[test]
    fn test_simd_capabilities() {
        let caps = SimdCapabilities::none();
        assert!(!caps.has_avx2());
        assert!(!caps.has_avx512());
        assert!(!caps.has_neon());
        assert_eq!(caps.best_level(), "scalar");
    }

    /// The dispatcher returns an available implementation with a known name.
    #[test]
    fn test_get_simd() {
        let simd = get_simd();
        assert!(simd.is_available());

        let name = simd.name();
        assert!(
            name == "scalar" || name == "avx2" || name == "avx512" || name == "neon",
            "Unexpected SIMD name: {}",
            name
        );
    }

    /// The extended dispatcher also returns an available implementation.
    #[test]
    fn test_get_simd_ext() {
        let simd = get_simd_ext();
        assert!(simd.is_available());
    }

    /// The selected transform back-end agrees with the detected best level.
    #[test]
    fn test_select_transform_impl() {
        // Exhaustive on purpose: adding a variant must force this test to be updated.
        let expected_level = match select_transform_impl() {
            TransformImpl::Avx512 => "avx512",
            TransformImpl::Avx2 => "avx2",
            TransformImpl::Neon => "neon",
            TransformImpl::Scalar => "scalar",
        };

        assert_eq!(expected_level, detect_simd().best_level());
    }

    /// The vector types and operation accessors are re-exported from this module.
    #[test]
    fn test_module_reexports() {
        let _v = I16x8::zero();
        let _v = I32x4::zero();
        let _v = U8x16::zero();

        let _ops = sad_ops();
        let _ops = blend_ops();
        let _ops = dct_ops();
        let _ops = filter_ops();
    }

    /// Architecture-specific implementations can be constructed where they exist.
    #[test]
    fn test_architecture_specific() {
        let _scalar = ScalarFallback::new();

        #[cfg(target_arch = "x86_64")]
        {
            let _avx2 = Avx2Simd::new();
            let _avx512 = Avx512Simd::new();
        }

        #[cfg(target_arch = "aarch64")]
        {
            let _neon = NeonSimd::new();
        }
    }

    /// Every codec-specific wrapper can be built on top of the scalar fallback.
    #[test]
    fn test_codec_specific_types() {
        // `ScalarFallback` is already in scope through `use super::*`.
        let simd = ScalarFallback::new();

        let _transform = TransformSimd::new(simd);
        let _loop_filter = LoopFilterSimd::new(simd);
        let _cdef = CdefSimd::new(simd);
        let _intra = IntraPredSimd::new(simd);
        let _motion = MotionCompSimd::new(simd);

        let _vp9_dct = Vp9DctSimd::new(simd);
        let _vp9_interp = Vp9InterpolateSimd::new(simd);
        let _vp9_intra = Vp9IntraPredSimd::new(simd);
        let _vp9_lf = Vp9LoopFilterSimd::new(simd);
    }

    /// Identical 8x8 blocks have a sum of absolute differences of zero.
    #[test]
    fn test_integration_sad() {
        let sad = sad_ops();

        let src = [128u8; 64];
        let ref_block = [128u8; 64];

        let result = sad.sad_8x8(&src, 8, &ref_block, 8);
        assert_eq!(result, 0);
    }

    /// Blending 0 and 255 with weight 128 lands near the midpoint.
    #[test]
    fn test_integration_blend() {
        let blend = blend_ops();

        let result = blend.lerp_u8(0, 255, 128);
        assert!((126..=130).contains(&result));
    }

    /// A forward followed by an inverse 4x4 DCT reproduces the input within ±2.
    #[test]
    fn test_integration_dct() {
        let dct = dct_ops();

        let input = [100i16; 16];
        let mut dct_out = [0i16; 16];
        let mut reconstructed = [0i16; 16];

        dct.forward_dct_4x4(&input, &mut dct_out);
        dct.inverse_dct_4x4(&dct_out, &mut reconstructed);

        for (i, (&original, &roundtrip)) in input.iter().zip(reconstructed.iter()).enumerate() {
            let diff = (original - roundtrip).abs();
            assert!(
                diff <= 2,
                "DCT mismatch at {}: {} vs {}",
                i,
                original,
                roundtrip
            );
        }
    }

    /// A 2-tap horizontal filter over a constant row leaves the value unchanged.
    #[test]
    fn test_integration_filter() {
        let filter = filter_ops();

        let src = [128u8; 16];
        let mut dst = [0u8; 15];

        filter.filter_h_2tap(&src, &mut dst, 15);

        for &v in &dst {
            assert_eq!(v, 128);
        }
    }
}