oximedia_codec/simd/
mod.rs1#![allow(unsafe_code)]
52
53pub mod scalar;
55pub mod traits;
56pub mod types;
57
58pub mod arm;
60pub mod x86;
61
62pub mod av1;
64pub mod vp9;
65
66pub mod blend;
68pub mod dct;
69pub mod filter;
70pub mod sad;
71
72pub mod pixel_convert;
74
75pub use blend::{blend_ops, BlendOps};
77pub use dct::{dct_ops, DctOps};
78pub use filter::{filter_ops, FilterOps};
79pub use sad::{sad_ops, SadOps};
80pub use traits::{SimdOps, SimdOpsExt, SimdSelector};
81pub use types::{I16x16, I16x8, I32x4, I32x8, U8x16, U8x32};
82
83pub use arm::NeonSimd;
85pub use scalar::ScalarFallback;
86pub use x86::{Avx2Simd, Avx512Simd};
87
88pub use av1::{CdefSimd, IntraPredSimd, LoopFilterSimd, MotionCompSimd, TransformSimd};
90pub use vp9::{Vp9DctSimd, Vp9InterpolateSimd, Vp9IntraPredSimd, Vp9LoopFilterSimd};
91
/// Set of SIMD instruction-set families that may be used.
///
/// [`detect_simd`] fills this in for the running machine; the derived
/// `Default` leaves every flag cleared.
// The flags are kept as separate public booleans, so clippy's bool-count
// lint is silenced for this struct.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default, Clone, Copy)]
pub struct SimdCapabilities {
    /// Set when AVX2 may be used.
    pub avx2: bool,

    /// Set when AVX-512 may be used.
    pub avx512: bool,

    /// Set when NEON may be used.
    pub neon: bool,
}
112
113impl SimdCapabilities {
114 #[must_use]
116 pub const fn none() -> Self {
117 Self {
118 avx2: false,
119 avx512: false,
120 neon: false,
121 }
122 }
123
124 #[inline]
126 #[must_use]
127 pub const fn has_avx2(&self) -> bool {
128 self.avx2
129 }
130
131 #[inline]
133 #[must_use]
134 pub const fn has_avx512(&self) -> bool {
135 self.avx512
136 }
137
138 #[inline]
140 #[must_use]
141 pub const fn has_neon(&self) -> bool {
142 self.neon
143 }
144
145 #[must_use]
147 pub const fn best_level(&self) -> &'static str {
148 if self.avx512 {
149 "avx512"
150 } else if self.avx2 {
151 "avx2"
152 } else if self.neon {
153 "neon"
154 } else {
155 "scalar"
156 }
157 }
158}
159
160#[must_use]
178pub fn detect_simd() -> SimdCapabilities {
179 #[cfg(target_arch = "x86_64")]
180 {
181 SimdCapabilities {
182 avx2: is_x86_feature_detected!("avx2"),
183 avx512: is_x86_feature_detected!("avx512f")
184 && is_x86_feature_detected!("avx512bw")
185 && is_x86_feature_detected!("avx512dq"),
186 neon: false,
187 }
188 }
189
190 #[cfg(target_arch = "aarch64")]
191 {
192 SimdCapabilities {
194 avx2: false,
195 avx512: false,
196 neon: true,
197 }
198 }
199
200 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
201 {
202 SimdCapabilities::none()
203 }
204}
205
/// Identifies which transform implementation [`select_transform_impl`] chose.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum TransformImpl {
    /// Chosen when the AVX-512 capability is reported.
    Avx512,
    /// Chosen when AVX2 is reported and AVX-512 is not.
    Avx2,
    /// Chosen when NEON is reported and neither x86 capability is.
    Neon,
    /// Chosen when no SIMD capability is reported.
    Scalar,
}
221
222#[must_use]
231pub fn select_transform_impl() -> TransformImpl {
232 let caps = detect_simd();
233
234 if caps.has_avx512() {
235 TransformImpl::Avx512
236 } else if caps.has_avx2() {
237 TransformImpl::Avx2
238 } else if caps.has_neon() {
239 TransformImpl::Neon
240 } else {
241 TransformImpl::Scalar
242 }
243}
244
245static SCALAR_INSTANCE: ScalarFallback = ScalarFallback;
247
248#[cfg(target_arch = "x86_64")]
249static AVX2_INSTANCE: Avx2Simd = Avx2Simd;
250
251#[cfg(target_arch = "x86_64")]
252static AVX512_INSTANCE: Avx512Simd = Avx512Simd;
253
254#[cfg(target_arch = "aarch64")]
255static NEON_INSTANCE: NeonSimd = NeonSimd;
256
257#[must_use]
267pub fn get_simd() -> &'static dyn SimdOps {
268 #[cfg(target_arch = "x86_64")]
269 {
270 if Avx512Simd::is_available() {
271 return &AVX512_INSTANCE;
272 } else if Avx2Simd::is_available() {
273 return &AVX2_INSTANCE;
274 }
275 }
276
277 #[cfg(target_arch = "aarch64")]
278 {
279 if NeonSimd::is_available() {
280 return &NEON_INSTANCE;
281 }
282 }
283
284 &SCALAR_INSTANCE
285}
286
287#[must_use]
296pub fn get_simd_ext() -> &'static dyn SimdOpsExt {
297 #[cfg(target_arch = "x86_64")]
298 {
299 if Avx512Simd::is_available() {
300 return &AVX512_INSTANCE;
301 } else if Avx2Simd::is_available() {
302 return &AVX2_INSTANCE;
303 }
304 }
305
306 #[cfg(target_arch = "aarch64")]
307 {
308 if NeonSimd::is_available() {
309 return &NEON_INSTANCE;
310 }
311 }
312
313 &SCALAR_INSTANCE
314}
315
316#[deprecated(
322 since = "0.1.0",
323 note = "Use &SCALAR_INSTANCE or ScalarFallback directly"
324)]
325#[must_use]
326pub fn scalar_simd() -> &'static ScalarFallback {
327 &SCALAR_INSTANCE
328}
329
330#[deprecated(since = "0.1.0", note = "Use detect_simd() instead")]
332#[must_use]
333pub fn detect_capabilities() -> SimdCapabilities {
334 detect_simd()
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection always yields a tier label, and the dispatcher's pick
    /// reports itself as available.
    #[test]
    fn test_detect_simd() {
        let level = detect_simd().best_level();
        assert!(!level.is_empty());

        assert!(get_simd().is_available());
    }

    /// An empty capability set reports nothing and ranks as "scalar".
    #[test]
    fn test_simd_capabilities() {
        let empty = SimdCapabilities::none();
        assert!(!empty.has_avx2());
        assert!(!empty.has_avx512());
        assert!(!empty.has_neon());
        assert_eq!(empty.best_level(), "scalar");
    }

    /// The dispatched backend is available and carries one of the known names.
    #[test]
    fn test_get_simd() {
        let backend = get_simd();
        assert!(backend.is_available());

        let name = backend.name();
        let recognised = ["scalar", "avx2", "avx512", "neon"]
            .iter()
            .any(|&known| name == known);
        assert!(recognised, "Unexpected SIMD name: {}", name);
    }

    /// The extended dispatcher also hands back an available backend.
    #[test]
    fn test_get_simd_ext() {
        let backend = get_simd_ext();
        assert!(backend.is_available());
    }

    /// Selection returns one of the declared variants; the match stays
    /// exhaustive so a new variant forces this test to be revisited.
    #[test]
    fn test_select_transform_impl() {
        let chosen = select_transform_impl();

        match chosen {
            TransformImpl::Avx512 => {}
            TransformImpl::Avx2 => {}
            TransformImpl::Neon => {}
            TransformImpl::Scalar => {}
        }
    }

    /// Vector types and operation accessors are reachable through the
    /// module root re-exports.
    #[test]
    fn test_module_reexports() {
        let _i16_lanes = I16x8::zero();
        let _i32_lanes = I32x4::zero();
        let _u8_lanes = U8x16::zero();

        let _sad = sad_ops();
        let _blend = blend_ops();
        let _dct = dct_ops();
        let _filter = filter_ops();
    }

    /// Backend types can be constructed on the architectures they are
    /// exercised on here.
    #[test]
    fn test_architecture_specific() {
        let _scalar = ScalarFallback::new();

        #[cfg(target_arch = "x86_64")]
        {
            let _avx512 = Avx512Simd::new();
            let _avx2 = Avx2Simd::new();
        }

        #[cfg(target_arch = "aarch64")]
        {
            let _neon = NeonSimd::new();
        }
    }

    /// Every codec-specific wrapper accepts the scalar backend.
    #[test]
    fn test_codec_specific_types() {
        use crate::simd::scalar::ScalarFallback;

        let scalar = ScalarFallback::new();

        // AV1 wrappers.
        let _transform = TransformSimd::new(scalar);
        let _loop_filter = LoopFilterSimd::new(scalar);
        let _cdef = CdefSimd::new(scalar);
        let _intra = IntraPredSimd::new(scalar);
        let _motion = MotionCompSimd::new(scalar);

        // VP9 wrappers.
        let _vp9_dct = Vp9DctSimd::new(scalar);
        let _vp9_interp = Vp9InterpolateSimd::new(scalar);
        let _vp9_intra = Vp9IntraPredSimd::new(scalar);
        let _vp9_lf = Vp9LoopFilterSimd::new(scalar);
    }

    /// Identical 8x8 blocks have a sum of absolute differences of zero.
    #[test]
    fn test_integration_sad() {
        let ops = sad_ops();

        let current = [128u8; 64];
        let reference = [128u8; 64];

        let total = ops.sad_8x8(&current, 8, &reference, 8);
        assert_eq!(total, 0);
    }

    /// Blending 0 and 255 with weight 128 lands within 126..=130.
    #[test]
    fn test_integration_blend() {
        let ops = blend_ops();

        let mixed = ops.lerp_u8(0, 255, 128);
        assert!((126..=130).contains(&mixed));
    }

    /// A forward then inverse 4x4 DCT reproduces a flat block to within 2
    /// per sample.
    #[test]
    fn test_integration_dct() {
        let ops = dct_ops();

        let input = [100i16; 16];
        let mut coefficients = [0i16; 16];
        let mut reconstructed = [0i16; 16];

        ops.forward_dct_4x4(&input, &mut coefficients);
        ops.inverse_dct_4x4(&coefficients, &mut reconstructed);

        for (i, (&original, &restored)) in input.iter().zip(reconstructed.iter()).enumerate() {
            let diff = (original - restored).abs();
            assert!(
                diff <= 2,
                "DCT mismatch at {}: {} vs {}",
                i,
                original,
                restored
            );
        }
    }

    /// The horizontal 2-tap filter leaves a constant row unchanged.
    #[test]
    fn test_integration_filter() {
        let ops = filter_ops();

        let flat_row = [128u8; 16];
        let mut filtered = [0u8; 15];

        ops.filter_h_2tap(&flat_row, &mut filtered, 15);

        for &sample in filtered.iter() {
            assert_eq!(sample, 128);
        }
    }
}