#![allow(unsafe_code)]
pub mod scalar;
pub mod traits;
pub mod types;
pub mod arm;
pub mod x86;
pub mod av1;
pub mod vp9;
pub mod blend;
pub mod dct;
pub mod filter;
pub mod sad;
pub mod pixel_convert;
pub mod yuv_convert;
pub use blend::{blend_ops, BlendOps};
pub use dct::{dct_ops, DctOps};
pub use filter::{filter_ops, FilterOps};
pub use sad::{sad_ops, SadOps};
pub use traits::{SimdOps, SimdOpsExt, SimdSelector};
pub use types::{I16x16, I16x8, I32x4, I32x8, U8x16, U8x32};
pub use arm::NeonSimd;
pub use scalar::ScalarFallback;
pub use x86::{Avx2Simd, Avx512Simd};
pub use av1::{CdefSimd, IntraPredSimd, LoopFilterSimd, MotionCompSimd, TransformSimd};
pub use vp9::{Vp9DctSimd, Vp9InterpolateSimd, Vp9IntraPredSimd, Vp9LoopFilterSimd};
/// Set of boolean flags naming which SIMD instruction sets are considered usable.
///
/// The default value (and [`SimdCapabilities::none`]) has every flag cleared.
#[derive(Debug, Default, Clone, Copy)]
#[allow(clippy::struct_excessive_bools)]
pub struct SimdCapabilities {
    /// `true` when AVX2 may be used.
    pub avx2: bool,
    /// `true` when AVX-512 may be used.
    pub avx512: bool,
    /// `true` when NEON may be used.
    pub neon: bool,
}

impl SimdCapabilities {
    /// Builds a capability set in which no SIMD extension is enabled.
    #[must_use]
    pub const fn none() -> Self {
        Self {
            neon: false,
            avx512: false,
            avx2: false,
        }
    }

    /// Reports whether the AVX2 flag is set.
    #[inline]
    #[must_use]
    pub const fn has_avx2(&self) -> bool {
        self.avx2
    }

    /// Reports whether the AVX-512 flag is set.
    #[inline]
    #[must_use]
    pub const fn has_avx512(&self) -> bool {
        self.avx512
    }

    /// Reports whether the NEON flag is set.
    #[inline]
    #[must_use]
    pub const fn has_neon(&self) -> bool {
        self.neon
    }

    /// Names the highest-priority enabled level.
    ///
    /// Priority is AVX-512, then AVX2, then NEON; `"scalar"` is returned when
    /// no flag is set.
    #[must_use]
    pub const fn best_level(&self) -> &'static str {
        // Tuple order mirrors the priority order, so the first `true` wins.
        match (self.avx512, self.avx2, self.neon) {
            (true, _, _) => "avx512",
            (false, true, _) => "avx2",
            (false, false, true) => "neon",
            (false, false, false) => "scalar",
        }
    }
}
/// Builds the [`SimdCapabilities`] for the target this crate was compiled for.
///
/// * `x86_64`: AVX2 and AVX-512 are probed at runtime; the AVX-512 flag is set
///   only when `avx512f`, `avx512bw` and `avx512dq` are all reported.
/// * `aarch64`: the NEON flag is set unconditionally, with no runtime probe.
/// * any other architecture: every flag is cleared.
#[must_use]
pub fn detect_simd() -> SimdCapabilities {
    #[cfg(target_arch = "x86_64")]
    {
        let has_avx2 = is_x86_feature_detected!("avx2");
        // All three AVX-512 subsets must be present before the flag is raised.
        let has_avx512 = is_x86_feature_detected!("avx512f")
            && is_x86_feature_detected!("avx512bw")
            && is_x86_feature_detected!("avx512dq");
        SimdCapabilities {
            avx2: has_avx2,
            avx512: has_avx512,
            neon: false,
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        SimdCapabilities {
            neon: true,
            ..SimdCapabilities::none()
        }
    }
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        SimdCapabilities::none()
    }
}
/// Identifies the implementation tier chosen by `select_transform_impl`.
///
/// Variants are declared from the most to the least specialised tier.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum TransformImpl {
    /// AVX-512 implementation.
    Avx512,
    /// AVX2 implementation.
    Avx2,
    /// NEON implementation.
    Neon,
    /// Portable scalar implementation.
    Scalar,
}
/// Picks the [`TransformImpl`] tier matching the capabilities reported by
/// [`detect_simd`].
///
/// Tiers are tried in the order AVX-512, AVX2, NEON; `Scalar` is returned when
/// none of the flags is set.
#[must_use]
pub fn select_transform_impl() -> TransformImpl {
    let detected = detect_simd();
    if detected.has_avx512() {
        return TransformImpl::Avx512;
    }
    if detected.has_avx2() {
        return TransformImpl::Avx2;
    }
    if detected.has_neon() {
        return TransformImpl::Neon;
    }
    TransformImpl::Scalar
}
// Module-private backend singletons. `get_simd` and `get_simd_ext` hand out
// `'static` references to these, and `scalar_simd` returns the scalar one.

// Scalar fallback instance; compiled on every target.
static SCALAR_INSTANCE: ScalarFallback = ScalarFallback;
// AVX2 instance; only compiled for x86_64.
#[cfg(target_arch = "x86_64")]
static AVX2_INSTANCE: Avx2Simd = Avx2Simd;
// AVX-512 instance; only compiled for x86_64.
#[cfg(target_arch = "x86_64")]
static AVX512_INSTANCE: Avx512Simd = Avx512Simd;
// NEON instance; only compiled for aarch64.
#[cfg(target_arch = "aarch64")]
static NEON_INSTANCE: NeonSimd = NeonSimd;
/// Returns the preferred [`SimdOps`] backend as a `'static` trait object.
///
/// On `x86_64` the AVX-512 backend is tried first, then AVX2; on `aarch64` the
/// NEON backend is tried. Whenever no specialised backend reports itself
/// available (or on any other architecture) the scalar fallback is returned.
/// The availability checks are re-run on every call.
#[must_use]
pub fn get_simd() -> &'static dyn SimdOps {
    #[cfg(target_arch = "x86_64")]
    {
        // Widest vector backend gets first refusal.
        if Avx512Simd::is_available() {
            return &AVX512_INSTANCE;
        }
        if Avx2Simd::is_available() {
            return &AVX2_INSTANCE;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if NeonSimd::is_available() {
            return &NEON_INSTANCE;
        }
    }

    // Nothing specialised is usable: fall back to the portable implementation.
    &SCALAR_INSTANCE
}
/// Returns the preferred [`SimdOpsExt`] backend as a `'static` trait object.
///
/// Selection follows the same order as `get_simd`: AVX-512 then AVX2 on
/// `x86_64`, NEON on `aarch64`, and the scalar fallback when no specialised
/// backend reports itself available or the architecture is neither of those.
#[must_use]
pub fn get_simd_ext() -> &'static dyn SimdOpsExt {
    #[cfg(target_arch = "x86_64")]
    {
        // Widest vector backend gets first refusal.
        if Avx512Simd::is_available() {
            return &AVX512_INSTANCE;
        }
        if Avx2Simd::is_available() {
            return &AVX2_INSTANCE;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if NeonSimd::is_available() {
            return &NEON_INSTANCE;
        }
    }

    // Nothing specialised is usable: fall back to the portable implementation.
    &SCALAR_INSTANCE
}
// NOTE(review): the deprecation note suggests `&SCALAR_INSTANCE`, but that
// static is declared without `pub` in this file, so callers outside this
// module can only follow the `ScalarFallback` half of the advice — consider
// rewording the note.
/// Returns a reference to the module's shared scalar fallback instance.
///
/// Deprecated since 0.1.0; kept as a thin accessor for existing callers.
#[deprecated(
since = "0.1.0",
note = "Use &SCALAR_INSTANCE or ScalarFallback directly"
)]
#[must_use]
pub fn scalar_simd() -> &'static ScalarFallback {
&SCALAR_INSTANCE
}
/// Deprecated alias for [`detect_simd`]; forwards the call and returns its
/// result unchanged.
#[deprecated(since = "0.1.0", note = "Use detect_simd() instead")]
#[must_use]
pub fn detect_capabilities() -> SimdCapabilities {
detect_simd()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection always yields a non-empty level name and a usable backend.
    #[test]
    fn test_detect_simd() {
        let caps = detect_simd();
        let level = caps.best_level();
        assert!(!level.is_empty());
        assert!(get_simd().is_available());
    }

    /// An empty capability set reports no features and the scalar level.
    #[test]
    fn test_simd_capabilities() {
        let caps = SimdCapabilities::none();
        assert!(!caps.has_avx2());
        assert!(!caps.has_avx512());
        assert!(!caps.has_neon());
        assert_eq!(caps.best_level(), "scalar");
    }

    /// The dispatched backend is available and carries a known name.
    #[test]
    fn test_get_simd() {
        let simd = get_simd();
        assert!(simd.is_available());
        let name = simd.name();
        assert!(
            name == "scalar" || name == "avx2" || name == "avx512" || name == "neon",
            "Unexpected SIMD name: {}",
            name
        );
    }

    /// The extended dispatcher also returns an available backend.
    #[test]
    fn test_get_simd_ext() {
        let simd = get_simd_ext();
        assert!(simd.is_available());
    }

    /// The selected transform tier must agree with the level name derived from
    /// the same detected capabilities (both use the AVX-512 > AVX2 > NEON >
    /// scalar priority).
    #[test]
    fn test_select_transform_impl() {
        let expected = match detect_simd().best_level() {
            "avx512" => TransformImpl::Avx512,
            "avx2" => TransformImpl::Avx2,
            "neon" => TransformImpl::Neon,
            _ => TransformImpl::Scalar,
        };
        assert_eq!(select_transform_impl(), expected);
    }

    /// Vector types and op accessors are reachable through the re-exports.
    #[test]
    fn test_module_reexports() {
        let _v = I16x8::zero();
        let _v = I32x4::zero();
        let _v = U8x16::zero();
        let _ops = sad_ops();
        let _ops = blend_ops();
        let _ops = dct_ops();
        let _ops = filter_ops();
    }

    /// Backend types for the current architecture can be constructed.
    #[test]
    fn test_architecture_specific() {
        let _scalar = ScalarFallback::new();
        #[cfg(target_arch = "x86_64")]
        {
            let _avx2 = Avx2Simd::new();
            let _avx512 = Avx512Simd::new();
        }
        #[cfg(target_arch = "aarch64")]
        {
            let _neon = NeonSimd::new();
        }
    }

    /// Codec-specific wrappers accept the scalar backend.
    #[test]
    fn test_codec_specific_types() {
        // `ScalarFallback` comes from the parent's re-export via `use super::*`,
        // so no absolute crate path is needed here.
        let simd = ScalarFallback::new();
        let _transform = TransformSimd::new(simd);
        let _loop_filter = LoopFilterSimd::new(simd);
        let _cdef = CdefSimd::new(simd);
        let _intra = IntraPredSimd::new(simd);
        let _motion = MotionCompSimd::new(simd);
        let _vp9_dct = Vp9DctSimd::new(simd);
        let _vp9_interp = Vp9InterpolateSimd::new(simd);
        let _vp9_intra = Vp9IntraPredSimd::new(simd);
        let _vp9_lf = Vp9LoopFilterSimd::new(simd);
    }

    /// Identical 8x8 blocks have a SAD of zero.
    #[test]
    fn test_integration_sad() {
        let sad = sad_ops();
        let src = [128u8; 64];
        let ref_block = [128u8; 64];
        let result = sad.sad_8x8(&src, 8, &ref_block, 8);
        assert_eq!(result, 0);
    }

    /// Blending 0 and 255 at weight 128 lands near the midpoint.
    #[test]
    fn test_integration_blend() {
        let blend = blend_ops();
        let result = blend.lerp_u8(0, 255, 128);
        assert!((126..=130).contains(&result));
    }

    /// A forward + inverse 4x4 DCT round-trips within a tolerance of 2.
    #[test]
    fn test_integration_dct() {
        let dct = dct_ops();
        let input = [100i16; 16];
        let mut dct_out = [0i16; 16];
        let mut reconstructed = [0i16; 16];
        dct.forward_dct_4x4(&input, &mut dct_out);
        dct.inverse_dct_4x4(&dct_out, &mut reconstructed);
        for (i, (&expected, &actual)) in input.iter().zip(reconstructed.iter()).enumerate() {
            let diff = (expected - actual).abs();
            assert!(
                diff <= 2,
                "DCT mismatch at {}: {} vs {}",
                i,
                expected,
                actual
            );
        }
    }

    /// Filtering a constant row horizontally keeps the constant value.
    #[test]
    fn test_integration_filter() {
        let filter = filter_ops();
        let src = [128u8; 16];
        let mut dst = [0u8; 15];
        filter.filter_h_2tap(&src, &mut dst, 15);
        for &v in &dst {
            assert_eq!(v, 128);
        }
    }
}