use std::num::NonZeroUsize;
use v_frame::pixel::Pixel;
use crate::data::{block::BlockSize, plane::PlaneRegion};
/// Declares `unsafe extern "C"` prototypes for a list of distortion kernels.
///
/// Every `(symbol, SampleType)` pair expands to a foreign function
/// `fn symbol(src: *const SampleType, src_stride: isize,
/// dst: *const SampleType, dst_stride: isize) -> u32`.
///
/// Pairs are separated by commas; at least one pair is required and a
/// trailing comma is not accepted.
macro_rules! declare_asm_dist_fn {
    ($(($symbol:ident, $sample:ident)),+) => {
        unsafe extern "C" {
            $(
                fn $symbol(
                    src: *const $sample,
                    src_stride: isize,
                    dst: *const $sample,
                    dst_stride: isize,
                ) -> u32;
            )+
        }
    };
}
// Prototypes for the `avsc_satd_*_avx2` kernels, one per block size and
// sample type. They are only declared here; the definitions must be provided
// at link time (presumably by assembly objects built with the crate — confirm
// against the build script).
declare_asm_dist_fn![
// Kernels taking `u8` samples.
(avsc_satd_4x4_avx2, u8),
(avsc_satd_4x8_avx2, u8),
(avsc_satd_4x16_avx2, u8),
(avsc_satd_8x4_avx2, u8),
(avsc_satd_8x8_avx2, u8),
(avsc_satd_8x16_avx2, u8),
(avsc_satd_8x32_avx2, u8),
(avsc_satd_16x4_avx2, u8),
(avsc_satd_16x8_avx2, u8),
(avsc_satd_16x16_avx2, u8),
(avsc_satd_16x32_avx2, u8),
(avsc_satd_16x64_avx2, u8),
(avsc_satd_32x8_avx2, u8),
(avsc_satd_32x16_avx2, u8),
(avsc_satd_32x32_avx2, u8),
(avsc_satd_32x64_avx2, u8),
(avsc_satd_64x16_avx2, u8),
(avsc_satd_64x32_avx2, u8),
(avsc_satd_64x64_avx2, u8),
(avsc_satd_64x128_avx2, u8),
(avsc_satd_128x64_avx2, u8),
(avsc_satd_128x128_avx2, u8),
// `_hbd` kernels taking `u16` samples (presumably "high bit depth" — confirm).
(avsc_satd_4x4_hbd_avx2, u16),
(avsc_satd_4x8_hbd_avx2, u16),
(avsc_satd_4x16_hbd_avx2, u16),
(avsc_satd_8x4_hbd_avx2, u16),
(avsc_satd_8x8_hbd_avx2, u16),
(avsc_satd_8x16_hbd_avx2, u16),
(avsc_satd_8x32_hbd_avx2, u16),
(avsc_satd_16x4_hbd_avx2, u16),
(avsc_satd_16x8_hbd_avx2, u16),
(avsc_satd_16x16_hbd_avx2, u16),
(avsc_satd_16x32_hbd_avx2, u16),
(avsc_satd_16x64_hbd_avx2, u16),
(avsc_satd_32x8_hbd_avx2, u16),
(avsc_satd_32x16_hbd_avx2, u16),
(avsc_satd_32x32_hbd_avx2, u16),
(avsc_satd_32x64_hbd_avx2, u16),
(avsc_satd_64x16_hbd_avx2, u16),
(avsc_satd_64x32_hbd_avx2, u16),
(avsc_satd_64x64_hbd_avx2, u16),
(avsc_satd_64x128_hbd_avx2, u16),
(avsc_satd_128x64_hbd_avx2, u16),
(avsc_satd_128x128_hbd_avx2, u16)
];
#[target_feature(enable = "avx2")]
pub(super) fn get_satd_internal<T: Pixel>(
src: &PlaneRegion<'_, T>,
dst: &PlaneRegion<'_, T>,
w: NonZeroUsize,
h: NonZeroUsize,
bit_depth: NonZeroUsize,
) -> u32 {
let bsize_opt = BlockSize::from_width_and_height_opt(w.get(), h.get());
match (bsize_opt, size_of::<T>()) {
(Err(_), _) => super::rust::get_satd_internal(src, dst, w, h, bit_depth),
(Ok(bsize), 1) => unsafe {
(match bsize {
BlockSize::BLOCK_4X4 => avsc_satd_4x4_avx2,
BlockSize::BLOCK_4X8 => avsc_satd_4x8_avx2,
BlockSize::BLOCK_4X16 => avsc_satd_4x16_avx2,
BlockSize::BLOCK_8X4 => avsc_satd_8x4_avx2,
BlockSize::BLOCK_16X4 => avsc_satd_16x4_avx2,
BlockSize::BLOCK_8X8 => avsc_satd_8x8_avx2,
BlockSize::BLOCK_8X16 => avsc_satd_8x16_avx2,
BlockSize::BLOCK_8X32 => avsc_satd_8x32_avx2,
BlockSize::BLOCK_16X8 => avsc_satd_16x8_avx2,
BlockSize::BLOCK_16X16 => avsc_satd_16x16_avx2,
BlockSize::BLOCK_16X32 => avsc_satd_16x32_avx2,
BlockSize::BLOCK_16X64 => avsc_satd_16x64_avx2,
BlockSize::BLOCK_32X8 => avsc_satd_32x8_avx2,
BlockSize::BLOCK_32X16 => avsc_satd_32x16_avx2,
BlockSize::BLOCK_32X32 => avsc_satd_32x32_avx2,
BlockSize::BLOCK_32X64 => avsc_satd_32x64_avx2,
BlockSize::BLOCK_64X16 => avsc_satd_64x16_avx2,
BlockSize::BLOCK_64X32 => avsc_satd_64x32_avx2,
BlockSize::BLOCK_64X64 => avsc_satd_64x64_avx2,
BlockSize::BLOCK_64X128 => avsc_satd_64x128_avx2,
BlockSize::BLOCK_128X64 => avsc_satd_128x64_avx2,
BlockSize::BLOCK_128X128 => avsc_satd_128x128_avx2,
})(
src.data_ptr() as *const _,
(size_of::<T>() * src.plane_cfg.stride.get()) as isize,
dst.data_ptr() as *const _,
(size_of::<T>() * dst.plane_cfg.stride.get()) as isize,
)
},
(Ok(bsize), 2) => unsafe {
(match bsize {
BlockSize::BLOCK_4X4 => avsc_satd_4x4_hbd_avx2,
BlockSize::BLOCK_4X8 => avsc_satd_4x8_hbd_avx2,
BlockSize::BLOCK_4X16 => avsc_satd_4x16_hbd_avx2,
BlockSize::BLOCK_8X4 => avsc_satd_8x4_hbd_avx2,
BlockSize::BLOCK_16X4 => avsc_satd_16x4_hbd_avx2,
BlockSize::BLOCK_8X8 => avsc_satd_8x8_hbd_avx2,
BlockSize::BLOCK_8X16 => avsc_satd_8x16_hbd_avx2,
BlockSize::BLOCK_8X32 => avsc_satd_8x32_hbd_avx2,
BlockSize::BLOCK_16X8 => avsc_satd_16x8_hbd_avx2,
BlockSize::BLOCK_16X16 => avsc_satd_16x16_hbd_avx2,
BlockSize::BLOCK_16X32 => avsc_satd_16x32_hbd_avx2,
BlockSize::BLOCK_16X64 => avsc_satd_16x64_hbd_avx2,
BlockSize::BLOCK_32X8 => avsc_satd_32x8_hbd_avx2,
BlockSize::BLOCK_32X16 => avsc_satd_32x16_hbd_avx2,
BlockSize::BLOCK_32X32 => avsc_satd_32x32_hbd_avx2,
BlockSize::BLOCK_32X64 => avsc_satd_32x64_hbd_avx2,
BlockSize::BLOCK_64X16 => avsc_satd_64x16_hbd_avx2,
BlockSize::BLOCK_64X32 => avsc_satd_64x32_hbd_avx2,
BlockSize::BLOCK_64X64 => avsc_satd_64x64_hbd_avx2,
BlockSize::BLOCK_64X128 => avsc_satd_64x128_hbd_avx2,
BlockSize::BLOCK_128X64 => avsc_satd_128x64_hbd_avx2,
BlockSize::BLOCK_128X128 => avsc_satd_128x128_hbd_avx2,
})(
src.data_ptr() as *const _,
(size_of::<T>() * src.plane_cfg.stride.get()) as isize,
dst.data_ptr() as *const _,
(size_of::<T>() * dst.plane_cfg.stride.get()) as isize,
)
},
_ => unreachable!(),
}
}