use crate::tiling::PlaneRegionMut;
use crate::util::*;
use std::mem::MaybeUninit;
pub type InvTxfmFunc =
unsafe extern fn(*mut u8, libc::ptrdiff_t, *mut i16, i32);
pub type InvTxfmHBDFunc =
unsafe extern fn(*mut u16, libc::ptrdiff_t, *mut i16, i32, i32);
pub fn call_inverse_func<T: Pixel>(
func: InvTxfmFunc, input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>,
eob: u16, width: usize, height: usize, bd: usize,
) {
debug_assert!(bd == 8);
let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];
let mut copied = Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
for (a, b) in copied.data.iter_mut().zip(input) {
a.write(*b);
}
unsafe {
func(
output.data_ptr_mut() as *mut _,
output.plane_cfg.stride as isize,
copied.data.as_mut_ptr() as *mut _,
eob as i32 - 1,
);
}
}
pub fn call_inverse_hbd_func<T: Pixel>(
func: InvTxfmHBDFunc, input: &[T::Coeff],
output: &mut PlaneRegionMut<'_, T>, eob: u16, width: usize, height: usize,
bd: usize,
) {
let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];
let mut copied = Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
for (a, b) in copied.data.iter_mut().zip(input) {
a.write(*b);
}
unsafe {
func(
output.data_ptr_mut() as *mut _,
T::to_asm_stride(output.plane_cfg.stride),
copied.data.as_mut_ptr() as *mut _,
eob as i32 - 1,
(1 << bd) - 1,
);
}
}
#[cfg(test)]
pub mod test {
use super::*;
use crate::context::av1_get_coded_tx_size;
use crate::cpu_features::CpuFeatureLevel;
use crate::frame::{AsRegion, Plane};
use crate::scan_order::av1_scan_orders;
use crate::transform::TxSize::*;
use crate::transform::*;
use rand::{random, rng, Rng};
pub fn pick_eob<T: Coefficient>(
coeffs: &mut [T], tx_size: TxSize, tx_type: TxType, sub_h: usize,
) -> u16 {
let coeff_h = av1_get_coded_tx_size(tx_size).height();
let sub_high: usize = if sub_h > 0 { sub_h * 8 - 1 } else { 0 };
let sub_low: usize = if sub_h > 1 { sub_high - 8 } else { 0 };
let mut eob = 0u16;
let mut exit = 0;
let scan = av1_scan_orders[tx_size][tx_type].scan;
for (i, &pos) in scan.iter().enumerate() {
exit = i as u16;
let rc = pos as usize;
let rcx = rc % coeff_h;
let rcy = rc / coeff_h;
if rcx > sub_high || rcy > sub_high {
break;
} else if eob == 0 && (rcx > sub_low || rcy > sub_low) {
eob = i as u16;
}
}
if eob != 0 {
eob += rng().random_range(0..(exit - eob).min(1));
}
for &pos in scan.iter().skip(usize::from(eob)) {
coeffs[pos as usize] = T::cast_from(0);
}
eob + 1
}
pub fn test_transform<T: Pixel>(
tx_size: TxSize, tx_type: TxType, bit_depth: usize, cpu: CpuFeatureLevel,
) {
let sub_h_iterations: usize = match tx_size.height().max(tx_size.width()) {
4 => 2,
8 => 2,
16 => 3,
32 | 64 => 4,
_ => unreachable!(),
};
for sub_h in 0..sub_h_iterations {
let mut src_storage = [T::zero(); 64 * 64];
let src = &mut src_storage[..tx_size.area()];
let mut dst = Plane::from_slice(
&[T::zero(); 64 * 64][..tx_size.area()],
tx_size.width(),
);
let mut res = Aligned::<[MaybeUninit<i16>; 64 * 64]>::uninit_array();
let res = &mut res.data[..tx_size.area()];
let mut freq =
Aligned::<[MaybeUninit<T::Coeff>; 64 * 64]>::uninit_array();
let freq = &mut freq.data[..tx_size.area()];
for ((r, s), d) in
res.iter_mut().zip(src.iter_mut()).zip(dst.data.iter_mut())
{
*s = T::cast_from(random::<u16>() >> (16 - bit_depth));
*d = T::cast_from(random::<u16>() >> (16 - bit_depth));
r.write(i16::cast_from(*s) - i16::cast_from(*d));
}
let res = unsafe { slice_assume_init_mut(res) };
forward_transform(
res,
freq,
tx_size.width(),
tx_size,
tx_type,
bit_depth,
CpuFeatureLevel::RUST,
);
let freq = unsafe { slice_assume_init_mut(freq) };
let eob: u16 = pick_eob(freq, tx_size, tx_type, sub_h);
let mut rust_dst = dst.clone();
inverse_transform_add(
freq,
&mut dst.as_region_mut(),
eob,
tx_size,
tx_type,
bit_depth,
cpu,
);
inverse_transform_add(
freq,
&mut rust_dst.as_region_mut(),
eob,
tx_size,
tx_type,
bit_depth,
CpuFeatureLevel::RUST,
);
assert_eq!(rust_dst.data_origin(), dst.data_origin());
}
}
macro_rules! test_itx_fns {
([$([$(($ENUM:expr, $TYPE1:ident, $TYPE2:ident)),*]),*], $W:expr, $H:expr) => {
paste::item! {
$(
$(
#[test]
fn [<inv_txfm2d_add_ $TYPE2 _$TYPE1 _$W x $H>]() {
for &cpu in
&CpuFeatureLevel::all()[..=CpuFeatureLevel::default().as_index()]
{
test_transform::<u8>([<TX_ $W X $H>], $ENUM, 8, cpu);
test_transform::<u16>([<TX_ $W X $H>], $ENUM, 10, cpu);
test_transform::<u16>([<TX_ $W X $H>], $ENUM, 12, cpu);
}
}
)*
)*
}
};
($TYPES_VALID:tt, [$(($W:expr, $H:expr)),*]) => {
$(
test_itx_fns!($TYPES_VALID, $W, $H);
)*
};
($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt,
$TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt) => {
test_itx_fns!([$TYPES64], $DIMS64);
test_itx_fns!([$TYPES64, $TYPES32], $DIMS32);
test_itx_fns!([$TYPES64, $TYPES32, $TYPES16], $DIMS16);
test_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84
);
test_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4
);
};
}
test_itx_fns!(
[(TxType::DCT_DCT, dct, dct)],
[(64, 64), (64, 32), (32, 64), (16, 64), (64, 16)],
[(TxType::IDTX, identity, identity)],
[(32, 32), (32, 16), (16, 32), (32, 8), (8, 32)],
[
(TxType::DCT_ADST, dct, adst),
(TxType::ADST_DCT, adst, dct),
(TxType::DCT_FLIPADST, dct, flipadst),
(TxType::FLIPADST_DCT, flipadst, dct),
(TxType::V_DCT, dct, identity),
(TxType::H_DCT, identity, dct),
(TxType::ADST_ADST, adst, adst),
(TxType::ADST_FLIPADST, adst, flipadst),
(TxType::FLIPADST_ADST, flipadst, adst),
(TxType::FLIPADST_FLIPADST, flipadst, flipadst)
],
[(16, 16)],
[
(TxType::V_ADST, adst, identity),
(TxType::H_ADST, identity, adst),
(TxType::V_FLIPADST, flipadst, identity),
(TxType::H_FLIPADST, identity, flipadst)
],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)],
[(TxType::WHT_WHT, wht, wht)],
[(4, 4)]
);
}