use crate::tiling::PlaneRegionMut;
use crate::util::*;
/// Function-pointer type of an externally implemented inverse-transform
/// routine operating on 8-bit (`u8`) pixels.
///
/// `extern` without an explicit ABI string means the C calling convention.
/// As used by `call_inverse_func`, the arguments are, in order:
/// 1. pointer to the destination pixels,
/// 2. destination stride,
/// 3. mutable pointer to the 16-bit coefficients,
/// 4. the end-of-block value (`eob`).
pub type InvTxfmFunc =
unsafe extern fn(*mut u8, libc::ptrdiff_t, *mut i16, i32);
/// Function-pointer type of an externally implemented inverse-transform
/// routine operating on 16-bit (`u16`) pixels; only compiled when the
/// `asm_neon` cfg is set.
///
/// As used by `call_inverse_hbd_func`, the arguments are, in order:
/// 1. pointer to the destination pixels,
/// 2. destination stride (the caller passes `T::to_asm_stride(..)`),
/// 3. mutable pointer to the 16-bit coefficients,
/// 4. the end-of-block value (`eob`).
#[cfg(asm_neon)]
pub type InvTxfmHBDFunc =
unsafe extern fn(*mut u16, libc::ptrdiff_t, *mut i16, i32);
/// Invokes the 8-bit inverse-transform routine `func` on `input`, with the
/// result going to `output`.
///
/// * `func` - external routine to call.
/// * `input` - coefficients; only the first
///   `width.min(32) * height.min(32)` entries are used.
/// * `output` - destination region; its data pointer and plane stride are
///   handed to `func`.
/// * `eob` - end-of-block value, forwarded to `func` as an `i32`.
/// * `width`, `height` - block dimensions, each clamped to 32 when sizing
///   the coefficient prefix.
/// * `bd` - bit depth; must be 8 (checked in debug builds only).
///
/// # Panics
///
/// Panics if `input` is shorter than `width.min(32) * height.min(32)`.
pub fn call_inverse_func<T: Pixel>(
  func: InvTxfmFunc, input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>,
  eob: usize, width: usize, height: usize, bd: usize,
) {
  debug_assert!(bd == 8);

  // At most 32 columns by 32 rows of coefficients are passed on.
  let used = width.min(32) * height.min(32);
  let coeffs: &[T::Coeff] = &input[..used];

  // `func` takes a mutable coefficient pointer while `input` is borrowed
  // immutably, so the coefficients are staged in a local aligned buffer.
  // Entries past `coeffs.len()` keep whatever `Aligned::uninitialized()`
  // produced.
  let mut scratch: Aligned<[T::Coeff; 32 * 32]> = Aligned::uninitialized();
  scratch.data[..coeffs.len()].copy_from_slice(coeffs);

  let dst_ptr = output.data_ptr_mut() as *mut u8;
  let dst_stride = output.plane_cfg.stride as isize;
  let coeff_ptr = scratch.data.as_mut_ptr() as *mut i16;
  let last = eob as i32;

  // SAFETY: `scratch` is a live local holding 32 * 32 coefficients for the
  // whole call. NOTE(review): assumes `func` matches the block size implied
  // by `width`/`height` and stays inside `output` - verify against callers.
  unsafe {
    func(dst_ptr, dst_stride, coeff_ptr, last);
  }
}
/// Invokes the high-bit-depth inverse-transform routine `func` on `input`,
/// with the result going to `output`. Only compiled when the `asm_neon` cfg
/// is set.
///
/// * `func` - external routine to call.
/// * `input` - coefficients; only the first
///   `width.min(32) * height.min(32)` entries are used.
/// * `output` - destination region; its data pointer is handed to `func`
///   together with the plane stride converted by `T::to_asm_stride`.
/// * `eob` - end-of-block value, forwarded to `func` as an `i32`.
/// * `width`, `height` - block dimensions, each clamped to 32 when sizing
///   the coefficient prefix.
/// * `bd` - bit depth; must be 10 (checked in debug builds only).
///
/// # Panics
///
/// Panics if `input` is shorter than `width.min(32) * height.min(32)`.
#[cfg(asm_neon)]
pub fn call_inverse_hbd_func<T: Pixel>(
  func: InvTxfmHBDFunc, input: &[T::Coeff],
  output: &mut PlaneRegionMut<'_, T>, eob: usize, width: usize, height: usize,
  bd: usize,
) {
  debug_assert!(bd == 10);

  // At most 32 columns by 32 rows of coefficients are passed on.
  let used = width.min(32) * height.min(32);
  let coeffs: &[T::Coeff] = &input[..used];

  // `func` takes a mutable coefficient pointer while `input` is borrowed
  // immutably, so the coefficients are staged in a local aligned buffer.
  // Entries past `coeffs.len()` keep whatever `Aligned::uninitialized()`
  // produced.
  let mut scratch: Aligned<[T::Coeff; 32 * 32]> = Aligned::uninitialized();
  scratch.data[..coeffs.len()].copy_from_slice(coeffs);

  let dst_ptr = output.data_ptr_mut() as *mut u16;
  let dst_stride = T::to_asm_stride(output.plane_cfg.stride) as isize;
  let coeff_ptr = scratch.data.as_mut_ptr() as *mut i16;
  let last = eob as i32;

  // SAFETY: `scratch` is a live local holding 32 * 32 coefficients for the
  // whole call. NOTE(review): assumes `func` matches the block size implied
  // by `width`/`height` and stays inside `output` - verify against callers.
  unsafe {
    func(dst_ptr, dst_stride, coeff_ptr, last);
  }
}
#[cfg(test)]
pub mod test {
use super::*;
use crate::context::av1_get_coded_tx_size;
use crate::cpu_features::CpuFeatureLevel;
use crate::frame::{AsRegion, Plane};
use crate::scan_order::av1_scan_orders;
use crate::transform::*;
use rand::{random, thread_rng, Rng};
/// Chooses an end-of-block scan index for a test block and clears every
/// coefficient that comes after it in scan order.
///
/// Each scan position is split into a remainder and a quotient by the coded
/// transform height. Walking the scan table in order, the walk stops at the
/// first position where either value exceeds the outer limit
/// (`sub_h * 8 - 1`, or 0 when `sub_h` is 0). Before that, the first
/// non-zero scan index where either value exceeds the inner limit (outer
/// limit minus 8, or 0 when `sub_h <= 1`) becomes the candidate `eob`. If no
/// such index is found, `eob` stays 0.
///
/// Coefficients at scan indices greater than the returned value are set to
/// zero in `coeffs`.
///
/// # Panics
///
/// Panics if a cleared scan position lies outside `coeffs`.
pub fn pick_eob<T: Coefficient>(
  coeffs: &mut [T], tx_size: TxSize, tx_type: TxType, sub_h: usize,
) -> usize {
  let divisor = av1_get_coded_tx_size(tx_size).height();

  // Inclusive bounds on the larger of the two derived coordinates.
  let outer_limit: usize = match sub_h {
    0 => 0,
    n => n * 8 - 1,
  };
  let inner_limit: usize = match sub_h {
    0 | 1 => 0,
    _ => outer_limit - 8,
  };

  let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;

  // `eob == 0` doubles as "not found yet"; `exit` tracks the last index
  // visited (the breaking index when the walk stops early).
  let mut eob = 0;
  let mut exit = 0;
  for (i, &pos) in scan.iter().enumerate() {
    exit = i;

    let rc = pos as usize;
    let farthest = (rc % divisor).max(rc / divisor);

    if farthest > outer_limit {
      break;
    }
    if eob == 0 && farthest > inner_limit {
      eob = i;
    }
  }

  if eob != 0 {
    // NOTE(review): `.min(1)` caps the exclusive upper bound at 1, so the
    // drawn offset is always 0 (and an upper bound of 0 would presumably be
    // rejected by `gen_range`). `.max(1)` may have been intended - confirm
    // before changing.
    let spread = (exit - eob).min(1);
    eob += thread_rng().gen_range(0, spread);
  }

  // Keep scan indices 0..=eob, zero everything after.
  for &pos in scan.iter().skip(eob + 1) {
    coeffs[pos as usize] = T::cast_from(0);
  }

  eob
}
/// Checks that `inverse_transform_add` at feature level `cpu` writes exactly
/// the same pixels as it does at `CpuFeatureLevel::RUST`.
///
/// For every `sub_h` in a range that depends on the larger transform
/// dimension, the test:
/// 1. fills a source block and a destination plane with random bytes and
///    forms their difference as the residual,
/// 2. forward-transforms the residual at `CpuFeatureLevel::RUST`,
/// 3. trims the coefficients with `pick_eob`,
/// 4. runs the inverse transform at both feature levels on identical copies
///    of the destination and asserts the results are equal.
///
/// # Panics
///
/// Panics if the outputs differ, or (via `unreachable!`) if the larger
/// dimension of `tx_size` is not one of 4, 8, 16, 32 or 64.
pub fn test_transform(
  tx_size: TxSize, tx_type: TxType, cpu: CpuFeatureLevel,
) {
  let area = tx_size.area();
  let width = tx_size.width();

  let sub_h_iterations: usize = match tx_size.height().max(width) {
    4 | 8 => 2,
    16 => 3,
    32 | 64 => 4,
    _ => unreachable!(),
  };

  for sub_h in 0..sub_h_iterations {
    // Backing storage sized for the largest (64x64) transform; only the
    // first `area` entries of each buffer are used.
    let mut src_storage = [0u8; 64 * 64];
    let src = &mut src_storage[..area];

    let blank = vec![0u8; area];
    let mut dst = Plane::from_slice(&blank, width);

    let mut res_storage: Aligned<[i16; 64 * 64]> = Aligned::uninitialized();
    let res = &mut res_storage.data[..area];
    let mut freq_storage: Aligned<[i16; 64 * 64]> = Aligned::uninitialized();
    let freq = &mut freq_storage.data[..area];

    // Random source and prediction; the residual is their difference.
    for ((s, d), r) in
      src.iter_mut().zip(dst.data.iter_mut()).zip(res.iter_mut())
    {
      *s = random::<u8>();
      *d = random::<u8>();
      *r = i16::from(*s) - i16::from(*d);
    }

    forward_transform(
      res,
      freq,
      width,
      tx_size,
      tx_type,
      8,
      CpuFeatureLevel::RUST,
    );

    let eob: usize = pick_eob(freq, tx_size, tx_type, sub_h);

    // Snapshot the destination so both runs start from the same pixels.
    let mut rust_dst = dst.clone();

    // Implementation under test.
    inverse_transform_add(
      freq,
      &mut dst.as_region_mut(),
      eob,
      tx_size,
      tx_type,
      8,
      cpu,
    );

    // Reference run at the RUST feature level.
    inverse_transform_add(
      freq,
      &mut rust_dst.as_region_mut(),
      eob,
      tx_size,
      tx_type,
      8,
      CpuFeatureLevel::RUST,
    );

    assert_eq!(rust_dst.data_origin(), dst.data_origin());
  }
}
}