#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::context::av1_get_coded_tx_size;
use crate::cpu_features::CpuFeatureLevel;
use crate::quantize::*;
use crate::transform::TxSize;
use crate::util::*;
use std::mem::MaybeUninit;
/// Signature shared by the SIMD dequantization kernels registered in
/// `DEQUANTIZE_FNS`.
///
/// The kernels operate on raw `i16` pointers rather than slices, so every
/// call is `unsafe`: the caller is responsible for pointer validity and for
/// only invoking a kernel on a CPU that supports its instruction set.
///
/// Parameters, in order: the quantizer index, the quantized input
/// coefficients, the end-of-block position (named `_eob` because the kernel
/// visible in this file ignores it), the output buffer for the reconstructed
/// coefficients, the transform size, the bit depth, and the DC / AC
/// quantizer index deltas.
type DequantizeFn = unsafe fn(
qindex: u8,
coeffs_ptr: *const i16,
_eob: u16,
rcoeffs_ptr: *mut i16,
tx_size: TxSize,
bit_depth: usize,
dc_delta_q: i8,
ac_delta_q: i8,
);
// Dispatch table of dequantization kernels. `dequantize` indexes it with
// `CpuFeatureLevel::as_index()` and treats a `None` entry as "no SIMD kernel,
// use the Rust implementation". Only an AVX2 kernel is registered here.
// NOTE(review): presumably the macro also installs the AVX2 entry for every
// feature level above AVX2 — confirm against the macro definition.
cpu_function_lookup_table!(
DEQUANTIZE_FNS: [Option<DequantizeFn>],
default: None,
[(AVX2, Some(dequantize_avx2))]
);
#[inline(always)]
pub fn dequantize<T: Coefficient>(
qindex: u8, coeffs: &[T], eob: u16, rcoeffs: &mut [MaybeUninit<T>],
tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
cpu: CpuFeatureLevel,
) {
let call_rust = |rcoeffs: &mut [MaybeUninit<T>]| {
crate::quantize::rust::dequantize(
qindex, coeffs, eob, rcoeffs, tx_size, bit_depth, dc_delta_q,
ac_delta_q, cpu,
);
};
#[cfg(any(feature = "check_asm", test))]
let mut ref_rcoeffs = {
let area = av1_get_coded_tx_size(tx_size).area();
let mut copy = vec![MaybeUninit::new(T::cast_from(0)); area];
call_rust(&mut copy);
copy
};
match T::Pixel::type_enum() {
PixelType::U8 => {
if let Some(func) = DEQUANTIZE_FNS[cpu.as_index()] {
unsafe {
(func)(
qindex,
coeffs.as_ptr() as *const _,
eob,
rcoeffs.as_mut_ptr() as *mut _,
tx_size,
bit_depth,
dc_delta_q,
ac_delta_q,
)
}
} else {
call_rust(rcoeffs)
}
}
PixelType::U16 => call_rust(rcoeffs),
}
#[cfg(any(feature = "check_asm", test))]
{
let area = av1_get_coded_tx_size(tx_size).area();
let rcoeffs = unsafe { slice_assume_init_mut(&mut rcoeffs[..area]) };
let ref_rcoeffs = unsafe { slice_assume_init_mut(&mut ref_rcoeffs[..]) };
assert_eq!(rcoeffs, ref_rcoeffs);
}
}
/// AVX2 dequantization kernel for 16-bit coefficients.
///
/// Walks the coded area of `tx_size` sixteen coefficients at a time. For each
/// coefficient the magnitude is multiplied by the quantizer, shifted right by
/// the transform's log scale, saturated back to 16 bits and finally given the
/// sign of the input coefficient (a zero input yields zero). The DC quantizer
/// is applied to the very first coefficient only; every other coefficient
/// uses the AC quantizer. `_eob` is ignored.
///
/// # Safety
///
/// * The running CPU must support AVX2.
/// * `coeffs_ptr` must be valid for reads, and `rcoeffs_ptr` valid for
///   writes, of `av1_get_coded_tx_size(tx_size).area()` `i16` values.
/// * Both pointers must be 32-byte aligned, as required by the aligned
///   256-bit load and store used here.
///
/// # Panics
///
/// Panics if the coded area is smaller than one 16-coefficient step.
#[target_feature(enable = "avx2")]
unsafe fn dequantize_avx2(
  qindex: u8, coeffs_ptr: *const i16, _eob: u16, rcoeffs_ptr: *mut i16,
  tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
) {
  // Per-lane right shift applied to every product.
  let shift = _mm256_set1_epi32(get_log_tx_scale(tx_size) as i32);

  // AC quantizer broadcast to all eight 32-bit lanes, plus a variant whose
  // lane 0 carries the DC quantizer for the first coefficient of the block.
  let quants_ac =
    _mm256_set1_epi32(ac_q(qindex, ac_delta_q, bit_depth).get() as i32);
  let quants_dc_first = _mm256_insert_epi32(
    quants_ac,
    dc_q(qindex, dc_delta_q, bit_depth).get() as i32,
    0,
  );

  let area: usize = av1_get_coded_tx_size(tx_size).area();
  // One 256-bit register holds 16 coefficients of 16 bits each.
  let step: usize = 16;
  assert!(area >= step);

  let zero = _mm256_setzero_si256();

  for offset in (0..area).step_by(step) {
    let src = coeffs_ptr.add(offset).cast::<__m256i>();
    let dst = rcoeffs_ptr.add(offset).cast::<__m256i>();

    // Only the first group of the block contains the DC coefficient.
    let quants_lo = if offset == 0 { quants_dc_first } else { quants_ac };

    let coeffs = _mm256_load_si256(src);
    let magnitudes = _mm256_abs_epi16(coeffs);

    // Widen to 32 bits by interleaving with zero. The unpacks work within
    // each 128-bit half, so `lo` holds coefficients 0-3 and 8-11 while `hi`
    // holds 4-7 and 12-15; the pack below restores the original order.
    let lo = _mm256_unpacklo_epi16(magnitudes, zero);
    let hi = _mm256_unpackhi_epi16(magnitudes, zero);

    // With the upper 16-bit half of each widened lane zeroed, the second
    // product of every madd pair vanishes, leaving magnitude * quantizer.
    let scaled_lo = _mm256_srlv_epi32(_mm256_madd_epi16(quants_lo, lo), shift);
    let scaled_hi = _mm256_srlv_epi32(_mm256_madd_epi16(quants_ac, hi), shift);

    // Narrow back to 16 bits with signed saturation, then reapply the sign
    // of the original coefficients.
    let packed = _mm256_packs_epi32(scaled_lo, scaled_hi);
    _mm256_store_si256(dst, _mm256_sign_epi16(packed, coeffs));
  }
}
#[cfg(test)]
mod test {
  use super::*;
  use rand::distr::{Distribution, Uniform};
  use rand::{rng, Rng};

  /// Exercises `dequantize` for every transform size with a random quantizer
  /// index and a spread of end-of-block positions.
  ///
  /// There are no explicit assertions here: in test builds `dequantize`
  /// itself compares the implementation it dispatched to against the Rust
  /// reference and panics on a mismatch.
  #[test]
  fn dequantize_test() {
    use TxSize::*;

    let mut rng = rng();
    let bit_depth: usize = 8;

    let all_tx_sizes = [
      TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4, TX_8X16,
      TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16, TX_16X4,
      TX_8X32, TX_32X8, TX_16X64, TX_64X16,
    ];

    // Full-range source values; dividing by the quantizer below yields the
    // quantized coefficients fed to `dequantize`.
    let full_range = Uniform::new_inclusive(-i16::MAX, i16::MAX).unwrap();

    for &tx_size in all_tx_sizes.iter() {
      let qindex: u8 = rng.random_range((MINQ as u8)..(MAXQ as u8));
      let dc_quant = dc_q(qindex, 0, bit_depth).get() as i16;
      let ac_quant = ac_q(qindex, 0, bit_depth).get() as i16;

      // End-of-block positions to try: the minimum (0), the maximum (the
      // whole coded area) and fourteen random positions in between.
      let area: usize = av1_get_coded_tx_size(tx_size).area();
      let mut eobs = [0u16; 16];
      eobs[1] = area as u16;
      for slot in &mut eobs[2..] {
        *slot = rng.random_range(0..area as u16);
      }

      for eob in eobs.iter().copied() {
        let mut qcoeffs = Aligned::new([0i16; 32 * 32]);
        let mut rcoeffs = Aligned::new([MaybeUninit::new(0i16); 32 * 32]);

        // Populate coefficients up to the eob; everything after stays zero.
        let filled = qcoeffs.data.iter_mut().take(eob as usize);
        for (i, qcoeff) in filled.enumerate() {
          let divisor = if i == 0 { dc_quant } else { ac_quant };
          *qcoeff = full_range.sample(&mut rng) / divisor;
        }

        dequantize(
          qindex,
          &qcoeffs.data,
          eob,
          &mut rcoeffs.data,
          tx_size,
          bit_depth,
          0,
          0,
          CpuFeatureLevel::default(),
        );
      }
    }
  }
}