#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::context::av1_get_coded_tx_size;
use crate::cpu_features::CpuFeatureLevel;
use crate::quantize::*;
use crate::transform::TxSize;
use crate::util::*;
/// Signature shared by the SIMD dequantization kernels stored in
/// `DEQUANTIZE_FNS`.
///
/// Kernels receive raw `i16` pointers rather than slices, so the pointer is
/// `unsafe` to call: the caller is responsible for whatever validity
/// requirements the concrete kernel places on `coeffs_ptr` and `rcoeffs_ptr`.
type DequantizeFn = unsafe fn(
qindex: u8,
coeffs_ptr: *const i16,
_eob: usize,
rcoeffs_ptr: *mut i16,
tx_size: TxSize,
bit_depth: usize,
dc_delta_q: i8,
ac_delta_q: i8,
);
// Dispatch table of optional SIMD kernels. `dequantize` indexes it with
// `CpuFeatureLevel::as_index()`; a `None` entry makes it fall back to the
// portable Rust implementation. Only an AVX2 kernel is registered here.
// NOTE(review): how `cpu_function_lookup_table!` fills the entries for CPU
// levels other than AVX2 is decided by the macro, which is defined elsewhere.
cpu_function_lookup_table!(
DEQUANTIZE_FNS: [Option<DequantizeFn>],
default: None,
[(AVX2, Some(dequantize_avx2))]
);
#[inline(always)]
pub fn dequantize<T: Coefficient>(
qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [T], tx_size: TxSize,
bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8, cpu: CpuFeatureLevel,
) {
let call_rust = |rcoeffs: &mut [T]| {
crate::quantize::rust::dequantize(
qindex, coeffs, eob, rcoeffs, tx_size, bit_depth, dc_delta_q,
ac_delta_q, cpu,
);
};
#[cfg(any(feature = "check_asm", test))]
let ref_rcoeffs = {
let area = av1_get_coded_tx_size(tx_size).area();
let mut copy = vec![T::cast_from(0); area];
copy[..].copy_from_slice(&rcoeffs[..area]);
call_rust(&mut copy);
copy
};
match T::Pixel::type_enum() {
PixelType::U8 => {
if let Some(func) = DEQUANTIZE_FNS[cpu.as_index()] {
unsafe {
(func)(
qindex,
coeffs.as_ptr() as *const _,
eob,
rcoeffs.as_mut_ptr() as *mut _,
tx_size,
bit_depth,
dc_delta_q,
ac_delta_q,
)
}
} else {
call_rust(rcoeffs)
}
}
PixelType::U16 => call_rust(rcoeffs),
}
#[cfg(any(feature = "check_asm", test))]
{
let area = av1_get_coded_tx_size(tx_size).area();
assert_eq!(&rcoeffs[..area], &ref_rcoeffs[..]);
}
}
/// AVX2 dequantization kernel for 16-bit coefficients.
///
/// Each input coefficient `c` is turned into
/// `sign(c) * ((|c| * q) >> get_log_tx_scale(tx_size))`, packed back to 16
/// bits with signed saturation. `q` is the DC quantizer for the very first
/// coefficient of the block and the AC quantizer for every other one.
/// `_eob` is ignored: the whole coded area of `tx_size` is always processed.
///
/// # Safety
///
/// * The CPU must support AVX2.
/// * `coeffs_ptr` must be valid for reads, and `rcoeffs_ptr` valid for
///   writes, of every 16-element vector visited while stepping through
///   `0..av1_get_coded_tx_size(tx_size).area()`.
/// * Both pointers must be 32-byte aligned, because aligned 256-bit loads and
///   stores are used.
///
/// # Panics
///
/// Panics if the coded area is smaller than one vector (16 coefficients).
#[target_feature(enable = "avx2")]
unsafe fn dequantize_avx2(
  qindex: u8, coeffs_ptr: *const i16, _eob: usize, rcoeffs_ptr: *mut i16,
  tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
) {
  // Per-element right shift applied after the multiplication.
  let shift = _mm256_set1_epi32(get_log_tx_scale(tx_size) as i32);

  // Every 32-bit element carries the AC quantizer...
  let ac_quants =
    _mm256_set1_epi32(ac_q(qindex, ac_delta_q, bit_depth) as i32);
  // ...except element 0 of this variant, which carries the DC quantizer. It
  // is only used for the first vector of the block.
  let dc_then_ac_quants = _mm256_insert_epi32(
    ac_quants,
    dc_q(qindex, dc_delta_q, bit_depth) as i32,
    0,
  );

  let area: usize = av1_get_coded_tx_size(tx_size).area();
  // One 256-bit vector holds 16 coefficients of 16 bits each.
  let step: usize = 16;
  assert!(area >= step);

  let zero = _mm256_setzero_si256();

  for offset in (0..area).step_by(step) {
    let src = coeffs_ptr.add(offset) as *const __m256i;
    let dst = rcoeffs_ptr.add(offset) as *mut __m256i;

    // The DC quantizer applies to the very first coefficient only, which
    // lives in the low half of the first vector.
    let quants_lo = if offset == 0 { dc_then_ac_quants } else { ac_quants };

    let quantized = _mm256_load_si256(src);
    let magnitudes = _mm256_abs_epi16(quantized);

    // Interleaving with zero widens the magnitudes to 32 bits: the low four
    // values of each 128-bit lane go to `widened_lo`, the high four to
    // `widened_hi`.
    let widened_lo = _mm256_unpacklo_epi16(magnitudes, zero);
    let widened_hi = _mm256_unpackhi_epi16(magnitudes, zero);

    // The upper 16 bits of every widened element are zero, so the
    // multiply-add yields `|c| * q` per 32-bit element; then apply the shift.
    let scaled_lo =
      _mm256_srlv_epi32(_mm256_madd_epi16(quants_lo, widened_lo), shift);
    let scaled_hi =
      _mm256_srlv_epi32(_mm256_madd_epi16(ac_quants, widened_hi), shift);

    // Packing narrows back to 16 bits and restores the original element
    // order within each lane; finally re-apply the sign of the input.
    let packed = _mm256_packs_epi32(scaled_lo, scaled_hi);
    _mm256_store_si256(dst, _mm256_sign_epi16(packed, quantized));
  }
}
#[cfg(test)]
mod test {
  use super::*;
  use rand::{thread_rng, Rng};

  /// Feeds random 8-bit quantized blocks of every transform size through
  /// `dequantize`.
  ///
  /// There is no explicit assertion here: in test builds `dequantize` itself
  /// compares its output against the Rust reference implementation and
  /// panics on a mismatch.
  #[test]
  fn dequantize_test() {
    use TxSize::*;

    let bit_depth: usize = 8;
    let mut rng = thread_rng();

    let all_tx_sizes = [
      TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4, TX_8X16,
      TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16, TX_16X4,
      TX_8X32, TX_32X8, TX_16X64, TX_64X16,
    ];

    for &tx_size in all_tx_sizes.iter() {
      let qindex: u8 = rng.gen_range(MINQ as u8, MAXQ as u8);
      let dc_quant: i16 = dc_q(qindex, 0, bit_depth);
      let ac_quant: i16 = ac_q(qindex, 0, bit_depth);

      // End-of-block candidates: an empty block, a completely filled block,
      // and random positions for the remaining slots.
      let area: usize = av1_get_coded_tx_size(tx_size).area();
      let mut eobs = [0usize; 16];
      eobs[1] = area;
      for slot in eobs[2..].iter_mut() {
        *slot = rng.gen_range(0, area);
      }

      for &eob in eobs.iter() {
        let mut qcoeffs = Aligned::new([0i16; 32 * 32]);
        let mut rcoeffs = Aligned::new([0i16; 32 * 32]);

        // Only the first `eob` coefficients are non-zero candidates; each is
        // a random value divided by the quantizer that applies to its
        // position.
        for (i, qcoeff) in qcoeffs.data[..eob].iter_mut().enumerate() {
          let divisor = if i == 0 { dc_quant } else { ac_quant };
          *qcoeff = rng.gen::<i16>() / divisor;
        }

        dequantize(
          qindex,
          &qcoeffs.data,
          eob,
          &mut rcoeffs.data,
          tx_size,
          bit_depth,
          0,
          0,
          CpuFeatureLevel::default(),
        );
      }
    }
  }
}