use super::constants::*;
use super::forward::dct1d_16;
use super::inverse::{idct1d_4, idct1d_8};
use crate::vardct::common::{as_array_mut, as_array_ref};
/// In-place 32-point 1-D DCT, built from two `dct1d_16` calls.
///
/// The first 32 elements of `mem` are read and then overwritten with the
/// result; any elements beyond index 31 are left untouched.
///
/// # Panics
///
/// Panics (index out of bounds) if `mem` has fewer than 32 elements.
pub fn dct1d_32(mem: &mut [f32]) {
    const HALF: usize = 16;
    const LEN: usize = 2 * HALF;

    // Lower half holds mirrored sums, upper half mirrored differences.
    let mut work = [0.0f32; LEN];
    for k in 0..HALF {
        let head = mem[k];
        let tail = mem[LEN - 1 - k];
        work[k] = head + tail;
        work[HALF + k] = head - tail;
    }

    // The sums are transformed directly.
    dct1d_16(&mut work[..HALF]);

    // The differences are weighted by the per-index multipliers first.
    for (k, value) in work[HALF..].iter_mut().enumerate() {
        *value *= WC_MULTIPLIERS_32[k];
    }
    dct1d_16(&mut work[HALF..]);

    // Post-process the upper half: the first entry is scaled by SQRT2 and
    // every entry except the last absorbs its (not yet modified) successor.
    work[HALF] = SQRT2 * work[HALF] + work[HALF + 1];
    for k in (HALF + 1)..(LEN - 1) {
        work[k] += work[k + 1];
    }

    // Interleave: lower half goes to even slots, upper half to odd slots.
    for (k, pair) in mem[..LEN].chunks_exact_mut(2).enumerate() {
        pair[0] = work[k];
        pair[1] = work[HALF + k];
    }
}
/// Runs the 32x32 transform on a 1024-element block.
///
/// This is a thin wrapper: all work is delegated to `jxl_simd::dct_32x32`,
/// which reads `input` and writes its result into `output`.
/// NOTE(review): presumably a forward 2-D DCT; the coefficient layout and
/// scaling are defined by `jxl_simd` — confirm there.
pub fn dct_32x32(input: &[f32; 1024], output: &mut [f32; 1024]) {
jxl_simd::dct_32x32(input, output);
}
/// Derives a 4x4 low-frequency block from 32x32 transform coefficients.
///
/// Reads the top-left 4x4 corner of `coeffs` (treated as rows of 32 values),
/// scales each entry by `DCT_RESAMPLE_SCALE_32_TO_4` for its row and column
/// and by a constant 16.0, then applies `idct1d_4` along rows, transposes,
/// and applies `idct1d_4` along rows again.
///
/// The returned array is in the transposed orientation: the value derived
/// from coefficient position `(iy, ix)` ends up in the row indexed by `ix`.
/// NOTE(review): the 16.0 factor presumably compensates for normalisation
/// inside `idct1d_4` — confirm against that helper.
pub fn dc_from_dct_32x32(coeffs: &[f32; 1024]) -> [f32; 16] {
    const SIDE: usize = 4;
    const STRIDE: usize = 32;

    // Gather and rescale the low-frequency corner.
    let mut low = [0.0f32; SIDE * SIDE];
    for (r, row) in low.chunks_exact_mut(SIDE).enumerate() {
        for (c, cell) in row.iter_mut().enumerate() {
            *cell = coeffs[r * STRIDE + c]
                * DCT_RESAMPLE_SCALE_32_TO_4[r]
                * DCT_RESAMPLE_SCALE_32_TO_4[c]
                * 16.0;
        }
    }

    // First 1-D pass over each row.
    for row in low.chunks_exact_mut(SIDE) {
        idct1d_4(row);
    }

    // Transpose so the second pass can again work on contiguous rows.
    let mut out = [0.0f32; SIDE * SIDE];
    for (i, &value) in low.iter().enumerate() {
        let (r, c) = (i / SIDE, i % SIDE);
        out[c * SIDE + r] = value;
    }

    // Second 1-D pass.
    for row in out.chunks_exact_mut(SIDE) {
        idct1d_4(row);
    }
    out
}
/// Runs the 32x16 transform on a 512-element block.
///
/// Thin wrapper that delegates to `jxl_simd::dct_32x16`, which reads `input`
/// and writes its result into `output`.
/// NOTE(review): which dimension is rows vs. columns, and the output layout,
/// are defined by `jxl_simd` — confirm there.
pub fn dct_32x16(input: &[f32; 512], output: &mut [f32; 512]) {
jxl_simd::dct_32x16(input, output);
}
/// Runs the 16x32 transform on a 512-element block.
///
/// Thin wrapper that delegates to `jxl_simd::dct_16x32`, which reads `input`
/// and writes its result into `output`.
/// NOTE(review): which dimension is rows vs. columns, and the output layout,
/// are defined by `jxl_simd` — confirm there.
pub fn dct_16x32(input: &[f32; 512], output: &mut [f32; 512]) {
jxl_simd::dct_16x32(input, output);
}
/// Derives an 8-value low-frequency block from 32x16 transform coefficients.
///
/// Reads a 2-row by 4-column corner of `coeffs` (rows of 32 values), scales
/// rows by `DCT_RESAMPLE_SCALE_16_TO_2`, columns by
/// `DCT_RESAMPLE_SCALE_32_TO_4`, and everything by a constant 4.0. Each row
/// then goes through `idct1d_4`, and the two rows are combined element-wise
/// with a sum/difference butterfly.
///
/// The result is laid out as four consecutive `(sum, difference)` pairs,
/// i.e. 4 rows of 2 values.
pub fn dc_from_dct_32x16(coeffs: &[f32; 512]) -> [f32; 8] {
    const ROWS: usize = 2;
    const COLS: usize = 4;
    const STRIDE: usize = 32;

    // Gather and rescale the low-frequency corner.
    let mut low = [0.0f32; ROWS * COLS];
    for (r, row) in low.chunks_exact_mut(COLS).enumerate() {
        for (c, cell) in row.iter_mut().enumerate() {
            *cell = coeffs[r * STRIDE + c]
                * DCT_RESAMPLE_SCALE_16_TO_2[r]
                * DCT_RESAMPLE_SCALE_32_TO_4[c]
                * 4.0;
        }
    }

    // 4-point pass along each of the two rows.
    for row in low.chunks_exact_mut(COLS) {
        idct1d_4(row);
    }

    // Transpose and 2-point butterfly in one step: output pair `k` combines
    // element `k` of the first row with element `k` of the second row.
    let (top, bottom) = low.split_at(COLS);
    let mut out = [0.0f32; ROWS * COLS];
    for (pair, (&a, &b)) in out.chunks_exact_mut(ROWS).zip(top.iter().zip(bottom.iter())) {
        pair[0] = a + b;
        pair[1] = a - b;
    }
    out
}
/// Derives an 8-value low-frequency block from 16x32 transform coefficients.
///
/// Reads a 2-row by 4-column corner of `coeffs` (rows of 32 values), scales
/// rows by `DCT_RESAMPLE_SCALE_16_TO_2`, columns by
/// `DCT_RESAMPLE_SCALE_32_TO_4`, and everything by a constant 4.0. Each row
/// then goes through `idct1d_4`, and the two rows are combined element-wise
/// with a sum/difference butterfly.
///
/// The result is laid out as 2 rows of 4 values: the four sums first,
/// followed by the four differences.
pub fn dc_from_dct_16x32(coeffs: &[f32; 512]) -> [f32; 8] {
    const ROWS: usize = 2;
    const COLS: usize = 4;
    const STRIDE: usize = 32;

    // Gather and rescale the low-frequency corner.
    let mut low = [0.0f32; ROWS * COLS];
    for (r, row) in low.chunks_exact_mut(COLS).enumerate() {
        for (c, cell) in row.iter_mut().enumerate() {
            *cell = coeffs[r * STRIDE + c]
                * DCT_RESAMPLE_SCALE_16_TO_2[r]
                * DCT_RESAMPLE_SCALE_32_TO_4[c]
                * 4.0;
        }
    }

    // 4-point pass along each of the two rows.
    for row in low.chunks_exact_mut(COLS) {
        idct1d_4(row);
    }

    // 2-point butterfly across the two rows, written directly into the
    // final two-row layout (sums in the first row, differences in the second).
    let (top, bottom) = low.split_at(COLS);
    let mut result = [0.0f32; ROWS * COLS];
    for k in 0..COLS {
        let (a, b) = (top[k], bottom[k]);
        result[k] = a + b;
        result[COLS + k] = a - b;
    }
    result
}
/// In-place 64-point 1-D DCT, built from two `dct1d_32` calls.
///
/// The first 64 elements of `mem` are read and then overwritten with the
/// result; any elements beyond index 63 are left untouched.
///
/// # Panics
///
/// Panics (index out of bounds) if `mem` has fewer than 64 elements.
pub fn dct1d_64(mem: &mut [f32]) {
    const HALF: usize = 32;
    const LEN: usize = 2 * HALF;

    // Lower half holds mirrored sums, upper half mirrored differences.
    let mut work = [0.0f32; LEN];
    for k in 0..HALF {
        let head = mem[k];
        let tail = mem[LEN - 1 - k];
        work[k] = head + tail;
        work[HALF + k] = head - tail;
    }

    // The sums are transformed directly.
    dct1d_32(&mut work[..HALF]);

    // The differences are weighted by the per-index multipliers first.
    for (k, value) in work[HALF..].iter_mut().enumerate() {
        *value *= WC_MULTIPLIERS_64[k];
    }
    dct1d_32(&mut work[HALF..]);

    // Post-process the upper half: the first entry is scaled by SQRT2 and
    // every entry except the last absorbs its (not yet modified) successor.
    work[HALF] = SQRT2 * work[HALF] + work[HALF + 1];
    for k in (HALF + 1)..(LEN - 1) {
        work[k] += work[k + 1];
    }

    // Interleave: lower half goes to even slots, upper half to odd slots.
    for (k, pair) in mem[..LEN].chunks_exact_mut(2).enumerate() {
        pair[0] = work[k];
        pair[1] = work[HALF + k];
    }
}
/// Runs the 64x64 transform via `jxl_simd::dct_64x64`.
///
/// Both slices are expected to hold at least 4096 values; this is only
/// checked in debug builds. The slices are converted with `as_array_ref` /
/// `as_array_mut` at offset 0 before being handed to the SIMD routine.
/// NOTE(review): behaviour on too-short slices in release builds depends on
/// the `as_array_*` helpers — confirm there.
pub fn dct_64x64(input: &[f32], output: &mut [f32]) {
    debug_assert!(input.len() >= 4096);
    debug_assert!(output.len() >= 4096);

    let src = as_array_ref(input, 0);
    let dst = as_array_mut(output, 0);
    jxl_simd::dct_64x64(src, dst);
}
/// Runs the 64x32 transform via `jxl_simd::dct_64x32`.
///
/// Both slices are expected to hold at least 2048 values; this is only
/// checked in debug builds. The slices are converted with `as_array_ref` /
/// `as_array_mut` at offset 0 before being handed to the SIMD routine.
/// NOTE(review): behaviour on too-short slices in release builds depends on
/// the `as_array_*` helpers — confirm there.
pub fn dct_64x32(input: &[f32], output: &mut [f32]) {
    debug_assert!(input.len() >= 2048);
    debug_assert!(output.len() >= 2048);

    let src = as_array_ref(input, 0);
    let dst = as_array_mut(output, 0);
    jxl_simd::dct_64x32(src, dst);
}
/// Runs the 32x64 transform via `jxl_simd::dct_32x64`.
///
/// Both slices are expected to hold at least 2048 values; this is only
/// checked in debug builds. The slices are converted with `as_array_ref` /
/// `as_array_mut` at offset 0 before being handed to the SIMD routine.
/// NOTE(review): behaviour on too-short slices in release builds depends on
/// the `as_array_*` helpers — confirm there.
pub fn dct_32x64(input: &[f32], output: &mut [f32]) {
    debug_assert!(input.len() >= 2048);
    debug_assert!(output.len() >= 2048);

    let src = as_array_ref(input, 0);
    let dst = as_array_mut(output, 0);
    jxl_simd::dct_32x64(src, dst);
}
/// Derives an 8x8 low-frequency block from 64x64 transform coefficients.
///
/// Reads the top-left 8x8 corner of `coeffs` (treated as rows of 64 values),
/// scales each entry by `DCT_RESAMPLE_SCALE_64_TO_8` for its row and column,
/// then applies `idct1d_8` along rows, transposes, and applies `idct1d_8`
/// along rows again.
///
/// The returned array is in the transposed orientation: the value derived
/// from coefficient position `(iy, ix)` ends up in the row indexed by `ix`.
///
/// `coeffs` is expected to hold at least 4096 values (checked in debug
/// builds only); only indices up to 455 are actually read.
pub fn dc_from_dct_64x64(coeffs: &[f32]) -> [f32; 64] {
    const SIDE: usize = 8;
    const STRIDE: usize = 64;

    debug_assert!(coeffs.len() >= 4096);

    // Gather and rescale the low-frequency corner.
    let mut low = [0.0f32; SIDE * SIDE];
    for (r, row) in low.chunks_exact_mut(SIDE).enumerate() {
        for (c, cell) in row.iter_mut().enumerate() {
            *cell = coeffs[r * STRIDE + c]
                * DCT_RESAMPLE_SCALE_64_TO_8[r]
                * DCT_RESAMPLE_SCALE_64_TO_8[c];
        }
    }

    // First 1-D pass over each row.
    for row in low.chunks_exact_mut(SIDE) {
        idct1d_8(row);
    }

    // Transpose so the second pass can again work on contiguous rows.
    let mut out = [0.0f32; SIDE * SIDE];
    for (i, &value) in low.iter().enumerate() {
        let (r, c) = (i / SIDE, i % SIDE);
        out[c * SIDE + r] = value;
    }

    // Second 1-D pass.
    for row in out.chunks_exact_mut(SIDE) {
        idct1d_8(row);
    }
    out
}
/// Derives a 32-value low-frequency block from 64x32 transform coefficients.
///
/// Reads a 4-row by 8-column corner of `coeffs` (rows of 64 values), scales
/// rows by `DCT_RESAMPLE_SCALE_32_TO_4`, columns by
/// `DCT_RESAMPLE_SCALE_64_TO_8`, and everything by a constant 4.0. Rows go
/// through `idct1d_8`; the block is then transposed and each of the
/// resulting 8 rows goes through `idct1d_4`.
///
/// The result is returned in that transposed layout: 8 rows of 4 values.
///
/// `coeffs` is expected to hold at least 2048 values (checked in debug
/// builds only).
pub fn dc_from_dct_64x32(coeffs: &[f32]) -> [f32; 32] {
    const ROWS: usize = 4;
    const COLS: usize = 8;
    const STRIDE: usize = 64;

    debug_assert!(coeffs.len() >= 2048);

    // Gather and rescale the low-frequency corner.
    let mut low = [0.0f32; ROWS * COLS];
    for (r, row) in low.chunks_exact_mut(COLS).enumerate() {
        for (c, cell) in row.iter_mut().enumerate() {
            *cell = coeffs[r * STRIDE + c]
                * DCT_RESAMPLE_SCALE_32_TO_4[r]
                * DCT_RESAMPLE_SCALE_64_TO_8[c]
                * 4.0;
        }
    }

    // 8-point pass along each of the four rows.
    for row in low.chunks_exact_mut(COLS) {
        idct1d_8(row);
    }

    // Transpose 4x8 -> 8x4 so the former columns become contiguous.
    let mut out = [0.0f32; ROWS * COLS];
    for (i, &value) in low.iter().enumerate() {
        let (r, c) = (i / COLS, i % COLS);
        out[c * ROWS + r] = value;
    }

    // 4-point pass along each former column.
    for column in out.chunks_exact_mut(ROWS) {
        idct1d_4(column);
    }
    out
}
/// Derives a 32-value low-frequency block from 32x64 transform coefficients.
///
/// Reads a 4-row by 8-column corner of `coeffs` (rows of 64 values), scales
/// rows by `DCT_RESAMPLE_SCALE_32_TO_4`, columns by
/// `DCT_RESAMPLE_SCALE_64_TO_8`, and everything by a constant 4.0. Rows go
/// through `idct1d_8`; the block is then transposed, each of the resulting
/// 8 rows goes through `idct1d_4`, and the block is transposed back.
///
/// The result is laid out as 4 rows of 8 values.
///
/// `coeffs` is expected to hold at least 2048 values (checked in debug
/// builds only).
pub fn dc_from_dct_32x64(coeffs: &[f32]) -> [f32; 32] {
    const ROWS: usize = 4;
    const COLS: usize = 8;
    const STRIDE: usize = 64;

    debug_assert!(coeffs.len() >= 2048);

    // Gather and rescale the low-frequency corner.
    let mut low = [0.0f32; ROWS * COLS];
    for (r, row) in low.chunks_exact_mut(COLS).enumerate() {
        for (c, cell) in row.iter_mut().enumerate() {
            *cell = coeffs[r * STRIDE + c]
                * DCT_RESAMPLE_SCALE_32_TO_4[r]
                * DCT_RESAMPLE_SCALE_64_TO_8[c]
                * 4.0;
        }
    }

    // 8-point pass along each of the four rows.
    for row in low.chunks_exact_mut(COLS) {
        idct1d_8(row);
    }

    // Transpose 4x8 -> 8x4 so the former columns become contiguous.
    let mut flipped = [0.0f32; ROWS * COLS];
    for (i, &value) in low.iter().enumerate() {
        let (r, c) = (i / COLS, i % COLS);
        flipped[c * ROWS + r] = value;
    }

    // 4-point pass along each former column.
    for column in flipped.chunks_exact_mut(ROWS) {
        idct1d_4(column);
    }

    // Transpose back 8x4 -> 4x8 to restore the original orientation.
    let mut result = [0.0f32; ROWS * COLS];
    for (j, &value) in flipped.iter().enumerate() {
        let (c, r) = (j / ROWS, j % ROWS);
        result[r * COLS + c] = value;
    }
    result
}