use super::constants::*;
/// In-place 2-point butterfly on the first two elements of `mem`.
///
/// After the call `mem[0]` holds the sum and `mem[1]` the difference
/// (`first - second`) of the two original values. No scaling is applied and
/// any elements past index 1 are left untouched.
///
/// # Panics
///
/// Panics if `mem` has fewer than two elements.
#[inline]
pub fn dct1d_2(mem: &mut [f32]) {
    // Read both inputs before writing so the second output uses the original mem[0].
    let (first, second) = (mem[0], mem[1]);
    mem[0] = first + second;
    mem[1] = first - second;
}
/// By-value 4-point 1-D transform kernel.
///
/// Takes the four samples `a, b, c, d` (in order) and returns the four output
/// coefficients in index order. The inputs are folded around the centre
/// (`a` with `d`, `b` with `c`): the sums produce outputs 0 and 2, the
/// differences — weighted by `WC_MULTIPLIERS_4` — produce outputs 1 and 3.
///
/// No normalisation is applied here; callers in this file scale the result
/// themselves.
#[inline(always)]
fn dct1d_4_val(a: f32, b: f32, c: f32, d: f32) -> [f32; 4] {
    // Mirror-fold the inputs: outer pair (a, d) and inner pair (b, c).
    let (outer_sum, inner_sum) = (a + d, b + c);
    let (outer_diff, inner_diff) = (a - d, b - c);

    // Even-indexed outputs: a 2-point butterfly on the folded sums.
    let even = [outer_sum + inner_sum, outer_sum - inner_sum];

    // Odd-indexed outputs: weight the differences, butterfly them, then fold
    // the second term into the first with a sqrt(2) factor.
    let weighted_outer = outer_diff * WC_MULTIPLIERS_4[0];
    let weighted_inner = inner_diff * WC_MULTIPLIERS_4[1];
    let odd_tail = weighted_outer - weighted_inner;
    let odd_head = SQRT2 * (weighted_outer + weighted_inner) + odd_tail;

    [even[0], odd_head, even[1], odd_tail]
}
/// In-place 4-point 1-D transform of the first four elements of `mem`.
///
/// The values are read, passed through `dct1d_4_val`, and the four results
/// are written back to `mem[0..4]`. Elements past index 3 are not modified.
///
/// # Panics
///
/// Panics if `mem` has fewer than four elements.
#[inline(always)]
pub fn dct1d_4(mem: &mut [f32]) {
    let transformed = dct1d_4_val(mem[0], mem[1], mem[2], mem[3]);
    mem[..4].copy_from_slice(&transformed);
}
/// By-value 8-point 1-D transform kernel built from two 4-point kernels.
///
/// The input is folded around its centre (`m[i]` with `m[7 - i]`). The sums go
/// straight through `dct1d_4_val` and become the even-indexed outputs. The
/// differences are weighted by `WC_MULTIPLIERS_8`, transformed by
/// `dct1d_4_val`, and then recombined (each term absorbs its successor, the
/// first one with a sqrt(2) factor) to become the odd-indexed outputs.
///
/// No normalisation is applied here.
#[inline(always)]
fn dct1d_8_val(m: [f32; 8]) -> [f32; 8] {
    let [x0, x1, x2, x3, x4, x5, x6, x7] = m;

    // Even half: 4-point transform of the mirrored sums.
    let [e0, e1, e2, e3] = dct1d_4_val(x0 + x7, x1 + x6, x2 + x5, x3 + x4);

    // Odd half: 4-point transform of the weighted mirrored differences.
    let [o0, o1, o2, o3] = dct1d_4_val(
        (x0 - x7) * WC_MULTIPLIERS_8[0],
        (x1 - x6) * WC_MULTIPLIERS_8[1],
        (x2 - x5) * WC_MULTIPLIERS_8[2],
        (x3 - x4) * WC_MULTIPLIERS_8[3],
    );

    // Interleave even and (recombined) odd outputs; the last odd term passes
    // through unchanged.
    [
        e0,
        SQRT2 * o0 + o1,
        e1,
        o1 + o2,
        e2,
        o2 + o3,
        e3,
        o3,
    ]
}
/// In-place 8-point 1-D transform of the first eight elements of `mem`.
///
/// The values are copied out, run through `dct1d_8_val`, and the eight results
/// are written back to `mem[0..8]`. Elements past index 7 are not modified.
///
/// # Panics
///
/// Panics if `mem` has fewer than eight elements.
pub fn dct1d_8(mem: &mut [f32]) {
    let head = &mut mem[..8];
    let mut samples = [0.0f32; 8];
    samples.copy_from_slice(head);
    let transformed = dct1d_8_val(samples);
    head.copy_from_slice(&transformed);
}
/// In-place 16-point 1-D transform of the first sixteen elements of `mem`.
///
/// Same recursive scheme as the smaller kernels in this file: the input is
/// folded around its centre, the sums are transformed with `dct1d_8` to give
/// the even-indexed outputs, and the differences are weighted by
/// `WC_MULTIPLIERS_16`, transformed with `dct1d_8`, recombined, and written to
/// the odd-indexed outputs. No normalisation is applied here.
///
/// # Panics
///
/// Panics if `mem` has fewer than sixteen elements.
pub fn dct1d_16(mem: &mut [f32]) {
    let mut scratch = [0.0f32; 16];
    let (even, odd) = scratch.split_at_mut(8);

    // Mirror-fold: even[k] = mem[k] + mem[15 - k], odd[k] = mem[k] - mem[15 - k].
    for k in 0..8 {
        let (front, back) = (mem[k], mem[15 - k]);
        even[k] = front + back;
        odd[k] = front - back;
    }

    dct1d_8(even);

    for (k, value) in odd.iter_mut().enumerate() {
        *value *= WC_MULTIPLIERS_16[k];
    }
    dct1d_8(odd);

    // Recombine the odd half: the first term gets a sqrt(2) factor, terms
    // 1..=6 absorb their successor, and the last term is left as is.
    odd[0] = SQRT2 * odd[0] + odd[1];
    for k in 1..7 {
        odd[k] += odd[k + 1];
    }

    // Interleave: even results land on even indices, odd results on odd ones.
    for (pair, (&e, &o)) in mem.chunks_exact_mut(2).zip(even.iter().zip(odd.iter())) {
        pair[0] = e;
        pair[1] = o;
    }
}
/// Transforms a 64-element block by delegating to `jxl_simd::dct_8x8`.
///
/// `input` is read and `output` is written by the callee; the coefficient
/// layout and scaling are defined by `jxl_simd` and are not visible here.
#[inline]
pub fn dct_8x8(input: &[f32; 64], output: &mut [f32; 64]) {
jxl_simd::dct_8x8(input, output);
}
/// Separable 2-D transform of a block stored as 4 rows of 8 values.
///
/// Pass 1 runs the 8-point kernel over each of the 4 input rows, scales by
/// 1/8, and stores the result transposed (8 rows of 4). Pass 2 runs the
/// 4-point kernel over each of those 8 rows, scales by 1/4, and transposes
/// again on write, so `output` is laid out as 4 rows of 8 like the input.
#[inline(always)]
pub fn dct_4x8(input: &[f32; 32], output: &mut [f32; 32]) {
    const LONG_SCALE: f32 = 1.0 / 8.0;
    const SHORT_SCALE: f32 = 1.0 / 4.0;

    let mut transposed = [0.0f32; 32];

    // Pass 1: 8-point transform along each input row, written transposed.
    for (y, line) in input.chunks_exact(8).enumerate() {
        let mut lane = [0.0f32; 8];
        lane.copy_from_slice(line);
        let spectrum = dct1d_8_val(lane);
        for (x, &coeff) in spectrum.iter().enumerate() {
            transposed[x * 4 + y] = coeff * LONG_SCALE;
        }
    }

    // Pass 2: 4-point transform along each intermediate row, transposed back.
    for (x, line) in transposed.chunks_exact(4).enumerate() {
        let spectrum = dct1d_4_val(line[0], line[1], line[2], line[3]);
        for (y, &coeff) in spectrum.iter().enumerate() {
            output[y * 8 + x] = coeff * SHORT_SCALE;
        }
    }
}
/// Separable 2-D transform of a block stored as 8 rows of 4 values.
///
/// Pass 1 runs the 4-point kernel over each of the 8 input rows, scales by
/// 1/4, and stores the result transposed (4 rows of 8). Pass 2 runs the
/// 8-point kernel over each of those 4 rows, scales by 1/8, and writes the
/// rows out in place — so `output` is laid out as 4 rows of 8, i.e.
/// transposed relative to the input.
#[inline(always)]
pub fn dct_8x4(input: &[f32; 32], output: &mut [f32; 32]) {
    const SHORT_SCALE: f32 = 1.0 / 4.0;
    const LONG_SCALE: f32 = 1.0 / 8.0;

    let mut transposed = [0.0f32; 32];

    // Pass 1: 4-point transform along each input row, written transposed.
    for (y, line) in input.chunks_exact(4).enumerate() {
        let spectrum = dct1d_4_val(line[0], line[1], line[2], line[3]);
        for (x, &coeff) in spectrum.iter().enumerate() {
            transposed[x * 8 + y] = coeff * SHORT_SCALE;
        }
    }

    // Pass 2: 8-point transform along each intermediate row, written row by row.
    for (line, out_line) in transposed.chunks_exact(8).zip(output.chunks_exact_mut(8)) {
        let mut lane = [0.0f32; 8];
        lane.copy_from_slice(line);
        let spectrum = dct1d_8_val(lane);
        for (dst, &coeff) in out_line.iter_mut().zip(spectrum.iter()) {
            *dst = coeff * LONG_SCALE;
        }
    }
}
/// Transforms a 64-element block by delegating to `jxl_simd::dct_4x8_full`.
///
/// How the 64 values are partitioned and how the output coefficients are
/// arranged is decided by `jxl_simd`; nothing in this wrapper constrains it.
#[inline(always)]
pub fn dct_4x8_full(input: &[f32; 64], output: &mut [f32; 64]) {
jxl_simd::dct_4x8_full(input, output);
}
/// Transforms a 64-element block by delegating to `jxl_simd::dct_8x4_full`.
///
/// How the 64 values are partitioned and how the output coefficients are
/// arranged is decided by `jxl_simd`; nothing in this wrapper constrains it.
#[inline(always)]
pub fn dct_8x4_full(input: &[f32; 64], output: &mut [f32; 64]) {
jxl_simd::dct_8x4_full(input, output);
}
/// Returns the first coefficient (`coeffs[0]`) of a 64-element coefficient
/// block, which this function treats as the DC term.
///
/// NOTE(review): presumably `coeffs` is the output of `dct_4x8_full` — confirm
/// against the caller.
#[inline]
pub fn dc_from_dct_4x8_full(coeffs: &[f32; 64]) -> f32 {
    let [dc, ..] = *coeffs;
    dc
}
/// Returns the first coefficient (`coeffs[0]`) of a 64-element coefficient
/// block, which this function treats as the DC term.
///
/// NOTE(review): presumably `coeffs` is the output of `dct_8x4_full` — confirm
/// against the caller.
#[inline]
pub fn dc_from_dct_8x4_full(coeffs: &[f32; 64]) -> f32 {
    let [dc, ..] = *coeffs;
    dc
}
/// Separable 2-D transform of a block stored as 4 rows of 4 values.
///
/// Pass 1 runs the 4-point kernel over each input row, scales by 1/4, and
/// stores the result transposed. Pass 2 runs the 4-point kernel over each row
/// of that intermediate, scales by 1/4, and writes the rows out in place — so
/// the two axes of `output` are swapped relative to `input`.
#[inline(always)]
pub fn dct_4x4(input: &[f32; 16], output: &mut [f32; 16]) {
    const SCALE: f32 = 1.0 / 4.0;

    let mut transposed = [0.0f32; 16];

    // Pass 1: transform each input row, writing the result transposed.
    for (y, line) in input.chunks_exact(4).enumerate() {
        let spectrum = dct1d_4_val(line[0], line[1], line[2], line[3]);
        for (x, &coeff) in spectrum.iter().enumerate() {
            transposed[x * 4 + y] = coeff * SCALE;
        }
    }

    // Pass 2: transform each intermediate row, writing it out row by row.
    for (line, out_line) in transposed.chunks_exact(4).zip(output.chunks_exact_mut(4)) {
        let spectrum = dct1d_4_val(line[0], line[1], line[2], line[3]);
        for (dst, &coeff) in out_line.iter_mut().zip(spectrum.iter()) {
            *dst = coeff * SCALE;
        }
    }
}
/// Transforms a 64-element block by delegating to `jxl_simd::dct_4x4_full`.
///
/// How the 64 values are partitioned and how the output coefficients are
/// arranged is decided by `jxl_simd`; nothing in this wrapper constrains it.
#[inline(always)]
pub fn dct_4x4_full(input: &[f32; 64], output: &mut [f32; 64]) {
jxl_simd::dct_4x4_full(input, output);
}
/// Returns the first coefficient (`coeffs[0]`) of a 64-element coefficient
/// block, which this function treats as the DC term.
///
/// NOTE(review): presumably `coeffs` is the output of `dct_4x4_full` — confirm
/// against the caller.
#[inline]
pub fn dc_from_dct_4x4_full(coeffs: &[f32; 64]) -> f32 {
    let [dc, ..] = *coeffs;
    dc
}
/// Transforms a 128-element block by delegating to `jxl_simd::dct_16x8`.
///
/// The input/output layout and scaling are defined by `jxl_simd` and are not
/// visible from this wrapper.
#[inline]
pub fn dct_16x8(input: &[f32; 128], output: &mut [f32; 128]) {
jxl_simd::dct_16x8(input, output);
}
/// Transforms a 128-element block by delegating to `jxl_simd::dct_8x16`.
///
/// The input/output layout and scaling are defined by `jxl_simd` and are not
/// visible from this wrapper.
#[inline]
pub fn dct_8x16(input: &[f32; 128], output: &mut [f32; 128]) {
jxl_simd::dct_8x16(input, output);
}
/// Transforms a 256-element block by delegating to `jxl_simd::dct_16x16`.
///
/// The input/output layout and scaling are defined by `jxl_simd` and are not
/// visible from this wrapper.
#[inline]
pub fn dct_16x16(input: &[f32; 256], output: &mut [f32; 256]) {
jxl_simd::dct_16x16(input, output);
}
/// Derives four values from the lowest-frequency corner of a 256-element
/// coefficient block.
///
/// Only `coeffs[0]`, `coeffs[1]`, `coeffs[16]` and `coeffs[17]` are read. Each
/// is weighted by a product of two entries of `DCT_RESAMPLE_SCALE_16_TO_2`
/// (entry 0 or 1, following the position of the coefficient in the 2x2
/// corner), and the four weighted values are then combined with a 2x2
/// sum/difference butterfly.
///
/// The result is returned as `[out00, out01, out10, out11]`.
///
/// NOTE(review): presumably `coeffs` is the output of `dct_16x16` and the
/// result is the 2x2 DC sub-block — confirm against the caller.
pub fn dc_from_dct_16x16(coeffs: &[f32; 256]) -> [f32; 4] {
    let scale_lo = DCT_RESAMPLE_SCALE_16_TO_2[0];
    let scale_hi = DCT_RESAMPLE_SCALE_16_TO_2[1];

    // Weighted 2x2 low-frequency corner (stride 16 between the two rows).
    let b00 = coeffs[0] * scale_lo * scale_lo;
    let b01 = coeffs[1] * scale_lo * scale_hi;
    let b10 = coeffs[16] * scale_hi * scale_lo;
    let b11 = coeffs[17] * scale_hi * scale_hi;

    // 2x2 butterfly: combine within each row first, then across rows.
    let (row0_sum, row0_diff) = (b00 + b01, b00 - b01);
    let (row1_sum, row1_diff) = (b10 + b11, b10 - b11);

    [
        row0_sum + row1_sum,
        row0_sum - row1_sum,
        row0_diff + row1_diff,
        row0_diff - row1_diff,
    ]
}