use core::num::Wrapping;
/// Number of fractional bits in the fixed-point `FIX_*` multipliers below; even-part terms
/// are shifted left by this amount so they share the same scale.
const CONST_BITS: usize = 13;
/// Extra fractional bits left in the workspace after the column pass; the row pass removes
/// them together with `CONST_BITS` and a further 3 bits.
const PASS1_BITS: usize = 2;
// Fixed-point multipliers. Each `FIX_a_bcdefghij` is the real number `a.bcdefghij` spelled
// out in its name, multiplied by 2^CONST_BITS (8192) and rounded to the nearest integer.
// They are `Wrapping` so that products and sums in the kernels wrap instead of trapping.
const FIX_0_298631336: Wrapping<i32> = Wrapping(2446);
const FIX_0_390180644: Wrapping<i32> = Wrapping(3196);
const FIX_0_541196100: Wrapping<i32> = Wrapping(4433);
const FIX_0_765366865: Wrapping<i32> = Wrapping(6270);
const FIX_0_899976223: Wrapping<i32> = Wrapping(7373);
const FIX_1_175875602: Wrapping<i32> = Wrapping(9633);
const FIX_1_501321110: Wrapping<i32> = Wrapping(12299);
const FIX_1_847759065: Wrapping<i32> = Wrapping(15137);
const FIX_1_961570560: Wrapping<i32> = Wrapping(16069);
const FIX_2_053119869: Wrapping<i32> = Wrapping(16819);
const FIX_2_562915447: Wrapping<i32> = Wrapping(20995);
const FIX_3_072711026: Wrapping<i32> = Wrapping(25172);
/// Runs the 8x8 fixed-point inverse DCT on `input` and writes 8-bit samples to `output`.
///
/// The transform is done in two passes over a 64-entry `i32` workspace: a column pass over
/// the coefficients, then a row pass that level-shifts and clamps each result to `u8`.
/// Both arrays are indexed as `row * 8 + column`.
///
/// When every coefficient at index 32 and above is zero, the column pass uses the cheaper
/// kernel that reads only the first four coefficient rows.
pub(crate) fn idct_islow(input: &[i16; 64], output: &mut [u8; 64]) {
    let mut workspace = [Wrapping(0i32); 64];

    // Decide once per block which column kernel applies.
    let lower_rows_populated = input[32..].iter().any(|&coeff| coeff != 0);
    if lower_rows_populated {
        (0..8).for_each(|col| idct_1d_column(input, &mut workspace, col));
    } else {
        (0..8).for_each(|col| idct_1d_column_bottom_half_zero(input, &mut workspace, col));
    }

    (0..8).for_each(|row| idct_1d_row::<u8>(&workspace, output, row));
}
/// Variant of the 8-bit inverse DCT that always uses the reduced column kernel.
///
/// Only coefficients at indices `0..32` (the first four rows) are read by the column pass;
/// whatever is stored at indices `32..64` is ignored, so the result matches the full
/// transform only when those entries are zero.
pub(crate) fn idct_islow_bottom_half_zero(input: &[i16; 64], output: &mut [u8; 64]) {
    let mut workspace = [Wrapping(0i32); 64];

    (0..8).for_each(|col| idct_1d_column_bottom_half_zero(input, &mut workspace, col));
    (0..8).for_each(|row| idct_1d_row::<u8>(&workspace, output, row));
}
/// Fills all 64 entries of `output` with the single pixel value produced for a block whose
/// only coefficient is `dc_coeff`.
pub(crate) fn idct_islow_dc_only(dc_coeff: i16, output: &mut [u8; 64]) {
    *output = [idct_islow_dc_only_pixel(dc_coeff); 64];
}
/// Returns the 8-bit pixel value for a block whose only coefficient is `dc_coeff`.
pub(crate) fn idct_islow_dc_only_pixel(dc_coeff: i16) -> u8 {
    let pixel: u8 = idct_islow_dc_only_sample(dc_coeff);
    pixel
}
/// Runs the 8x8 fixed-point inverse DCT on `input` and writes 12-bit samples (stored in
/// `u16`) to `output`.
///
/// Uses the same two-pass scheme and the same column kernels as the 8-bit entry point; only
/// the row pass differs, through the `u16` level shift and clamp range. When every
/// coefficient at index 32 and above is zero the reduced column kernel is used.
///
/// NOTE(review): this path shares `PASS1_BITS = 2` with the 8-bit path, and all intermediate
/// arithmetic wraps silently. Confirm there is enough `i32` headroom for the coefficient
/// range expected from 12-bit streams.
pub(crate) fn idct_islow_12bit(input: &[i16; 64], output: &mut [u16; 64]) {
    let mut workspace = [Wrapping(0i32); 64];

    // Decide once per block which column kernel applies.
    let lower_rows_populated = input[32..].iter().any(|&coeff| coeff != 0);
    if lower_rows_populated {
        (0..8).for_each(|col| idct_1d_column(input, &mut workspace, col));
    } else {
        (0..8).for_each(|col| idct_1d_column_bottom_half_zero(input, &mut workspace, col));
    }

    (0..8).for_each(|row| idct_1d_row::<u16>(&workspace, output, row));
}
/// Returns the 12-bit sample value (stored in `u16`) for a block whose only coefficient is
/// `dc_coeff`.
pub(crate) fn idct_islow_12bit_dc_only_sample(dc_coeff: i16) -> u16 {
    let sample: u16 = idct_islow_dc_only_sample(dc_coeff);
    sample
}
/// Output sample type of the inverse DCT, describing how a signed result is mapped into the
/// unsigned sample range.
trait IdctSample: Copy {
    /// Offset added to the descaled, signed transform result before clamping.
    const LEVEL_SHIFT: i32;
    /// Upper clamp bound; results are limited to `0..=MAX` before conversion.
    const MAX: i32;
    /// Narrows `value` to the sample type.
    ///
    /// Callers pass values already clamped to `0..=MAX`; the conversion itself is a plain
    /// `as` cast and performs no range check.
    fn from_clamped_i32(value: i32) -> Self;
}

/// 8-bit samples: results are centred on 128 and limited to `0..=255`.
impl IdctSample for u8 {
    const MAX: i32 = 255;
    const LEVEL_SHIFT: i32 = 128;

    fn from_clamped_i32(value: i32) -> u8 {
        value as u8
    }
}

/// 12-bit samples held in `u16`: results are centred on 2048 and limited to `0..=4095`.
impl IdctSample for u16 {
    const MAX: i32 = 4095;
    const LEVEL_SHIFT: i32 = 2048;

    fn from_clamped_i32(value: i32) -> u16 {
        value as u16
    }
}
/// Computes the output sample for a block whose only coefficient is `dc_coeff`.
///
/// The coefficient is divided by 8 with rounding (`(dc + 4) >> 3`, an arithmetic shift),
/// offset by the sample type's level shift, and clamped to `0..=T::MAX`.
fn idct_islow_dc_only_sample<T>(dc_coeff: i16) -> T
where
    T: IdctSample,
{
    let rounded_eighth = (i32::from(dc_coeff) + 4) >> 3;
    let biased = rounded_eighth.wrapping_add(T::LEVEL_SHIFT);
    let bounded = biased.clamp(0, T::MAX);
    T::from_clamped_i32(bounded)
}
/// Column pass of the inverse DCT for column `col`.
///
/// Reads the eight coefficients `input[col + 8 * row]`, applies the 1-D fixed-point
/// transform, and stores the eight results in `work[col + 8 * row]`. Results keep
/// `PASS1_BITS` extra fractional bits for the row pass. All arithmetic wraps on overflow.
fn idct_1d_column(input: &[i16; 64], work: &mut [Wrapping<i32>; 64], col: usize) {
    // One column of the row-major block: consecutive entries are 8 apart.
    let coef = |row: usize| Wrapping(i32::from(input[col + 8 * row]));
    let [c0, c1, c2, c3, c4, c5, c6, c7] = [
        coef(0),
        coef(1),
        coef(2),
        coef(3),
        coef(4),
        coef(5),
        coef(6),
        coef(7),
    ];

    // Shortcut: with no AC terms every output equals the scaled DC term.
    if [c1, c2, c3, c4, c5, c6, c7].iter().all(|c| c.0 == 0) {
        let flat = c0 << PASS1_BITS;
        for row in 0..8 {
            work[col + 8 * row] = flat;
        }
        return;
    }

    // Even part: coefficients 0, 2, 4 and 6.
    let shared = (c2 + c6) * FIX_0_541196100;
    let rot_lo = shared - c6 * FIX_1_847759065;
    let rot_hi = shared + c2 * FIX_0_765366865;
    let sum04 = (c0 + c4) << CONST_BITS;
    let diff04 = (c0 - c4) << CONST_BITS;
    let even = [sum04 + rot_hi, diff04 + rot_lo, diff04 - rot_lo, sum04 - rot_hi];

    // Odd part: coefficients 1, 3, 5 and 7.
    let rot = (c1 + c3 + c5 + c7) * FIX_1_175875602;
    let k17 = (c1 + c7) * (-FIX_0_899976223);
    let k35 = (c3 + c5) * (-FIX_2_562915447);
    let k37 = (c3 + c7) * (-FIX_1_961570560) + rot;
    let k15 = (c1 + c5) * (-FIX_0_390180644) + rot;
    let odd = [
        c1 * FIX_1_501321110 + k17 + k15,
        c3 * FIX_3_072711026 + k35 + k37,
        c5 * FIX_2_053119869 + k35 + k15,
        c7 * FIX_0_298631336 + k17 + k37,
    ];

    // Output row k is even[k] + odd[k]; its mirror row 7 - k is even[k] - odd[k].
    let shift = CONST_BITS - PASS1_BITS;
    let half = Wrapping(1i32 << (shift - 1));
    for (k, (&e, &o)) in even.iter().zip(odd.iter()).enumerate() {
        work[col + 8 * k] = descale(e + o + half, shift);
        work[col + 8 * (7 - k)] = descale(e - o + half, shift);
    }
}
/// Column pass of the inverse DCT for column `col`, specialised for blocks whose last four
/// coefficient rows are zero.
///
/// Only `input[col]`, `input[col + 8]`, `input[col + 16]` and `input[col + 24]` are read;
/// the remaining four coefficients of the column are treated as zero without being
/// inspected. All eight entries `work[col + 8 * row]` are written. Arithmetic wraps on
/// overflow.
fn idct_1d_column_bottom_half_zero(input: &[i16; 64], work: &mut [Wrapping<i32>; 64], col: usize) {
    let coef = |row: usize| Wrapping(i32::from(input[col + 8 * row]));
    let [c0, c1, c2, c3] = [coef(0), coef(1), coef(2), coef(3)];

    // Shortcut: with no AC terms every output equals the scaled DC term.
    if (c1.0 | c2.0 | c3.0) == 0 {
        let flat = c0 << PASS1_BITS;
        for row in 0..8 {
            work[col + 8 * row] = flat;
        }
        return;
    }

    // Even part: only coefficients 0 and 2 contribute.
    let dc = c0 << CONST_BITS;
    let rot_lo = c2 * FIX_0_541196100;
    let rot_hi = rot_lo + c2 * FIX_0_765366865;
    let even = [dc + rot_hi, dc + rot_lo, dc - rot_lo, dc - rot_hi];

    // Odd part: only coefficients 1 and 3 contribute.
    let rot = (c1 + c3) * FIX_1_175875602;
    let k1_plain = c1 * (-FIX_0_899976223);
    let k3_plain = c3 * (-FIX_2_562915447);
    let k3_rot = c3 * (-FIX_1_961570560) + rot;
    let k1_rot = c1 * (-FIX_0_390180644) + rot;
    let odd = [
        c1 * FIX_1_501321110 + k1_plain + k1_rot,
        c3 * FIX_3_072711026 + k3_plain + k3_rot,
        k3_plain + k1_rot,
        k1_plain + k3_rot,
    ];

    // Output row k is even[k] + odd[k]; its mirror row 7 - k is even[k] - odd[k].
    let shift = CONST_BITS - PASS1_BITS;
    let half = Wrapping(1i32 << (shift - 1));
    for (k, (&e, &o)) in even.iter().zip(odd.iter()).enumerate() {
        work[col + 8 * k] = descale(e + o + half, shift);
        work[col + 8 * (7 - k)] = descale(e - o + half, shift);
    }
}
/// Drops `shift` low-order bits from `v` with an arithmetic (sign-preserving) right shift.
///
/// No rounding is applied here; callers add their rounding term before calling.
fn descale(v: Wrapping<i32>, shift: usize) -> Wrapping<i32> {
    let Wrapping(raw) = v;
    Wrapping(raw >> shift)
}
/// Row pass of the inverse DCT for row `row`.
///
/// Reads the eight workspace values `work[row * 8..row * 8 + 8]`, applies the 1-D
/// fixed-point transform, and writes eight samples to the same positions of `output`.
/// Each result is descaled by `CONST_BITS + PASS1_BITS + 3` bits with rounding, then
/// level-shifted and clamped for the sample type `T`. Arithmetic wraps on overflow.
fn idct_1d_row<T: IdctSample>(work: &[Wrapping<i32>; 64], output: &mut [T; 64], row: usize) {
    let start = row * 8;
    let [w0, w1, w2, w3, w4, w5, w6, w7] = [
        work[start],
        work[start + 1],
        work[start + 2],
        work[start + 3],
        work[start + 4],
        work[start + 5],
        work[start + 6],
        work[start + 7],
    ];

    // Shortcut: with no AC terms the whole row is one value. The workspace carries no
    // CONST_BITS scaling at this point, so only PASS1_BITS + 3 bits are removed.
    if [w1, w2, w3, w4, w5, w6, w7].iter().all(|w| w.0 == 0) {
        let dc_shift = PASS1_BITS + 3;
        let dc_half = Wrapping(1i32 << (dc_shift - 1));
        let flat = descale_and_clamp::<T>(w0 + dc_half, dc_shift);
        output[start..start + 8].fill(flat);
        return;
    }

    // Even part: workspace entries 0, 2, 4 and 6.
    let shared = (w2 + w6) * FIX_0_541196100;
    let rot_lo = shared - w6 * FIX_1_847759065;
    let rot_hi = shared + w2 * FIX_0_765366865;
    let sum04 = (w0 + w4) << CONST_BITS;
    let diff04 = (w0 - w4) << CONST_BITS;
    let even = [sum04 + rot_hi, diff04 + rot_lo, diff04 - rot_lo, sum04 - rot_hi];

    // Odd part: workspace entries 1, 3, 5 and 7.
    let rot = (w1 + w3 + w5 + w7) * FIX_1_175875602;
    let k17 = (w1 + w7) * (-FIX_0_899976223);
    let k35 = (w3 + w5) * (-FIX_2_562915447);
    let k37 = (w3 + w7) * (-FIX_1_961570560) + rot;
    let k15 = (w1 + w5) * (-FIX_0_390180644) + rot;
    let odd = [
        w1 * FIX_1_501321110 + k17 + k15,
        w3 * FIX_3_072711026 + k35 + k37,
        w5 * FIX_2_053119869 + k35 + k15,
        w7 * FIX_0_298631336 + k17 + k37,
    ];

    // Output column k is even[k] + odd[k]; its mirror column 7 - k is even[k] - odd[k].
    let shift = CONST_BITS + PASS1_BITS + 3;
    let half = Wrapping(1i32 << (shift - 1));
    for (k, (&e, &o)) in even.iter().zip(odd.iter()).enumerate() {
        output[start + k] = descale_and_clamp::<T>(e + o + half, shift);
        output[start + 7 - k] = descale_and_clamp::<T>(e - o + half, shift);
    }
}
/// Converts a fixed-point transform result into an output sample.
///
/// `value` is shifted right arithmetically by `shift` bits (callers add the rounding term
/// beforehand), offset by `T::LEVEL_SHIFT` with wrapping addition, clamped to `0..=T::MAX`,
/// and narrowed to `T`.
fn descale_and_clamp<T>(value: Wrapping<i32>, shift: usize) -> T
where
    T: IdctSample,
{
    let Wrapping(raw) = value;
    let biased = (raw >> shift).wrapping_add(T::LEVEL_SHIFT);
    let bounded = biased.clamp(0, T::MAX);
    T::from_clamped_i32(bounded)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the 8-bit transform on a block whose only non-zero coefficient is the DC term.
    fn transform_dc_only(dc: i16) -> [u8; 64] {
        let mut input = [0i16; 64];
        input[0] = dc;
        let mut output = [0u8; 64];
        idct_islow(&input, &mut output);
        output
    }

    #[test]
    fn all_zero_input_produces_level_shifted_gray_block() {
        let mut output = [0u8; 64];
        idct_islow(&[0i16; 64], &mut output);
        for (i, px) in output.iter().copied().enumerate() {
            assert_eq!(px, 128, "pixel {i} = {px}, expected 128");
        }
    }

    #[test]
    fn dc_only_input_produces_uniform_block() {
        // A DC term of 64 corresponds to an offset of 8 above mid-gray.
        let output = transform_dc_only(64);
        for &px in output.iter() {
            assert!((i32::from(px) - 136).abs() <= 1, "got {px}");
        }
    }

    /// Floating-point 2-D inverse DCT for one pixel, evaluated directly from the
    /// orthonormal cosine-basis definition (no level shift, no rounding).
    fn exact_idct_pixel(coeffs: &[i16; 64], x: usize, y: usize) -> f64 {
        use core::f64::consts::PI;

        let alpha = |k: usize| match k {
            0 => (1.0_f64 / 8.0).sqrt(),
            _ => (2.0_f64 / 8.0).sqrt(),
        };
        let cos_term = |sample: usize, freq: usize| -> f64 {
            let s = f64::from(u8::try_from(sample).unwrap());
            let f = f64::from(u8::try_from(freq).unwrap());
            (((2.0 * s) + 1.0) * f * PI / 16.0).cos()
        };

        // Walk the coefficients in storage order (v outer, u inner) and accumulate
        // sequentially so the floating-point summation order stays fixed.
        (0..64).fold(0.0, |acc, idx| {
            let (u, v) = (idx % 8, idx / 8);
            acc + alpha(u) * alpha(v) * f64::from(coeffs[idx]) * cos_term(x, u) * cos_term(y, v)
        })
    }

    /// Reference 8-bit block: exact IDCT plus 128, rounded and clamped to `0..=255`.
    fn exact_islow_reference(coeffs: &[i16; 64]) -> [u8; 64] {
        let mut out = [0u8; 64];
        for (idx, slot) in out.iter_mut().enumerate() {
            let (x, y) = (idx % 8, idx / 8);
            let shifted = exact_idct_pixel(coeffs, x, y) + 128.0;
            *slot = shifted.round().clamp(0.0, 255.0) as u8;
        }
        out
    }

    /// Advances a 64-bit linear congruential generator and returns a coefficient in
    /// `-256..=255` taken from nine of its upper bits.
    fn next_coeff(state: &mut u64) -> i16 {
        let advanced = state
            .wrapping_mul(6_364_136_223_846_793_005)
            .wrapping_add(1_442_695_040_888_963_407);
        *state = advanced;
        let nine_bits = (advanced >> 40) & 0x1ff;
        nine_bits as i16 - 256
    }

    #[test]
    fn islow_matches_exact_idct_within_one_lsb() {
        let mut state = 0x0bad_c0de_1234_5678u64;
        for _ in 0..256 {
            let mut coeffs = [0i16; 64];
            coeffs.iter_mut().for_each(|c| *c = next_coeff(&mut state));
            // Shrink the DC term to a quarter of its generated value.
            coeffs[0] /= 4;

            let mut got = [0u8; 64];
            idct_islow(&coeffs, &mut got);
            let want = exact_islow_reference(&coeffs);

            for (i, (&g, &w)) in got.iter().zip(want.iter()).enumerate() {
                assert!(
                    (i32::from(g) - i32::from(w)).abs() <= 1,
                    "pixel {i}: islow={} exact={} (coeffs {coeffs:?})",
                    g,
                    w
                );
            }
        }
    }

    #[test]
    fn islow_dc_only_matches_closed_form() {
        const DC_CASES: [i16; 7] = [-512, -200, -64, 8, 64, 200, 512];
        for &dc in DC_CASES.iter() {
            let got = transform_dc_only(dc);
            let expected = ((f64::from(dc) / 8.0) + 128.0).round().clamp(0.0, 255.0) as u8;
            for &px in got.iter() {
                assert!(
                    (i32::from(px) - i32::from(expected)).abs() <= 1,
                    "dc={dc}: px={px} expected={expected}"
                );
            }
        }
    }

    #[test]
    fn dc_only_helper_matches_full_idct() {
        let dc: i16 = 73;
        let full = transform_dc_only(dc);

        let mut fast = [0u8; 64];
        idct_islow_dc_only(dc, &mut fast);

        assert_eq!(fast, full);
        assert_eq!(idct_islow_dc_only_pixel(dc), full[0]);
    }

    #[test]
    fn clamps_extreme_coefficients_into_0_255() {
        let output = transform_dc_only(i16::MAX);
        assert!(output.iter().all(|&px| px == 255));

        let output = transform_dc_only(i16::MIN);
        assert!(output.iter().all(|&px| px == 0));
    }

    #[test]
    fn roundtrip_identity_basis_reconstructs_8x8_impulse() {
        // Coefficient 1 is the first horizontal AC term; it must make the two ends of
        // the top row differ.
        let mut input = [0i16; 64];
        input[1] = 400;
        let mut output = [0u8; 64];
        idct_islow(&input, &mut output);

        let left = i32::from(output[0]);
        let right = i32::from(output[7]);
        assert!(
            (left - right).abs() > 40,
            "AC[1] basis should produce horizontal variation, got L={left} R={right}"
        );
    }

    #[test]
    fn does_not_panic_on_extreme_adversarial_coefficients() {
        // Only checks that the transform completes; the pixel values are not inspected.
        let mut output = [0u8; 64];
        idct_islow(&[i16::MAX; 64], &mut output);
    }
}