use super::TxSize;
use super::TxType;
use super::HTX_TAB;
use super::VTX_TAB;
/// Three shift amounts belonging to one forward-transform configuration.
/// NOTE(review): what each of the three entries applies to is decided by the
/// code that consumes `Txfm2DFlipCfg::shift` — confirm there.
pub type TxfmShift = [i8; 3];
/// Three `TxfmShift` rows; `Txfm2DFlipCfg::fwd` selects the row at
/// `(bd - 8) / 2`.
pub type TxfmShifts = [TxfmShift; 3];

// Square sizes. Apart from 4x4 there are only three distinct tables, so every
// size that shares a table is written as an alias of the matching square one.
const FWD_SHIFT_4X4: TxfmShifts = [[3, 0, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X16: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_32X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];

// 1:2 and 2:1 rectangles.
const FWD_SHIFT_4X8: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_8X4: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_8X16: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_16X8: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_16X32: TxfmShifts = FWD_SHIFT_32X32;
const FWD_SHIFT_32X16: TxfmShifts = FWD_SHIFT_32X32;
const FWD_SHIFT_32X64: TxfmShifts = FWD_SHIFT_64X64;
const FWD_SHIFT_64X32: TxfmShifts = FWD_SHIFT_64X64;

// 1:4 and 4:1 rectangles.
const FWD_SHIFT_4X16: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_16X4: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_8X32: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_32X8: TxfmShifts = FWD_SHIFT_8X8;
const FWD_SHIFT_16X64: TxfmShifts = FWD_SHIFT_32X32;
const FWD_SHIFT_64X16: TxfmShifts = FWD_SHIFT_32X32;
/// Forward-transform shift tables for every transform size.
///
/// `Txfm2DFlipCfg::fwd` indexes this array with `tx_size as usize`, so the
/// entries have to stay in the same order as the `TxSize` discriminants.
/// NOTE(review): `TxSize` is declared outside this view — confirm the order
/// against its definition before editing this list.
pub const FWD_TXFM_SHIFT_LS: [TxfmShifts; TxSize::TX_SIZES_ALL] = [
FWD_SHIFT_4X4,
FWD_SHIFT_8X8,
FWD_SHIFT_16X16,
FWD_SHIFT_32X32,
FWD_SHIFT_64X64,
FWD_SHIFT_4X8,
FWD_SHIFT_8X4,
FWD_SHIFT_8X16,
FWD_SHIFT_16X8,
FWD_SHIFT_16X32,
FWD_SHIFT_32X16,
FWD_SHIFT_32X64,
FWD_SHIFT_64X32,
FWD_SHIFT_4X16,
FWD_SHIFT_16X4,
FWD_SHIFT_8X32,
FWD_SHIFT_32X8,
FWD_SHIFT_16X64,
FWD_SHIFT_64X16,
];
/// Identifier of a 1D transform kernel (kernel family plus length).
///
/// `Eq` is derived alongside `PartialEq`: the enum has no payload, so equality
/// is total and the type can be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxfmType {
  DCT4,
  DCT8,
  DCT16,
  DCT32,
  DCT64,
  ADST4,
  ADST8,
  ADST16,
  Identity4,
  Identity8,
  Identity16,
  Identity32,
  /// Placeholder for table cells that have no kernel;
  /// `Txfm2DFlipCfg::fwd` asserts that it is never selected.
  Invalid,
}

impl TxfmType {
  /// Number of columns in `AV1_TXFM_TYPE_LS`.
  const TX_TYPES_1D: usize = 4;

  /// Kernel lookup table.
  ///
  /// Rows are selected by a transform width/height index (lengths 4, 8, 16,
  /// 32, 64 judging by the entries); columns by a 1D transform type cast to
  /// `usize`. Columns 1 and 2 both hold the ADST kernel.
  /// NOTE(review): column 2 presumably serves the flipped-ADST 1D type —
  /// confirm against the 1D type enum behind `VTX_TAB` / `HTX_TAB`.
  const AV1_TXFM_TYPE_LS: [[TxfmType; Self::TX_TYPES_1D]; 5] = [
    [Self::DCT4, Self::ADST4, Self::ADST4, Self::Identity4],
    [Self::DCT8, Self::ADST8, Self::ADST8, Self::Identity8],
    [Self::DCT16, Self::ADST16, Self::ADST16, Self::Identity16],
    [Self::DCT32, Self::Invalid, Self::Invalid, Self::Identity32],
    [Self::DCT64, Self::Invalid, Self::Invalid, Self::Invalid],
  ];
}
/// Parameters of one 2D transform: its size, the two flip flags, the shift
/// triple and the 1D kernel chosen for each direction.
#[derive(Debug, Clone, Copy)]
pub struct Txfm2DFlipCfg {
/// Transform size the configuration was built for.
pub tx_size: TxSize,
/// First flag returned by `get_flip_cfg`.
/// NOTE(review): presumably "flip up/down" — confirm with the consumer.
pub ud_flip: bool,
/// Second flag returned by `get_flip_cfg`.
/// NOTE(review): presumably "flip left/right" — confirm with the consumer.
pub lr_flip: bool,
/// Shift triple taken from `FWD_TXFM_SHIFT_LS` for this size and bit depth.
pub shift: TxfmShift,
/// Kernel looked up with the height index and the `VTX_TAB` 1D type.
pub txfm_type_col: TxfmType,
/// Kernel looked up with the width index and the `HTX_TAB` 1D type.
pub txfm_type_row: TxfmType,
}
impl Txfm2DFlipCfg {
  /// Builds the forward-transform configuration for `tx_type` at `tx_size`
  /// and bit depth `bd`.
  ///
  /// # Panics
  ///
  /// Panics when either looked-up kernel is `TxfmType::Invalid`, or when a
  /// table index is out of range. The shift row is `(bd - 8) / 2` in a
  /// three-row table, so `bd` has to lie in `8..=13`.
  /// NOTE(review): callers presumably only pass 8, 10 or 12 — confirm.
  pub fn fwd(tx_type: TxType, tx_size: TxSize, bd: usize) -> Self {
    let type_idx = tx_type as usize;
    let col_1d = VTX_TAB[type_idx];
    let row_1d = HTX_TAB[type_idx];

    let width_idx = tx_size.width_index();
    let height_idx = tx_size.height_index();

    // Columns run along the height, rows along the width.
    let txfm_type_col = TxfmType::AV1_TXFM_TYPE_LS[height_idx][col_1d as usize];
    let txfm_type_row = TxfmType::AV1_TXFM_TYPE_LS[width_idx][row_1d as usize];
    assert_ne!(txfm_type_col, TxfmType::Invalid);
    assert_ne!(txfm_type_row, TxfmType::Invalid);

    let (ud_flip, lr_flip) = Self::get_flip_cfg(tx_type);
    let shift = FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2];

    Self { tx_size, ud_flip, lr_flip, shift, txfm_type_col, txfm_type_row }
  }

  /// Returns `(ud_flip, lr_flip)` for `tx_type`.
  ///
  /// The match stays exhaustive on purpose so that a new `TxType` variant
  /// is a compile error here rather than a silent "no flip".
  fn get_flip_cfg(tx_type: TxType) -> (bool, bool) {
    use self::TxType::*;
    match tx_type {
      FLIPADST_FLIPADST => (true, true),
      FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
      DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
      DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
      | V_ADST | H_ADST => (false, false),
    }
  }
}
// Re-exports `crate::predict::Block{W}x{H}` and implements `FwdTxfm2D` for it.
// Called with no arguments it covers the full list of sizes spelled out in
// the second arm; called with `(W, H)` pairs it handles exactly those pairs.
macro_rules! impl_fwd_txs {
  ($(($w:expr, $h:expr)),+) => {
    $(
      paste::item! {
        pub use crate::predict::[<Block $w x $h>];
        impl FwdTxfm2D for [<Block $w x $h>] {}
      }
    )+
  };
  () => {
    // Square blocks.
    impl_fwd_txs! { (4, 4), (8, 8), (16, 16), (32, 32), (64, 64) }
    // 1:2 and 2:1 blocks.
    impl_fwd_txs! { (4, 8), (8, 16), (16, 32), (32, 64) }
    impl_fwd_txs! { (8, 4), (16, 8), (32, 16), (64, 32) }
    // 1:4 and 4:1 blocks.
    impl_fwd_txs! { (4, 16), (8, 32), (16, 64) }
    impl_fwd_txs! { (16, 4), (32, 8), (64, 16) }
  };
}
// Writes the listed values into `$arr[0]`, `$arr[1]`, ... in order.
// Indexing is bounds-checked, so passing more values than `$arr` can hold
// panics at the first out-of-range slot.
macro_rules! store_coeffs {
  ( $arr:expr, $( $x:expr ),* ) => {
    {
      // Start one step before slot 0 so each value advances the cursor first
      // and then writes; this leaves no dead trailing increment.
      let mut slot = usize::MAX;
      $(
        slot = slot.wrapping_add(1);
        $arr[slot] = $x;
      )*
    }
  };
}
macro_rules! impl_1d_tx {
() => {
impl_1d_tx! {allow(), }
};
($m:meta, $($s:ident),*) => {
/// Rotation kernel driven by two multiplier pairs.
///
/// Implementors pick the `ADD` / `SUB` operations; `kernel` supplies the
/// shared arithmetic.
trait RotateKernelPi4<T: TxOperations> {
  const ADD: $($s)* fn(T, T) -> T;
  const SUB: $($s)* fn(T, T) -> T;

  /// Returns `(out0, out1)` where `out0 = ADD(p1, p0).tx_mul(m.1)` and
  /// `out1 = SUB(p0.tx_mul(m.0), out0)`.
  #[$m]
  $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32))) -> (T, T) {
    let (p0_factor, mix_factor) = m;
    let mix = Self::ADD(p1, p0);
    let p0_scaled = p0.tx_mul(p0_factor);
    let out0 = mix.tx_mul(mix_factor);
    (out0, Self::SUB(p0_scaled, out0))
  }
}
// Marker types: each one fixes the ADD/SUB pair used by
// `RotateKernelPi4::kernel`. The `Avg` variants replace the first operation
// with its averaging form (`add_avg` / `sub_avg`).
struct RotatePi4Add;
struct RotatePi4AddAvg;
struct RotatePi4Sub;
struct RotatePi4SubAvg;
// ADD = add, SUB = sub.
impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Add {
const ADD: $($s)* fn(T, T) -> T = T::add;
const SUB: $($s)* fn(T, T) -> T = T::sub;
}
// ADD = add_avg, SUB = sub.
impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4AddAvg {
const ADD: $($s)* fn(T, T) -> T = T::add_avg;
const SUB: $($s)* fn(T, T) -> T = T::sub;
}
// Roles swapped: ADD = sub, SUB = add.
impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Sub {
const ADD: $($s)* fn(T, T) -> T = T::sub;
const SUB: $($s)* fn(T, T) -> T = T::add;
}
// Roles swapped with averaging: ADD = sub_avg, SUB = add.
impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4SubAvg {
const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
const SUB: $($s)* fn(T, T) -> T = T::add;
}
/// Rotation kernel driven by three multiplier pairs.
///
/// Implementors pick `ADD`, `SUB` and `SHIFT`; the two provided methods
/// supply the shared arithmetic.
trait RotateKernel<T: TxOperations> {
  const ADD: $($s)* fn(T, T) -> T;
  const SUB: $($s)* fn(T, T) -> T;
  const SHIFT: $($s)* fn(T) -> T;

  /// Variant whose first operand is a pair: `p0.0` feeds the `ADD` with `p1`,
  /// `p0.1` is the value scaled by `m.0`.
  ///
  /// With `c = ADD(p1, p0.0).tx_mul(m.2)` the result is
  /// `(p1.tx_mul(m.1).add(c), SUB(p0.1.tx_mul(m.0), SHIFT(c)))`.
  #[$m]
  $($s)* fn half_kernel(
    p0: (T, T), p1: T, m: ((i32, i32), (i32, i32), (i32, i32)),
  ) -> (T, T) {
    let (p0_for_mix, p0_for_scale) = p0;
    let (p0_factor, p1_factor, mix_factor) = m;
    let mix = Self::ADD(p1, p0_for_mix);
    let p0_scaled = p0_for_scale.tx_mul(p0_factor);
    let p1_scaled = p1.tx_mul(p1_factor);
    let mix_scaled = mix.tx_mul(mix_factor);
    (
      p1_scaled.add(mix_scaled),
      Self::SUB(p0_scaled, Self::SHIFT(mix_scaled)),
    )
  }

  /// Same as `half_kernel` with `p0` used for both halves of the pair.
  #[$m]
  $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32), (i32, i32))) -> (T, T) {
    Self::half_kernel((p0, p0), p1, m)
  }
}
/// Rotation kernel with the subtraction order fixed; implementors pick only
/// the `ADD` operation used to combine the inputs.
trait RotateKernelNeg<T: TxOperations> {
  const ADD: $($s)* fn(T, T) -> T;

  /// With `c = ADD(p0, p1).tx_mul(m.2)` the result is
  /// `(p1.tx_mul(m.1).sub(c), c.sub(p0.tx_mul(m.0)))`.
  #[$m]
  $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32), (i32, i32))) -> (T, T) {
    let (p0_factor, p1_factor, mix_factor) = m;
    let mix = Self::ADD(p0, p1);
    let p0_scaled = p0.tx_mul(p0_factor);
    let p1_scaled = p1.tx_mul(p1_factor);
    let mix_scaled = mix.tx_mul(mix_factor);
    (p1_scaled.sub(mix_scaled), mix_scaled.sub(p0_scaled))
  }
}
// Marker types for `RotateKernel` (Add*/Sub*) and `RotateKernelNeg` (Neg*).
// `Avg` variants use the averaging form of the first operation; `Shift`
// variants set SHIFT to `rshift1`, all others set it to `copy_fn`.
struct RotateAdd;
struct RotateAddAvg;
struct RotateAddShift;
struct RotateSub;
struct RotateSubAvg;
struct RotateSubShift;
struct RotateNeg;
struct RotateNegAvg;
// ADD = add, SUB = sub, SHIFT = copy_fn.
impl<T: TxOperations> RotateKernel<T> for RotateAdd {
const ADD: $($s)* fn(T, T) -> T = T::add;
const SUB: $($s)* fn(T, T) -> T = T::sub;
const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}
// ADD = add_avg, SUB = sub, SHIFT = copy_fn.
impl<T: TxOperations> RotateKernel<T> for RotateAddAvg {
const ADD: $($s)* fn(T, T) -> T = T::add_avg;
const SUB: $($s)* fn(T, T) -> T = T::sub;
const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}
// ADD = add, SUB = sub, SHIFT = rshift1.
impl<T: TxOperations> RotateKernel<T> for RotateAddShift {
const ADD: $($s)* fn(T, T) -> T = T::add;
const SUB: $($s)* fn(T, T) -> T = T::sub;
const SHIFT: $($s)* fn(T) -> T = T::rshift1;
}
// Roles swapped: ADD = sub, SUB = add, SHIFT = copy_fn.
impl<T: TxOperations> RotateKernel<T> for RotateSub {
const ADD: $($s)* fn(T, T) -> T = T::sub;
const SUB: $($s)* fn(T, T) -> T = T::add;
const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}
// Roles swapped with averaging: ADD = sub_avg, SUB = add, SHIFT = copy_fn.
impl<T: TxOperations> RotateKernel<T> for RotateSubAvg {
const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
const SUB: $($s)* fn(T, T) -> T = T::add;
const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}
// Roles swapped: ADD = sub, SUB = add, SHIFT = rshift1.
impl<T: TxOperations> RotateKernel<T> for RotateSubShift {
const ADD: $($s)* fn(T, T) -> T = T::sub;
const SUB: $($s)* fn(T, T) -> T = T::add;
const SHIFT: $($s)* fn(T) -> T = T::rshift1;
}
// `RotateKernelNeg` with ADD = sub.
impl<T: TxOperations> RotateKernelNeg<T> for RotateNeg {
const ADD: $($s)* fn(T, T) -> T = T::sub;
}
// `RotateKernelNeg` with ADD = sub_avg.
impl<T: TxOperations> RotateKernelNeg<T> for RotateNegAvg {
const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
}
/// Additive butterfly with asymmetric output.
///
/// With `s = p0.add(p1)` and `h = s.rshift1()`, returns `((h, s), p1.sub(h))`.
#[inline]
#[$m]
$($s)* fn butterfly_add<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
  let sum = p0.add(p1);
  let half_sum = sum.rshift1();
  ((half_sum, sum), p1.sub(half_sum))
}
/// Subtractive butterfly with asymmetric output.
///
/// With `d = p0.sub(p1)` and `h = d.rshift1()`, returns `((h, d), p1.add(h))`.
#[inline]
#[$m]
$($s)* fn butterfly_sub<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
  let diff = p0.sub(p1);
  let half_diff = diff.rshift1();
  ((half_diff, diff), p1.add(half_diff))
}
/// Butterfly whose pair comes second in the result.
///
/// With `d = p0.sub(p1)` and `h = d.rshift1()`, returns `(p0.sub(h), (h, d))`.
#[inline]
#[$m]
$($s)* fn butterfly_neg<T: TxOperations>(p0: T, p1: T) -> (T, (T, T)) {
  let diff = p0.sub(p1);
  let half_diff = diff.rshift1();
  (p0.sub(half_diff), (half_diff, diff))
}
/// Additive butterfly with asymmetric input.
///
/// With `u = p1h.add(p0.0)`, returns `(p0.1.sub(u), u)`.
#[inline]
#[$m]
$($s)* fn butterfly_add_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
  let (p0_half, p0_full) = p0;
  let upper = p1h.add(p0_half);
  (p0_full.sub(upper), upper)
}
/// Subtractive butterfly with asymmetric input.
///
/// With `u = p1h.sub(p0.0)`, returns `(p0.1.add(u), u)`.
#[inline]
#[$m]
$($s)* fn butterfly_sub_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
  let (p0_half, p0_full) = p0;
  let lower = p1h.sub(p0_half);
  (p0_full.add(lower), lower)
}
/// Butterfly taking a single value first and a pair second.
///
/// With `s = p0h.add(p1.0)`, returns `(s, s.sub(p1.1))`.
#[inline]
#[$m]
$($s)* fn butterfly_neg_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
  let (p1_half, p1_full) = p1;
  let sum = p0h.add(p1_half);
  (sum, sum.sub(p1_full))
}
/// 2-point `fdct_ii` step with asymmetric input: `p0h` is a single value and
/// `p1` a pair. Delegates directly to `butterfly_neg_asym`.
#[$m]
$($s)* fn daala_fdct_ii_2_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
butterfly_neg_asym(p0h, p1)
}
/// 2-point `fdst_iv` step with asymmetric input: `p0` is a pair and `p1h` a
/// single value. Runs `RotateAdd::half_kernel` with fixed multiplier pairs.
/// NOTE(review): each pair is forwarded to `tx_mul`, whose exact scaling is
/// defined by `TxOperations` outside this view — confirm before changing them.
#[$m]
$($s)* fn daala_fdst_iv_2_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
RotateAdd::half_kernel(p0, p1h, ((473, 9), (3135, 12), (4433, 13)))
}
/// 4-point `fdct_ii` step: two butterflies followed by two embedded 2-point
/// steps. Writes four values to `output[0..4]` in the order q0, q1, q2, q3;
/// `daala_fdct4` permutes them afterwards.
#[$m]
$($s)* fn daala_fdct_ii_4<T: TxOperations>(
q0: T, q1: T, q2: T, q3: T, output: &mut [T],
) {
// Butterflies with asymmetric output: each yields one single value and one pair.
let (q0h, q3) = butterfly_neg(q0, q3);
let (q1, q2h) = butterfly_add(q1, q2);
// Embedded 2-point steps consuming the asymmetric values.
let (q0, q1) = daala_fdct_ii_2_asym(q0h, q1);
let (q3, q2) = daala_fdst_iv_2_asym(q3, q2h);
store_coeffs!(output, q0, q1, q2, q3);
}
/// 4-point forward DCT entry point: reads `input[0..4]` and writes
/// `output[0..4]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 4 elements.
#[$m]
pub $($s)* fn daala_fdct4<T: TxOperations>(input: &[T], output: &mut [T]) {
  assert!(input.len() >= 4);
  assert!(output.len() >= 4);
  let mut staged: [T; 4] = [T::zero(); 4];
  daala_fdct_ii_4(input[0], input[1], input[2], input[3], &mut staged);
  // Emit the staged values in 2-bit bit-reversed index order.
  output[..4].copy_from_slice(&[staged[0], staged[2], staged[1], staged[3]]);
}
/// 4-point `fdst_vii` transform: reads `input[0..4]` and writes `output[0..4]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 4 elements.
#[$m]
pub $($s)* fn daala_fdst_vii_4<T: TxOperations>(input: &[T], output: &mut [T]) {
assert!(input.len() >= 4);
assert!(output.len() >= 4);
let q0 = input[0];
let q1 = input[1];
let q2 = input[2];
let q3 = input[3];
// Input combinations.
let t0 = q1.add(q3);
// t1 = q1 + sub_avg(q0, q1 + q3); uses the unscaled t0.
let t1 = q1.add(q0.sub_avg(t0));
let t2 = q0.sub(q1);
let t3 = q2;
let t4 = q0.add(q3);
// Scale every term by its fixed multiplier pair (t1 and t3 share one).
let t0 = t0.tx_mul((7021, 14));
let t1 = t1.tx_mul((37837, 15));
let t2 = t2.tx_mul((21513, 15));
let t3 = t3.tx_mul((37837, 15));
let t4 = t4.tx_mul((467, 11));
// Output combinations; t3h and u4 are each used twice.
let t3h = t3.rshift1();
let u4 = t4.add(t3h);
output[0] = t0.add(u4);
output[1] = t1;
output[2] = t0.add(t2.sub(t3h));
output[3] = t2.add(t3.sub(u4));
}
/// 2-point `fdct_ii` step on plain (symmetric) inputs.
///
/// The kernel is called with the operands swapped and its two results are
/// swapped back, so this returns `(kernel_out1, kernel_out0)`.
#[$m]
$($s)* fn daala_fdct_ii_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
let (p1, p0) = RotatePi4SubAvg::kernel(p1, p0, ((11585, 13), (11585, 13)));
(p0, p1)
}
/// 2-point `fdst_iv` step on plain (symmetric) inputs; runs
/// `RotateAddAvg::kernel` with fixed multiplier pairs.
#[$m]
$($s)* fn daala_fdst_iv_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
RotateAddAvg::kernel(p0, p1, ((10703, 13), (8867, 14), (3135, 12)))
}
/// 4-point `fdct_ii` step with asymmetric input (alternating single values and
/// pairs). Writes four values to `output[0..4]` in the order q0, q1, q2, q3.
#[$m]
$($s)* fn daala_fdct_ii_4_asym<T: TxOperations>(
q0h: T, q1: (T, T), q2h: T, q3: (T, T), output: &mut [T],
) {
// Butterflies consuming the asymmetric input.
let (q0, q3) = butterfly_neg_asym(q0h, q3);
let (q1, q2) = butterfly_sub_asym(q1, q2h);
// Embedded 2-point steps on plain values.
let (q0, q1) = daala_fdct_ii_2(q0, q1);
let (q3, q2) = daala_fdst_iv_2(q3, q2);
store_coeffs!(output, q0, q1, q2, q3);
}
/// 4-point `fdst_iv` step with asymmetric input (alternating pairs and single
/// values). Writes four values to `output[0..4]` in the order q0, q1, q2, q3.
#[$m]
$($s)* fn daala_fdst_iv_4_asym<T: TxOperations>(
q0: (T, T), q1h: T, q2: (T, T), q3h: T, output: &mut [T],
) {
// Stage 0: rotations on the outer (q0, q3) and inner (q2, q1) pairs.
let (q0, q3) = RotateAddShift::half_kernel(
q0,
q3h,
((9633, 14), (12873, 13), (12785, 15)),
);
let (q2, q1) = RotateSubShift::half_kernel(
q2,
q1h,
((11363, 14), (18081, 15), (4551, 12)),
);
// Stage 1: butterflies; the (half, full) pair is built on the spot.
let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);
// Stage 2: final rotation of the middle pair.
let (q2, q1) = RotatePi4AddAvg::kernel(q2, q1, ((11585, 13), (11585, 13)));
store_coeffs!(output, q0, q1, q2, q3);
}
/// 8-point `fdct_ii` step: four butterflies, then an embedded 4-point
/// `fdct_ii` into `output[0..4]` and an embedded 4-point `fdst_iv` into
/// `output[4..8]`, the latter reversed in place.
#[$m]
$($s)* fn daala_fdct_ii_8<T: TxOperations>(
r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
) {
// Butterflies with asymmetric output, pairing r[i] with r[7 - i].
let (r0h, r7) = butterfly_neg(r0, r7);
let (r1, r6h) = butterfly_add(r1, r6);
let (r2h, r5) = butterfly_neg(r2, r5);
let (r3, r4h) = butterfly_add(r3, r4);
// Embedded 4-point steps with asymmetric input.
daala_fdct_ii_4_asym(r0h, r1, r2h, r3, &mut output[0..4]);
daala_fdst_iv_4_asym(r7, r6h, r5, r4h, &mut output[4..8]);
// Flip the four values just written to the upper half.
output[4..8].reverse();
}
/// 8-point forward DCT entry point: reads `input[0..8]` and writes
/// `output[0..8]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 8 elements.
#[$m]
pub $($s)* fn daala_fdct8<T: TxOperations>(input: &[T], output: &mut [T]) {
  assert!(input.len() >= 8);
  assert!(output.len() >= 8);
  let mut staged: [T; 8] = [T::zero(); 8];
  daala_fdct_ii_8(
    input[0], input[1], input[2], input[3],
    input[4], input[5], input[6], input[7],
    &mut staged,
  );
  // Emit the staged values in 3-bit bit-reversed index order.
  output[..8].copy_from_slice(&[
    staged[0], staged[4], staged[2], staged[6],
    staged[1], staged[5], staged[3], staged[7],
  ]);
}
/// 8-point `fdst_iv` step on plain inputs. Writes eight values to
/// `output[0..8]` in the order r0..r7.
#[$m]
pub $($s)* fn daala_fdst_iv_8<T: TxOperations>(
r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
) {
// Stage 0: rotations pairing r[i] with r[7 - i], alternating Add and Sub kernels.
let (r0, r7) =
RotateAdd::kernel(r0, r7, ((17911, 14), (14699, 14), (803, 13)));
let (r6, r1) =
RotateSub::kernel(r6, r1, ((20435, 14), (21845, 15), (1189, 12)));
let (r2, r5) =
RotateAdd::kernel(r2, r5, ((22173, 14), (3363, 13), (15447, 15)));
let (r4, r3) =
RotateSub::kernel(r4, r3, ((23059, 14), (2271, 14), (5197, 13)));
// Stage 1: butterflies with asymmetric output (first result becomes a pair).
let (r0, r3h) = butterfly_add(r0, r3);
let (r2, r1h) = butterfly_sub(r2, r1);
let (r5, r6h) = butterfly_add(r5, r6);
let (r7, r4h) = butterfly_sub(r7, r4);
// Stage 2: butterflies consuming those pairs, back to plain values.
let (r7, r6) = butterfly_add_asym(r7, r6h);
let (r5, r3) = butterfly_add_asym(r5, r3h);
let (r2, r4) = butterfly_add_asym(r2, r4h);
let (r0, r1) = butterfly_sub_asym(r0, r1h);
// Stage 3: final rotations; r0 and r7 pass through unchanged.
let (r3, r4) =
RotateSubAvg::kernel(r3, r4, ((10703, 13), (8867, 14), (3135, 12)));
let (r2, r5) =
RotateNegAvg::kernel(r2, r5, ((10703, 13), (8867, 14), (3135, 12)));
let (r1, r6) = RotatePi4SubAvg::kernel(r1, r6, ((11585, 13), (11585, 13)));
store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
}
/// 8-point forward DST entry point: reads `input[0..8]` and writes
/// `output[0..8]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 8 elements.
#[$m]
pub $($s)* fn daala_fdst8<T: TxOperations>(input: &[T], output: &mut [T]) {
  assert!(input.len() >= 8);
  assert!(output.len() >= 8);
  let mut staged: [T; 8] = [T::zero(); 8];
  daala_fdst_iv_8(
    input[0], input[1], input[2], input[3],
    input[4], input[5], input[6], input[7],
    &mut staged,
  );
  // Emit the staged values in 3-bit bit-reversed index order.
  output[..8].copy_from_slice(&[
    staged[0], staged[4], staged[2], staged[6],
    staged[1], staged[5], staged[3], staged[7],
  ]);
}
/// 4-point `fdst_iv` step on plain inputs. Writes four values to
/// `output[0..4]` in the order q0, q1, q2, q3.
#[$m]
$($s)* fn daala_fdst_iv_4<T: TxOperations>(
q0: T, q1: T, q2: T, q3: T, output: &mut [T],
) {
// Stage 0: rotations on the outer (q0, q3) and inner (q2, q1) pairs.
let (q0, q3) =
RotateAddShift::kernel(q0, q3, ((13623, 14), (4551, 12), (565, 11)));
let (q2, q1) =
RotateSubShift::kernel(q2, q1, ((16069, 14), (12785, 15), (1609, 11)));
// Stage 1: butterflies; the (half, full) pair is built on the spot.
let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);
// Stage 2: final rotation of the middle pair.
let (q2, q1) = RotatePi4AddAvg::kernel(q2, q1, ((11585, 13), (11585, 13)));
store_coeffs!(output, q0, q1, q2, q3);
}
/// 8-point `fdct_ii` step with asymmetric input (alternating single values and
/// pairs). The lower half goes through the plain 4-point `fdct_ii`, the upper
/// half through the plain 4-point `fdst_iv` and is then reversed in place.
#[$m]
$($s)* fn daala_fdct_ii_8_asym<T: TxOperations>(
r0h: T, r1: (T, T), r2h: T, r3: (T, T), r4h: T, r5: (T, T), r6h: T,
r7: (T, T), output: &mut [T],
) {
// Butterflies consuming the asymmetric input, pairing r[i] with r[7 - i].
let (r0, r7) = butterfly_neg_asym(r0h, r7);
let (r1, r6) = butterfly_sub_asym(r1, r6h);
let (r2, r5) = butterfly_neg_asym(r2h, r5);
let (r3, r4) = butterfly_sub_asym(r3, r4h);
// Embedded 4-point steps on plain values.
daala_fdct_ii_4(r0, r1, r2, r3, &mut output[0..4]);
daala_fdst_iv_4(r7, r6, r5, r4, &mut output[4..8]);
// Flip the four values just written to the upper half.
output[4..8].reverse();
}
/// 8-point `fdst_iv` step with asymmetric input (alternating pairs and single
/// values). Writes eight values to `output[0..8]` in the order r0..r7.
#[$m]
$($s)* fn daala_fdst_iv_8_asym<T: TxOperations>(
r0: (T, T), r1h: T, r2: (T, T), r3h: T, r4: (T, T), r5h: T, r6: (T, T),
r7h: T, output: &mut [T],
) {
// Stage 0: half-kernel rotations pairing r[i] with r[7 - i].
let (r0, r7) =
RotateAdd::half_kernel(r0, r7h, ((12665, 14), (5197, 12), (2271, 14)));
let (r6, r1) =
RotateSub::half_kernel(r6, r1h, ((14449, 14), (30893, 15), (3363, 13)));
let (r2, r5) =
RotateAdd::half_kernel(r2, r5h, ((15679, 14), (1189, 11), (5461, 13)));
let (r4, r3) =
RotateSub::half_kernel(r4, r3h, ((16305, 14), (803, 12), (14699, 14)));
// Stage 1: butterflies with asymmetric output (first result becomes a pair).
let (r0, r3h) = butterfly_add(r0, r3);
let (r2, r1h) = butterfly_sub(r2, r1);
let (r5, r6h) = butterfly_add(r5, r6);
let (r7, r4h) = butterfly_sub(r7, r4);
// Stage 2: butterflies consuming those pairs, back to plain values.
let (r7, r6) = butterfly_add_asym(r7, r6h);
let (r5, r3) = butterfly_add_asym(r5, r3h);
let (r2, r4) = butterfly_add_asym(r2, r4h);
let (r0, r1) = butterfly_sub_asym(r0, r1h);
// Stage 3: final rotations; r0 and r7 pass through unchanged.
let (r3, r4) =
RotateSubAvg::kernel(r3, r4, ((669, 9), (8867, 14), (3135, 12)));
let (r2, r5) =
RotateNegAvg::kernel(r2, r5, ((669, 9), (8867, 14), (3135, 12)));
let (r1, r6) = RotatePi4SubAvg::kernel(r1, r6, ((5793, 12), (11585, 13)));
store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
}
/// 16-point `fdct_ii` step: eight butterflies, then an embedded 8-point
/// `fdct_ii` into `output[0..8]` and an embedded 8-point `fdst_iv` into
/// `output[8..16]`, the latter reversed in place.
/// Inputs are named s0..s9, sa..sf (hexadecimal digits).
#[$m]
$($s)* fn daala_fdct_ii_16<T: TxOperations>(
s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
) {
// Butterflies with asymmetric output, pairing s[i] with s[15 - i].
let (s0h, sf) = butterfly_neg(s0, sf);
let (s1, seh) = butterfly_add(s1, se);
let (s2h, sd) = butterfly_neg(s2, sd);
let (s3, sch) = butterfly_add(s3, sc);
let (s4h, sb) = butterfly_neg(s4, sb);
let (s5, sah) = butterfly_add(s5, sa);
let (s6h, s9) = butterfly_neg(s6, s9);
let (s7, s8h) = butterfly_add(s7, s8);
// Embedded 8-point steps with asymmetric input.
daala_fdct_ii_8_asym(s0h, s1, s2h, s3, s4h, s5, s6h, s7, &mut output[0..8]);
daala_fdst_iv_8_asym(sf, seh, sd, sch, sb, sah, s9, s8h, &mut output[8..16]);
// Flip the eight values just written to the upper half.
output[8..16].reverse();
}
/// 16-point forward DCT entry point: reads `input[0..16]` and writes
/// `output[0..16]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 16 elements.
#[$m]
$($s)* fn daala_fdct16<T: TxOperations>(input: &[T], output: &mut [T]) {
  assert!(input.len() >= 16);
  assert!(output.len() >= 16);
  let mut staged: [T; 16] = [T::zero(); 16];
  daala_fdct_ii_16(
    input[0], input[1], input[2], input[3],
    input[4], input[5], input[6], input[7],
    input[8], input[9], input[10], input[11],
    input[12], input[13], input[14], input[15],
    &mut staged,
  );
  // Emit the staged values in 4-bit bit-reversed index order.
  output[..16].copy_from_slice(&[
    staged[0], staged[8], staged[4], staged[12],
    staged[2], staged[10], staged[6], staged[14],
    staged[1], staged[9], staged[5], staged[13],
    staged[3], staged[11], staged[7], staged[15],
  ]);
}
/// 16-point `fdst_iv` step on plain inputs. Writes sixteen values to
/// `output[0..16]` in the order s0..s9, sa..sf.
#[$m]
$($s)* fn daala_fdst_iv_16<T: TxOperations>(
s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
) {
// Stage 0: rotations pairing s[i] with s[15 - i], alternating Add and Sub kernels.
let (s0, sf) =
RotateAddShift::kernel(s0, sf, ((24279, 15), (11003, 13), (1137, 14)));
let (se, s1) =
RotateSubShift::kernel(se, s1, ((1645, 11), (305, 8), (425, 11)));
let (s2, sd) =
RotateAddShift::kernel(s2, sd, ((14053, 14), (8423, 13), (2815, 13)));
let (sc, s3) =
RotateSubShift::kernel(sc, s3, ((14811, 14), (7005, 13), (3903, 13)));
let (s4, sb) =
RotateAddShift::kernel(s4, sb, ((30853, 15), (11039, 14), (9907, 14)));
let (sa, s5) =
RotateSubShift::kernel(sa, s5, ((15893, 14), (3981, 13), (1489, 11)));
let (s6, s9) =
RotateAddShift::kernel(s6, s9, ((32413, 15), (601, 11), (13803, 14)));
let (s8, s7) =
RotateSubShift::kernel(s8, s7, ((32729, 15), (201, 11), (1945, 11)));
// Stage 1: butterflies; the (half, full) pair is built on the spot.
let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);
// Stage 2: butterflies with asymmetric output. The first four keep only the
// full value of the pair (the half is discarded via the `_`-prefixed name).
let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
let (s0, s3h) = butterfly_add(s0, s3);
let (sd, seh) = butterfly_add(sd, se);
let (s2, s1h) = butterfly_sub(s2, s1);
let (sf, sch) = butterfly_sub(sf, sc);
// Stage 3: rotations on the inner values.
let (s8, s7) =
RotateAddAvg::kernel(s8, s7, ((301, 8), (1609, 11), (12785, 15)));
let (s9, s6) =
RotateAdd::kernel(s9h, s6h, ((11363, 13), (9041, 15), (4551, 13)));
let (s5, sa) =
RotateNegAvg::kernel(s5, sa, ((5681, 12), (9041, 15), (4551, 12)));
let (s4, sb) =
RotateNeg::kernel(s4h, sbh, ((9633, 13), (12873, 14), (6393, 15)));
// Stage 4: butterflies consuming the pairs from stage 2 and fresh on-the-spot pairs.
let (s2, sc) = butterfly_add_asym(s2, sch);
let (s0, s1) = butterfly_sub_asym(s0, s1h);
let (sf, se) = butterfly_add_asym(sf, seh);
let (sd, s3) = butterfly_add_asym(sd, s3h);
let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);
// Stage 5: final rotations.
let (sc, s3) =
RotateAddAvg::kernel(sc, s3, ((669, 9), (8867, 14), (3135, 12)));
let (s2, sd) =
RotateNegAvg::kernel(s2, sd, ((669, 9), (8867, 14), (3135, 12)));
let (sa, s5) = RotatePi4AddAvg::kernel(sa, s5, ((5793, 12), (11585, 13)));
let (s6, s9) = RotatePi4AddAvg::kernel(s6, s9, ((5793, 12), (11585, 13)));
let (se, s1) = RotatePi4AddAvg::kernel(se, s1, ((5793, 12), (11585, 13)));
store_coeffs!(
output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
);
}
/// 16-point forward DST entry point: reads `input[0..16]` and writes
/// `output[0..16]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 16 elements.
#[$m]
$($s)* fn daala_fdst16<T: TxOperations>(input: &[T], output: &mut [T]) {
  assert!(input.len() >= 16);
  assert!(output.len() >= 16);
  let mut staged: [T; 16] = [T::zero(); 16];
  daala_fdst_iv_16(
    input[0], input[1], input[2], input[3],
    input[4], input[5], input[6], input[7],
    input[8], input[9], input[10], input[11],
    input[12], input[13], input[14], input[15],
    &mut staged,
  );
  // Emit the staged values in 4-bit bit-reversed index order.
  output[..16].copy_from_slice(&[
    staged[0], staged[8], staged[4], staged[12],
    staged[2], staged[10], staged[6], staged[14],
    staged[1], staged[9], staged[5], staged[13],
    staged[3], staged[11], staged[7], staged[15],
  ]);
}
/// 16-point `fdct_ii` step with asymmetric input (alternating single values
/// and pairs). The lower half goes through the plain 8-point `fdct_ii`, the
/// upper half through the plain 8-point `fdst_iv` and is then reversed in place.
#[$m]
$($s)* fn daala_fdct_ii_16_asym<T: TxOperations>(
s0h: T, s1: (T, T), s2h: T, s3: (T, T), s4h: T, s5: (T, T), s6h: T,
s7: (T, T), s8h: T, s9: (T, T), sah: T, sb: (T, T), sch: T, sd: (T, T),
seh: T, sf: (T, T), output: &mut [T],
) {
// Butterflies consuming the asymmetric input, pairing s[i] with s[15 - i].
let (s0, sf) = butterfly_neg_asym(s0h, sf);
let (s1, se) = butterfly_sub_asym(s1, seh);
let (s2, sd) = butterfly_neg_asym(s2h, sd);
let (s3, sc) = butterfly_sub_asym(s3, sch);
let (s4, sb) = butterfly_neg_asym(s4h, sb);
let (s5, sa) = butterfly_sub_asym(s5, sah);
let (s6, s9) = butterfly_neg_asym(s6h, s9);
let (s7, s8) = butterfly_sub_asym(s7, s8h);
// Embedded 8-point steps on plain values.
daala_fdct_ii_8(s0, s1, s2, s3, s4, s5, s6, s7, &mut output[0..8]);
daala_fdst_iv_8(sf, se, sd, sc, sb, sa, s9, s8, &mut output[8..16]);
// Flip the eight values just written to the upper half.
output[8..16].reverse();
}
/// 16-point `fdst_iv` step with asymmetric input (alternating pairs and single
/// values). Writes sixteen values to `output[0..16]` in the order s0..s9, sa..sf.
#[$m]
$($s)* fn daala_fdst_iv_16_asym<T: TxOperations>(
s0: (T, T), s1h: T, s2: (T, T), s3h: T, s4: (T, T), s5h: T, s6: (T, T),
s7h: T, s8: (T, T), s9h: T, sa: (T, T), sbh: T, sc: (T, T), sdh: T,
se: (T, T), sfh: T, output: &mut [T],
) {
// Stage 0: half-kernel rotations pairing s[i] with s[15 - i].
let (s0, sf) =
RotateAddShift::half_kernel(s0, sfh, ((1073, 11), (62241, 15), (201, 11)));
let (se, s1) = RotateSubShift::half_kernel(
se,
s1h,
((18611, 15), (55211, 15), (601, 11)),
);
let (s2, sd) =
RotateAddShift::half_kernel(s2, sdh, ((9937, 14), (1489, 10), (3981, 13)));
let (sc, s3) = RotateSubShift::half_kernel(
sc,
s3h,
((10473, 14), (39627, 15), (11039, 14)),
);
let (s4, sb) =
RotateAddShift::half_kernel(s4, sbh, ((2727, 12), (3903, 12), (7005, 13)));
let (sa, s5) =
RotateSubShift::half_kernel(sa, s5h, ((5619, 13), (2815, 12), (8423, 13)));
let (s6, s9) =
RotateAddShift::half_kernel(s6, s9h, ((2865, 12), (13599, 15), (305, 8)));
let (s8, s7) = RotateSubShift::half_kernel(
s8,
s7h,
((23143, 15), (1137, 13), (11003, 13)),
);
// Stage 1: butterflies; the (half, full) pair is built on the spot.
let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);
// Stage 2: butterflies with asymmetric output. The first four keep only the
// full value of the pair (the half is discarded via the `_`-prefixed name).
let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
let (s0, s3h) = butterfly_add(s0, s3);
let (sd, seh) = butterfly_add(sd, se);
let (s2, s1h) = butterfly_sub(s2, s1);
let (sf, sch) = butterfly_sub(sf, sc);
// Stage 3: rotations on the inner values (non-averaging kernels here).
let (s8, s7) =
RotateAdd::kernel(s8, s7, ((9633, 13), (12873, 14), (6393, 15)));
let (s9, s6) =
RotateAdd::kernel(s9h, s6h, ((22725, 14), (9041, 15), (4551, 13)));
let (s5, sa) =
RotateNeg::kernel(s5, sa, ((11363, 13), (9041, 15), (4551, 13)));
let (s4, sb) =
RotateNeg::kernel(s4h, sbh, ((9633, 13), (12873, 14), (6393, 15)));
// Stage 4: butterflies consuming the pairs from stage 2 and fresh on-the-spot pairs.
let (s2, sc) = butterfly_add_asym(s2, sch);
let (s0, s1) = butterfly_sub_asym(s0, s1h);
let (sf, se) = butterfly_add_asym(sf, seh);
let (sd, s3) = butterfly_add_asym(sd, s3h);
let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);
// Stage 5: final rotations.
let (sc, s3) =
RotateAdd::kernel(sc, s3, ((10703, 13), (8867, 14), (3135, 13)));
let (s2, sd) =
RotateNeg::kernel(s2, sd, ((10703, 13), (8867, 14), (3135, 13)));
let (sa, s5) = RotatePi4Add::kernel(sa, s5, ((11585, 13), (5793, 13)));
let (s6, s9) = RotatePi4Add::kernel(s6, s9, ((11585, 13), (5793, 13)));
let (se, s1) = RotatePi4Add::kernel(se, s1, ((11585, 13), (5793, 13)));
store_coeffs!(
output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
);
}
/// 32-point `fdct_ii` step: sixteen butterflies, then an embedded 16-point
/// `fdct_ii` into `output[0..16]` and an embedded 16-point `fdst_iv` into
/// `output[16..32]`, the latter reversed in place.
/// Inputs are named t0..t9, ta..tv (base-32 digits).
#[$m]
$($s)* fn daala_fdct_ii_32<T: TxOperations>(
t0: T, t1: T, t2: T, t3: T, t4: T, t5: T, t6: T, t7: T, t8: T, t9: T, ta: T,
tb: T, tc: T, td: T, te: T, tf: T, tg: T, th: T, ti: T, tj: T, tk: T, tl: T,
tm: T, tn: T, to: T, tp: T, tq: T, tr: T, ts: T, tt: T, tu: T, tv: T,
output: &mut [T],
) {
// Butterflies with asymmetric output, pairing t[i] with t[31 - i].
let (t0h, tv) = butterfly_neg(t0, tv);
let (t1, tuh) = butterfly_add(t1, tu);
let (t2h, tt) = butterfly_neg(t2, tt);
let (t3, tsh) = butterfly_add(t3, ts);
let (t4h, tr) = butterfly_neg(t4, tr);
let (t5, tqh) = butterfly_add(t5, tq);
let (t6h, tp) = butterfly_neg(t6, tp);
let (t7, toh) = butterfly_add(t7, to);
let (t8h, tn) = butterfly_neg(t8, tn);
let (t9, tmh) = butterfly_add(t9, tm);
let (tah, tl) = butterfly_neg(ta, tl);
let (tb, tkh) = butterfly_add(tb, tk);
let (tch, tj) = butterfly_neg(tc, tj);
let (td, tih) = butterfly_add(td, ti);
let (teh, th) = butterfly_neg(te, th);
let (tf, tgh) = butterfly_add(tf, tg);
// Embedded 16-point `fdct_ii` on the lower half.
daala_fdct_ii_16_asym(
t0h,
t1,
t2h,
t3,
t4h,
t5,
t6h,
t7,
t8h,
t9,
tah,
tb,
tch,
td,
teh,
tf,
&mut output[0..16],
);
// Embedded 16-point `fdst_iv` on the upper half, fed in descending order.
daala_fdst_iv_16_asym(
tv,
tuh,
tt,
tsh,
tr,
tqh,
tp,
toh,
tn,
tmh,
tl,
tkh,
tj,
tih,
th,
tgh,
&mut output[16..32],
);
// Flip the sixteen values just written to the upper half.
output[16..32].reverse();
}
/// 32-point forward DCT entry point: reads `input[0..32]` and writes
/// `output[0..32]`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 32 elements.
#[$m]
$($s)* fn daala_fdct32<T: TxOperations>(input: &[T], output: &mut [T]) {
  assert!(input.len() >= 32);
  assert!(output.len() >= 32);
  let mut staged: [T; 32] = [T::zero(); 32];
  daala_fdct_ii_32(
    input[0], input[1], input[2], input[3],
    input[4], input[5], input[6], input[7],
    input[8], input[9], input[10], input[11],
    input[12], input[13], input[14], input[15],
    input[16], input[17], input[18], input[19],
    input[20], input[21], input[22], input[23],
    input[24], input[25], input[26], input[27],
    input[28], input[29], input[30], input[31],
    &mut staged,
  );
  // Emit the staged values in 5-bit bit-reversed index order.
  output[..32].copy_from_slice(&[
    staged[0], staged[16], staged[8], staged[24],
    staged[4], staged[20], staged[12], staged[28],
    staged[2], staged[18], staged[10], staged[26],
    staged[6], staged[22], staged[14], staged[30],
    staged[1], staged[17], staged[9], staged[25],
    staged[5], staged[21], staged[13], staged[29],
    staged[3], staged[19], staged[11], staged[27],
    staged[7], staged[23], staged[15], staged[31],
  ]);
}
/// 32-point `fdct_ii` step with asymmetric input (alternating single values
/// and pairs). The lower half goes through the plain 16-point `fdct_ii`, the
/// upper half through the plain 16-point `fdst_iv` and is then reversed in place.
#[$m]
$($s)* fn daala_fdct_ii_32_asym<T: TxOperations>(
t0h: T, t1: (T, T), t2h: T, t3: (T, T), t4h: T, t5: (T, T), t6h: T,
t7: (T, T), t8h: T, t9: (T, T), tah: T, tb: (T, T), tch: T, td: (T, T),
teh: T, tf: (T, T), tgh: T, th: (T, T), tih: T, tj: (T, T), tkh: T,
tl: (T, T), tmh: T, tn: (T, T), toh: T, tp: (T, T), tqh: T, tr: (T, T),
tsh: T, tt: (T, T), tuh: T, tv: (T, T), output: &mut [T],
) {
// Butterflies consuming the asymmetric input, pairing t[i] with t[31 - i].
let (t0, tv) = butterfly_neg_asym(t0h, tv);
let (t1, tu) = butterfly_sub_asym(t1, tuh);
let (t2, tt) = butterfly_neg_asym(t2h, tt);
let (t3, ts) = butterfly_sub_asym(t3, tsh);
let (t4, tr) = butterfly_neg_asym(t4h, tr);
let (t5, tq) = butterfly_sub_asym(t5, tqh);
let (t6, tp) = butterfly_neg_asym(t6h, tp);
let (t7, to) = butterfly_sub_asym(t7, toh);
let (t8, tn) = butterfly_neg_asym(t8h, tn);
let (t9, tm) = butterfly_sub_asym(t9, tmh);
let (ta, tl) = butterfly_neg_asym(tah, tl);
let (tb, tk) = butterfly_sub_asym(tb, tkh);
let (tc, tj) = butterfly_neg_asym(tch, tj);
let (td, ti) = butterfly_sub_asym(td, tih);
let (te, th) = butterfly_neg_asym(teh, th);
let (tf, tg) = butterfly_sub_asym(tf, tgh);
// Embedded 16-point `fdct_ii` on the lower half.
daala_fdct_ii_16(
t0,
t1,
t2,
t3,
t4,
t5,
t6,
t7,
t8,
t9,
ta,
tb,
tc,
td,
te,
tf,
&mut output[0..16],
);
// Embedded 16-point `fdst_iv` on the upper half, fed in descending order.
daala_fdst_iv_16(
tv,
tu,
tt,
ts,
tr,
tq,
tp,
to,
tn,
tm,
tl,
tk,
tj,
ti,
th,
tg,
&mut output[16..32],
);
// Flip the sixteen values just written to the upper half.
output[16..32].reverse();
}
#[$m]
$($s)* fn daala_fdst_iv_32_asym<T: TxOperations>(
t0: (T, T), t1h: T, t2: (T, T), t3h: T, t4: (T, T), t5h: T, t6: (T, T),
t7h: T, t8: (T, T), t9h: T, ta: (T, T), tbh: T, tc: (T, T), tdh: T,
te: (T, T), tfh: T, tg: (T, T), thh: T, ti: (T, T), tjh: T, tk: (T, T),
tlh: T, tm: (T, T), tnh: T, to: (T, T), tph: T, tq: (T, T), trh: T,
ts: (T, T), tth: T, tu: (T, T), tvh: T, output: &mut [T],
) {
let (t0, tv) =
RotateAdd::half_kernel(t0, tvh, ((5933, 13), (22595, 14), (1137, 15)));
let (tu, t1) =
RotateSub::half_kernel(tu, t1h, ((6203, 13), (21403, 14), (3409, 15)));
let (t2, tt) =
RotateAdd::half_kernel(t2, tth, ((25833, 15), (315, 8), (5673, 15)));
let (ts, t3) =
RotateSub::half_kernel(ts, t3h, ((26791, 15), (4717, 12), (7923, 15)));
let (t4, tr) =
RotateAdd::half_kernel(t4, trh, ((6921, 13), (17531, 14), (10153, 15)));
let (tq, t5) =
RotateSub::half_kernel(tq, t5h, ((28511, 15), (32303, 15), (1545, 12)));
let (t6, tp) =
RotateAdd::half_kernel(t6, tph, ((29269, 15), (14733, 14), (1817, 12)));
let (to, t7) =
RotateSub::half_kernel(to, t7h, ((29957, 15), (13279, 14), (8339, 14)));
let (t8, tn) =
RotateAdd::half_kernel(t8, tnh, ((7643, 13), (11793, 14), (18779, 15)));
let (tm, t9) =
RotateSub::half_kernel(tm, t9h, ((15557, 14), (20557, 15), (20835, 15)));
let (ta, tl) =
RotateAdd::half_kernel(ta, tlh, ((31581, 15), (17479, 15), (22841, 15)));
let (tk, tb) =
RotateSub::half_kernel(tk, tbh, ((7993, 13), (14359, 15), (3099, 12)));
let (tc, tj) =
RotateAdd::half_kernel(tc, tjh, ((16143, 14), (2801, 13), (26683, 15)));
let (ti, td) =
RotateSub::half_kernel(ti, tdh, ((16261, 14), (4011, 14), (14255, 14)));
let (te, th) =
RotateAdd::half_kernel(te, thh, ((32679, 15), (4821, 15), (30269, 15)));
let (tg, tf) =
RotateSub::half_kernel(tg, tfh, ((16379, 14), (201, 12), (15977, 14)));
let (t0, tfh) = butterfly_add(t0, tf);
let (tv, tgh) = butterfly_sub(tv, tg);
let (th, tuh) = butterfly_add(th, tu);
let (te, t1h) = butterfly_sub(te, t1);
let (t2, tdh) = butterfly_add(t2, td);
let (tt, tih) = butterfly_sub(tt, ti);
let (tj, tsh) = butterfly_add(tj, ts);
let (tc, t3h) = butterfly_sub(tc, t3);
let (t4, tbh) = butterfly_add(t4, tb);
let (tr, tkh) = butterfly_sub(tr, tk);
let (tl, tqh) = butterfly_add(tl, tq);
let (ta, t5h) = butterfly_sub(ta, t5);
let (t6, t9h) = butterfly_add(t6, t9);
let (tp, tmh) = butterfly_sub(tp, tm);
let (tn, toh) = butterfly_add(tn, to);
let (t8, t7h) = butterfly_sub(t8, t7);
let (t0, t7) = butterfly_sub_asym(t0, t7h);
let (tv, to) = butterfly_add_asym(tv, toh);
let (tp, tu) = butterfly_sub_asym(tp, tuh);
let (t6, t1) = butterfly_add_asym(t6, t1h);
let (t2, t5) = butterfly_sub_asym(t2, t5h);
let (tt, tq) = butterfly_add_asym(tt, tqh);
let (tr, ts) = butterfly_sub_asym(tr, tsh);
let (t4, t3) = butterfly_add_asym(t4, t3h);
let (t8, tg) = butterfly_add_asym(t8, tgh);
let (te, tm) = butterfly_sub_asym(te, tmh);
let (tn, tf) = butterfly_add_asym(tn, tfh);
let (th, t9) = butterfly_sub_asym(th, t9h);
let (ta, ti) = butterfly_add_asym(ta, tih);
let (tc, tk) = butterfly_sub_asym(tc, tkh);
let (tl, td) = butterfly_add_asym(tl, tdh);
let (tj, tb) = butterfly_sub_asym(tj, tbh);
let (tf, tg) =
RotateSub::kernel(tf, tg, ((17911, 14), (14699, 14), (803, 13)));
let (th, te) =
RotateAdd::kernel(th, te, ((10217, 13), (5461, 13), (1189, 12)));
let (ti, td) =
RotateAdd::kernel(ti, td, ((5543, 12), (3363, 13), (7723, 14)));
let (tc, tj) =
RotateSub::kernel(tc, tj, ((11529, 13), (2271, 14), (5197, 13)));
let (tb, tk) =
RotateNeg::kernel(tb, tk, ((11529, 13), (2271, 14), (5197, 13)));
let (ta, tl) =
RotateNeg::kernel(ta, tl, ((5543, 12), (3363, 13), (7723, 14)));
let (t9, tm) =
RotateNeg::kernel(t9, tm, ((10217, 13), (5461, 13), (1189, 12)));
let (t8, tn) =
RotateNeg::kernel(t8, tn, ((17911, 14), (14699, 14), (803, 13)));
let (t3, t0h) = butterfly_sub(t3, t0);
let (ts, tvh) = butterfly_add(ts, tv);
let (tu, tth) = butterfly_sub(tu, tt);
let (t1, t2h) = butterfly_add(t1, t2);
let ((_toh, to), t4h) = butterfly_add(to, t4);
let ((_tqh, tq), t6h) = butterfly_sub(tq, t6);
let ((_t7h, t7), trh) = butterfly_add(t7, tr);
let ((_t5h, t5), tph) = butterfly_sub(t5, tp);
let (tb, t8h) = butterfly_sub(tb, t8);
let (tk, tnh) = butterfly_add(tk, tn);
let (tm, tlh) = butterfly_sub(tm, tl);
let (t9, tah) = butterfly_add(t9, ta);
let (tf, tch) = butterfly_sub(tf, tc);
let (tg, tjh) = butterfly_add(tg, tj);
let (ti, thh) = butterfly_sub(ti, th);
let (td, teh) = butterfly_add(td, te);
let (to, t7) = RotateAdd::kernel(to, t7, ((301, 8), (1609, 11), (6393, 15)));
let (tph, t6h) =
RotateAdd::kernel(tph, t6h, ((11363, 13), (9041, 15), (4551, 13)));
let (t5, tq) =
RotateNeg::kernel(t5, tq, ((5681, 12), (9041, 15), (4551, 13)));
let (t4h, trh) =
RotateNeg::kernel(t4h, trh, ((9633, 13), (12873, 14), (6393, 15)));
let (t1, t0) = butterfly_add_asym(t1, t0h);
let (tu, tv) = butterfly_sub_asym(tu, tvh);
let (ts, t2) = butterfly_sub_asym(ts, t2h);
let (t3, tt) = butterfly_sub_asym(t3, tth);
let (t5, t4) = butterfly_add_asym((t5.rshift1(), t5), t4h);
let (tq, tr) = butterfly_sub_asym((tq.rshift1(), tq), trh);
let (t7, t6) = butterfly_add_asym((t7.rshift1(), t7), t6h);
let (to, tp) = butterfly_sub_asym((to.rshift1(), to), tph);
let (t9, t8) = butterfly_add_asym(t9, t8h);
let (tm, tn) = butterfly_sub_asym(tm, tnh);
let (tk, ta) = butterfly_sub_asym(tk, tah);
let (tb, tl) = butterfly_sub_asym(tb, tlh);
let (ti, tc) = butterfly_add_asym(ti, tch);
let (td, tj) = butterfly_add_asym(td, tjh);
let (tf, te) = butterfly_add_asym(tf, teh);
let (tg, th) = butterfly_sub_asym(tg, thh);
let (t2, tt) = RotateNeg::kernel(t2, tt, ((669, 9), (8867, 14), (3135, 13)));
let (ts, t3) = RotateAdd::kernel(ts, t3, ((669, 9), (8867, 14), (3135, 13)));
let (ta, tl) = RotateNeg::kernel(ta, tl, ((669, 9), (8867, 14), (3135, 13)));
let (tk, tb) = RotateAdd::kernel(tk, tb, ((669, 9), (8867, 14), (3135, 13)));
let (tc, tj) = RotateAdd::kernel(tc, tj, ((669, 9), (8867, 14), (3135, 13)));
let (ti, td) = RotateNeg::kernel(ti, td, ((669, 9), (8867, 14), (3135, 13)));
let (tu, t1) = RotatePi4Add::kernel(tu, t1, ((5793, 12), (5793, 13)));
let (tq, t5) = RotatePi4Add::kernel(tq, t5, ((5793, 12), (5793, 13)));
let (tp, t6) = RotatePi4Sub::kernel(tp, t6, ((5793, 12), (5793, 13)));
let (tm, t9) = RotatePi4Add::kernel(tm, t9, ((5793, 12), (5793, 13)));
let (te, th) = RotatePi4Add::kernel(te, th, ((5793, 12), (5793, 13)));
store_coeffs!(
output, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,
tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv
);
}
/// Forward 64-point transform: reads the first 64 entries of `input` and
/// writes 64 coefficients into the first 64 entries of `output`.
///
/// The input is first folded by butterflies that pair `input[j]` with
/// `input[63 - j]`. The first half of the folded values is passed to
/// `daala_fdct_ii_32_asym`, the second half (in reverse order) to
/// `daala_fdst_iv_32_asym`, and the two 32-entry results are then
/// interleaved into `output`.
///
/// # Panics
///
/// Panics if `input` or `output` holds fewer than 64 elements.
#[$m]
$($s)* fn daala_fdct64<T: TxOperations>(input: &[T], output: &mut [T]) {
  // For output quad `i`, the offset inside each 16-entry group of the
  // intermediate buffer that the quad is gathered from. The table is the
  // 4-bit bit-reversal of the quad index.
  const QUAD_SOURCE: [usize; 16] =
    [0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15];

  assert!(input.len() >= 64);
  assert!(output.len() >= 64);

  let mut asym: [(T, T); 32] = [(T::zero(), T::zero()); 32];
  let mut half: [T; 32] = [T::zero(); 32];

  // Fold the input: each step consumes two samples from the front and the
  // two mirrored samples from the back, filling slot `i` and slot `31 - i`
  // of both work arrays.
  for i in 0..16 {
    let j = 2 * i;
    let (ah, c) = butterfly_neg(input[j], input[63 - j]);
    let (b, dh) = butterfly_add(input[j + 1], input[62 - j]);
    half[i] = ah;
    half[31 - i] = dh;
    asym[i] = b;
    asym[31 - i] = c;
  }

  let mut temp_out: [T; 64] = [T::zero(); 64];

  // Slots 0..16 of the work arrays feed the first 32 intermediate values.
  daala_fdct_ii_32_asym(
    half[0], asym[0], half[1], asym[1], half[2], asym[2], half[3], asym[3],
    half[4], asym[4], half[5], asym[5], half[6], asym[6], half[7], asym[7],
    half[8], asym[8], half[9], asym[9], half[10], asym[10],
    half[11], asym[11], half[12], asym[12], half[13], asym[13],
    half[14], asym[14], half[15], asym[15],
    &mut temp_out[0..32],
  );

  // Slots 31 down to 16 feed the last 32 intermediate values.
  daala_fdst_iv_32_asym(
    asym[31], half[31], asym[30], half[30], asym[29], half[29],
    asym[28], half[28], asym[27], half[27], asym[26], half[26],
    asym[25], half[25], asym[24], half[24], asym[23], half[23],
    asym[22], half[22], asym[21], half[21], asym[20], half[20],
    asym[19], half[19], asym[18], half[18], asym[17], half[17],
    asym[16], half[16],
    &mut temp_out[32..64],
  );
  temp_out[32..64].reverse();

  // Interleave the intermediate buffer into the final coefficient order.
  // The buffer is indexed in place rather than handed by value to a helper,
  // which would copy all 64 entries for every quad.
  for (quad, &j) in output[..64].chunks_exact_mut(4).zip(QUAD_SOURCE.iter()) {
    quad[0] = temp_out[j];
    quad[1] = temp_out[32 + j];
    quad[2] = temp_out[16 + j];
    quad[3] = temp_out[48 + j];
  }
}
/// 4-point identity forward transform: copies the first four entries of
/// `input` into the first four entries of `output` unchanged.
///
/// Panics if either slice holds fewer than four elements.
#[$m]
pub $($s)* fn fidentity4<T: TxOperations>(input: &[T], output: &mut [T]) {
  const LEN: usize = 4;
  let dst = &mut output[..LEN];
  let src = &input[..LEN];
  dst.copy_from_slice(src);
}
/// 8-point identity forward transform: copies the first eight entries of
/// `input` into the first eight entries of `output` unchanged.
///
/// Panics if either slice holds fewer than eight elements.
#[$m]
pub $($s)* fn fidentity8<T: TxOperations>(input: &[T], output: &mut [T]) {
  const LEN: usize = 8;
  let dst = &mut output[..LEN];
  let src = &input[..LEN];
  dst.copy_from_slice(src);
}
/// 16-point identity forward transform: copies the first sixteen entries of
/// `input` into the first sixteen entries of `output` unchanged.
///
/// Panics if either slice holds fewer than sixteen elements.
#[$m]
$($s)* fn fidentity16<T: TxOperations>(input: &[T], output: &mut [T]) {
  const LEN: usize = 16;
  let dst = &mut output[..LEN];
  let src = &input[..LEN];
  dst.copy_from_slice(src);
}
/// 32-point identity forward transform: copies the first thirty-two entries
/// of `input` into the first thirty-two entries of `output` unchanged.
///
/// Panics if either slice holds fewer than thirty-two elements.
#[$m]
$($s)* fn fidentity32<T: TxOperations>(input: &[T], output: &mut [T]) {
  const LEN: usize = 32;
  let dst = &mut output[..LEN];
  let src = &input[..LEN];
  dst.copy_from_slice(src);
}
}
}