use std::any::TypeId;
use std::sync::Arc;
use num_complex::Complex;
use crate::{common::FftNum, FftDirection};
use crate::array_utils::DoubleBuf;
use crate::twiddles;
use crate::{Direction, Fft, Length};
use super::wasm_simd_common::{assert_f32, assert_f64};
use super::wasm_simd_utils::*;
use super::wasm_simd_vector::*;
/// Returns the prime FFT lengths that `construct_prime_butterfly` has a dedicated
/// Wasm SIMD butterfly for, in ascending order.
pub const fn prime_butterfly_lens() -> &'static [usize] {
    const SUPPORTED_LENS: &[usize] = &[7, 11, 13, 17, 19, 23, 29, 31];
    SUPPORTED_LENS
}
/// Builds the Wasm SIMD butterfly for a prime `len` and returns it as a shared `Fft<T>` object.
///
/// `T` is compared against `f32` and `f64` through `TypeId`; the matching family of butterfly
/// structs is then selected by `len` (7, 11, 13, 17, 19, 23, 29 or 31).
///
/// # Panics
/// Panics through `unimplemented!` when `len` is not one of the lengths listed above, or when
/// `T` is neither `f32` nor `f64`.
///
/// # Safety
/// This function is compiled with the `simd128` target feature enabled; the caller must make
/// sure the running target supports it.
#[target_feature(enable = "simd128")]
pub unsafe fn construct_prime_butterfly<T: FftNum>(
    len: usize,
    direction: FftDirection,
) -> Arc<dyn Fft<T>> {
    let scalar = TypeId::of::<T>();

    // Single-precision family.
    if scalar == TypeId::of::<f32>() {
        return match len {
            7 => Arc::new(WasmSimdF32Butterfly7::new(direction)) as Arc<dyn Fft<T>>,
            11 => Arc::new(WasmSimdF32Butterfly11::new(direction)) as Arc<dyn Fft<T>>,
            13 => Arc::new(WasmSimdF32Butterfly13::new(direction)) as Arc<dyn Fft<T>>,
            17 => Arc::new(WasmSimdF32Butterfly17::new(direction)) as Arc<dyn Fft<T>>,
            19 => Arc::new(WasmSimdF32Butterfly19::new(direction)) as Arc<dyn Fft<T>>,
            23 => Arc::new(WasmSimdF32Butterfly23::new(direction)) as Arc<dyn Fft<T>>,
            29 => Arc::new(WasmSimdF32Butterfly29::new(direction)) as Arc<dyn Fft<T>>,
            31 => Arc::new(WasmSimdF32Butterfly31::new(direction)) as Arc<dyn Fft<T>>,
            _ => unimplemented!("Invalid Wasm SIMD prime butterfly length: {len}"),
        };
    }

    // Double-precision family.
    if scalar == TypeId::of::<f64>() {
        return match len {
            7 => Arc::new(WasmSimdF64Butterfly7::new(direction)) as Arc<dyn Fft<T>>,
            11 => Arc::new(WasmSimdF64Butterfly11::new(direction)) as Arc<dyn Fft<T>>,
            13 => Arc::new(WasmSimdF64Butterfly13::new(direction)) as Arc<dyn Fft<T>>,
            17 => Arc::new(WasmSimdF64Butterfly17::new(direction)) as Arc<dyn Fft<T>>,
            19 => Arc::new(WasmSimdF64Butterfly19::new(direction)) as Arc<dyn Fft<T>>,
            23 => Arc::new(WasmSimdF64Butterfly23::new(direction)) as Arc<dyn Fft<T>>,
            29 => Arc::new(WasmSimdF64Butterfly29::new(direction)) as Arc<dyn Fft<T>>,
            31 => Arc::new(WasmSimdF64Butterfly31::new(direction)) as Arc<dyn Fft<T>>,
            _ => unimplemented!("Invalid Wasm SIMD prime butterfly length: {len}"),
        };
    }

    unimplemented!("Not f32 or f64")
}
/// Computes the twiddle factors with indices `1..=TW` for an FFT of size `len`.
///
/// Element `k` of the returned array is `twiddles::compute_twiddle(k + 1, len, direction)`.
/// `TW` is normally inferred from the array type the caller assigns the result to.
#[inline(always)]
fn make_twiddles<const TW: usize, T: FftNum>(len: usize, direction: FftDirection) -> [Complex<T>; TW] {
    // The counter is bumped before each use, so the closure sees 1, 2, ..., TW.
    let mut index = 0;
    [(); TW].map(|()| {
        index += 1;
        twiddles::compute_twiddle(index, len, direction)
    })
}
/// Length-7 FFT butterfly for `f32` data using Wasm SIMD vectors.
///
/// `T` is carried only through `PhantomData`; the constructor calls `assert_f32::<T>()`.
struct WasmSimdF32Butterfly7<T> {
/// Direction this instance was built for; read by the closure passed to the boilerplate macro.
direction: FftDirection,
/// Real parts of the twiddles from `make_twiddles(7, direction)` (indices 1..=3), one vector each.
twiddles_re: [WasmVector32; 3],
/// Imaginary parts of the same three twiddles.
twiddles_im: [WasmVector32; 3],
/// Ties the struct to `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): macro is defined elsewhere; presumably it generates the `Fft` / `Length` /
// `Direction` impls for length 7, using the closure to read the direction — confirm.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly7, 7, |this: &WasmSimdF32Butterfly7<_>| this.direction);
// Constructor and FFT entry points for the length-7 `f32` butterfly.
impl<T: FftNum> WasmSimdF32Butterfly7<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f32::<T>()` (presumably a check that `T` is `f32`), computes the twiddles with
/// indices 1..=3 for a length-7 FFT, and stores their real and imaginary parts as separate
/// vectors produced by `WasmVector::broadcast_scalar`.
///
/// # Safety
/// Compiled with `simd128` enabled; the caller must ensure the target supports that feature.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f32::<T>();
// The array length (3) is inferred from the `twiddles_re` / `twiddles_im` field types.
let twiddles = make_twiddles(7, direction);
Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}
}
/// Runs a single length-7 FFT in place on `buffer`.
///
/// Elements 0..=6 are loaded with `read_partial1_complex_to_array!`, transformed by
/// `perform_parallel_fft_direct`, and stored back with `write_partial_lo_complex_to_array!`.
/// NOTE(review): the macro names suggest one complex value per vector (low half only) — the
/// macros are defined elsewhere, confirm.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 7 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let values = read_partial1_complex_to_array!(buffer, { 0,1,2,3,4,5,6 });
let out = self.perform_parallel_fft_direct(values);
write_partial_lo_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6 } );
}
/// Runs two length-7 FFTs at once, in place on `buffer`.
///
/// Seven vectors are loaded at even indices 0..=12, reshuffled so that each `values[j]` combines
/// halves of two different loads, transformed, and shuffled back before the strided store.
/// NOTE(review): assuming each load holds two adjacent complex values and `extract_X_Y_f32(a, b)`
/// takes half X of `a` and half Y of `b`, `values[j]` is (buffer[j], buffer[7 + j]), i.e. element
/// `j` of two back-to-back FFTs — confirm against the helper definitions.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 14 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let input_packed = read_complex_to_array!(buffer, { 0,2,4,6,8,10,12 });
// Even `j` pairs the lo half of load j/2 with the hi half of load j/2 + 3;
// odd `j` pairs the hi half of load (j-1)/2 with the lo half of load (j-1)/2 + 4.
let values = [
extract_lo_hi_f32(input_packed[0], input_packed[3]),
extract_hi_lo_f32(input_packed[0], input_packed[4]),
extract_lo_hi_f32(input_packed[1], input_packed[4]),
extract_hi_lo_f32(input_packed[1], input_packed[5]),
extract_lo_hi_f32(input_packed[2], input_packed[5]),
extract_hi_lo_f32(input_packed[2], input_packed[6]),
extract_lo_hi_f32(input_packed[3], input_packed[6]),
];
let out = self.perform_parallel_fft_direct(values);
// Reverse shuffle: lo halves of consecutive outputs first, then out[6] joined with out[0],
// then hi halves of consecutive outputs (per the helper names).
let out_packed = [
extract_lo_lo_f32(out[0], out[1]),
extract_lo_lo_f32(out[2], out[3]),
extract_lo_lo_f32(out[4], out[5]),
extract_lo_hi_f32(out[6], out[0]),
extract_hi_hi_f32(out[1], out[2]),
extract_hi_hi_f32(out[3], out[4]),
extract_hi_hi_f32(out[5], out[6]),
];
write_complex_to_array_strided!(out_packed, buffer, 2, { 0,1,2,3,4,5,6 });
}
/// Length-7 kernel shared by both entry points above.
///
/// Takes the seven inputs as vectors and returns the seven outputs `[y00, ..., y06]` in index
/// order. Inputs are first combined in pairs (n, 7 - n); each output pair (k, 7 - k) is then
/// accumulated from those pair results and the stored twiddles.
///
/// # Safety
/// NOTE(review): presumably only requires `simd128` to be available — confirm against the
/// `WasmVector` method contracts.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 7]) -> [WasmVector32; 7] {
// The rotation is always built for `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already baked into the twiddles from `new` — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair input n with input 7 - n. Names read x{n}p{7-n} / x{n}m{7-n} (presumably the sum and
// difference returned by `column_butterfly2`). Each difference is rotated; each sum is added to y00.
let y00 = values[0];
let [x1p6, x1m6] = WasmVector::column_butterfly2([values[1], values[6]]);
let x1m6 = WasmVector::apply_rotate90(rotate, x1m6);
let y00 = WasmVector::add(y00, x1p6);
let [x2p5, x2m5] = WasmVector::column_butterfly2([values[2], values[5]]);
let x2m5 = WasmVector::apply_rotate90(rotate, x2m5);
let y00 = WasmVector::add(y00, x2p5);
let [x3p4, x3m4] = WasmVector::column_butterfly2([values[3], values[4]]);
let x3m4 = WasmVector::apply_rotate90(rotate, x3m4);
let y00 = WasmVector::add(y00, x3p4);
// Output pairs (k, 7 - k): the `a` chain starts from values[0] and combines `twiddles_re` with
// the sums; the `b` chain combines `twiddles_im` with the rotated differences; a final
// `column_butterfly2` splits them into y{k} and y{7-k}.
// For output k and pair n the twiddle slot is k*n mod 7 folded into 1..=3 (minus one for
// zero-based indexing); `nmadd` replaces `fmadd` in the `b` chain exactly where k*n mod 7 > 3.
let m0106a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p6);
let m0106a = WasmVector::fmadd(m0106a, self.twiddles_re[1], x2p5);
let m0106a = WasmVector::fmadd(m0106a, self.twiddles_re[2], x3p4);
let m0106b = WasmVector::mul(self.twiddles_im[0], x1m6);
let m0106b = WasmVector::fmadd(m0106b, self.twiddles_im[1], x2m5);
let m0106b = WasmVector::fmadd(m0106b, self.twiddles_im[2], x3m4);
let [y01, y06] = WasmVector::column_butterfly2([m0106a, m0106b]);
let m0205a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p6);
let m0205a = WasmVector::fmadd(m0205a, self.twiddles_re[2], x2p5);
let m0205a = WasmVector::fmadd(m0205a, self.twiddles_re[0], x3p4);
let m0205b = WasmVector::mul(self.twiddles_im[1], x1m6);
let m0205b = WasmVector::nmadd(m0205b, self.twiddles_im[2], x2m5);
let m0205b = WasmVector::nmadd(m0205b, self.twiddles_im[0], x3m4);
let [y02, y05] = WasmVector::column_butterfly2([m0205a, m0205b]);
let m0304a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p6);
let m0304a = WasmVector::fmadd(m0304a, self.twiddles_re[0], x2p5);
let m0304a = WasmVector::fmadd(m0304a, self.twiddles_re[1], x3p4);
let m0304b = WasmVector::mul(self.twiddles_im[2], x1m6);
let m0304b = WasmVector::nmadd(m0304b, self.twiddles_im[0], x2m5);
let m0304b = WasmVector::fmadd(m0304b, self.twiddles_im[1], x3m4);
let [y03, y04] = WasmVector::column_butterfly2([m0304a, m0304b]);
[y00, y01, y02, y03, y04, y05, y06]
}
}
/// Length-7 FFT butterfly for `f64` data using Wasm SIMD vectors.
///
/// `T` is carried only through `PhantomData`; the constructor calls `assert_f64::<T>()`.
struct WasmSimdF64Butterfly7<T> {
/// Direction this instance was built for; read by the closure passed to the boilerplate macro.
direction: FftDirection,
/// Real parts of the twiddles from `make_twiddles(7, direction)` (indices 1..=3), one vector each.
twiddles_re: [WasmVector64; 3],
/// Imaginary parts of the same three twiddles.
twiddles_im: [WasmVector64; 3],
/// Ties the struct to `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): macro is defined elsewhere; presumably it generates the `Fft` / `Length` /
// `Direction` impls for length 7, using the closure to read the direction — confirm.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly7, 7, |this: &WasmSimdF64Butterfly7<_>| this.direction);
// Constructor and FFT entry points for the length-7 `f64` butterfly.
impl<T: FftNum> WasmSimdF64Butterfly7<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f64::<T>()` (presumably a check that `T` is `f64`), computes the twiddles with
/// indices 1..=3 for a length-7 FFT, and stores their real and imaginary parts as separate
/// vectors produced by `WasmVector::broadcast_scalar`.
///
/// # Safety
/// Compiled with `simd128` enabled; the caller must ensure the target supports that feature.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f64::<T>();
// The array length (3) is inferred from the `twiddles_re` / `twiddles_im` field types.
let twiddles = make_twiddles(7, direction);
// NOTE(review): the explicit unsafe block presumably covers the `broadcast_scalar` calls; the
// f32 constructors in this file make the same calls without one.
unsafe {Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}}
}
/// Runs a single length-7 FFT in place on `buffer`.
///
/// Elements 0..=6 are loaded with `read_complex_to_array!`, transformed by `perform_fft_direct`,
/// and stored back to the same indices with `write_complex_to_array!`.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 7 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
let values = read_complex_to_array!(buffer, { 0,1,2,3,4,5,6 });
let out = self.perform_fft_direct(values);
write_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6 });
}
/// Length-7 kernel.
///
/// Takes the seven inputs as vectors and returns the seven outputs `[y00, ..., y06]` in index
/// order. Inputs are first combined in pairs (n, 7 - n); each output pair (k, 7 - k) is then
/// accumulated from those pair results and the stored twiddles.
///
/// # Safety
/// NOTE(review): presumably only requires `simd128` to be available — confirm against the
/// `WasmVector` method contracts.
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 7]) -> [WasmVector64; 7] {
// The rotation is always built for `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already baked into the twiddles from `new` — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair input n with input 7 - n. Names read x{n}p{7-n} / x{n}m{7-n} (presumably the sum and
// difference returned by `column_butterfly2`). Each difference is rotated; each sum is added to y00.
let y00 = values[0];
let [x1p6, x1m6] = WasmVector::column_butterfly2([values[1], values[6]]);
let x1m6 = WasmVector::apply_rotate90(rotate, x1m6);
let y00 = WasmVector::add(y00, x1p6);
let [x2p5, x2m5] = WasmVector::column_butterfly2([values[2], values[5]]);
let x2m5 = WasmVector::apply_rotate90(rotate, x2m5);
let y00 = WasmVector::add(y00, x2p5);
let [x3p4, x3m4] = WasmVector::column_butterfly2([values[3], values[4]]);
let x3m4 = WasmVector::apply_rotate90(rotate, x3m4);
let y00 = WasmVector::add(y00, x3p4);
// Output pairs (k, 7 - k): the `a` chain starts from values[0] and combines `twiddles_re` with
// the sums; the `b` chain combines `twiddles_im` with the rotated differences; a final
// `column_butterfly2` splits them into y{k} and y{7-k}.
// For output k and pair n the twiddle slot is k*n mod 7 folded into 1..=3 (minus one for
// zero-based indexing); `nmadd` replaces `fmadd` in the `b` chain exactly where k*n mod 7 > 3.
let m0106a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p6);
let m0106a = WasmVector::fmadd(m0106a, self.twiddles_re[1], x2p5);
let m0106a = WasmVector::fmadd(m0106a, self.twiddles_re[2], x3p4);
let m0106b = WasmVector::mul(self.twiddles_im[0], x1m6);
let m0106b = WasmVector::fmadd(m0106b, self.twiddles_im[1], x2m5);
let m0106b = WasmVector::fmadd(m0106b, self.twiddles_im[2], x3m4);
let [y01, y06] = WasmVector::column_butterfly2([m0106a, m0106b]);
let m0205a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p6);
let m0205a = WasmVector::fmadd(m0205a, self.twiddles_re[2], x2p5);
let m0205a = WasmVector::fmadd(m0205a, self.twiddles_re[0], x3p4);
let m0205b = WasmVector::mul(self.twiddles_im[1], x1m6);
let m0205b = WasmVector::nmadd(m0205b, self.twiddles_im[2], x2m5);
let m0205b = WasmVector::nmadd(m0205b, self.twiddles_im[0], x3m4);
let [y02, y05] = WasmVector::column_butterfly2([m0205a, m0205b]);
let m0304a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p6);
let m0304a = WasmVector::fmadd(m0304a, self.twiddles_re[0], x2p5);
let m0304a = WasmVector::fmadd(m0304a, self.twiddles_re[1], x3p4);
let m0304b = WasmVector::mul(self.twiddles_im[2], x1m6);
let m0304b = WasmVector::nmadd(m0304b, self.twiddles_im[0], x2m5);
let m0304b = WasmVector::fmadd(m0304b, self.twiddles_im[1], x3m4);
let [y03, y04] = WasmVector::column_butterfly2([m0304a, m0304b]);
[y00, y01, y02, y03, y04, y05, y06]
}
}
/// Length-11 FFT butterfly for `f32` data using Wasm SIMD vectors.
///
/// `T` is carried only through `PhantomData`; the constructor calls `assert_f32::<T>()`.
struct WasmSimdF32Butterfly11<T> {
/// Direction this instance was built for; read by the closure passed to the boilerplate macro.
direction: FftDirection,
/// Real parts of the twiddles from `make_twiddles(11, direction)` (indices 1..=5), one vector each.
twiddles_re: [WasmVector32; 5],
/// Imaginary parts of the same five twiddles.
twiddles_im: [WasmVector32; 5],
/// Ties the struct to `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): macro is defined elsewhere; presumably it generates the `Fft` / `Length` /
// `Direction` impls for length 11, using the closure to read the direction — confirm.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly11, 11, |this: &WasmSimdF32Butterfly11<_>| this.direction);
// Constructor and FFT entry points for the length-11 `f32` butterfly.
impl<T: FftNum> WasmSimdF32Butterfly11<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f32::<T>()` (presumably a check that `T` is `f32`), computes the twiddles with
/// indices 1..=5 for a length-11 FFT, and stores their real and imaginary parts as separate
/// vectors produced by `WasmVector::broadcast_scalar`.
///
/// # Safety
/// Compiled with `simd128` enabled; the caller must ensure the target supports that feature.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f32::<T>();
// The array length (5) is inferred from the `twiddles_re` / `twiddles_im` field types.
let twiddles = make_twiddles(11, direction);
Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}
}
/// Runs a single length-11 FFT in place on `buffer`.
///
/// Elements 0..=10 are loaded with `read_partial1_complex_to_array!`, transformed by
/// `perform_parallel_fft_direct`, and stored back with `write_partial_lo_complex_to_array!`.
/// NOTE(review): the macro names suggest one complex value per vector (low half only) — the
/// macros are defined elsewhere, confirm.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 11 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let values = read_partial1_complex_to_array!(buffer, { 0,1,2,3,4,5,6,7,8,9,10 });
let out = self.perform_parallel_fft_direct(values);
write_partial_lo_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6,7,8,9,10 } );
}
/// Runs two length-11 FFTs at once, in place on `buffer`.
///
/// Eleven vectors are loaded at even indices 0..=20, reshuffled so that each `values[j]` combines
/// halves of two different loads, transformed, and shuffled back before the strided store.
/// NOTE(review): assuming each load holds two adjacent complex values and `extract_X_Y_f32(a, b)`
/// takes half X of `a` and half Y of `b`, `values[j]` is (buffer[j], buffer[11 + j]), i.e. element
/// `j` of two back-to-back FFTs — confirm against the helper definitions.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 22 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let input_packed = read_complex_to_array!(buffer, { 0,2,4,6,8,10,12,14,16,18,20 });
// Even `j` pairs the lo half of load j/2 with the hi half of load j/2 + 5;
// odd `j` pairs the hi half of load (j-1)/2 with the lo half of load (j-1)/2 + 6.
let values = [
extract_lo_hi_f32(input_packed[0], input_packed[5]),
extract_hi_lo_f32(input_packed[0], input_packed[6]),
extract_lo_hi_f32(input_packed[1], input_packed[6]),
extract_hi_lo_f32(input_packed[1], input_packed[7]),
extract_lo_hi_f32(input_packed[2], input_packed[7]),
extract_hi_lo_f32(input_packed[2], input_packed[8]),
extract_lo_hi_f32(input_packed[3], input_packed[8]),
extract_hi_lo_f32(input_packed[3], input_packed[9]),
extract_lo_hi_f32(input_packed[4], input_packed[9]),
extract_hi_lo_f32(input_packed[4], input_packed[10]),
extract_lo_hi_f32(input_packed[5], input_packed[10]),
];
let out = self.perform_parallel_fft_direct(values);
// Reverse shuffle: lo halves of consecutive outputs first, then out[10] joined with out[0],
// then hi halves of consecutive outputs (per the helper names).
let out_packed = [
extract_lo_lo_f32(out[0], out[1]),
extract_lo_lo_f32(out[2], out[3]),
extract_lo_lo_f32(out[4], out[5]),
extract_lo_lo_f32(out[6], out[7]),
extract_lo_lo_f32(out[8], out[9]),
extract_lo_hi_f32(out[10], out[0]),
extract_hi_hi_f32(out[1], out[2]),
extract_hi_hi_f32(out[3], out[4]),
extract_hi_hi_f32(out[5], out[6]),
extract_hi_hi_f32(out[7], out[8]),
extract_hi_hi_f32(out[9], out[10]),
];
write_complex_to_array_strided!(out_packed, buffer, 2, { 0,1,2,3,4,5,6,7,8,9,10 });
}
/// Length-11 kernel shared by both entry points above.
///
/// Takes the eleven inputs as vectors and returns the eleven outputs `[y00, ..., y10]` in index
/// order. Inputs are first combined in pairs (n, 11 - n); each output pair (k, 11 - k) is then
/// accumulated from those pair results and the stored twiddles.
///
/// # Safety
/// NOTE(review): presumably only requires `simd128` to be available — confirm against the
/// `WasmVector` method contracts.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 11]) -> [WasmVector32; 11] {
// The rotation is always built for `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already baked into the twiddles from `new` — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair input n with input 11 - n. Names read x{n}p{11-n} / x{n}m{11-n} (presumably the sum and
// difference returned by `column_butterfly2`). Each difference is rotated; each sum is added to y00.
let y00 = values[0];
let [x1p10, x1m10] = WasmVector::column_butterfly2([values[1], values[10]]);
let x1m10 = WasmVector::apply_rotate90(rotate, x1m10);
let y00 = WasmVector::add(y00, x1p10);
let [x2p9, x2m9] = WasmVector::column_butterfly2([values[2], values[9]]);
let x2m9 = WasmVector::apply_rotate90(rotate, x2m9);
let y00 = WasmVector::add(y00, x2p9);
let [x3p8, x3m8] = WasmVector::column_butterfly2([values[3], values[8]]);
let x3m8 = WasmVector::apply_rotate90(rotate, x3m8);
let y00 = WasmVector::add(y00, x3p8);
let [x4p7, x4m7] = WasmVector::column_butterfly2([values[4], values[7]]);
let x4m7 = WasmVector::apply_rotate90(rotate, x4m7);
let y00 = WasmVector::add(y00, x4p7);
let [x5p6, x5m6] = WasmVector::column_butterfly2([values[5], values[6]]);
let x5m6 = WasmVector::apply_rotate90(rotate, x5m6);
let y00 = WasmVector::add(y00, x5p6);
// Output pairs (k, 11 - k): the `a` chain starts from values[0] and combines `twiddles_re` with
// the sums; the `b` chain combines `twiddles_im` with the rotated differences; a final
// `column_butterfly2` splits them into y{k} and y{11-k}.
// For output k and pair n the twiddle slot is k*n mod 11 folded into 1..=5 (minus one for
// zero-based indexing); `nmadd` replaces `fmadd` in the `b` chain exactly where k*n mod 11 > 5.
let m0110a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p10);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[1], x2p9);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[2], x3p8);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[3], x4p7);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[4], x5p6);
let m0110b = WasmVector::mul(self.twiddles_im[0], x1m10);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[1], x2m9);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[2], x3m8);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[3], x4m7);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[4], x5m6);
let [y01, y10] = WasmVector::column_butterfly2([m0110a, m0110b]);
let m0209a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p10);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[3], x2p9);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[4], x3p8);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[2], x4p7);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[0], x5p6);
let m0209b = WasmVector::mul(self.twiddles_im[1], x1m10);
let m0209b = WasmVector::fmadd(m0209b, self.twiddles_im[3], x2m9);
let m0209b = WasmVector::nmadd(m0209b, self.twiddles_im[4], x3m8);
let m0209b = WasmVector::nmadd(m0209b, self.twiddles_im[2], x4m7);
let m0209b = WasmVector::nmadd(m0209b, self.twiddles_im[0], x5m6);
let [y02, y09] = WasmVector::column_butterfly2([m0209a, m0209b]);
let m0308a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p10);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[4], x2p9);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[1], x3p8);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[0], x4p7);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[3], x5p6);
let m0308b = WasmVector::mul(self.twiddles_im[2], x1m10);
let m0308b = WasmVector::nmadd(m0308b, self.twiddles_im[4], x2m9);
let m0308b = WasmVector::nmadd(m0308b, self.twiddles_im[1], x3m8);
let m0308b = WasmVector::fmadd(m0308b, self.twiddles_im[0], x4m7);
let m0308b = WasmVector::fmadd(m0308b, self.twiddles_im[3], x5m6);
let [y03, y08] = WasmVector::column_butterfly2([m0308a, m0308b]);
let m0407a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p10);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[2], x2p9);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[0], x3p8);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[4], x4p7);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[1], x5p6);
let m0407b = WasmVector::mul(self.twiddles_im[3], x1m10);
let m0407b = WasmVector::nmadd(m0407b, self.twiddles_im[2], x2m9);
let m0407b = WasmVector::fmadd(m0407b, self.twiddles_im[0], x3m8);
let m0407b = WasmVector::fmadd(m0407b, self.twiddles_im[4], x4m7);
let m0407b = WasmVector::nmadd(m0407b, self.twiddles_im[1], x5m6);
let [y04, y07] = WasmVector::column_butterfly2([m0407a, m0407b]);
let m0506a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p10);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[0], x2p9);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[3], x3p8);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[1], x4p7);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[2], x5p6);
let m0506b = WasmVector::mul(self.twiddles_im[4], x1m10);
let m0506b = WasmVector::nmadd(m0506b, self.twiddles_im[0], x2m9);
let m0506b = WasmVector::fmadd(m0506b, self.twiddles_im[3], x3m8);
let m0506b = WasmVector::nmadd(m0506b, self.twiddles_im[1], x4m7);
let m0506b = WasmVector::fmadd(m0506b, self.twiddles_im[2], x5m6);
let [y05, y06] = WasmVector::column_butterfly2([m0506a, m0506b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10]
}
}
/// Length-11 FFT butterfly for `f64` data using Wasm SIMD vectors.
///
/// `T` is carried only through `PhantomData`; the constructor calls `assert_f64::<T>()`.
struct WasmSimdF64Butterfly11<T> {
/// Direction this instance was built for; read by the closure passed to the boilerplate macro.
direction: FftDirection,
/// Real parts of the twiddles from `make_twiddles(11, direction)` (indices 1..=5), one vector each.
twiddles_re: [WasmVector64; 5],
/// Imaginary parts of the same five twiddles.
twiddles_im: [WasmVector64; 5],
/// Ties the struct to `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): macro is defined elsewhere; presumably it generates the `Fft` / `Length` /
// `Direction` impls for length 11, using the closure to read the direction — confirm.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly11, 11, |this: &WasmSimdF64Butterfly11<_>| this.direction);
// Constructor and FFT entry points for the length-11 `f64` butterfly.
impl<T: FftNum> WasmSimdF64Butterfly11<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f64::<T>()` (presumably a check that `T` is `f64`), computes the twiddles with
/// indices 1..=5 for a length-11 FFT, and stores their real and imaginary parts as separate
/// vectors produced by `WasmVector::broadcast_scalar`.
///
/// # Safety
/// Compiled with `simd128` enabled; the caller must ensure the target supports that feature.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f64::<T>();
// The array length (5) is inferred from the `twiddles_re` / `twiddles_im` field types.
let twiddles = make_twiddles(11, direction);
// NOTE(review): the explicit unsafe block presumably covers the `broadcast_scalar` calls; the
// f32 constructors in this file make the same calls without one.
unsafe {Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}}
}
/// Runs a single length-11 FFT in place on `buffer`.
///
/// Elements 0..=10 are loaded with `read_complex_to_array!`, transformed by `perform_fft_direct`,
/// and stored back to the same indices with `write_complex_to_array!`.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 11 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
let values = read_complex_to_array!(buffer, { 0,1,2,3,4,5,6,7,8,9,10 });
let out = self.perform_fft_direct(values);
write_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6,7,8,9,10 });
}
/// Length-11 kernel.
///
/// Takes the eleven inputs as vectors and returns the eleven outputs `[y00, ..., y10]` in index
/// order. Inputs are first combined in pairs (n, 11 - n); each output pair (k, 11 - k) is then
/// accumulated from those pair results and the stored twiddles.
///
/// # Safety
/// NOTE(review): presumably only requires `simd128` to be available — confirm against the
/// `WasmVector` method contracts.
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 11]) -> [WasmVector64; 11] {
// The rotation is always built for `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already baked into the twiddles from `new` — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair input n with input 11 - n. Names read x{n}p{11-n} / x{n}m{11-n} (presumably the sum and
// difference returned by `column_butterfly2`). Each difference is rotated; each sum is added to y00.
let y00 = values[0];
let [x1p10, x1m10] = WasmVector::column_butterfly2([values[1], values[10]]);
let x1m10 = WasmVector::apply_rotate90(rotate, x1m10);
let y00 = WasmVector::add(y00, x1p10);
let [x2p9, x2m9] = WasmVector::column_butterfly2([values[2], values[9]]);
let x2m9 = WasmVector::apply_rotate90(rotate, x2m9);
let y00 = WasmVector::add(y00, x2p9);
let [x3p8, x3m8] = WasmVector::column_butterfly2([values[3], values[8]]);
let x3m8 = WasmVector::apply_rotate90(rotate, x3m8);
let y00 = WasmVector::add(y00, x3p8);
let [x4p7, x4m7] = WasmVector::column_butterfly2([values[4], values[7]]);
let x4m7 = WasmVector::apply_rotate90(rotate, x4m7);
let y00 = WasmVector::add(y00, x4p7);
let [x5p6, x5m6] = WasmVector::column_butterfly2([values[5], values[6]]);
let x5m6 = WasmVector::apply_rotate90(rotate, x5m6);
let y00 = WasmVector::add(y00, x5p6);
// Output pairs (k, 11 - k): the `a` chain starts from values[0] and combines `twiddles_re` with
// the sums; the `b` chain combines `twiddles_im` with the rotated differences; a final
// `column_butterfly2` splits them into y{k} and y{11-k}.
// For output k and pair n the twiddle slot is k*n mod 11 folded into 1..=5 (minus one for
// zero-based indexing); `nmadd` replaces `fmadd` in the `b` chain exactly where k*n mod 11 > 5.
let m0110a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p10);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[1], x2p9);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[2], x3p8);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[3], x4p7);
let m0110a = WasmVector::fmadd(m0110a, self.twiddles_re[4], x5p6);
let m0110b = WasmVector::mul(self.twiddles_im[0], x1m10);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[1], x2m9);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[2], x3m8);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[3], x4m7);
let m0110b = WasmVector::fmadd(m0110b, self.twiddles_im[4], x5m6);
let [y01, y10] = WasmVector::column_butterfly2([m0110a, m0110b]);
let m0209a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p10);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[3], x2p9);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[4], x3p8);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[2], x4p7);
let m0209a = WasmVector::fmadd(m0209a, self.twiddles_re[0], x5p6);
let m0209b = WasmVector::mul(self.twiddles_im[1], x1m10);
let m0209b = WasmVector::fmadd(m0209b, self.twiddles_im[3], x2m9);
let m0209b = WasmVector::nmadd(m0209b, self.twiddles_im[4], x3m8);
let m0209b = WasmVector::nmadd(m0209b, self.twiddles_im[2], x4m7);
let m0209b = WasmVector::nmadd(m0209b, self.twiddles_im[0], x5m6);
let [y02, y09] = WasmVector::column_butterfly2([m0209a, m0209b]);
let m0308a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p10);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[4], x2p9);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[1], x3p8);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[0], x4p7);
let m0308a = WasmVector::fmadd(m0308a, self.twiddles_re[3], x5p6);
let m0308b = WasmVector::mul(self.twiddles_im[2], x1m10);
let m0308b = WasmVector::nmadd(m0308b, self.twiddles_im[4], x2m9);
let m0308b = WasmVector::nmadd(m0308b, self.twiddles_im[1], x3m8);
let m0308b = WasmVector::fmadd(m0308b, self.twiddles_im[0], x4m7);
let m0308b = WasmVector::fmadd(m0308b, self.twiddles_im[3], x5m6);
let [y03, y08] = WasmVector::column_butterfly2([m0308a, m0308b]);
let m0407a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p10);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[2], x2p9);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[0], x3p8);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[4], x4p7);
let m0407a = WasmVector::fmadd(m0407a, self.twiddles_re[1], x5p6);
let m0407b = WasmVector::mul(self.twiddles_im[3], x1m10);
let m0407b = WasmVector::nmadd(m0407b, self.twiddles_im[2], x2m9);
let m0407b = WasmVector::fmadd(m0407b, self.twiddles_im[0], x3m8);
let m0407b = WasmVector::fmadd(m0407b, self.twiddles_im[4], x4m7);
let m0407b = WasmVector::nmadd(m0407b, self.twiddles_im[1], x5m6);
let [y04, y07] = WasmVector::column_butterfly2([m0407a, m0407b]);
let m0506a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p10);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[0], x2p9);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[3], x3p8);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[1], x4p7);
let m0506a = WasmVector::fmadd(m0506a, self.twiddles_re[2], x5p6);
let m0506b = WasmVector::mul(self.twiddles_im[4], x1m10);
let m0506b = WasmVector::nmadd(m0506b, self.twiddles_im[0], x2m9);
let m0506b = WasmVector::fmadd(m0506b, self.twiddles_im[3], x3m8);
let m0506b = WasmVector::nmadd(m0506b, self.twiddles_im[1], x4m7);
let m0506b = WasmVector::fmadd(m0506b, self.twiddles_im[2], x5m6);
let [y05, y06] = WasmVector::column_butterfly2([m0506a, m0506b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10]
}
}
/// Length-13 FFT butterfly for `f32` data using Wasm SIMD vectors.
///
/// `T` is carried only through `PhantomData`; the constructor calls `assert_f32::<T>()`.
struct WasmSimdF32Butterfly13<T> {
/// Direction this instance was built for; read by the closure passed to the boilerplate macro.
direction: FftDirection,
/// Real parts of the twiddles from `make_twiddles(13, direction)` (indices 1..=6), one vector each.
twiddles_re: [WasmVector32; 6],
/// Imaginary parts of the same six twiddles.
twiddles_im: [WasmVector32; 6],
/// Ties the struct to `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): macro is defined elsewhere; presumably it generates the `Fft` / `Length` /
// `Direction` impls for length 13, using the closure to read the direction — confirm.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly13, 13, |this: &WasmSimdF32Butterfly13<_>| this.direction);
// Constructor and FFT entry points for the length-13 `f32` butterfly.
impl<T: FftNum> WasmSimdF32Butterfly13<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f32::<T>()` (presumably a check that `T` is `f32`), computes the twiddles with
/// indices 1..=6 for a length-13 FFT, and stores their real and imaginary parts as separate
/// vectors produced by `WasmVector::broadcast_scalar`.
///
/// # Safety
/// Compiled with `simd128` enabled; the caller must ensure the target supports that feature.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f32::<T>();
// The array length (6) is inferred from the `twiddles_re` / `twiddles_im` field types.
let twiddles = make_twiddles(13, direction);
Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}
}
/// Runs a single length-13 FFT in place on `buffer`.
///
/// Elements 0..=12 are loaded with `read_partial1_complex_to_array!`, transformed by
/// `perform_parallel_fft_direct`, and stored back with `write_partial_lo_complex_to_array!`.
/// NOTE(review): the macro names suggest one complex value per vector (low half only) — the
/// macros are defined elsewhere, confirm.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 13 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let values = read_partial1_complex_to_array!(buffer, { 0,1,2,3,4,5,6,7,8,9,10,11,12 });
let out = self.perform_parallel_fft_direct(values);
write_partial_lo_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6,7,8,9,10,11,12 } );
}
/// Runs two length-13 FFTs at once, in place on `buffer`.
///
/// Thirteen vectors are loaded at even indices 0..=24, reshuffled so that each `values[j]`
/// combines halves of two different loads, transformed, and shuffled back before the strided store.
/// NOTE(review): assuming each load holds two adjacent complex values and `extract_X_Y_f32(a, b)`
/// takes half X of `a` and half Y of `b`, `values[j]` is (buffer[j], buffer[13 + j]), i.e. element
/// `j` of two back-to-back FFTs — confirm against the helper definitions.
///
/// # Safety
/// NOTE(review): presumably `buffer` must hold at least 26 complex values and `simd128` must be
/// available — confirm against the load/store macros.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let input_packed = read_complex_to_array!(buffer, { 0,2,4,6,8,10,12,14,16,18,20,22,24 });
// Even `j` pairs the lo half of load j/2 with the hi half of load j/2 + 6;
// odd `j` pairs the hi half of load (j-1)/2 with the lo half of load (j-1)/2 + 7.
let values = [
extract_lo_hi_f32(input_packed[0], input_packed[6]),
extract_hi_lo_f32(input_packed[0], input_packed[7]),
extract_lo_hi_f32(input_packed[1], input_packed[7]),
extract_hi_lo_f32(input_packed[1], input_packed[8]),
extract_lo_hi_f32(input_packed[2], input_packed[8]),
extract_hi_lo_f32(input_packed[2], input_packed[9]),
extract_lo_hi_f32(input_packed[3], input_packed[9]),
extract_hi_lo_f32(input_packed[3], input_packed[10]),
extract_lo_hi_f32(input_packed[4], input_packed[10]),
extract_hi_lo_f32(input_packed[4], input_packed[11]),
extract_lo_hi_f32(input_packed[5], input_packed[11]),
extract_hi_lo_f32(input_packed[5], input_packed[12]),
extract_lo_hi_f32(input_packed[6], input_packed[12]),
];
let out = self.perform_parallel_fft_direct(values);
// Reverse shuffle: lo halves of consecutive outputs first, then out[12] joined with out[0],
// then hi halves of consecutive outputs (per the helper names).
let out_packed = [
extract_lo_lo_f32(out[0], out[1]),
extract_lo_lo_f32(out[2], out[3]),
extract_lo_lo_f32(out[4], out[5]),
extract_lo_lo_f32(out[6], out[7]),
extract_lo_lo_f32(out[8], out[9]),
extract_lo_lo_f32(out[10], out[11]),
extract_lo_hi_f32(out[12], out[0]),
extract_hi_hi_f32(out[1], out[2]),
extract_hi_hi_f32(out[3], out[4]),
extract_hi_hi_f32(out[5], out[6]),
extract_hi_hi_f32(out[7], out[8]),
extract_hi_hi_f32(out[9], out[10]),
extract_hi_hi_f32(out[11], out[12]),
];
write_complex_to_array_strided!(out_packed, buffer, 2, { 0,1,2,3,4,5,6,7,8,9,10,11,12 });
}
/// Length-13 kernel shared by both entry points above.
///
/// Takes the thirteen inputs as vectors and returns the thirteen outputs `[y00, ..., y12]` in
/// index order. Inputs are first combined in pairs (n, 13 - n); each output pair (k, 13 - k) is
/// then accumulated from those pair results and the stored twiddles.
///
/// # Safety
/// NOTE(review): presumably only requires `simd128` to be available — confirm against the
/// `WasmVector` method contracts.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 13]) -> [WasmVector32; 13] {
// The rotation is always built for `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already baked into the twiddles from `new` — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair input n with input 13 - n. Names read x{n}p{13-n} / x{n}m{13-n} (presumably the sum and
// difference returned by `column_butterfly2`). Each difference is rotated; each sum is added to y00.
let y00 = values[0];
let [x1p12, x1m12] = WasmVector::column_butterfly2([values[1], values[12]]);
let x1m12 = WasmVector::apply_rotate90(rotate, x1m12);
let y00 = WasmVector::add(y00, x1p12);
let [x2p11, x2m11] = WasmVector::column_butterfly2([values[2], values[11]]);
let x2m11 = WasmVector::apply_rotate90(rotate, x2m11);
let y00 = WasmVector::add(y00, x2p11);
let [x3p10, x3m10] = WasmVector::column_butterfly2([values[3], values[10]]);
let x3m10 = WasmVector::apply_rotate90(rotate, x3m10);
let y00 = WasmVector::add(y00, x3p10);
let [x4p9, x4m9] = WasmVector::column_butterfly2([values[4], values[9]]);
let x4m9 = WasmVector::apply_rotate90(rotate, x4m9);
let y00 = WasmVector::add(y00, x4p9);
let [x5p8, x5m8] = WasmVector::column_butterfly2([values[5], values[8]]);
let x5m8 = WasmVector::apply_rotate90(rotate, x5m8);
let y00 = WasmVector::add(y00, x5p8);
let [x6p7, x6m7] = WasmVector::column_butterfly2([values[6], values[7]]);
let x6m7 = WasmVector::apply_rotate90(rotate, x6m7);
let y00 = WasmVector::add(y00, x6p7);
// Output pairs (k, 13 - k): the `a` chain starts from values[0] and combines `twiddles_re` with
// the sums; the `b` chain combines `twiddles_im` with the rotated differences; a final
// `column_butterfly2` splits them into y{k} and y{13-k}.
// For output k and pair n the twiddle slot is k*n mod 13 folded into 1..=6 (minus one for
// zero-based indexing); `nmadd` replaces `fmadd` in the `b` chain exactly where k*n mod 13 > 6.
let m0112a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p12);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[1], x2p11);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[2], x3p10);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[3], x4p9);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[4], x5p8);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[5], x6p7);
let m0112b = WasmVector::mul(self.twiddles_im[0], x1m12);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[1], x2m11);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[2], x3m10);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[3], x4m9);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[4], x5m8);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[5], x6m7);
let [y01, y12] = WasmVector::column_butterfly2([m0112a, m0112b]);
let m0211a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p12);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[3], x2p11);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[5], x3p10);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[4], x4p9);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[2], x5p8);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[0], x6p7);
let m0211b = WasmVector::mul(self.twiddles_im[1], x1m12);
let m0211b = WasmVector::fmadd(m0211b, self.twiddles_im[3], x2m11);
let m0211b = WasmVector::fmadd(m0211b, self.twiddles_im[5], x3m10);
let m0211b = WasmVector::nmadd(m0211b, self.twiddles_im[4], x4m9);
let m0211b = WasmVector::nmadd(m0211b, self.twiddles_im[2], x5m8);
let m0211b = WasmVector::nmadd(m0211b, self.twiddles_im[0], x6m7);
let [y02, y11] = WasmVector::column_butterfly2([m0211a, m0211b]);
let m0310a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p12);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[5], x2p11);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[3], x3p10);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[0], x4p9);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[1], x5p8);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[4], x6p7);
let m0310b = WasmVector::mul(self.twiddles_im[2], x1m12);
let m0310b = WasmVector::fmadd(m0310b, self.twiddles_im[5], x2m11);
let m0310b = WasmVector::nmadd(m0310b, self.twiddles_im[3], x3m10);
let m0310b = WasmVector::nmadd(m0310b, self.twiddles_im[0], x4m9);
let m0310b = WasmVector::fmadd(m0310b, self.twiddles_im[1], x5m8);
let m0310b = WasmVector::fmadd(m0310b, self.twiddles_im[4], x6m7);
let [y03, y10] = WasmVector::column_butterfly2([m0310a, m0310b]);
let m0409a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p12);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[4], x2p11);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[0], x3p10);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[2], x4p9);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[5], x5p8);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[1], x6p7);
let m0409b = WasmVector::mul(self.twiddles_im[3], x1m12);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[4], x2m11);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[0], x3m10);
let m0409b = WasmVector::fmadd(m0409b, self.twiddles_im[2], x4m9);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[5], x5m8);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[1], x6m7);
let [y04, y09] = WasmVector::column_butterfly2([m0409a, m0409b]);
let m0508a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p12);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[2], x2p11);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[1], x3p10);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[5], x4p9);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[0], x5p8);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[3], x6p7);
let m0508b = WasmVector::mul(self.twiddles_im[4], x1m12);
let m0508b = WasmVector::nmadd(m0508b, self.twiddles_im[2], x2m11);
let m0508b = WasmVector::fmadd(m0508b, self.twiddles_im[1], x3m10);
let m0508b = WasmVector::nmadd(m0508b, self.twiddles_im[5], x4m9);
let m0508b = WasmVector::nmadd(m0508b, self.twiddles_im[0], x5m8);
let m0508b = WasmVector::fmadd(m0508b, self.twiddles_im[3], x6m7);
let [y05, y08] = WasmVector::column_butterfly2([m0508a, m0508b]);
let m0607a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p12);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[0], x2p11);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[4], x3p10);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[1], x4p9);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[3], x5p8);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[2], x6p7);
let m0607b = WasmVector::mul(self.twiddles_im[5], x1m12);
let m0607b = WasmVector::nmadd(m0607b, self.twiddles_im[0], x2m11);
let m0607b = WasmVector::fmadd(m0607b, self.twiddles_im[4], x3m10);
let m0607b = WasmVector::nmadd(m0607b, self.twiddles_im[1], x4m9);
let m0607b = WasmVector::fmadd(m0607b, self.twiddles_im[3], x5m8);
let m0607b = WasmVector::nmadd(m0607b, self.twiddles_im[2], x6m7);
let [y06, y07] = WasmVector::column_butterfly2([m0607a, m0607b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12]
}
}
/// Length-13 FFT butterfly for `f64` data using Wasm SIMD vectors.
///
/// `T` is carried only through `PhantomData`; the constructor calls `assert_f64::<T>()`.
struct WasmSimdF64Butterfly13<T> {
/// Direction this instance was built for; read by the closure passed to the boilerplate macro.
direction: FftDirection,
/// Real parts of the twiddles from `make_twiddles(13, direction)` (indices 1..=6), one vector each.
twiddles_re: [WasmVector64; 6],
/// Imaginary parts of the same six twiddles.
twiddles_im: [WasmVector64; 6],
/// Ties the struct to `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): macro is defined elsewhere; presumably it generates the `Fft` / `Length` /
// `Direction` impls for length 13, using the closure to read the direction — confirm.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly13, 13, |this: &WasmSimdF64Butterfly13<_>| this.direction);
impl<T: FftNum> WasmSimdF64Butterfly13<T> {
/// Builds the length-13 f64 butterfly for `direction`.
///
/// Calls `assert_f64::<T>()`, obtains the twiddles from `make_twiddles(13, direction)`
/// and stores their real and imaginary parts as broadcast vectors.
///
/// # Safety
/// Compiled with the `simd128` target feature enabled; the caller is responsible for
/// only invoking it where that feature is available.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
    assert_f64::<T>();
    let factors = make_twiddles(13, direction);
    unsafe {
        Self {
            direction,
            twiddles_re: factors.map(|tw| WasmVector::broadcast_scalar(tw.re)),
            twiddles_im: factors.map(|tw| WasmVector::broadcast_scalar(tw.im)),
            _phantom: std::marker::PhantomData,
        }
    }
}
/// Runs one length-13 transform in place on `buffer`.
///
/// Reads indices 0..=12 through `read_complex_to_array!`, feeds them to
/// `perform_fft_direct` and writes the 13 results back to the same indices through
/// `write_complex_to_array!`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
    let loaded = read_complex_to_array!(buffer, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 });
    let transformed = self.perform_fft_direct(loaded);
    write_complex_to_array!(transformed, buffer, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 });
}
/// Computes the 13-point butterfly on already-loaded vectors and returns the 13 outputs.
///
/// Structure, as written below:
/// * `values[k]` and `values[13 - k]` (k = 1..=6) go through `column_butterfly2`,
///   giving an `xKpJ` / `xKmJ` pair (sum / difference judging by the names — confirm in
///   `WasmVector`); the `m` half is then passed through `apply_rotate90`.
/// * `y00` is `values[0]` plus every `p` term.
/// * Each remaining output pair `(y_k, y_{13-k})` is a final `column_butterfly2` over an
///   `a` accumulator (`values[0]` combined with `twiddles_re` and the `p` terms) and a
///   `b` accumulator (`twiddles_im` combined with the rotated `m` terms). This reading
///   assumes `fmadd(acc, a, b)` is `acc + a * b` and `nmadd(acc, a, b)` is `acc - a * b`
///   — confirm in `WasmVector`.
///
/// For output row `k` and input pair `n` the twiddle slot is `k * n mod 13` folded into
/// `1..=6` (the array index is that value minus one); in the `b` accumulator `nmadd` is
/// used exactly where the fold was applied.
///
/// The rotation is always built for `FftDirection::Inverse`; `self.direction` is not read
/// here and only enters through the twiddles computed in `new`.
///
/// The statement order fixes the floating-point accumulation order, so do not reorder it
/// casually.
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 13]) -> [WasmVector64; 13] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair up mirrored inputs and accumulate output 0 along the way.
let y00 = values[0];
let [x1p12, x1m12] = WasmVector::column_butterfly2([values[1], values[12]]);
let x1m12 = WasmVector::apply_rotate90(rotate, x1m12);
let y00 = WasmVector::add(y00, x1p12);
let [x2p11, x2m11] = WasmVector::column_butterfly2([values[2], values[11]]);
let x2m11 = WasmVector::apply_rotate90(rotate, x2m11);
let y00 = WasmVector::add(y00, x2p11);
let [x3p10, x3m10] = WasmVector::column_butterfly2([values[3], values[10]]);
let x3m10 = WasmVector::apply_rotate90(rotate, x3m10);
let y00 = WasmVector::add(y00, x3p10);
let [x4p9, x4m9] = WasmVector::column_butterfly2([values[4], values[9]]);
let x4m9 = WasmVector::apply_rotate90(rotate, x4m9);
let y00 = WasmVector::add(y00, x4p9);
let [x5p8, x5m8] = WasmVector::column_butterfly2([values[5], values[8]]);
let x5m8 = WasmVector::apply_rotate90(rotate, x5m8);
let y00 = WasmVector::add(y00, x5p8);
let [x6p7, x6m7] = WasmVector::column_butterfly2([values[6], values[7]]);
let x6m7 = WasmVector::apply_rotate90(rotate, x6m7);
let y00 = WasmVector::add(y00, x6p7);
// Outputs 1 and 12.
let m0112a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p12);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[1], x2p11);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[2], x3p10);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[3], x4p9);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[4], x5p8);
let m0112a = WasmVector::fmadd(m0112a, self.twiddles_re[5], x6p7);
let m0112b = WasmVector::mul(self.twiddles_im[0], x1m12);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[1], x2m11);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[2], x3m10);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[3], x4m9);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[4], x5m8);
let m0112b = WasmVector::fmadd(m0112b, self.twiddles_im[5], x6m7);
let [y01, y12] = WasmVector::column_butterfly2([m0112a, m0112b]);
// Outputs 2 and 11.
let m0211a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p12);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[3], x2p11);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[5], x3p10);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[4], x4p9);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[2], x5p8);
let m0211a = WasmVector::fmadd(m0211a, self.twiddles_re[0], x6p7);
let m0211b = WasmVector::mul(self.twiddles_im[1], x1m12);
let m0211b = WasmVector::fmadd(m0211b, self.twiddles_im[3], x2m11);
let m0211b = WasmVector::fmadd(m0211b, self.twiddles_im[5], x3m10);
let m0211b = WasmVector::nmadd(m0211b, self.twiddles_im[4], x4m9);
let m0211b = WasmVector::nmadd(m0211b, self.twiddles_im[2], x5m8);
let m0211b = WasmVector::nmadd(m0211b, self.twiddles_im[0], x6m7);
let [y02, y11] = WasmVector::column_butterfly2([m0211a, m0211b]);
// Outputs 3 and 10.
let m0310a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p12);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[5], x2p11);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[3], x3p10);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[0], x4p9);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[1], x5p8);
let m0310a = WasmVector::fmadd(m0310a, self.twiddles_re[4], x6p7);
let m0310b = WasmVector::mul(self.twiddles_im[2], x1m12);
let m0310b = WasmVector::fmadd(m0310b, self.twiddles_im[5], x2m11);
let m0310b = WasmVector::nmadd(m0310b, self.twiddles_im[3], x3m10);
let m0310b = WasmVector::nmadd(m0310b, self.twiddles_im[0], x4m9);
let m0310b = WasmVector::fmadd(m0310b, self.twiddles_im[1], x5m8);
let m0310b = WasmVector::fmadd(m0310b, self.twiddles_im[4], x6m7);
let [y03, y10] = WasmVector::column_butterfly2([m0310a, m0310b]);
// Outputs 4 and 9.
let m0409a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p12);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[4], x2p11);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[0], x3p10);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[2], x4p9);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[5], x5p8);
let m0409a = WasmVector::fmadd(m0409a, self.twiddles_re[1], x6p7);
let m0409b = WasmVector::mul(self.twiddles_im[3], x1m12);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[4], x2m11);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[0], x3m10);
let m0409b = WasmVector::fmadd(m0409b, self.twiddles_im[2], x4m9);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[5], x5m8);
let m0409b = WasmVector::nmadd(m0409b, self.twiddles_im[1], x6m7);
let [y04, y09] = WasmVector::column_butterfly2([m0409a, m0409b]);
// Outputs 5 and 8.
let m0508a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p12);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[2], x2p11);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[1], x3p10);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[5], x4p9);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[0], x5p8);
let m0508a = WasmVector::fmadd(m0508a, self.twiddles_re[3], x6p7);
let m0508b = WasmVector::mul(self.twiddles_im[4], x1m12);
let m0508b = WasmVector::nmadd(m0508b, self.twiddles_im[2], x2m11);
let m0508b = WasmVector::fmadd(m0508b, self.twiddles_im[1], x3m10);
let m0508b = WasmVector::nmadd(m0508b, self.twiddles_im[5], x4m9);
let m0508b = WasmVector::nmadd(m0508b, self.twiddles_im[0], x5m8);
let m0508b = WasmVector::fmadd(m0508b, self.twiddles_im[3], x6m7);
let [y05, y08] = WasmVector::column_butterfly2([m0508a, m0508b]);
// Outputs 6 and 7.
let m0607a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p12);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[0], x2p11);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[4], x3p10);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[1], x4p9);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[3], x5p8);
let m0607a = WasmVector::fmadd(m0607a, self.twiddles_re[2], x6p7);
let m0607b = WasmVector::mul(self.twiddles_im[5], x1m12);
let m0607b = WasmVector::nmadd(m0607b, self.twiddles_im[0], x2m11);
let m0607b = WasmVector::fmadd(m0607b, self.twiddles_im[4], x3m10);
let m0607b = WasmVector::nmadd(m0607b, self.twiddles_im[1], x4m9);
let m0607b = WasmVector::fmadd(m0607b, self.twiddles_im[3], x5m8);
let m0607b = WasmVector::nmadd(m0607b, self.twiddles_im[2], x6m7);
let [y06, y07] = WasmVector::column_butterfly2([m0607a, m0607b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12]
}
}
/// Length-17 butterfly whose kernel works on `WasmVector32` values.
///
/// Stores the direction it was built with and the eight twiddle factors returned by
/// `make_twiddles(17, direction)` in `new`, split into real and imaginary parts and
/// each broadcast into a vector with `WasmVector::broadcast_scalar`.
struct WasmSimdF32Butterfly17<T> {
/// Direction handed to `new`; returned by the closure given to the boilerplate macro below.
direction: FftDirection,
/// Real parts of the eight twiddles, one broadcast vector per twiddle.
twiddles_re: [WasmVector32; 8],
/// Imaginary parts of the eight twiddles, one broadcast vector per twiddle.
twiddles_im: [WasmVector32; 8],
/// Ties the struct to the scalar type `T` without storing a value of it.
_phantom: std::marker::PhantomData<T>,
}
// Boilerplate macro (defined elsewhere) invoked with this type, the length 17 and a
// direction accessor; presumably it generates the crate's FFT trait impls — confirm
// against the macro definition.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly17, 17, |this: &WasmSimdF32Butterfly17<_>| this.direction);
impl<T: FftNum> WasmSimdF32Butterfly17<T> {
/// Builds the length-17 f32 butterfly for `direction`.
///
/// Calls `assert_f32::<T>()`, obtains the twiddles from `make_twiddles(17, direction)`
/// and stores their real and imaginary parts as broadcast vectors.
///
/// # Safety
/// Compiled with the `simd128` target feature enabled; the caller is responsible for
/// only invoking it where that feature is available.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
    assert_f32::<T>();
    let factors = make_twiddles(17, direction);
    Self {
        direction,
        twiddles_re: factors.map(|tw| WasmVector::broadcast_scalar(tw.re)),
        twiddles_im: factors.map(|tw| WasmVector::broadcast_scalar(tw.im)),
        _phantom: std::marker::PhantomData,
    }
}
/// Runs a single length-17 transform in place on `buffer`.
///
/// Reads indices 0..=16 through `read_partial1_complex_to_array!`, runs them through
/// `perform_parallel_fft_direct` and writes the results back through
/// `write_partial_lo_complex_to_array!` (presumably only the low half of each vector
/// carries data on this path — confirm against the macros).
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let loaded = read_partial1_complex_to_array!(
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }
    );
    let transformed = self.perform_parallel_fft_direct(loaded);
    write_partial_lo_complex_to_array!(
        transformed,
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }
    );
}
/// Runs the length-17 kernel on `buffer` using both halves of every vector.
///
/// Seventeen vectors are read at even indices 0..=32, regrouped with the
/// `extract_*_f32` helpers into the 17 kernel inputs, transformed by
/// `perform_parallel_fft_direct`, regrouped again and written back with stride 2.
/// Presumably the buffer holds two back-to-back length-17 signals and each loaded
/// vector carries two adjacent complex values — confirm against the helpers.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let packed = read_complex_to_array!(
        buffer,
        { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32 }
    );

    // Regroup the loaded vectors into the 17 inputs expected by the kernel.
    let lanes = [
        extract_lo_hi_f32(packed[0], packed[8]),
        extract_hi_lo_f32(packed[0], packed[9]),
        extract_lo_hi_f32(packed[1], packed[9]),
        extract_hi_lo_f32(packed[1], packed[10]),
        extract_lo_hi_f32(packed[2], packed[10]),
        extract_hi_lo_f32(packed[2], packed[11]),
        extract_lo_hi_f32(packed[3], packed[11]),
        extract_hi_lo_f32(packed[3], packed[12]),
        extract_lo_hi_f32(packed[4], packed[12]),
        extract_hi_lo_f32(packed[4], packed[13]),
        extract_lo_hi_f32(packed[5], packed[13]),
        extract_hi_lo_f32(packed[5], packed[14]),
        extract_lo_hi_f32(packed[6], packed[14]),
        extract_hi_lo_f32(packed[6], packed[15]),
        extract_lo_hi_f32(packed[7], packed[15]),
        extract_hi_lo_f32(packed[7], packed[16]),
        extract_lo_hi_f32(packed[8], packed[16]),
    ];

    let spectra = self.perform_parallel_fft_direct(lanes);

    // Regroup the kernel outputs into the layout that is stored back.
    let repacked = [
        extract_lo_lo_f32(spectra[0], spectra[1]),
        extract_lo_lo_f32(spectra[2], spectra[3]),
        extract_lo_lo_f32(spectra[4], spectra[5]),
        extract_lo_lo_f32(spectra[6], spectra[7]),
        extract_lo_lo_f32(spectra[8], spectra[9]),
        extract_lo_lo_f32(spectra[10], spectra[11]),
        extract_lo_lo_f32(spectra[12], spectra[13]),
        extract_lo_lo_f32(spectra[14], spectra[15]),
        extract_lo_hi_f32(spectra[16], spectra[0]),
        extract_hi_hi_f32(spectra[1], spectra[2]),
        extract_hi_hi_f32(spectra[3], spectra[4]),
        extract_hi_hi_f32(spectra[5], spectra[6]),
        extract_hi_hi_f32(spectra[7], spectra[8]),
        extract_hi_hi_f32(spectra[9], spectra[10]),
        extract_hi_hi_f32(spectra[11], spectra[12]),
        extract_hi_hi_f32(spectra[13], spectra[14]),
        extract_hi_hi_f32(spectra[15], spectra[16]),
    ];

    write_complex_to_array_strided!(
        repacked,
        buffer,
        2,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }
    );
}
/// Computes the 17-point butterfly on already-loaded vectors and returns the 17 outputs.
///
/// Structure, as written below:
/// * `values[k]` and `values[17 - k]` (k = 1..=8) go through `column_butterfly2`,
///   giving an `xKpJ` / `xKmJ` pair (sum / difference judging by the names — confirm in
///   `WasmVector`); the `m` half is then passed through `apply_rotate90`.
/// * `y00` is `values[0]` plus every `p` term.
/// * Each remaining output pair `(y_k, y_{17-k})` is a final `column_butterfly2` over an
///   `a` accumulator (`values[0]` combined with `twiddles_re` and the `p` terms) and a
///   `b` accumulator (`twiddles_im` combined with the rotated `m` terms). This reading
///   assumes `fmadd(acc, a, b)` is `acc + a * b` and `nmadd(acc, a, b)` is `acc - a * b`
///   — confirm in `WasmVector`.
///
/// For output row `k` and input pair `n` the twiddle slot is `k * n mod 17` folded into
/// `1..=8` (the array index is that value minus one); in the `b` accumulator `nmadd` is
/// used exactly where the fold was applied.
///
/// The rotation is always built for `FftDirection::Inverse`; `self.direction` is not read
/// here and only enters through the twiddles computed in `new`.
///
/// The statement order fixes the floating-point accumulation order, so do not reorder it
/// casually.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 17]) -> [WasmVector32; 17] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair up mirrored inputs and accumulate output 0 along the way.
let y00 = values[0];
let [x1p16, x1m16] = WasmVector::column_butterfly2([values[1], values[16]]);
let x1m16 = WasmVector::apply_rotate90(rotate, x1m16);
let y00 = WasmVector::add(y00, x1p16);
let [x2p15, x2m15] = WasmVector::column_butterfly2([values[2], values[15]]);
let x2m15 = WasmVector::apply_rotate90(rotate, x2m15);
let y00 = WasmVector::add(y00, x2p15);
let [x3p14, x3m14] = WasmVector::column_butterfly2([values[3], values[14]]);
let x3m14 = WasmVector::apply_rotate90(rotate, x3m14);
let y00 = WasmVector::add(y00, x3p14);
let [x4p13, x4m13] = WasmVector::column_butterfly2([values[4], values[13]]);
let x4m13 = WasmVector::apply_rotate90(rotate, x4m13);
let y00 = WasmVector::add(y00, x4p13);
let [x5p12, x5m12] = WasmVector::column_butterfly2([values[5], values[12]]);
let x5m12 = WasmVector::apply_rotate90(rotate, x5m12);
let y00 = WasmVector::add(y00, x5p12);
let [x6p11, x6m11] = WasmVector::column_butterfly2([values[6], values[11]]);
let x6m11 = WasmVector::apply_rotate90(rotate, x6m11);
let y00 = WasmVector::add(y00, x6p11);
let [x7p10, x7m10] = WasmVector::column_butterfly2([values[7], values[10]]);
let x7m10 = WasmVector::apply_rotate90(rotate, x7m10);
let y00 = WasmVector::add(y00, x7p10);
let [x8p9, x8m9] = WasmVector::column_butterfly2([values[8], values[9]]);
let x8m9 = WasmVector::apply_rotate90(rotate, x8m9);
let y00 = WasmVector::add(y00, x8p9);
// Outputs 1 and 16.
let m0116a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p16);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[1], x2p15);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[2], x3p14);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[3], x4p13);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[4], x5p12);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[5], x6p11);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[6], x7p10);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[7], x8p9);
let m0116b = WasmVector::mul(self.twiddles_im[0], x1m16);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[1], x2m15);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[2], x3m14);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[3], x4m13);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[4], x5m12);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[5], x6m11);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[6], x7m10);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[7], x8m9);
let [y01, y16] = WasmVector::column_butterfly2([m0116a, m0116b]);
// Outputs 2 and 15.
let m0215a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p16);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[3], x2p15);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[5], x3p14);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[7], x4p13);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[6], x5p12);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[4], x6p11);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[2], x7p10);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[0], x8p9);
let m0215b = WasmVector::mul(self.twiddles_im[1], x1m16);
let m0215b = WasmVector::fmadd(m0215b, self.twiddles_im[3], x2m15);
let m0215b = WasmVector::fmadd(m0215b, self.twiddles_im[5], x3m14);
let m0215b = WasmVector::fmadd(m0215b, self.twiddles_im[7], x4m13);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[6], x5m12);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[4], x6m11);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[2], x7m10);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[0], x8m9);
let [y02, y15] = WasmVector::column_butterfly2([m0215a, m0215b]);
// Outputs 3 and 14.
let m0314a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p16);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[5], x2p15);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[7], x3p14);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[4], x4p13);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[1], x5p12);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[0], x6p11);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[3], x7p10);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[6], x8p9);
let m0314b = WasmVector::mul(self.twiddles_im[2], x1m16);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[5], x2m15);
let m0314b = WasmVector::nmadd(m0314b, self.twiddles_im[7], x3m14);
let m0314b = WasmVector::nmadd(m0314b, self.twiddles_im[4], x4m13);
let m0314b = WasmVector::nmadd(m0314b, self.twiddles_im[1], x5m12);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[0], x6m11);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[3], x7m10);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[6], x8m9);
let [y03, y14] = WasmVector::column_butterfly2([m0314a, m0314b]);
// Outputs 4 and 13.
let m0413a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p16);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[7], x2p15);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[4], x3p14);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[0], x4p13);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[2], x5p12);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[6], x6p11);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[5], x7p10);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[1], x8p9);
let m0413b = WasmVector::mul(self.twiddles_im[3], x1m16);
let m0413b = WasmVector::fmadd(m0413b, self.twiddles_im[7], x2m15);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[4], x3m14);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[0], x4m13);
let m0413b = WasmVector::fmadd(m0413b, self.twiddles_im[2], x5m12);
let m0413b = WasmVector::fmadd(m0413b, self.twiddles_im[6], x6m11);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[5], x7m10);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[1], x8m9);
let [y04, y13] = WasmVector::column_butterfly2([m0413a, m0413b]);
// Outputs 5 and 12.
let m0512a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p16);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[6], x2p15);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[1], x3p14);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[2], x4p13);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[7], x5p12);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[3], x6p11);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[0], x7p10);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[5], x8p9);
let m0512b = WasmVector::mul(self.twiddles_im[4], x1m16);
let m0512b = WasmVector::nmadd(m0512b, self.twiddles_im[6], x2m15);
let m0512b = WasmVector::nmadd(m0512b, self.twiddles_im[1], x3m14);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[2], x4m13);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[7], x5m12);
let m0512b = WasmVector::nmadd(m0512b, self.twiddles_im[3], x6m11);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[0], x7m10);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[5], x8m9);
let [y05, y12] = WasmVector::column_butterfly2([m0512a, m0512b]);
// Outputs 6 and 11.
let m0611a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p16);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[4], x2p15);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[0], x3p14);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[6], x4p13);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[3], x5p12);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[1], x6p11);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[7], x7p10);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[2], x8p9);
let m0611b = WasmVector::mul(self.twiddles_im[5], x1m16);
let m0611b = WasmVector::nmadd(m0611b, self.twiddles_im[4], x2m15);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[0], x3m14);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[6], x4m13);
let m0611b = WasmVector::nmadd(m0611b, self.twiddles_im[3], x5m12);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[1], x6m11);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[7], x7m10);
let m0611b = WasmVector::nmadd(m0611b, self.twiddles_im[2], x8m9);
let [y06, y11] = WasmVector::column_butterfly2([m0611a, m0611b]);
// Outputs 7 and 10.
let m0710a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p16);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[2], x2p15);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[3], x3p14);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[5], x4p13);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[0], x5p12);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[7], x6p11);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[1], x7p10);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[4], x8p9);
let m0710b = WasmVector::mul(self.twiddles_im[6], x1m16);
let m0710b = WasmVector::nmadd(m0710b, self.twiddles_im[2], x2m15);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[3], x3m14);
let m0710b = WasmVector::nmadd(m0710b, self.twiddles_im[5], x4m13);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[0], x5m12);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[7], x6m11);
let m0710b = WasmVector::nmadd(m0710b, self.twiddles_im[1], x7m10);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[4], x8m9);
let [y07, y10] = WasmVector::column_butterfly2([m0710a, m0710b]);
// Outputs 8 and 9.
let m0809a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p16);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[0], x2p15);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[6], x3p14);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[1], x4p13);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[5], x5p12);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[2], x6p11);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[4], x7p10);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[3], x8p9);
let m0809b = WasmVector::mul(self.twiddles_im[7], x1m16);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[0], x2m15);
let m0809b = WasmVector::fmadd(m0809b, self.twiddles_im[6], x3m14);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[1], x4m13);
let m0809b = WasmVector::fmadd(m0809b, self.twiddles_im[5], x5m12);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[2], x6m11);
let m0809b = WasmVector::fmadd(m0809b, self.twiddles_im[4], x7m10);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[3], x8m9);
let [y08, y09] = WasmVector::column_butterfly2([m0809a, m0809b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16]
}
}
/// Length-17 butterfly whose kernel works on `WasmVector64` values.
///
/// Stores the direction it was built with and the eight twiddle factors returned by
/// `make_twiddles(17, direction)` in `new`, split into real and imaginary parts and
/// each broadcast into a vector with `WasmVector::broadcast_scalar`.
struct WasmSimdF64Butterfly17<T> {
/// Direction handed to `new`; returned by the closure given to the boilerplate macro below.
direction: FftDirection,
/// Real parts of the eight twiddles, one broadcast vector per twiddle.
twiddles_re: [WasmVector64; 8],
/// Imaginary parts of the eight twiddles, one broadcast vector per twiddle.
twiddles_im: [WasmVector64; 8],
/// Ties the struct to the scalar type `T` without storing a value of it.
_phantom: std::marker::PhantomData<T>,
}
// Boilerplate macro (defined elsewhere) invoked with this type, the length 17 and a
// direction accessor; presumably it generates the crate's FFT trait impls — confirm
// against the macro definition.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly17, 17, |this: &WasmSimdF64Butterfly17<_>| this.direction);
impl<T: FftNum> WasmSimdF64Butterfly17<T> {
/// Builds the length-17 f64 butterfly for `direction`.
///
/// Calls `assert_f64::<T>()`, obtains the twiddles from `make_twiddles(17, direction)`
/// and stores their real and imaginary parts as broadcast vectors.
///
/// # Safety
/// Compiled with the `simd128` target feature enabled; the caller is responsible for
/// only invoking it where that feature is available.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
    assert_f64::<T>();
    let factors = make_twiddles(17, direction);
    unsafe {
        Self {
            direction,
            twiddles_re: factors.map(|tw| WasmVector::broadcast_scalar(tw.re)),
            twiddles_im: factors.map(|tw| WasmVector::broadcast_scalar(tw.im)),
            _phantom: std::marker::PhantomData,
        }
    }
}
/// Runs one length-17 transform in place on `buffer`.
///
/// Reads indices 0..=16 through `read_complex_to_array!`, feeds them to
/// `perform_fft_direct` and writes the 17 results back to the same indices through
/// `write_complex_to_array!`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
    let loaded = read_complex_to_array!(
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }
    );
    let transformed = self.perform_fft_direct(loaded);
    write_complex_to_array!(
        transformed,
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }
    );
}
/// Computes the 17-point butterfly on already-loaded vectors and returns the 17 outputs.
///
/// Structure, as written below:
/// * `values[k]` and `values[17 - k]` (k = 1..=8) go through `column_butterfly2`,
///   giving an `xKpJ` / `xKmJ` pair (sum / difference judging by the names — confirm in
///   `WasmVector`); the `m` half is then passed through `apply_rotate90`.
/// * `y00` is `values[0]` plus every `p` term.
/// * Each remaining output pair `(y_k, y_{17-k})` is a final `column_butterfly2` over an
///   `a` accumulator (`values[0]` combined with `twiddles_re` and the `p` terms) and a
///   `b` accumulator (`twiddles_im` combined with the rotated `m` terms). This reading
///   assumes `fmadd(acc, a, b)` is `acc + a * b` and `nmadd(acc, a, b)` is `acc - a * b`
///   — confirm in `WasmVector`.
///
/// For output row `k` and input pair `n` the twiddle slot is `k * n mod 17` folded into
/// `1..=8` (the array index is that value minus one); in the `b` accumulator `nmadd` is
/// used exactly where the fold was applied.
///
/// The rotation is always built for `FftDirection::Inverse`; `self.direction` is not read
/// here and only enters through the twiddles computed in `new`.
///
/// The statement order fixes the floating-point accumulation order, so do not reorder it
/// casually.
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 17]) -> [WasmVector64; 17] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair up mirrored inputs and accumulate output 0 along the way.
let y00 = values[0];
let [x1p16, x1m16] = WasmVector::column_butterfly2([values[1], values[16]]);
let x1m16 = WasmVector::apply_rotate90(rotate, x1m16);
let y00 = WasmVector::add(y00, x1p16);
let [x2p15, x2m15] = WasmVector::column_butterfly2([values[2], values[15]]);
let x2m15 = WasmVector::apply_rotate90(rotate, x2m15);
let y00 = WasmVector::add(y00, x2p15);
let [x3p14, x3m14] = WasmVector::column_butterfly2([values[3], values[14]]);
let x3m14 = WasmVector::apply_rotate90(rotate, x3m14);
let y00 = WasmVector::add(y00, x3p14);
let [x4p13, x4m13] = WasmVector::column_butterfly2([values[4], values[13]]);
let x4m13 = WasmVector::apply_rotate90(rotate, x4m13);
let y00 = WasmVector::add(y00, x4p13);
let [x5p12, x5m12] = WasmVector::column_butterfly2([values[5], values[12]]);
let x5m12 = WasmVector::apply_rotate90(rotate, x5m12);
let y00 = WasmVector::add(y00, x5p12);
let [x6p11, x6m11] = WasmVector::column_butterfly2([values[6], values[11]]);
let x6m11 = WasmVector::apply_rotate90(rotate, x6m11);
let y00 = WasmVector::add(y00, x6p11);
let [x7p10, x7m10] = WasmVector::column_butterfly2([values[7], values[10]]);
let x7m10 = WasmVector::apply_rotate90(rotate, x7m10);
let y00 = WasmVector::add(y00, x7p10);
let [x8p9, x8m9] = WasmVector::column_butterfly2([values[8], values[9]]);
let x8m9 = WasmVector::apply_rotate90(rotate, x8m9);
let y00 = WasmVector::add(y00, x8p9);
// Outputs 1 and 16.
let m0116a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p16);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[1], x2p15);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[2], x3p14);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[3], x4p13);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[4], x5p12);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[5], x6p11);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[6], x7p10);
let m0116a = WasmVector::fmadd(m0116a, self.twiddles_re[7], x8p9);
let m0116b = WasmVector::mul(self.twiddles_im[0], x1m16);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[1], x2m15);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[2], x3m14);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[3], x4m13);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[4], x5m12);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[5], x6m11);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[6], x7m10);
let m0116b = WasmVector::fmadd(m0116b, self.twiddles_im[7], x8m9);
let [y01, y16] = WasmVector::column_butterfly2([m0116a, m0116b]);
// Outputs 2 and 15.
let m0215a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p16);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[3], x2p15);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[5], x3p14);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[7], x4p13);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[6], x5p12);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[4], x6p11);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[2], x7p10);
let m0215a = WasmVector::fmadd(m0215a, self.twiddles_re[0], x8p9);
let m0215b = WasmVector::mul(self.twiddles_im[1], x1m16);
let m0215b = WasmVector::fmadd(m0215b, self.twiddles_im[3], x2m15);
let m0215b = WasmVector::fmadd(m0215b, self.twiddles_im[5], x3m14);
let m0215b = WasmVector::fmadd(m0215b, self.twiddles_im[7], x4m13);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[6], x5m12);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[4], x6m11);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[2], x7m10);
let m0215b = WasmVector::nmadd(m0215b, self.twiddles_im[0], x8m9);
let [y02, y15] = WasmVector::column_butterfly2([m0215a, m0215b]);
// Outputs 3 and 14.
let m0314a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p16);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[5], x2p15);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[7], x3p14);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[4], x4p13);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[1], x5p12);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[0], x6p11);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[3], x7p10);
let m0314a = WasmVector::fmadd(m0314a, self.twiddles_re[6], x8p9);
let m0314b = WasmVector::mul(self.twiddles_im[2], x1m16);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[5], x2m15);
let m0314b = WasmVector::nmadd(m0314b, self.twiddles_im[7], x3m14);
let m0314b = WasmVector::nmadd(m0314b, self.twiddles_im[4], x4m13);
let m0314b = WasmVector::nmadd(m0314b, self.twiddles_im[1], x5m12);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[0], x6m11);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[3], x7m10);
let m0314b = WasmVector::fmadd(m0314b, self.twiddles_im[6], x8m9);
let [y03, y14] = WasmVector::column_butterfly2([m0314a, m0314b]);
// Outputs 4 and 13.
let m0413a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p16);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[7], x2p15);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[4], x3p14);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[0], x4p13);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[2], x5p12);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[6], x6p11);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[5], x7p10);
let m0413a = WasmVector::fmadd(m0413a, self.twiddles_re[1], x8p9);
let m0413b = WasmVector::mul(self.twiddles_im[3], x1m16);
let m0413b = WasmVector::fmadd(m0413b, self.twiddles_im[7], x2m15);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[4], x3m14);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[0], x4m13);
let m0413b = WasmVector::fmadd(m0413b, self.twiddles_im[2], x5m12);
let m0413b = WasmVector::fmadd(m0413b, self.twiddles_im[6], x6m11);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[5], x7m10);
let m0413b = WasmVector::nmadd(m0413b, self.twiddles_im[1], x8m9);
let [y04, y13] = WasmVector::column_butterfly2([m0413a, m0413b]);
// Outputs 5 and 12.
let m0512a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p16);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[6], x2p15);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[1], x3p14);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[2], x4p13);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[7], x5p12);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[3], x6p11);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[0], x7p10);
let m0512a = WasmVector::fmadd(m0512a, self.twiddles_re[5], x8p9);
let m0512b = WasmVector::mul(self.twiddles_im[4], x1m16);
let m0512b = WasmVector::nmadd(m0512b, self.twiddles_im[6], x2m15);
let m0512b = WasmVector::nmadd(m0512b, self.twiddles_im[1], x3m14);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[2], x4m13);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[7], x5m12);
let m0512b = WasmVector::nmadd(m0512b, self.twiddles_im[3], x6m11);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[0], x7m10);
let m0512b = WasmVector::fmadd(m0512b, self.twiddles_im[5], x8m9);
let [y05, y12] = WasmVector::column_butterfly2([m0512a, m0512b]);
// Outputs 6 and 11.
let m0611a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p16);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[4], x2p15);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[0], x3p14);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[6], x4p13);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[3], x5p12);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[1], x6p11);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[7], x7p10);
let m0611a = WasmVector::fmadd(m0611a, self.twiddles_re[2], x8p9);
let m0611b = WasmVector::mul(self.twiddles_im[5], x1m16);
let m0611b = WasmVector::nmadd(m0611b, self.twiddles_im[4], x2m15);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[0], x3m14);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[6], x4m13);
let m0611b = WasmVector::nmadd(m0611b, self.twiddles_im[3], x5m12);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[1], x6m11);
let m0611b = WasmVector::fmadd(m0611b, self.twiddles_im[7], x7m10);
let m0611b = WasmVector::nmadd(m0611b, self.twiddles_im[2], x8m9);
let [y06, y11] = WasmVector::column_butterfly2([m0611a, m0611b]);
// Outputs 7 and 10.
let m0710a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p16);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[2], x2p15);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[3], x3p14);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[5], x4p13);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[0], x5p12);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[7], x6p11);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[1], x7p10);
let m0710a = WasmVector::fmadd(m0710a, self.twiddles_re[4], x8p9);
let m0710b = WasmVector::mul(self.twiddles_im[6], x1m16);
let m0710b = WasmVector::nmadd(m0710b, self.twiddles_im[2], x2m15);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[3], x3m14);
let m0710b = WasmVector::nmadd(m0710b, self.twiddles_im[5], x4m13);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[0], x5m12);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[7], x6m11);
let m0710b = WasmVector::nmadd(m0710b, self.twiddles_im[1], x7m10);
let m0710b = WasmVector::fmadd(m0710b, self.twiddles_im[4], x8m9);
let [y07, y10] = WasmVector::column_butterfly2([m0710a, m0710b]);
// Outputs 8 and 9.
let m0809a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p16);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[0], x2p15);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[6], x3p14);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[1], x4p13);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[5], x5p12);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[2], x6p11);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[4], x7p10);
let m0809a = WasmVector::fmadd(m0809a, self.twiddles_re[3], x8p9);
let m0809b = WasmVector::mul(self.twiddles_im[7], x1m16);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[0], x2m15);
let m0809b = WasmVector::fmadd(m0809b, self.twiddles_im[6], x3m14);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[1], x4m13);
let m0809b = WasmVector::fmadd(m0809b, self.twiddles_im[5], x5m12);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[2], x6m11);
let m0809b = WasmVector::fmadd(m0809b, self.twiddles_im[4], x7m10);
let m0809b = WasmVector::nmadd(m0809b, self.twiddles_im[3], x8m9);
let [y08, y09] = WasmVector::column_butterfly2([m0809a, m0809b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16]
}
}
/// Length-19 FFT butterfly operating on `f32` data through `WasmVector32` values.
///
/// Stores the direction it was constructed with plus nine twiddle vectors,
/// kept as separate real-part and imaginary-part arrays.
struct WasmSimdF32Butterfly19<T> {
// Direction passed to `new`; the boilerplate macro invocation below reads it through a closure.
direction: FftDirection,
// Real parts of the values from `make_twiddles(19, direction)`, each passed through `broadcast_scalar`.
twiddles_re: [WasmVector32; 9],
// Imaginary parts of the same twiddles, each passed through `broadcast_scalar`.
twiddles_im: [WasmVector32; 9],
// Ties the struct to the generic parameter `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// Macro arguments: the type, the length 19, and a closure returning the stored direction.
// NOTE(review): presumably this generates the `Fft`/`Length`/`Direction` impls — confirm in the macro definition.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly19, 19, |this: &WasmSimdF32Butterfly19<_>| this.direction);
impl<T: FftNum> WasmSimdF32Butterfly19<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f32::<T>()`, then splits every value returned by
/// `make_twiddles(19, direction)` into a real-part vector and an imaginary-part
/// vector via `broadcast_scalar`.
///
/// # Safety
/// Compiled with `#[target_feature(enable = "simd128")]`; the caller must make sure
/// that feature is available.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
// NOTE(review): presumably panics when `T` is not `f32` — confirm in wasm_simd_common.
assert_f32::<T>();
let twiddles = make_twiddles(19, direction);
Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}
}
/// Transforms `buffer` in place: loads indices 0..=18, runs
/// `perform_parallel_fft_direct`, and stores the result back at the same indices.
///
/// NOTE(review): the macro names suggest one complex value is loaded per vector and
/// only the low half of each result is stored — confirm in wasm_simd_utils.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let values = read_partial1_complex_to_array!(buffer, { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 });
let out = self.perform_parallel_fft_direct(values);
write_partial_lo_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 } );
}
/// Transforms `buffer` in place using 19 full vectors loaded at the even indices
/// 0, 2, ..., 36, regrouping vector halves before and after the kernel.
///
/// NOTE(review): presumably each loaded vector holds two consecutive complex `f32`
/// values, so this processes two back-to-back length-19 FFTs at once — confirm.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
let input_packed = read_complex_to_array!(buffer, { 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36 });
// values[i] is built from input_packed[i / 2] and input_packed[(i + 19) / 2];
// the lo/hi helper chosen alternates with the parity of i.
let values = [
extract_lo_hi_f32(input_packed[0], input_packed[9]),
extract_hi_lo_f32(input_packed[0], input_packed[10]),
extract_lo_hi_f32(input_packed[1], input_packed[10]),
extract_hi_lo_f32(input_packed[1], input_packed[11]),
extract_lo_hi_f32(input_packed[2], input_packed[11]),
extract_hi_lo_f32(input_packed[2], input_packed[12]),
extract_lo_hi_f32(input_packed[3], input_packed[12]),
extract_hi_lo_f32(input_packed[3], input_packed[13]),
extract_lo_hi_f32(input_packed[4], input_packed[13]),
extract_hi_lo_f32(input_packed[4], input_packed[14]),
extract_lo_hi_f32(input_packed[5], input_packed[14]),
extract_hi_lo_f32(input_packed[5], input_packed[15]),
extract_lo_hi_f32(input_packed[6], input_packed[15]),
extract_hi_lo_f32(input_packed[6], input_packed[16]),
extract_lo_hi_f32(input_packed[7], input_packed[16]),
extract_hi_lo_f32(input_packed[7], input_packed[17]),
extract_lo_hi_f32(input_packed[8], input_packed[17]),
extract_hi_lo_f32(input_packed[8], input_packed[18]),
extract_lo_hi_f32(input_packed[9], input_packed[18]),
];
let out = self.perform_parallel_fft_direct(values);
// Reverse regrouping: entries 0..=8 pair out[2j] with out[2j + 1] (lo halves),
// entry 9 joins out[18] with out[0], and entries 10..=18 pair the hi halves of
// consecutive outputs starting at out[1].
let out_packed = [
extract_lo_lo_f32(out[0], out[1]),
extract_lo_lo_f32(out[2], out[3]),
extract_lo_lo_f32(out[4], out[5]),
extract_lo_lo_f32(out[6], out[7]),
extract_lo_lo_f32(out[8], out[9]),
extract_lo_lo_f32(out[10], out[11]),
extract_lo_lo_f32(out[12], out[13]),
extract_lo_lo_f32(out[14], out[15]),
extract_lo_lo_f32(out[16], out[17]),
extract_lo_hi_f32(out[18], out[0]),
extract_hi_hi_f32(out[1], out[2]),
extract_hi_hi_f32(out[3], out[4]),
extract_hi_hi_f32(out[5], out[6]),
extract_hi_hi_f32(out[7], out[8]),
extract_hi_hi_f32(out[9], out[10]),
extract_hi_hi_f32(out[11], out[12]),
extract_hi_hi_f32(out[13], out[14]),
extract_hi_hi_f32(out[15], out[16]),
extract_hi_hi_f32(out[17], out[18]),
];
// NOTE(review): the literal 2 is presumably the store stride matching the even load indices — confirm in the macro.
write_complex_to_array_strided!(out_packed, buffer, 2, { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 });
}
/// Core length-19 kernel: maps 19 input vectors to 19 output vectors.
///
/// Inputs are first combined in mirrored pairs (k, 19 - k). Each output pair
/// (k, 19 - k) is then produced from two accumulation chains (`a` over the sums
/// with `twiddles_re`, `b` over the rotated differences with `twiddles_im`)
/// followed by a final 2-point butterfly.
///
/// NOTE(review): presumably `fmadd(acc, a, b)` is `acc + a * b` and
/// `nmadd(acc, a, b)` is `acc - a * b` — confirm in wasm_simd_vector.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 19]) -> [WasmVector32; 19] {
// The rotation is always built with `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already encoded in the twiddles — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// For k = 1..=9: 2-point butterfly of values[k] and values[19 - k]; the second
// result is rotated, the first is accumulated into y00 (which starts at values[0]).
let y00 = values[0];
let [x1p18, x1m18] = WasmVector::column_butterfly2([values[1], values[18]]);
let x1m18 = WasmVector::apply_rotate90(rotate, x1m18);
let y00 = WasmVector::add(y00, x1p18);
let [x2p17, x2m17] = WasmVector::column_butterfly2([values[2], values[17]]);
let x2m17 = WasmVector::apply_rotate90(rotate, x2m17);
let y00 = WasmVector::add(y00, x2p17);
let [x3p16, x3m16] = WasmVector::column_butterfly2([values[3], values[16]]);
let x3m16 = WasmVector::apply_rotate90(rotate, x3m16);
let y00 = WasmVector::add(y00, x3p16);
let [x4p15, x4m15] = WasmVector::column_butterfly2([values[4], values[15]]);
let x4m15 = WasmVector::apply_rotate90(rotate, x4m15);
let y00 = WasmVector::add(y00, x4p15);
let [x5p14, x5m14] = WasmVector::column_butterfly2([values[5], values[14]]);
let x5m14 = WasmVector::apply_rotate90(rotate, x5m14);
let y00 = WasmVector::add(y00, x5p14);
let [x6p13, x6m13] = WasmVector::column_butterfly2([values[6], values[13]]);
let x6m13 = WasmVector::apply_rotate90(rotate, x6m13);
let y00 = WasmVector::add(y00, x6p13);
let [x7p12, x7m12] = WasmVector::column_butterfly2([values[7], values[12]]);
let x7m12 = WasmVector::apply_rotate90(rotate, x7m12);
let y00 = WasmVector::add(y00, x7p12);
let [x8p11, x8m11] = WasmVector::column_butterfly2([values[8], values[11]]);
let x8m11 = WasmVector::apply_rotate90(rotate, x8m11);
let y00 = WasmVector::add(y00, x8p11);
let [x9p10, x9m10] = WasmVector::column_butterfly2([values[9], values[10]]);
let x9m10 = WasmVector::apply_rotate90(rotate, x9m10);
let y00 = WasmVector::add(y00, x9p10);
// Rows k = 1..=9 follow. In row k the term for input pair n uses twiddle index
// min(r, 19 - r) - 1 where r = (n * k) mod 19; the `b` chain switches from fmadd
// to nmadd exactly when r > 9. Every `a` chain starts from values[0].
// Row 1 -> outputs 1 and 18.
let m0118a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p18);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[1], x2p17);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[2], x3p16);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[3], x4p15);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[4], x5p14);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[5], x6p13);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[6], x7p12);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[7], x8p11);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[8], x9p10);
let m0118b = WasmVector::mul(self.twiddles_im[0], x1m18);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[1], x2m17);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[2], x3m16);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[3], x4m15);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[4], x5m14);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[5], x6m13);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[6], x7m12);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[7], x8m11);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[8], x9m10);
let [y01, y18] = WasmVector::column_butterfly2([m0118a, m0118b]);
// Row 2 -> outputs 2 and 17.
let m0217a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p18);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[3], x2p17);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[5], x3p16);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[7], x4p15);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[8], x5p14);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[6], x6p13);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[4], x7p12);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[2], x8p11);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[0], x9p10);
let m0217b = WasmVector::mul(self.twiddles_im[1], x1m18);
let m0217b = WasmVector::fmadd(m0217b, self.twiddles_im[3], x2m17);
let m0217b = WasmVector::fmadd(m0217b, self.twiddles_im[5], x3m16);
let m0217b = WasmVector::fmadd(m0217b, self.twiddles_im[7], x4m15);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[8], x5m14);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[6], x6m13);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[4], x7m12);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[2], x8m11);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[0], x9m10);
let [y02, y17] = WasmVector::column_butterfly2([m0217a, m0217b]);
// Row 3 -> outputs 3 and 16.
let m0316a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p18);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[5], x2p17);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[8], x3p16);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[6], x4p15);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[3], x5p14);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[0], x6p13);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[1], x7p12);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[4], x8p11);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[7], x9p10);
let m0316b = WasmVector::mul(self.twiddles_im[2], x1m18);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[5], x2m17);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[8], x3m16);
let m0316b = WasmVector::nmadd(m0316b, self.twiddles_im[6], x4m15);
let m0316b = WasmVector::nmadd(m0316b, self.twiddles_im[3], x5m14);
let m0316b = WasmVector::nmadd(m0316b, self.twiddles_im[0], x6m13);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[1], x7m12);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[4], x8m11);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[7], x9m10);
let [y03, y16] = WasmVector::column_butterfly2([m0316a, m0316b]);
// Row 4 -> outputs 4 and 15.
let m0415a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p18);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[7], x2p17);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[6], x3p16);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[2], x4p15);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[0], x5p14);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[4], x6p13);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[8], x7p12);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[5], x8p11);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[1], x9p10);
let m0415b = WasmVector::mul(self.twiddles_im[3], x1m18);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[7], x2m17);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[6], x3m16);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[2], x4m15);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[0], x5m14);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[4], x6m13);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[8], x7m12);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[5], x8m11);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[1], x9m10);
let [y04, y15] = WasmVector::column_butterfly2([m0415a, m0415b]);
// Row 5 -> outputs 5 and 14.
let m0514a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p18);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[8], x2p17);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[3], x3p16);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[0], x4p15);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[5], x5p14);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[7], x6p13);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[2], x7p12);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[1], x8p11);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[6], x9p10);
let m0514b = WasmVector::mul(self.twiddles_im[4], x1m18);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[8], x2m17);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[3], x3m16);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[0], x4m15);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[5], x5m14);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[7], x6m13);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[2], x7m12);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[1], x8m11);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[6], x9m10);
let [y05, y14] = WasmVector::column_butterfly2([m0514a, m0514b]);
// Row 6 -> outputs 6 and 13.
let m0613a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p18);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[6], x2p17);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[0], x3p16);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[4], x4p15);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[7], x5p14);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[1], x6p13);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[3], x7p12);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[8], x8p11);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[2], x9p10);
let m0613b = WasmVector::mul(self.twiddles_im[5], x1m18);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[6], x2m17);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[0], x3m16);
let m0613b = WasmVector::fmadd(m0613b, self.twiddles_im[4], x4m15);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[7], x5m14);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[1], x6m13);
let m0613b = WasmVector::fmadd(m0613b, self.twiddles_im[3], x7m12);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[8], x8m11);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[2], x9m10);
let [y06, y13] = WasmVector::column_butterfly2([m0613a, m0613b]);
// Row 7 -> outputs 7 and 12.
let m0712a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p18);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[4], x2p17);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[1], x3p16);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[8], x4p15);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[2], x5p14);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[3], x6p13);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[7], x7p12);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[0], x8p11);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[5], x9p10);
let m0712b = WasmVector::mul(self.twiddles_im[6], x1m18);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[4], x2m17);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[1], x3m16);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[8], x4m15);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[2], x5m14);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[3], x6m13);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[7], x7m12);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[0], x8m11);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[5], x9m10);
let [y07, y12] = WasmVector::column_butterfly2([m0712a, m0712b]);
// Row 8 -> outputs 8 and 11.
let m0811a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p18);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[2], x2p17);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[4], x3p16);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[5], x4p15);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[1], x5p14);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[8], x6p13);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[0], x7p12);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[6], x8p11);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[3], x9p10);
let m0811b = WasmVector::mul(self.twiddles_im[7], x1m18);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[2], x2m17);
let m0811b = WasmVector::fmadd(m0811b, self.twiddles_im[4], x3m16);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[5], x4m15);
let m0811b = WasmVector::fmadd(m0811b, self.twiddles_im[1], x5m14);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[8], x6m13);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[0], x7m12);
let m0811b = WasmVector::fmadd(m0811b, self.twiddles_im[6], x8m11);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[3], x9m10);
let [y08, y11] = WasmVector::column_butterfly2([m0811a, m0811b]);
// Row 9 -> outputs 9 and 10.
let m0910a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p18);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[0], x2p17);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[7], x3p16);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[1], x4p15);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[6], x5p14);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[2], x6p13);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[5], x7p12);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[3], x8p11);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[4], x9p10);
let m0910b = WasmVector::mul(self.twiddles_im[8], x1m18);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[0], x2m17);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[7], x3m16);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[1], x4m15);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[6], x5m14);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[2], x6m13);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[5], x7m12);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[3], x8m11);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[4], x9m10);
let [y09, y10] = WasmVector::column_butterfly2([m0910a, m0910b]);
// Outputs are returned in index order 0..=18.
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18]
}
}
/// Length-19 FFT butterfly operating on `f64` data through `WasmVector64` values.
///
/// Stores the direction it was constructed with plus nine twiddle vectors,
/// kept as separate real-part and imaginary-part arrays.
struct WasmSimdF64Butterfly19<T> {
// Direction passed to `new`; the boilerplate macro invocation below reads it through a closure.
direction: FftDirection,
// Real parts of the values from `make_twiddles(19, direction)`, each passed through `broadcast_scalar`.
twiddles_re: [WasmVector64; 9],
// Imaginary parts of the same twiddles, each passed through `broadcast_scalar`.
twiddles_im: [WasmVector64; 9],
// Ties the struct to the generic parameter `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// Macro arguments: the type, the length 19, and a closure returning the stored direction.
// NOTE(review): presumably this generates the `Fft`/`Length`/`Direction` impls — confirm in the macro definition.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly19, 19, |this: &WasmSimdF64Butterfly19<_>| this.direction);
impl<T: FftNum> WasmSimdF64Butterfly19<T> {
/// Creates the butterfly for `direction`.
///
/// Calls `assert_f64::<T>()`, then splits every value returned by
/// `make_twiddles(19, direction)` into a real-part vector and an imaginary-part
/// vector via `broadcast_scalar`.
///
/// # Safety
/// Compiled with `#[target_feature(enable = "simd128")]`; the caller must make sure
/// that feature is available.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
// NOTE(review): presumably panics when `T` is not `f64` — confirm in wasm_simd_common.
assert_f64::<T>();
let twiddles = make_twiddles(19, direction);
// NOTE(review): the f32 constructor in this file builds `Self` without an explicit
// `unsafe` block inside its `unsafe fn`; confirm whether this one is required.
unsafe {Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}}
}
/// Transforms `buffer` in place: loads indices 0..=18 with `read_complex_to_array!`,
/// runs `perform_fft_direct`, and stores the 19 results back at the same indices.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
let values = read_complex_to_array!(buffer, { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 });
let out = self.perform_fft_direct(values);
write_complex_to_array!(out, buffer, { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 });
}
/// Core length-19 kernel: maps 19 input vectors to 19 output vectors.
///
/// Inputs are first combined in mirrored pairs (k, 19 - k). Each output pair
/// (k, 19 - k) is then produced from two accumulation chains (`a` over the sums
/// with `twiddles_re`, `b` over the rotated differences with `twiddles_im`)
/// followed by a final 2-point butterfly.
///
/// NOTE(review): presumably `fmadd(acc, a, b)` is `acc + a * b` and
/// `nmadd(acc, a, b)` is `acc - a * b` — confirm in wasm_simd_vector.
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 19]) -> [WasmVector64; 19] {
// The rotation is always built with `FftDirection::Inverse`, independent of `self.direction`.
// NOTE(review): presumably the direction is already encoded in the twiddles — confirm.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// For k = 1..=9: 2-point butterfly of values[k] and values[19 - k]; the second
// result is rotated, the first is accumulated into y00 (which starts at values[0]).
let y00 = values[0];
let [x1p18, x1m18] = WasmVector::column_butterfly2([values[1], values[18]]);
let x1m18 = WasmVector::apply_rotate90(rotate, x1m18);
let y00 = WasmVector::add(y00, x1p18);
let [x2p17, x2m17] = WasmVector::column_butterfly2([values[2], values[17]]);
let x2m17 = WasmVector::apply_rotate90(rotate, x2m17);
let y00 = WasmVector::add(y00, x2p17);
let [x3p16, x3m16] = WasmVector::column_butterfly2([values[3], values[16]]);
let x3m16 = WasmVector::apply_rotate90(rotate, x3m16);
let y00 = WasmVector::add(y00, x3p16);
let [x4p15, x4m15] = WasmVector::column_butterfly2([values[4], values[15]]);
let x4m15 = WasmVector::apply_rotate90(rotate, x4m15);
let y00 = WasmVector::add(y00, x4p15);
let [x5p14, x5m14] = WasmVector::column_butterfly2([values[5], values[14]]);
let x5m14 = WasmVector::apply_rotate90(rotate, x5m14);
let y00 = WasmVector::add(y00, x5p14);
let [x6p13, x6m13] = WasmVector::column_butterfly2([values[6], values[13]]);
let x6m13 = WasmVector::apply_rotate90(rotate, x6m13);
let y00 = WasmVector::add(y00, x6p13);
let [x7p12, x7m12] = WasmVector::column_butterfly2([values[7], values[12]]);
let x7m12 = WasmVector::apply_rotate90(rotate, x7m12);
let y00 = WasmVector::add(y00, x7p12);
let [x8p11, x8m11] = WasmVector::column_butterfly2([values[8], values[11]]);
let x8m11 = WasmVector::apply_rotate90(rotate, x8m11);
let y00 = WasmVector::add(y00, x8p11);
let [x9p10, x9m10] = WasmVector::column_butterfly2([values[9], values[10]]);
let x9m10 = WasmVector::apply_rotate90(rotate, x9m10);
let y00 = WasmVector::add(y00, x9p10);
// Rows k = 1..=9 follow. In row k the term for input pair n uses twiddle index
// min(r, 19 - r) - 1 where r = (n * k) mod 19; the `b` chain switches from fmadd
// to nmadd exactly when r > 9. Every `a` chain starts from values[0].
// Row 1 -> outputs 1 and 18.
let m0118a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p18);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[1], x2p17);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[2], x3p16);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[3], x4p15);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[4], x5p14);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[5], x6p13);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[6], x7p12);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[7], x8p11);
let m0118a = WasmVector::fmadd(m0118a, self.twiddles_re[8], x9p10);
let m0118b = WasmVector::mul(self.twiddles_im[0], x1m18);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[1], x2m17);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[2], x3m16);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[3], x4m15);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[4], x5m14);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[5], x6m13);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[6], x7m12);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[7], x8m11);
let m0118b = WasmVector::fmadd(m0118b, self.twiddles_im[8], x9m10);
let [y01, y18] = WasmVector::column_butterfly2([m0118a, m0118b]);
// Row 2 -> outputs 2 and 17.
let m0217a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p18);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[3], x2p17);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[5], x3p16);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[7], x4p15);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[8], x5p14);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[6], x6p13);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[4], x7p12);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[2], x8p11);
let m0217a = WasmVector::fmadd(m0217a, self.twiddles_re[0], x9p10);
let m0217b = WasmVector::mul(self.twiddles_im[1], x1m18);
let m0217b = WasmVector::fmadd(m0217b, self.twiddles_im[3], x2m17);
let m0217b = WasmVector::fmadd(m0217b, self.twiddles_im[5], x3m16);
let m0217b = WasmVector::fmadd(m0217b, self.twiddles_im[7], x4m15);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[8], x5m14);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[6], x6m13);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[4], x7m12);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[2], x8m11);
let m0217b = WasmVector::nmadd(m0217b, self.twiddles_im[0], x9m10);
let [y02, y17] = WasmVector::column_butterfly2([m0217a, m0217b]);
// Row 3 -> outputs 3 and 16.
let m0316a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p18);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[5], x2p17);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[8], x3p16);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[6], x4p15);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[3], x5p14);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[0], x6p13);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[1], x7p12);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[4], x8p11);
let m0316a = WasmVector::fmadd(m0316a, self.twiddles_re[7], x9p10);
let m0316b = WasmVector::mul(self.twiddles_im[2], x1m18);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[5], x2m17);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[8], x3m16);
let m0316b = WasmVector::nmadd(m0316b, self.twiddles_im[6], x4m15);
let m0316b = WasmVector::nmadd(m0316b, self.twiddles_im[3], x5m14);
let m0316b = WasmVector::nmadd(m0316b, self.twiddles_im[0], x6m13);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[1], x7m12);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[4], x8m11);
let m0316b = WasmVector::fmadd(m0316b, self.twiddles_im[7], x9m10);
let [y03, y16] = WasmVector::column_butterfly2([m0316a, m0316b]);
// Row 4 -> outputs 4 and 15.
let m0415a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p18);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[7], x2p17);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[6], x3p16);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[2], x4p15);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[0], x5p14);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[4], x6p13);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[8], x7p12);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[5], x8p11);
let m0415a = WasmVector::fmadd(m0415a, self.twiddles_re[1], x9p10);
let m0415b = WasmVector::mul(self.twiddles_im[3], x1m18);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[7], x2m17);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[6], x3m16);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[2], x4m15);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[0], x5m14);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[4], x6m13);
let m0415b = WasmVector::fmadd(m0415b, self.twiddles_im[8], x7m12);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[5], x8m11);
let m0415b = WasmVector::nmadd(m0415b, self.twiddles_im[1], x9m10);
let [y04, y15] = WasmVector::column_butterfly2([m0415a, m0415b]);
// Row 5 -> outputs 5 and 14.
let m0514a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p18);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[8], x2p17);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[3], x3p16);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[0], x4p15);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[5], x5p14);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[7], x6p13);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[2], x7p12);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[1], x8p11);
let m0514a = WasmVector::fmadd(m0514a, self.twiddles_re[6], x9p10);
let m0514b = WasmVector::mul(self.twiddles_im[4], x1m18);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[8], x2m17);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[3], x3m16);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[0], x4m15);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[5], x5m14);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[7], x6m13);
let m0514b = WasmVector::nmadd(m0514b, self.twiddles_im[2], x7m12);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[1], x8m11);
let m0514b = WasmVector::fmadd(m0514b, self.twiddles_im[6], x9m10);
let [y05, y14] = WasmVector::column_butterfly2([m0514a, m0514b]);
// Row 6 -> outputs 6 and 13.
let m0613a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p18);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[6], x2p17);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[0], x3p16);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[4], x4p15);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[7], x5p14);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[1], x6p13);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[3], x7p12);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[8], x8p11);
let m0613a = WasmVector::fmadd(m0613a, self.twiddles_re[2], x9p10);
let m0613b = WasmVector::mul(self.twiddles_im[5], x1m18);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[6], x2m17);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[0], x3m16);
let m0613b = WasmVector::fmadd(m0613b, self.twiddles_im[4], x4m15);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[7], x5m14);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[1], x6m13);
let m0613b = WasmVector::fmadd(m0613b, self.twiddles_im[3], x7m12);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[8], x8m11);
let m0613b = WasmVector::nmadd(m0613b, self.twiddles_im[2], x9m10);
let [y06, y13] = WasmVector::column_butterfly2([m0613a, m0613b]);
// Row 7 -> outputs 7 and 12.
let m0712a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p18);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[4], x2p17);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[1], x3p16);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[8], x4p15);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[2], x5p14);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[3], x6p13);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[7], x7p12);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[0], x8p11);
let m0712a = WasmVector::fmadd(m0712a, self.twiddles_re[5], x9p10);
let m0712b = WasmVector::mul(self.twiddles_im[6], x1m18);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[4], x2m17);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[1], x3m16);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[8], x4m15);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[2], x5m14);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[3], x6m13);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[7], x7m12);
let m0712b = WasmVector::nmadd(m0712b, self.twiddles_im[0], x8m11);
let m0712b = WasmVector::fmadd(m0712b, self.twiddles_im[5], x9m10);
let [y07, y12] = WasmVector::column_butterfly2([m0712a, m0712b]);
// Row 8 -> outputs 8 and 11.
let m0811a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p18);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[2], x2p17);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[4], x3p16);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[5], x4p15);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[1], x5p14);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[8], x6p13);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[0], x7p12);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[6], x8p11);
let m0811a = WasmVector::fmadd(m0811a, self.twiddles_re[3], x9p10);
let m0811b = WasmVector::mul(self.twiddles_im[7], x1m18);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[2], x2m17);
let m0811b = WasmVector::fmadd(m0811b, self.twiddles_im[4], x3m16);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[5], x4m15);
let m0811b = WasmVector::fmadd(m0811b, self.twiddles_im[1], x5m14);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[8], x6m13);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[0], x7m12);
let m0811b = WasmVector::fmadd(m0811b, self.twiddles_im[6], x8m11);
let m0811b = WasmVector::nmadd(m0811b, self.twiddles_im[3], x9m10);
let [y08, y11] = WasmVector::column_butterfly2([m0811a, m0811b]);
// Row 9 -> outputs 9 and 10.
let m0910a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p18);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[0], x2p17);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[7], x3p16);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[1], x4p15);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[6], x5p14);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[2], x6p13);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[5], x7p12);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[3], x8p11);
let m0910a = WasmVector::fmadd(m0910a, self.twiddles_re[4], x9p10);
let m0910b = WasmVector::mul(self.twiddles_im[8], x1m18);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[0], x2m17);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[7], x3m16);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[1], x4m15);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[6], x5m14);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[2], x6m13);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[5], x7m12);
let m0910b = WasmVector::nmadd(m0910b, self.twiddles_im[3], x8m11);
let m0910b = WasmVector::fmadd(m0910b, self.twiddles_im[4], x9m10);
let [y09, y10] = WasmVector::column_butterfly2([m0910a, m0910b]);
// Outputs are returned in index order 0..=18.
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18]
}
}
/// Length-23 FFT butterfly operating on `f32` data through `WasmVector32` values.
///
/// Stores the direction it was constructed with plus eleven twiddle vectors,
/// kept as separate real-part and imaginary-part arrays.
struct WasmSimdF32Butterfly23<T> {
// Direction passed to `new`; the boilerplate macro invocation below reads it through a closure.
direction: FftDirection,
// Real parts of the values from `make_twiddles(23, direction)`, each passed through `broadcast_scalar`.
twiddles_re: [WasmVector32; 11],
// Imaginary parts of the same twiddles, each passed through `broadcast_scalar`.
twiddles_im: [WasmVector32; 11],
// Ties the struct to the generic parameter `T` without storing a value of that type.
_phantom: std::marker::PhantomData<T>,
}
// Macro arguments: the type, the length 23, and a closure returning the stored direction.
// NOTE(review): presumably this generates the `Fft`/`Length`/`Direction` impls — confirm in the macro definition.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly23, 23, |this: &WasmSimdF32Butterfly23<_>| this.direction);
impl<T: FftNum> WasmSimdF32Butterfly23<T> {
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f32::<T>();
let twiddles = make_twiddles(23, direction);
Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}
}
/// Runs a single length-23 FFT in place on `buffer`.
///
/// Elements 0..=22 are loaded with `read_partial1_complex_to_array!`, pushed
/// through `perform_parallel_fft_direct`, and stored back with
/// `write_partial_lo_complex_to_array!`.
// NOTE(review): the "partial1"/"partial_lo" helpers presumably move one complex
// value per vector (low lane only) — confirm in the macro definitions.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let lanes = read_partial1_complex_to_array!(
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }
    );
    let spectrum = self.perform_parallel_fft_direct(lanes);
    write_partial_lo_complex_to_array!(
        spectrum,
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }
    );
}
/// Runs `perform_parallel_fft_direct` on data loaded from `buffer` and writes the
/// re-packed result back in place.
///
/// 23 vectors are loaded from the even indices 0..=44. They are then shuffled so
/// that column `k` combines one lane of vector `k / 2` with one lane of vector
/// `(k + 23) / 2`; after the transform the inverse shuffle restores the packed
/// layout.
// NOTE(review): this reads as two back-to-back length-23 FFTs (46 complex values,
// two per vector, column k = elements k and k + 23) — confirm against the
// `load_complex` / `extract_*_f32` helpers.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let packed_in = read_complex_to_array!(
        buffer,
        { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44 }
    );

    // De-interleave: even columns take (lo, hi), odd columns take (hi, lo).
    let columns = [
        extract_lo_hi_f32(packed_in[0], packed_in[11]),
        extract_hi_lo_f32(packed_in[0], packed_in[12]),
        extract_lo_hi_f32(packed_in[1], packed_in[12]),
        extract_hi_lo_f32(packed_in[1], packed_in[13]),
        extract_lo_hi_f32(packed_in[2], packed_in[13]),
        extract_hi_lo_f32(packed_in[2], packed_in[14]),
        extract_lo_hi_f32(packed_in[3], packed_in[14]),
        extract_hi_lo_f32(packed_in[3], packed_in[15]),
        extract_lo_hi_f32(packed_in[4], packed_in[15]),
        extract_hi_lo_f32(packed_in[4], packed_in[16]),
        extract_lo_hi_f32(packed_in[5], packed_in[16]),
        extract_hi_lo_f32(packed_in[5], packed_in[17]),
        extract_lo_hi_f32(packed_in[6], packed_in[17]),
        extract_hi_lo_f32(packed_in[6], packed_in[18]),
        extract_lo_hi_f32(packed_in[7], packed_in[18]),
        extract_hi_lo_f32(packed_in[7], packed_in[19]),
        extract_lo_hi_f32(packed_in[8], packed_in[19]),
        extract_hi_lo_f32(packed_in[8], packed_in[20]),
        extract_lo_hi_f32(packed_in[9], packed_in[20]),
        extract_hi_lo_f32(packed_in[9], packed_in[21]),
        extract_lo_hi_f32(packed_in[10], packed_in[21]),
        extract_hi_lo_f32(packed_in[10], packed_in[22]),
        extract_lo_hi_f32(packed_in[11], packed_in[22]),
    ];

    let spectra = self.perform_parallel_fft_direct(columns);

    // Re-interleave: the first 11 vectors take low lanes, the 12th straddles
    // both halves, and the last 11 take high lanes.
    let packed_out = [
        extract_lo_lo_f32(spectra[0], spectra[1]),
        extract_lo_lo_f32(spectra[2], spectra[3]),
        extract_lo_lo_f32(spectra[4], spectra[5]),
        extract_lo_lo_f32(spectra[6], spectra[7]),
        extract_lo_lo_f32(spectra[8], spectra[9]),
        extract_lo_lo_f32(spectra[10], spectra[11]),
        extract_lo_lo_f32(spectra[12], spectra[13]),
        extract_lo_lo_f32(spectra[14], spectra[15]),
        extract_lo_lo_f32(spectra[16], spectra[17]),
        extract_lo_lo_f32(spectra[18], spectra[19]),
        extract_lo_lo_f32(spectra[20], spectra[21]),
        extract_lo_hi_f32(spectra[22], spectra[0]),
        extract_hi_hi_f32(spectra[1], spectra[2]),
        extract_hi_hi_f32(spectra[3], spectra[4]),
        extract_hi_hi_f32(spectra[5], spectra[6]),
        extract_hi_hi_f32(spectra[7], spectra[8]),
        extract_hi_hi_f32(spectra[9], spectra[10]),
        extract_hi_hi_f32(spectra[11], spectra[12]),
        extract_hi_hi_f32(spectra[13], spectra[14]),
        extract_hi_hi_f32(spectra[15], spectra[16]),
        extract_hi_hi_f32(spectra[17], spectra[18]),
        extract_hi_hi_f32(spectra[19], spectra[20]),
        extract_hi_hi_f32(spectra[21], spectra[22]),
    ];

    write_complex_to_array_strided!(
        packed_out,
        buffer,
        2,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }
    );
}
/// Core length-23 butterfly: maps 23 input vectors to 23 output vectors.
///
/// Structure of the computation, as written below:
/// 1. Inputs `j` and `23 - j` (j = 1..=11) are combined with `column_butterfly2`
///    into a "p" term and an "m" term; each "m" term is then passed through
///    `apply_rotate90`.
/// 2. `y00` is `values[0]` plus every "p" term.
/// 3. For each output pair `(k, 23 - k)`, k = 1..=11, an "a" accumulator starts
///    from `values[0]` and chains `fmadd` over `twiddles_re` and the "p" terms,
///    while a "b" accumulator chains `fmadd`/`nmadd` over `twiddles_im` and the
///    rotated "m" terms. A final `column_butterfly2([a, b])` yields `y_k` and
///    `y_{23-k}`.
///
/// Twiddle selection for row `k`, column `j`: with `r = (k * j) % 23`, the table
/// index used is `min(r, 23 - r) - 1`, and the "b" chain uses `nmadd` exactly
/// when `r > 11` (otherwise `mul`/`fmadd`).
// NOTE(review): `column_butterfly2` presumably returns [sum, difference] and
// `fmadd`/`nmadd` presumably compute acc + a*b / acc - a*b — confirm in WasmVector.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 23]) -> [WasmVector32; 23] {
// The rotation is always built for FftDirection::Inverse, independent of
// self.direction. NOTE(review): presumably the direction is already encoded in
// the sign of twiddles_im — confirm against make_twiddles.
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
// Pair up inputs j and 23 - j, rotate the "m" terms, and accumulate output 0.
let y00 = values[0];
let [x1p22, x1m22] = WasmVector::column_butterfly2([values[1], values[22]]);
let x1m22 = WasmVector::apply_rotate90(rotate, x1m22);
let y00 = WasmVector::add(y00, x1p22);
let [x2p21, x2m21] = WasmVector::column_butterfly2([values[2], values[21]]);
let x2m21 = WasmVector::apply_rotate90(rotate, x2m21);
let y00 = WasmVector::add(y00, x2p21);
let [x3p20, x3m20] = WasmVector::column_butterfly2([values[3], values[20]]);
let x3m20 = WasmVector::apply_rotate90(rotate, x3m20);
let y00 = WasmVector::add(y00, x3p20);
let [x4p19, x4m19] = WasmVector::column_butterfly2([values[4], values[19]]);
let x4m19 = WasmVector::apply_rotate90(rotate, x4m19);
let y00 = WasmVector::add(y00, x4p19);
let [x5p18, x5m18] = WasmVector::column_butterfly2([values[5], values[18]]);
let x5m18 = WasmVector::apply_rotate90(rotate, x5m18);
let y00 = WasmVector::add(y00, x5p18);
let [x6p17, x6m17] = WasmVector::column_butterfly2([values[6], values[17]]);
let x6m17 = WasmVector::apply_rotate90(rotate, x6m17);
let y00 = WasmVector::add(y00, x6p17);
let [x7p16, x7m16] = WasmVector::column_butterfly2([values[7], values[16]]);
let x7m16 = WasmVector::apply_rotate90(rotate, x7m16);
let y00 = WasmVector::add(y00, x7p16);
let [x8p15, x8m15] = WasmVector::column_butterfly2([values[8], values[15]]);
let x8m15 = WasmVector::apply_rotate90(rotate, x8m15);
let y00 = WasmVector::add(y00, x8p15);
let [x9p14, x9m14] = WasmVector::column_butterfly2([values[9], values[14]]);
let x9m14 = WasmVector::apply_rotate90(rotate, x9m14);
let y00 = WasmVector::add(y00, x9p14);
let [x10p13, x10m13] = WasmVector::column_butterfly2([values[10], values[13]]);
let x10m13 = WasmVector::apply_rotate90(rotate, x10m13);
let y00 = WasmVector::add(y00, x10p13);
let [x11p12, x11m12] = WasmVector::column_butterfly2([values[11], values[12]]);
let x11m12 = WasmVector::apply_rotate90(rotate, x11m12);
let y00 = WasmVector::add(y00, x11p12);
// Outputs 1 and 22 (k = 1).
let m0122a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p22);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[1], x2p21);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[2], x3p20);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[3], x4p19);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[4], x5p18);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[5], x6p17);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[6], x7p16);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[7], x8p15);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[8], x9p14);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[9], x10p13);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[10], x11p12);
let m0122b = WasmVector::mul(self.twiddles_im[0], x1m22);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[1], x2m21);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[2], x3m20);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[3], x4m19);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[4], x5m18);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[5], x6m17);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[6], x7m16);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[7], x8m15);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[8], x9m14);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[9], x10m13);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[10], x11m12);
let [y01, y22] = WasmVector::column_butterfly2([m0122a, m0122b]);
// Outputs 2 and 21 (k = 2).
let m0221a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p22);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[3], x2p21);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[5], x3p20);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[7], x4p19);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[9], x5p18);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[10], x6p17);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[8], x7p16);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[6], x8p15);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[4], x9p14);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[2], x10p13);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[0], x11p12);
let m0221b = WasmVector::mul(self.twiddles_im[1], x1m22);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[3], x2m21);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[5], x3m20);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[7], x4m19);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[9], x5m18);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[10], x6m17);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[8], x7m16);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[6], x8m15);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[4], x9m14);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[2], x10m13);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[0], x11m12);
let [y02, y21] = WasmVector::column_butterfly2([m0221a, m0221b]);
// Outputs 3 and 20 (k = 3).
let m0320a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p22);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[5], x2p21);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[8], x3p20);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[10], x4p19);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[7], x5p18);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[4], x6p17);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[1], x7p16);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[0], x8p15);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[3], x9p14);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[6], x10p13);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[9], x11p12);
let m0320b = WasmVector::mul(self.twiddles_im[2], x1m22);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[5], x2m21);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[8], x3m20);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[10], x4m19);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[7], x5m18);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[4], x6m17);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[1], x7m16);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[0], x8m15);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[3], x9m14);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[6], x10m13);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[9], x11m12);
let [y03, y20] = WasmVector::column_butterfly2([m0320a, m0320b]);
// Outputs 4 and 19 (k = 4).
let m0419a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p22);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[7], x2p21);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[10], x3p20);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[6], x4p19);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[2], x5p18);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[0], x6p17);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[4], x7p16);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[8], x8p15);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[9], x9p14);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[5], x10p13);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[1], x11p12);
let m0419b = WasmVector::mul(self.twiddles_im[3], x1m22);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[7], x2m21);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[10], x3m20);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[6], x4m19);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[2], x5m18);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[0], x6m17);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[4], x7m16);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[8], x8m15);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[9], x9m14);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[5], x10m13);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[1], x11m12);
let [y04, y19] = WasmVector::column_butterfly2([m0419a, m0419b]);
// Outputs 5 and 18 (k = 5).
let m0518a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p22);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[9], x2p21);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[7], x3p20);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[2], x4p19);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[1], x5p18);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[6], x6p17);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[10], x7p16);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[5], x8p15);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[0], x9p14);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[3], x10p13);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[8], x11p12);
let m0518b = WasmVector::mul(self.twiddles_im[4], x1m22);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[9], x2m21);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[7], x3m20);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[2], x4m19);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[1], x5m18);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[6], x6m17);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[10], x7m16);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[5], x8m15);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[0], x9m14);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[3], x10m13);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[8], x11m12);
let [y05, y18] = WasmVector::column_butterfly2([m0518a, m0518b]);
// Outputs 6 and 17 (k = 6).
let m0617a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p22);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[10], x2p21);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[4], x3p20);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[0], x4p19);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[6], x5p18);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[9], x6p17);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[3], x7p16);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[1], x8p15);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[7], x9p14);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[8], x10p13);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[2], x11p12);
let m0617b = WasmVector::mul(self.twiddles_im[5], x1m22);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[10], x2m21);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[4], x3m20);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[0], x4m19);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[6], x5m18);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[9], x6m17);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[3], x7m16);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[1], x8m15);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[7], x9m14);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[8], x10m13);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[2], x11m12);
let [y06, y17] = WasmVector::column_butterfly2([m0617a, m0617b]);
// Outputs 7 and 16 (k = 7).
let m0716a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p22);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[8], x2p21);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[1], x3p20);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[4], x4p19);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[10], x5p18);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[3], x6p17);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[2], x7p16);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[9], x8p15);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[5], x9p14);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[0], x10p13);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[7], x11p12);
let m0716b = WasmVector::mul(self.twiddles_im[6], x1m22);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[8], x2m21);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[1], x3m20);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[4], x4m19);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[10], x5m18);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[3], x6m17);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[2], x7m16);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[9], x8m15);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[5], x9m14);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[0], x10m13);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[7], x11m12);
let [y07, y16] = WasmVector::column_butterfly2([m0716a, m0716b]);
// Outputs 8 and 15 (k = 8).
let m0815a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p22);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[6], x2p21);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[0], x3p20);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[8], x4p19);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[5], x5p18);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[1], x6p17);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[9], x7p16);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[4], x8p15);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[2], x9p14);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[10], x10p13);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[3], x11p12);
let m0815b = WasmVector::mul(self.twiddles_im[7], x1m22);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[6], x2m21);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[0], x3m20);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[8], x4m19);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[5], x5m18);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[1], x6m17);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[9], x7m16);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[4], x8m15);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[2], x9m14);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[10], x10m13);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[3], x11m12);
let [y08, y15] = WasmVector::column_butterfly2([m0815a, m0815b]);
// Outputs 9 and 14 (k = 9).
let m0914a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p22);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[4], x2p21);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[3], x3p20);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[9], x4p19);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[0], x5p18);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[7], x6p17);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[5], x7p16);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[2], x8p15);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[10], x9p14);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[1], x10p13);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[6], x11p12);
let m0914b = WasmVector::mul(self.twiddles_im[8], x1m22);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[4], x2m21);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[3], x3m20);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[9], x4m19);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[0], x5m18);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[7], x6m17);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[5], x7m16);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[2], x8m15);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[10], x9m14);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[1], x10m13);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[6], x11m12);
let [y09, y14] = WasmVector::column_butterfly2([m0914a, m0914b]);
// Outputs 10 and 13 (k = 10).
let m1013a = WasmVector::fmadd(values[0], self.twiddles_re[9], x1p22);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[2], x2p21);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[6], x3p20);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[5], x4p19);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[3], x5p18);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[8], x6p17);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[0], x7p16);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[10], x8p15);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[1], x9p14);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[7], x10p13);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[4], x11p12);
let m1013b = WasmVector::mul(self.twiddles_im[9], x1m22);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[2], x2m21);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[6], x3m20);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[5], x4m19);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[3], x5m18);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[8], x6m17);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[0], x7m16);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[10], x8m15);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[1], x9m14);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[7], x10m13);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[4], x11m12);
let [y10, y13] = WasmVector::column_butterfly2([m1013a, m1013b]);
// Outputs 11 and 12 (k = 11).
let m1112a = WasmVector::fmadd(values[0], self.twiddles_re[10], x1p22);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[0], x2p21);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[9], x3p20);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[1], x4p19);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[8], x5p18);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[2], x6p17);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[7], x7p16);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[3], x8p15);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[6], x9p14);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[4], x10p13);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[5], x11p12);
let m1112b = WasmVector::mul(self.twiddles_im[10], x1m22);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[0], x2m21);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[9], x3m20);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[1], x4m19);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[8], x5m18);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[2], x6m17);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[7], x7m16);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[3], x8m15);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[6], x9m14);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[4], x10m13);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[5], x11m12);
let [y11, y12] = WasmVector::column_butterfly2([m1112a, m1112b]);
// Return the outputs in natural order 0..=22.
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18, y19, y20, y21, y22]
}
}
/// Length-23 FFT butterfly operating on `f64` data through `WasmVector64` values.
///
/// The twiddle tables are filled by `new` from `make_twiddles(23, direction)`, with
/// every real and imaginary part passed through `WasmVector::broadcast_scalar`.
struct WasmSimdF64Butterfly23<T> {
/// Direction this instance was constructed for.
direction: FftDirection,
/// Real parts of the 11 precomputed twiddles, one vector per twiddle.
twiddles_re: [WasmVector64; 11],
/// Imaginary parts of the 11 precomputed twiddles, one vector per twiddle.
twiddles_im: [WasmVector64; 11],
/// Ties the struct to the scalar type `T` without storing a value of it.
_phantom: std::marker::PhantomData<T>,
}
// NOTE(review): this macro is defined elsewhere; it presumably generates the shared
// `Fft` plumbing for a length-23 f64 butterfly, using the closure to read the
// direction from an instance — confirm against the macro definition.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly23, 23, |this: &WasmSimdF64Butterfly23<_>| this.direction);
impl<T: FftNum> WasmSimdF64Butterfly23<T> {
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f64::<T>();
let twiddles = make_twiddles(23, direction);
unsafe {Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}}
}
/// Runs a single length-23 FFT in place on `buffer`.
///
/// Elements 0..=22 are loaded with `read_complex_to_array!`, transformed by
/// `perform_fft_direct`, and stored back to the same indices with
/// `write_complex_to_array!`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
    let samples = read_complex_to_array!(
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }
    );
    let spectrum = self.perform_fft_direct(samples);
    write_complex_to_array!(
        spectrum,
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }
    );
}
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 23]) -> [WasmVector64; 23] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
let y00 = values[0];
let [x1p22, x1m22] = WasmVector::column_butterfly2([values[1], values[22]]);
let x1m22 = WasmVector::apply_rotate90(rotate, x1m22);
let y00 = WasmVector::add(y00, x1p22);
let [x2p21, x2m21] = WasmVector::column_butterfly2([values[2], values[21]]);
let x2m21 = WasmVector::apply_rotate90(rotate, x2m21);
let y00 = WasmVector::add(y00, x2p21);
let [x3p20, x3m20] = WasmVector::column_butterfly2([values[3], values[20]]);
let x3m20 = WasmVector::apply_rotate90(rotate, x3m20);
let y00 = WasmVector::add(y00, x3p20);
let [x4p19, x4m19] = WasmVector::column_butterfly2([values[4], values[19]]);
let x4m19 = WasmVector::apply_rotate90(rotate, x4m19);
let y00 = WasmVector::add(y00, x4p19);
let [x5p18, x5m18] = WasmVector::column_butterfly2([values[5], values[18]]);
let x5m18 = WasmVector::apply_rotate90(rotate, x5m18);
let y00 = WasmVector::add(y00, x5p18);
let [x6p17, x6m17] = WasmVector::column_butterfly2([values[6], values[17]]);
let x6m17 = WasmVector::apply_rotate90(rotate, x6m17);
let y00 = WasmVector::add(y00, x6p17);
let [x7p16, x7m16] = WasmVector::column_butterfly2([values[7], values[16]]);
let x7m16 = WasmVector::apply_rotate90(rotate, x7m16);
let y00 = WasmVector::add(y00, x7p16);
let [x8p15, x8m15] = WasmVector::column_butterfly2([values[8], values[15]]);
let x8m15 = WasmVector::apply_rotate90(rotate, x8m15);
let y00 = WasmVector::add(y00, x8p15);
let [x9p14, x9m14] = WasmVector::column_butterfly2([values[9], values[14]]);
let x9m14 = WasmVector::apply_rotate90(rotate, x9m14);
let y00 = WasmVector::add(y00, x9p14);
let [x10p13, x10m13] = WasmVector::column_butterfly2([values[10], values[13]]);
let x10m13 = WasmVector::apply_rotate90(rotate, x10m13);
let y00 = WasmVector::add(y00, x10p13);
let [x11p12, x11m12] = WasmVector::column_butterfly2([values[11], values[12]]);
let x11m12 = WasmVector::apply_rotate90(rotate, x11m12);
let y00 = WasmVector::add(y00, x11p12);
let m0122a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p22);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[1], x2p21);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[2], x3p20);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[3], x4p19);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[4], x5p18);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[5], x6p17);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[6], x7p16);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[7], x8p15);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[8], x9p14);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[9], x10p13);
let m0122a = WasmVector::fmadd(m0122a, self.twiddles_re[10], x11p12);
let m0122b = WasmVector::mul(self.twiddles_im[0], x1m22);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[1], x2m21);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[2], x3m20);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[3], x4m19);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[4], x5m18);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[5], x6m17);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[6], x7m16);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[7], x8m15);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[8], x9m14);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[9], x10m13);
let m0122b = WasmVector::fmadd(m0122b, self.twiddles_im[10], x11m12);
let [y01, y22] = WasmVector::column_butterfly2([m0122a, m0122b]);
let m0221a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p22);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[3], x2p21);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[5], x3p20);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[7], x4p19);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[9], x5p18);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[10], x6p17);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[8], x7p16);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[6], x8p15);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[4], x9p14);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[2], x10p13);
let m0221a = WasmVector::fmadd(m0221a, self.twiddles_re[0], x11p12);
let m0221b = WasmVector::mul(self.twiddles_im[1], x1m22);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[3], x2m21);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[5], x3m20);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[7], x4m19);
let m0221b = WasmVector::fmadd(m0221b, self.twiddles_im[9], x5m18);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[10], x6m17);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[8], x7m16);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[6], x8m15);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[4], x9m14);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[2], x10m13);
let m0221b = WasmVector::nmadd(m0221b, self.twiddles_im[0], x11m12);
let [y02, y21] = WasmVector::column_butterfly2([m0221a, m0221b]);
let m0320a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p22);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[5], x2p21);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[8], x3p20);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[10], x4p19);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[7], x5p18);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[4], x6p17);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[1], x7p16);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[0], x8p15);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[3], x9p14);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[6], x10p13);
let m0320a = WasmVector::fmadd(m0320a, self.twiddles_re[9], x11p12);
let m0320b = WasmVector::mul(self.twiddles_im[2], x1m22);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[5], x2m21);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[8], x3m20);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[10], x4m19);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[7], x5m18);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[4], x6m17);
let m0320b = WasmVector::nmadd(m0320b, self.twiddles_im[1], x7m16);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[0], x8m15);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[3], x9m14);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[6], x10m13);
let m0320b = WasmVector::fmadd(m0320b, self.twiddles_im[9], x11m12);
let [y03, y20] = WasmVector::column_butterfly2([m0320a, m0320b]);
let m0419a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p22);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[7], x2p21);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[10], x3p20);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[6], x4p19);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[2], x5p18);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[0], x6p17);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[4], x7p16);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[8], x8p15);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[9], x9p14);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[5], x10p13);
let m0419a = WasmVector::fmadd(m0419a, self.twiddles_re[1], x11p12);
let m0419b = WasmVector::mul(self.twiddles_im[3], x1m22);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[7], x2m21);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[10], x3m20);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[6], x4m19);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[2], x5m18);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[0], x6m17);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[4], x7m16);
let m0419b = WasmVector::fmadd(m0419b, self.twiddles_im[8], x8m15);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[9], x9m14);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[5], x10m13);
let m0419b = WasmVector::nmadd(m0419b, self.twiddles_im[1], x11m12);
let [y04, y19] = WasmVector::column_butterfly2([m0419a, m0419b]);
let m0518a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p22);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[9], x2p21);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[7], x3p20);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[2], x4p19);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[1], x5p18);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[6], x6p17);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[10], x7p16);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[5], x8p15);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[0], x9p14);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[3], x10p13);
let m0518a = WasmVector::fmadd(m0518a, self.twiddles_re[8], x11p12);
let m0518b = WasmVector::mul(self.twiddles_im[4], x1m22);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[9], x2m21);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[7], x3m20);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[2], x4m19);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[1], x5m18);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[6], x6m17);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[10], x7m16);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[5], x8m15);
let m0518b = WasmVector::nmadd(m0518b, self.twiddles_im[0], x9m14);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[3], x10m13);
let m0518b = WasmVector::fmadd(m0518b, self.twiddles_im[8], x11m12);
let [y05, y18] = WasmVector::column_butterfly2([m0518a, m0518b]);
let m0617a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p22);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[10], x2p21);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[4], x3p20);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[0], x4p19);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[6], x5p18);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[9], x6p17);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[3], x7p16);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[1], x8p15);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[7], x9p14);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[8], x10p13);
let m0617a = WasmVector::fmadd(m0617a, self.twiddles_re[2], x11p12);
let m0617b = WasmVector::mul(self.twiddles_im[5], x1m22);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[10], x2m21);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[4], x3m20);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[0], x4m19);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[6], x5m18);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[9], x6m17);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[3], x7m16);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[1], x8m15);
let m0617b = WasmVector::fmadd(m0617b, self.twiddles_im[7], x9m14);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[8], x10m13);
let m0617b = WasmVector::nmadd(m0617b, self.twiddles_im[2], x11m12);
let [y06, y17] = WasmVector::column_butterfly2([m0617a, m0617b]);
let m0716a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p22);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[8], x2p21);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[1], x3p20);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[4], x4p19);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[10], x5p18);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[3], x6p17);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[2], x7p16);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[9], x8p15);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[5], x9p14);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[0], x10p13);
let m0716a = WasmVector::fmadd(m0716a, self.twiddles_re[7], x11p12);
let m0716b = WasmVector::mul(self.twiddles_im[6], x1m22);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[8], x2m21);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[1], x3m20);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[4], x4m19);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[10], x5m18);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[3], x6m17);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[2], x7m16);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[9], x8m15);
let m0716b = WasmVector::nmadd(m0716b, self.twiddles_im[5], x9m14);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[0], x10m13);
let m0716b = WasmVector::fmadd(m0716b, self.twiddles_im[7], x11m12);
let [y07, y16] = WasmVector::column_butterfly2([m0716a, m0716b]);
let m0815a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p22);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[6], x2p21);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[0], x3p20);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[8], x4p19);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[5], x5p18);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[1], x6p17);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[9], x7p16);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[4], x8p15);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[2], x9p14);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[10], x10p13);
let m0815a = WasmVector::fmadd(m0815a, self.twiddles_re[3], x11p12);
let m0815b = WasmVector::mul(self.twiddles_im[7], x1m22);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[6], x2m21);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[0], x3m20);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[8], x4m19);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[5], x5m18);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[1], x6m17);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[9], x7m16);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[4], x8m15);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[2], x9m14);
let m0815b = WasmVector::fmadd(m0815b, self.twiddles_im[10], x10m13);
let m0815b = WasmVector::nmadd(m0815b, self.twiddles_im[3], x11m12);
let [y08, y15] = WasmVector::column_butterfly2([m0815a, m0815b]);
let m0914a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p22);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[4], x2p21);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[3], x3p20);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[9], x4p19);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[0], x5p18);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[7], x6p17);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[5], x7p16);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[2], x8p15);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[10], x9p14);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[1], x10p13);
let m0914a = WasmVector::fmadd(m0914a, self.twiddles_re[6], x11p12);
let m0914b = WasmVector::mul(self.twiddles_im[8], x1m22);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[4], x2m21);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[3], x3m20);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[9], x4m19);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[0], x5m18);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[7], x6m17);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[5], x7m16);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[2], x8m15);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[10], x9m14);
let m0914b = WasmVector::nmadd(m0914b, self.twiddles_im[1], x10m13);
let m0914b = WasmVector::fmadd(m0914b, self.twiddles_im[6], x11m12);
let [y09, y14] = WasmVector::column_butterfly2([m0914a, m0914b]);
let m1013a = WasmVector::fmadd(values[0], self.twiddles_re[9], x1p22);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[2], x2p21);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[6], x3p20);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[5], x4p19);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[3], x5p18);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[8], x6p17);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[0], x7p16);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[10], x8p15);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[1], x9p14);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[7], x10p13);
let m1013a = WasmVector::fmadd(m1013a, self.twiddles_re[4], x11p12);
let m1013b = WasmVector::mul(self.twiddles_im[9], x1m22);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[2], x2m21);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[6], x3m20);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[5], x4m19);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[3], x5m18);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[8], x6m17);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[0], x7m16);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[10], x8m15);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[1], x9m14);
let m1013b = WasmVector::fmadd(m1013b, self.twiddles_im[7], x10m13);
let m1013b = WasmVector::nmadd(m1013b, self.twiddles_im[4], x11m12);
let [y10, y13] = WasmVector::column_butterfly2([m1013a, m1013b]);
let m1112a = WasmVector::fmadd(values[0], self.twiddles_re[10], x1p22);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[0], x2p21);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[9], x3p20);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[1], x4p19);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[8], x5p18);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[2], x6p17);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[7], x7p16);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[3], x8p15);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[6], x9p14);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[4], x10p13);
let m1112a = WasmVector::fmadd(m1112a, self.twiddles_re[5], x11p12);
let m1112b = WasmVector::mul(self.twiddles_im[10], x1m22);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[0], x2m21);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[9], x3m20);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[1], x4m19);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[8], x5m18);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[2], x6m17);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[7], x7m16);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[3], x8m15);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[6], x9m14);
let m1112b = WasmVector::nmadd(m1112b, self.twiddles_im[4], x10m13);
let m1112b = WasmVector::fmadd(m1112b, self.twiddles_im[5], x11m12);
let [y11, y12] = WasmVector::column_butterfly2([m1112a, m1112b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18, y19, y20, y21, y22]
}
}
/// Length-29 FFT butterfly for `f32` data built on `WasmVector32` SIMD vectors.
///
/// The type parameter `T` is only carried through `PhantomData`; the constructor
/// calls `assert_f32::<T>()` before building the twiddle tables.
struct WasmSimdF32Butterfly29<T> {
// Transform direction passed to `new`; also used there to generate the twiddles.
direction: FftDirection,
// Real parts of the 14 twiddle factors from `make_twiddles(29, direction)`,
// each one broadcast into its own SIMD vector.
twiddles_re: [WasmVector32; 14],
// Imaginary parts of the same 14 twiddle factors, broadcast the same way.
twiddles_im: [WasmVector32; 14],
// Zero-sized marker that ties the struct to `T` without storing a value of it.
_phantom: std::marker::PhantomData<T>,
}
// Invokes the shared f32 butterfly boilerplate macro for this type, passing the FFT
// length (29) and a closure that returns the stored direction.
// NOTE(review): presumably this expands to the `Fft` / `Length` / `Direction` trait
// impls that drive the `perform_*_contiguous` methods — confirm against the macro definition.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly29, 29, |this: &WasmSimdF32Butterfly29<_>| this.direction);
impl<T: FftNum> WasmSimdF32Butterfly29<T> {
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f32::<T>();
let twiddles = make_twiddles(29, direction);
Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}
}
/// Runs the length-29 butterfly in place over complex elements `0..=28` of `buffer`.
///
/// Each element is loaded with `read_partial1_complex_to_array!`, the 29 resulting
/// vectors go through `perform_parallel_fft_direct`, and the results are stored back
/// to the same indices with `write_partial_lo_complex_to_array!`.
/// NOTE(review): judging by the macro names, only the low lane of each vector
/// carries data on this path — confirm against the macro definitions.
///
/// # Safety
///
/// `buffer` must provide at least 29 complex elements; NOTE(review): the load and
/// store helpers are presumably unchecked — confirm in `WasmSimdArrayMut`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let input = read_partial1_complex_to_array!(
        buffer,
        {
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
        }
    );
    let output = self.perform_parallel_fft_direct(input);
    write_partial_lo_complex_to_array!(
        output,
        buffer,
        {
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
        }
    );
}
/// Runs two length-29 FFTs stored back to back in `buffer` (58 complex values), in place.
///
/// Steps:
/// 1. Load 29 vectors from the even offsets `0, 2, ..., 56`.
/// 2. Regroup them so that entry `k` of the kernel input combines one half taken
///    from a vector in the first part of the buffer with one half taken from a
///    vector in the second part (`extract_lo_hi_f32` / `extract_hi_lo_f32`).
///    NOTE(review): assuming each vector holds two complex `f32` values and the
///    helper names describe which half is taken from each argument, entry `k`
///    pairs element `k` of the first FFT with element `k` of the second — confirm.
/// 3. Run `perform_parallel_fft_direct` on the regrouped vectors.
/// 4. Undo the regrouping (`extract_lo_lo_f32`, one `extract_lo_hi_f32` at the seam
///    between the two FFTs, then `extract_hi_hi_f32`) and store with stride 2.
///
/// # Safety
///
/// `buffer` must provide at least 58 complex elements; NOTE(review): the load and
/// store helpers are presumably unchecked — confirm in `WasmSimdArrayMut`.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let packed_in = read_complex_to_array!(
        buffer,
        {
            0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
            30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56
        }
    );

    // Entry k: one half from packed_in[k / 2], the other from packed_in[14 + (k + 1) / 2].
    let lanes = [
        extract_lo_hi_f32(packed_in[0], packed_in[14]),
        extract_hi_lo_f32(packed_in[0], packed_in[15]),
        extract_lo_hi_f32(packed_in[1], packed_in[15]),
        extract_hi_lo_f32(packed_in[1], packed_in[16]),
        extract_lo_hi_f32(packed_in[2], packed_in[16]),
        extract_hi_lo_f32(packed_in[2], packed_in[17]),
        extract_lo_hi_f32(packed_in[3], packed_in[17]),
        extract_hi_lo_f32(packed_in[3], packed_in[18]),
        extract_lo_hi_f32(packed_in[4], packed_in[18]),
        extract_hi_lo_f32(packed_in[4], packed_in[19]),
        extract_lo_hi_f32(packed_in[5], packed_in[19]),
        extract_hi_lo_f32(packed_in[5], packed_in[20]),
        extract_lo_hi_f32(packed_in[6], packed_in[20]),
        extract_hi_lo_f32(packed_in[6], packed_in[21]),
        extract_lo_hi_f32(packed_in[7], packed_in[21]),
        extract_hi_lo_f32(packed_in[7], packed_in[22]),
        extract_lo_hi_f32(packed_in[8], packed_in[22]),
        extract_hi_lo_f32(packed_in[8], packed_in[23]),
        extract_lo_hi_f32(packed_in[9], packed_in[23]),
        extract_hi_lo_f32(packed_in[9], packed_in[24]),
        extract_lo_hi_f32(packed_in[10], packed_in[24]),
        extract_hi_lo_f32(packed_in[10], packed_in[25]),
        extract_lo_hi_f32(packed_in[11], packed_in[25]),
        extract_hi_lo_f32(packed_in[11], packed_in[26]),
        extract_lo_hi_f32(packed_in[12], packed_in[26]),
        extract_hi_lo_f32(packed_in[12], packed_in[27]),
        extract_lo_hi_f32(packed_in[13], packed_in[27]),
        extract_hi_lo_f32(packed_in[13], packed_in[28]),
        extract_lo_hi_f32(packed_in[14], packed_in[28]),
    ];

    let spectra = self.perform_parallel_fft_direct(lanes);

    // First 14 entries come from the low halves, entry 14 straddles the seam
    // (low half of result 28, high half of result 0), the rest from the high halves.
    let packed_out = [
        extract_lo_lo_f32(spectra[0], spectra[1]),
        extract_lo_lo_f32(spectra[2], spectra[3]),
        extract_lo_lo_f32(spectra[4], spectra[5]),
        extract_lo_lo_f32(spectra[6], spectra[7]),
        extract_lo_lo_f32(spectra[8], spectra[9]),
        extract_lo_lo_f32(spectra[10], spectra[11]),
        extract_lo_lo_f32(spectra[12], spectra[13]),
        extract_lo_lo_f32(spectra[14], spectra[15]),
        extract_lo_lo_f32(spectra[16], spectra[17]),
        extract_lo_lo_f32(spectra[18], spectra[19]),
        extract_lo_lo_f32(spectra[20], spectra[21]),
        extract_lo_lo_f32(spectra[22], spectra[23]),
        extract_lo_lo_f32(spectra[24], spectra[25]),
        extract_lo_lo_f32(spectra[26], spectra[27]),
        extract_lo_hi_f32(spectra[28], spectra[0]),
        extract_hi_hi_f32(spectra[1], spectra[2]),
        extract_hi_hi_f32(spectra[3], spectra[4]),
        extract_hi_hi_f32(spectra[5], spectra[6]),
        extract_hi_hi_f32(spectra[7], spectra[8]),
        extract_hi_hi_f32(spectra[9], spectra[10]),
        extract_hi_hi_f32(spectra[11], spectra[12]),
        extract_hi_hi_f32(spectra[13], spectra[14]),
        extract_hi_hi_f32(spectra[15], spectra[16]),
        extract_hi_hi_f32(spectra[17], spectra[18]),
        extract_hi_hi_f32(spectra[19], spectra[20]),
        extract_hi_hi_f32(spectra[21], spectra[22]),
        extract_hi_hi_f32(spectra[23], spectra[24]),
        extract_hi_hi_f32(spectra[25], spectra[26]),
        extract_hi_hi_f32(spectra[27], spectra[28]),
    ];

    write_complex_to_array_strided!(
        packed_out,
        buffer,
        2,
        {
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
        }
    );
}
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 29]) -> [WasmVector32; 29] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
let y00 = values[0];
let [x1p28, x1m28] = WasmVector::column_butterfly2([values[1], values[28]]);
let x1m28 = WasmVector::apply_rotate90(rotate, x1m28);
let y00 = WasmVector::add(y00, x1p28);
let [x2p27, x2m27] = WasmVector::column_butterfly2([values[2], values[27]]);
let x2m27 = WasmVector::apply_rotate90(rotate, x2m27);
let y00 = WasmVector::add(y00, x2p27);
let [x3p26, x3m26] = WasmVector::column_butterfly2([values[3], values[26]]);
let x3m26 = WasmVector::apply_rotate90(rotate, x3m26);
let y00 = WasmVector::add(y00, x3p26);
let [x4p25, x4m25] = WasmVector::column_butterfly2([values[4], values[25]]);
let x4m25 = WasmVector::apply_rotate90(rotate, x4m25);
let y00 = WasmVector::add(y00, x4p25);
let [x5p24, x5m24] = WasmVector::column_butterfly2([values[5], values[24]]);
let x5m24 = WasmVector::apply_rotate90(rotate, x5m24);
let y00 = WasmVector::add(y00, x5p24);
let [x6p23, x6m23] = WasmVector::column_butterfly2([values[6], values[23]]);
let x6m23 = WasmVector::apply_rotate90(rotate, x6m23);
let y00 = WasmVector::add(y00, x6p23);
let [x7p22, x7m22] = WasmVector::column_butterfly2([values[7], values[22]]);
let x7m22 = WasmVector::apply_rotate90(rotate, x7m22);
let y00 = WasmVector::add(y00, x7p22);
let [x8p21, x8m21] = WasmVector::column_butterfly2([values[8], values[21]]);
let x8m21 = WasmVector::apply_rotate90(rotate, x8m21);
let y00 = WasmVector::add(y00, x8p21);
let [x9p20, x9m20] = WasmVector::column_butterfly2([values[9], values[20]]);
let x9m20 = WasmVector::apply_rotate90(rotate, x9m20);
let y00 = WasmVector::add(y00, x9p20);
let [x10p19, x10m19] = WasmVector::column_butterfly2([values[10], values[19]]);
let x10m19 = WasmVector::apply_rotate90(rotate, x10m19);
let y00 = WasmVector::add(y00, x10p19);
let [x11p18, x11m18] = WasmVector::column_butterfly2([values[11], values[18]]);
let x11m18 = WasmVector::apply_rotate90(rotate, x11m18);
let y00 = WasmVector::add(y00, x11p18);
let [x12p17, x12m17] = WasmVector::column_butterfly2([values[12], values[17]]);
let x12m17 = WasmVector::apply_rotate90(rotate, x12m17);
let y00 = WasmVector::add(y00, x12p17);
let [x13p16, x13m16] = WasmVector::column_butterfly2([values[13], values[16]]);
let x13m16 = WasmVector::apply_rotate90(rotate, x13m16);
let y00 = WasmVector::add(y00, x13p16);
let [x14p15, x14m15] = WasmVector::column_butterfly2([values[14], values[15]]);
let x14m15 = WasmVector::apply_rotate90(rotate, x14m15);
let y00 = WasmVector::add(y00, x14p15);
let m0128a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p28);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[1], x2p27);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[2], x3p26);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[3], x4p25);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[4], x5p24);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[5], x6p23);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[6], x7p22);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[7], x8p21);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[8], x9p20);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[9], x10p19);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[10], x11p18);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[11], x12p17);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[12], x13p16);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[13], x14p15);
let m0128b = WasmVector::mul(self.twiddles_im[0], x1m28);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[1], x2m27);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[2], x3m26);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[3], x4m25);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[4], x5m24);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[5], x6m23);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[6], x7m22);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[7], x8m21);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[8], x9m20);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[9], x10m19);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[10], x11m18);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[11], x12m17);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[12], x13m16);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[13], x14m15);
let [y01, y28] = WasmVector::column_butterfly2([m0128a, m0128b]);
let m0227a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p28);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[3], x2p27);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[5], x3p26);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[7], x4p25);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[9], x5p24);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[11], x6p23);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[13], x7p22);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[12], x8p21);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[10], x9p20);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[8], x10p19);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[6], x11p18);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[4], x12p17);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[2], x13p16);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[0], x14p15);
let m0227b = WasmVector::mul(self.twiddles_im[1], x1m28);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[3], x2m27);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[5], x3m26);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[7], x4m25);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[9], x5m24);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[11], x6m23);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[13], x7m22);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[12], x8m21);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[10], x9m20);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[8], x10m19);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[6], x11m18);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[4], x12m17);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[2], x13m16);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[0], x14m15);
let [y02, y27] = WasmVector::column_butterfly2([m0227a, m0227b]);
let m0326a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p28);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[5], x2p27);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[8], x3p26);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[11], x4p25);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[13], x5p24);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[10], x6p23);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[7], x7p22);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[4], x8p21);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[1], x9p20);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[0], x10p19);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[3], x11p18);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[6], x12p17);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[9], x13p16);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[12], x14p15);
let m0326b = WasmVector::mul(self.twiddles_im[2], x1m28);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[5], x2m27);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[8], x3m26);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[11], x4m25);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[13], x5m24);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[10], x6m23);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[7], x7m22);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[4], x8m21);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[1], x9m20);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[0], x10m19);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[3], x11m18);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[6], x12m17);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[9], x13m16);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[12], x14m15);
let [y03, y26] = WasmVector::column_butterfly2([m0326a, m0326b]);
let m0425a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p28);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[7], x2p27);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[11], x3p26);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[12], x4p25);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[8], x5p24);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[4], x6p23);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[0], x7p22);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[2], x8p21);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[6], x9p20);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[10], x10p19);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[13], x11p18);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[9], x12p17);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[5], x13p16);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[1], x14p15);
let m0425b = WasmVector::mul(self.twiddles_im[3], x1m28);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[7], x2m27);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[11], x3m26);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[12], x4m25);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[8], x5m24);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[4], x6m23);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[0], x7m22);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[2], x8m21);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[6], x9m20);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[10], x10m19);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[13], x11m18);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[9], x12m17);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[5], x13m16);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[1], x14m15);
let [y04, y25] = WasmVector::column_butterfly2([m0425a, m0425b]);
let m0524a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p28);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[9], x2p27);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[13], x3p26);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[8], x4p25);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[3], x5p24);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[0], x6p23);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[5], x7p22);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[10], x8p21);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[12], x9p20);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[7], x10p19);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[2], x11p18);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[1], x12p17);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[6], x13p16);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[11], x14p15);
let m0524b = WasmVector::mul(self.twiddles_im[4], x1m28);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[9], x2m27);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[13], x3m26);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[8], x4m25);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[3], x5m24);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[0], x6m23);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[5], x7m22);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[10], x8m21);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[12], x9m20);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[7], x10m19);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[2], x11m18);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[1], x12m17);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[6], x13m16);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[11], x14m15);
let [y05, y24] = WasmVector::column_butterfly2([m0524a, m0524b]);
let m0623a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p28);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[11], x2p27);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[10], x3p26);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[4], x4p25);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[0], x5p24);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[6], x6p23);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[12], x7p22);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[9], x8p21);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[3], x9p20);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[1], x10p19);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[7], x11p18);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[13], x12p17);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[8], x13p16);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[2], x14p15);
let m0623b = WasmVector::mul(self.twiddles_im[5], x1m28);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[11], x2m27);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[10], x3m26);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[4], x4m25);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[0], x5m24);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[6], x6m23);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[12], x7m22);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[9], x8m21);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[3], x9m20);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[1], x10m19);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[7], x11m18);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[13], x12m17);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[8], x13m16);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[2], x14m15);
let [y06, y23] = WasmVector::column_butterfly2([m0623a, m0623b]);
let m0722a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p28);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[13], x2p27);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[7], x3p26);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[0], x4p25);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[5], x5p24);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[12], x6p23);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[8], x7p22);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[1], x8p21);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[4], x9p20);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[11], x10p19);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[9], x11p18);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[2], x12p17);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[3], x13p16);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[10], x14p15);
let m0722b = WasmVector::mul(self.twiddles_im[6], x1m28);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[13], x2m27);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[7], x3m26);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[0], x4m25);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[5], x5m24);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[12], x6m23);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[8], x7m22);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[1], x8m21);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[4], x9m20);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[11], x10m19);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[9], x11m18);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[2], x12m17);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[3], x13m16);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[10], x14m15);
let [y07, y22] = WasmVector::column_butterfly2([m0722a, m0722b]);
let m0821a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p28);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[12], x2p27);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[4], x3p26);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[2], x4p25);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[10], x5p24);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[9], x6p23);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[1], x7p22);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[5], x8p21);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[13], x9p20);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[6], x10p19);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[0], x11p18);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[8], x12p17);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[11], x13p16);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[3], x14p15);
let m0821b = WasmVector::mul(self.twiddles_im[7], x1m28);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[12], x2m27);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[4], x3m26);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[2], x4m25);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[10], x5m24);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[9], x6m23);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[1], x7m22);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[5], x8m21);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[13], x9m20);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[6], x10m19);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[0], x11m18);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[8], x12m17);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[11], x13m16);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[3], x14m15);
let [y08, y21] = WasmVector::column_butterfly2([m0821a, m0821b]);
let m0920a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p28);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[10], x2p27);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[1], x3p26);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[6], x4p25);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[12], x5p24);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[3], x6p23);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[4], x7p22);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[13], x8p21);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[5], x9p20);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[2], x10p19);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[11], x11p18);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[7], x12p17);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[0], x13p16);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[9], x14p15);
let m0920b = WasmVector::mul(self.twiddles_im[8], x1m28);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[10], x2m27);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[1], x3m26);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[6], x4m25);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[12], x5m24);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[3], x6m23);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[4], x7m22);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[13], x8m21);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[5], x9m20);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[2], x10m19);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[11], x11m18);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[7], x12m17);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[0], x13m16);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[9], x14m15);
let [y09, y20] = WasmVector::column_butterfly2([m0920a, m0920b]);
let m1019a = WasmVector::fmadd(values[0], self.twiddles_re[9], x1p28);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[8], x2p27);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[0], x3p26);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[10], x4p25);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[7], x5p24);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[1], x6p23);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[11], x7p22);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[6], x8p21);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[2], x9p20);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[12], x10p19);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[5], x11p18);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[3], x12p17);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[13], x13p16);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[4], x14p15);
let m1019b = WasmVector::mul(self.twiddles_im[9], x1m28);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[8], x2m27);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[0], x3m26);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[10], x4m25);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[7], x5m24);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[1], x6m23);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[11], x7m22);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[6], x8m21);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[2], x9m20);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[12], x10m19);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[5], x11m18);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[3], x12m17);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[13], x13m16);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[4], x14m15);
let [y10, y19] = WasmVector::column_butterfly2([m1019a, m1019b]);
let m1118a = WasmVector::fmadd(values[0], self.twiddles_re[10], x1p28);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[6], x2p27);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[3], x3p26);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[13], x4p25);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[2], x5p24);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[7], x6p23);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[9], x7p22);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[0], x8p21);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[11], x9p20);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[5], x10p19);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[4], x11p18);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[12], x12p17);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[1], x13p16);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[8], x14p15);
let m1118b = WasmVector::mul(self.twiddles_im[10], x1m28);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[6], x2m27);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[3], x3m26);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[13], x4m25);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[2], x5m24);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[7], x6m23);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[9], x7m22);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[0], x8m21);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[11], x9m20);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[5], x10m19);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[4], x11m18);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[12], x12m17);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[1], x13m16);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[8], x14m15);
let [y11, y18] = WasmVector::column_butterfly2([m1118a, m1118b]);
let m1217a = WasmVector::fmadd(values[0], self.twiddles_re[11], x1p28);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[4], x2p27);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[6], x3p26);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[9], x4p25);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[1], x5p24);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[13], x6p23);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[2], x7p22);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[8], x8p21);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[7], x9p20);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[3], x10p19);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[12], x11p18);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[0], x12p17);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[10], x13p16);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[5], x14p15);
let m1217b = WasmVector::mul(self.twiddles_im[11], x1m28);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[4], x2m27);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[6], x3m26);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[9], x4m25);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[1], x5m24);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[13], x6m23);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[2], x7m22);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[8], x8m21);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[7], x9m20);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[3], x10m19);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[12], x11m18);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[0], x12m17);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[10], x13m16);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[5], x14m15);
let [y12, y17] = WasmVector::column_butterfly2([m1217a, m1217b]);
let m1316a = WasmVector::fmadd(values[0], self.twiddles_re[12], x1p28);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[2], x2p27);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[9], x3p26);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[5], x4p25);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[6], x5p24);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[8], x6p23);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[3], x7p22);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[11], x8p21);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[0], x9p20);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[13], x10p19);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[1], x11p18);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[10], x12p17);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[4], x13p16);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[7], x14p15);
let m1316b = WasmVector::mul(self.twiddles_im[12], x1m28);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[2], x2m27);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[9], x3m26);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[5], x4m25);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[6], x5m24);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[8], x6m23);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[3], x7m22);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[11], x8m21);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[0], x9m20);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[13], x10m19);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[1], x11m18);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[10], x12m17);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[4], x13m16);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[7], x14m15);
let [y13, y16] = WasmVector::column_butterfly2([m1316a, m1316b]);
let m1415a = WasmVector::fmadd(values[0], self.twiddles_re[13], x1p28);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[0], x2p27);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[12], x3p26);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[1], x4p25);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[11], x5p24);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[2], x6p23);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[10], x7p22);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[3], x8p21);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[9], x9p20);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[4], x10p19);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[8], x11p18);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[5], x12p17);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[7], x13p16);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[6], x14p15);
let m1415b = WasmVector::mul(self.twiddles_im[13], x1m28);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[0], x2m27);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[12], x3m26);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[1], x4m25);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[11], x5m24);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[2], x6m23);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[10], x7m22);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[3], x8m21);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[9], x9m20);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[4], x10m19);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[8], x11m18);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[5], x12m17);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[7], x13m16);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[6], x14m15);
let [y14, y15] = WasmVector::column_butterfly2([m1415a, m1415b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18, y19, y20, y21, y22, y23, y24, y25, y26, y27, y28]
}
}
/// Length-29 butterfly working on `f64` data through Wasm `simd128` vectors.
///
/// Holds the transform direction together with 14 twiddle values, kept as
/// separate real and imaginary vectors so `perform_fft_direct` can feed them
/// straight into its multiply-accumulate chains.
struct WasmSimdF64Butterfly29<T> {
    /// Direction this instance was constructed with.
    direction: FftDirection,
    /// Real component of each twiddle, broadcast into a `WasmVector64` by `new`.
    twiddles_re: [WasmVector64; 14],
    /// Imaginary component of each twiddle, broadcast into a `WasmVector64` by `new`.
    twiddles_im: [WasmVector64; 14],
    /// Marker for `T`, which is not stored in any other field.
    _phantom: core::marker::PhantomData<T>,
}
// Expands the project's shared f64 butterfly boilerplate for this type, passing the
// FFT length (29) and a closure that reads the instance's stored direction.
// NOTE(review): presumably this supplies the `Fft`/`Length`/`Direction` trait impls;
// the macro body is not visible here — confirm against its definition.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly29, 29, |this: &WasmSimdF64Butterfly29<_>| this.direction);
impl<T: FftNum> WasmSimdF64Butterfly29<T> {
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f64::<T>();
let twiddles = make_twiddles(29, direction);
unsafe {Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}}
}
/// Runs one length-29 FFT over `buffer`, writing the result back in place.
///
/// Elements 0 through 28 are loaded with `read_complex_to_array!`, transformed by
/// `perform_fft_direct`, and stored back at the same indices with
/// `write_complex_to_array!`.
///
/// # Safety
///
/// NOTE(review): presumably `buffer` must expose at least 29 complex elements and
/// `simd128` must be available — confirm against the load/store macros and
/// `WasmSimdArrayMut`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
    let input = read_complex_to_array!(buffer, {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
    });
    let output = self.perform_fft_direct(input);
    write_complex_to_array!(output, buffer, {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
    });
}
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 29]) -> [WasmVector64; 29] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
let y00 = values[0];
let [x1p28, x1m28] = WasmVector::column_butterfly2([values[1], values[28]]);
let x1m28 = WasmVector::apply_rotate90(rotate, x1m28);
let y00 = WasmVector::add(y00, x1p28);
let [x2p27, x2m27] = WasmVector::column_butterfly2([values[2], values[27]]);
let x2m27 = WasmVector::apply_rotate90(rotate, x2m27);
let y00 = WasmVector::add(y00, x2p27);
let [x3p26, x3m26] = WasmVector::column_butterfly2([values[3], values[26]]);
let x3m26 = WasmVector::apply_rotate90(rotate, x3m26);
let y00 = WasmVector::add(y00, x3p26);
let [x4p25, x4m25] = WasmVector::column_butterfly2([values[4], values[25]]);
let x4m25 = WasmVector::apply_rotate90(rotate, x4m25);
let y00 = WasmVector::add(y00, x4p25);
let [x5p24, x5m24] = WasmVector::column_butterfly2([values[5], values[24]]);
let x5m24 = WasmVector::apply_rotate90(rotate, x5m24);
let y00 = WasmVector::add(y00, x5p24);
let [x6p23, x6m23] = WasmVector::column_butterfly2([values[6], values[23]]);
let x6m23 = WasmVector::apply_rotate90(rotate, x6m23);
let y00 = WasmVector::add(y00, x6p23);
let [x7p22, x7m22] = WasmVector::column_butterfly2([values[7], values[22]]);
let x7m22 = WasmVector::apply_rotate90(rotate, x7m22);
let y00 = WasmVector::add(y00, x7p22);
let [x8p21, x8m21] = WasmVector::column_butterfly2([values[8], values[21]]);
let x8m21 = WasmVector::apply_rotate90(rotate, x8m21);
let y00 = WasmVector::add(y00, x8p21);
let [x9p20, x9m20] = WasmVector::column_butterfly2([values[9], values[20]]);
let x9m20 = WasmVector::apply_rotate90(rotate, x9m20);
let y00 = WasmVector::add(y00, x9p20);
let [x10p19, x10m19] = WasmVector::column_butterfly2([values[10], values[19]]);
let x10m19 = WasmVector::apply_rotate90(rotate, x10m19);
let y00 = WasmVector::add(y00, x10p19);
let [x11p18, x11m18] = WasmVector::column_butterfly2([values[11], values[18]]);
let x11m18 = WasmVector::apply_rotate90(rotate, x11m18);
let y00 = WasmVector::add(y00, x11p18);
let [x12p17, x12m17] = WasmVector::column_butterfly2([values[12], values[17]]);
let x12m17 = WasmVector::apply_rotate90(rotate, x12m17);
let y00 = WasmVector::add(y00, x12p17);
let [x13p16, x13m16] = WasmVector::column_butterfly2([values[13], values[16]]);
let x13m16 = WasmVector::apply_rotate90(rotate, x13m16);
let y00 = WasmVector::add(y00, x13p16);
let [x14p15, x14m15] = WasmVector::column_butterfly2([values[14], values[15]]);
let x14m15 = WasmVector::apply_rotate90(rotate, x14m15);
let y00 = WasmVector::add(y00, x14p15);
let m0128a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p28);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[1], x2p27);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[2], x3p26);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[3], x4p25);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[4], x5p24);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[5], x6p23);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[6], x7p22);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[7], x8p21);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[8], x9p20);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[9], x10p19);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[10], x11p18);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[11], x12p17);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[12], x13p16);
let m0128a = WasmVector::fmadd(m0128a, self.twiddles_re[13], x14p15);
let m0128b = WasmVector::mul(self.twiddles_im[0], x1m28);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[1], x2m27);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[2], x3m26);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[3], x4m25);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[4], x5m24);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[5], x6m23);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[6], x7m22);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[7], x8m21);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[8], x9m20);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[9], x10m19);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[10], x11m18);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[11], x12m17);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[12], x13m16);
let m0128b = WasmVector::fmadd(m0128b, self.twiddles_im[13], x14m15);
let [y01, y28] = WasmVector::column_butterfly2([m0128a, m0128b]);
let m0227a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p28);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[3], x2p27);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[5], x3p26);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[7], x4p25);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[9], x5p24);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[11], x6p23);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[13], x7p22);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[12], x8p21);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[10], x9p20);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[8], x10p19);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[6], x11p18);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[4], x12p17);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[2], x13p16);
let m0227a = WasmVector::fmadd(m0227a, self.twiddles_re[0], x14p15);
let m0227b = WasmVector::mul(self.twiddles_im[1], x1m28);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[3], x2m27);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[5], x3m26);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[7], x4m25);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[9], x5m24);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[11], x6m23);
let m0227b = WasmVector::fmadd(m0227b, self.twiddles_im[13], x7m22);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[12], x8m21);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[10], x9m20);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[8], x10m19);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[6], x11m18);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[4], x12m17);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[2], x13m16);
let m0227b = WasmVector::nmadd(m0227b, self.twiddles_im[0], x14m15);
let [y02, y27] = WasmVector::column_butterfly2([m0227a, m0227b]);
let m0326a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p28);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[5], x2p27);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[8], x3p26);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[11], x4p25);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[13], x5p24);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[10], x6p23);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[7], x7p22);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[4], x8p21);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[1], x9p20);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[0], x10p19);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[3], x11p18);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[6], x12p17);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[9], x13p16);
let m0326a = WasmVector::fmadd(m0326a, self.twiddles_re[12], x14p15);
let m0326b = WasmVector::mul(self.twiddles_im[2], x1m28);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[5], x2m27);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[8], x3m26);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[11], x4m25);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[13], x5m24);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[10], x6m23);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[7], x7m22);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[4], x8m21);
let m0326b = WasmVector::nmadd(m0326b, self.twiddles_im[1], x9m20);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[0], x10m19);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[3], x11m18);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[6], x12m17);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[9], x13m16);
let m0326b = WasmVector::fmadd(m0326b, self.twiddles_im[12], x14m15);
let [y03, y26] = WasmVector::column_butterfly2([m0326a, m0326b]);
let m0425a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p28);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[7], x2p27);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[11], x3p26);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[12], x4p25);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[8], x5p24);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[4], x6p23);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[0], x7p22);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[2], x8p21);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[6], x9p20);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[10], x10p19);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[13], x11p18);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[9], x12p17);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[5], x13p16);
let m0425a = WasmVector::fmadd(m0425a, self.twiddles_re[1], x14p15);
let m0425b = WasmVector::mul(self.twiddles_im[3], x1m28);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[7], x2m27);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[11], x3m26);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[12], x4m25);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[8], x5m24);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[4], x6m23);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[0], x7m22);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[2], x8m21);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[6], x9m20);
let m0425b = WasmVector::fmadd(m0425b, self.twiddles_im[10], x10m19);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[13], x11m18);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[9], x12m17);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[5], x13m16);
let m0425b = WasmVector::nmadd(m0425b, self.twiddles_im[1], x14m15);
let [y04, y25] = WasmVector::column_butterfly2([m0425a, m0425b]);
let m0524a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p28);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[9], x2p27);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[13], x3p26);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[8], x4p25);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[3], x5p24);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[0], x6p23);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[5], x7p22);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[10], x8p21);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[12], x9p20);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[7], x10p19);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[2], x11p18);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[1], x12p17);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[6], x13p16);
let m0524a = WasmVector::fmadd(m0524a, self.twiddles_re[11], x14p15);
let m0524b = WasmVector::mul(self.twiddles_im[4], x1m28);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[9], x2m27);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[13], x3m26);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[8], x4m25);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[3], x5m24);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[0], x6m23);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[5], x7m22);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[10], x8m21);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[12], x9m20);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[7], x10m19);
let m0524b = WasmVector::nmadd(m0524b, self.twiddles_im[2], x11m18);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[1], x12m17);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[6], x13m16);
let m0524b = WasmVector::fmadd(m0524b, self.twiddles_im[11], x14m15);
let [y05, y24] = WasmVector::column_butterfly2([m0524a, m0524b]);
let m0623a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p28);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[11], x2p27);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[10], x3p26);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[4], x4p25);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[0], x5p24);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[6], x6p23);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[12], x7p22);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[9], x8p21);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[3], x9p20);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[1], x10p19);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[7], x11p18);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[13], x12p17);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[8], x13p16);
let m0623a = WasmVector::fmadd(m0623a, self.twiddles_re[2], x14p15);
let m0623b = WasmVector::mul(self.twiddles_im[5], x1m28);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[11], x2m27);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[10], x3m26);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[4], x4m25);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[0], x5m24);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[6], x6m23);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[12], x7m22);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[9], x8m21);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[3], x9m20);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[1], x10m19);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[7], x11m18);
let m0623b = WasmVector::fmadd(m0623b, self.twiddles_im[13], x12m17);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[8], x13m16);
let m0623b = WasmVector::nmadd(m0623b, self.twiddles_im[2], x14m15);
let [y06, y23] = WasmVector::column_butterfly2([m0623a, m0623b]);
let m0722a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p28);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[13], x2p27);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[7], x3p26);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[0], x4p25);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[5], x5p24);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[12], x6p23);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[8], x7p22);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[1], x8p21);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[4], x9p20);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[11], x10p19);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[9], x11p18);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[2], x12p17);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[3], x13p16);
let m0722a = WasmVector::fmadd(m0722a, self.twiddles_re[10], x14p15);
let m0722b = WasmVector::mul(self.twiddles_im[6], x1m28);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[13], x2m27);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[7], x3m26);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[0], x4m25);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[5], x5m24);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[12], x6m23);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[8], x7m22);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[1], x8m21);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[4], x9m20);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[11], x10m19);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[9], x11m18);
let m0722b = WasmVector::nmadd(m0722b, self.twiddles_im[2], x12m17);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[3], x13m16);
let m0722b = WasmVector::fmadd(m0722b, self.twiddles_im[10], x14m15);
let [y07, y22] = WasmVector::column_butterfly2([m0722a, m0722b]);
let m0821a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p28);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[12], x2p27);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[4], x3p26);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[2], x4p25);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[10], x5p24);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[9], x6p23);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[1], x7p22);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[5], x8p21);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[13], x9p20);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[6], x10p19);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[0], x11p18);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[8], x12p17);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[11], x13p16);
let m0821a = WasmVector::fmadd(m0821a, self.twiddles_re[3], x14p15);
let m0821b = WasmVector::mul(self.twiddles_im[7], x1m28);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[12], x2m27);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[4], x3m26);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[2], x4m25);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[10], x5m24);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[9], x6m23);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[1], x7m22);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[5], x8m21);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[13], x9m20);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[6], x10m19);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[0], x11m18);
let m0821b = WasmVector::fmadd(m0821b, self.twiddles_im[8], x12m17);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[11], x13m16);
let m0821b = WasmVector::nmadd(m0821b, self.twiddles_im[3], x14m15);
let [y08, y21] = WasmVector::column_butterfly2([m0821a, m0821b]);
let m0920a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p28);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[10], x2p27);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[1], x3p26);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[6], x4p25);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[12], x5p24);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[3], x6p23);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[4], x7p22);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[13], x8p21);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[5], x9p20);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[2], x10p19);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[11], x11p18);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[7], x12p17);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[0], x13p16);
let m0920a = WasmVector::fmadd(m0920a, self.twiddles_re[9], x14p15);
let m0920b = WasmVector::mul(self.twiddles_im[8], x1m28);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[10], x2m27);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[1], x3m26);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[6], x4m25);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[12], x5m24);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[3], x6m23);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[4], x7m22);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[13], x8m21);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[5], x9m20);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[2], x10m19);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[11], x11m18);
let m0920b = WasmVector::nmadd(m0920b, self.twiddles_im[7], x12m17);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[0], x13m16);
let m0920b = WasmVector::fmadd(m0920b, self.twiddles_im[9], x14m15);
let [y09, y20] = WasmVector::column_butterfly2([m0920a, m0920b]);
let m1019a = WasmVector::fmadd(values[0], self.twiddles_re[9], x1p28);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[8], x2p27);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[0], x3p26);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[10], x4p25);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[7], x5p24);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[1], x6p23);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[11], x7p22);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[6], x8p21);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[2], x9p20);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[12], x10p19);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[5], x11p18);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[3], x12p17);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[13], x13p16);
let m1019a = WasmVector::fmadd(m1019a, self.twiddles_re[4], x14p15);
let m1019b = WasmVector::mul(self.twiddles_im[9], x1m28);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[8], x2m27);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[0], x3m26);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[10], x4m25);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[7], x5m24);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[1], x6m23);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[11], x7m22);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[6], x8m21);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[2], x9m20);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[12], x10m19);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[5], x11m18);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[3], x12m17);
let m1019b = WasmVector::fmadd(m1019b, self.twiddles_im[13], x13m16);
let m1019b = WasmVector::nmadd(m1019b, self.twiddles_im[4], x14m15);
let [y10, y19] = WasmVector::column_butterfly2([m1019a, m1019b]);
let m1118a = WasmVector::fmadd(values[0], self.twiddles_re[10], x1p28);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[6], x2p27);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[3], x3p26);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[13], x4p25);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[2], x5p24);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[7], x6p23);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[9], x7p22);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[0], x8p21);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[11], x9p20);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[5], x10p19);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[4], x11p18);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[12], x12p17);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[1], x13p16);
let m1118a = WasmVector::fmadd(m1118a, self.twiddles_re[8], x14p15);
let m1118b = WasmVector::mul(self.twiddles_im[10], x1m28);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[6], x2m27);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[3], x3m26);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[13], x4m25);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[2], x5m24);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[7], x6m23);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[9], x7m22);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[0], x8m21);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[11], x9m20);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[5], x10m19);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[4], x11m18);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[12], x12m17);
let m1118b = WasmVector::nmadd(m1118b, self.twiddles_im[1], x13m16);
let m1118b = WasmVector::fmadd(m1118b, self.twiddles_im[8], x14m15);
let [y11, y18] = WasmVector::column_butterfly2([m1118a, m1118b]);
let m1217a = WasmVector::fmadd(values[0], self.twiddles_re[11], x1p28);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[4], x2p27);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[6], x3p26);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[9], x4p25);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[1], x5p24);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[13], x6p23);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[2], x7p22);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[8], x8p21);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[7], x9p20);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[3], x10p19);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[12], x11p18);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[0], x12p17);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[10], x13p16);
let m1217a = WasmVector::fmadd(m1217a, self.twiddles_re[5], x14p15);
let m1217b = WasmVector::mul(self.twiddles_im[11], x1m28);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[4], x2m27);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[6], x3m26);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[9], x4m25);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[1], x5m24);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[13], x6m23);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[2], x7m22);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[8], x8m21);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[7], x9m20);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[3], x10m19);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[12], x11m18);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[0], x12m17);
let m1217b = WasmVector::fmadd(m1217b, self.twiddles_im[10], x13m16);
let m1217b = WasmVector::nmadd(m1217b, self.twiddles_im[5], x14m15);
let [y12, y17] = WasmVector::column_butterfly2([m1217a, m1217b]);
let m1316a = WasmVector::fmadd(values[0], self.twiddles_re[12], x1p28);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[2], x2p27);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[9], x3p26);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[5], x4p25);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[6], x5p24);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[8], x6p23);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[3], x7p22);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[11], x8p21);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[0], x9p20);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[13], x10p19);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[1], x11p18);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[10], x12p17);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[4], x13p16);
let m1316a = WasmVector::fmadd(m1316a, self.twiddles_re[7], x14p15);
let m1316b = WasmVector::mul(self.twiddles_im[12], x1m28);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[2], x2m27);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[9], x3m26);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[5], x4m25);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[6], x5m24);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[8], x6m23);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[3], x7m22);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[11], x8m21);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[0], x9m20);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[13], x10m19);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[1], x11m18);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[10], x12m17);
let m1316b = WasmVector::nmadd(m1316b, self.twiddles_im[4], x13m16);
let m1316b = WasmVector::fmadd(m1316b, self.twiddles_im[7], x14m15);
let [y13, y16] = WasmVector::column_butterfly2([m1316a, m1316b]);
let m1415a = WasmVector::fmadd(values[0], self.twiddles_re[13], x1p28);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[0], x2p27);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[12], x3p26);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[1], x4p25);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[11], x5p24);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[2], x6p23);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[10], x7p22);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[3], x8p21);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[9], x9p20);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[4], x10p19);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[8], x11p18);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[5], x12p17);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[7], x13p16);
let m1415a = WasmVector::fmadd(m1415a, self.twiddles_re[6], x14p15);
let m1415b = WasmVector::mul(self.twiddles_im[13], x1m28);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[0], x2m27);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[12], x3m26);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[1], x4m25);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[11], x5m24);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[2], x6m23);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[10], x7m22);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[3], x8m21);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[9], x9m20);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[4], x10m19);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[8], x11m18);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[5], x12m17);
let m1415b = WasmVector::fmadd(m1415b, self.twiddles_im[7], x13m16);
let m1415b = WasmVector::nmadd(m1415b, self.twiddles_im[6], x14m15);
let [y14, y15] = WasmVector::column_butterfly2([m1415a, m1415b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18, y19, y20, y21, y22, y23, y24, y25, y26, y27, y28]
}
}
/// Length-31 FFT butterfly operating on `WasmVector32` values.
///
/// The constructor fills both twiddle tables from one `make_twiddles(31, direction)`
/// call: every twiddle's real part is broadcast into an entry of `twiddles_re` and
/// its imaginary part into the matching entry of `twiddles_im`.
struct WasmSimdF32Butterfly31<T> {
// Direction this instance was constructed with.
direction: FftDirection,
// Broadcast real parts of the 15 twiddle factors.
twiddles_re: [WasmVector32; 15],
// Broadcast imaginary parts of the same 15 twiddle factors.
twiddles_im: [WasmVector32; 15],
// Zero-sized marker carrying the otherwise unused type parameter `T`.
_phantom: std::marker::PhantomData<T>,
}
// Expands the shared f32 butterfly boilerplate macro for this type. It is handed the
// type name, the FFT length (31) and a closure that reads the stored direction.
// NOTE(review): the macro is defined outside this chunk; presumably it provides the
// `Fft` / `Length` / `Direction` trait plumbing -- confirm against its definition.
boilerplate_fft_wasm_simd_f32_butterfly!(WasmSimdF32Butterfly31, 31, |this: &WasmSimdF32Butterfly31<_>| this.direction);
impl<T: FftNum> WasmSimdF32Butterfly31<T> {
/// Builds a length-31 butterfly for `direction`.
///
/// `assert_f32::<T>()` runs first (presumably rejecting any `T` other than `f32` --
/// confirm in `wasm_simd_common`). Both twiddle tables are then derived from a single
/// `make_twiddles(31, direction)` call, with each real / imaginary part broadcast
/// into its own vector.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "simd128")]`; the caller must ensure
/// that feature is available.
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
    assert_f32::<T>();

    // One twiddle computation feeds both tables.
    let tw = make_twiddles(31, direction);
    let re_table = tw.map(|c| WasmVector::broadcast_scalar(c.re));
    let im_table = tw.map(|c| WasmVector::broadcast_scalar(c.im));

    Self {
        direction,
        twiddles_re: re_table,
        twiddles_im: im_table,
        _phantom: std::marker::PhantomData,
    }
}
/// Runs one length-31 FFT over `buffer`, writing the result back to the same
/// 31 complex slots it was read from.
///
/// Slots 0..=30 are loaded with `read_partial1_complex_to_array!`, transformed by
/// `perform_parallel_fft_direct`, and stored with `write_partial_lo_complex_to_array!`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let input = read_partial1_complex_to_array!(
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }
    );

    let spectrum = self.perform_parallel_fft_direct(input);

    write_partial_lo_complex_to_array!(
        spectrum,
        buffer,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }
    );
}
/// Runs the length-31 butterfly on vectors assembled from 31 packed loads of
/// `buffer`, then re-packs the results and stores them back.
///
/// The loads use the even complex indices 0, 2, ..., 60. Entry `k` handed to
/// `perform_parallel_fft_direct` combines one half of packed vector `k / 2` with one
/// half of packed vector `15 + (k + 1) / 2`; the output is re-packed with the
/// mirror-image pattern before being written with stride 2.
/// NOTE(review): presumably each `WasmVector32` holds two complex `f32` values
/// (lo / hi), so this processes two back-to-back 31-point FFTs -- confirm against
/// `wasm_simd_utils`.
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f32>) {
    let packed_in = read_complex_to_array!(
        buffer,
        { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60 }
    );

    // Even entries take lo/hi halves, odd entries take hi/lo halves.
    let columns = [
        extract_lo_hi_f32(packed_in[0], packed_in[15]),
        extract_hi_lo_f32(packed_in[0], packed_in[16]),
        extract_lo_hi_f32(packed_in[1], packed_in[16]),
        extract_hi_lo_f32(packed_in[1], packed_in[17]),
        extract_lo_hi_f32(packed_in[2], packed_in[17]),
        extract_hi_lo_f32(packed_in[2], packed_in[18]),
        extract_lo_hi_f32(packed_in[3], packed_in[18]),
        extract_hi_lo_f32(packed_in[3], packed_in[19]),
        extract_lo_hi_f32(packed_in[4], packed_in[19]),
        extract_hi_lo_f32(packed_in[4], packed_in[20]),
        extract_lo_hi_f32(packed_in[5], packed_in[20]),
        extract_hi_lo_f32(packed_in[5], packed_in[21]),
        extract_lo_hi_f32(packed_in[6], packed_in[21]),
        extract_hi_lo_f32(packed_in[6], packed_in[22]),
        extract_lo_hi_f32(packed_in[7], packed_in[22]),
        extract_hi_lo_f32(packed_in[7], packed_in[23]),
        extract_lo_hi_f32(packed_in[8], packed_in[23]),
        extract_hi_lo_f32(packed_in[8], packed_in[24]),
        extract_lo_hi_f32(packed_in[9], packed_in[24]),
        extract_hi_lo_f32(packed_in[9], packed_in[25]),
        extract_lo_hi_f32(packed_in[10], packed_in[25]),
        extract_hi_lo_f32(packed_in[10], packed_in[26]),
        extract_lo_hi_f32(packed_in[11], packed_in[26]),
        extract_hi_lo_f32(packed_in[11], packed_in[27]),
        extract_lo_hi_f32(packed_in[12], packed_in[27]),
        extract_hi_lo_f32(packed_in[12], packed_in[28]),
        extract_lo_hi_f32(packed_in[13], packed_in[28]),
        extract_hi_lo_f32(packed_in[13], packed_in[29]),
        extract_lo_hi_f32(packed_in[14], packed_in[29]),
        extract_hi_lo_f32(packed_in[14], packed_in[30]),
        extract_lo_hi_f32(packed_in[15], packed_in[30]),
    ];

    let spectrum = self.perform_parallel_fft_direct(columns);

    let packed_out = [
        // Slots 0..=14: lo halves of consecutive outputs.
        extract_lo_lo_f32(spectrum[0], spectrum[1]),
        extract_lo_lo_f32(spectrum[2], spectrum[3]),
        extract_lo_lo_f32(spectrum[4], spectrum[5]),
        extract_lo_lo_f32(spectrum[6], spectrum[7]),
        extract_lo_lo_f32(spectrum[8], spectrum[9]),
        extract_lo_lo_f32(spectrum[10], spectrum[11]),
        extract_lo_lo_f32(spectrum[12], spectrum[13]),
        extract_lo_lo_f32(spectrum[14], spectrum[15]),
        extract_lo_lo_f32(spectrum[16], spectrum[17]),
        extract_lo_lo_f32(spectrum[18], spectrum[19]),
        extract_lo_lo_f32(spectrum[20], spectrum[21]),
        extract_lo_lo_f32(spectrum[22], spectrum[23]),
        extract_lo_lo_f32(spectrum[24], spectrum[25]),
        extract_lo_lo_f32(spectrum[26], spectrum[27]),
        extract_lo_lo_f32(spectrum[28], spectrum[29]),
        // Slot 15: the seam -- lo half of the last output, hi half of the first.
        extract_lo_hi_f32(spectrum[30], spectrum[0]),
        // Slots 16..=30: hi halves of consecutive outputs.
        extract_hi_hi_f32(spectrum[1], spectrum[2]),
        extract_hi_hi_f32(spectrum[3], spectrum[4]),
        extract_hi_hi_f32(spectrum[5], spectrum[6]),
        extract_hi_hi_f32(spectrum[7], spectrum[8]),
        extract_hi_hi_f32(spectrum[9], spectrum[10]),
        extract_hi_hi_f32(spectrum[11], spectrum[12]),
        extract_hi_hi_f32(spectrum[13], spectrum[14]),
        extract_hi_hi_f32(spectrum[15], spectrum[16]),
        extract_hi_hi_f32(spectrum[17], spectrum[18]),
        extract_hi_hi_f32(spectrum[19], spectrum[20]),
        extract_hi_hi_f32(spectrum[21], spectrum[22]),
        extract_hi_hi_f32(spectrum[23], spectrum[24]),
        extract_hi_hi_f32(spectrum[25], spectrum[26]),
        extract_hi_hi_f32(spectrum[27], spectrum[28]),
        extract_hi_hi_f32(spectrum[29], spectrum[30]),
    ];

    write_complex_to_array_strided!(
        packed_out,
        buffer,
        2,
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }
    );
}
#[inline(always)]
pub(crate) unsafe fn perform_parallel_fft_direct(&self, values: [WasmVector32; 31]) -> [WasmVector32; 31] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
let y00 = values[0];
let [x1p30, x1m30] = WasmVector::column_butterfly2([values[1], values[30]]);
let x1m30 = WasmVector::apply_rotate90(rotate, x1m30);
let y00 = WasmVector::add(y00, x1p30);
let [x2p29, x2m29] = WasmVector::column_butterfly2([values[2], values[29]]);
let x2m29 = WasmVector::apply_rotate90(rotate, x2m29);
let y00 = WasmVector::add(y00, x2p29);
let [x3p28, x3m28] = WasmVector::column_butterfly2([values[3], values[28]]);
let x3m28 = WasmVector::apply_rotate90(rotate, x3m28);
let y00 = WasmVector::add(y00, x3p28);
let [x4p27, x4m27] = WasmVector::column_butterfly2([values[4], values[27]]);
let x4m27 = WasmVector::apply_rotate90(rotate, x4m27);
let y00 = WasmVector::add(y00, x4p27);
let [x5p26, x5m26] = WasmVector::column_butterfly2([values[5], values[26]]);
let x5m26 = WasmVector::apply_rotate90(rotate, x5m26);
let y00 = WasmVector::add(y00, x5p26);
let [x6p25, x6m25] = WasmVector::column_butterfly2([values[6], values[25]]);
let x6m25 = WasmVector::apply_rotate90(rotate, x6m25);
let y00 = WasmVector::add(y00, x6p25);
let [x7p24, x7m24] = WasmVector::column_butterfly2([values[7], values[24]]);
let x7m24 = WasmVector::apply_rotate90(rotate, x7m24);
let y00 = WasmVector::add(y00, x7p24);
let [x8p23, x8m23] = WasmVector::column_butterfly2([values[8], values[23]]);
let x8m23 = WasmVector::apply_rotate90(rotate, x8m23);
let y00 = WasmVector::add(y00, x8p23);
let [x9p22, x9m22] = WasmVector::column_butterfly2([values[9], values[22]]);
let x9m22 = WasmVector::apply_rotate90(rotate, x9m22);
let y00 = WasmVector::add(y00, x9p22);
let [x10p21, x10m21] = WasmVector::column_butterfly2([values[10], values[21]]);
let x10m21 = WasmVector::apply_rotate90(rotate, x10m21);
let y00 = WasmVector::add(y00, x10p21);
let [x11p20, x11m20] = WasmVector::column_butterfly2([values[11], values[20]]);
let x11m20 = WasmVector::apply_rotate90(rotate, x11m20);
let y00 = WasmVector::add(y00, x11p20);
let [x12p19, x12m19] = WasmVector::column_butterfly2([values[12], values[19]]);
let x12m19 = WasmVector::apply_rotate90(rotate, x12m19);
let y00 = WasmVector::add(y00, x12p19);
let [x13p18, x13m18] = WasmVector::column_butterfly2([values[13], values[18]]);
let x13m18 = WasmVector::apply_rotate90(rotate, x13m18);
let y00 = WasmVector::add(y00, x13p18);
let [x14p17, x14m17] = WasmVector::column_butterfly2([values[14], values[17]]);
let x14m17 = WasmVector::apply_rotate90(rotate, x14m17);
let y00 = WasmVector::add(y00, x14p17);
let [x15p16, x15m16] = WasmVector::column_butterfly2([values[15], values[16]]);
let x15m16 = WasmVector::apply_rotate90(rotate, x15m16);
let y00 = WasmVector::add(y00, x15p16);
let m0130a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p30);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[1], x2p29);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[2], x3p28);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[3], x4p27);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[4], x5p26);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[5], x6p25);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[6], x7p24);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[7], x8p23);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[8], x9p22);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[9], x10p21);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[10], x11p20);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[11], x12p19);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[12], x13p18);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[13], x14p17);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[14], x15p16);
let m0130b = WasmVector::mul(self.twiddles_im[0], x1m30);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[1], x2m29);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[2], x3m28);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[3], x4m27);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[4], x5m26);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[5], x6m25);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[6], x7m24);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[7], x8m23);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[8], x9m22);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[9], x10m21);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[10], x11m20);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[11], x12m19);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[12], x13m18);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[13], x14m17);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[14], x15m16);
let [y01, y30] = WasmVector::column_butterfly2([m0130a, m0130b]);
let m0229a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p30);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[3], x2p29);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[5], x3p28);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[7], x4p27);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[9], x5p26);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[11], x6p25);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[13], x7p24);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[14], x8p23);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[12], x9p22);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[10], x10p21);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[8], x11p20);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[6], x12p19);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[4], x13p18);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[2], x14p17);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[0], x15p16);
let m0229b = WasmVector::mul(self.twiddles_im[1], x1m30);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[3], x2m29);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[5], x3m28);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[7], x4m27);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[9], x5m26);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[11], x6m25);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[13], x7m24);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[14], x8m23);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[12], x9m22);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[10], x10m21);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[8], x11m20);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[6], x12m19);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[4], x13m18);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[2], x14m17);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[0], x15m16);
let [y02, y29] = WasmVector::column_butterfly2([m0229a, m0229b]);
let m0328a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p30);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[5], x2p29);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[8], x3p28);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[11], x4p27);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[14], x5p26);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[12], x6p25);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[9], x7p24);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[6], x8p23);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[3], x9p22);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[0], x10p21);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[1], x11p20);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[4], x12p19);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[7], x13p18);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[10], x14p17);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[13], x15p16);
let m0328b = WasmVector::mul(self.twiddles_im[2], x1m30);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[5], x2m29);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[8], x3m28);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[11], x4m27);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[14], x5m26);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[12], x6m25);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[9], x7m24);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[6], x8m23);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[3], x9m22);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[0], x10m21);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[1], x11m20);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[4], x12m19);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[7], x13m18);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[10], x14m17);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[13], x15m16);
let [y03, y28] = WasmVector::column_butterfly2([m0328a, m0328b]);
let m0427a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p30);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[7], x2p29);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[11], x3p28);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[14], x4p27);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[10], x5p26);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[6], x6p25);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[2], x7p24);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[0], x8p23);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[4], x9p22);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[8], x10p21);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[12], x11p20);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[13], x12p19);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[9], x13p18);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[5], x14p17);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[1], x15p16);
let m0427b = WasmVector::mul(self.twiddles_im[3], x1m30);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[7], x2m29);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[11], x3m28);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[14], x4m27);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[10], x5m26);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[6], x6m25);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[2], x7m24);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[0], x8m23);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[4], x9m22);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[8], x10m21);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[12], x11m20);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[13], x12m19);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[9], x13m18);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[5], x14m17);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[1], x15m16);
let [y04, y27] = WasmVector::column_butterfly2([m0427a, m0427b]);
let m0526a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p30);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[9], x2p29);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[14], x3p28);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[10], x4p27);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[5], x5p26);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[0], x6p25);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[3], x7p24);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[8], x8p23);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[13], x9p22);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[11], x10p21);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[6], x11p20);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[1], x12p19);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[2], x13p18);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[7], x14p17);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[12], x15p16);
let m0526b = WasmVector::mul(self.twiddles_im[4], x1m30);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[9], x2m29);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[14], x3m28);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[10], x4m27);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[5], x5m26);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[0], x6m25);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[3], x7m24);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[8], x8m23);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[13], x9m22);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[11], x10m21);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[6], x11m20);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[1], x12m19);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[2], x13m18);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[7], x14m17);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[12], x15m16);
let [y05, y26] = WasmVector::column_butterfly2([m0526a, m0526b]);
let m0625a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p30);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[11], x2p29);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[12], x3p28);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[6], x4p27);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[0], x5p26);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[4], x6p25);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[10], x7p24);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[13], x8p23);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[7], x9p22);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[1], x10p21);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[3], x11p20);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[9], x12p19);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[14], x13p18);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[8], x14p17);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[2], x15p16);
let m0625b = WasmVector::mul(self.twiddles_im[5], x1m30);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[11], x2m29);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[12], x3m28);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[6], x4m27);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[0], x5m26);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[4], x6m25);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[10], x7m24);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[13], x8m23);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[7], x9m22);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[1], x10m21);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[3], x11m20);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[9], x12m19);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[14], x13m18);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[8], x14m17);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[2], x15m16);
let [y06, y25] = WasmVector::column_butterfly2([m0625a, m0625b]);
let m0724a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p30);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[13], x2p29);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[9], x3p28);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[2], x4p27);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[3], x5p26);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[10], x6p25);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[12], x7p24);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[5], x8p23);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[0], x9p22);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[7], x10p21);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[14], x11p20);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[8], x12p19);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[1], x13p18);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[4], x14p17);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[11], x15p16);
let m0724b = WasmVector::mul(self.twiddles_im[6], x1m30);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[13], x2m29);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[9], x3m28);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[2], x4m27);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[3], x5m26);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[10], x6m25);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[12], x7m24);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[5], x8m23);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[0], x9m22);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[7], x10m21);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[14], x11m20);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[8], x12m19);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[1], x13m18);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[4], x14m17);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[11], x15m16);
let [y07, y24] = WasmVector::column_butterfly2([m0724a, m0724b]);
let m0823a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p30);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[14], x2p29);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[6], x3p28);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[0], x4p27);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[8], x5p26);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[13], x6p25);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[5], x7p24);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[1], x8p23);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[9], x9p22);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[12], x10p21);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[4], x11p20);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[2], x12p19);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[10], x13p18);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[11], x14p17);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[3], x15p16);
let m0823b = WasmVector::mul(self.twiddles_im[7], x1m30);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[14], x2m29);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[6], x3m28);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[0], x4m27);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[8], x5m26);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[13], x6m25);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[5], x7m24);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[1], x8m23);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[9], x9m22);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[12], x10m21);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[4], x11m20);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[2], x12m19);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[10], x13m18);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[11], x14m17);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[3], x15m16);
let [y08, y23] = WasmVector::column_butterfly2([m0823a, m0823b]);
let m0922a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p30);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[12], x2p29);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[3], x3p28);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[4], x4p27);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[13], x5p26);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[7], x6p25);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[0], x7p24);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[9], x8p23);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[11], x9p22);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[2], x10p21);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[5], x11p20);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[14], x12p19);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[6], x13p18);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[1], x14p17);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[10], x15p16);
let m0922b = WasmVector::mul(self.twiddles_im[8], x1m30);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[12], x2m29);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[3], x3m28);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[4], x4m27);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[13], x5m26);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[7], x6m25);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[0], x7m24);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[9], x8m23);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[11], x9m22);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[2], x10m21);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[5], x11m20);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[14], x12m19);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[6], x13m18);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[1], x14m17);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[10], x15m16);
let [y09, y22] = WasmVector::column_butterfly2([m0922a, m0922b]);
let m1021a = WasmVector::fmadd(values[0], self.twiddles_re[9], x1p30);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[10], x2p29);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[0], x3p28);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[8], x4p27);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[11], x5p26);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[1], x6p25);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[7], x7p24);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[12], x8p23);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[2], x9p22);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[6], x10p21);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[13], x11p20);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[3], x12p19);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[5], x13p18);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[14], x14p17);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[4], x15p16);
let m1021b = WasmVector::mul(self.twiddles_im[9], x1m30);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[10], x2m29);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[0], x3m28);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[8], x4m27);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[11], x5m26);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[1], x6m25);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[7], x7m24);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[12], x8m23);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[2], x9m22);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[6], x10m21);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[13], x11m20);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[3], x12m19);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[5], x13m18);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[14], x14m17);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[4], x15m16);
let [y10, y21] = WasmVector::column_butterfly2([m1021a, m1021b]);
let m1120a = WasmVector::fmadd(values[0], self.twiddles_re[10], x1p30);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[8], x2p29);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[1], x3p28);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[12], x4p27);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[6], x5p26);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[3], x6p25);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[14], x7p24);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[4], x8p23);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[5], x9p22);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[13], x10p21);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[2], x11p20);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[7], x12p19);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[11], x13p18);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[0], x14p17);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[9], x15p16);
let m1120b = WasmVector::mul(self.twiddles_im[10], x1m30);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[8], x2m29);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[1], x3m28);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[12], x4m27);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[6], x5m26);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[3], x6m25);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[14], x7m24);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[4], x8m23);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[5], x9m22);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[13], x10m21);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[2], x11m20);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[7], x12m19);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[11], x13m18);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[0], x14m17);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[9], x15m16);
let [y11, y20] = WasmVector::column_butterfly2([m1120a, m1120b]);
let m1219a = WasmVector::fmadd(values[0], self.twiddles_re[11], x1p30);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[6], x2p29);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[4], x3p28);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[13], x4p27);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[1], x5p26);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[9], x6p25);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[8], x7p24);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[2], x8p23);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[14], x9p22);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[3], x10p21);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[7], x11p20);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[10], x12p19);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[0], x13p18);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[12], x14p17);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[5], x15p16);
let m1219b = WasmVector::mul(self.twiddles_im[11], x1m30);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[6], x2m29);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[4], x3m28);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[13], x4m27);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[1], x5m26);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[9], x6m25);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[8], x7m24);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[2], x8m23);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[14], x9m22);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[3], x10m21);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[7], x11m20);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[10], x12m19);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[0], x13m18);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[12], x14m17);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[5], x15m16);
let [y12, y19] = WasmVector::column_butterfly2([m1219a, m1219b]);
let m1318a = WasmVector::fmadd(values[0], self.twiddles_re[12], x1p30);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[4], x2p29);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[7], x3p28);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[9], x4p27);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[2], x5p26);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[14], x6p25);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[1], x7p24);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[10], x8p23);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[6], x9p22);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[5], x10p21);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[11], x11p20);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[0], x12p19);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[13], x13p18);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[3], x14p17);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[8], x15p16);
let m1318b = WasmVector::mul(self.twiddles_im[12], x1m30);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[4], x2m29);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[7], x3m28);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[9], x4m27);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[2], x5m26);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[14], x6m25);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[1], x7m24);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[10], x8m23);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[6], x9m22);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[5], x10m21);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[11], x11m20);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[0], x12m19);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[13], x13m18);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[3], x14m17);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[8], x15m16);
let [y13, y18] = WasmVector::column_butterfly2([m1318a, m1318b]);
let m1417a = WasmVector::fmadd(values[0], self.twiddles_re[13], x1p30);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[2], x2p29);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[10], x3p28);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[5], x4p27);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[7], x5p26);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[8], x6p25);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[4], x7p24);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[11], x8p23);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[1], x9p22);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[14], x10p21);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[0], x11p20);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[12], x12p19);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[3], x13p18);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[9], x14p17);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[6], x15p16);
let m1417b = WasmVector::mul(self.twiddles_im[13], x1m30);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[2], x2m29);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[10], x3m28);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[5], x4m27);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[7], x5m26);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[8], x6m25);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[4], x7m24);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[11], x8m23);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[1], x9m22);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[14], x10m21);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[0], x11m20);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[12], x12m19);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[3], x13m18);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[9], x14m17);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[6], x15m16);
let [y14, y17] = WasmVector::column_butterfly2([m1417a, m1417b]);
let m1516a = WasmVector::fmadd(values[0], self.twiddles_re[14], x1p30);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[0], x2p29);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[13], x3p28);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[1], x4p27);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[12], x5p26);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[2], x6p25);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[11], x7p24);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[3], x8p23);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[10], x9p22);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[4], x10p21);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[9], x11p20);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[5], x12p19);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[8], x13p18);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[6], x14p17);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[7], x15p16);
let m1516b = WasmVector::mul(self.twiddles_im[14], x1m30);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[0], x2m29);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[13], x3m28);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[1], x4m27);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[12], x5m26);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[2], x6m25);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[11], x7m24);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[3], x8m23);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[10], x9m22);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[4], x10m21);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[9], x11m20);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[5], x12m19);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[8], x13m18);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[6], x14m17);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[7], x15m16);
let [y15, y16] = WasmVector::column_butterfly2([m1516a, m1516b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18, y19, y20, y21, y22, y23, y24, y25, y26, y27, y28, y29, y30]
}
}
/// Length-31 FFT butterfly operating on `WasmVector64` values.
///
/// The constructor fills both twiddle tables from `make_twiddles(31, direction)`,
/// broadcasting the real and imaginary parts of each factor into separate vectors.
struct WasmSimdF64Butterfly31<T> {
/// Direction this instance was constructed with.
direction: FftDirection,
/// Real parts of the 15 twiddle factors, one broadcast vector per factor.
twiddles_re: [WasmVector64; 15],
/// Imaginary parts of the 15 twiddle factors, one broadcast vector per factor.
twiddles_im: [WasmVector64; 15],
/// Zero-sized marker that carries the type parameter `T`; no `T` value is stored.
_phantom: std::marker::PhantomData<T>,
}
// Expands the shared f64 butterfly boilerplate for this struct, passing the FFT
// length (31) and a closure that reads the stored direction.
// NOTE(review): presumably this generates the `Fft`/`Length`/`Direction` impls —
// confirm against the macro definition.
boilerplate_fft_wasm_simd_f64_butterfly!(WasmSimdF64Butterfly31, 31, |this: &WasmSimdF64Butterfly31<_>| this.direction);
impl<T: FftNum> WasmSimdF64Butterfly31<T> {
#[target_feature(enable = "simd128")]
unsafe fn new(direction: FftDirection) -> Self {
assert_f64::<T>();
let twiddles = make_twiddles(31, direction);
unsafe {Self {
direction,
twiddles_re: twiddles.map(|t| WasmVector::broadcast_scalar(t.re)),
twiddles_im: twiddles.map(|t| WasmVector::broadcast_scalar(t.im)),
_phantom: std::marker::PhantomData,
}}
}
/// Runs the 31-point butterfly on `buffer`.
///
/// Indices 0 through 30 are read with `read_complex_to_array!`, passed through
/// `perform_fft_direct`, and the result is handed to `write_complex_to_array!`
/// with the same index list and the same `buffer`.
#[inline(always)]
pub(crate) unsafe fn perform_fft_contiguous(&self, mut buffer: impl WasmSimdArrayMut<f64>) {
    let input = read_complex_to_array!(buffer, {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
    });
    let output = self.perform_fft_direct(input);
    write_complex_to_array!(output, buffer, {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
    });
}
#[inline(always)]
pub(crate) unsafe fn perform_fft_direct(&self, values: [WasmVector64; 31]) -> [WasmVector64; 31] {
let rotate = WasmVector::make_rotate90(FftDirection::Inverse);
let y00 = values[0];
let [x1p30, x1m30] = WasmVector::column_butterfly2([values[1], values[30]]);
let x1m30 = WasmVector::apply_rotate90(rotate, x1m30);
let y00 = WasmVector::add(y00, x1p30);
let [x2p29, x2m29] = WasmVector::column_butterfly2([values[2], values[29]]);
let x2m29 = WasmVector::apply_rotate90(rotate, x2m29);
let y00 = WasmVector::add(y00, x2p29);
let [x3p28, x3m28] = WasmVector::column_butterfly2([values[3], values[28]]);
let x3m28 = WasmVector::apply_rotate90(rotate, x3m28);
let y00 = WasmVector::add(y00, x3p28);
let [x4p27, x4m27] = WasmVector::column_butterfly2([values[4], values[27]]);
let x4m27 = WasmVector::apply_rotate90(rotate, x4m27);
let y00 = WasmVector::add(y00, x4p27);
let [x5p26, x5m26] = WasmVector::column_butterfly2([values[5], values[26]]);
let x5m26 = WasmVector::apply_rotate90(rotate, x5m26);
let y00 = WasmVector::add(y00, x5p26);
let [x6p25, x6m25] = WasmVector::column_butterfly2([values[6], values[25]]);
let x6m25 = WasmVector::apply_rotate90(rotate, x6m25);
let y00 = WasmVector::add(y00, x6p25);
let [x7p24, x7m24] = WasmVector::column_butterfly2([values[7], values[24]]);
let x7m24 = WasmVector::apply_rotate90(rotate, x7m24);
let y00 = WasmVector::add(y00, x7p24);
let [x8p23, x8m23] = WasmVector::column_butterfly2([values[8], values[23]]);
let x8m23 = WasmVector::apply_rotate90(rotate, x8m23);
let y00 = WasmVector::add(y00, x8p23);
let [x9p22, x9m22] = WasmVector::column_butterfly2([values[9], values[22]]);
let x9m22 = WasmVector::apply_rotate90(rotate, x9m22);
let y00 = WasmVector::add(y00, x9p22);
let [x10p21, x10m21] = WasmVector::column_butterfly2([values[10], values[21]]);
let x10m21 = WasmVector::apply_rotate90(rotate, x10m21);
let y00 = WasmVector::add(y00, x10p21);
let [x11p20, x11m20] = WasmVector::column_butterfly2([values[11], values[20]]);
let x11m20 = WasmVector::apply_rotate90(rotate, x11m20);
let y00 = WasmVector::add(y00, x11p20);
let [x12p19, x12m19] = WasmVector::column_butterfly2([values[12], values[19]]);
let x12m19 = WasmVector::apply_rotate90(rotate, x12m19);
let y00 = WasmVector::add(y00, x12p19);
let [x13p18, x13m18] = WasmVector::column_butterfly2([values[13], values[18]]);
let x13m18 = WasmVector::apply_rotate90(rotate, x13m18);
let y00 = WasmVector::add(y00, x13p18);
let [x14p17, x14m17] = WasmVector::column_butterfly2([values[14], values[17]]);
let x14m17 = WasmVector::apply_rotate90(rotate, x14m17);
let y00 = WasmVector::add(y00, x14p17);
let [x15p16, x15m16] = WasmVector::column_butterfly2([values[15], values[16]]);
let x15m16 = WasmVector::apply_rotate90(rotate, x15m16);
let y00 = WasmVector::add(y00, x15p16);
let m0130a = WasmVector::fmadd(values[0], self.twiddles_re[0], x1p30);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[1], x2p29);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[2], x3p28);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[3], x4p27);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[4], x5p26);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[5], x6p25);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[6], x7p24);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[7], x8p23);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[8], x9p22);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[9], x10p21);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[10], x11p20);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[11], x12p19);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[12], x13p18);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[13], x14p17);
let m0130a = WasmVector::fmadd(m0130a, self.twiddles_re[14], x15p16);
let m0130b = WasmVector::mul(self.twiddles_im[0], x1m30);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[1], x2m29);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[2], x3m28);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[3], x4m27);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[4], x5m26);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[5], x6m25);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[6], x7m24);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[7], x8m23);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[8], x9m22);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[9], x10m21);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[10], x11m20);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[11], x12m19);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[12], x13m18);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[13], x14m17);
let m0130b = WasmVector::fmadd(m0130b, self.twiddles_im[14], x15m16);
let [y01, y30] = WasmVector::column_butterfly2([m0130a, m0130b]);
let m0229a = WasmVector::fmadd(values[0], self.twiddles_re[1], x1p30);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[3], x2p29);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[5], x3p28);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[7], x4p27);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[9], x5p26);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[11], x6p25);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[13], x7p24);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[14], x8p23);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[12], x9p22);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[10], x10p21);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[8], x11p20);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[6], x12p19);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[4], x13p18);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[2], x14p17);
let m0229a = WasmVector::fmadd(m0229a, self.twiddles_re[0], x15p16);
let m0229b = WasmVector::mul(self.twiddles_im[1], x1m30);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[3], x2m29);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[5], x3m28);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[7], x4m27);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[9], x5m26);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[11], x6m25);
let m0229b = WasmVector::fmadd(m0229b, self.twiddles_im[13], x7m24);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[14], x8m23);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[12], x9m22);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[10], x10m21);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[8], x11m20);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[6], x12m19);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[4], x13m18);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[2], x14m17);
let m0229b = WasmVector::nmadd(m0229b, self.twiddles_im[0], x15m16);
let [y02, y29] = WasmVector::column_butterfly2([m0229a, m0229b]);
let m0328a = WasmVector::fmadd(values[0], self.twiddles_re[2], x1p30);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[5], x2p29);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[8], x3p28);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[11], x4p27);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[14], x5p26);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[12], x6p25);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[9], x7p24);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[6], x8p23);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[3], x9p22);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[0], x10p21);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[1], x11p20);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[4], x12p19);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[7], x13p18);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[10], x14p17);
let m0328a = WasmVector::fmadd(m0328a, self.twiddles_re[13], x15p16);
let m0328b = WasmVector::mul(self.twiddles_im[2], x1m30);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[5], x2m29);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[8], x3m28);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[11], x4m27);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[14], x5m26);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[12], x6m25);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[9], x7m24);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[6], x8m23);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[3], x9m22);
let m0328b = WasmVector::nmadd(m0328b, self.twiddles_im[0], x10m21);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[1], x11m20);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[4], x12m19);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[7], x13m18);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[10], x14m17);
let m0328b = WasmVector::fmadd(m0328b, self.twiddles_im[13], x15m16);
let [y03, y28] = WasmVector::column_butterfly2([m0328a, m0328b]);
let m0427a = WasmVector::fmadd(values[0], self.twiddles_re[3], x1p30);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[7], x2p29);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[11], x3p28);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[14], x4p27);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[10], x5p26);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[6], x6p25);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[2], x7p24);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[0], x8p23);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[4], x9p22);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[8], x10p21);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[12], x11p20);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[13], x12p19);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[9], x13p18);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[5], x14p17);
let m0427a = WasmVector::fmadd(m0427a, self.twiddles_re[1], x15p16);
let m0427b = WasmVector::mul(self.twiddles_im[3], x1m30);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[7], x2m29);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[11], x3m28);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[14], x4m27);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[10], x5m26);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[6], x6m25);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[2], x7m24);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[0], x8m23);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[4], x9m22);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[8], x10m21);
let m0427b = WasmVector::fmadd(m0427b, self.twiddles_im[12], x11m20);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[13], x12m19);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[9], x13m18);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[5], x14m17);
let m0427b = WasmVector::nmadd(m0427b, self.twiddles_im[1], x15m16);
let [y04, y27] = WasmVector::column_butterfly2([m0427a, m0427b]);
let m0526a = WasmVector::fmadd(values[0], self.twiddles_re[4], x1p30);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[9], x2p29);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[14], x3p28);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[10], x4p27);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[5], x5p26);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[0], x6p25);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[3], x7p24);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[8], x8p23);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[13], x9p22);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[11], x10p21);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[6], x11p20);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[1], x12p19);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[2], x13p18);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[7], x14p17);
let m0526a = WasmVector::fmadd(m0526a, self.twiddles_re[12], x15p16);
let m0526b = WasmVector::mul(self.twiddles_im[4], x1m30);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[9], x2m29);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[14], x3m28);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[10], x4m27);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[5], x5m26);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[0], x6m25);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[3], x7m24);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[8], x8m23);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[13], x9m22);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[11], x10m21);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[6], x11m20);
let m0526b = WasmVector::nmadd(m0526b, self.twiddles_im[1], x12m19);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[2], x13m18);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[7], x14m17);
let m0526b = WasmVector::fmadd(m0526b, self.twiddles_im[12], x15m16);
let [y05, y26] = WasmVector::column_butterfly2([m0526a, m0526b]);
let m0625a = WasmVector::fmadd(values[0], self.twiddles_re[5], x1p30);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[11], x2p29);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[12], x3p28);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[6], x4p27);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[0], x5p26);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[4], x6p25);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[10], x7p24);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[13], x8p23);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[7], x9p22);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[1], x10p21);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[3], x11p20);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[9], x12p19);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[14], x13p18);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[8], x14p17);
let m0625a = WasmVector::fmadd(m0625a, self.twiddles_re[2], x15p16);
let m0625b = WasmVector::mul(self.twiddles_im[5], x1m30);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[11], x2m29);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[12], x3m28);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[6], x4m27);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[0], x5m26);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[4], x6m25);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[10], x7m24);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[13], x8m23);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[7], x9m22);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[1], x10m21);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[3], x11m20);
let m0625b = WasmVector::fmadd(m0625b, self.twiddles_im[9], x12m19);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[14], x13m18);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[8], x14m17);
let m0625b = WasmVector::nmadd(m0625b, self.twiddles_im[2], x15m16);
let [y06, y25] = WasmVector::column_butterfly2([m0625a, m0625b]);
let m0724a = WasmVector::fmadd(values[0], self.twiddles_re[6], x1p30);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[13], x2p29);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[9], x3p28);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[2], x4p27);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[3], x5p26);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[10], x6p25);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[12], x7p24);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[5], x8p23);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[0], x9p22);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[7], x10p21);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[14], x11p20);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[8], x12p19);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[1], x13p18);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[4], x14p17);
let m0724a = WasmVector::fmadd(m0724a, self.twiddles_re[11], x15p16);
let m0724b = WasmVector::mul(self.twiddles_im[6], x1m30);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[13], x2m29);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[9], x3m28);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[2], x4m27);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[3], x5m26);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[10], x6m25);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[12], x7m24);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[5], x8m23);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[0], x9m22);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[7], x10m21);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[14], x11m20);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[8], x12m19);
let m0724b = WasmVector::nmadd(m0724b, self.twiddles_im[1], x13m18);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[4], x14m17);
let m0724b = WasmVector::fmadd(m0724b, self.twiddles_im[11], x15m16);
let [y07, y24] = WasmVector::column_butterfly2([m0724a, m0724b]);
let m0823a = WasmVector::fmadd(values[0], self.twiddles_re[7], x1p30);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[14], x2p29);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[6], x3p28);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[0], x4p27);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[8], x5p26);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[13], x6p25);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[5], x7p24);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[1], x8p23);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[9], x9p22);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[12], x10p21);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[4], x11p20);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[2], x12p19);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[10], x13p18);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[11], x14p17);
let m0823a = WasmVector::fmadd(m0823a, self.twiddles_re[3], x15p16);
let m0823b = WasmVector::mul(self.twiddles_im[7], x1m30);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[14], x2m29);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[6], x3m28);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[0], x4m27);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[8], x5m26);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[13], x6m25);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[5], x7m24);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[1], x8m23);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[9], x9m22);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[12], x10m21);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[4], x11m20);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[2], x12m19);
let m0823b = WasmVector::fmadd(m0823b, self.twiddles_im[10], x13m18);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[11], x14m17);
let m0823b = WasmVector::nmadd(m0823b, self.twiddles_im[3], x15m16);
let [y08, y23] = WasmVector::column_butterfly2([m0823a, m0823b]);
let m0922a = WasmVector::fmadd(values[0], self.twiddles_re[8], x1p30);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[12], x2p29);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[3], x3p28);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[4], x4p27);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[13], x5p26);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[7], x6p25);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[0], x7p24);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[9], x8p23);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[11], x9p22);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[2], x10p21);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[5], x11p20);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[14], x12p19);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[6], x13p18);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[1], x14p17);
let m0922a = WasmVector::fmadd(m0922a, self.twiddles_re[10], x15p16);
let m0922b = WasmVector::mul(self.twiddles_im[8], x1m30);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[12], x2m29);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[3], x3m28);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[4], x4m27);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[13], x5m26);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[7], x6m25);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[0], x7m24);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[9], x8m23);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[11], x9m22);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[2], x10m21);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[5], x11m20);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[14], x12m19);
let m0922b = WasmVector::nmadd(m0922b, self.twiddles_im[6], x13m18);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[1], x14m17);
let m0922b = WasmVector::fmadd(m0922b, self.twiddles_im[10], x15m16);
let [y09, y22] = WasmVector::column_butterfly2([m0922a, m0922b]);
let m1021a = WasmVector::fmadd(values[0], self.twiddles_re[9], x1p30);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[10], x2p29);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[0], x3p28);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[8], x4p27);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[11], x5p26);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[1], x6p25);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[7], x7p24);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[12], x8p23);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[2], x9p22);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[6], x10p21);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[13], x11p20);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[3], x12p19);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[5], x13p18);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[14], x14p17);
let m1021a = WasmVector::fmadd(m1021a, self.twiddles_re[4], x15p16);
let m1021b = WasmVector::mul(self.twiddles_im[9], x1m30);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[10], x2m29);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[0], x3m28);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[8], x4m27);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[11], x5m26);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[1], x6m25);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[7], x7m24);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[12], x8m23);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[2], x9m22);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[6], x10m21);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[13], x11m20);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[3], x12m19);
let m1021b = WasmVector::fmadd(m1021b, self.twiddles_im[5], x13m18);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[14], x14m17);
let m1021b = WasmVector::nmadd(m1021b, self.twiddles_im[4], x15m16);
let [y10, y21] = WasmVector::column_butterfly2([m1021a, m1021b]);
let m1120a = WasmVector::fmadd(values[0], self.twiddles_re[10], x1p30);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[8], x2p29);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[1], x3p28);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[12], x4p27);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[6], x5p26);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[3], x6p25);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[14], x7p24);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[4], x8p23);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[5], x9p22);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[13], x10p21);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[2], x11p20);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[7], x12p19);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[11], x13p18);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[0], x14p17);
let m1120a = WasmVector::fmadd(m1120a, self.twiddles_re[9], x15p16);
let m1120b = WasmVector::mul(self.twiddles_im[10], x1m30);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[8], x2m29);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[1], x3m28);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[12], x4m27);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[6], x5m26);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[3], x6m25);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[14], x7m24);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[4], x8m23);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[5], x9m22);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[13], x10m21);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[2], x11m20);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[7], x12m19);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[11], x13m18);
let m1120b = WasmVector::nmadd(m1120b, self.twiddles_im[0], x14m17);
let m1120b = WasmVector::fmadd(m1120b, self.twiddles_im[9], x15m16);
let [y11, y20] = WasmVector::column_butterfly2([m1120a, m1120b]);
let m1219a = WasmVector::fmadd(values[0], self.twiddles_re[11], x1p30);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[6], x2p29);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[4], x3p28);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[13], x4p27);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[1], x5p26);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[9], x6p25);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[8], x7p24);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[2], x8p23);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[14], x9p22);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[3], x10p21);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[7], x11p20);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[10], x12p19);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[0], x13p18);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[12], x14p17);
let m1219a = WasmVector::fmadd(m1219a, self.twiddles_re[5], x15p16);
let m1219b = WasmVector::mul(self.twiddles_im[11], x1m30);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[6], x2m29);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[4], x3m28);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[13], x4m27);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[1], x5m26);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[9], x6m25);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[8], x7m24);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[2], x8m23);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[14], x9m22);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[3], x10m21);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[7], x11m20);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[10], x12m19);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[0], x13m18);
let m1219b = WasmVector::fmadd(m1219b, self.twiddles_im[12], x14m17);
let m1219b = WasmVector::nmadd(m1219b, self.twiddles_im[5], x15m16);
let [y12, y19] = WasmVector::column_butterfly2([m1219a, m1219b]);
let m1318a = WasmVector::fmadd(values[0], self.twiddles_re[12], x1p30);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[4], x2p29);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[7], x3p28);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[9], x4p27);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[2], x5p26);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[14], x6p25);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[1], x7p24);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[10], x8p23);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[6], x9p22);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[5], x10p21);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[11], x11p20);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[0], x12p19);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[13], x13p18);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[3], x14p17);
let m1318a = WasmVector::fmadd(m1318a, self.twiddles_re[8], x15p16);
let m1318b = WasmVector::mul(self.twiddles_im[12], x1m30);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[4], x2m29);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[7], x3m28);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[9], x4m27);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[2], x5m26);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[14], x6m25);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[1], x7m24);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[10], x8m23);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[6], x9m22);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[5], x10m21);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[11], x11m20);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[0], x12m19);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[13], x13m18);
let m1318b = WasmVector::nmadd(m1318b, self.twiddles_im[3], x14m17);
let m1318b = WasmVector::fmadd(m1318b, self.twiddles_im[8], x15m16);
let [y13, y18] = WasmVector::column_butterfly2([m1318a, m1318b]);
let m1417a = WasmVector::fmadd(values[0], self.twiddles_re[13], x1p30);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[2], x2p29);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[10], x3p28);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[5], x4p27);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[7], x5p26);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[8], x6p25);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[4], x7p24);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[11], x8p23);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[1], x9p22);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[14], x10p21);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[0], x11p20);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[12], x12p19);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[3], x13p18);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[9], x14p17);
let m1417a = WasmVector::fmadd(m1417a, self.twiddles_re[6], x15p16);
let m1417b = WasmVector::mul(self.twiddles_im[13], x1m30);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[2], x2m29);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[10], x3m28);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[5], x4m27);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[7], x5m26);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[8], x6m25);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[4], x7m24);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[11], x8m23);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[1], x9m22);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[14], x10m21);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[0], x11m20);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[12], x12m19);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[3], x13m18);
let m1417b = WasmVector::fmadd(m1417b, self.twiddles_im[9], x14m17);
let m1417b = WasmVector::nmadd(m1417b, self.twiddles_im[6], x15m16);
let [y14, y17] = WasmVector::column_butterfly2([m1417a, m1417b]);
let m1516a = WasmVector::fmadd(values[0], self.twiddles_re[14], x1p30);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[0], x2p29);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[13], x3p28);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[1], x4p27);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[12], x5p26);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[2], x6p25);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[11], x7p24);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[3], x8p23);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[10], x9p22);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[4], x10p21);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[9], x11p20);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[5], x12p19);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[8], x13p18);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[6], x14p17);
let m1516a = WasmVector::fmadd(m1516a, self.twiddles_re[7], x15p16);
let m1516b = WasmVector::mul(self.twiddles_im[14], x1m30);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[0], x2m29);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[13], x3m28);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[1], x4m27);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[12], x5m26);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[2], x6m25);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[11], x7m24);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[3], x8m23);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[10], x9m22);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[4], x10m21);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[9], x11m20);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[5], x12m19);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[8], x13m18);
let m1516b = WasmVector::nmadd(m1516b, self.twiddles_im[6], x14m17);
let m1516b = WasmVector::fmadd(m1516b, self.twiddles_im[7], x15m16);
let [y15, y16] = WasmVector::column_butterfly2([m1516a, m1516b]);
[y00, y01, y02, y03, y04, y05, y06, y07, y08, y09, y10, y11, y12, y13, y14, y15, y16, y17, y18, y19, y20, y21, y22, y23, y24, y25, y26, y27, y28, y29, y30]
}
}
#[cfg(test)]
mod unit_tests {
    use super::*;
    use crate::test_utils::check_fft_algorithm;
    use wasm_bindgen_test::wasm_bindgen_test;

    // Table-driven test generator.
    //
    // For the given float type, every `test_name: Struct, len;` row expands to
    // one `#[wasm_bindgen_test]` function that builds the butterfly for the
    // forward direction and hands it to `check_fft_algorithm`, then repeats the
    // same steps for the inverse direction.
    macro_rules! butterfly_tests {
        ($float:ty => { $($test_name:ident: $struct_name:ident, $size:expr;)+ }) => {
            $(
                #[wasm_bindgen_test]
                fn $test_name() {
                    // NOTE(review): `new` is called in an `unsafe` block; presumably it
                    // requires simd128 support on the executing target -- confirm against
                    // the constructor's definition.
                    let forward = unsafe { $struct_name::new(FftDirection::Forward) };
                    check_fft_algorithm::<$float>(&forward, $size, FftDirection::Forward);

                    let inverse = unsafe { $struct_name::new(FftDirection::Inverse) };
                    check_fft_algorithm::<$float>(&inverse, $size, FftDirection::Inverse);
                }
            )+
        };
    }

    // Single-precision butterflies.
    butterfly_tests!(f32 => {
        test_wasm_simdf32_butterfly7: WasmSimdF32Butterfly7, 7;
        test_wasm_simdf32_butterfly11: WasmSimdF32Butterfly11, 11;
        test_wasm_simdf32_butterfly13: WasmSimdF32Butterfly13, 13;
        test_wasm_simdf32_butterfly17: WasmSimdF32Butterfly17, 17;
        test_wasm_simdf32_butterfly19: WasmSimdF32Butterfly19, 19;
        test_wasm_simdf32_butterfly23: WasmSimdF32Butterfly23, 23;
        test_wasm_simdf32_butterfly29: WasmSimdF32Butterfly29, 29;
        test_wasm_simdf32_butterfly31: WasmSimdF32Butterfly31, 31;
    });

    // Double-precision butterflies.
    butterfly_tests!(f64 => {
        test_wasm_simdf64_butterfly7: WasmSimdF64Butterfly7, 7;
        test_wasm_simdf64_butterfly11: WasmSimdF64Butterfly11, 11;
        test_wasm_simdf64_butterfly13: WasmSimdF64Butterfly13, 13;
        test_wasm_simdf64_butterfly17: WasmSimdF64Butterfly17, 17;
        test_wasm_simdf64_butterfly19: WasmSimdF64Butterfly19, 19;
        test_wasm_simdf64_butterfly23: WasmSimdF64Butterfly23, 23;
        test_wasm_simdf64_butterfly29: WasmSimdF64Butterfly29, 29;
        test_wasm_simdf64_butterfly31: WasmSimdF64Butterfly31, 31;
    });
}