1use num_complex::Complex;
2
3use crate::{common::FftNum, FftDirection};
4
5use crate::array_utils::{DoubleBuf, LoadStore};
6use crate::twiddles;
7use crate::{Direction, Fft, Length};
8
// Generates the shared plumbing for a fixed-size butterfly struct:
//   * `Length`, returning `$len`;
//   * `Direction`, returning `$direction_fn(self)`;
//   * an inherent `perform_fft_butterfly` that forwards to the struct's own
//     `perform_fft_contiguous`;
//   * the `Fft` impl, whose three `process_*` entry points hand the work to the
//     matching `crate::fft_helper` function and report a scratch length of zero.
//
// `$struct_name` must therefore define
// `unsafe fn perform_fft_contiguous(&self, buffer: impl LoadStore<T>)`.
//
// NOTE(review): `allow(unused)` presumably covers builds in which the macro is
// never invoked - confirm before removing it.
#[allow(unused)]
macro_rules! boilerplate_fft_butterfly {
    ($struct_name:ident, $len:expr, $direction_fn:expr) => {
        impl<T> Length for $struct_name<T> {
            #[inline(always)]
            fn len(&self) -> usize {
                $len
            }
        }
        impl<T> Direction for $struct_name<T> {
            #[inline(always)]
            fn fft_direction(&self) -> FftDirection {
                $direction_fn(self)
            }
        }
        impl<T: FftNum> $struct_name<T> {
            // Runs one butterfly on `buffer` by forwarding to `perform_fft_contiguous`.
            #[inline(always)]
            pub(crate) unsafe fn perform_fft_butterfly(&self, buffer: impl LoadStore<T>) {
                self.perform_fft_contiguous(buffer);
            }
        }
        impl<T: FftNum> Fft<T> for $struct_name<T> {
            // Butterflies never need scratch space, whichever entry point is used.
            #[inline(always)]
            fn get_inplace_scratch_len(&self) -> usize {
                0
            }
            #[inline(always)]
            fn get_outofplace_scratch_len(&self) -> usize {
                0
            }
            #[inline(always)]
            fn get_immutable_scratch_len(&self) -> usize {
                0
            }
            // Reads from `input`, writes to `output`; the caller's scratch is ignored
            // and an empty scratch slice is handed to the helper instead.
            #[inline]
            fn process_immutable_with_scratch(
                &self,
                input: &[Complex<T>],
                output: &mut [Complex<T>],
                _scratch: &mut [Complex<T>],
            ) {
                let fft_len = self.len();
                crate::fft_helper::fft_helper_immut(
                    input,
                    output,
                    &mut [],
                    fft_len,
                    0,
                    // NOTE(review): soundness presumably relies on the helper handing
                    // out chunks of exactly `fft_len` elements - confirm in `fft_helper`.
                    |src, dst, _| unsafe {
                        self.perform_fft_butterfly(DoubleBuf {
                            input: src,
                            output: dst,
                        })
                    },
                );
            }
            // Same as above, but `input` is handed to the helper as a mutable slice.
            fn process_outofplace_with_scratch(
                &self,
                input: &mut [Complex<T>],
                output: &mut [Complex<T>],
                _scratch: &mut [Complex<T>],
            ) {
                let fft_len = self.len();
                crate::fft_helper::fft_helper_outofplace(
                    input,
                    output,
                    &mut [],
                    fft_len,
                    0,
                    // NOTE(review): same chunk-length assumption as the immutable path.
                    |src, dst, _| unsafe {
                        self.perform_fft_butterfly(DoubleBuf {
                            input: src,
                            output: dst,
                        })
                    },
                );
            }
            // In-place variant: each chunk is both the source and the destination.
            fn process_with_scratch(&self, buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {
                let fft_len = self.len();
                crate::fft_helper::fft_helper_inplace(
                    buffer,
                    &mut [],
                    fft_len,
                    0,
                    // NOTE(review): same chunk-length assumption as the immutable path.
                    |piece, _| unsafe { self.perform_fft_butterfly(piece) },
                );
            }
        }
    };
}
96
97pub struct Butterfly1<T> {
98 direction: FftDirection,
99 _phantom: std::marker::PhantomData<T>,
100}
101impl<T: FftNum> Butterfly1<T> {
102 #[inline(always)]
103 pub fn new(direction: FftDirection) -> Self {
104 Self {
105 direction,
106 _phantom: std::marker::PhantomData,
107 }
108 }
109}
110impl<T: FftNum> Fft<T> for Butterfly1<T> {
111 fn process_immutable_with_scratch(
112 &self,
113 input: &[Complex<T>],
114 output: &mut [Complex<T>],
115 _scratch: &mut [Complex<T>],
116 ) {
117 output.copy_from_slice(input);
118 }
119
120 fn process_outofplace_with_scratch(
121 &self,
122 input: &mut [Complex<T>],
123 output: &mut [Complex<T>],
124 _scratch: &mut [Complex<T>],
125 ) {
126 output.copy_from_slice(input);
127 }
128
129 fn process_with_scratch(&self, _buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {}
130
131 fn get_inplace_scratch_len(&self) -> usize {
132 0
133 }
134
135 fn get_outofplace_scratch_len(&self) -> usize {
136 0
137 }
138
139 fn get_immutable_scratch_len(&self) -> usize {
140 0
141 }
142}
143impl<T> Length for Butterfly1<T> {
144 fn len(&self) -> usize {
145 1
146 }
147}
148impl<T> Direction for Butterfly1<T> {
149 fn fft_direction(&self) -> FftDirection {
150 self.direction
151 }
152}
153
154pub struct Butterfly2<T> {
155 direction: FftDirection,
156 _phantom: std::marker::PhantomData<T>,
157}
158boilerplate_fft_butterfly!(Butterfly2, 2, |this: &Butterfly2<_>| this.direction);
159impl<T: FftNum> Butterfly2<T> {
160 #[inline(always)]
161 pub fn new(direction: FftDirection) -> Self {
162 Self {
163 direction,
164 _phantom: std::marker::PhantomData,
165 }
166 }
167 #[inline(always)]
168 unsafe fn perform_fft_strided(left: &mut Complex<T>, right: &mut Complex<T>) {
169 let temp = *left + *right;
170
171 *right = *left - *right;
172 *left = temp;
173 }
174 #[inline(always)]
175 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
176 let value0 = buffer.load(0);
177 let value1 = buffer.load(1);
178 buffer.store(value0 + value1, 0);
179 buffer.store(value0 - value1, 1);
180 }
181}
182
183pub struct Butterfly3<T> {
184 pub twiddle: Complex<T>,
185 direction: FftDirection,
186}
187boilerplate_fft_butterfly!(Butterfly3, 3, |this: &Butterfly3<_>| this.direction);
188impl<T: FftNum> Butterfly3<T> {
189 #[inline(always)]
190 pub fn new(direction: FftDirection) -> Self {
191 Self {
192 twiddle: twiddles::compute_twiddle(1, 3, direction),
193 direction,
194 }
195 }
196 #[inline(always)]
197 pub fn direction_of(fft: &Butterfly3<T>) -> Self {
198 Self {
199 twiddle: fft.twiddle.conj(),
200 direction: fft.direction.opposite_direction(),
201 }
202 }
203 #[inline(always)]
204 unsafe fn perform_fft_strided(
205 &self,
206 val0: &mut Complex<T>,
207 val1: &mut Complex<T>,
208 val2: &mut Complex<T>,
209 ) {
210 let xp = *val1 + *val2;
211 let xn = *val1 - *val2;
212 let sum = *val0 + xp;
213
214 let temp_a = *val0
215 + Complex {
216 re: self.twiddle.re * xp.re,
217 im: self.twiddle.re * xp.im,
218 };
219 let temp_b = Complex {
220 re: -self.twiddle.im * xn.im,
221 im: self.twiddle.im * xn.re,
222 };
223
224 *val0 = sum;
225 *val1 = temp_a + temp_b;
226 *val2 = temp_a - temp_b;
227 }
228
229 #[inline(always)]
230 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
231 let xp = buffer.load(1) + buffer.load(2);
232 let xn = buffer.load(1) - buffer.load(2);
233 let sum = buffer.load(0) + xp;
234
235 let temp_a = buffer.load(0)
236 + Complex {
237 re: self.twiddle.re * xp.re,
238 im: self.twiddle.re * xp.im,
239 };
240 let temp_b = Complex {
241 re: -self.twiddle.im * xn.im,
242 im: self.twiddle.im * xn.re,
243 };
244
245 buffer.store(sum, 0);
246 buffer.store(temp_a + temp_b, 1);
247 buffer.store(temp_a - temp_b, 2);
248 }
249}
250
251pub struct Butterfly4<T> {
252 direction: FftDirection,
253 _phantom: std::marker::PhantomData<T>,
254}
255boilerplate_fft_butterfly!(Butterfly4, 4, |this: &Butterfly4<_>| this.direction);
256impl<T: FftNum> Butterfly4<T> {
257 #[inline(always)]
258 pub fn new(direction: FftDirection) -> Self {
259 Self {
260 direction,
261 _phantom: std::marker::PhantomData,
262 }
263 }
264 #[inline(always)]
265 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
266 let mut value0 = buffer.load(0);
271 let mut value1 = buffer.load(1);
272 let mut value2 = buffer.load(2);
273 let mut value3 = buffer.load(3);
274
275 Butterfly2::perform_fft_strided(&mut value0, &mut value2);
277 Butterfly2::perform_fft_strided(&mut value1, &mut value3);
278
279 value3 = twiddles::rotate_90(value3, self.direction);
281
282 Butterfly2::perform_fft_strided(&mut value0, &mut value1);
286 Butterfly2::perform_fft_strided(&mut value2, &mut value3);
287
288 buffer.store(value0, 0);
290 buffer.store(value2, 1);
291 buffer.store(value1, 2);
292 buffer.store(value3, 3);
293 }
294
295 #[inline(always)]
296 unsafe fn perform_fft_strided(
297 &self,
298 value0: &mut Complex<T>,
299 value1: &mut Complex<T>,
300 value2: &mut Complex<T>,
301 value3: &mut Complex<T>,
302 ) {
303 Butterfly2::perform_fft_strided(value0, value2);
305 Butterfly2::perform_fft_strided(value1, value3);
306
307 *value3 = twiddles::rotate_90(*value3, self.direction);
309
310 Butterfly2::perform_fft_strided(value0, value1);
314 Butterfly2::perform_fft_strided(value2, value3);
315
316 let temp = *value1;
318 *value1 = *value2;
319 *value2 = temp;
320 }
321}
322
323pub struct Butterfly5<T> {
324 twiddle1: Complex<T>,
325 twiddle2: Complex<T>,
326 direction: FftDirection,
327}
328boilerplate_fft_butterfly!(Butterfly5, 5, |this: &Butterfly5<_>| this.direction);
329impl<T: FftNum> Butterfly5<T> {
330 pub fn new(direction: FftDirection) -> Self {
331 Self {
332 twiddle1: twiddles::compute_twiddle(1, 5, direction),
333 twiddle2: twiddles::compute_twiddle(2, 5, direction),
334 direction,
335 }
336 }
337
338 #[inline(never)] unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
340 let x14p = buffer.load(1) + buffer.load(4);
420 let x14n = buffer.load(1) - buffer.load(4);
421 let x23p = buffer.load(2) + buffer.load(3);
422 let x23n = buffer.load(2) - buffer.load(3);
423 let sum = buffer.load(0) + x14p + x23p;
424 let b14re_a = buffer.load(0).re + self.twiddle1.re * x14p.re + self.twiddle2.re * x23p.re;
425 let b14re_b = self.twiddle1.im * x14n.im + self.twiddle2.im * x23n.im;
426 let b23re_a = buffer.load(0).re + self.twiddle2.re * x14p.re + self.twiddle1.re * x23p.re;
427 let b23re_b = self.twiddle2.im * x14n.im + -self.twiddle1.im * x23n.im;
428
429 let b14im_a = buffer.load(0).im + self.twiddle1.re * x14p.im + self.twiddle2.re * x23p.im;
430 let b14im_b = self.twiddle1.im * x14n.re + self.twiddle2.im * x23n.re;
431 let b23im_a = buffer.load(0).im + self.twiddle2.re * x14p.im + self.twiddle1.re * x23p.im;
432 let b23im_b = self.twiddle2.im * x14n.re + -self.twiddle1.im * x23n.re;
433
434 let out1re = b14re_a - b14re_b;
435 let out1im = b14im_a + b14im_b;
436 let out2re = b23re_a - b23re_b;
437 let out2im = b23im_a + b23im_b;
438 let out3re = b23re_a + b23re_b;
439 let out3im = b23im_a - b23im_b;
440 let out4re = b14re_a + b14re_b;
441 let out4im = b14im_a - b14im_b;
442 buffer.store(sum, 0);
443 buffer.store(
444 Complex {
445 re: out1re,
446 im: out1im,
447 },
448 1,
449 );
450 buffer.store(
451 Complex {
452 re: out2re,
453 im: out2im,
454 },
455 2,
456 );
457 buffer.store(
458 Complex {
459 re: out3re,
460 im: out3im,
461 },
462 3,
463 );
464 buffer.store(
465 Complex {
466 re: out4re,
467 im: out4im,
468 },
469 4,
470 );
471 }
472}
473
474pub struct Butterfly6<T> {
475 butterfly3: Butterfly3<T>,
476}
477boilerplate_fft_butterfly!(Butterfly6, 6, |this: &Butterfly6<_>| this
478 .butterfly3
479 .fft_direction());
480impl<T: FftNum> Butterfly6<T> {
481 #[inline(always)]
482 pub fn new(direction: FftDirection) -> Self {
483 Self {
484 butterfly3: Butterfly3::new(direction),
485 }
486 }
487 #[inline(always)]
488 pub fn direction_of(fft: &Butterfly6<T>) -> Self {
489 Self {
490 butterfly3: Butterfly3::direction_of(&fft.butterfly3),
491 }
492 }
493 #[inline(always)]
494 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
495 let mut scratch_a = [buffer.load(0), buffer.load(2), buffer.load(4)];
500
501 let mut scratch_b = [buffer.load(3), buffer.load(5), buffer.load(1)];
502
503 self.butterfly3.perform_fft_contiguous(&mut scratch_a);
505 self.butterfly3.perform_fft_contiguous(&mut scratch_b);
506
507 Butterfly2::perform_fft_strided(&mut scratch_a[0], &mut scratch_b[0]);
513 Butterfly2::perform_fft_strided(&mut scratch_a[1], &mut scratch_b[1]);
514 Butterfly2::perform_fft_strided(&mut scratch_a[2], &mut scratch_b[2]);
515
516 buffer.store(scratch_a[0], 0);
520 buffer.store(scratch_b[1], 1);
521 buffer.store(scratch_a[2], 2);
522 buffer.store(scratch_b[0], 3);
523 buffer.store(scratch_a[1], 4);
524 buffer.store(scratch_b[2], 5);
525 }
526}
527
528pub struct Butterfly7<T> {
529 twiddle1: Complex<T>,
530 twiddle2: Complex<T>,
531 twiddle3: Complex<T>,
532 direction: FftDirection,
533}
534boilerplate_fft_butterfly!(Butterfly7, 7, |this: &Butterfly7<_>| this.direction);
535impl<T: FftNum> Butterfly7<T> {
536 pub fn new(direction: FftDirection) -> Self {
537 Self {
538 twiddle1: twiddles::compute_twiddle(1, 7, direction),
539 twiddle2: twiddles::compute_twiddle(2, 7, direction),
540 twiddle3: twiddles::compute_twiddle(3, 7, direction),
541 direction,
542 }
543 }
544 #[inline(never)]
545 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
546 let x16p = buffer.load(1) + buffer.load(6);
616 let x16n = buffer.load(1) - buffer.load(6);
617 let x25p = buffer.load(2) + buffer.load(5);
618 let x25n = buffer.load(2) - buffer.load(5);
619 let x34p = buffer.load(3) + buffer.load(4);
620 let x34n = buffer.load(3) - buffer.load(4);
621 let sum = buffer.load(0) + x16p + x25p + x34p;
622
623 let x16re_a = buffer.load(0).re
624 + self.twiddle1.re * x16p.re
625 + self.twiddle2.re * x25p.re
626 + self.twiddle3.re * x34p.re;
627 let x16re_b =
628 self.twiddle1.im * x16n.im + self.twiddle2.im * x25n.im + self.twiddle3.im * x34n.im;
629 let x25re_a = buffer.load(0).re
630 + self.twiddle1.re * x34p.re
631 + self.twiddle2.re * x16p.re
632 + self.twiddle3.re * x25p.re;
633 let x25re_b =
634 -self.twiddle1.im * x34n.im + self.twiddle2.im * x16n.im - self.twiddle3.im * x25n.im;
635 let x34re_a = buffer.load(0).re
636 + self.twiddle1.re * x25p.re
637 + self.twiddle2.re * x34p.re
638 + self.twiddle3.re * x16p.re;
639 let x34re_b =
640 -self.twiddle1.im * x25n.im + self.twiddle2.im * x34n.im + self.twiddle3.im * x16n.im;
641 let x16im_a = buffer.load(0).im
642 + self.twiddle1.re * x16p.im
643 + self.twiddle2.re * x25p.im
644 + self.twiddle3.re * x34p.im;
645 let x16im_b =
646 self.twiddle1.im * x16n.re + self.twiddle2.im * x25n.re + self.twiddle3.im * x34n.re;
647 let x25im_a = buffer.load(0).im
648 + self.twiddle1.re * x34p.im
649 + self.twiddle2.re * x16p.im
650 + self.twiddle3.re * x25p.im;
651 let x25im_b =
652 -self.twiddle1.im * x34n.re + self.twiddle2.im * x16n.re - self.twiddle3.im * x25n.re;
653 let x34im_a = buffer.load(0).im
654 + self.twiddle1.re * x25p.im
655 + self.twiddle2.re * x34p.im
656 + self.twiddle3.re * x16p.im;
657 let x34im_b =
658 self.twiddle1.im * x25n.re - self.twiddle2.im * x34n.re - self.twiddle3.im * x16n.re;
659
660 let out1re = x16re_a - x16re_b;
661 let out1im = x16im_a + x16im_b;
662 let out2re = x25re_a - x25re_b;
663 let out2im = x25im_a + x25im_b;
664 let out3re = x34re_a - x34re_b;
665 let out3im = x34im_a - x34im_b;
666 let out4re = x34re_a + x34re_b;
667 let out4im = x34im_a + x34im_b;
668 let out5re = x25re_a + x25re_b;
669 let out5im = x25im_a - x25im_b;
670 let out6re = x16re_a + x16re_b;
671 let out6im = x16im_a - x16im_b;
672
673 buffer.store(sum, 0);
674 buffer.store(
675 Complex {
676 re: out1re,
677 im: out1im,
678 },
679 1,
680 );
681 buffer.store(
682 Complex {
683 re: out2re,
684 im: out2im,
685 },
686 2,
687 );
688 buffer.store(
689 Complex {
690 re: out3re,
691 im: out3im,
692 },
693 3,
694 );
695 buffer.store(
696 Complex {
697 re: out4re,
698 im: out4im,
699 },
700 4,
701 );
702 buffer.store(
703 Complex {
704 re: out5re,
705 im: out5im,
706 },
707 5,
708 );
709 buffer.store(
710 Complex {
711 re: out6re,
712 im: out6im,
713 },
714 6,
715 );
716 }
717}
718
719pub struct Butterfly8<T> {
720 root2: T,
721 direction: FftDirection,
722}
723boilerplate_fft_butterfly!(Butterfly8, 8, |this: &Butterfly8<_>| this.direction);
724impl<T: FftNum> Butterfly8<T> {
725 #[inline(always)]
726 pub fn new(direction: FftDirection) -> Self {
727 Self {
728 root2: T::from_f64(0.5f64.sqrt()).unwrap(),
729 direction,
730 }
731 }
732
733 #[inline(always)]
734 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
735 let butterfly4 = Butterfly4::new(self.direction);
736
737 let mut scratch0 = [
742 buffer.load(0),
743 buffer.load(2),
744 buffer.load(4),
745 buffer.load(6),
746 ];
747 let mut scratch1 = [
748 buffer.load(1),
749 buffer.load(3),
750 buffer.load(5),
751 buffer.load(7),
752 ];
753
754 butterfly4.perform_fft_contiguous(&mut scratch0);
756 butterfly4.perform_fft_contiguous(&mut scratch1);
757
758 scratch1[1] = (twiddles::rotate_90(scratch1[1], self.direction) + scratch1[1]) * self.root2;
760 scratch1[2] = twiddles::rotate_90(scratch1[2], self.direction);
761 scratch1[3] = (twiddles::rotate_90(scratch1[3], self.direction) - scratch1[3]) * self.root2;
762
763 for i in 0..4 {
767 Butterfly2::perform_fft_strided(&mut scratch0[i], &mut scratch1[i]);
768 }
769
770 for i in 0..4 {
772 buffer.store(scratch0[i], i);
773 }
774 for i in 0..4 {
775 buffer.store(scratch1[i], i + 4);
776 }
777 }
778}
779
780pub struct Butterfly9<T> {
781 butterfly3: Butterfly3<T>,
782 twiddle1: Complex<T>,
783 twiddle2: Complex<T>,
784 twiddle4: Complex<T>,
785}
786boilerplate_fft_butterfly!(Butterfly9, 9, |this: &Butterfly9<_>| this
787 .butterfly3
788 .fft_direction());
789impl<T: FftNum> Butterfly9<T> {
790 #[inline(always)]
791 pub fn new(direction: FftDirection) -> Self {
792 Self {
793 butterfly3: Butterfly3::new(direction),
794 twiddle1: twiddles::compute_twiddle(1, 9, direction),
795 twiddle2: twiddles::compute_twiddle(2, 9, direction),
796 twiddle4: twiddles::compute_twiddle(4, 9, direction),
797 }
798 }
799 #[inline(always)]
800 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
801 let mut scratch0 = [buffer.load(0), buffer.load(3), buffer.load(6)];
805 let mut scratch1 = [buffer.load(1), buffer.load(4), buffer.load(7)];
806 let mut scratch2 = [buffer.load(2), buffer.load(5), buffer.load(8)];
807
808 self.butterfly3.perform_fft_contiguous(&mut scratch0);
810 self.butterfly3.perform_fft_contiguous(&mut scratch1);
811 self.butterfly3.perform_fft_contiguous(&mut scratch2);
812
813 scratch1[1] = scratch1[1] * self.twiddle1;
815 scratch1[2] = scratch1[2] * self.twiddle2;
816 scratch2[1] = scratch2[1] * self.twiddle2;
817 scratch2[2] = scratch2[2] * self.twiddle4;
818
819 self.butterfly3
823 .perform_fft_strided(&mut scratch0[0], &mut scratch1[0], &mut scratch2[0]);
824 self.butterfly3
825 .perform_fft_strided(&mut scratch0[1], &mut scratch1[1], &mut scratch2[1]);
826 self.butterfly3
827 .perform_fft_strided(&mut scratch0[2], &mut scratch1[2], &mut scratch2[2]);
828
829 buffer.store(scratch0[0], 0);
831 buffer.store(scratch0[1], 1);
832 buffer.store(scratch0[2], 2);
833 buffer.store(scratch1[0], 3);
834 buffer.store(scratch1[1], 4);
835 buffer.store(scratch1[2], 5);
836 buffer.store(scratch2[0], 6);
837 buffer.store(scratch2[1], 7);
838 buffer.store(scratch2[2], 8);
839 }
840}
841
842pub struct Butterfly11<T> {
843 twiddle1: Complex<T>,
844 twiddle2: Complex<T>,
845 twiddle3: Complex<T>,
846 twiddle4: Complex<T>,
847 twiddle5: Complex<T>,
848 direction: FftDirection,
849}
850boilerplate_fft_butterfly!(Butterfly11, 11, |this: &Butterfly11<_>| this.direction);
851impl<T: FftNum> Butterfly11<T> {
852 pub fn new(direction: FftDirection) -> Self {
853 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 11, direction);
854 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 11, direction);
855 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 11, direction);
856 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 11, direction);
857 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 11, direction);
858 Self {
859 twiddle1,
860 twiddle2,
861 twiddle3,
862 twiddle4,
863 twiddle5,
864 direction,
865 }
866 }
867
868 #[inline(never)]
869 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
870 let x110p = buffer.load(1) + buffer.load(10);
875 let x110n = buffer.load(1) - buffer.load(10);
876 let x29p = buffer.load(2) + buffer.load(9);
877 let x29n = buffer.load(2) - buffer.load(9);
878 let x38p = buffer.load(3) + buffer.load(8);
879 let x38n = buffer.load(3) - buffer.load(8);
880 let x47p = buffer.load(4) + buffer.load(7);
881 let x47n = buffer.load(4) - buffer.load(7);
882 let x56p = buffer.load(5) + buffer.load(6);
883 let x56n = buffer.load(5) - buffer.load(6);
884 let sum = buffer.load(0) + x110p + x29p + x38p + x47p + x56p;
885 let b110re_a = buffer.load(0).re
886 + self.twiddle1.re * x110p.re
887 + self.twiddle2.re * x29p.re
888 + self.twiddle3.re * x38p.re
889 + self.twiddle4.re * x47p.re
890 + self.twiddle5.re * x56p.re;
891 let b110re_b = self.twiddle1.im * x110n.im
892 + self.twiddle2.im * x29n.im
893 + self.twiddle3.im * x38n.im
894 + self.twiddle4.im * x47n.im
895 + self.twiddle5.im * x56n.im;
896 let b29re_a = buffer.load(0).re
897 + self.twiddle2.re * x110p.re
898 + self.twiddle4.re * x29p.re
899 + self.twiddle5.re * x38p.re
900 + self.twiddle3.re * x47p.re
901 + self.twiddle1.re * x56p.re;
902 let b29re_b = self.twiddle2.im * x110n.im
903 + self.twiddle4.im * x29n.im
904 + -self.twiddle5.im * x38n.im
905 + -self.twiddle3.im * x47n.im
906 + -self.twiddle1.im * x56n.im;
907 let b38re_a = buffer.load(0).re
908 + self.twiddle3.re * x110p.re
909 + self.twiddle5.re * x29p.re
910 + self.twiddle2.re * x38p.re
911 + self.twiddle1.re * x47p.re
912 + self.twiddle4.re * x56p.re;
913 let b38re_b = self.twiddle3.im * x110n.im
914 + -self.twiddle5.im * x29n.im
915 + -self.twiddle2.im * x38n.im
916 + self.twiddle1.im * x47n.im
917 + self.twiddle4.im * x56n.im;
918 let b47re_a = buffer.load(0).re
919 + self.twiddle4.re * x110p.re
920 + self.twiddle3.re * x29p.re
921 + self.twiddle1.re * x38p.re
922 + self.twiddle5.re * x47p.re
923 + self.twiddle2.re * x56p.re;
924 let b47re_b = self.twiddle4.im * x110n.im
925 + -self.twiddle3.im * x29n.im
926 + self.twiddle1.im * x38n.im
927 + self.twiddle5.im * x47n.im
928 + -self.twiddle2.im * x56n.im;
929 let b56re_a = buffer.load(0).re
930 + self.twiddle5.re * x110p.re
931 + self.twiddle1.re * x29p.re
932 + self.twiddle4.re * x38p.re
933 + self.twiddle2.re * x47p.re
934 + self.twiddle3.re * x56p.re;
935 let b56re_b = self.twiddle5.im * x110n.im
936 + -self.twiddle1.im * x29n.im
937 + self.twiddle4.im * x38n.im
938 + -self.twiddle2.im * x47n.im
939 + self.twiddle3.im * x56n.im;
940
941 let b110im_a = buffer.load(0).im
942 + self.twiddle1.re * x110p.im
943 + self.twiddle2.re * x29p.im
944 + self.twiddle3.re * x38p.im
945 + self.twiddle4.re * x47p.im
946 + self.twiddle5.re * x56p.im;
947 let b110im_b = self.twiddle1.im * x110n.re
948 + self.twiddle2.im * x29n.re
949 + self.twiddle3.im * x38n.re
950 + self.twiddle4.im * x47n.re
951 + self.twiddle5.im * x56n.re;
952 let b29im_a = buffer.load(0).im
953 + self.twiddle2.re * x110p.im
954 + self.twiddle4.re * x29p.im
955 + self.twiddle5.re * x38p.im
956 + self.twiddle3.re * x47p.im
957 + self.twiddle1.re * x56p.im;
958 let b29im_b = self.twiddle2.im * x110n.re
959 + self.twiddle4.im * x29n.re
960 + -self.twiddle5.im * x38n.re
961 + -self.twiddle3.im * x47n.re
962 + -self.twiddle1.im * x56n.re;
963 let b38im_a = buffer.load(0).im
964 + self.twiddle3.re * x110p.im
965 + self.twiddle5.re * x29p.im
966 + self.twiddle2.re * x38p.im
967 + self.twiddle1.re * x47p.im
968 + self.twiddle4.re * x56p.im;
969 let b38im_b = self.twiddle3.im * x110n.re
970 + -self.twiddle5.im * x29n.re
971 + -self.twiddle2.im * x38n.re
972 + self.twiddle1.im * x47n.re
973 + self.twiddle4.im * x56n.re;
974 let b47im_a = buffer.load(0).im
975 + self.twiddle4.re * x110p.im
976 + self.twiddle3.re * x29p.im
977 + self.twiddle1.re * x38p.im
978 + self.twiddle5.re * x47p.im
979 + self.twiddle2.re * x56p.im;
980 let b47im_b = self.twiddle4.im * x110n.re
981 + -self.twiddle3.im * x29n.re
982 + self.twiddle1.im * x38n.re
983 + self.twiddle5.im * x47n.re
984 + -self.twiddle2.im * x56n.re;
985 let b56im_a = buffer.load(0).im
986 + self.twiddle5.re * x110p.im
987 + self.twiddle1.re * x29p.im
988 + self.twiddle4.re * x38p.im
989 + self.twiddle2.re * x47p.im
990 + self.twiddle3.re * x56p.im;
991 let b56im_b = self.twiddle5.im * x110n.re
992 + -self.twiddle1.im * x29n.re
993 + self.twiddle4.im * x38n.re
994 + -self.twiddle2.im * x47n.re
995 + self.twiddle3.im * x56n.re;
996
997 let out1re = b110re_a - b110re_b;
998 let out1im = b110im_a + b110im_b;
999 let out2re = b29re_a - b29re_b;
1000 let out2im = b29im_a + b29im_b;
1001 let out3re = b38re_a - b38re_b;
1002 let out3im = b38im_a + b38im_b;
1003 let out4re = b47re_a - b47re_b;
1004 let out4im = b47im_a + b47im_b;
1005 let out5re = b56re_a - b56re_b;
1006 let out5im = b56im_a + b56im_b;
1007 let out6re = b56re_a + b56re_b;
1008 let out6im = b56im_a - b56im_b;
1009 let out7re = b47re_a + b47re_b;
1010 let out7im = b47im_a - b47im_b;
1011 let out8re = b38re_a + b38re_b;
1012 let out8im = b38im_a - b38im_b;
1013 let out9re = b29re_a + b29re_b;
1014 let out9im = b29im_a - b29im_b;
1015 let out10re = b110re_a + b110re_b;
1016 let out10im = b110im_a - b110im_b;
1017 buffer.store(sum, 0);
1018 buffer.store(
1019 Complex {
1020 re: out1re,
1021 im: out1im,
1022 },
1023 1,
1024 );
1025 buffer.store(
1026 Complex {
1027 re: out2re,
1028 im: out2im,
1029 },
1030 2,
1031 );
1032 buffer.store(
1033 Complex {
1034 re: out3re,
1035 im: out3im,
1036 },
1037 3,
1038 );
1039 buffer.store(
1040 Complex {
1041 re: out4re,
1042 im: out4im,
1043 },
1044 4,
1045 );
1046 buffer.store(
1047 Complex {
1048 re: out5re,
1049 im: out5im,
1050 },
1051 5,
1052 );
1053 buffer.store(
1054 Complex {
1055 re: out6re,
1056 im: out6im,
1057 },
1058 6,
1059 );
1060 buffer.store(
1061 Complex {
1062 re: out7re,
1063 im: out7im,
1064 },
1065 7,
1066 );
1067 buffer.store(
1068 Complex {
1069 re: out8re,
1070 im: out8im,
1071 },
1072 8,
1073 );
1074 buffer.store(
1075 Complex {
1076 re: out9re,
1077 im: out9im,
1078 },
1079 9,
1080 );
1081 buffer.store(
1082 Complex {
1083 re: out10re,
1084 im: out10im,
1085 },
1086 10,
1087 );
1088 }
1089}
1090
1091pub struct Butterfly12<T> {
1092 butterfly3: Butterfly3<T>,
1093 butterfly4: Butterfly4<T>,
1094}
1095boilerplate_fft_butterfly!(Butterfly12, 12, |this: &Butterfly12<_>| this
1096 .butterfly3
1097 .fft_direction());
1098impl<T: FftNum> Butterfly12<T> {
1099 #[inline(always)]
1100 pub fn new(direction: FftDirection) -> Self {
1101 Self {
1102 butterfly3: Butterfly3::new(direction),
1103 butterfly4: Butterfly4::new(direction),
1104 }
1105 }
1106 #[inline(always)]
1107 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1108 let mut scratch0 = [
1113 buffer.load(0),
1114 buffer.load(3),
1115 buffer.load(6),
1116 buffer.load(9),
1117 ];
1118 let mut scratch1 = [
1119 buffer.load(4),
1120 buffer.load(7),
1121 buffer.load(10),
1122 buffer.load(1),
1123 ];
1124 let mut scratch2 = [
1125 buffer.load(8),
1126 buffer.load(11),
1127 buffer.load(2),
1128 buffer.load(5),
1129 ];
1130
1131 self.butterfly4.perform_fft_contiguous(&mut scratch0);
1133 self.butterfly4.perform_fft_contiguous(&mut scratch1);
1134 self.butterfly4.perform_fft_contiguous(&mut scratch2);
1135
1136 self.butterfly3
1142 .perform_fft_strided(&mut scratch0[0], &mut scratch1[0], &mut scratch2[0]);
1143 self.butterfly3
1144 .perform_fft_strided(&mut scratch0[1], &mut scratch1[1], &mut scratch2[1]);
1145 self.butterfly3
1146 .perform_fft_strided(&mut scratch0[2], &mut scratch1[2], &mut scratch2[2]);
1147 self.butterfly3
1148 .perform_fft_strided(&mut scratch0[3], &mut scratch1[3], &mut scratch2[3]);
1149
1150 buffer.store(scratch0[0], 0);
1154 buffer.store(scratch1[1], 1);
1155 buffer.store(scratch2[2], 2);
1156 buffer.store(scratch0[3], 3);
1157 buffer.store(scratch1[0], 4);
1158 buffer.store(scratch2[1], 5);
1159 buffer.store(scratch0[2], 6);
1160 buffer.store(scratch1[3], 7);
1161 buffer.store(scratch2[0], 8);
1162 buffer.store(scratch0[1], 9);
1163 buffer.store(scratch1[2], 10);
1164 buffer.store(scratch2[3], 11);
1165 }
1166}
1167
/// Size-13 FFT butterfly.
///
/// Holds the FFT direction and six complex twiddle factors; the constructor
/// fills `twiddleK` with `twiddles::compute_twiddle(K, 13, direction)`.
pub struct Butterfly13<T> {
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    direction: FftDirection,
}
// Generates the `Fft`, `Length` (13) and `Direction` impls via the shared macro.
boilerplate_fft_butterfly!(Butterfly13, 13, |this: &Butterfly13<_>| this.direction);
1178impl<T: FftNum> Butterfly13<T> {
1179 pub fn new(direction: FftDirection) -> Self {
1180 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 13, direction);
1181 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 13, direction);
1182 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 13, direction);
1183 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 13, direction);
1184 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 13, direction);
1185 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 13, direction);
1186 Self {
1187 twiddle1,
1188 twiddle2,
1189 twiddle3,
1190 twiddle4,
1191 twiddle5,
1192 twiddle6,
1193 direction,
1194 }
1195 }
1196
1197 #[inline(never)]
1198 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1199 let x112p = buffer.load(1) + buffer.load(12);
1203 let x112n = buffer.load(1) - buffer.load(12);
1204 let x211p = buffer.load(2) + buffer.load(11);
1205 let x211n = buffer.load(2) - buffer.load(11);
1206 let x310p = buffer.load(3) + buffer.load(10);
1207 let x310n = buffer.load(3) - buffer.load(10);
1208 let x49p = buffer.load(4) + buffer.load(9);
1209 let x49n = buffer.load(4) - buffer.load(9);
1210 let x58p = buffer.load(5) + buffer.load(8);
1211 let x58n = buffer.load(5) - buffer.load(8);
1212 let x67p = buffer.load(6) + buffer.load(7);
1213 let x67n = buffer.load(6) - buffer.load(7);
1214 let sum = buffer.load(0) + x112p + x211p + x310p + x49p + x58p + x67p;
1215 let b112re_a = buffer.load(0).re
1216 + self.twiddle1.re * x112p.re
1217 + self.twiddle2.re * x211p.re
1218 + self.twiddle3.re * x310p.re
1219 + self.twiddle4.re * x49p.re
1220 + self.twiddle5.re * x58p.re
1221 + self.twiddle6.re * x67p.re;
1222 let b112re_b = self.twiddle1.im * x112n.im
1223 + self.twiddle2.im * x211n.im
1224 + self.twiddle3.im * x310n.im
1225 + self.twiddle4.im * x49n.im
1226 + self.twiddle5.im * x58n.im
1227 + self.twiddle6.im * x67n.im;
1228 let b211re_a = buffer.load(0).re
1229 + self.twiddle2.re * x112p.re
1230 + self.twiddle4.re * x211p.re
1231 + self.twiddle6.re * x310p.re
1232 + self.twiddle5.re * x49p.re
1233 + self.twiddle3.re * x58p.re
1234 + self.twiddle1.re * x67p.re;
1235 let b211re_b = self.twiddle2.im * x112n.im
1236 + self.twiddle4.im * x211n.im
1237 + self.twiddle6.im * x310n.im
1238 + -self.twiddle5.im * x49n.im
1239 + -self.twiddle3.im * x58n.im
1240 + -self.twiddle1.im * x67n.im;
1241 let b310re_a = buffer.load(0).re
1242 + self.twiddle3.re * x112p.re
1243 + self.twiddle6.re * x211p.re
1244 + self.twiddle4.re * x310p.re
1245 + self.twiddle1.re * x49p.re
1246 + self.twiddle2.re * x58p.re
1247 + self.twiddle5.re * x67p.re;
1248 let b310re_b = self.twiddle3.im * x112n.im
1249 + self.twiddle6.im * x211n.im
1250 + -self.twiddle4.im * x310n.im
1251 + -self.twiddle1.im * x49n.im
1252 + self.twiddle2.im * x58n.im
1253 + self.twiddle5.im * x67n.im;
1254 let b49re_a = buffer.load(0).re
1255 + self.twiddle4.re * x112p.re
1256 + self.twiddle5.re * x211p.re
1257 + self.twiddle1.re * x310p.re
1258 + self.twiddle3.re * x49p.re
1259 + self.twiddle6.re * x58p.re
1260 + self.twiddle2.re * x67p.re;
1261 let b49re_b = self.twiddle4.im * x112n.im
1262 + -self.twiddle5.im * x211n.im
1263 + -self.twiddle1.im * x310n.im
1264 + self.twiddle3.im * x49n.im
1265 + -self.twiddle6.im * x58n.im
1266 + -self.twiddle2.im * x67n.im;
1267 let b58re_a = buffer.load(0).re
1268 + self.twiddle5.re * x112p.re
1269 + self.twiddle3.re * x211p.re
1270 + self.twiddle2.re * x310p.re
1271 + self.twiddle6.re * x49p.re
1272 + self.twiddle1.re * x58p.re
1273 + self.twiddle4.re * x67p.re;
1274 let b58re_b = self.twiddle5.im * x112n.im
1275 + -self.twiddle3.im * x211n.im
1276 + self.twiddle2.im * x310n.im
1277 + -self.twiddle6.im * x49n.im
1278 + -self.twiddle1.im * x58n.im
1279 + self.twiddle4.im * x67n.im;
1280 let b67re_a = buffer.load(0).re
1281 + self.twiddle6.re * x112p.re
1282 + self.twiddle1.re * x211p.re
1283 + self.twiddle5.re * x310p.re
1284 + self.twiddle2.re * x49p.re
1285 + self.twiddle4.re * x58p.re
1286 + self.twiddle3.re * x67p.re;
1287 let b67re_b = self.twiddle6.im * x112n.im
1288 + -self.twiddle1.im * x211n.im
1289 + self.twiddle5.im * x310n.im
1290 + -self.twiddle2.im * x49n.im
1291 + self.twiddle4.im * x58n.im
1292 + -self.twiddle3.im * x67n.im;
1293
1294 let b112im_a = buffer.load(0).im
1295 + self.twiddle1.re * x112p.im
1296 + self.twiddle2.re * x211p.im
1297 + self.twiddle3.re * x310p.im
1298 + self.twiddle4.re * x49p.im
1299 + self.twiddle5.re * x58p.im
1300 + self.twiddle6.re * x67p.im;
1301 let b112im_b = self.twiddle1.im * x112n.re
1302 + self.twiddle2.im * x211n.re
1303 + self.twiddle3.im * x310n.re
1304 + self.twiddle4.im * x49n.re
1305 + self.twiddle5.im * x58n.re
1306 + self.twiddle6.im * x67n.re;
1307 let b211im_a = buffer.load(0).im
1308 + self.twiddle2.re * x112p.im
1309 + self.twiddle4.re * x211p.im
1310 + self.twiddle6.re * x310p.im
1311 + self.twiddle5.re * x49p.im
1312 + self.twiddle3.re * x58p.im
1313 + self.twiddle1.re * x67p.im;
1314 let b211im_b = self.twiddle2.im * x112n.re
1315 + self.twiddle4.im * x211n.re
1316 + self.twiddle6.im * x310n.re
1317 + -self.twiddle5.im * x49n.re
1318 + -self.twiddle3.im * x58n.re
1319 + -self.twiddle1.im * x67n.re;
1320 let b310im_a = buffer.load(0).im
1321 + self.twiddle3.re * x112p.im
1322 + self.twiddle6.re * x211p.im
1323 + self.twiddle4.re * x310p.im
1324 + self.twiddle1.re * x49p.im
1325 + self.twiddle2.re * x58p.im
1326 + self.twiddle5.re * x67p.im;
1327 let b310im_b = self.twiddle3.im * x112n.re
1328 + self.twiddle6.im * x211n.re
1329 + -self.twiddle4.im * x310n.re
1330 + -self.twiddle1.im * x49n.re
1331 + self.twiddle2.im * x58n.re
1332 + self.twiddle5.im * x67n.re;
1333 let b49im_a = buffer.load(0).im
1334 + self.twiddle4.re * x112p.im
1335 + self.twiddle5.re * x211p.im
1336 + self.twiddle1.re * x310p.im
1337 + self.twiddle3.re * x49p.im
1338 + self.twiddle6.re * x58p.im
1339 + self.twiddle2.re * x67p.im;
1340 let b49im_b = self.twiddle4.im * x112n.re
1341 + -self.twiddle5.im * x211n.re
1342 + -self.twiddle1.im * x310n.re
1343 + self.twiddle3.im * x49n.re
1344 + -self.twiddle6.im * x58n.re
1345 + -self.twiddle2.im * x67n.re;
1346 let b58im_a = buffer.load(0).im
1347 + self.twiddle5.re * x112p.im
1348 + self.twiddle3.re * x211p.im
1349 + self.twiddle2.re * x310p.im
1350 + self.twiddle6.re * x49p.im
1351 + self.twiddle1.re * x58p.im
1352 + self.twiddle4.re * x67p.im;
1353 let b58im_b = self.twiddle5.im * x112n.re
1354 + -self.twiddle3.im * x211n.re
1355 + self.twiddle2.im * x310n.re
1356 + -self.twiddle6.im * x49n.re
1357 + -self.twiddle1.im * x58n.re
1358 + self.twiddle4.im * x67n.re;
1359 let b67im_a = buffer.load(0).im
1360 + self.twiddle6.re * x112p.im
1361 + self.twiddle1.re * x211p.im
1362 + self.twiddle5.re * x310p.im
1363 + self.twiddle2.re * x49p.im
1364 + self.twiddle4.re * x58p.im
1365 + self.twiddle3.re * x67p.im;
1366 let b67im_b = self.twiddle6.im * x112n.re
1367 + -self.twiddle1.im * x211n.re
1368 + self.twiddle5.im * x310n.re
1369 + -self.twiddle2.im * x49n.re
1370 + self.twiddle4.im * x58n.re
1371 + -self.twiddle3.im * x67n.re;
1372
1373 let out1re = b112re_a - b112re_b;
1374 let out1im = b112im_a + b112im_b;
1375 let out2re = b211re_a - b211re_b;
1376 let out2im = b211im_a + b211im_b;
1377 let out3re = b310re_a - b310re_b;
1378 let out3im = b310im_a + b310im_b;
1379 let out4re = b49re_a - b49re_b;
1380 let out4im = b49im_a + b49im_b;
1381 let out5re = b58re_a - b58re_b;
1382 let out5im = b58im_a + b58im_b;
1383 let out6re = b67re_a - b67re_b;
1384 let out6im = b67im_a + b67im_b;
1385 let out7re = b67re_a + b67re_b;
1386 let out7im = b67im_a - b67im_b;
1387 let out8re = b58re_a + b58re_b;
1388 let out8im = b58im_a - b58im_b;
1389 let out9re = b49re_a + b49re_b;
1390 let out9im = b49im_a - b49im_b;
1391 let out10re = b310re_a + b310re_b;
1392 let out10im = b310im_a - b310im_b;
1393 let out11re = b211re_a + b211re_b;
1394 let out11im = b211im_a - b211im_b;
1395 let out12re = b112re_a + b112re_b;
1396 let out12im = b112im_a - b112im_b;
1397 buffer.store(sum, 0);
1398 buffer.store(
1399 Complex {
1400 re: out1re,
1401 im: out1im,
1402 },
1403 1,
1404 );
1405 buffer.store(
1406 Complex {
1407 re: out2re,
1408 im: out2im,
1409 },
1410 2,
1411 );
1412 buffer.store(
1413 Complex {
1414 re: out3re,
1415 im: out3im,
1416 },
1417 3,
1418 );
1419 buffer.store(
1420 Complex {
1421 re: out4re,
1422 im: out4im,
1423 },
1424 4,
1425 );
1426 buffer.store(
1427 Complex {
1428 re: out5re,
1429 im: out5im,
1430 },
1431 5,
1432 );
1433 buffer.store(
1434 Complex {
1435 re: out6re,
1436 im: out6im,
1437 },
1438 6,
1439 );
1440 buffer.store(
1441 Complex {
1442 re: out7re,
1443 im: out7im,
1444 },
1445 7,
1446 );
1447 buffer.store(
1448 Complex {
1449 re: out8re,
1450 im: out8im,
1451 },
1452 8,
1453 );
1454 buffer.store(
1455 Complex {
1456 re: out9re,
1457 im: out9im,
1458 },
1459 9,
1460 );
1461 buffer.store(
1462 Complex {
1463 re: out10re,
1464 im: out10im,
1465 },
1466 10,
1467 );
1468 buffer.store(
1469 Complex {
1470 re: out11re,
1471 im: out11im,
1472 },
1473 11,
1474 );
1475 buffer.store(
1476 Complex {
1477 re: out12re,
1478 im: out12im,
1479 },
1480 12,
1481 );
1482 }
1483}
1484
1485pub struct Butterfly16<T> {
1486 butterfly8: Butterfly8<T>,
1487 twiddle1: Complex<T>,
1488 twiddle2: Complex<T>,
1489 twiddle3: Complex<T>,
1490}
1491boilerplate_fft_butterfly!(Butterfly16, 16, |this: &Butterfly16<_>| this
1492 .butterfly8
1493 .fft_direction());
1494impl<T: FftNum> Butterfly16<T> {
1495 #[inline(always)]
1496 pub fn new(direction: FftDirection) -> Self {
1497 Self {
1498 butterfly8: Butterfly8::new(direction),
1499 twiddle1: twiddles::compute_twiddle(1, 16, direction),
1500 twiddle2: twiddles::compute_twiddle(2, 16, direction),
1501 twiddle3: twiddles::compute_twiddle(3, 16, direction),
1502 }
1503 }
1504
1505 #[inline(never)]
1506 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1507 let butterfly4 = Butterfly4::new(self.fft_direction());
1508
1509 let mut scratch_evens = [
1512 buffer.load(0),
1513 buffer.load(2),
1514 buffer.load(4),
1515 buffer.load(6),
1516 buffer.load(8),
1517 buffer.load(10),
1518 buffer.load(12),
1519 buffer.load(14),
1520 ];
1521
1522 let mut scratch_odds_n1 = [
1523 buffer.load(1),
1524 buffer.load(5),
1525 buffer.load(9),
1526 buffer.load(13),
1527 ];
1528 let mut scratch_odds_n3 = [
1529 buffer.load(15),
1530 buffer.load(3),
1531 buffer.load(7),
1532 buffer.load(11),
1533 ];
1534
1535 self.butterfly8.perform_fft_contiguous(&mut scratch_evens);
1537 butterfly4.perform_fft_contiguous(&mut scratch_odds_n1);
1538 butterfly4.perform_fft_contiguous(&mut scratch_odds_n3);
1539
1540 scratch_odds_n1[1] = scratch_odds_n1[1] * self.twiddle1;
1542 scratch_odds_n3[1] = scratch_odds_n3[1] * self.twiddle1.conj();
1543
1544 scratch_odds_n1[2] = scratch_odds_n1[2] * self.twiddle2;
1545 scratch_odds_n3[2] = scratch_odds_n3[2] * self.twiddle2.conj();
1546
1547 scratch_odds_n1[3] = scratch_odds_n1[3] * self.twiddle3;
1548 scratch_odds_n3[3] = scratch_odds_n3[3] * self.twiddle3.conj();
1549
1550 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[0], &mut scratch_odds_n3[0]);
1552 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[1], &mut scratch_odds_n3[1]);
1553 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[2], &mut scratch_odds_n3[2]);
1554 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[3], &mut scratch_odds_n3[3]);
1555
1556 scratch_odds_n3[0] = twiddles::rotate_90(scratch_odds_n3[0], self.fft_direction());
1558 scratch_odds_n3[1] = twiddles::rotate_90(scratch_odds_n3[1], self.fft_direction());
1559 scratch_odds_n3[2] = twiddles::rotate_90(scratch_odds_n3[2], self.fft_direction());
1560 scratch_odds_n3[3] = twiddles::rotate_90(scratch_odds_n3[3], self.fft_direction());
1561
1562 buffer.store(scratch_evens[0] + scratch_odds_n1[0], 0);
1564 buffer.store(scratch_evens[1] + scratch_odds_n1[1], 1);
1565 buffer.store(scratch_evens[2] + scratch_odds_n1[2], 2);
1566 buffer.store(scratch_evens[3] + scratch_odds_n1[3], 3);
1567 buffer.store(scratch_evens[4] + scratch_odds_n3[0], 4);
1568 buffer.store(scratch_evens[5] + scratch_odds_n3[1], 5);
1569 buffer.store(scratch_evens[6] + scratch_odds_n3[2], 6);
1570 buffer.store(scratch_evens[7] + scratch_odds_n3[3], 7);
1571 buffer.store(scratch_evens[0] - scratch_odds_n1[0], 8);
1572 buffer.store(scratch_evens[1] - scratch_odds_n1[1], 9);
1573 buffer.store(scratch_evens[2] - scratch_odds_n1[2], 10);
1574 buffer.store(scratch_evens[3] - scratch_odds_n1[3], 11);
1575 buffer.store(scratch_evens[4] - scratch_odds_n3[0], 12);
1576 buffer.store(scratch_evens[5] - scratch_odds_n3[1], 13);
1577 buffer.store(scratch_evens[6] - scratch_odds_n3[2], 14);
1578 buffer.store(scratch_evens[7] - scratch_odds_n3[3], 15);
1579 }
1580}
1581
1582pub struct Butterfly17<T> {
1583 twiddle1: Complex<T>,
1584 twiddle2: Complex<T>,
1585 twiddle3: Complex<T>,
1586 twiddle4: Complex<T>,
1587 twiddle5: Complex<T>,
1588 twiddle6: Complex<T>,
1589 twiddle7: Complex<T>,
1590 twiddle8: Complex<T>,
1591 direction: FftDirection,
1592}
1593boilerplate_fft_butterfly!(Butterfly17, 17, |this: &Butterfly17<_>| this.direction);
1594impl<T: FftNum> Butterfly17<T> {
1595 pub fn new(direction: FftDirection) -> Self {
1596 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 17, direction);
1597 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 17, direction);
1598 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 17, direction);
1599 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 17, direction);
1600 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 17, direction);
1601 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 17, direction);
1602 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 17, direction);
1603 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 17, direction);
1604 Self {
1605 twiddle1,
1606 twiddle2,
1607 twiddle3,
1608 twiddle4,
1609 twiddle5,
1610 twiddle6,
1611 twiddle7,
1612 twiddle8,
1613 direction,
1614 }
1615 }
1616
1617 #[inline(never)]
1618 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1619 let x116p = buffer.load(1) + buffer.load(16);
1623 let x116n = buffer.load(1) - buffer.load(16);
1624 let x215p = buffer.load(2) + buffer.load(15);
1625 let x215n = buffer.load(2) - buffer.load(15);
1626 let x314p = buffer.load(3) + buffer.load(14);
1627 let x314n = buffer.load(3) - buffer.load(14);
1628 let x413p = buffer.load(4) + buffer.load(13);
1629 let x413n = buffer.load(4) - buffer.load(13);
1630 let x512p = buffer.load(5) + buffer.load(12);
1631 let x512n = buffer.load(5) - buffer.load(12);
1632 let x611p = buffer.load(6) + buffer.load(11);
1633 let x611n = buffer.load(6) - buffer.load(11);
1634 let x710p = buffer.load(7) + buffer.load(10);
1635 let x710n = buffer.load(7) - buffer.load(10);
1636 let x89p = buffer.load(8) + buffer.load(9);
1637 let x89n = buffer.load(8) - buffer.load(9);
1638 let sum = buffer.load(0) + x116p + x215p + x314p + x413p + x512p + x611p + x710p + x89p;
1639 let b116re_a = buffer.load(0).re
1640 + self.twiddle1.re * x116p.re
1641 + self.twiddle2.re * x215p.re
1642 + self.twiddle3.re * x314p.re
1643 + self.twiddle4.re * x413p.re
1644 + self.twiddle5.re * x512p.re
1645 + self.twiddle6.re * x611p.re
1646 + self.twiddle7.re * x710p.re
1647 + self.twiddle8.re * x89p.re;
1648 let b116re_b = self.twiddle1.im * x116n.im
1649 + self.twiddle2.im * x215n.im
1650 + self.twiddle3.im * x314n.im
1651 + self.twiddle4.im * x413n.im
1652 + self.twiddle5.im * x512n.im
1653 + self.twiddle6.im * x611n.im
1654 + self.twiddle7.im * x710n.im
1655 + self.twiddle8.im * x89n.im;
1656 let b215re_a = buffer.load(0).re
1657 + self.twiddle2.re * x116p.re
1658 + self.twiddle4.re * x215p.re
1659 + self.twiddle6.re * x314p.re
1660 + self.twiddle8.re * x413p.re
1661 + self.twiddle7.re * x512p.re
1662 + self.twiddle5.re * x611p.re
1663 + self.twiddle3.re * x710p.re
1664 + self.twiddle1.re * x89p.re;
1665 let b215re_b = self.twiddle2.im * x116n.im
1666 + self.twiddle4.im * x215n.im
1667 + self.twiddle6.im * x314n.im
1668 + self.twiddle8.im * x413n.im
1669 + -self.twiddle7.im * x512n.im
1670 + -self.twiddle5.im * x611n.im
1671 + -self.twiddle3.im * x710n.im
1672 + -self.twiddle1.im * x89n.im;
1673 let b314re_a = buffer.load(0).re
1674 + self.twiddle3.re * x116p.re
1675 + self.twiddle6.re * x215p.re
1676 + self.twiddle8.re * x314p.re
1677 + self.twiddle5.re * x413p.re
1678 + self.twiddle2.re * x512p.re
1679 + self.twiddle1.re * x611p.re
1680 + self.twiddle4.re * x710p.re
1681 + self.twiddle7.re * x89p.re;
1682 let b314re_b = self.twiddle3.im * x116n.im
1683 + self.twiddle6.im * x215n.im
1684 + -self.twiddle8.im * x314n.im
1685 + -self.twiddle5.im * x413n.im
1686 + -self.twiddle2.im * x512n.im
1687 + self.twiddle1.im * x611n.im
1688 + self.twiddle4.im * x710n.im
1689 + self.twiddle7.im * x89n.im;
1690 let b413re_a = buffer.load(0).re
1691 + self.twiddle4.re * x116p.re
1692 + self.twiddle8.re * x215p.re
1693 + self.twiddle5.re * x314p.re
1694 + self.twiddle1.re * x413p.re
1695 + self.twiddle3.re * x512p.re
1696 + self.twiddle7.re * x611p.re
1697 + self.twiddle6.re * x710p.re
1698 + self.twiddle2.re * x89p.re;
1699 let b413re_b = self.twiddle4.im * x116n.im
1700 + self.twiddle8.im * x215n.im
1701 + -self.twiddle5.im * x314n.im
1702 + -self.twiddle1.im * x413n.im
1703 + self.twiddle3.im * x512n.im
1704 + self.twiddle7.im * x611n.im
1705 + -self.twiddle6.im * x710n.im
1706 + -self.twiddle2.im * x89n.im;
1707 let b512re_a = buffer.load(0).re
1708 + self.twiddle5.re * x116p.re
1709 + self.twiddle7.re * x215p.re
1710 + self.twiddle2.re * x314p.re
1711 + self.twiddle3.re * x413p.re
1712 + self.twiddle8.re * x512p.re
1713 + self.twiddle4.re * x611p.re
1714 + self.twiddle1.re * x710p.re
1715 + self.twiddle6.re * x89p.re;
1716 let b512re_b = self.twiddle5.im * x116n.im
1717 + -self.twiddle7.im * x215n.im
1718 + -self.twiddle2.im * x314n.im
1719 + self.twiddle3.im * x413n.im
1720 + self.twiddle8.im * x512n.im
1721 + -self.twiddle4.im * x611n.im
1722 + self.twiddle1.im * x710n.im
1723 + self.twiddle6.im * x89n.im;
1724 let b611re_a = buffer.load(0).re
1725 + self.twiddle6.re * x116p.re
1726 + self.twiddle5.re * x215p.re
1727 + self.twiddle1.re * x314p.re
1728 + self.twiddle7.re * x413p.re
1729 + self.twiddle4.re * x512p.re
1730 + self.twiddle2.re * x611p.re
1731 + self.twiddle8.re * x710p.re
1732 + self.twiddle3.re * x89p.re;
1733 let b611re_b = self.twiddle6.im * x116n.im
1734 + -self.twiddle5.im * x215n.im
1735 + self.twiddle1.im * x314n.im
1736 + self.twiddle7.im * x413n.im
1737 + -self.twiddle4.im * x512n.im
1738 + self.twiddle2.im * x611n.im
1739 + self.twiddle8.im * x710n.im
1740 + -self.twiddle3.im * x89n.im;
1741 let b710re_a = buffer.load(0).re
1742 + self.twiddle7.re * x116p.re
1743 + self.twiddle3.re * x215p.re
1744 + self.twiddle4.re * x314p.re
1745 + self.twiddle6.re * x413p.re
1746 + self.twiddle1.re * x512p.re
1747 + self.twiddle8.re * x611p.re
1748 + self.twiddle2.re * x710p.re
1749 + self.twiddle5.re * x89p.re;
1750 let b710re_b = self.twiddle7.im * x116n.im
1751 + -self.twiddle3.im * x215n.im
1752 + self.twiddle4.im * x314n.im
1753 + -self.twiddle6.im * x413n.im
1754 + self.twiddle1.im * x512n.im
1755 + self.twiddle8.im * x611n.im
1756 + -self.twiddle2.im * x710n.im
1757 + self.twiddle5.im * x89n.im;
1758 let b89re_a = buffer.load(0).re
1759 + self.twiddle8.re * x116p.re
1760 + self.twiddle1.re * x215p.re
1761 + self.twiddle7.re * x314p.re
1762 + self.twiddle2.re * x413p.re
1763 + self.twiddle6.re * x512p.re
1764 + self.twiddle3.re * x611p.re
1765 + self.twiddle5.re * x710p.re
1766 + self.twiddle4.re * x89p.re;
1767 let b89re_b = self.twiddle8.im * x116n.im
1768 + -self.twiddle1.im * x215n.im
1769 + self.twiddle7.im * x314n.im
1770 + -self.twiddle2.im * x413n.im
1771 + self.twiddle6.im * x512n.im
1772 + -self.twiddle3.im * x611n.im
1773 + self.twiddle5.im * x710n.im
1774 + -self.twiddle4.im * x89n.im;
1775
1776 let b116im_a = buffer.load(0).im
1777 + self.twiddle1.re * x116p.im
1778 + self.twiddle2.re * x215p.im
1779 + self.twiddle3.re * x314p.im
1780 + self.twiddle4.re * x413p.im
1781 + self.twiddle5.re * x512p.im
1782 + self.twiddle6.re * x611p.im
1783 + self.twiddle7.re * x710p.im
1784 + self.twiddle8.re * x89p.im;
1785 let b116im_b = self.twiddle1.im * x116n.re
1786 + self.twiddle2.im * x215n.re
1787 + self.twiddle3.im * x314n.re
1788 + self.twiddle4.im * x413n.re
1789 + self.twiddle5.im * x512n.re
1790 + self.twiddle6.im * x611n.re
1791 + self.twiddle7.im * x710n.re
1792 + self.twiddle8.im * x89n.re;
1793 let b215im_a = buffer.load(0).im
1794 + self.twiddle2.re * x116p.im
1795 + self.twiddle4.re * x215p.im
1796 + self.twiddle6.re * x314p.im
1797 + self.twiddle8.re * x413p.im
1798 + self.twiddle7.re * x512p.im
1799 + self.twiddle5.re * x611p.im
1800 + self.twiddle3.re * x710p.im
1801 + self.twiddle1.re * x89p.im;
1802 let b215im_b = self.twiddle2.im * x116n.re
1803 + self.twiddle4.im * x215n.re
1804 + self.twiddle6.im * x314n.re
1805 + self.twiddle8.im * x413n.re
1806 + -self.twiddle7.im * x512n.re
1807 + -self.twiddle5.im * x611n.re
1808 + -self.twiddle3.im * x710n.re
1809 + -self.twiddle1.im * x89n.re;
1810 let b314im_a = buffer.load(0).im
1811 + self.twiddle3.re * x116p.im
1812 + self.twiddle6.re * x215p.im
1813 + self.twiddle8.re * x314p.im
1814 + self.twiddle5.re * x413p.im
1815 + self.twiddle2.re * x512p.im
1816 + self.twiddle1.re * x611p.im
1817 + self.twiddle4.re * x710p.im
1818 + self.twiddle7.re * x89p.im;
1819 let b314im_b = self.twiddle3.im * x116n.re
1820 + self.twiddle6.im * x215n.re
1821 + -self.twiddle8.im * x314n.re
1822 + -self.twiddle5.im * x413n.re
1823 + -self.twiddle2.im * x512n.re
1824 + self.twiddle1.im * x611n.re
1825 + self.twiddle4.im * x710n.re
1826 + self.twiddle7.im * x89n.re;
1827 let b413im_a = buffer.load(0).im
1828 + self.twiddle4.re * x116p.im
1829 + self.twiddle8.re * x215p.im
1830 + self.twiddle5.re * x314p.im
1831 + self.twiddle1.re * x413p.im
1832 + self.twiddle3.re * x512p.im
1833 + self.twiddle7.re * x611p.im
1834 + self.twiddle6.re * x710p.im
1835 + self.twiddle2.re * x89p.im;
1836 let b413im_b = self.twiddle4.im * x116n.re
1837 + self.twiddle8.im * x215n.re
1838 + -self.twiddle5.im * x314n.re
1839 + -self.twiddle1.im * x413n.re
1840 + self.twiddle3.im * x512n.re
1841 + self.twiddle7.im * x611n.re
1842 + -self.twiddle6.im * x710n.re
1843 + -self.twiddle2.im * x89n.re;
1844 let b512im_a = buffer.load(0).im
1845 + self.twiddle5.re * x116p.im
1846 + self.twiddle7.re * x215p.im
1847 + self.twiddle2.re * x314p.im
1848 + self.twiddle3.re * x413p.im
1849 + self.twiddle8.re * x512p.im
1850 + self.twiddle4.re * x611p.im
1851 + self.twiddle1.re * x710p.im
1852 + self.twiddle6.re * x89p.im;
1853 let b512im_b = self.twiddle5.im * x116n.re
1854 + -self.twiddle7.im * x215n.re
1855 + -self.twiddle2.im * x314n.re
1856 + self.twiddle3.im * x413n.re
1857 + self.twiddle8.im * x512n.re
1858 + -self.twiddle4.im * x611n.re
1859 + self.twiddle1.im * x710n.re
1860 + self.twiddle6.im * x89n.re;
1861 let b611im_a = buffer.load(0).im
1862 + self.twiddle6.re * x116p.im
1863 + self.twiddle5.re * x215p.im
1864 + self.twiddle1.re * x314p.im
1865 + self.twiddle7.re * x413p.im
1866 + self.twiddle4.re * x512p.im
1867 + self.twiddle2.re * x611p.im
1868 + self.twiddle8.re * x710p.im
1869 + self.twiddle3.re * x89p.im;
1870 let b611im_b = self.twiddle6.im * x116n.re
1871 + -self.twiddle5.im * x215n.re
1872 + self.twiddle1.im * x314n.re
1873 + self.twiddle7.im * x413n.re
1874 + -self.twiddle4.im * x512n.re
1875 + self.twiddle2.im * x611n.re
1876 + self.twiddle8.im * x710n.re
1877 + -self.twiddle3.im * x89n.re;
1878 let b710im_a = buffer.load(0).im
1879 + self.twiddle7.re * x116p.im
1880 + self.twiddle3.re * x215p.im
1881 + self.twiddle4.re * x314p.im
1882 + self.twiddle6.re * x413p.im
1883 + self.twiddle1.re * x512p.im
1884 + self.twiddle8.re * x611p.im
1885 + self.twiddle2.re * x710p.im
1886 + self.twiddle5.re * x89p.im;
1887 let b710im_b = self.twiddle7.im * x116n.re
1888 + -self.twiddle3.im * x215n.re
1889 + self.twiddle4.im * x314n.re
1890 + -self.twiddle6.im * x413n.re
1891 + self.twiddle1.im * x512n.re
1892 + self.twiddle8.im * x611n.re
1893 + -self.twiddle2.im * x710n.re
1894 + self.twiddle5.im * x89n.re;
1895 let b89im_a = buffer.load(0).im
1896 + self.twiddle8.re * x116p.im
1897 + self.twiddle1.re * x215p.im
1898 + self.twiddle7.re * x314p.im
1899 + self.twiddle2.re * x413p.im
1900 + self.twiddle6.re * x512p.im
1901 + self.twiddle3.re * x611p.im
1902 + self.twiddle5.re * x710p.im
1903 + self.twiddle4.re * x89p.im;
1904 let b89im_b = self.twiddle8.im * x116n.re
1905 + -self.twiddle1.im * x215n.re
1906 + self.twiddle7.im * x314n.re
1907 + -self.twiddle2.im * x413n.re
1908 + self.twiddle6.im * x512n.re
1909 + -self.twiddle3.im * x611n.re
1910 + self.twiddle5.im * x710n.re
1911 + -self.twiddle4.im * x89n.re;
1912
1913 let out1re = b116re_a - b116re_b;
1914 let out1im = b116im_a + b116im_b;
1915 let out2re = b215re_a - b215re_b;
1916 let out2im = b215im_a + b215im_b;
1917 let out3re = b314re_a - b314re_b;
1918 let out3im = b314im_a + b314im_b;
1919 let out4re = b413re_a - b413re_b;
1920 let out4im = b413im_a + b413im_b;
1921 let out5re = b512re_a - b512re_b;
1922 let out5im = b512im_a + b512im_b;
1923 let out6re = b611re_a - b611re_b;
1924 let out6im = b611im_a + b611im_b;
1925 let out7re = b710re_a - b710re_b;
1926 let out7im = b710im_a + b710im_b;
1927 let out8re = b89re_a - b89re_b;
1928 let out8im = b89im_a + b89im_b;
1929 let out9re = b89re_a + b89re_b;
1930 let out9im = b89im_a - b89im_b;
1931 let out10re = b710re_a + b710re_b;
1932 let out10im = b710im_a - b710im_b;
1933 let out11re = b611re_a + b611re_b;
1934 let out11im = b611im_a - b611im_b;
1935 let out12re = b512re_a + b512re_b;
1936 let out12im = b512im_a - b512im_b;
1937 let out13re = b413re_a + b413re_b;
1938 let out13im = b413im_a - b413im_b;
1939 let out14re = b314re_a + b314re_b;
1940 let out14im = b314im_a - b314im_b;
1941 let out15re = b215re_a + b215re_b;
1942 let out15im = b215im_a - b215im_b;
1943 let out16re = b116re_a + b116re_b;
1944 let out16im = b116im_a - b116im_b;
1945 buffer.store(sum, 0);
1946 buffer.store(
1947 Complex {
1948 re: out1re,
1949 im: out1im,
1950 },
1951 1,
1952 );
1953 buffer.store(
1954 Complex {
1955 re: out2re,
1956 im: out2im,
1957 },
1958 2,
1959 );
1960 buffer.store(
1961 Complex {
1962 re: out3re,
1963 im: out3im,
1964 },
1965 3,
1966 );
1967 buffer.store(
1968 Complex {
1969 re: out4re,
1970 im: out4im,
1971 },
1972 4,
1973 );
1974 buffer.store(
1975 Complex {
1976 re: out5re,
1977 im: out5im,
1978 },
1979 5,
1980 );
1981 buffer.store(
1982 Complex {
1983 re: out6re,
1984 im: out6im,
1985 },
1986 6,
1987 );
1988 buffer.store(
1989 Complex {
1990 re: out7re,
1991 im: out7im,
1992 },
1993 7,
1994 );
1995 buffer.store(
1996 Complex {
1997 re: out8re,
1998 im: out8im,
1999 },
2000 8,
2001 );
2002 buffer.store(
2003 Complex {
2004 re: out9re,
2005 im: out9im,
2006 },
2007 9,
2008 );
2009 buffer.store(
2010 Complex {
2011 re: out10re,
2012 im: out10im,
2013 },
2014 10,
2015 );
2016 buffer.store(
2017 Complex {
2018 re: out11re,
2019 im: out11im,
2020 },
2021 11,
2022 );
2023 buffer.store(
2024 Complex {
2025 re: out12re,
2026 im: out12im,
2027 },
2028 12,
2029 );
2030 buffer.store(
2031 Complex {
2032 re: out13re,
2033 im: out13im,
2034 },
2035 13,
2036 );
2037 buffer.store(
2038 Complex {
2039 re: out14re,
2040 im: out14im,
2041 },
2042 14,
2043 );
2044 buffer.store(
2045 Complex {
2046 re: out15re,
2047 im: out15im,
2048 },
2049 15,
2050 );
2051 buffer.store(
2052 Complex {
2053 re: out16re,
2054 im: out16im,
2055 },
2056 16,
2057 );
2058 }
2059}
2060
2061pub struct Butterfly19<T> {
2062 twiddle1: Complex<T>,
2063 twiddle2: Complex<T>,
2064 twiddle3: Complex<T>,
2065 twiddle4: Complex<T>,
2066 twiddle5: Complex<T>,
2067 twiddle6: Complex<T>,
2068 twiddle7: Complex<T>,
2069 twiddle8: Complex<T>,
2070 twiddle9: Complex<T>,
2071 direction: FftDirection,
2072}
2073boilerplate_fft_butterfly!(Butterfly19, 19, |this: &Butterfly19<_>| this.direction);
2074impl<T: FftNum> Butterfly19<T> {
2075 pub fn new(direction: FftDirection) -> Self {
2076 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 19, direction);
2077 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 19, direction);
2078 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 19, direction);
2079 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 19, direction);
2080 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 19, direction);
2081 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 19, direction);
2082 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 19, direction);
2083 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 19, direction);
2084 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 19, direction);
2085 Self {
2086 twiddle1,
2087 twiddle2,
2088 twiddle3,
2089 twiddle4,
2090 twiddle5,
2091 twiddle6,
2092 twiddle7,
2093 twiddle8,
2094 twiddle9,
2095 direction,
2096 }
2097 }
2098
2099 #[inline(never)]
2100 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
2101 let x118p = buffer.load(1) + buffer.load(18);
2105 let x118n = buffer.load(1) - buffer.load(18);
2106 let x217p = buffer.load(2) + buffer.load(17);
2107 let x217n = buffer.load(2) - buffer.load(17);
2108 let x316p = buffer.load(3) + buffer.load(16);
2109 let x316n = buffer.load(3) - buffer.load(16);
2110 let x415p = buffer.load(4) + buffer.load(15);
2111 let x415n = buffer.load(4) - buffer.load(15);
2112 let x514p = buffer.load(5) + buffer.load(14);
2113 let x514n = buffer.load(5) - buffer.load(14);
2114 let x613p = buffer.load(6) + buffer.load(13);
2115 let x613n = buffer.load(6) - buffer.load(13);
2116 let x712p = buffer.load(7) + buffer.load(12);
2117 let x712n = buffer.load(7) - buffer.load(12);
2118 let x811p = buffer.load(8) + buffer.load(11);
2119 let x811n = buffer.load(8) - buffer.load(11);
2120 let x910p = buffer.load(9) + buffer.load(10);
2121 let x910n = buffer.load(9) - buffer.load(10);
2122 let sum =
2123 buffer.load(0) + x118p + x217p + x316p + x415p + x514p + x613p + x712p + x811p + x910p;
2124 let b118re_a = buffer.load(0).re
2125 + self.twiddle1.re * x118p.re
2126 + self.twiddle2.re * x217p.re
2127 + self.twiddle3.re * x316p.re
2128 + self.twiddle4.re * x415p.re
2129 + self.twiddle5.re * x514p.re
2130 + self.twiddle6.re * x613p.re
2131 + self.twiddle7.re * x712p.re
2132 + self.twiddle8.re * x811p.re
2133 + self.twiddle9.re * x910p.re;
2134 let b118re_b = self.twiddle1.im * x118n.im
2135 + self.twiddle2.im * x217n.im
2136 + self.twiddle3.im * x316n.im
2137 + self.twiddle4.im * x415n.im
2138 + self.twiddle5.im * x514n.im
2139 + self.twiddle6.im * x613n.im
2140 + self.twiddle7.im * x712n.im
2141 + self.twiddle8.im * x811n.im
2142 + self.twiddle9.im * x910n.im;
2143 let b217re_a = buffer.load(0).re
2144 + self.twiddle2.re * x118p.re
2145 + self.twiddle4.re * x217p.re
2146 + self.twiddle6.re * x316p.re
2147 + self.twiddle8.re * x415p.re
2148 + self.twiddle9.re * x514p.re
2149 + self.twiddle7.re * x613p.re
2150 + self.twiddle5.re * x712p.re
2151 + self.twiddle3.re * x811p.re
2152 + self.twiddle1.re * x910p.re;
2153 let b217re_b = self.twiddle2.im * x118n.im
2154 + self.twiddle4.im * x217n.im
2155 + self.twiddle6.im * x316n.im
2156 + self.twiddle8.im * x415n.im
2157 + -self.twiddle9.im * x514n.im
2158 + -self.twiddle7.im * x613n.im
2159 + -self.twiddle5.im * x712n.im
2160 + -self.twiddle3.im * x811n.im
2161 + -self.twiddle1.im * x910n.im;
2162 let b316re_a = buffer.load(0).re
2163 + self.twiddle3.re * x118p.re
2164 + self.twiddle6.re * x217p.re
2165 + self.twiddle9.re * x316p.re
2166 + self.twiddle7.re * x415p.re
2167 + self.twiddle4.re * x514p.re
2168 + self.twiddle1.re * x613p.re
2169 + self.twiddle2.re * x712p.re
2170 + self.twiddle5.re * x811p.re
2171 + self.twiddle8.re * x910p.re;
2172 let b316re_b = self.twiddle3.im * x118n.im
2173 + self.twiddle6.im * x217n.im
2174 + self.twiddle9.im * x316n.im
2175 + -self.twiddle7.im * x415n.im
2176 + -self.twiddle4.im * x514n.im
2177 + -self.twiddle1.im * x613n.im
2178 + self.twiddle2.im * x712n.im
2179 + self.twiddle5.im * x811n.im
2180 + self.twiddle8.im * x910n.im;
2181 let b415re_a = buffer.load(0).re
2182 + self.twiddle4.re * x118p.re
2183 + self.twiddle8.re * x217p.re
2184 + self.twiddle7.re * x316p.re
2185 + self.twiddle3.re * x415p.re
2186 + self.twiddle1.re * x514p.re
2187 + self.twiddle5.re * x613p.re
2188 + self.twiddle9.re * x712p.re
2189 + self.twiddle6.re * x811p.re
2190 + self.twiddle2.re * x910p.re;
2191 let b415re_b = self.twiddle4.im * x118n.im
2192 + self.twiddle8.im * x217n.im
2193 + -self.twiddle7.im * x316n.im
2194 + -self.twiddle3.im * x415n.im
2195 + self.twiddle1.im * x514n.im
2196 + self.twiddle5.im * x613n.im
2197 + self.twiddle9.im * x712n.im
2198 + -self.twiddle6.im * x811n.im
2199 + -self.twiddle2.im * x910n.im;
2200 let b514re_a = buffer.load(0).re
2201 + self.twiddle5.re * x118p.re
2202 + self.twiddle9.re * x217p.re
2203 + self.twiddle4.re * x316p.re
2204 + self.twiddle1.re * x415p.re
2205 + self.twiddle6.re * x514p.re
2206 + self.twiddle8.re * x613p.re
2207 + self.twiddle3.re * x712p.re
2208 + self.twiddle2.re * x811p.re
2209 + self.twiddle7.re * x910p.re;
2210 let b514re_b = self.twiddle5.im * x118n.im
2211 + -self.twiddle9.im * x217n.im
2212 + -self.twiddle4.im * x316n.im
2213 + self.twiddle1.im * x415n.im
2214 + self.twiddle6.im * x514n.im
2215 + -self.twiddle8.im * x613n.im
2216 + -self.twiddle3.im * x712n.im
2217 + self.twiddle2.im * x811n.im
2218 + self.twiddle7.im * x910n.im;
2219 let b613re_a = buffer.load(0).re
2220 + self.twiddle6.re * x118p.re
2221 + self.twiddle7.re * x217p.re
2222 + self.twiddle1.re * x316p.re
2223 + self.twiddle5.re * x415p.re
2224 + self.twiddle8.re * x514p.re
2225 + self.twiddle2.re * x613p.re
2226 + self.twiddle4.re * x712p.re
2227 + self.twiddle9.re * x811p.re
2228 + self.twiddle3.re * x910p.re;
2229 let b613re_b = self.twiddle6.im * x118n.im
2230 + -self.twiddle7.im * x217n.im
2231 + -self.twiddle1.im * x316n.im
2232 + self.twiddle5.im * x415n.im
2233 + -self.twiddle8.im * x514n.im
2234 + -self.twiddle2.im * x613n.im
2235 + self.twiddle4.im * x712n.im
2236 + -self.twiddle9.im * x811n.im
2237 + -self.twiddle3.im * x910n.im;
2238 let b712re_a = buffer.load(0).re
2239 + self.twiddle7.re * x118p.re
2240 + self.twiddle5.re * x217p.re
2241 + self.twiddle2.re * x316p.re
2242 + self.twiddle9.re * x415p.re
2243 + self.twiddle3.re * x514p.re
2244 + self.twiddle4.re * x613p.re
2245 + self.twiddle8.re * x712p.re
2246 + self.twiddle1.re * x811p.re
2247 + self.twiddle6.re * x910p.re;
2248 let b712re_b = self.twiddle7.im * x118n.im
2249 + -self.twiddle5.im * x217n.im
2250 + self.twiddle2.im * x316n.im
2251 + self.twiddle9.im * x415n.im
2252 + -self.twiddle3.im * x514n.im
2253 + self.twiddle4.im * x613n.im
2254 + -self.twiddle8.im * x712n.im
2255 + -self.twiddle1.im * x811n.im
2256 + self.twiddle6.im * x910n.im;
2257 let b811re_a = buffer.load(0).re
2258 + self.twiddle8.re * x118p.re
2259 + self.twiddle3.re * x217p.re
2260 + self.twiddle5.re * x316p.re
2261 + self.twiddle6.re * x415p.re
2262 + self.twiddle2.re * x514p.re
2263 + self.twiddle9.re * x613p.re
2264 + self.twiddle1.re * x712p.re
2265 + self.twiddle7.re * x811p.re
2266 + self.twiddle4.re * x910p.re;
2267 let b811re_b = self.twiddle8.im * x118n.im
2268 + -self.twiddle3.im * x217n.im
2269 + self.twiddle5.im * x316n.im
2270 + -self.twiddle6.im * x415n.im
2271 + self.twiddle2.im * x514n.im
2272 + -self.twiddle9.im * x613n.im
2273 + -self.twiddle1.im * x712n.im
2274 + self.twiddle7.im * x811n.im
2275 + -self.twiddle4.im * x910n.im;
2276 let b910re_a = buffer.load(0).re
2277 + self.twiddle9.re * x118p.re
2278 + self.twiddle1.re * x217p.re
2279 + self.twiddle8.re * x316p.re
2280 + self.twiddle2.re * x415p.re
2281 + self.twiddle7.re * x514p.re
2282 + self.twiddle3.re * x613p.re
2283 + self.twiddle6.re * x712p.re
2284 + self.twiddle4.re * x811p.re
2285 + self.twiddle5.re * x910p.re;
2286 let b910re_b = self.twiddle9.im * x118n.im
2287 + -self.twiddle1.im * x217n.im
2288 + self.twiddle8.im * x316n.im
2289 + -self.twiddle2.im * x415n.im
2290 + self.twiddle7.im * x514n.im
2291 + -self.twiddle3.im * x613n.im
2292 + self.twiddle6.im * x712n.im
2293 + -self.twiddle4.im * x811n.im
2294 + self.twiddle5.im * x910n.im;
2295
2296 let b118im_a = buffer.load(0).im
2297 + self.twiddle1.re * x118p.im
2298 + self.twiddle2.re * x217p.im
2299 + self.twiddle3.re * x316p.im
2300 + self.twiddle4.re * x415p.im
2301 + self.twiddle5.re * x514p.im
2302 + self.twiddle6.re * x613p.im
2303 + self.twiddle7.re * x712p.im
2304 + self.twiddle8.re * x811p.im
2305 + self.twiddle9.re * x910p.im;
2306 let b118im_b = self.twiddle1.im * x118n.re
2307 + self.twiddle2.im * x217n.re
2308 + self.twiddle3.im * x316n.re
2309 + self.twiddle4.im * x415n.re
2310 + self.twiddle5.im * x514n.re
2311 + self.twiddle6.im * x613n.re
2312 + self.twiddle7.im * x712n.re
2313 + self.twiddle8.im * x811n.re
2314 + self.twiddle9.im * x910n.re;
2315 let b217im_a = buffer.load(0).im
2316 + self.twiddle2.re * x118p.im
2317 + self.twiddle4.re * x217p.im
2318 + self.twiddle6.re * x316p.im
2319 + self.twiddle8.re * x415p.im
2320 + self.twiddle9.re * x514p.im
2321 + self.twiddle7.re * x613p.im
2322 + self.twiddle5.re * x712p.im
2323 + self.twiddle3.re * x811p.im
2324 + self.twiddle1.re * x910p.im;
2325 let b217im_b = self.twiddle2.im * x118n.re
2326 + self.twiddle4.im * x217n.re
2327 + self.twiddle6.im * x316n.re
2328 + self.twiddle8.im * x415n.re
2329 + -self.twiddle9.im * x514n.re
2330 + -self.twiddle7.im * x613n.re
2331 + -self.twiddle5.im * x712n.re
2332 + -self.twiddle3.im * x811n.re
2333 + -self.twiddle1.im * x910n.re;
2334 let b316im_a = buffer.load(0).im
2335 + self.twiddle3.re * x118p.im
2336 + self.twiddle6.re * x217p.im
2337 + self.twiddle9.re * x316p.im
2338 + self.twiddle7.re * x415p.im
2339 + self.twiddle4.re * x514p.im
2340 + self.twiddle1.re * x613p.im
2341 + self.twiddle2.re * x712p.im
2342 + self.twiddle5.re * x811p.im
2343 + self.twiddle8.re * x910p.im;
2344 let b316im_b = self.twiddle3.im * x118n.re
2345 + self.twiddle6.im * x217n.re
2346 + self.twiddle9.im * x316n.re
2347 + -self.twiddle7.im * x415n.re
2348 + -self.twiddle4.im * x514n.re
2349 + -self.twiddle1.im * x613n.re
2350 + self.twiddle2.im * x712n.re
2351 + self.twiddle5.im * x811n.re
2352 + self.twiddle8.im * x910n.re;
2353 let b415im_a = buffer.load(0).im
2354 + self.twiddle4.re * x118p.im
2355 + self.twiddle8.re * x217p.im
2356 + self.twiddle7.re * x316p.im
2357 + self.twiddle3.re * x415p.im
2358 + self.twiddle1.re * x514p.im
2359 + self.twiddle5.re * x613p.im
2360 + self.twiddle9.re * x712p.im
2361 + self.twiddle6.re * x811p.im
2362 + self.twiddle2.re * x910p.im;
2363 let b415im_b = self.twiddle4.im * x118n.re
2364 + self.twiddle8.im * x217n.re
2365 + -self.twiddle7.im * x316n.re
2366 + -self.twiddle3.im * x415n.re
2367 + self.twiddle1.im * x514n.re
2368 + self.twiddle5.im * x613n.re
2369 + self.twiddle9.im * x712n.re
2370 + -self.twiddle6.im * x811n.re
2371 + -self.twiddle2.im * x910n.re;
2372 let b514im_a = buffer.load(0).im
2373 + self.twiddle5.re * x118p.im
2374 + self.twiddle9.re * x217p.im
2375 + self.twiddle4.re * x316p.im
2376 + self.twiddle1.re * x415p.im
2377 + self.twiddle6.re * x514p.im
2378 + self.twiddle8.re * x613p.im
2379 + self.twiddle3.re * x712p.im
2380 + self.twiddle2.re * x811p.im
2381 + self.twiddle7.re * x910p.im;
2382 let b514im_b = self.twiddle5.im * x118n.re
2383 + -self.twiddle9.im * x217n.re
2384 + -self.twiddle4.im * x316n.re
2385 + self.twiddle1.im * x415n.re
2386 + self.twiddle6.im * x514n.re
2387 + -self.twiddle8.im * x613n.re
2388 + -self.twiddle3.im * x712n.re
2389 + self.twiddle2.im * x811n.re
2390 + self.twiddle7.im * x910n.re;
2391 let b613im_a = buffer.load(0).im
2392 + self.twiddle6.re * x118p.im
2393 + self.twiddle7.re * x217p.im
2394 + self.twiddle1.re * x316p.im
2395 + self.twiddle5.re * x415p.im
2396 + self.twiddle8.re * x514p.im
2397 + self.twiddle2.re * x613p.im
2398 + self.twiddle4.re * x712p.im
2399 + self.twiddle9.re * x811p.im
2400 + self.twiddle3.re * x910p.im;
2401 let b613im_b = self.twiddle6.im * x118n.re
2402 + -self.twiddle7.im * x217n.re
2403 + -self.twiddle1.im * x316n.re
2404 + self.twiddle5.im * x415n.re
2405 + -self.twiddle8.im * x514n.re
2406 + -self.twiddle2.im * x613n.re
2407 + self.twiddle4.im * x712n.re
2408 + -self.twiddle9.im * x811n.re
2409 + -self.twiddle3.im * x910n.re;
2410 let b712im_a = buffer.load(0).im
2411 + self.twiddle7.re * x118p.im
2412 + self.twiddle5.re * x217p.im
2413 + self.twiddle2.re * x316p.im
2414 + self.twiddle9.re * x415p.im
2415 + self.twiddle3.re * x514p.im
2416 + self.twiddle4.re * x613p.im
2417 + self.twiddle8.re * x712p.im
2418 + self.twiddle1.re * x811p.im
2419 + self.twiddle6.re * x910p.im;
2420 let b712im_b = self.twiddle7.im * x118n.re
2421 + -self.twiddle5.im * x217n.re
2422 + self.twiddle2.im * x316n.re
2423 + self.twiddle9.im * x415n.re
2424 + -self.twiddle3.im * x514n.re
2425 + self.twiddle4.im * x613n.re
2426 + -self.twiddle8.im * x712n.re
2427 + -self.twiddle1.im * x811n.re
2428 + self.twiddle6.im * x910n.re;
2429 let b811im_a = buffer.load(0).im
2430 + self.twiddle8.re * x118p.im
2431 + self.twiddle3.re * x217p.im
2432 + self.twiddle5.re * x316p.im
2433 + self.twiddle6.re * x415p.im
2434 + self.twiddle2.re * x514p.im
2435 + self.twiddle9.re * x613p.im
2436 + self.twiddle1.re * x712p.im
2437 + self.twiddle7.re * x811p.im
2438 + self.twiddle4.re * x910p.im;
2439 let b811im_b = self.twiddle8.im * x118n.re
2440 + -self.twiddle3.im * x217n.re
2441 + self.twiddle5.im * x316n.re
2442 + -self.twiddle6.im * x415n.re
2443 + self.twiddle2.im * x514n.re
2444 + -self.twiddle9.im * x613n.re
2445 + -self.twiddle1.im * x712n.re
2446 + self.twiddle7.im * x811n.re
2447 + -self.twiddle4.im * x910n.re;
2448 let b910im_a = buffer.load(0).im
2449 + self.twiddle9.re * x118p.im
2450 + self.twiddle1.re * x217p.im
2451 + self.twiddle8.re * x316p.im
2452 + self.twiddle2.re * x415p.im
2453 + self.twiddle7.re * x514p.im
2454 + self.twiddle3.re * x613p.im
2455 + self.twiddle6.re * x712p.im
2456 + self.twiddle4.re * x811p.im
2457 + self.twiddle5.re * x910p.im;
2458 let b910im_b = self.twiddle9.im * x118n.re
2459 + -self.twiddle1.im * x217n.re
2460 + self.twiddle8.im * x316n.re
2461 + -self.twiddle2.im * x415n.re
2462 + self.twiddle7.im * x514n.re
2463 + -self.twiddle3.im * x613n.re
2464 + self.twiddle6.im * x712n.re
2465 + -self.twiddle4.im * x811n.re
2466 + self.twiddle5.im * x910n.re;
2467
2468 let out1re = b118re_a - b118re_b;
2469 let out1im = b118im_a + b118im_b;
2470 let out2re = b217re_a - b217re_b;
2471 let out2im = b217im_a + b217im_b;
2472 let out3re = b316re_a - b316re_b;
2473 let out3im = b316im_a + b316im_b;
2474 let out4re = b415re_a - b415re_b;
2475 let out4im = b415im_a + b415im_b;
2476 let out5re = b514re_a - b514re_b;
2477 let out5im = b514im_a + b514im_b;
2478 let out6re = b613re_a - b613re_b;
2479 let out6im = b613im_a + b613im_b;
2480 let out7re = b712re_a - b712re_b;
2481 let out7im = b712im_a + b712im_b;
2482 let out8re = b811re_a - b811re_b;
2483 let out8im = b811im_a + b811im_b;
2484 let out9re = b910re_a - b910re_b;
2485 let out9im = b910im_a + b910im_b;
2486 let out10re = b910re_a + b910re_b;
2487 let out10im = b910im_a - b910im_b;
2488 let out11re = b811re_a + b811re_b;
2489 let out11im = b811im_a - b811im_b;
2490 let out12re = b712re_a + b712re_b;
2491 let out12im = b712im_a - b712im_b;
2492 let out13re = b613re_a + b613re_b;
2493 let out13im = b613im_a - b613im_b;
2494 let out14re = b514re_a + b514re_b;
2495 let out14im = b514im_a - b514im_b;
2496 let out15re = b415re_a + b415re_b;
2497 let out15im = b415im_a - b415im_b;
2498 let out16re = b316re_a + b316re_b;
2499 let out16im = b316im_a - b316im_b;
2500 let out17re = b217re_a + b217re_b;
2501 let out17im = b217im_a - b217im_b;
2502 let out18re = b118re_a + b118re_b;
2503 let out18im = b118im_a - b118im_b;
2504 buffer.store(sum, 0);
2505 buffer.store(
2506 Complex {
2507 re: out1re,
2508 im: out1im,
2509 },
2510 1,
2511 );
2512 buffer.store(
2513 Complex {
2514 re: out2re,
2515 im: out2im,
2516 },
2517 2,
2518 );
2519 buffer.store(
2520 Complex {
2521 re: out3re,
2522 im: out3im,
2523 },
2524 3,
2525 );
2526 buffer.store(
2527 Complex {
2528 re: out4re,
2529 im: out4im,
2530 },
2531 4,
2532 );
2533 buffer.store(
2534 Complex {
2535 re: out5re,
2536 im: out5im,
2537 },
2538 5,
2539 );
2540 buffer.store(
2541 Complex {
2542 re: out6re,
2543 im: out6im,
2544 },
2545 6,
2546 );
2547 buffer.store(
2548 Complex {
2549 re: out7re,
2550 im: out7im,
2551 },
2552 7,
2553 );
2554 buffer.store(
2555 Complex {
2556 re: out8re,
2557 im: out8im,
2558 },
2559 8,
2560 );
2561 buffer.store(
2562 Complex {
2563 re: out9re,
2564 im: out9im,
2565 },
2566 9,
2567 );
2568 buffer.store(
2569 Complex {
2570 re: out10re,
2571 im: out10im,
2572 },
2573 10,
2574 );
2575 buffer.store(
2576 Complex {
2577 re: out11re,
2578 im: out11im,
2579 },
2580 11,
2581 );
2582 buffer.store(
2583 Complex {
2584 re: out12re,
2585 im: out12im,
2586 },
2587 12,
2588 );
2589 buffer.store(
2590 Complex {
2591 re: out13re,
2592 im: out13im,
2593 },
2594 13,
2595 );
2596 buffer.store(
2597 Complex {
2598 re: out14re,
2599 im: out14im,
2600 },
2601 14,
2602 );
2603 buffer.store(
2604 Complex {
2605 re: out15re,
2606 im: out15im,
2607 },
2608 15,
2609 );
2610 buffer.store(
2611 Complex {
2612 re: out16re,
2613 im: out16im,
2614 },
2615 16,
2616 );
2617 buffer.store(
2618 Complex {
2619 re: out17re,
2620 im: out17im,
2621 },
2622 17,
2623 );
2624 buffer.store(
2625 Complex {
2626 re: out18re,
2627 im: out18im,
2628 },
2629 18,
2630 );
2631 }
2632}
2633
/// 23-point FFT butterfly.
///
/// The struct caches the eleven twiddle factors `twiddles::compute_twiddle(k, 23, direction)`
/// for `k = 1..=11`, which `new` computes once. `perform_fft_contiguous` folds inputs `i` and
/// `23 - i` into sum/difference pairs, so these eleven factors are the only ones it reads.
pub struct Butterfly23<T> {
    // `twiddleK` holds `twiddles::compute_twiddle(K, 23, direction)`.
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    twiddle7: Complex<T>,
    twiddle8: Complex<T>,
    twiddle9: Complex<T>,
    twiddle10: Complex<T>,
    twiddle11: Complex<T>,
    // The direction this instance was constructed with; the twiddles above were computed for it.
    direction: FftDirection,
}
// Expands to the shared butterfly glue for `Butterfly23`: the `perform_fft_butterfly` wrapper
// around `perform_fft_contiguous`, plus the `Fft`, `Length` (fixed at 23) and `Direction` impls.
// The closure is the macro's `$direction_fn` argument and reads the stored `direction` field.
boilerplate_fft_butterfly!(Butterfly23, 23, |this: &Butterfly23<_>| this.direction);
2649impl<T: FftNum> Butterfly23<T> {
2650 pub fn new(direction: FftDirection) -> Self {
2651 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 23, direction);
2652 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 23, direction);
2653 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 23, direction);
2654 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 23, direction);
2655 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 23, direction);
2656 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 23, direction);
2657 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 23, direction);
2658 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 23, direction);
2659 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 23, direction);
2660 let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 23, direction);
2661 let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 23, direction);
2662 Self {
2663 twiddle1,
2664 twiddle2,
2665 twiddle3,
2666 twiddle4,
2667 twiddle5,
2668 twiddle6,
2669 twiddle7,
2670 twiddle8,
2671 twiddle9,
2672 twiddle10,
2673 twiddle11,
2674 direction,
2675 }
2676 }
2677
2678 #[inline(never)]
2679 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
2680 let x122p = buffer.load(1) + buffer.load(22);
2684 let x122n = buffer.load(1) - buffer.load(22);
2685 let x221p = buffer.load(2) + buffer.load(21);
2686 let x221n = buffer.load(2) - buffer.load(21);
2687 let x320p = buffer.load(3) + buffer.load(20);
2688 let x320n = buffer.load(3) - buffer.load(20);
2689 let x419p = buffer.load(4) + buffer.load(19);
2690 let x419n = buffer.load(4) - buffer.load(19);
2691 let x518p = buffer.load(5) + buffer.load(18);
2692 let x518n = buffer.load(5) - buffer.load(18);
2693 let x617p = buffer.load(6) + buffer.load(17);
2694 let x617n = buffer.load(6) - buffer.load(17);
2695 let x716p = buffer.load(7) + buffer.load(16);
2696 let x716n = buffer.load(7) - buffer.load(16);
2697 let x815p = buffer.load(8) + buffer.load(15);
2698 let x815n = buffer.load(8) - buffer.load(15);
2699 let x914p = buffer.load(9) + buffer.load(14);
2700 let x914n = buffer.load(9) - buffer.load(14);
2701 let x1013p = buffer.load(10) + buffer.load(13);
2702 let x1013n = buffer.load(10) - buffer.load(13);
2703 let x1112p = buffer.load(11) + buffer.load(12);
2704 let x1112n = buffer.load(11) - buffer.load(12);
2705 let sum = buffer.load(0)
2706 + x122p
2707 + x221p
2708 + x320p
2709 + x419p
2710 + x518p
2711 + x617p
2712 + x716p
2713 + x815p
2714 + x914p
2715 + x1013p
2716 + x1112p;
2717 let b122re_a = buffer.load(0).re
2718 + self.twiddle1.re * x122p.re
2719 + self.twiddle2.re * x221p.re
2720 + self.twiddle3.re * x320p.re
2721 + self.twiddle4.re * x419p.re
2722 + self.twiddle5.re * x518p.re
2723 + self.twiddle6.re * x617p.re
2724 + self.twiddle7.re * x716p.re
2725 + self.twiddle8.re * x815p.re
2726 + self.twiddle9.re * x914p.re
2727 + self.twiddle10.re * x1013p.re
2728 + self.twiddle11.re * x1112p.re;
2729 let b122re_b = self.twiddle1.im * x122n.im
2730 + self.twiddle2.im * x221n.im
2731 + self.twiddle3.im * x320n.im
2732 + self.twiddle4.im * x419n.im
2733 + self.twiddle5.im * x518n.im
2734 + self.twiddle6.im * x617n.im
2735 + self.twiddle7.im * x716n.im
2736 + self.twiddle8.im * x815n.im
2737 + self.twiddle9.im * x914n.im
2738 + self.twiddle10.im * x1013n.im
2739 + self.twiddle11.im * x1112n.im;
2740 let b221re_a = buffer.load(0).re
2741 + self.twiddle2.re * x122p.re
2742 + self.twiddle4.re * x221p.re
2743 + self.twiddle6.re * x320p.re
2744 + self.twiddle8.re * x419p.re
2745 + self.twiddle10.re * x518p.re
2746 + self.twiddle11.re * x617p.re
2747 + self.twiddle9.re * x716p.re
2748 + self.twiddle7.re * x815p.re
2749 + self.twiddle5.re * x914p.re
2750 + self.twiddle3.re * x1013p.re
2751 + self.twiddle1.re * x1112p.re;
2752 let b221re_b = self.twiddle2.im * x122n.im
2753 + self.twiddle4.im * x221n.im
2754 + self.twiddle6.im * x320n.im
2755 + self.twiddle8.im * x419n.im
2756 + self.twiddle10.im * x518n.im
2757 + -self.twiddle11.im * x617n.im
2758 + -self.twiddle9.im * x716n.im
2759 + -self.twiddle7.im * x815n.im
2760 + -self.twiddle5.im * x914n.im
2761 + -self.twiddle3.im * x1013n.im
2762 + -self.twiddle1.im * x1112n.im;
2763 let b320re_a = buffer.load(0).re
2764 + self.twiddle3.re * x122p.re
2765 + self.twiddle6.re * x221p.re
2766 + self.twiddle9.re * x320p.re
2767 + self.twiddle11.re * x419p.re
2768 + self.twiddle8.re * x518p.re
2769 + self.twiddle5.re * x617p.re
2770 + self.twiddle2.re * x716p.re
2771 + self.twiddle1.re * x815p.re
2772 + self.twiddle4.re * x914p.re
2773 + self.twiddle7.re * x1013p.re
2774 + self.twiddle10.re * x1112p.re;
2775 let b320re_b = self.twiddle3.im * x122n.im
2776 + self.twiddle6.im * x221n.im
2777 + self.twiddle9.im * x320n.im
2778 + -self.twiddle11.im * x419n.im
2779 + -self.twiddle8.im * x518n.im
2780 + -self.twiddle5.im * x617n.im
2781 + -self.twiddle2.im * x716n.im
2782 + self.twiddle1.im * x815n.im
2783 + self.twiddle4.im * x914n.im
2784 + self.twiddle7.im * x1013n.im
2785 + self.twiddle10.im * x1112n.im;
2786 let b419re_a = buffer.load(0).re
2787 + self.twiddle4.re * x122p.re
2788 + self.twiddle8.re * x221p.re
2789 + self.twiddle11.re * x320p.re
2790 + self.twiddle7.re * x419p.re
2791 + self.twiddle3.re * x518p.re
2792 + self.twiddle1.re * x617p.re
2793 + self.twiddle5.re * x716p.re
2794 + self.twiddle9.re * x815p.re
2795 + self.twiddle10.re * x914p.re
2796 + self.twiddle6.re * x1013p.re
2797 + self.twiddle2.re * x1112p.re;
2798 let b419re_b = self.twiddle4.im * x122n.im
2799 + self.twiddle8.im * x221n.im
2800 + -self.twiddle11.im * x320n.im
2801 + -self.twiddle7.im * x419n.im
2802 + -self.twiddle3.im * x518n.im
2803 + self.twiddle1.im * x617n.im
2804 + self.twiddle5.im * x716n.im
2805 + self.twiddle9.im * x815n.im
2806 + -self.twiddle10.im * x914n.im
2807 + -self.twiddle6.im * x1013n.im
2808 + -self.twiddle2.im * x1112n.im;
2809 let b518re_a = buffer.load(0).re
2810 + self.twiddle5.re * x122p.re
2811 + self.twiddle10.re * x221p.re
2812 + self.twiddle8.re * x320p.re
2813 + self.twiddle3.re * x419p.re
2814 + self.twiddle2.re * x518p.re
2815 + self.twiddle7.re * x617p.re
2816 + self.twiddle11.re * x716p.re
2817 + self.twiddle6.re * x815p.re
2818 + self.twiddle1.re * x914p.re
2819 + self.twiddle4.re * x1013p.re
2820 + self.twiddle9.re * x1112p.re;
2821 let b518re_b = self.twiddle5.im * x122n.im
2822 + self.twiddle10.im * x221n.im
2823 + -self.twiddle8.im * x320n.im
2824 + -self.twiddle3.im * x419n.im
2825 + self.twiddle2.im * x518n.im
2826 + self.twiddle7.im * x617n.im
2827 + -self.twiddle11.im * x716n.im
2828 + -self.twiddle6.im * x815n.im
2829 + -self.twiddle1.im * x914n.im
2830 + self.twiddle4.im * x1013n.im
2831 + self.twiddle9.im * x1112n.im;
2832 let b617re_a = buffer.load(0).re
2833 + self.twiddle6.re * x122p.re
2834 + self.twiddle11.re * x221p.re
2835 + self.twiddle5.re * x320p.re
2836 + self.twiddle1.re * x419p.re
2837 + self.twiddle7.re * x518p.re
2838 + self.twiddle10.re * x617p.re
2839 + self.twiddle4.re * x716p.re
2840 + self.twiddle2.re * x815p.re
2841 + self.twiddle8.re * x914p.re
2842 + self.twiddle9.re * x1013p.re
2843 + self.twiddle3.re * x1112p.re;
2844 let b617re_b = self.twiddle6.im * x122n.im
2845 + -self.twiddle11.im * x221n.im
2846 + -self.twiddle5.im * x320n.im
2847 + self.twiddle1.im * x419n.im
2848 + self.twiddle7.im * x518n.im
2849 + -self.twiddle10.im * x617n.im
2850 + -self.twiddle4.im * x716n.im
2851 + self.twiddle2.im * x815n.im
2852 + self.twiddle8.im * x914n.im
2853 + -self.twiddle9.im * x1013n.im
2854 + -self.twiddle3.im * x1112n.im;
2855 let b716re_a = buffer.load(0).re
2856 + self.twiddle7.re * x122p.re
2857 + self.twiddle9.re * x221p.re
2858 + self.twiddle2.re * x320p.re
2859 + self.twiddle5.re * x419p.re
2860 + self.twiddle11.re * x518p.re
2861 + self.twiddle4.re * x617p.re
2862 + self.twiddle3.re * x716p.re
2863 + self.twiddle10.re * x815p.re
2864 + self.twiddle6.re * x914p.re
2865 + self.twiddle1.re * x1013p.re
2866 + self.twiddle8.re * x1112p.re;
2867 let b716re_b = self.twiddle7.im * x122n.im
2868 + -self.twiddle9.im * x221n.im
2869 + -self.twiddle2.im * x320n.im
2870 + self.twiddle5.im * x419n.im
2871 + -self.twiddle11.im * x518n.im
2872 + -self.twiddle4.im * x617n.im
2873 + self.twiddle3.im * x716n.im
2874 + self.twiddle10.im * x815n.im
2875 + -self.twiddle6.im * x914n.im
2876 + self.twiddle1.im * x1013n.im
2877 + self.twiddle8.im * x1112n.im;
2878 let b815re_a = buffer.load(0).re
2879 + self.twiddle8.re * x122p.re
2880 + self.twiddle7.re * x221p.re
2881 + self.twiddle1.re * x320p.re
2882 + self.twiddle9.re * x419p.re
2883 + self.twiddle6.re * x518p.re
2884 + self.twiddle2.re * x617p.re
2885 + self.twiddle10.re * x716p.re
2886 + self.twiddle5.re * x815p.re
2887 + self.twiddle3.re * x914p.re
2888 + self.twiddle11.re * x1013p.re
2889 + self.twiddle4.re * x1112p.re;
2890 let b815re_b = self.twiddle8.im * x122n.im
2891 + -self.twiddle7.im * x221n.im
2892 + self.twiddle1.im * x320n.im
2893 + self.twiddle9.im * x419n.im
2894 + -self.twiddle6.im * x518n.im
2895 + self.twiddle2.im * x617n.im
2896 + self.twiddle10.im * x716n.im
2897 + -self.twiddle5.im * x815n.im
2898 + self.twiddle3.im * x914n.im
2899 + self.twiddle11.im * x1013n.im
2900 + -self.twiddle4.im * x1112n.im;
2901 let b914re_a = buffer.load(0).re
2902 + self.twiddle9.re * x122p.re
2903 + self.twiddle5.re * x221p.re
2904 + self.twiddle4.re * x320p.re
2905 + self.twiddle10.re * x419p.re
2906 + self.twiddle1.re * x518p.re
2907 + self.twiddle8.re * x617p.re
2908 + self.twiddle6.re * x716p.re
2909 + self.twiddle3.re * x815p.re
2910 + self.twiddle11.re * x914p.re
2911 + self.twiddle2.re * x1013p.re
2912 + self.twiddle7.re * x1112p.re;
2913 let b914re_b = self.twiddle9.im * x122n.im
2914 + -self.twiddle5.im * x221n.im
2915 + self.twiddle4.im * x320n.im
2916 + -self.twiddle10.im * x419n.im
2917 + -self.twiddle1.im * x518n.im
2918 + self.twiddle8.im * x617n.im
2919 + -self.twiddle6.im * x716n.im
2920 + self.twiddle3.im * x815n.im
2921 + -self.twiddle11.im * x914n.im
2922 + -self.twiddle2.im * x1013n.im
2923 + self.twiddle7.im * x1112n.im;
2924 let b1013re_a = buffer.load(0).re
2925 + self.twiddle10.re * x122p.re
2926 + self.twiddle3.re * x221p.re
2927 + self.twiddle7.re * x320p.re
2928 + self.twiddle6.re * x419p.re
2929 + self.twiddle4.re * x518p.re
2930 + self.twiddle9.re * x617p.re
2931 + self.twiddle1.re * x716p.re
2932 + self.twiddle11.re * x815p.re
2933 + self.twiddle2.re * x914p.re
2934 + self.twiddle8.re * x1013p.re
2935 + self.twiddle5.re * x1112p.re;
2936 let b1013re_b = self.twiddle10.im * x122n.im
2937 + -self.twiddle3.im * x221n.im
2938 + self.twiddle7.im * x320n.im
2939 + -self.twiddle6.im * x419n.im
2940 + self.twiddle4.im * x518n.im
2941 + -self.twiddle9.im * x617n.im
2942 + self.twiddle1.im * x716n.im
2943 + self.twiddle11.im * x815n.im
2944 + -self.twiddle2.im * x914n.im
2945 + self.twiddle8.im * x1013n.im
2946 + -self.twiddle5.im * x1112n.im;
2947 let b1112re_a = buffer.load(0).re
2948 + self.twiddle11.re * x122p.re
2949 + self.twiddle1.re * x221p.re
2950 + self.twiddle10.re * x320p.re
2951 + self.twiddle2.re * x419p.re
2952 + self.twiddle9.re * x518p.re
2953 + self.twiddle3.re * x617p.re
2954 + self.twiddle8.re * x716p.re
2955 + self.twiddle4.re * x815p.re
2956 + self.twiddle7.re * x914p.re
2957 + self.twiddle5.re * x1013p.re
2958 + self.twiddle6.re * x1112p.re;
2959 let b1112re_b = self.twiddle11.im * x122n.im
2960 + -self.twiddle1.im * x221n.im
2961 + self.twiddle10.im * x320n.im
2962 + -self.twiddle2.im * x419n.im
2963 + self.twiddle9.im * x518n.im
2964 + -self.twiddle3.im * x617n.im
2965 + self.twiddle8.im * x716n.im
2966 + -self.twiddle4.im * x815n.im
2967 + self.twiddle7.im * x914n.im
2968 + -self.twiddle5.im * x1013n.im
2969 + self.twiddle6.im * x1112n.im;
2970
2971 let b122im_a = buffer.load(0).im
2972 + self.twiddle1.re * x122p.im
2973 + self.twiddle2.re * x221p.im
2974 + self.twiddle3.re * x320p.im
2975 + self.twiddle4.re * x419p.im
2976 + self.twiddle5.re * x518p.im
2977 + self.twiddle6.re * x617p.im
2978 + self.twiddle7.re * x716p.im
2979 + self.twiddle8.re * x815p.im
2980 + self.twiddle9.re * x914p.im
2981 + self.twiddle10.re * x1013p.im
2982 + self.twiddle11.re * x1112p.im;
2983 let b122im_b = self.twiddle1.im * x122n.re
2984 + self.twiddle2.im * x221n.re
2985 + self.twiddle3.im * x320n.re
2986 + self.twiddle4.im * x419n.re
2987 + self.twiddle5.im * x518n.re
2988 + self.twiddle6.im * x617n.re
2989 + self.twiddle7.im * x716n.re
2990 + self.twiddle8.im * x815n.re
2991 + self.twiddle9.im * x914n.re
2992 + self.twiddle10.im * x1013n.re
2993 + self.twiddle11.im * x1112n.re;
2994 let b221im_a = buffer.load(0).im
2995 + self.twiddle2.re * x122p.im
2996 + self.twiddle4.re * x221p.im
2997 + self.twiddle6.re * x320p.im
2998 + self.twiddle8.re * x419p.im
2999 + self.twiddle10.re * x518p.im
3000 + self.twiddle11.re * x617p.im
3001 + self.twiddle9.re * x716p.im
3002 + self.twiddle7.re * x815p.im
3003 + self.twiddle5.re * x914p.im
3004 + self.twiddle3.re * x1013p.im
3005 + self.twiddle1.re * x1112p.im;
3006 let b221im_b = self.twiddle2.im * x122n.re
3007 + self.twiddle4.im * x221n.re
3008 + self.twiddle6.im * x320n.re
3009 + self.twiddle8.im * x419n.re
3010 + self.twiddle10.im * x518n.re
3011 + -self.twiddle11.im * x617n.re
3012 + -self.twiddle9.im * x716n.re
3013 + -self.twiddle7.im * x815n.re
3014 + -self.twiddle5.im * x914n.re
3015 + -self.twiddle3.im * x1013n.re
3016 + -self.twiddle1.im * x1112n.re;
3017 let b320im_a = buffer.load(0).im
3018 + self.twiddle3.re * x122p.im
3019 + self.twiddle6.re * x221p.im
3020 + self.twiddle9.re * x320p.im
3021 + self.twiddle11.re * x419p.im
3022 + self.twiddle8.re * x518p.im
3023 + self.twiddle5.re * x617p.im
3024 + self.twiddle2.re * x716p.im
3025 + self.twiddle1.re * x815p.im
3026 + self.twiddle4.re * x914p.im
3027 + self.twiddle7.re * x1013p.im
3028 + self.twiddle10.re * x1112p.im;
3029 let b320im_b = self.twiddle3.im * x122n.re
3030 + self.twiddle6.im * x221n.re
3031 + self.twiddle9.im * x320n.re
3032 + -self.twiddle11.im * x419n.re
3033 + -self.twiddle8.im * x518n.re
3034 + -self.twiddle5.im * x617n.re
3035 + -self.twiddle2.im * x716n.re
3036 + self.twiddle1.im * x815n.re
3037 + self.twiddle4.im * x914n.re
3038 + self.twiddle7.im * x1013n.re
3039 + self.twiddle10.im * x1112n.re;
3040 let b419im_a = buffer.load(0).im
3041 + self.twiddle4.re * x122p.im
3042 + self.twiddle8.re * x221p.im
3043 + self.twiddle11.re * x320p.im
3044 + self.twiddle7.re * x419p.im
3045 + self.twiddle3.re * x518p.im
3046 + self.twiddle1.re * x617p.im
3047 + self.twiddle5.re * x716p.im
3048 + self.twiddle9.re * x815p.im
3049 + self.twiddle10.re * x914p.im
3050 + self.twiddle6.re * x1013p.im
3051 + self.twiddle2.re * x1112p.im;
3052 let b419im_b = self.twiddle4.im * x122n.re
3053 + self.twiddle8.im * x221n.re
3054 + -self.twiddle11.im * x320n.re
3055 + -self.twiddle7.im * x419n.re
3056 + -self.twiddle3.im * x518n.re
3057 + self.twiddle1.im * x617n.re
3058 + self.twiddle5.im * x716n.re
3059 + self.twiddle9.im * x815n.re
3060 + -self.twiddle10.im * x914n.re
3061 + -self.twiddle6.im * x1013n.re
3062 + -self.twiddle2.im * x1112n.re;
3063 let b518im_a = buffer.load(0).im
3064 + self.twiddle5.re * x122p.im
3065 + self.twiddle10.re * x221p.im
3066 + self.twiddle8.re * x320p.im
3067 + self.twiddle3.re * x419p.im
3068 + self.twiddle2.re * x518p.im
3069 + self.twiddle7.re * x617p.im
3070 + self.twiddle11.re * x716p.im
3071 + self.twiddle6.re * x815p.im
3072 + self.twiddle1.re * x914p.im
3073 + self.twiddle4.re * x1013p.im
3074 + self.twiddle9.re * x1112p.im;
3075 let b518im_b = self.twiddle5.im * x122n.re
3076 + self.twiddle10.im * x221n.re
3077 + -self.twiddle8.im * x320n.re
3078 + -self.twiddle3.im * x419n.re
3079 + self.twiddle2.im * x518n.re
3080 + self.twiddle7.im * x617n.re
3081 + -self.twiddle11.im * x716n.re
3082 + -self.twiddle6.im * x815n.re
3083 + -self.twiddle1.im * x914n.re
3084 + self.twiddle4.im * x1013n.re
3085 + self.twiddle9.im * x1112n.re;
3086 let b617im_a = buffer.load(0).im
3087 + self.twiddle6.re * x122p.im
3088 + self.twiddle11.re * x221p.im
3089 + self.twiddle5.re * x320p.im
3090 + self.twiddle1.re * x419p.im
3091 + self.twiddle7.re * x518p.im
3092 + self.twiddle10.re * x617p.im
3093 + self.twiddle4.re * x716p.im
3094 + self.twiddle2.re * x815p.im
3095 + self.twiddle8.re * x914p.im
3096 + self.twiddle9.re * x1013p.im
3097 + self.twiddle3.re * x1112p.im;
3098 let b617im_b = self.twiddle6.im * x122n.re
3099 + -self.twiddle11.im * x221n.re
3100 + -self.twiddle5.im * x320n.re
3101 + self.twiddle1.im * x419n.re
3102 + self.twiddle7.im * x518n.re
3103 + -self.twiddle10.im * x617n.re
3104 + -self.twiddle4.im * x716n.re
3105 + self.twiddle2.im * x815n.re
3106 + self.twiddle8.im * x914n.re
3107 + -self.twiddle9.im * x1013n.re
3108 + -self.twiddle3.im * x1112n.re;
3109 let b716im_a = buffer.load(0).im
3110 + self.twiddle7.re * x122p.im
3111 + self.twiddle9.re * x221p.im
3112 + self.twiddle2.re * x320p.im
3113 + self.twiddle5.re * x419p.im
3114 + self.twiddle11.re * x518p.im
3115 + self.twiddle4.re * x617p.im
3116 + self.twiddle3.re * x716p.im
3117 + self.twiddle10.re * x815p.im
3118 + self.twiddle6.re * x914p.im
3119 + self.twiddle1.re * x1013p.im
3120 + self.twiddle8.re * x1112p.im;
3121 let b716im_b = self.twiddle7.im * x122n.re
3122 + -self.twiddle9.im * x221n.re
3123 + -self.twiddle2.im * x320n.re
3124 + self.twiddle5.im * x419n.re
3125 + -self.twiddle11.im * x518n.re
3126 + -self.twiddle4.im * x617n.re
3127 + self.twiddle3.im * x716n.re
3128 + self.twiddle10.im * x815n.re
3129 + -self.twiddle6.im * x914n.re
3130 + self.twiddle1.im * x1013n.re
3131 + self.twiddle8.im * x1112n.re;
3132 let b815im_a = buffer.load(0).im
3133 + self.twiddle8.re * x122p.im
3134 + self.twiddle7.re * x221p.im
3135 + self.twiddle1.re * x320p.im
3136 + self.twiddle9.re * x419p.im
3137 + self.twiddle6.re * x518p.im
3138 + self.twiddle2.re * x617p.im
3139 + self.twiddle10.re * x716p.im
3140 + self.twiddle5.re * x815p.im
3141 + self.twiddle3.re * x914p.im
3142 + self.twiddle11.re * x1013p.im
3143 + self.twiddle4.re * x1112p.im;
3144 let b815im_b = self.twiddle8.im * x122n.re
3145 + -self.twiddle7.im * x221n.re
3146 + self.twiddle1.im * x320n.re
3147 + self.twiddle9.im * x419n.re
3148 + -self.twiddle6.im * x518n.re
3149 + self.twiddle2.im * x617n.re
3150 + self.twiddle10.im * x716n.re
3151 + -self.twiddle5.im * x815n.re
3152 + self.twiddle3.im * x914n.re
3153 + self.twiddle11.im * x1013n.re
3154 + -self.twiddle4.im * x1112n.re;
3155 let b914im_a = buffer.load(0).im
3156 + self.twiddle9.re * x122p.im
3157 + self.twiddle5.re * x221p.im
3158 + self.twiddle4.re * x320p.im
3159 + self.twiddle10.re * x419p.im
3160 + self.twiddle1.re * x518p.im
3161 + self.twiddle8.re * x617p.im
3162 + self.twiddle6.re * x716p.im
3163 + self.twiddle3.re * x815p.im
3164 + self.twiddle11.re * x914p.im
3165 + self.twiddle2.re * x1013p.im
3166 + self.twiddle7.re * x1112p.im;
3167 let b914im_b = self.twiddle9.im * x122n.re
3168 + -self.twiddle5.im * x221n.re
3169 + self.twiddle4.im * x320n.re
3170 + -self.twiddle10.im * x419n.re
3171 + -self.twiddle1.im * x518n.re
3172 + self.twiddle8.im * x617n.re
3173 + -self.twiddle6.im * x716n.re
3174 + self.twiddle3.im * x815n.re
3175 + -self.twiddle11.im * x914n.re
3176 + -self.twiddle2.im * x1013n.re
3177 + self.twiddle7.im * x1112n.re;
3178 let b1013im_a = buffer.load(0).im
3179 + self.twiddle10.re * x122p.im
3180 + self.twiddle3.re * x221p.im
3181 + self.twiddle7.re * x320p.im
3182 + self.twiddle6.re * x419p.im
3183 + self.twiddle4.re * x518p.im
3184 + self.twiddle9.re * x617p.im
3185 + self.twiddle1.re * x716p.im
3186 + self.twiddle11.re * x815p.im
3187 + self.twiddle2.re * x914p.im
3188 + self.twiddle8.re * x1013p.im
3189 + self.twiddle5.re * x1112p.im;
3190 let b1013im_b = self.twiddle10.im * x122n.re
3191 + -self.twiddle3.im * x221n.re
3192 + self.twiddle7.im * x320n.re
3193 + -self.twiddle6.im * x419n.re
3194 + self.twiddle4.im * x518n.re
3195 + -self.twiddle9.im * x617n.re
3196 + self.twiddle1.im * x716n.re
3197 + self.twiddle11.im * x815n.re
3198 + -self.twiddle2.im * x914n.re
3199 + self.twiddle8.im * x1013n.re
3200 + -self.twiddle5.im * x1112n.re;
3201 let b1112im_a = buffer.load(0).im
3202 + self.twiddle11.re * x122p.im
3203 + self.twiddle1.re * x221p.im
3204 + self.twiddle10.re * x320p.im
3205 + self.twiddle2.re * x419p.im
3206 + self.twiddle9.re * x518p.im
3207 + self.twiddle3.re * x617p.im
3208 + self.twiddle8.re * x716p.im
3209 + self.twiddle4.re * x815p.im
3210 + self.twiddle7.re * x914p.im
3211 + self.twiddle5.re * x1013p.im
3212 + self.twiddle6.re * x1112p.im;
3213 let b1112im_b = self.twiddle11.im * x122n.re
3214 + -self.twiddle1.im * x221n.re
3215 + self.twiddle10.im * x320n.re
3216 + -self.twiddle2.im * x419n.re
3217 + self.twiddle9.im * x518n.re
3218 + -self.twiddle3.im * x617n.re
3219 + self.twiddle8.im * x716n.re
3220 + -self.twiddle4.im * x815n.re
3221 + self.twiddle7.im * x914n.re
3222 + -self.twiddle5.im * x1013n.re
3223 + self.twiddle6.im * x1112n.re;
3224
3225 let out1re = b122re_a - b122re_b;
3226 let out1im = b122im_a + b122im_b;
3227 let out2re = b221re_a - b221re_b;
3228 let out2im = b221im_a + b221im_b;
3229 let out3re = b320re_a - b320re_b;
3230 let out3im = b320im_a + b320im_b;
3231 let out4re = b419re_a - b419re_b;
3232 let out4im = b419im_a + b419im_b;
3233 let out5re = b518re_a - b518re_b;
3234 let out5im = b518im_a + b518im_b;
3235 let out6re = b617re_a - b617re_b;
3236 let out6im = b617im_a + b617im_b;
3237 let out7re = b716re_a - b716re_b;
3238 let out7im = b716im_a + b716im_b;
3239 let out8re = b815re_a - b815re_b;
3240 let out8im = b815im_a + b815im_b;
3241 let out9re = b914re_a - b914re_b;
3242 let out9im = b914im_a + b914im_b;
3243 let out10re = b1013re_a - b1013re_b;
3244 let out10im = b1013im_a + b1013im_b;
3245 let out11re = b1112re_a - b1112re_b;
3246 let out11im = b1112im_a + b1112im_b;
3247 let out12re = b1112re_a + b1112re_b;
3248 let out12im = b1112im_a - b1112im_b;
3249 let out13re = b1013re_a + b1013re_b;
3250 let out13im = b1013im_a - b1013im_b;
3251 let out14re = b914re_a + b914re_b;
3252 let out14im = b914im_a - b914im_b;
3253 let out15re = b815re_a + b815re_b;
3254 let out15im = b815im_a - b815im_b;
3255 let out16re = b716re_a + b716re_b;
3256 let out16im = b716im_a - b716im_b;
3257 let out17re = b617re_a + b617re_b;
3258 let out17im = b617im_a - b617im_b;
3259 let out18re = b518re_a + b518re_b;
3260 let out18im = b518im_a - b518im_b;
3261 let out19re = b419re_a + b419re_b;
3262 let out19im = b419im_a - b419im_b;
3263 let out20re = b320re_a + b320re_b;
3264 let out20im = b320im_a - b320im_b;
3265 let out21re = b221re_a + b221re_b;
3266 let out21im = b221im_a - b221im_b;
3267 let out22re = b122re_a + b122re_b;
3268 let out22im = b122im_a - b122im_b;
3269 buffer.store(sum, 0);
3270 buffer.store(
3271 Complex {
3272 re: out1re,
3273 im: out1im,
3274 },
3275 1,
3276 );
3277 buffer.store(
3278 Complex {
3279 re: out2re,
3280 im: out2im,
3281 },
3282 2,
3283 );
3284 buffer.store(
3285 Complex {
3286 re: out3re,
3287 im: out3im,
3288 },
3289 3,
3290 );
3291 buffer.store(
3292 Complex {
3293 re: out4re,
3294 im: out4im,
3295 },
3296 4,
3297 );
3298 buffer.store(
3299 Complex {
3300 re: out5re,
3301 im: out5im,
3302 },
3303 5,
3304 );
3305 buffer.store(
3306 Complex {
3307 re: out6re,
3308 im: out6im,
3309 },
3310 6,
3311 );
3312 buffer.store(
3313 Complex {
3314 re: out7re,
3315 im: out7im,
3316 },
3317 7,
3318 );
3319 buffer.store(
3320 Complex {
3321 re: out8re,
3322 im: out8im,
3323 },
3324 8,
3325 );
3326 buffer.store(
3327 Complex {
3328 re: out9re,
3329 im: out9im,
3330 },
3331 9,
3332 );
3333 buffer.store(
3334 Complex {
3335 re: out10re,
3336 im: out10im,
3337 },
3338 10,
3339 );
3340 buffer.store(
3341 Complex {
3342 re: out11re,
3343 im: out11im,
3344 },
3345 11,
3346 );
3347 buffer.store(
3348 Complex {
3349 re: out12re,
3350 im: out12im,
3351 },
3352 12,
3353 );
3354 buffer.store(
3355 Complex {
3356 re: out13re,
3357 im: out13im,
3358 },
3359 13,
3360 );
3361 buffer.store(
3362 Complex {
3363 re: out14re,
3364 im: out14im,
3365 },
3366 14,
3367 );
3368 buffer.store(
3369 Complex {
3370 re: out15re,
3371 im: out15im,
3372 },
3373 15,
3374 );
3375 buffer.store(
3376 Complex {
3377 re: out16re,
3378 im: out16im,
3379 },
3380 16,
3381 );
3382 buffer.store(
3383 Complex {
3384 re: out17re,
3385 im: out17im,
3386 },
3387 17,
3388 );
3389 buffer.store(
3390 Complex {
3391 re: out18re,
3392 im: out18im,
3393 },
3394 18,
3395 );
3396 buffer.store(
3397 Complex {
3398 re: out19re,
3399 im: out19im,
3400 },
3401 19,
3402 );
3403 buffer.store(
3404 Complex {
3405 re: out20re,
3406 im: out20im,
3407 },
3408 20,
3409 );
3410 buffer.store(
3411 Complex {
3412 re: out21re,
3413 im: out21im,
3414 },
3415 21,
3416 );
3417 buffer.store(
3418 Complex {
3419 re: out22re,
3420 im: out22im,
3421 },
3422 22,
3423 );
3424 }
3425}
3426
3427pub struct Butterfly24<T> {
3428 butterfly4: Butterfly4<T>,
3429 butterfly6: Butterfly6<T>,
3430 twiddle1: Complex<T>,
3431 twiddle2: Complex<T>,
3432 twiddle4: Complex<T>,
3433 twiddle5: Complex<T>,
3434 twiddle8: Complex<T>,
3435 twiddle10: Complex<T>,
3436 root2: T,
3437}
3438boilerplate_fft_butterfly!(Butterfly24, 24, |this: &Butterfly24<_>| this
3439 .butterfly4
3440 .fft_direction());
3441impl<T: FftNum> Butterfly24<T> {
3442 #[inline(always)]
3443 pub fn new(direction: FftDirection) -> Self {
3444 Self {
3445 butterfly4: Butterfly4::new(direction),
3446 butterfly6: Butterfly6::new(direction),
3447 twiddle1: twiddles::compute_twiddle(1, 24, direction),
3448 twiddle2: twiddles::compute_twiddle(2, 24, direction),
3449 twiddle4: twiddles::compute_twiddle(4, 24, direction),
3450 twiddle5: twiddles::compute_twiddle(5, 24, direction),
3451 twiddle8: twiddles::compute_twiddle(8, 24, direction),
3452 twiddle10: twiddles::compute_twiddle(10, 24, direction),
3453 root2: T::from_f64(0.5f64.sqrt()).unwrap(),
3454 }
3455 }
3456 #[inline(never)]
3457 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3458 let mut scratch0 = [
3461 buffer.load(0),
3462 buffer.load(4),
3463 buffer.load(8),
3464 buffer.load(12),
3465 buffer.load(16),
3466 buffer.load(20),
3467 ];
3468 let mut scratch1 = [
3469 buffer.load(1),
3470 buffer.load(5),
3471 buffer.load(9),
3472 buffer.load(13),
3473 buffer.load(17),
3474 buffer.load(21),
3475 ];
3476 let mut scratch2 = [
3477 buffer.load(2),
3478 buffer.load(6),
3479 buffer.load(10),
3480 buffer.load(14),
3481 buffer.load(18),
3482 buffer.load(22),
3483 ];
3484 let mut scratch3 = [
3485 buffer.load(3),
3486 buffer.load(7),
3487 buffer.load(11),
3488 buffer.load(15),
3489 buffer.load(19),
3490 buffer.load(23),
3491 ];
3492
3493 self.butterfly6.perform_fft_contiguous(&mut scratch0);
3495 self.butterfly6.perform_fft_contiguous(&mut scratch1);
3496 self.butterfly6.perform_fft_contiguous(&mut scratch2);
3497 self.butterfly6.perform_fft_contiguous(&mut scratch3);
3498
3499 scratch1[1] = scratch1[1] * self.twiddle1;
3501 scratch1[2] = scratch1[2] * self.twiddle2;
3502 scratch1[3] =
3503 (twiddles::rotate_90(scratch1[3], self.fft_direction()) + scratch1[3]) * self.root2;
3504 scratch1[4] = scratch1[4] * self.twiddle4;
3505 scratch1[5] = scratch1[5] * self.twiddle5;
3506 scratch2[1] = scratch2[1] * self.twiddle2;
3507 scratch2[2] = scratch2[2] * self.twiddle4;
3508 scratch2[3] = twiddles::rotate_90(scratch2[3], self.fft_direction());
3509 scratch2[4] = scratch2[4] * self.twiddle8;
3510 scratch2[5] = scratch2[5] * self.twiddle10;
3511 scratch3[1] =
3512 (twiddles::rotate_90(scratch3[1], self.fft_direction()) + scratch3[1]) * self.root2;
3513 scratch3[2] = twiddles::rotate_90(scratch3[2], self.fft_direction());
3514 scratch3[3] =
3515 (twiddles::rotate_90(scratch3[3], self.fft_direction()) - scratch3[3]) * self.root2;
3516 scratch3[4] = -scratch3[4];
3517 scratch3[5] =
3518 (twiddles::rotate_90(scratch3[5], self.fft_direction()) + scratch3[5]) * -self.root2;
3519
3520 self.butterfly4.perform_fft_strided(
3524 &mut scratch0[0],
3525 &mut scratch1[0],
3526 &mut scratch2[0],
3527 &mut scratch3[0],
3528 );
3529 self.butterfly4.perform_fft_strided(
3530 &mut scratch0[1],
3531 &mut scratch1[1],
3532 &mut scratch2[1],
3533 &mut scratch3[1],
3534 );
3535 self.butterfly4.perform_fft_strided(
3536 &mut scratch0[2],
3537 &mut scratch1[2],
3538 &mut scratch2[2],
3539 &mut scratch3[2],
3540 );
3541 self.butterfly4.perform_fft_strided(
3542 &mut scratch0[3],
3543 &mut scratch1[3],
3544 &mut scratch2[3],
3545 &mut scratch3[3],
3546 );
3547 self.butterfly4.perform_fft_strided(
3548 &mut scratch0[4],
3549 &mut scratch1[4],
3550 &mut scratch2[4],
3551 &mut scratch3[4],
3552 );
3553 self.butterfly4.perform_fft_strided(
3554 &mut scratch0[5],
3555 &mut scratch1[5],
3556 &mut scratch2[5],
3557 &mut scratch3[5],
3558 );
3559
3560 buffer.store(scratch0[0], 0);
3562 buffer.store(scratch0[1], 1);
3563 buffer.store(scratch0[2], 2);
3564 buffer.store(scratch0[3], 3);
3565 buffer.store(scratch0[4], 4);
3566 buffer.store(scratch0[5], 5);
3567 buffer.store(scratch1[0], 6);
3568 buffer.store(scratch1[1], 7);
3569 buffer.store(scratch1[2], 8);
3570 buffer.store(scratch1[3], 9);
3571 buffer.store(scratch1[4], 10);
3572 buffer.store(scratch1[5], 11);
3573 buffer.store(scratch2[0], 12);
3574 buffer.store(scratch2[1], 13);
3575 buffer.store(scratch2[2], 14);
3576 buffer.store(scratch2[3], 15);
3577 buffer.store(scratch2[4], 16);
3578 buffer.store(scratch2[5], 17);
3579 buffer.store(scratch3[0], 18);
3580 buffer.store(scratch3[1], 19);
3581 buffer.store(scratch3[2], 20);
3582 buffer.store(scratch3[3], 21);
3583 buffer.store(scratch3[4], 22);
3584 buffer.store(scratch3[5], 23);
3585 }
3586}
3587
3588pub struct Butterfly27<T> {
3589 butterfly9: Butterfly9<T>,
3590 twiddles: [Complex<T>; 12],
3591}
3592boilerplate_fft_butterfly!(Butterfly27, 27, |this: &Butterfly27<_>| this
3593 .butterfly9
3594 .fft_direction());
3595impl<T: FftNum> Butterfly27<T> {
3596 #[inline(always)]
3597 pub fn new(direction: FftDirection) -> Self {
3598 Self {
3599 butterfly9: Butterfly9::new(direction),
3600 twiddles: [
3601 twiddles::compute_twiddle(1, 27, direction),
3602 twiddles::compute_twiddle(2, 27, direction),
3603 twiddles::compute_twiddle(3, 27, direction),
3604 twiddles::compute_twiddle(4, 27, direction),
3605 twiddles::compute_twiddle(5, 27, direction),
3606 twiddles::compute_twiddle(6, 27, direction),
3607 twiddles::compute_twiddle(7, 27, direction),
3608 twiddles::compute_twiddle(8, 27, direction),
3609 twiddles::compute_twiddle(10, 27, direction),
3610 twiddles::compute_twiddle(12, 27, direction),
3611 twiddles::compute_twiddle(14, 27, direction),
3612 twiddles::compute_twiddle(16, 27, direction),
3613 ],
3614 }
3615 }
3616
3617 #[inline(always)]
3618 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3619 let mut scratch0 = [
3623 buffer.load(0),
3624 buffer.load(3),
3625 buffer.load(6),
3626 buffer.load(9),
3627 buffer.load(12),
3628 buffer.load(15),
3629 buffer.load(18),
3630 buffer.load(21),
3631 buffer.load(24),
3632 ];
3633 let mut scratch1 = [
3634 buffer.load(1 + 0),
3635 buffer.load(1 + 3),
3636 buffer.load(1 + 6),
3637 buffer.load(1 + 9),
3638 buffer.load(1 + 12),
3639 buffer.load(1 + 15),
3640 buffer.load(1 + 18),
3641 buffer.load(1 + 21),
3642 buffer.load(1 + 24),
3643 ];
3644 let mut scratch2 = [
3645 buffer.load(2 + 0),
3646 buffer.load(2 + 3),
3647 buffer.load(2 + 6),
3648 buffer.load(2 + 9),
3649 buffer.load(2 + 12),
3650 buffer.load(2 + 15),
3651 buffer.load(2 + 18),
3652 buffer.load(2 + 21),
3653 buffer.load(2 + 24),
3654 ];
3655
3656 self.butterfly9.perform_fft_contiguous(&mut scratch0);
3658 self.butterfly9.perform_fft_contiguous(&mut scratch1);
3659 self.butterfly9.perform_fft_contiguous(&mut scratch2);
3660
3661 scratch1[1] = scratch1[1] * self.twiddles[0];
3663 scratch1[2] = scratch1[2] * self.twiddles[1];
3664 scratch1[3] = scratch1[3] * self.twiddles[2];
3665 scratch1[4] = scratch1[4] * self.twiddles[3];
3666 scratch1[5] = scratch1[5] * self.twiddles[4];
3667 scratch1[6] = scratch1[6] * self.twiddles[5];
3668 scratch1[7] = scratch1[7] * self.twiddles[6];
3669 scratch1[8] = scratch1[8] * self.twiddles[7];
3670 scratch2[1] = scratch2[1] * self.twiddles[1];
3671 scratch2[2] = scratch2[2] * self.twiddles[3];
3672 scratch2[3] = scratch2[3] * self.twiddles[5];
3673 scratch2[4] = scratch2[4] * self.twiddles[7];
3674 scratch2[5] = scratch2[5] * self.twiddles[8];
3675 scratch2[6] = scratch2[6] * self.twiddles[9];
3676 scratch2[7] = scratch2[7] * self.twiddles[10];
3677 scratch2[8] = scratch2[8] * self.twiddles[11];
3678
3679 self.butterfly9.butterfly3.perform_fft_strided(
3683 &mut scratch0[0],
3684 &mut scratch1[0],
3685 &mut scratch2[0],
3686 );
3687 self.butterfly9.butterfly3.perform_fft_strided(
3688 &mut scratch0[1],
3689 &mut scratch1[1],
3690 &mut scratch2[1],
3691 );
3692 self.butterfly9.butterfly3.perform_fft_strided(
3693 &mut scratch0[2],
3694 &mut scratch1[2],
3695 &mut scratch2[2],
3696 );
3697 self.butterfly9.butterfly3.perform_fft_strided(
3698 &mut scratch0[3],
3699 &mut scratch1[3],
3700 &mut scratch2[3],
3701 );
3702 self.butterfly9.butterfly3.perform_fft_strided(
3703 &mut scratch0[4],
3704 &mut scratch1[4],
3705 &mut scratch2[4],
3706 );
3707 self.butterfly9.butterfly3.perform_fft_strided(
3708 &mut scratch0[5],
3709 &mut scratch1[5],
3710 &mut scratch2[5],
3711 );
3712 self.butterfly9.butterfly3.perform_fft_strided(
3713 &mut scratch0[6],
3714 &mut scratch1[6],
3715 &mut scratch2[6],
3716 );
3717 self.butterfly9.butterfly3.perform_fft_strided(
3718 &mut scratch0[7],
3719 &mut scratch1[7],
3720 &mut scratch2[7],
3721 );
3722 self.butterfly9.butterfly3.perform_fft_strided(
3723 &mut scratch0[8],
3724 &mut scratch1[8],
3725 &mut scratch2[8],
3726 );
3727
3728 buffer.store(scratch0[0], 0);
3730 buffer.store(scratch0[1], 1);
3731 buffer.store(scratch0[2], 2);
3732 buffer.store(scratch0[3], 3);
3733 buffer.store(scratch0[4], 4);
3734 buffer.store(scratch0[5], 5);
3735 buffer.store(scratch0[6], 6);
3736 buffer.store(scratch0[7], 7);
3737 buffer.store(scratch0[8], 8);
3738
3739 buffer.store(scratch1[0], 9 + 0);
3740 buffer.store(scratch1[1], 9 + 1);
3741 buffer.store(scratch1[2], 9 + 2);
3742 buffer.store(scratch1[3], 9 + 3);
3743 buffer.store(scratch1[4], 9 + 4);
3744 buffer.store(scratch1[5], 9 + 5);
3745 buffer.store(scratch1[6], 9 + 6);
3746 buffer.store(scratch1[7], 9 + 7);
3747 buffer.store(scratch1[8], 9 + 8);
3748
3749 buffer.store(scratch2[0], 18 + 0);
3750 buffer.store(scratch2[1], 18 + 1);
3751 buffer.store(scratch2[2], 18 + 2);
3752 buffer.store(scratch2[3], 18 + 3);
3753 buffer.store(scratch2[4], 18 + 4);
3754 buffer.store(scratch2[5], 18 + 5);
3755 buffer.store(scratch2[6], 18 + 6);
3756 buffer.store(scratch2[7], 18 + 7);
3757 buffer.store(scratch2[8], 18 + 8);
3758 }
3759}
3760
3761pub struct Butterfly29<T> {
3762 twiddle1: Complex<T>,
3763 twiddle2: Complex<T>,
3764 twiddle3: Complex<T>,
3765 twiddle4: Complex<T>,
3766 twiddle5: Complex<T>,
3767 twiddle6: Complex<T>,
3768 twiddle7: Complex<T>,
3769 twiddle8: Complex<T>,
3770 twiddle9: Complex<T>,
3771 twiddle10: Complex<T>,
3772 twiddle11: Complex<T>,
3773 twiddle12: Complex<T>,
3774 twiddle13: Complex<T>,
3775 twiddle14: Complex<T>,
3776 direction: FftDirection,
3777}
3778boilerplate_fft_butterfly!(Butterfly29, 29, |this: &Butterfly29<_>| this.direction);
3779impl<T: FftNum> Butterfly29<T> {
3780 pub fn new(direction: FftDirection) -> Self {
3781 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 29, direction);
3782 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 29, direction);
3783 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 29, direction);
3784 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 29, direction);
3785 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 29, direction);
3786 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 29, direction);
3787 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 29, direction);
3788 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 29, direction);
3789 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 29, direction);
3790 let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 29, direction);
3791 let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 29, direction);
3792 let twiddle12: Complex<T> = twiddles::compute_twiddle(12, 29, direction);
3793 let twiddle13: Complex<T> = twiddles::compute_twiddle(13, 29, direction);
3794 let twiddle14: Complex<T> = twiddles::compute_twiddle(14, 29, direction);
3795 Self {
3796 twiddle1,
3797 twiddle2,
3798 twiddle3,
3799 twiddle4,
3800 twiddle5,
3801 twiddle6,
3802 twiddle7,
3803 twiddle8,
3804 twiddle9,
3805 twiddle10,
3806 twiddle11,
3807 twiddle12,
3808 twiddle13,
3809 twiddle14,
3810 direction,
3811 }
3812 }
3813
3814 #[inline(never)]
3815 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3816 let x128p = buffer.load(1) + buffer.load(28);
3820 let x128n = buffer.load(1) - buffer.load(28);
3821 let x227p = buffer.load(2) + buffer.load(27);
3822 let x227n = buffer.load(2) - buffer.load(27);
3823 let x326p = buffer.load(3) + buffer.load(26);
3824 let x326n = buffer.load(3) - buffer.load(26);
3825 let x425p = buffer.load(4) + buffer.load(25);
3826 let x425n = buffer.load(4) - buffer.load(25);
3827 let x524p = buffer.load(5) + buffer.load(24);
3828 let x524n = buffer.load(5) - buffer.load(24);
3829 let x623p = buffer.load(6) + buffer.load(23);
3830 let x623n = buffer.load(6) - buffer.load(23);
3831 let x722p = buffer.load(7) + buffer.load(22);
3832 let x722n = buffer.load(7) - buffer.load(22);
3833 let x821p = buffer.load(8) + buffer.load(21);
3834 let x821n = buffer.load(8) - buffer.load(21);
3835 let x920p = buffer.load(9) + buffer.load(20);
3836 let x920n = buffer.load(9) - buffer.load(20);
3837 let x1019p = buffer.load(10) + buffer.load(19);
3838 let x1019n = buffer.load(10) - buffer.load(19);
3839 let x1118p = buffer.load(11) + buffer.load(18);
3840 let x1118n = buffer.load(11) - buffer.load(18);
3841 let x1217p = buffer.load(12) + buffer.load(17);
3842 let x1217n = buffer.load(12) - buffer.load(17);
3843 let x1316p = buffer.load(13) + buffer.load(16);
3844 let x1316n = buffer.load(13) - buffer.load(16);
3845 let x1415p = buffer.load(14) + buffer.load(15);
3846 let x1415n = buffer.load(14) - buffer.load(15);
3847 let sum = buffer.load(0)
3848 + x128p
3849 + x227p
3850 + x326p
3851 + x425p
3852 + x524p
3853 + x623p
3854 + x722p
3855 + x821p
3856 + x920p
3857 + x1019p
3858 + x1118p
3859 + x1217p
3860 + x1316p
3861 + x1415p;
3862 let b128re_a = buffer.load(0).re
3863 + self.twiddle1.re * x128p.re
3864 + self.twiddle2.re * x227p.re
3865 + self.twiddle3.re * x326p.re
3866 + self.twiddle4.re * x425p.re
3867 + self.twiddle5.re * x524p.re
3868 + self.twiddle6.re * x623p.re
3869 + self.twiddle7.re * x722p.re
3870 + self.twiddle8.re * x821p.re
3871 + self.twiddle9.re * x920p.re
3872 + self.twiddle10.re * x1019p.re
3873 + self.twiddle11.re * x1118p.re
3874 + self.twiddle12.re * x1217p.re
3875 + self.twiddle13.re * x1316p.re
3876 + self.twiddle14.re * x1415p.re;
3877 let b128re_b = self.twiddle1.im * x128n.im
3878 + self.twiddle2.im * x227n.im
3879 + self.twiddle3.im * x326n.im
3880 + self.twiddle4.im * x425n.im
3881 + self.twiddle5.im * x524n.im
3882 + self.twiddle6.im * x623n.im
3883 + self.twiddle7.im * x722n.im
3884 + self.twiddle8.im * x821n.im
3885 + self.twiddle9.im * x920n.im
3886 + self.twiddle10.im * x1019n.im
3887 + self.twiddle11.im * x1118n.im
3888 + self.twiddle12.im * x1217n.im
3889 + self.twiddle13.im * x1316n.im
3890 + self.twiddle14.im * x1415n.im;
3891 let b227re_a = buffer.load(0).re
3892 + self.twiddle2.re * x128p.re
3893 + self.twiddle4.re * x227p.re
3894 + self.twiddle6.re * x326p.re
3895 + self.twiddle8.re * x425p.re
3896 + self.twiddle10.re * x524p.re
3897 + self.twiddle12.re * x623p.re
3898 + self.twiddle14.re * x722p.re
3899 + self.twiddle13.re * x821p.re
3900 + self.twiddle11.re * x920p.re
3901 + self.twiddle9.re * x1019p.re
3902 + self.twiddle7.re * x1118p.re
3903 + self.twiddle5.re * x1217p.re
3904 + self.twiddle3.re * x1316p.re
3905 + self.twiddle1.re * x1415p.re;
3906 let b227re_b = self.twiddle2.im * x128n.im
3907 + self.twiddle4.im * x227n.im
3908 + self.twiddle6.im * x326n.im
3909 + self.twiddle8.im * x425n.im
3910 + self.twiddle10.im * x524n.im
3911 + self.twiddle12.im * x623n.im
3912 + self.twiddle14.im * x722n.im
3913 + -self.twiddle13.im * x821n.im
3914 + -self.twiddle11.im * x920n.im
3915 + -self.twiddle9.im * x1019n.im
3916 + -self.twiddle7.im * x1118n.im
3917 + -self.twiddle5.im * x1217n.im
3918 + -self.twiddle3.im * x1316n.im
3919 + -self.twiddle1.im * x1415n.im;
3920 let b326re_a = buffer.load(0).re
3921 + self.twiddle3.re * x128p.re
3922 + self.twiddle6.re * x227p.re
3923 + self.twiddle9.re * x326p.re
3924 + self.twiddle12.re * x425p.re
3925 + self.twiddle14.re * x524p.re
3926 + self.twiddle11.re * x623p.re
3927 + self.twiddle8.re * x722p.re
3928 + self.twiddle5.re * x821p.re
3929 + self.twiddle2.re * x920p.re
3930 + self.twiddle1.re * x1019p.re
3931 + self.twiddle4.re * x1118p.re
3932 + self.twiddle7.re * x1217p.re
3933 + self.twiddle10.re * x1316p.re
3934 + self.twiddle13.re * x1415p.re;
3935 let b326re_b = self.twiddle3.im * x128n.im
3936 + self.twiddle6.im * x227n.im
3937 + self.twiddle9.im * x326n.im
3938 + self.twiddle12.im * x425n.im
3939 + -self.twiddle14.im * x524n.im
3940 + -self.twiddle11.im * x623n.im
3941 + -self.twiddle8.im * x722n.im
3942 + -self.twiddle5.im * x821n.im
3943 + -self.twiddle2.im * x920n.im
3944 + self.twiddle1.im * x1019n.im
3945 + self.twiddle4.im * x1118n.im
3946 + self.twiddle7.im * x1217n.im
3947 + self.twiddle10.im * x1316n.im
3948 + self.twiddle13.im * x1415n.im;
3949 let b425re_a = buffer.load(0).re
3950 + self.twiddle4.re * x128p.re
3951 + self.twiddle8.re * x227p.re
3952 + self.twiddle12.re * x326p.re
3953 + self.twiddle13.re * x425p.re
3954 + self.twiddle9.re * x524p.re
3955 + self.twiddle5.re * x623p.re
3956 + self.twiddle1.re * x722p.re
3957 + self.twiddle3.re * x821p.re
3958 + self.twiddle7.re * x920p.re
3959 + self.twiddle11.re * x1019p.re
3960 + self.twiddle14.re * x1118p.re
3961 + self.twiddle10.re * x1217p.re
3962 + self.twiddle6.re * x1316p.re
3963 + self.twiddle2.re * x1415p.re;
3964 let b425re_b = self.twiddle4.im * x128n.im
3965 + self.twiddle8.im * x227n.im
3966 + self.twiddle12.im * x326n.im
3967 + -self.twiddle13.im * x425n.im
3968 + -self.twiddle9.im * x524n.im
3969 + -self.twiddle5.im * x623n.im
3970 + -self.twiddle1.im * x722n.im
3971 + self.twiddle3.im * x821n.im
3972 + self.twiddle7.im * x920n.im
3973 + self.twiddle11.im * x1019n.im
3974 + -self.twiddle14.im * x1118n.im
3975 + -self.twiddle10.im * x1217n.im
3976 + -self.twiddle6.im * x1316n.im
3977 + -self.twiddle2.im * x1415n.im;
3978 let b524re_a = buffer.load(0).re
3979 + self.twiddle5.re * x128p.re
3980 + self.twiddle10.re * x227p.re
3981 + self.twiddle14.re * x326p.re
3982 + self.twiddle9.re * x425p.re
3983 + self.twiddle4.re * x524p.re
3984 + self.twiddle1.re * x623p.re
3985 + self.twiddle6.re * x722p.re
3986 + self.twiddle11.re * x821p.re
3987 + self.twiddle13.re * x920p.re
3988 + self.twiddle8.re * x1019p.re
3989 + self.twiddle3.re * x1118p.re
3990 + self.twiddle2.re * x1217p.re
3991 + self.twiddle7.re * x1316p.re
3992 + self.twiddle12.re * x1415p.re;
3993 let b524re_b = self.twiddle5.im * x128n.im
3994 + self.twiddle10.im * x227n.im
3995 + -self.twiddle14.im * x326n.im
3996 + -self.twiddle9.im * x425n.im
3997 + -self.twiddle4.im * x524n.im
3998 + self.twiddle1.im * x623n.im
3999 + self.twiddle6.im * x722n.im
4000 + self.twiddle11.im * x821n.im
4001 + -self.twiddle13.im * x920n.im
4002 + -self.twiddle8.im * x1019n.im
4003 + -self.twiddle3.im * x1118n.im
4004 + self.twiddle2.im * x1217n.im
4005 + self.twiddle7.im * x1316n.im
4006 + self.twiddle12.im * x1415n.im;
4007 let b623re_a = buffer.load(0).re
4008 + self.twiddle6.re * x128p.re
4009 + self.twiddle12.re * x227p.re
4010 + self.twiddle11.re * x326p.re
4011 + self.twiddle5.re * x425p.re
4012 + self.twiddle1.re * x524p.re
4013 + self.twiddle7.re * x623p.re
4014 + self.twiddle13.re * x722p.re
4015 + self.twiddle10.re * x821p.re
4016 + self.twiddle4.re * x920p.re
4017 + self.twiddle2.re * x1019p.re
4018 + self.twiddle8.re * x1118p.re
4019 + self.twiddle14.re * x1217p.re
4020 + self.twiddle9.re * x1316p.re
4021 + self.twiddle3.re * x1415p.re;
4022 let b623re_b = self.twiddle6.im * x128n.im
4023 + self.twiddle12.im * x227n.im
4024 + -self.twiddle11.im * x326n.im
4025 + -self.twiddle5.im * x425n.im
4026 + self.twiddle1.im * x524n.im
4027 + self.twiddle7.im * x623n.im
4028 + self.twiddle13.im * x722n.im
4029 + -self.twiddle10.im * x821n.im
4030 + -self.twiddle4.im * x920n.im
4031 + self.twiddle2.im * x1019n.im
4032 + self.twiddle8.im * x1118n.im
4033 + self.twiddle14.im * x1217n.im
4034 + -self.twiddle9.im * x1316n.im
4035 + -self.twiddle3.im * x1415n.im;
4036 let b722re_a = buffer.load(0).re
4037 + self.twiddle7.re * x128p.re
4038 + self.twiddle14.re * x227p.re
4039 + self.twiddle8.re * x326p.re
4040 + self.twiddle1.re * x425p.re
4041 + self.twiddle6.re * x524p.re
4042 + self.twiddle13.re * x623p.re
4043 + self.twiddle9.re * x722p.re
4044 + self.twiddle2.re * x821p.re
4045 + self.twiddle5.re * x920p.re
4046 + self.twiddle12.re * x1019p.re
4047 + self.twiddle10.re * x1118p.re
4048 + self.twiddle3.re * x1217p.re
4049 + self.twiddle4.re * x1316p.re
4050 + self.twiddle11.re * x1415p.re;
4051 let b722re_b = self.twiddle7.im * x128n.im
4052 + self.twiddle14.im * x227n.im
4053 + -self.twiddle8.im * x326n.im
4054 + -self.twiddle1.im * x425n.im
4055 + self.twiddle6.im * x524n.im
4056 + self.twiddle13.im * x623n.im
4057 + -self.twiddle9.im * x722n.im
4058 + -self.twiddle2.im * x821n.im
4059 + self.twiddle5.im * x920n.im
4060 + self.twiddle12.im * x1019n.im
4061 + -self.twiddle10.im * x1118n.im
4062 + -self.twiddle3.im * x1217n.im
4063 + self.twiddle4.im * x1316n.im
4064 + self.twiddle11.im * x1415n.im;
4065 let b821re_a = buffer.load(0).re
4066 + self.twiddle8.re * x128p.re
4067 + self.twiddle13.re * x227p.re
4068 + self.twiddle5.re * x326p.re
4069 + self.twiddle3.re * x425p.re
4070 + self.twiddle11.re * x524p.re
4071 + self.twiddle10.re * x623p.re
4072 + self.twiddle2.re * x722p.re
4073 + self.twiddle6.re * x821p.re
4074 + self.twiddle14.re * x920p.re
4075 + self.twiddle7.re * x1019p.re
4076 + self.twiddle1.re * x1118p.re
4077 + self.twiddle9.re * x1217p.re
4078 + self.twiddle12.re * x1316p.re
4079 + self.twiddle4.re * x1415p.re;
4080 let b821re_b = self.twiddle8.im * x128n.im
4081 + -self.twiddle13.im * x227n.im
4082 + -self.twiddle5.im * x326n.im
4083 + self.twiddle3.im * x425n.im
4084 + self.twiddle11.im * x524n.im
4085 + -self.twiddle10.im * x623n.im
4086 + -self.twiddle2.im * x722n.im
4087 + self.twiddle6.im * x821n.im
4088 + self.twiddle14.im * x920n.im
4089 + -self.twiddle7.im * x1019n.im
4090 + self.twiddle1.im * x1118n.im
4091 + self.twiddle9.im * x1217n.im
4092 + -self.twiddle12.im * x1316n.im
4093 + -self.twiddle4.im * x1415n.im;
4094 let b920re_a = buffer.load(0).re
4095 + self.twiddle9.re * x128p.re
4096 + self.twiddle11.re * x227p.re
4097 + self.twiddle2.re * x326p.re
4098 + self.twiddle7.re * x425p.re
4099 + self.twiddle13.re * x524p.re
4100 + self.twiddle4.re * x623p.re
4101 + self.twiddle5.re * x722p.re
4102 + self.twiddle14.re * x821p.re
4103 + self.twiddle6.re * x920p.re
4104 + self.twiddle3.re * x1019p.re
4105 + self.twiddle12.re * x1118p.re
4106 + self.twiddle8.re * x1217p.re
4107 + self.twiddle1.re * x1316p.re
4108 + self.twiddle10.re * x1415p.re;
4109 let b920re_b = self.twiddle9.im * x128n.im
4110 + -self.twiddle11.im * x227n.im
4111 + -self.twiddle2.im * x326n.im
4112 + self.twiddle7.im * x425n.im
4113 + -self.twiddle13.im * x524n.im
4114 + -self.twiddle4.im * x623n.im
4115 + self.twiddle5.im * x722n.im
4116 + self.twiddle14.im * x821n.im
4117 + -self.twiddle6.im * x920n.im
4118 + self.twiddle3.im * x1019n.im
4119 + self.twiddle12.im * x1118n.im
4120 + -self.twiddle8.im * x1217n.im
4121 + self.twiddle1.im * x1316n.im
4122 + self.twiddle10.im * x1415n.im;
4123 let b1019re_a = buffer.load(0).re
4124 + self.twiddle10.re * x128p.re
4125 + self.twiddle9.re * x227p.re
4126 + self.twiddle1.re * x326p.re
4127 + self.twiddle11.re * x425p.re
4128 + self.twiddle8.re * x524p.re
4129 + self.twiddle2.re * x623p.re
4130 + self.twiddle12.re * x722p.re
4131 + self.twiddle7.re * x821p.re
4132 + self.twiddle3.re * x920p.re
4133 + self.twiddle13.re * x1019p.re
4134 + self.twiddle6.re * x1118p.re
4135 + self.twiddle4.re * x1217p.re
4136 + self.twiddle14.re * x1316p.re
4137 + self.twiddle5.re * x1415p.re;
4138 let b1019re_b = self.twiddle10.im * x128n.im
4139 + -self.twiddle9.im * x227n.im
4140 + self.twiddle1.im * x326n.im
4141 + self.twiddle11.im * x425n.im
4142 + -self.twiddle8.im * x524n.im
4143 + self.twiddle2.im * x623n.im
4144 + self.twiddle12.im * x722n.im
4145 + -self.twiddle7.im * x821n.im
4146 + self.twiddle3.im * x920n.im
4147 + self.twiddle13.im * x1019n.im
4148 + -self.twiddle6.im * x1118n.im
4149 + self.twiddle4.im * x1217n.im
4150 + self.twiddle14.im * x1316n.im
4151 + -self.twiddle5.im * x1415n.im;
4152 let b1118re_a = buffer.load(0).re
4153 + self.twiddle11.re * x128p.re
4154 + self.twiddle7.re * x227p.re
4155 + self.twiddle4.re * x326p.re
4156 + self.twiddle14.re * x425p.re
4157 + self.twiddle3.re * x524p.re
4158 + self.twiddle8.re * x623p.re
4159 + self.twiddle10.re * x722p.re
4160 + self.twiddle1.re * x821p.re
4161 + self.twiddle12.re * x920p.re
4162 + self.twiddle6.re * x1019p.re
4163 + self.twiddle5.re * x1118p.re
4164 + self.twiddle13.re * x1217p.re
4165 + self.twiddle2.re * x1316p.re
4166 + self.twiddle9.re * x1415p.re;
4167 let b1118re_b = self.twiddle11.im * x128n.im
4168 + -self.twiddle7.im * x227n.im
4169 + self.twiddle4.im * x326n.im
4170 + -self.twiddle14.im * x425n.im
4171 + -self.twiddle3.im * x524n.im
4172 + self.twiddle8.im * x623n.im
4173 + -self.twiddle10.im * x722n.im
4174 + self.twiddle1.im * x821n.im
4175 + self.twiddle12.im * x920n.im
4176 + -self.twiddle6.im * x1019n.im
4177 + self.twiddle5.im * x1118n.im
4178 + -self.twiddle13.im * x1217n.im
4179 + -self.twiddle2.im * x1316n.im
4180 + self.twiddle9.im * x1415n.im;
4181 let b1217re_a = buffer.load(0).re
4182 + self.twiddle12.re * x128p.re
4183 + self.twiddle5.re * x227p.re
4184 + self.twiddle7.re * x326p.re
4185 + self.twiddle10.re * x425p.re
4186 + self.twiddle2.re * x524p.re
4187 + self.twiddle14.re * x623p.re
4188 + self.twiddle3.re * x722p.re
4189 + self.twiddle9.re * x821p.re
4190 + self.twiddle8.re * x920p.re
4191 + self.twiddle4.re * x1019p.re
4192 + self.twiddle13.re * x1118p.re
4193 + self.twiddle1.re * x1217p.re
4194 + self.twiddle11.re * x1316p.re
4195 + self.twiddle6.re * x1415p.re;
4196 let b1217re_b = self.twiddle12.im * x128n.im
4197 + -self.twiddle5.im * x227n.im
4198 + self.twiddle7.im * x326n.im
4199 + -self.twiddle10.im * x425n.im
4200 + self.twiddle2.im * x524n.im
4201 + self.twiddle14.im * x623n.im
4202 + -self.twiddle3.im * x722n.im
4203 + self.twiddle9.im * x821n.im
4204 + -self.twiddle8.im * x920n.im
4205 + self.twiddle4.im * x1019n.im
4206 + -self.twiddle13.im * x1118n.im
4207 + -self.twiddle1.im * x1217n.im
4208 + self.twiddle11.im * x1316n.im
4209 + -self.twiddle6.im * x1415n.im;
4210 let b1316re_a = buffer.load(0).re
4211 + self.twiddle13.re * x128p.re
4212 + self.twiddle3.re * x227p.re
4213 + self.twiddle10.re * x326p.re
4214 + self.twiddle6.re * x425p.re
4215 + self.twiddle7.re * x524p.re
4216 + self.twiddle9.re * x623p.re
4217 + self.twiddle4.re * x722p.re
4218 + self.twiddle12.re * x821p.re
4219 + self.twiddle1.re * x920p.re
4220 + self.twiddle14.re * x1019p.re
4221 + self.twiddle2.re * x1118p.re
4222 + self.twiddle11.re * x1217p.re
4223 + self.twiddle5.re * x1316p.re
4224 + self.twiddle8.re * x1415p.re;
4225 let b1316re_b = self.twiddle13.im * x128n.im
4226 + -self.twiddle3.im * x227n.im
4227 + self.twiddle10.im * x326n.im
4228 + -self.twiddle6.im * x425n.im
4229 + self.twiddle7.im * x524n.im
4230 + -self.twiddle9.im * x623n.im
4231 + self.twiddle4.im * x722n.im
4232 + -self.twiddle12.im * x821n.im
4233 + self.twiddle1.im * x920n.im
4234 + self.twiddle14.im * x1019n.im
4235 + -self.twiddle2.im * x1118n.im
4236 + self.twiddle11.im * x1217n.im
4237 + -self.twiddle5.im * x1316n.im
4238 + self.twiddle8.im * x1415n.im;
4239 let b1415re_a = buffer.load(0).re
4240 + self.twiddle14.re * x128p.re
4241 + self.twiddle1.re * x227p.re
4242 + self.twiddle13.re * x326p.re
4243 + self.twiddle2.re * x425p.re
4244 + self.twiddle12.re * x524p.re
4245 + self.twiddle3.re * x623p.re
4246 + self.twiddle11.re * x722p.re
4247 + self.twiddle4.re * x821p.re
4248 + self.twiddle10.re * x920p.re
4249 + self.twiddle5.re * x1019p.re
4250 + self.twiddle9.re * x1118p.re
4251 + self.twiddle6.re * x1217p.re
4252 + self.twiddle8.re * x1316p.re
4253 + self.twiddle7.re * x1415p.re;
4254 let b1415re_b = self.twiddle14.im * x128n.im
4255 + -self.twiddle1.im * x227n.im
4256 + self.twiddle13.im * x326n.im
4257 + -self.twiddle2.im * x425n.im
4258 + self.twiddle12.im * x524n.im
4259 + -self.twiddle3.im * x623n.im
4260 + self.twiddle11.im * x722n.im
4261 + -self.twiddle4.im * x821n.im
4262 + self.twiddle10.im * x920n.im
4263 + -self.twiddle5.im * x1019n.im
4264 + self.twiddle9.im * x1118n.im
4265 + -self.twiddle6.im * x1217n.im
4266 + self.twiddle8.im * x1316n.im
4267 + -self.twiddle7.im * x1415n.im;
4268
4269 let b128im_a = buffer.load(0).im
4270 + self.twiddle1.re * x128p.im
4271 + self.twiddle2.re * x227p.im
4272 + self.twiddle3.re * x326p.im
4273 + self.twiddle4.re * x425p.im
4274 + self.twiddle5.re * x524p.im
4275 + self.twiddle6.re * x623p.im
4276 + self.twiddle7.re * x722p.im
4277 + self.twiddle8.re * x821p.im
4278 + self.twiddle9.re * x920p.im
4279 + self.twiddle10.re * x1019p.im
4280 + self.twiddle11.re * x1118p.im
4281 + self.twiddle12.re * x1217p.im
4282 + self.twiddle13.re * x1316p.im
4283 + self.twiddle14.re * x1415p.im;
4284 let b128im_b = self.twiddle1.im * x128n.re
4285 + self.twiddle2.im * x227n.re
4286 + self.twiddle3.im * x326n.re
4287 + self.twiddle4.im * x425n.re
4288 + self.twiddle5.im * x524n.re
4289 + self.twiddle6.im * x623n.re
4290 + self.twiddle7.im * x722n.re
4291 + self.twiddle8.im * x821n.re
4292 + self.twiddle9.im * x920n.re
4293 + self.twiddle10.im * x1019n.re
4294 + self.twiddle11.im * x1118n.re
4295 + self.twiddle12.im * x1217n.re
4296 + self.twiddle13.im * x1316n.re
4297 + self.twiddle14.im * x1415n.re;
4298 let b227im_a = buffer.load(0).im
4299 + self.twiddle2.re * x128p.im
4300 + self.twiddle4.re * x227p.im
4301 + self.twiddle6.re * x326p.im
4302 + self.twiddle8.re * x425p.im
4303 + self.twiddle10.re * x524p.im
4304 + self.twiddle12.re * x623p.im
4305 + self.twiddle14.re * x722p.im
4306 + self.twiddle13.re * x821p.im
4307 + self.twiddle11.re * x920p.im
4308 + self.twiddle9.re * x1019p.im
4309 + self.twiddle7.re * x1118p.im
4310 + self.twiddle5.re * x1217p.im
4311 + self.twiddle3.re * x1316p.im
4312 + self.twiddle1.re * x1415p.im;
4313 let b227im_b = self.twiddle2.im * x128n.re
4314 + self.twiddle4.im * x227n.re
4315 + self.twiddle6.im * x326n.re
4316 + self.twiddle8.im * x425n.re
4317 + self.twiddle10.im * x524n.re
4318 + self.twiddle12.im * x623n.re
4319 + self.twiddle14.im * x722n.re
4320 + -self.twiddle13.im * x821n.re
4321 + -self.twiddle11.im * x920n.re
4322 + -self.twiddle9.im * x1019n.re
4323 + -self.twiddle7.im * x1118n.re
4324 + -self.twiddle5.im * x1217n.re
4325 + -self.twiddle3.im * x1316n.re
4326 + -self.twiddle1.im * x1415n.re;
4327 let b326im_a = buffer.load(0).im
4328 + self.twiddle3.re * x128p.im
4329 + self.twiddle6.re * x227p.im
4330 + self.twiddle9.re * x326p.im
4331 + self.twiddle12.re * x425p.im
4332 + self.twiddle14.re * x524p.im
4333 + self.twiddle11.re * x623p.im
4334 + self.twiddle8.re * x722p.im
4335 + self.twiddle5.re * x821p.im
4336 + self.twiddle2.re * x920p.im
4337 + self.twiddle1.re * x1019p.im
4338 + self.twiddle4.re * x1118p.im
4339 + self.twiddle7.re * x1217p.im
4340 + self.twiddle10.re * x1316p.im
4341 + self.twiddle13.re * x1415p.im;
4342 let b326im_b = self.twiddle3.im * x128n.re
4343 + self.twiddle6.im * x227n.re
4344 + self.twiddle9.im * x326n.re
4345 + self.twiddle12.im * x425n.re
4346 + -self.twiddle14.im * x524n.re
4347 + -self.twiddle11.im * x623n.re
4348 + -self.twiddle8.im * x722n.re
4349 + -self.twiddle5.im * x821n.re
4350 + -self.twiddle2.im * x920n.re
4351 + self.twiddle1.im * x1019n.re
4352 + self.twiddle4.im * x1118n.re
4353 + self.twiddle7.im * x1217n.re
4354 + self.twiddle10.im * x1316n.re
4355 + self.twiddle13.im * x1415n.re;
4356 let b425im_a = buffer.load(0).im
4357 + self.twiddle4.re * x128p.im
4358 + self.twiddle8.re * x227p.im
4359 + self.twiddle12.re * x326p.im
4360 + self.twiddle13.re * x425p.im
4361 + self.twiddle9.re * x524p.im
4362 + self.twiddle5.re * x623p.im
4363 + self.twiddle1.re * x722p.im
4364 + self.twiddle3.re * x821p.im
4365 + self.twiddle7.re * x920p.im
4366 + self.twiddle11.re * x1019p.im
4367 + self.twiddle14.re * x1118p.im
4368 + self.twiddle10.re * x1217p.im
4369 + self.twiddle6.re * x1316p.im
4370 + self.twiddle2.re * x1415p.im;
4371 let b425im_b = self.twiddle4.im * x128n.re
4372 + self.twiddle8.im * x227n.re
4373 + self.twiddle12.im * x326n.re
4374 + -self.twiddle13.im * x425n.re
4375 + -self.twiddle9.im * x524n.re
4376 + -self.twiddle5.im * x623n.re
4377 + -self.twiddle1.im * x722n.re
4378 + self.twiddle3.im * x821n.re
4379 + self.twiddle7.im * x920n.re
4380 + self.twiddle11.im * x1019n.re
4381 + -self.twiddle14.im * x1118n.re
4382 + -self.twiddle10.im * x1217n.re
4383 + -self.twiddle6.im * x1316n.re
4384 + -self.twiddle2.im * x1415n.re;
4385 let b524im_a = buffer.load(0).im
4386 + self.twiddle5.re * x128p.im
4387 + self.twiddle10.re * x227p.im
4388 + self.twiddle14.re * x326p.im
4389 + self.twiddle9.re * x425p.im
4390 + self.twiddle4.re * x524p.im
4391 + self.twiddle1.re * x623p.im
4392 + self.twiddle6.re * x722p.im
4393 + self.twiddle11.re * x821p.im
4394 + self.twiddle13.re * x920p.im
4395 + self.twiddle8.re * x1019p.im
4396 + self.twiddle3.re * x1118p.im
4397 + self.twiddle2.re * x1217p.im
4398 + self.twiddle7.re * x1316p.im
4399 + self.twiddle12.re * x1415p.im;
4400 let b524im_b = self.twiddle5.im * x128n.re
4401 + self.twiddle10.im * x227n.re
4402 + -self.twiddle14.im * x326n.re
4403 + -self.twiddle9.im * x425n.re
4404 + -self.twiddle4.im * x524n.re
4405 + self.twiddle1.im * x623n.re
4406 + self.twiddle6.im * x722n.re
4407 + self.twiddle11.im * x821n.re
4408 + -self.twiddle13.im * x920n.re
4409 + -self.twiddle8.im * x1019n.re
4410 + -self.twiddle3.im * x1118n.re
4411 + self.twiddle2.im * x1217n.re
4412 + self.twiddle7.im * x1316n.re
4413 + self.twiddle12.im * x1415n.re;
4414 let b623im_a = buffer.load(0).im
4415 + self.twiddle6.re * x128p.im
4416 + self.twiddle12.re * x227p.im
4417 + self.twiddle11.re * x326p.im
4418 + self.twiddle5.re * x425p.im
4419 + self.twiddle1.re * x524p.im
4420 + self.twiddle7.re * x623p.im
4421 + self.twiddle13.re * x722p.im
4422 + self.twiddle10.re * x821p.im
4423 + self.twiddle4.re * x920p.im
4424 + self.twiddle2.re * x1019p.im
4425 + self.twiddle8.re * x1118p.im
4426 + self.twiddle14.re * x1217p.im
4427 + self.twiddle9.re * x1316p.im
4428 + self.twiddle3.re * x1415p.im;
4429 let b623im_b = self.twiddle6.im * x128n.re
4430 + self.twiddle12.im * x227n.re
4431 + -self.twiddle11.im * x326n.re
4432 + -self.twiddle5.im * x425n.re
4433 + self.twiddle1.im * x524n.re
4434 + self.twiddle7.im * x623n.re
4435 + self.twiddle13.im * x722n.re
4436 + -self.twiddle10.im * x821n.re
4437 + -self.twiddle4.im * x920n.re
4438 + self.twiddle2.im * x1019n.re
4439 + self.twiddle8.im * x1118n.re
4440 + self.twiddle14.im * x1217n.re
4441 + -self.twiddle9.im * x1316n.re
4442 + -self.twiddle3.im * x1415n.re;
4443 let b722im_a = buffer.load(0).im
4444 + self.twiddle7.re * x128p.im
4445 + self.twiddle14.re * x227p.im
4446 + self.twiddle8.re * x326p.im
4447 + self.twiddle1.re * x425p.im
4448 + self.twiddle6.re * x524p.im
4449 + self.twiddle13.re * x623p.im
4450 + self.twiddle9.re * x722p.im
4451 + self.twiddle2.re * x821p.im
4452 + self.twiddle5.re * x920p.im
4453 + self.twiddle12.re * x1019p.im
4454 + self.twiddle10.re * x1118p.im
4455 + self.twiddle3.re * x1217p.im
4456 + self.twiddle4.re * x1316p.im
4457 + self.twiddle11.re * x1415p.im;
4458 let b722im_b = self.twiddle7.im * x128n.re
4459 + self.twiddle14.im * x227n.re
4460 + -self.twiddle8.im * x326n.re
4461 + -self.twiddle1.im * x425n.re
4462 + self.twiddle6.im * x524n.re
4463 + self.twiddle13.im * x623n.re
4464 + -self.twiddle9.im * x722n.re
4465 + -self.twiddle2.im * x821n.re
4466 + self.twiddle5.im * x920n.re
4467 + self.twiddle12.im * x1019n.re
4468 + -self.twiddle10.im * x1118n.re
4469 + -self.twiddle3.im * x1217n.re
4470 + self.twiddle4.im * x1316n.re
4471 + self.twiddle11.im * x1415n.re;
4472 let b821im_a = buffer.load(0).im
4473 + self.twiddle8.re * x128p.im
4474 + self.twiddle13.re * x227p.im
4475 + self.twiddle5.re * x326p.im
4476 + self.twiddle3.re * x425p.im
4477 + self.twiddle11.re * x524p.im
4478 + self.twiddle10.re * x623p.im
4479 + self.twiddle2.re * x722p.im
4480 + self.twiddle6.re * x821p.im
4481 + self.twiddle14.re * x920p.im
4482 + self.twiddle7.re * x1019p.im
4483 + self.twiddle1.re * x1118p.im
4484 + self.twiddle9.re * x1217p.im
4485 + self.twiddle12.re * x1316p.im
4486 + self.twiddle4.re * x1415p.im;
4487 let b821im_b = self.twiddle8.im * x128n.re
4488 + -self.twiddle13.im * x227n.re
4489 + -self.twiddle5.im * x326n.re
4490 + self.twiddle3.im * x425n.re
4491 + self.twiddle11.im * x524n.re
4492 + -self.twiddle10.im * x623n.re
4493 + -self.twiddle2.im * x722n.re
4494 + self.twiddle6.im * x821n.re
4495 + self.twiddle14.im * x920n.re
4496 + -self.twiddle7.im * x1019n.re
4497 + self.twiddle1.im * x1118n.re
4498 + self.twiddle9.im * x1217n.re
4499 + -self.twiddle12.im * x1316n.re
4500 + -self.twiddle4.im * x1415n.re;
4501 let b920im_a = buffer.load(0).im
4502 + self.twiddle9.re * x128p.im
4503 + self.twiddle11.re * x227p.im
4504 + self.twiddle2.re * x326p.im
4505 + self.twiddle7.re * x425p.im
4506 + self.twiddle13.re * x524p.im
4507 + self.twiddle4.re * x623p.im
4508 + self.twiddle5.re * x722p.im
4509 + self.twiddle14.re * x821p.im
4510 + self.twiddle6.re * x920p.im
4511 + self.twiddle3.re * x1019p.im
4512 + self.twiddle12.re * x1118p.im
4513 + self.twiddle8.re * x1217p.im
4514 + self.twiddle1.re * x1316p.im
4515 + self.twiddle10.re * x1415p.im;
4516 let b920im_b = self.twiddle9.im * x128n.re
4517 + -self.twiddle11.im * x227n.re
4518 + -self.twiddle2.im * x326n.re
4519 + self.twiddle7.im * x425n.re
4520 + -self.twiddle13.im * x524n.re
4521 + -self.twiddle4.im * x623n.re
4522 + self.twiddle5.im * x722n.re
4523 + self.twiddle14.im * x821n.re
4524 + -self.twiddle6.im * x920n.re
4525 + self.twiddle3.im * x1019n.re
4526 + self.twiddle12.im * x1118n.re
4527 + -self.twiddle8.im * x1217n.re
4528 + self.twiddle1.im * x1316n.re
4529 + self.twiddle10.im * x1415n.re;
4530 let b1019im_a = buffer.load(0).im
4531 + self.twiddle10.re * x128p.im
4532 + self.twiddle9.re * x227p.im
4533 + self.twiddle1.re * x326p.im
4534 + self.twiddle11.re * x425p.im
4535 + self.twiddle8.re * x524p.im
4536 + self.twiddle2.re * x623p.im
4537 + self.twiddle12.re * x722p.im
4538 + self.twiddle7.re * x821p.im
4539 + self.twiddle3.re * x920p.im
4540 + self.twiddle13.re * x1019p.im
4541 + self.twiddle6.re * x1118p.im
4542 + self.twiddle4.re * x1217p.im
4543 + self.twiddle14.re * x1316p.im
4544 + self.twiddle5.re * x1415p.im;
4545 let b1019im_b = self.twiddle10.im * x128n.re
4546 + -self.twiddle9.im * x227n.re
4547 + self.twiddle1.im * x326n.re
4548 + self.twiddle11.im * x425n.re
4549 + -self.twiddle8.im * x524n.re
4550 + self.twiddle2.im * x623n.re
4551 + self.twiddle12.im * x722n.re
4552 + -self.twiddle7.im * x821n.re
4553 + self.twiddle3.im * x920n.re
4554 + self.twiddle13.im * x1019n.re
4555 + -self.twiddle6.im * x1118n.re
4556 + self.twiddle4.im * x1217n.re
4557 + self.twiddle14.im * x1316n.re
4558 + -self.twiddle5.im * x1415n.re;
4559 let b1118im_a = buffer.load(0).im
4560 + self.twiddle11.re * x128p.im
4561 + self.twiddle7.re * x227p.im
4562 + self.twiddle4.re * x326p.im
4563 + self.twiddle14.re * x425p.im
4564 + self.twiddle3.re * x524p.im
4565 + self.twiddle8.re * x623p.im
4566 + self.twiddle10.re * x722p.im
4567 + self.twiddle1.re * x821p.im
4568 + self.twiddle12.re * x920p.im
4569 + self.twiddle6.re * x1019p.im
4570 + self.twiddle5.re * x1118p.im
4571 + self.twiddle13.re * x1217p.im
4572 + self.twiddle2.re * x1316p.im
4573 + self.twiddle9.re * x1415p.im;
4574 let b1118im_b = self.twiddle11.im * x128n.re
4575 + -self.twiddle7.im * x227n.re
4576 + self.twiddle4.im * x326n.re
4577 + -self.twiddle14.im * x425n.re
4578 + -self.twiddle3.im * x524n.re
4579 + self.twiddle8.im * x623n.re
4580 + -self.twiddle10.im * x722n.re
4581 + self.twiddle1.im * x821n.re
4582 + self.twiddle12.im * x920n.re
4583 + -self.twiddle6.im * x1019n.re
4584 + self.twiddle5.im * x1118n.re
4585 + -self.twiddle13.im * x1217n.re
4586 + -self.twiddle2.im * x1316n.re
4587 + self.twiddle9.im * x1415n.re;
4588 let b1217im_a = buffer.load(0).im
4589 + self.twiddle12.re * x128p.im
4590 + self.twiddle5.re * x227p.im
4591 + self.twiddle7.re * x326p.im
4592 + self.twiddle10.re * x425p.im
4593 + self.twiddle2.re * x524p.im
4594 + self.twiddle14.re * x623p.im
4595 + self.twiddle3.re * x722p.im
4596 + self.twiddle9.re * x821p.im
4597 + self.twiddle8.re * x920p.im
4598 + self.twiddle4.re * x1019p.im
4599 + self.twiddle13.re * x1118p.im
4600 + self.twiddle1.re * x1217p.im
4601 + self.twiddle11.re * x1316p.im
4602 + self.twiddle6.re * x1415p.im;
4603 let b1217im_b = self.twiddle12.im * x128n.re
4604 + -self.twiddle5.im * x227n.re
4605 + self.twiddle7.im * x326n.re
4606 + -self.twiddle10.im * x425n.re
4607 + self.twiddle2.im * x524n.re
4608 + self.twiddle14.im * x623n.re
4609 + -self.twiddle3.im * x722n.re
4610 + self.twiddle9.im * x821n.re
4611 + -self.twiddle8.im * x920n.re
4612 + self.twiddle4.im * x1019n.re
4613 + -self.twiddle13.im * x1118n.re
4614 + -self.twiddle1.im * x1217n.re
4615 + self.twiddle11.im * x1316n.re
4616 + -self.twiddle6.im * x1415n.re;
4617 let b1316im_a = buffer.load(0).im
4618 + self.twiddle13.re * x128p.im
4619 + self.twiddle3.re * x227p.im
4620 + self.twiddle10.re * x326p.im
4621 + self.twiddle6.re * x425p.im
4622 + self.twiddle7.re * x524p.im
4623 + self.twiddle9.re * x623p.im
4624 + self.twiddle4.re * x722p.im
4625 + self.twiddle12.re * x821p.im
4626 + self.twiddle1.re * x920p.im
4627 + self.twiddle14.re * x1019p.im
4628 + self.twiddle2.re * x1118p.im
4629 + self.twiddle11.re * x1217p.im
4630 + self.twiddle5.re * x1316p.im
4631 + self.twiddle8.re * x1415p.im;
4632 let b1316im_b = self.twiddle13.im * x128n.re
4633 + -self.twiddle3.im * x227n.re
4634 + self.twiddle10.im * x326n.re
4635 + -self.twiddle6.im * x425n.re
4636 + self.twiddle7.im * x524n.re
4637 + -self.twiddle9.im * x623n.re
4638 + self.twiddle4.im * x722n.re
4639 + -self.twiddle12.im * x821n.re
4640 + self.twiddle1.im * x920n.re
4641 + self.twiddle14.im * x1019n.re
4642 + -self.twiddle2.im * x1118n.re
4643 + self.twiddle11.im * x1217n.re
4644 + -self.twiddle5.im * x1316n.re
4645 + self.twiddle8.im * x1415n.re;
4646 let b1415im_a = buffer.load(0).im
4647 + self.twiddle14.re * x128p.im
4648 + self.twiddle1.re * x227p.im
4649 + self.twiddle13.re * x326p.im
4650 + self.twiddle2.re * x425p.im
4651 + self.twiddle12.re * x524p.im
4652 + self.twiddle3.re * x623p.im
4653 + self.twiddle11.re * x722p.im
4654 + self.twiddle4.re * x821p.im
4655 + self.twiddle10.re * x920p.im
4656 + self.twiddle5.re * x1019p.im
4657 + self.twiddle9.re * x1118p.im
4658 + self.twiddle6.re * x1217p.im
4659 + self.twiddle8.re * x1316p.im
4660 + self.twiddle7.re * x1415p.im;
4661 let b1415im_b = self.twiddle14.im * x128n.re
4662 + -self.twiddle1.im * x227n.re
4663 + self.twiddle13.im * x326n.re
4664 + -self.twiddle2.im * x425n.re
4665 + self.twiddle12.im * x524n.re
4666 + -self.twiddle3.im * x623n.re
4667 + self.twiddle11.im * x722n.re
4668 + -self.twiddle4.im * x821n.re
4669 + self.twiddle10.im * x920n.re
4670 + -self.twiddle5.im * x1019n.re
4671 + self.twiddle9.im * x1118n.re
4672 + -self.twiddle6.im * x1217n.re
4673 + self.twiddle8.im * x1316n.re
4674 + -self.twiddle7.im * x1415n.re;
4675
4676 let out1re = b128re_a - b128re_b;
4677 let out1im = b128im_a + b128im_b;
4678 let out2re = b227re_a - b227re_b;
4679 let out2im = b227im_a + b227im_b;
4680 let out3re = b326re_a - b326re_b;
4681 let out3im = b326im_a + b326im_b;
4682 let out4re = b425re_a - b425re_b;
4683 let out4im = b425im_a + b425im_b;
4684 let out5re = b524re_a - b524re_b;
4685 let out5im = b524im_a + b524im_b;
4686 let out6re = b623re_a - b623re_b;
4687 let out6im = b623im_a + b623im_b;
4688 let out7re = b722re_a - b722re_b;
4689 let out7im = b722im_a + b722im_b;
4690 let out8re = b821re_a - b821re_b;
4691 let out8im = b821im_a + b821im_b;
4692 let out9re = b920re_a - b920re_b;
4693 let out9im = b920im_a + b920im_b;
4694 let out10re = b1019re_a - b1019re_b;
4695 let out10im = b1019im_a + b1019im_b;
4696 let out11re = b1118re_a - b1118re_b;
4697 let out11im = b1118im_a + b1118im_b;
4698 let out12re = b1217re_a - b1217re_b;
4699 let out12im = b1217im_a + b1217im_b;
4700 let out13re = b1316re_a - b1316re_b;
4701 let out13im = b1316im_a + b1316im_b;
4702 let out14re = b1415re_a - b1415re_b;
4703 let out14im = b1415im_a + b1415im_b;
4704 let out15re = b1415re_a + b1415re_b;
4705 let out15im = b1415im_a - b1415im_b;
4706 let out16re = b1316re_a + b1316re_b;
4707 let out16im = b1316im_a - b1316im_b;
4708 let out17re = b1217re_a + b1217re_b;
4709 let out17im = b1217im_a - b1217im_b;
4710 let out18re = b1118re_a + b1118re_b;
4711 let out18im = b1118im_a - b1118im_b;
4712 let out19re = b1019re_a + b1019re_b;
4713 let out19im = b1019im_a - b1019im_b;
4714 let out20re = b920re_a + b920re_b;
4715 let out20im = b920im_a - b920im_b;
4716 let out21re = b821re_a + b821re_b;
4717 let out21im = b821im_a - b821im_b;
4718 let out22re = b722re_a + b722re_b;
4719 let out22im = b722im_a - b722im_b;
4720 let out23re = b623re_a + b623re_b;
4721 let out23im = b623im_a - b623im_b;
4722 let out24re = b524re_a + b524re_b;
4723 let out24im = b524im_a - b524im_b;
4724 let out25re = b425re_a + b425re_b;
4725 let out25im = b425im_a - b425im_b;
4726 let out26re = b326re_a + b326re_b;
4727 let out26im = b326im_a - b326im_b;
4728 let out27re = b227re_a + b227re_b;
4729 let out27im = b227im_a - b227im_b;
4730 let out28re = b128re_a + b128re_b;
4731 let out28im = b128im_a - b128im_b;
4732 buffer.store(sum, 0);
4733 buffer.store(
4734 Complex {
4735 re: out1re,
4736 im: out1im,
4737 },
4738 1,
4739 );
4740 buffer.store(
4741 Complex {
4742 re: out2re,
4743 im: out2im,
4744 },
4745 2,
4746 );
4747 buffer.store(
4748 Complex {
4749 re: out3re,
4750 im: out3im,
4751 },
4752 3,
4753 );
4754 buffer.store(
4755 Complex {
4756 re: out4re,
4757 im: out4im,
4758 },
4759 4,
4760 );
4761 buffer.store(
4762 Complex {
4763 re: out5re,
4764 im: out5im,
4765 },
4766 5,
4767 );
4768 buffer.store(
4769 Complex {
4770 re: out6re,
4771 im: out6im,
4772 },
4773 6,
4774 );
4775 buffer.store(
4776 Complex {
4777 re: out7re,
4778 im: out7im,
4779 },
4780 7,
4781 );
4782 buffer.store(
4783 Complex {
4784 re: out8re,
4785 im: out8im,
4786 },
4787 8,
4788 );
4789 buffer.store(
4790 Complex {
4791 re: out9re,
4792 im: out9im,
4793 },
4794 9,
4795 );
4796 buffer.store(
4797 Complex {
4798 re: out10re,
4799 im: out10im,
4800 },
4801 10,
4802 );
4803 buffer.store(
4804 Complex {
4805 re: out11re,
4806 im: out11im,
4807 },
4808 11,
4809 );
4810 buffer.store(
4811 Complex {
4812 re: out12re,
4813 im: out12im,
4814 },
4815 12,
4816 );
4817 buffer.store(
4818 Complex {
4819 re: out13re,
4820 im: out13im,
4821 },
4822 13,
4823 );
4824 buffer.store(
4825 Complex {
4826 re: out14re,
4827 im: out14im,
4828 },
4829 14,
4830 );
4831 buffer.store(
4832 Complex {
4833 re: out15re,
4834 im: out15im,
4835 },
4836 15,
4837 );
4838 buffer.store(
4839 Complex {
4840 re: out16re,
4841 im: out16im,
4842 },
4843 16,
4844 );
4845 buffer.store(
4846 Complex {
4847 re: out17re,
4848 im: out17im,
4849 },
4850 17,
4851 );
4852 buffer.store(
4853 Complex {
4854 re: out18re,
4855 im: out18im,
4856 },
4857 18,
4858 );
4859 buffer.store(
4860 Complex {
4861 re: out19re,
4862 im: out19im,
4863 },
4864 19,
4865 );
4866 buffer.store(
4867 Complex {
4868 re: out20re,
4869 im: out20im,
4870 },
4871 20,
4872 );
4873 buffer.store(
4874 Complex {
4875 re: out21re,
4876 im: out21im,
4877 },
4878 21,
4879 );
4880 buffer.store(
4881 Complex {
4882 re: out22re,
4883 im: out22im,
4884 },
4885 22,
4886 );
4887 buffer.store(
4888 Complex {
4889 re: out23re,
4890 im: out23im,
4891 },
4892 23,
4893 );
4894 buffer.store(
4895 Complex {
4896 re: out24re,
4897 im: out24im,
4898 },
4899 24,
4900 );
4901 buffer.store(
4902 Complex {
4903 re: out25re,
4904 im: out25im,
4905 },
4906 25,
4907 );
4908 buffer.store(
4909 Complex {
4910 re: out26re,
4911 im: out26im,
4912 },
4913 26,
4914 );
4915 buffer.store(
4916 Complex {
4917 re: out27re,
4918 im: out27im,
4919 },
4920 27,
4921 );
4922 buffer.store(
4923 Complex {
4924 re: out28re,
4925 im: out28im,
4926 },
4927 28,
4928 );
4929 }
4930}
/// Length-31 FFT butterfly.
///
/// The kernel (`perform_fft_contiguous`) first folds input `k` and input `31 - k` into a
/// sum and a difference for `k = 1..=15`, then combines those with the real and imaginary
/// parts of the stored factors, so 15 factors cover all 30 non-zero input indices.
pub struct Butterfly31<T> {
    // `twiddleK` holds `twiddles::compute_twiddle(K, 31, direction)`; all fifteen are
    // computed once in `new` and only read afterwards.
    // NOTE(review): presumably the K-th root-of-unity factor of a length-31 transform --
    // confirm against `twiddles::compute_twiddle`.
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    twiddle7: Complex<T>,
    twiddle8: Complex<T>,
    twiddle9: Complex<T>,
    twiddle10: Complex<T>,
    twiddle11: Complex<T>,
    twiddle12: Complex<T>,
    twiddle13: Complex<T>,
    twiddle14: Complex<T>,
    twiddle15: Complex<T>,
    // Direction passed to `new`; the same value is used to compute every factor above.
    direction: FftDirection,
}
// Generates the shared plumbing for `Butterfly31`: a `perform_fft_butterfly` wrapper that
// forwards to `perform_fft_contiguous`, the `Fft` impl (which processes chunks of `len()`
// elements and reports all scratch lengths as 0), `Length` (reports 31) and `Direction`.
// NOTE(review): the closure is the macro's `$direction_fn` argument and reads the stored
// `direction` field; presumably it backs the generated `fft_direction()` -- confirm against
// the macro definition.
boilerplate_fft_butterfly!(Butterfly31, 31, |this: &Butterfly31<_>| this.direction);
4950impl<T: FftNum> Butterfly31<T> {
4951 pub fn new(direction: FftDirection) -> Self {
4952 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 31, direction);
4953 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 31, direction);
4954 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 31, direction);
4955 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 31, direction);
4956 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 31, direction);
4957 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 31, direction);
4958 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 31, direction);
4959 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 31, direction);
4960 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 31, direction);
4961 let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 31, direction);
4962 let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 31, direction);
4963 let twiddle12: Complex<T> = twiddles::compute_twiddle(12, 31, direction);
4964 let twiddle13: Complex<T> = twiddles::compute_twiddle(13, 31, direction);
4965 let twiddle14: Complex<T> = twiddles::compute_twiddle(14, 31, direction);
4966 let twiddle15: Complex<T> = twiddles::compute_twiddle(15, 31, direction);
4967 Self {
4968 twiddle1,
4969 twiddle2,
4970 twiddle3,
4971 twiddle4,
4972 twiddle5,
4973 twiddle6,
4974 twiddle7,
4975 twiddle8,
4976 twiddle9,
4977 twiddle10,
4978 twiddle11,
4979 twiddle12,
4980 twiddle13,
4981 twiddle14,
4982 twiddle15,
4983 direction,
4984 }
4985 }
4986
4987 #[inline(never)]
4988 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
4989 let x130p = buffer.load(1) + buffer.load(30);
4993 let x130n = buffer.load(1) - buffer.load(30);
4994 let x229p = buffer.load(2) + buffer.load(29);
4995 let x229n = buffer.load(2) - buffer.load(29);
4996 let x328p = buffer.load(3) + buffer.load(28);
4997 let x328n = buffer.load(3) - buffer.load(28);
4998 let x427p = buffer.load(4) + buffer.load(27);
4999 let x427n = buffer.load(4) - buffer.load(27);
5000 let x526p = buffer.load(5) + buffer.load(26);
5001 let x526n = buffer.load(5) - buffer.load(26);
5002 let x625p = buffer.load(6) + buffer.load(25);
5003 let x625n = buffer.load(6) - buffer.load(25);
5004 let x724p = buffer.load(7) + buffer.load(24);
5005 let x724n = buffer.load(7) - buffer.load(24);
5006 let x823p = buffer.load(8) + buffer.load(23);
5007 let x823n = buffer.load(8) - buffer.load(23);
5008 let x922p = buffer.load(9) + buffer.load(22);
5009 let x922n = buffer.load(9) - buffer.load(22);
5010 let x1021p = buffer.load(10) + buffer.load(21);
5011 let x1021n = buffer.load(10) - buffer.load(21);
5012 let x1120p = buffer.load(11) + buffer.load(20);
5013 let x1120n = buffer.load(11) - buffer.load(20);
5014 let x1219p = buffer.load(12) + buffer.load(19);
5015 let x1219n = buffer.load(12) - buffer.load(19);
5016 let x1318p = buffer.load(13) + buffer.load(18);
5017 let x1318n = buffer.load(13) - buffer.load(18);
5018 let x1417p = buffer.load(14) + buffer.load(17);
5019 let x1417n = buffer.load(14) - buffer.load(17);
5020 let x1516p = buffer.load(15) + buffer.load(16);
5021 let x1516n = buffer.load(15) - buffer.load(16);
5022 let sum = buffer.load(0)
5023 + x130p
5024 + x229p
5025 + x328p
5026 + x427p
5027 + x526p
5028 + x625p
5029 + x724p
5030 + x823p
5031 + x922p
5032 + x1021p
5033 + x1120p
5034 + x1219p
5035 + x1318p
5036 + x1417p
5037 + x1516p;
5038 let b130re_a = buffer.load(0).re
5039 + self.twiddle1.re * x130p.re
5040 + self.twiddle2.re * x229p.re
5041 + self.twiddle3.re * x328p.re
5042 + self.twiddle4.re * x427p.re
5043 + self.twiddle5.re * x526p.re
5044 + self.twiddle6.re * x625p.re
5045 + self.twiddle7.re * x724p.re
5046 + self.twiddle8.re * x823p.re
5047 + self.twiddle9.re * x922p.re
5048 + self.twiddle10.re * x1021p.re
5049 + self.twiddle11.re * x1120p.re
5050 + self.twiddle12.re * x1219p.re
5051 + self.twiddle13.re * x1318p.re
5052 + self.twiddle14.re * x1417p.re
5053 + self.twiddle15.re * x1516p.re;
5054 let b130re_b = self.twiddle1.im * x130n.im
5055 + self.twiddle2.im * x229n.im
5056 + self.twiddle3.im * x328n.im
5057 + self.twiddle4.im * x427n.im
5058 + self.twiddle5.im * x526n.im
5059 + self.twiddle6.im * x625n.im
5060 + self.twiddle7.im * x724n.im
5061 + self.twiddle8.im * x823n.im
5062 + self.twiddle9.im * x922n.im
5063 + self.twiddle10.im * x1021n.im
5064 + self.twiddle11.im * x1120n.im
5065 + self.twiddle12.im * x1219n.im
5066 + self.twiddle13.im * x1318n.im
5067 + self.twiddle14.im * x1417n.im
5068 + self.twiddle15.im * x1516n.im;
5069 let b229re_a = buffer.load(0).re
5070 + self.twiddle2.re * x130p.re
5071 + self.twiddle4.re * x229p.re
5072 + self.twiddle6.re * x328p.re
5073 + self.twiddle8.re * x427p.re
5074 + self.twiddle10.re * x526p.re
5075 + self.twiddle12.re * x625p.re
5076 + self.twiddle14.re * x724p.re
5077 + self.twiddle15.re * x823p.re
5078 + self.twiddle13.re * x922p.re
5079 + self.twiddle11.re * x1021p.re
5080 + self.twiddle9.re * x1120p.re
5081 + self.twiddle7.re * x1219p.re
5082 + self.twiddle5.re * x1318p.re
5083 + self.twiddle3.re * x1417p.re
5084 + self.twiddle1.re * x1516p.re;
5085 let b229re_b = self.twiddle2.im * x130n.im
5086 + self.twiddle4.im * x229n.im
5087 + self.twiddle6.im * x328n.im
5088 + self.twiddle8.im * x427n.im
5089 + self.twiddle10.im * x526n.im
5090 + self.twiddle12.im * x625n.im
5091 + self.twiddle14.im * x724n.im
5092 + -self.twiddle15.im * x823n.im
5093 + -self.twiddle13.im * x922n.im
5094 + -self.twiddle11.im * x1021n.im
5095 + -self.twiddle9.im * x1120n.im
5096 + -self.twiddle7.im * x1219n.im
5097 + -self.twiddle5.im * x1318n.im
5098 + -self.twiddle3.im * x1417n.im
5099 + -self.twiddle1.im * x1516n.im;
5100 let b328re_a = buffer.load(0).re
5101 + self.twiddle3.re * x130p.re
5102 + self.twiddle6.re * x229p.re
5103 + self.twiddle9.re * x328p.re
5104 + self.twiddle12.re * x427p.re
5105 + self.twiddle15.re * x526p.re
5106 + self.twiddle13.re * x625p.re
5107 + self.twiddle10.re * x724p.re
5108 + self.twiddle7.re * x823p.re
5109 + self.twiddle4.re * x922p.re
5110 + self.twiddle1.re * x1021p.re
5111 + self.twiddle2.re * x1120p.re
5112 + self.twiddle5.re * x1219p.re
5113 + self.twiddle8.re * x1318p.re
5114 + self.twiddle11.re * x1417p.re
5115 + self.twiddle14.re * x1516p.re;
5116 let b328re_b = self.twiddle3.im * x130n.im
5117 + self.twiddle6.im * x229n.im
5118 + self.twiddle9.im * x328n.im
5119 + self.twiddle12.im * x427n.im
5120 + self.twiddle15.im * x526n.im
5121 + -self.twiddle13.im * x625n.im
5122 + -self.twiddle10.im * x724n.im
5123 + -self.twiddle7.im * x823n.im
5124 + -self.twiddle4.im * x922n.im
5125 + -self.twiddle1.im * x1021n.im
5126 + self.twiddle2.im * x1120n.im
5127 + self.twiddle5.im * x1219n.im
5128 + self.twiddle8.im * x1318n.im
5129 + self.twiddle11.im * x1417n.im
5130 + self.twiddle14.im * x1516n.im;
5131 let b427re_a = buffer.load(0).re
5132 + self.twiddle4.re * x130p.re
5133 + self.twiddle8.re * x229p.re
5134 + self.twiddle12.re * x328p.re
5135 + self.twiddle15.re * x427p.re
5136 + self.twiddle11.re * x526p.re
5137 + self.twiddle7.re * x625p.re
5138 + self.twiddle3.re * x724p.re
5139 + self.twiddle1.re * x823p.re
5140 + self.twiddle5.re * x922p.re
5141 + self.twiddle9.re * x1021p.re
5142 + self.twiddle13.re * x1120p.re
5143 + self.twiddle14.re * x1219p.re
5144 + self.twiddle10.re * x1318p.re
5145 + self.twiddle6.re * x1417p.re
5146 + self.twiddle2.re * x1516p.re;
5147 let b427re_b = self.twiddle4.im * x130n.im
5148 + self.twiddle8.im * x229n.im
5149 + self.twiddle12.im * x328n.im
5150 + -self.twiddle15.im * x427n.im
5151 + -self.twiddle11.im * x526n.im
5152 + -self.twiddle7.im * x625n.im
5153 + -self.twiddle3.im * x724n.im
5154 + self.twiddle1.im * x823n.im
5155 + self.twiddle5.im * x922n.im
5156 + self.twiddle9.im * x1021n.im
5157 + self.twiddle13.im * x1120n.im
5158 + -self.twiddle14.im * x1219n.im
5159 + -self.twiddle10.im * x1318n.im
5160 + -self.twiddle6.im * x1417n.im
5161 + -self.twiddle2.im * x1516n.im;
5162 let b526re_a = buffer.load(0).re
5163 + self.twiddle5.re * x130p.re
5164 + self.twiddle10.re * x229p.re
5165 + self.twiddle15.re * x328p.re
5166 + self.twiddle11.re * x427p.re
5167 + self.twiddle6.re * x526p.re
5168 + self.twiddle1.re * x625p.re
5169 + self.twiddle4.re * x724p.re
5170 + self.twiddle9.re * x823p.re
5171 + self.twiddle14.re * x922p.re
5172 + self.twiddle12.re * x1021p.re
5173 + self.twiddle7.re * x1120p.re
5174 + self.twiddle2.re * x1219p.re
5175 + self.twiddle3.re * x1318p.re
5176 + self.twiddle8.re * x1417p.re
5177 + self.twiddle13.re * x1516p.re;
5178 let b526re_b = self.twiddle5.im * x130n.im
5179 + self.twiddle10.im * x229n.im
5180 + self.twiddle15.im * x328n.im
5181 + -self.twiddle11.im * x427n.im
5182 + -self.twiddle6.im * x526n.im
5183 + -self.twiddle1.im * x625n.im
5184 + self.twiddle4.im * x724n.im
5185 + self.twiddle9.im * x823n.im
5186 + self.twiddle14.im * x922n.im
5187 + -self.twiddle12.im * x1021n.im
5188 + -self.twiddle7.im * x1120n.im
5189 + -self.twiddle2.im * x1219n.im
5190 + self.twiddle3.im * x1318n.im
5191 + self.twiddle8.im * x1417n.im
5192 + self.twiddle13.im * x1516n.im;
5193 let b625re_a = buffer.load(0).re
5194 + self.twiddle6.re * x130p.re
5195 + self.twiddle12.re * x229p.re
5196 + self.twiddle13.re * x328p.re
5197 + self.twiddle7.re * x427p.re
5198 + self.twiddle1.re * x526p.re
5199 + self.twiddle5.re * x625p.re
5200 + self.twiddle11.re * x724p.re
5201 + self.twiddle14.re * x823p.re
5202 + self.twiddle8.re * x922p.re
5203 + self.twiddle2.re * x1021p.re
5204 + self.twiddle4.re * x1120p.re
5205 + self.twiddle10.re * x1219p.re
5206 + self.twiddle15.re * x1318p.re
5207 + self.twiddle9.re * x1417p.re
5208 + self.twiddle3.re * x1516p.re;
5209 let b625re_b = self.twiddle6.im * x130n.im
5210 + self.twiddle12.im * x229n.im
5211 + -self.twiddle13.im * x328n.im
5212 + -self.twiddle7.im * x427n.im
5213 + -self.twiddle1.im * x526n.im
5214 + self.twiddle5.im * x625n.im
5215 + self.twiddle11.im * x724n.im
5216 + -self.twiddle14.im * x823n.im
5217 + -self.twiddle8.im * x922n.im
5218 + -self.twiddle2.im * x1021n.im
5219 + self.twiddle4.im * x1120n.im
5220 + self.twiddle10.im * x1219n.im
5221 + -self.twiddle15.im * x1318n.im
5222 + -self.twiddle9.im * x1417n.im
5223 + -self.twiddle3.im * x1516n.im;
5224 let b724re_a = buffer.load(0).re
5225 + self.twiddle7.re * x130p.re
5226 + self.twiddle14.re * x229p.re
5227 + self.twiddle10.re * x328p.re
5228 + self.twiddle3.re * x427p.re
5229 + self.twiddle4.re * x526p.re
5230 + self.twiddle11.re * x625p.re
5231 + self.twiddle13.re * x724p.re
5232 + self.twiddle6.re * x823p.re
5233 + self.twiddle1.re * x922p.re
5234 + self.twiddle8.re * x1021p.re
5235 + self.twiddle15.re * x1120p.re
5236 + self.twiddle9.re * x1219p.re
5237 + self.twiddle2.re * x1318p.re
5238 + self.twiddle5.re * x1417p.re
5239 + self.twiddle12.re * x1516p.re;
5240 let b724re_b = self.twiddle7.im * x130n.im
5241 + self.twiddle14.im * x229n.im
5242 + -self.twiddle10.im * x328n.im
5243 + -self.twiddle3.im * x427n.im
5244 + self.twiddle4.im * x526n.im
5245 + self.twiddle11.im * x625n.im
5246 + -self.twiddle13.im * x724n.im
5247 + -self.twiddle6.im * x823n.im
5248 + self.twiddle1.im * x922n.im
5249 + self.twiddle8.im * x1021n.im
5250 + self.twiddle15.im * x1120n.im
5251 + -self.twiddle9.im * x1219n.im
5252 + -self.twiddle2.im * x1318n.im
5253 + self.twiddle5.im * x1417n.im
5254 + self.twiddle12.im * x1516n.im;
5255 let b823re_a = buffer.load(0).re
5256 + self.twiddle8.re * x130p.re
5257 + self.twiddle15.re * x229p.re
5258 + self.twiddle7.re * x328p.re
5259 + self.twiddle1.re * x427p.re
5260 + self.twiddle9.re * x526p.re
5261 + self.twiddle14.re * x625p.re
5262 + self.twiddle6.re * x724p.re
5263 + self.twiddle2.re * x823p.re
5264 + self.twiddle10.re * x922p.re
5265 + self.twiddle13.re * x1021p.re
5266 + self.twiddle5.re * x1120p.re
5267 + self.twiddle3.re * x1219p.re
5268 + self.twiddle11.re * x1318p.re
5269 + self.twiddle12.re * x1417p.re
5270 + self.twiddle4.re * x1516p.re;
5271 let b823re_b = self.twiddle8.im * x130n.im
5272 + -self.twiddle15.im * x229n.im
5273 + -self.twiddle7.im * x328n.im
5274 + self.twiddle1.im * x427n.im
5275 + self.twiddle9.im * x526n.im
5276 + -self.twiddle14.im * x625n.im
5277 + -self.twiddle6.im * x724n.im
5278 + self.twiddle2.im * x823n.im
5279 + self.twiddle10.im * x922n.im
5280 + -self.twiddle13.im * x1021n.im
5281 + -self.twiddle5.im * x1120n.im
5282 + self.twiddle3.im * x1219n.im
5283 + self.twiddle11.im * x1318n.im
5284 + -self.twiddle12.im * x1417n.im
5285 + -self.twiddle4.im * x1516n.im;
5286 let b922re_a = buffer.load(0).re
5287 + self.twiddle9.re * x130p.re
5288 + self.twiddle13.re * x229p.re
5289 + self.twiddle4.re * x328p.re
5290 + self.twiddle5.re * x427p.re
5291 + self.twiddle14.re * x526p.re
5292 + self.twiddle8.re * x625p.re
5293 + self.twiddle1.re * x724p.re
5294 + self.twiddle10.re * x823p.re
5295 + self.twiddle12.re * x922p.re
5296 + self.twiddle3.re * x1021p.re
5297 + self.twiddle6.re * x1120p.re
5298 + self.twiddle15.re * x1219p.re
5299 + self.twiddle7.re * x1318p.re
5300 + self.twiddle2.re * x1417p.re
5301 + self.twiddle11.re * x1516p.re;
5302 let b922re_b = self.twiddle9.im * x130n.im
5303 + -self.twiddle13.im * x229n.im
5304 + -self.twiddle4.im * x328n.im
5305 + self.twiddle5.im * x427n.im
5306 + self.twiddle14.im * x526n.im
5307 + -self.twiddle8.im * x625n.im
5308 + self.twiddle1.im * x724n.im
5309 + self.twiddle10.im * x823n.im
5310 + -self.twiddle12.im * x922n.im
5311 + -self.twiddle3.im * x1021n.im
5312 + self.twiddle6.im * x1120n.im
5313 + self.twiddle15.im * x1219n.im
5314 + -self.twiddle7.im * x1318n.im
5315 + self.twiddle2.im * x1417n.im
5316 + self.twiddle11.im * x1516n.im;
5317 let b1021re_a = buffer.load(0).re
5318 + self.twiddle10.re * x130p.re
5319 + self.twiddle11.re * x229p.re
5320 + self.twiddle1.re * x328p.re
5321 + self.twiddle9.re * x427p.re
5322 + self.twiddle12.re * x526p.re
5323 + self.twiddle2.re * x625p.re
5324 + self.twiddle8.re * x724p.re
5325 + self.twiddle13.re * x823p.re
5326 + self.twiddle3.re * x922p.re
5327 + self.twiddle7.re * x1021p.re
5328 + self.twiddle14.re * x1120p.re
5329 + self.twiddle4.re * x1219p.re
5330 + self.twiddle6.re * x1318p.re
5331 + self.twiddle15.re * x1417p.re
5332 + self.twiddle5.re * x1516p.re;
5333 let b1021re_b = self.twiddle10.im * x130n.im
5334 + -self.twiddle11.im * x229n.im
5335 + -self.twiddle1.im * x328n.im
5336 + self.twiddle9.im * x427n.im
5337 + -self.twiddle12.im * x526n.im
5338 + -self.twiddle2.im * x625n.im
5339 + self.twiddle8.im * x724n.im
5340 + -self.twiddle13.im * x823n.im
5341 + -self.twiddle3.im * x922n.im
5342 + self.twiddle7.im * x1021n.im
5343 + -self.twiddle14.im * x1120n.im
5344 + -self.twiddle4.im * x1219n.im
5345 + self.twiddle6.im * x1318n.im
5346 + -self.twiddle15.im * x1417n.im
5347 + -self.twiddle5.im * x1516n.im;
5348 let b1120re_a = buffer.load(0).re
5349 + self.twiddle11.re * x130p.re
5350 + self.twiddle9.re * x229p.re
5351 + self.twiddle2.re * x328p.re
5352 + self.twiddle13.re * x427p.re
5353 + self.twiddle7.re * x526p.re
5354 + self.twiddle4.re * x625p.re
5355 + self.twiddle15.re * x724p.re
5356 + self.twiddle5.re * x823p.re
5357 + self.twiddle6.re * x922p.re
5358 + self.twiddle14.re * x1021p.re
5359 + self.twiddle3.re * x1120p.re
5360 + self.twiddle8.re * x1219p.re
5361 + self.twiddle12.re * x1318p.re
5362 + self.twiddle1.re * x1417p.re
5363 + self.twiddle10.re * x1516p.re;
5364 let b1120re_b = self.twiddle11.im * x130n.im
5365 + -self.twiddle9.im * x229n.im
5366 + self.twiddle2.im * x328n.im
5367 + self.twiddle13.im * x427n.im
5368 + -self.twiddle7.im * x526n.im
5369 + self.twiddle4.im * x625n.im
5370 + self.twiddle15.im * x724n.im
5371 + -self.twiddle5.im * x823n.im
5372 + self.twiddle6.im * x922n.im
5373 + -self.twiddle14.im * x1021n.im
5374 + -self.twiddle3.im * x1120n.im
5375 + self.twiddle8.im * x1219n.im
5376 + -self.twiddle12.im * x1318n.im
5377 + -self.twiddle1.im * x1417n.im
5378 + self.twiddle10.im * x1516n.im;
5379 let b1219re_a = buffer.load(0).re
5380 + self.twiddle12.re * x130p.re
5381 + self.twiddle7.re * x229p.re
5382 + self.twiddle5.re * x328p.re
5383 + self.twiddle14.re * x427p.re
5384 + self.twiddle2.re * x526p.re
5385 + self.twiddle10.re * x625p.re
5386 + self.twiddle9.re * x724p.re
5387 + self.twiddle3.re * x823p.re
5388 + self.twiddle15.re * x922p.re
5389 + self.twiddle4.re * x1021p.re
5390 + self.twiddle8.re * x1120p.re
5391 + self.twiddle11.re * x1219p.re
5392 + self.twiddle1.re * x1318p.re
5393 + self.twiddle13.re * x1417p.re
5394 + self.twiddle6.re * x1516p.re;
5395 let b1219re_b = self.twiddle12.im * x130n.im
5396 + -self.twiddle7.im * x229n.im
5397 + self.twiddle5.im * x328n.im
5398 + -self.twiddle14.im * x427n.im
5399 + -self.twiddle2.im * x526n.im
5400 + self.twiddle10.im * x625n.im
5401 + -self.twiddle9.im * x724n.im
5402 + self.twiddle3.im * x823n.im
5403 + self.twiddle15.im * x922n.im
5404 + -self.twiddle4.im * x1021n.im
5405 + self.twiddle8.im * x1120n.im
5406 + -self.twiddle11.im * x1219n.im
5407 + self.twiddle1.im * x1318n.im
5408 + self.twiddle13.im * x1417n.im
5409 + -self.twiddle6.im * x1516n.im;
5410 let b1318re_a = buffer.load(0).re
5411 + self.twiddle13.re * x130p.re
5412 + self.twiddle5.re * x229p.re
5413 + self.twiddle8.re * x328p.re
5414 + self.twiddle10.re * x427p.re
5415 + self.twiddle3.re * x526p.re
5416 + self.twiddle15.re * x625p.re
5417 + self.twiddle2.re * x724p.re
5418 + self.twiddle11.re * x823p.re
5419 + self.twiddle7.re * x922p.re
5420 + self.twiddle6.re * x1021p.re
5421 + self.twiddle12.re * x1120p.re
5422 + self.twiddle1.re * x1219p.re
5423 + self.twiddle14.re * x1318p.re
5424 + self.twiddle4.re * x1417p.re
5425 + self.twiddle9.re * x1516p.re;
5426 let b1318re_b = self.twiddle13.im * x130n.im
5427 + -self.twiddle5.im * x229n.im
5428 + self.twiddle8.im * x328n.im
5429 + -self.twiddle10.im * x427n.im
5430 + self.twiddle3.im * x526n.im
5431 + -self.twiddle15.im * x625n.im
5432 + -self.twiddle2.im * x724n.im
5433 + self.twiddle11.im * x823n.im
5434 + -self.twiddle7.im * x922n.im
5435 + self.twiddle6.im * x1021n.im
5436 + -self.twiddle12.im * x1120n.im
5437 + self.twiddle1.im * x1219n.im
5438 + self.twiddle14.im * x1318n.im
5439 + -self.twiddle4.im * x1417n.im
5440 + self.twiddle9.im * x1516n.im;
5441 let b1417re_a = buffer.load(0).re
5442 + self.twiddle14.re * x130p.re
5443 + self.twiddle3.re * x229p.re
5444 + self.twiddle11.re * x328p.re
5445 + self.twiddle6.re * x427p.re
5446 + self.twiddle8.re * x526p.re
5447 + self.twiddle9.re * x625p.re
5448 + self.twiddle5.re * x724p.re
5449 + self.twiddle12.re * x823p.re
5450 + self.twiddle2.re * x922p.re
5451 + self.twiddle15.re * x1021p.re
5452 + self.twiddle1.re * x1120p.re
5453 + self.twiddle13.re * x1219p.re
5454 + self.twiddle4.re * x1318p.re
5455 + self.twiddle10.re * x1417p.re
5456 + self.twiddle7.re * x1516p.re;
5457 let b1417re_b = self.twiddle14.im * x130n.im
5458 + -self.twiddle3.im * x229n.im
5459 + self.twiddle11.im * x328n.im
5460 + -self.twiddle6.im * x427n.im
5461 + self.twiddle8.im * x526n.im
5462 + -self.twiddle9.im * x625n.im
5463 + self.twiddle5.im * x724n.im
5464 + -self.twiddle12.im * x823n.im
5465 + self.twiddle2.im * x922n.im
5466 + -self.twiddle15.im * x1021n.im
5467 + -self.twiddle1.im * x1120n.im
5468 + self.twiddle13.im * x1219n.im
5469 + -self.twiddle4.im * x1318n.im
5470 + self.twiddle10.im * x1417n.im
5471 + -self.twiddle7.im * x1516n.im;
5472 let b1516re_a = buffer.load(0).re
5473 + self.twiddle15.re * x130p.re
5474 + self.twiddle1.re * x229p.re
5475 + self.twiddle14.re * x328p.re
5476 + self.twiddle2.re * x427p.re
5477 + self.twiddle13.re * x526p.re
5478 + self.twiddle3.re * x625p.re
5479 + self.twiddle12.re * x724p.re
5480 + self.twiddle4.re * x823p.re
5481 + self.twiddle11.re * x922p.re
5482 + self.twiddle5.re * x1021p.re
5483 + self.twiddle10.re * x1120p.re
5484 + self.twiddle6.re * x1219p.re
5485 + self.twiddle9.re * x1318p.re
5486 + self.twiddle7.re * x1417p.re
5487 + self.twiddle8.re * x1516p.re;
5488 let b1516re_b = self.twiddle15.im * x130n.im
5489 + -self.twiddle1.im * x229n.im
5490 + self.twiddle14.im * x328n.im
5491 + -self.twiddle2.im * x427n.im
5492 + self.twiddle13.im * x526n.im
5493 + -self.twiddle3.im * x625n.im
5494 + self.twiddle12.im * x724n.im
5495 + -self.twiddle4.im * x823n.im
5496 + self.twiddle11.im * x922n.im
5497 + -self.twiddle5.im * x1021n.im
5498 + self.twiddle10.im * x1120n.im
5499 + -self.twiddle6.im * x1219n.im
5500 + self.twiddle9.im * x1318n.im
5501 + -self.twiddle7.im * x1417n.im
5502 + self.twiddle8.im * x1516n.im;
5503
5504 let b130im_a = buffer.load(0).im
5505 + self.twiddle1.re * x130p.im
5506 + self.twiddle2.re * x229p.im
5507 + self.twiddle3.re * x328p.im
5508 + self.twiddle4.re * x427p.im
5509 + self.twiddle5.re * x526p.im
5510 + self.twiddle6.re * x625p.im
5511 + self.twiddle7.re * x724p.im
5512 + self.twiddle8.re * x823p.im
5513 + self.twiddle9.re * x922p.im
5514 + self.twiddle10.re * x1021p.im
5515 + self.twiddle11.re * x1120p.im
5516 + self.twiddle12.re * x1219p.im
5517 + self.twiddle13.re * x1318p.im
5518 + self.twiddle14.re * x1417p.im
5519 + self.twiddle15.re * x1516p.im;
5520 let b130im_b = self.twiddle1.im * x130n.re
5521 + self.twiddle2.im * x229n.re
5522 + self.twiddle3.im * x328n.re
5523 + self.twiddle4.im * x427n.re
5524 + self.twiddle5.im * x526n.re
5525 + self.twiddle6.im * x625n.re
5526 + self.twiddle7.im * x724n.re
5527 + self.twiddle8.im * x823n.re
5528 + self.twiddle9.im * x922n.re
5529 + self.twiddle10.im * x1021n.re
5530 + self.twiddle11.im * x1120n.re
5531 + self.twiddle12.im * x1219n.re
5532 + self.twiddle13.im * x1318n.re
5533 + self.twiddle14.im * x1417n.re
5534 + self.twiddle15.im * x1516n.re;
5535 let b229im_a = buffer.load(0).im
5536 + self.twiddle2.re * x130p.im
5537 + self.twiddle4.re * x229p.im
5538 + self.twiddle6.re * x328p.im
5539 + self.twiddle8.re * x427p.im
5540 + self.twiddle10.re * x526p.im
5541 + self.twiddle12.re * x625p.im
5542 + self.twiddle14.re * x724p.im
5543 + self.twiddle15.re * x823p.im
5544 + self.twiddle13.re * x922p.im
5545 + self.twiddle11.re * x1021p.im
5546 + self.twiddle9.re * x1120p.im
5547 + self.twiddle7.re * x1219p.im
5548 + self.twiddle5.re * x1318p.im
5549 + self.twiddle3.re * x1417p.im
5550 + self.twiddle1.re * x1516p.im;
5551 let b229im_b = self.twiddle2.im * x130n.re
5552 + self.twiddle4.im * x229n.re
5553 + self.twiddle6.im * x328n.re
5554 + self.twiddle8.im * x427n.re
5555 + self.twiddle10.im * x526n.re
5556 + self.twiddle12.im * x625n.re
5557 + self.twiddle14.im * x724n.re
5558 + -self.twiddle15.im * x823n.re
5559 + -self.twiddle13.im * x922n.re
5560 + -self.twiddle11.im * x1021n.re
5561 + -self.twiddle9.im * x1120n.re
5562 + -self.twiddle7.im * x1219n.re
5563 + -self.twiddle5.im * x1318n.re
5564 + -self.twiddle3.im * x1417n.re
5565 + -self.twiddle1.im * x1516n.re;
5566 let b328im_a = buffer.load(0).im
5567 + self.twiddle3.re * x130p.im
5568 + self.twiddle6.re * x229p.im
5569 + self.twiddle9.re * x328p.im
5570 + self.twiddle12.re * x427p.im
5571 + self.twiddle15.re * x526p.im
5572 + self.twiddle13.re * x625p.im
5573 + self.twiddle10.re * x724p.im
5574 + self.twiddle7.re * x823p.im
5575 + self.twiddle4.re * x922p.im
5576 + self.twiddle1.re * x1021p.im
5577 + self.twiddle2.re * x1120p.im
5578 + self.twiddle5.re * x1219p.im
5579 + self.twiddle8.re * x1318p.im
5580 + self.twiddle11.re * x1417p.im
5581 + self.twiddle14.re * x1516p.im;
5582 let b328im_b = self.twiddle3.im * x130n.re
5583 + self.twiddle6.im * x229n.re
5584 + self.twiddle9.im * x328n.re
5585 + self.twiddle12.im * x427n.re
5586 + self.twiddle15.im * x526n.re
5587 + -self.twiddle13.im * x625n.re
5588 + -self.twiddle10.im * x724n.re
5589 + -self.twiddle7.im * x823n.re
5590 + -self.twiddle4.im * x922n.re
5591 + -self.twiddle1.im * x1021n.re
5592 + self.twiddle2.im * x1120n.re
5593 + self.twiddle5.im * x1219n.re
5594 + self.twiddle8.im * x1318n.re
5595 + self.twiddle11.im * x1417n.re
5596 + self.twiddle14.im * x1516n.re;
5597 let b427im_a = buffer.load(0).im
5598 + self.twiddle4.re * x130p.im
5599 + self.twiddle8.re * x229p.im
5600 + self.twiddle12.re * x328p.im
5601 + self.twiddle15.re * x427p.im
5602 + self.twiddle11.re * x526p.im
5603 + self.twiddle7.re * x625p.im
5604 + self.twiddle3.re * x724p.im
5605 + self.twiddle1.re * x823p.im
5606 + self.twiddle5.re * x922p.im
5607 + self.twiddle9.re * x1021p.im
5608 + self.twiddle13.re * x1120p.im
5609 + self.twiddle14.re * x1219p.im
5610 + self.twiddle10.re * x1318p.im
5611 + self.twiddle6.re * x1417p.im
5612 + self.twiddle2.re * x1516p.im;
5613 let b427im_b = self.twiddle4.im * x130n.re
5614 + self.twiddle8.im * x229n.re
5615 + self.twiddle12.im * x328n.re
5616 + -self.twiddle15.im * x427n.re
5617 + -self.twiddle11.im * x526n.re
5618 + -self.twiddle7.im * x625n.re
5619 + -self.twiddle3.im * x724n.re
5620 + self.twiddle1.im * x823n.re
5621 + self.twiddle5.im * x922n.re
5622 + self.twiddle9.im * x1021n.re
5623 + self.twiddle13.im * x1120n.re
5624 + -self.twiddle14.im * x1219n.re
5625 + -self.twiddle10.im * x1318n.re
5626 + -self.twiddle6.im * x1417n.re
5627 + -self.twiddle2.im * x1516n.re;
5628 let b526im_a = buffer.load(0).im
5629 + self.twiddle5.re * x130p.im
5630 + self.twiddle10.re * x229p.im
5631 + self.twiddle15.re * x328p.im
5632 + self.twiddle11.re * x427p.im
5633 + self.twiddle6.re * x526p.im
5634 + self.twiddle1.re * x625p.im
5635 + self.twiddle4.re * x724p.im
5636 + self.twiddle9.re * x823p.im
5637 + self.twiddle14.re * x922p.im
5638 + self.twiddle12.re * x1021p.im
5639 + self.twiddle7.re * x1120p.im
5640 + self.twiddle2.re * x1219p.im
5641 + self.twiddle3.re * x1318p.im
5642 + self.twiddle8.re * x1417p.im
5643 + self.twiddle13.re * x1516p.im;
5644 let b526im_b = self.twiddle5.im * x130n.re
5645 + self.twiddle10.im * x229n.re
5646 + self.twiddle15.im * x328n.re
5647 + -self.twiddle11.im * x427n.re
5648 + -self.twiddle6.im * x526n.re
5649 + -self.twiddle1.im * x625n.re
5650 + self.twiddle4.im * x724n.re
5651 + self.twiddle9.im * x823n.re
5652 + self.twiddle14.im * x922n.re
5653 + -self.twiddle12.im * x1021n.re
5654 + -self.twiddle7.im * x1120n.re
5655 + -self.twiddle2.im * x1219n.re
5656 + self.twiddle3.im * x1318n.re
5657 + self.twiddle8.im * x1417n.re
5658 + self.twiddle13.im * x1516n.re;
5659 let b625im_a = buffer.load(0).im
5660 + self.twiddle6.re * x130p.im
5661 + self.twiddle12.re * x229p.im
5662 + self.twiddle13.re * x328p.im
5663 + self.twiddle7.re * x427p.im
5664 + self.twiddle1.re * x526p.im
5665 + self.twiddle5.re * x625p.im
5666 + self.twiddle11.re * x724p.im
5667 + self.twiddle14.re * x823p.im
5668 + self.twiddle8.re * x922p.im
5669 + self.twiddle2.re * x1021p.im
5670 + self.twiddle4.re * x1120p.im
5671 + self.twiddle10.re * x1219p.im
5672 + self.twiddle15.re * x1318p.im
5673 + self.twiddle9.re * x1417p.im
5674 + self.twiddle3.re * x1516p.im;
5675 let b625im_b = self.twiddle6.im * x130n.re
5676 + self.twiddle12.im * x229n.re
5677 + -self.twiddle13.im * x328n.re
5678 + -self.twiddle7.im * x427n.re
5679 + -self.twiddle1.im * x526n.re
5680 + self.twiddle5.im * x625n.re
5681 + self.twiddle11.im * x724n.re
5682 + -self.twiddle14.im * x823n.re
5683 + -self.twiddle8.im * x922n.re
5684 + -self.twiddle2.im * x1021n.re
5685 + self.twiddle4.im * x1120n.re
5686 + self.twiddle10.im * x1219n.re
5687 + -self.twiddle15.im * x1318n.re
5688 + -self.twiddle9.im * x1417n.re
5689 + -self.twiddle3.im * x1516n.re;
5690 let b724im_a = buffer.load(0).im
5691 + self.twiddle7.re * x130p.im
5692 + self.twiddle14.re * x229p.im
5693 + self.twiddle10.re * x328p.im
5694 + self.twiddle3.re * x427p.im
5695 + self.twiddle4.re * x526p.im
5696 + self.twiddle11.re * x625p.im
5697 + self.twiddle13.re * x724p.im
5698 + self.twiddle6.re * x823p.im
5699 + self.twiddle1.re * x922p.im
5700 + self.twiddle8.re * x1021p.im
5701 + self.twiddle15.re * x1120p.im
5702 + self.twiddle9.re * x1219p.im
5703 + self.twiddle2.re * x1318p.im
5704 + self.twiddle5.re * x1417p.im
5705 + self.twiddle12.re * x1516p.im;
5706 let b724im_b = self.twiddle7.im * x130n.re
5707 + self.twiddle14.im * x229n.re
5708 + -self.twiddle10.im * x328n.re
5709 + -self.twiddle3.im * x427n.re
5710 + self.twiddle4.im * x526n.re
5711 + self.twiddle11.im * x625n.re
5712 + -self.twiddle13.im * x724n.re
5713 + -self.twiddle6.im * x823n.re
5714 + self.twiddle1.im * x922n.re
5715 + self.twiddle8.im * x1021n.re
5716 + self.twiddle15.im * x1120n.re
5717 + -self.twiddle9.im * x1219n.re
5718 + -self.twiddle2.im * x1318n.re
5719 + self.twiddle5.im * x1417n.re
5720 + self.twiddle12.im * x1516n.re;
5721 let b823im_a = buffer.load(0).im
5722 + self.twiddle8.re * x130p.im
5723 + self.twiddle15.re * x229p.im
5724 + self.twiddle7.re * x328p.im
5725 + self.twiddle1.re * x427p.im
5726 + self.twiddle9.re * x526p.im
5727 + self.twiddle14.re * x625p.im
5728 + self.twiddle6.re * x724p.im
5729 + self.twiddle2.re * x823p.im
5730 + self.twiddle10.re * x922p.im
5731 + self.twiddle13.re * x1021p.im
5732 + self.twiddle5.re * x1120p.im
5733 + self.twiddle3.re * x1219p.im
5734 + self.twiddle11.re * x1318p.im
5735 + self.twiddle12.re * x1417p.im
5736 + self.twiddle4.re * x1516p.im;
5737 let b823im_b = self.twiddle8.im * x130n.re
5738 + -self.twiddle15.im * x229n.re
5739 + -self.twiddle7.im * x328n.re
5740 + self.twiddle1.im * x427n.re
5741 + self.twiddle9.im * x526n.re
5742 + -self.twiddle14.im * x625n.re
5743 + -self.twiddle6.im * x724n.re
5744 + self.twiddle2.im * x823n.re
5745 + self.twiddle10.im * x922n.re
5746 + -self.twiddle13.im * x1021n.re
5747 + -self.twiddle5.im * x1120n.re
5748 + self.twiddle3.im * x1219n.re
5749 + self.twiddle11.im * x1318n.re
5750 + -self.twiddle12.im * x1417n.re
5751 + -self.twiddle4.im * x1516n.re;
5752 let b922im_a = buffer.load(0).im
5753 + self.twiddle9.re * x130p.im
5754 + self.twiddle13.re * x229p.im
5755 + self.twiddle4.re * x328p.im
5756 + self.twiddle5.re * x427p.im
5757 + self.twiddle14.re * x526p.im
5758 + self.twiddle8.re * x625p.im
5759 + self.twiddle1.re * x724p.im
5760 + self.twiddle10.re * x823p.im
5761 + self.twiddle12.re * x922p.im
5762 + self.twiddle3.re * x1021p.im
5763 + self.twiddle6.re * x1120p.im
5764 + self.twiddle15.re * x1219p.im
5765 + self.twiddle7.re * x1318p.im
5766 + self.twiddle2.re * x1417p.im
5767 + self.twiddle11.re * x1516p.im;
5768 let b922im_b = self.twiddle9.im * x130n.re
5769 + -self.twiddle13.im * x229n.re
5770 + -self.twiddle4.im * x328n.re
5771 + self.twiddle5.im * x427n.re
5772 + self.twiddle14.im * x526n.re
5773 + -self.twiddle8.im * x625n.re
5774 + self.twiddle1.im * x724n.re
5775 + self.twiddle10.im * x823n.re
5776 + -self.twiddle12.im * x922n.re
5777 + -self.twiddle3.im * x1021n.re
5778 + self.twiddle6.im * x1120n.re
5779 + self.twiddle15.im * x1219n.re
5780 + -self.twiddle7.im * x1318n.re
5781 + self.twiddle2.im * x1417n.re
5782 + self.twiddle11.im * x1516n.re;
5783 let b1021im_a = buffer.load(0).im
5784 + self.twiddle10.re * x130p.im
5785 + self.twiddle11.re * x229p.im
5786 + self.twiddle1.re * x328p.im
5787 + self.twiddle9.re * x427p.im
5788 + self.twiddle12.re * x526p.im
5789 + self.twiddle2.re * x625p.im
5790 + self.twiddle8.re * x724p.im
5791 + self.twiddle13.re * x823p.im
5792 + self.twiddle3.re * x922p.im
5793 + self.twiddle7.re * x1021p.im
5794 + self.twiddle14.re * x1120p.im
5795 + self.twiddle4.re * x1219p.im
5796 + self.twiddle6.re * x1318p.im
5797 + self.twiddle15.re * x1417p.im
5798 + self.twiddle5.re * x1516p.im;
5799 let b1021im_b = self.twiddle10.im * x130n.re
5800 + -self.twiddle11.im * x229n.re
5801 + -self.twiddle1.im * x328n.re
5802 + self.twiddle9.im * x427n.re
5803 + -self.twiddle12.im * x526n.re
5804 + -self.twiddle2.im * x625n.re
5805 + self.twiddle8.im * x724n.re
5806 + -self.twiddle13.im * x823n.re
5807 + -self.twiddle3.im * x922n.re
5808 + self.twiddle7.im * x1021n.re
5809 + -self.twiddle14.im * x1120n.re
5810 + -self.twiddle4.im * x1219n.re
5811 + self.twiddle6.im * x1318n.re
5812 + -self.twiddle15.im * x1417n.re
5813 + -self.twiddle5.im * x1516n.re;
5814 let b1120im_a = buffer.load(0).im
5815 + self.twiddle11.re * x130p.im
5816 + self.twiddle9.re * x229p.im
5817 + self.twiddle2.re * x328p.im
5818 + self.twiddle13.re * x427p.im
5819 + self.twiddle7.re * x526p.im
5820 + self.twiddle4.re * x625p.im
5821 + self.twiddle15.re * x724p.im
5822 + self.twiddle5.re * x823p.im
5823 + self.twiddle6.re * x922p.im
5824 + self.twiddle14.re * x1021p.im
5825 + self.twiddle3.re * x1120p.im
5826 + self.twiddle8.re * x1219p.im
5827 + self.twiddle12.re * x1318p.im
5828 + self.twiddle1.re * x1417p.im
5829 + self.twiddle10.re * x1516p.im;
5830 let b1120im_b = self.twiddle11.im * x130n.re
5831 + -self.twiddle9.im * x229n.re
5832 + self.twiddle2.im * x328n.re
5833 + self.twiddle13.im * x427n.re
5834 + -self.twiddle7.im * x526n.re
5835 + self.twiddle4.im * x625n.re
5836 + self.twiddle15.im * x724n.re
5837 + -self.twiddle5.im * x823n.re
5838 + self.twiddle6.im * x922n.re
5839 + -self.twiddle14.im * x1021n.re
5840 + -self.twiddle3.im * x1120n.re
5841 + self.twiddle8.im * x1219n.re
5842 + -self.twiddle12.im * x1318n.re
5843 + -self.twiddle1.im * x1417n.re
5844 + self.twiddle10.im * x1516n.re;
5845 let b1219im_a = buffer.load(0).im
5846 + self.twiddle12.re * x130p.im
5847 + self.twiddle7.re * x229p.im
5848 + self.twiddle5.re * x328p.im
5849 + self.twiddle14.re * x427p.im
5850 + self.twiddle2.re * x526p.im
5851 + self.twiddle10.re * x625p.im
5852 + self.twiddle9.re * x724p.im
5853 + self.twiddle3.re * x823p.im
5854 + self.twiddle15.re * x922p.im
5855 + self.twiddle4.re * x1021p.im
5856 + self.twiddle8.re * x1120p.im
5857 + self.twiddle11.re * x1219p.im
5858 + self.twiddle1.re * x1318p.im
5859 + self.twiddle13.re * x1417p.im
5860 + self.twiddle6.re * x1516p.im;
5861 let b1219im_b = self.twiddle12.im * x130n.re
5862 + -self.twiddle7.im * x229n.re
5863 + self.twiddle5.im * x328n.re
5864 + -self.twiddle14.im * x427n.re
5865 + -self.twiddle2.im * x526n.re
5866 + self.twiddle10.im * x625n.re
5867 + -self.twiddle9.im * x724n.re
5868 + self.twiddle3.im * x823n.re
5869 + self.twiddle15.im * x922n.re
5870 + -self.twiddle4.im * x1021n.re
5871 + self.twiddle8.im * x1120n.re
5872 + -self.twiddle11.im * x1219n.re
5873 + self.twiddle1.im * x1318n.re
5874 + self.twiddle13.im * x1417n.re
5875 + -self.twiddle6.im * x1516n.re;
5876 let b1318im_a = buffer.load(0).im
5877 + self.twiddle13.re * x130p.im
5878 + self.twiddle5.re * x229p.im
5879 + self.twiddle8.re * x328p.im
5880 + self.twiddle10.re * x427p.im
5881 + self.twiddle3.re * x526p.im
5882 + self.twiddle15.re * x625p.im
5883 + self.twiddle2.re * x724p.im
5884 + self.twiddle11.re * x823p.im
5885 + self.twiddle7.re * x922p.im
5886 + self.twiddle6.re * x1021p.im
5887 + self.twiddle12.re * x1120p.im
5888 + self.twiddle1.re * x1219p.im
5889 + self.twiddle14.re * x1318p.im
5890 + self.twiddle4.re * x1417p.im
5891 + self.twiddle9.re * x1516p.im;
5892 let b1318im_b = self.twiddle13.im * x130n.re
5893 + -self.twiddle5.im * x229n.re
5894 + self.twiddle8.im * x328n.re
5895 + -self.twiddle10.im * x427n.re
5896 + self.twiddle3.im * x526n.re
5897 + -self.twiddle15.im * x625n.re
5898 + -self.twiddle2.im * x724n.re
5899 + self.twiddle11.im * x823n.re
5900 + -self.twiddle7.im * x922n.re
5901 + self.twiddle6.im * x1021n.re
5902 + -self.twiddle12.im * x1120n.re
5903 + self.twiddle1.im * x1219n.re
5904 + self.twiddle14.im * x1318n.re
5905 + -self.twiddle4.im * x1417n.re
5906 + self.twiddle9.im * x1516n.re;
5907 let b1417im_a = buffer.load(0).im
5908 + self.twiddle14.re * x130p.im
5909 + self.twiddle3.re * x229p.im
5910 + self.twiddle11.re * x328p.im
5911 + self.twiddle6.re * x427p.im
5912 + self.twiddle8.re * x526p.im
5913 + self.twiddle9.re * x625p.im
5914 + self.twiddle5.re * x724p.im
5915 + self.twiddle12.re * x823p.im
5916 + self.twiddle2.re * x922p.im
5917 + self.twiddle15.re * x1021p.im
5918 + self.twiddle1.re * x1120p.im
5919 + self.twiddle13.re * x1219p.im
5920 + self.twiddle4.re * x1318p.im
5921 + self.twiddle10.re * x1417p.im
5922 + self.twiddle7.re * x1516p.im;
5923 let b1417im_b = self.twiddle14.im * x130n.re
5924 + -self.twiddle3.im * x229n.re
5925 + self.twiddle11.im * x328n.re
5926 + -self.twiddle6.im * x427n.re
5927 + self.twiddle8.im * x526n.re
5928 + -self.twiddle9.im * x625n.re
5929 + self.twiddle5.im * x724n.re
5930 + -self.twiddle12.im * x823n.re
5931 + self.twiddle2.im * x922n.re
5932 + -self.twiddle15.im * x1021n.re
5933 + -self.twiddle1.im * x1120n.re
5934 + self.twiddle13.im * x1219n.re
5935 + -self.twiddle4.im * x1318n.re
5936 + self.twiddle10.im * x1417n.re
5937 + -self.twiddle7.im * x1516n.re;
5938 let b1516im_a = buffer.load(0).im
5939 + self.twiddle15.re * x130p.im
5940 + self.twiddle1.re * x229p.im
5941 + self.twiddle14.re * x328p.im
5942 + self.twiddle2.re * x427p.im
5943 + self.twiddle13.re * x526p.im
5944 + self.twiddle3.re * x625p.im
5945 + self.twiddle12.re * x724p.im
5946 + self.twiddle4.re * x823p.im
5947 + self.twiddle11.re * x922p.im
5948 + self.twiddle5.re * x1021p.im
5949 + self.twiddle10.re * x1120p.im
5950 + self.twiddle6.re * x1219p.im
5951 + self.twiddle9.re * x1318p.im
5952 + self.twiddle7.re * x1417p.im
5953 + self.twiddle8.re * x1516p.im;
5954 let b1516im_b = self.twiddle15.im * x130n.re
5955 + -self.twiddle1.im * x229n.re
5956 + self.twiddle14.im * x328n.re
5957 + -self.twiddle2.im * x427n.re
5958 + self.twiddle13.im * x526n.re
5959 + -self.twiddle3.im * x625n.re
5960 + self.twiddle12.im * x724n.re
5961 + -self.twiddle4.im * x823n.re
5962 + self.twiddle11.im * x922n.re
5963 + -self.twiddle5.im * x1021n.re
5964 + self.twiddle10.im * x1120n.re
5965 + -self.twiddle6.im * x1219n.re
5966 + self.twiddle9.im * x1318n.re
5967 + -self.twiddle7.im * x1417n.re
5968 + self.twiddle8.im * x1516n.re;
5969
5970 let out1re = b130re_a - b130re_b;
5971 let out1im = b130im_a + b130im_b;
5972 let out2re = b229re_a - b229re_b;
5973 let out2im = b229im_a + b229im_b;
5974 let out3re = b328re_a - b328re_b;
5975 let out3im = b328im_a + b328im_b;
5976 let out4re = b427re_a - b427re_b;
5977 let out4im = b427im_a + b427im_b;
5978 let out5re = b526re_a - b526re_b;
5979 let out5im = b526im_a + b526im_b;
5980 let out6re = b625re_a - b625re_b;
5981 let out6im = b625im_a + b625im_b;
5982 let out7re = b724re_a - b724re_b;
5983 let out7im = b724im_a + b724im_b;
5984 let out8re = b823re_a - b823re_b;
5985 let out8im = b823im_a + b823im_b;
5986 let out9re = b922re_a - b922re_b;
5987 let out9im = b922im_a + b922im_b;
5988 let out10re = b1021re_a - b1021re_b;
5989 let out10im = b1021im_a + b1021im_b;
5990 let out11re = b1120re_a - b1120re_b;
5991 let out11im = b1120im_a + b1120im_b;
5992 let out12re = b1219re_a - b1219re_b;
5993 let out12im = b1219im_a + b1219im_b;
5994 let out13re = b1318re_a - b1318re_b;
5995 let out13im = b1318im_a + b1318im_b;
5996 let out14re = b1417re_a - b1417re_b;
5997 let out14im = b1417im_a + b1417im_b;
5998 let out15re = b1516re_a - b1516re_b;
5999 let out15im = b1516im_a + b1516im_b;
6000 let out16re = b1516re_a + b1516re_b;
6001 let out16im = b1516im_a - b1516im_b;
6002 let out17re = b1417re_a + b1417re_b;
6003 let out17im = b1417im_a - b1417im_b;
6004 let out18re = b1318re_a + b1318re_b;
6005 let out18im = b1318im_a - b1318im_b;
6006 let out19re = b1219re_a + b1219re_b;
6007 let out19im = b1219im_a - b1219im_b;
6008 let out20re = b1120re_a + b1120re_b;
6009 let out20im = b1120im_a - b1120im_b;
6010 let out21re = b1021re_a + b1021re_b;
6011 let out21im = b1021im_a - b1021im_b;
6012 let out22re = b922re_a + b922re_b;
6013 let out22im = b922im_a - b922im_b;
6014 let out23re = b823re_a + b823re_b;
6015 let out23im = b823im_a - b823im_b;
6016 let out24re = b724re_a + b724re_b;
6017 let out24im = b724im_a - b724im_b;
6018 let out25re = b625re_a + b625re_b;
6019 let out25im = b625im_a - b625im_b;
6020 let out26re = b526re_a + b526re_b;
6021 let out26im = b526im_a - b526im_b;
6022 let out27re = b427re_a + b427re_b;
6023 let out27im = b427im_a - b427im_b;
6024 let out28re = b328re_a + b328re_b;
6025 let out28im = b328im_a - b328im_b;
6026 let out29re = b229re_a + b229re_b;
6027 let out29im = b229im_a - b229im_b;
6028 let out30re = b130re_a + b130re_b;
6029 let out30im = b130im_a - b130im_b;
6030 buffer.store(sum, 0);
6031 buffer.store(
6032 Complex {
6033 re: out1re,
6034 im: out1im,
6035 },
6036 1,
6037 );
6038 buffer.store(
6039 Complex {
6040 re: out2re,
6041 im: out2im,
6042 },
6043 2,
6044 );
6045 buffer.store(
6046 Complex {
6047 re: out3re,
6048 im: out3im,
6049 },
6050 3,
6051 );
6052 buffer.store(
6053 Complex {
6054 re: out4re,
6055 im: out4im,
6056 },
6057 4,
6058 );
6059 buffer.store(
6060 Complex {
6061 re: out5re,
6062 im: out5im,
6063 },
6064 5,
6065 );
6066 buffer.store(
6067 Complex {
6068 re: out6re,
6069 im: out6im,
6070 },
6071 6,
6072 );
6073 buffer.store(
6074 Complex {
6075 re: out7re,
6076 im: out7im,
6077 },
6078 7,
6079 );
6080 buffer.store(
6081 Complex {
6082 re: out8re,
6083 im: out8im,
6084 },
6085 8,
6086 );
6087 buffer.store(
6088 Complex {
6089 re: out9re,
6090 im: out9im,
6091 },
6092 9,
6093 );
6094 buffer.store(
6095 Complex {
6096 re: out10re,
6097 im: out10im,
6098 },
6099 10,
6100 );
6101 buffer.store(
6102 Complex {
6103 re: out11re,
6104 im: out11im,
6105 },
6106 11,
6107 );
6108 buffer.store(
6109 Complex {
6110 re: out12re,
6111 im: out12im,
6112 },
6113 12,
6114 );
6115 buffer.store(
6116 Complex {
6117 re: out13re,
6118 im: out13im,
6119 },
6120 13,
6121 );
6122 buffer.store(
6123 Complex {
6124 re: out14re,
6125 im: out14im,
6126 },
6127 14,
6128 );
6129 buffer.store(
6130 Complex {
6131 re: out15re,
6132 im: out15im,
6133 },
6134 15,
6135 );
6136 buffer.store(
6137 Complex {
6138 re: out16re,
6139 im: out16im,
6140 },
6141 16,
6142 );
6143 buffer.store(
6144 Complex {
6145 re: out17re,
6146 im: out17im,
6147 },
6148 17,
6149 );
6150 buffer.store(
6151 Complex {
6152 re: out18re,
6153 im: out18im,
6154 },
6155 18,
6156 );
6157 buffer.store(
6158 Complex {
6159 re: out19re,
6160 im: out19im,
6161 },
6162 19,
6163 );
6164 buffer.store(
6165 Complex {
6166 re: out20re,
6167 im: out20im,
6168 },
6169 20,
6170 );
6171 buffer.store(
6172 Complex {
6173 re: out21re,
6174 im: out21im,
6175 },
6176 21,
6177 );
6178 buffer.store(
6179 Complex {
6180 re: out22re,
6181 im: out22im,
6182 },
6183 22,
6184 );
6185 buffer.store(
6186 Complex {
6187 re: out23re,
6188 im: out23im,
6189 },
6190 23,
6191 );
6192 buffer.store(
6193 Complex {
6194 re: out24re,
6195 im: out24im,
6196 },
6197 24,
6198 );
6199 buffer.store(
6200 Complex {
6201 re: out25re,
6202 im: out25im,
6203 },
6204 25,
6205 );
6206 buffer.store(
6207 Complex {
6208 re: out26re,
6209 im: out26im,
6210 },
6211 26,
6212 );
6213 buffer.store(
6214 Complex {
6215 re: out27re,
6216 im: out27im,
6217 },
6218 27,
6219 );
6220 buffer.store(
6221 Complex {
6222 re: out28re,
6223 im: out28im,
6224 },
6225 28,
6226 );
6227 buffer.store(
6228 Complex {
6229 re: out29re,
6230 im: out29im,
6231 },
6232 29,
6233 );
6234 buffer.store(
6235 Complex {
6236 re: out30re,
6237 im: out30im,
6238 },
6239 30,
6240 );
6241 }
6242}
6243pub struct Butterfly32<T> {
6244 butterfly16: Butterfly16<T>,
6245 butterfly8: Butterfly8<T>,
6246 twiddles: [Complex<T>; 7],
6247}
6248boilerplate_fft_butterfly!(Butterfly32, 32, |this: &Butterfly32<_>| this
6249 .butterfly8
6250 .fft_direction());
6251impl<T: FftNum> Butterfly32<T> {
6252 pub fn new(direction: FftDirection) -> Self {
6253 Self {
6254 butterfly16: Butterfly16::new(direction),
6255 butterfly8: Butterfly8::new(direction),
6256 twiddles: [
6257 twiddles::compute_twiddle(1, 32, direction),
6258 twiddles::compute_twiddle(2, 32, direction),
6259 twiddles::compute_twiddle(3, 32, direction),
6260 twiddles::compute_twiddle(4, 32, direction),
6261 twiddles::compute_twiddle(5, 32, direction),
6262 twiddles::compute_twiddle(6, 32, direction),
6263 twiddles::compute_twiddle(7, 32, direction),
6264 ],
6265 }
6266 }
6267
6268 #[inline(never)]
6269 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
6270 let mut scratch_evens = [
6273 buffer.load(0),
6274 buffer.load(2),
6275 buffer.load(4),
6276 buffer.load(6),
6277 buffer.load(8),
6278 buffer.load(10),
6279 buffer.load(12),
6280 buffer.load(14),
6281 buffer.load(16),
6282 buffer.load(18),
6283 buffer.load(20),
6284 buffer.load(22),
6285 buffer.load(24),
6286 buffer.load(26),
6287 buffer.load(28),
6288 buffer.load(30),
6289 ];
6290
6291 let mut scratch_odds_n1 = [
6292 buffer.load(1),
6293 buffer.load(5),
6294 buffer.load(9),
6295 buffer.load(13),
6296 buffer.load(17),
6297 buffer.load(21),
6298 buffer.load(25),
6299 buffer.load(29),
6300 ];
6301 let mut scratch_odds_n3 = [
6302 buffer.load(31),
6303 buffer.load(3),
6304 buffer.load(7),
6305 buffer.load(11),
6306 buffer.load(15),
6307 buffer.load(19),
6308 buffer.load(23),
6309 buffer.load(27),
6310 ];
6311
6312 self.butterfly16.perform_fft_contiguous(&mut scratch_evens);
6314 self.butterfly8.perform_fft_contiguous(&mut scratch_odds_n1);
6315 self.butterfly8.perform_fft_contiguous(&mut scratch_odds_n3);
6316
6317 scratch_odds_n1[1] = scratch_odds_n1[1] * self.twiddles[0];
6319 scratch_odds_n3[1] = scratch_odds_n3[1] * self.twiddles[0].conj();
6320
6321 scratch_odds_n1[2] = scratch_odds_n1[2] * self.twiddles[1];
6322 scratch_odds_n3[2] = scratch_odds_n3[2] * self.twiddles[1].conj();
6323
6324 scratch_odds_n1[3] = scratch_odds_n1[3] * self.twiddles[2];
6325 scratch_odds_n3[3] = scratch_odds_n3[3] * self.twiddles[2].conj();
6326
6327 scratch_odds_n1[4] = scratch_odds_n1[4] * self.twiddles[3];
6328 scratch_odds_n3[4] = scratch_odds_n3[4] * self.twiddles[3].conj();
6329
6330 scratch_odds_n1[5] = scratch_odds_n1[5] * self.twiddles[4];
6331 scratch_odds_n3[5] = scratch_odds_n3[5] * self.twiddles[4].conj();
6332
6333 scratch_odds_n1[6] = scratch_odds_n1[6] * self.twiddles[5];
6334 scratch_odds_n3[6] = scratch_odds_n3[6] * self.twiddles[5].conj();
6335
6336 scratch_odds_n1[7] = scratch_odds_n1[7] * self.twiddles[6];
6337 scratch_odds_n3[7] = scratch_odds_n3[7] * self.twiddles[6].conj();
6338
6339 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[0], &mut scratch_odds_n3[0]);
6341 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[1], &mut scratch_odds_n3[1]);
6342 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[2], &mut scratch_odds_n3[2]);
6343 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[3], &mut scratch_odds_n3[3]);
6344 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[4], &mut scratch_odds_n3[4]);
6345 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[5], &mut scratch_odds_n3[5]);
6346 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[6], &mut scratch_odds_n3[6]);
6347 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[7], &mut scratch_odds_n3[7]);
6348
6349 scratch_odds_n3[0] = twiddles::rotate_90(scratch_odds_n3[0], self.fft_direction());
6351 scratch_odds_n3[1] = twiddles::rotate_90(scratch_odds_n3[1], self.fft_direction());
6352 scratch_odds_n3[2] = twiddles::rotate_90(scratch_odds_n3[2], self.fft_direction());
6353 scratch_odds_n3[3] = twiddles::rotate_90(scratch_odds_n3[3], self.fft_direction());
6354 scratch_odds_n3[4] = twiddles::rotate_90(scratch_odds_n3[4], self.fft_direction());
6355 scratch_odds_n3[5] = twiddles::rotate_90(scratch_odds_n3[5], self.fft_direction());
6356 scratch_odds_n3[6] = twiddles::rotate_90(scratch_odds_n3[6], self.fft_direction());
6357 scratch_odds_n3[7] = twiddles::rotate_90(scratch_odds_n3[7], self.fft_direction());
6358
6359 buffer.store(scratch_evens[0] + scratch_odds_n1[0], 0);
6361 buffer.store(scratch_evens[1] + scratch_odds_n1[1], 1);
6362 buffer.store(scratch_evens[2] + scratch_odds_n1[2], 2);
6363 buffer.store(scratch_evens[3] + scratch_odds_n1[3], 3);
6364 buffer.store(scratch_evens[4] + scratch_odds_n1[4], 4);
6365 buffer.store(scratch_evens[5] + scratch_odds_n1[5], 5);
6366 buffer.store(scratch_evens[6] + scratch_odds_n1[6], 6);
6367 buffer.store(scratch_evens[7] + scratch_odds_n1[7], 7);
6368 buffer.store(scratch_evens[8] + scratch_odds_n3[0], 8);
6369 buffer.store(scratch_evens[9] + scratch_odds_n3[1], 9);
6370 buffer.store(scratch_evens[10] + scratch_odds_n3[2], 10);
6371 buffer.store(scratch_evens[11] + scratch_odds_n3[3], 11);
6372 buffer.store(scratch_evens[12] + scratch_odds_n3[4], 12);
6373 buffer.store(scratch_evens[13] + scratch_odds_n3[5], 13);
6374 buffer.store(scratch_evens[14] + scratch_odds_n3[6], 14);
6375 buffer.store(scratch_evens[15] + scratch_odds_n3[7], 15);
6376 buffer.store(scratch_evens[0] - scratch_odds_n1[0], 16);
6377 buffer.store(scratch_evens[1] - scratch_odds_n1[1], 17);
6378 buffer.store(scratch_evens[2] - scratch_odds_n1[2], 18);
6379 buffer.store(scratch_evens[3] - scratch_odds_n1[3], 19);
6380 buffer.store(scratch_evens[4] - scratch_odds_n1[4], 20);
6381 buffer.store(scratch_evens[5] - scratch_odds_n1[5], 21);
6382 buffer.store(scratch_evens[6] - scratch_odds_n1[6], 22);
6383 buffer.store(scratch_evens[7] - scratch_odds_n1[7], 23);
6384 buffer.store(scratch_evens[8] - scratch_odds_n3[0], 24);
6385 buffer.store(scratch_evens[9] - scratch_odds_n3[1], 25);
6386 buffer.store(scratch_evens[10] - scratch_odds_n3[2], 26);
6387 buffer.store(scratch_evens[11] - scratch_odds_n3[3], 27);
6388 buffer.store(scratch_evens[12] - scratch_odds_n3[4], 28);
6389 buffer.store(scratch_evens[13] - scratch_odds_n3[5], 29);
6390 buffer.store(scratch_evens[14] - scratch_odds_n3[6], 30);
6391 buffer.store(scratch_evens[15] - scratch_odds_n3[7], 31);
6392 }
6393}
6394
#[cfg(test)]
mod unit_tests {
    use super::*;
    use crate::test_utils::check_fft_algorithm;

    // Every butterfly is tested the same way; only the test name, the struct and the size
    // differ, so the test body is stamped out by a macro. Each generated test constructs
    // the butterfly once per direction and hands it to `check_fft_algorithm` with `f32`.
    macro_rules! test_butterfly_func {
        ($test_name:ident, $struct_name:ident, $size:expr) => {
            #[test]
            fn $test_name() {
                // Forward is checked first, then Inverse.
                for direction in [FftDirection::Forward, FftDirection::Inverse] {
                    let butterfly = $struct_name::new(direction);
                    check_fft_algorithm::<f32>(&butterfly, $size, direction);
                }
            }
        };
    }
    test_butterfly_func!(test_butterfly2, Butterfly2, 2);
    test_butterfly_func!(test_butterfly3, Butterfly3, 3);
    test_butterfly_func!(test_butterfly4, Butterfly4, 4);
    test_butterfly_func!(test_butterfly5, Butterfly5, 5);
    test_butterfly_func!(test_butterfly6, Butterfly6, 6);
    test_butterfly_func!(test_butterfly7, Butterfly7, 7);
    test_butterfly_func!(test_butterfly8, Butterfly8, 8);
    test_butterfly_func!(test_butterfly9, Butterfly9, 9);
    test_butterfly_func!(test_butterfly11, Butterfly11, 11);
    test_butterfly_func!(test_butterfly12, Butterfly12, 12);
    test_butterfly_func!(test_butterfly13, Butterfly13, 13);
    test_butterfly_func!(test_butterfly16, Butterfly16, 16);
    test_butterfly_func!(test_butterfly17, Butterfly17, 17);
    test_butterfly_func!(test_butterfly19, Butterfly19, 19);
    test_butterfly_func!(test_butterfly23, Butterfly23, 23);
    test_butterfly_func!(test_butterfly24, Butterfly24, 24);
    test_butterfly_func!(test_butterfly27, Butterfly27, 27);
    test_butterfly_func!(test_butterfly29, Butterfly29, 29);
    test_butterfly_func!(test_butterfly31, Butterfly31, 31);
    test_butterfly_func!(test_butterfly32, Butterfly32, 32);
}