feanor_math/algorithms/fft/
radix3.rs

1use std::alloc::{Allocator, Global};
2
3use crate::algorithms::fft::FFTAlgorithm;
4use crate::rings::float_complex::Complex64Base;
5use crate::algorithms::fft::complex_fft::*;
6use crate::algorithms::unity_root::{get_prim_root_of_unity, is_prim_root_of_unity};
7use crate::divisibility::{DivisibilityRing, DivisibilityRingStore};
8use crate::homomorphism::*;
9use crate::primitive_int::StaticRing;
10use crate::seq::SwappableVectorViewMut;
11use crate::rings::zn::*;
12use crate::ring::*;
13use crate::seq::VectorFn;
14
15///
16/// Implementation of the Cooley-Tukey FFT algorithm for power-of-three lengths.
17/// 
18#[stability::unstable(feature = "enable")]
19pub struct CooleyTukeyRadix3FFT<R_main, R_twiddle, H, A = Global>
20    where R_main: ?Sized + RingBase,
21        R_twiddle: ?Sized + RingBase + DivisibilityRing,
22        H: Homomorphism<R_twiddle, R_main>,
23        A: Allocator
24{
25    log3_n: usize,
26    hom: H,
27    twiddles: Vec<Vec<R_twiddle::Element>>,
28    inv_twiddles: Vec<Vec<R_twiddle::Element>>,
29    third_root_of_unity: R_twiddle::Element,
30    main_root_of_unity: R_main::Element,
31    allocator: A
32}
33
34const ZZ: StaticRing<i64> = StaticRing::RING;
35
36#[inline(never)]
37fn butterfly_loop<T, S, F>(log3_n: usize, data: &mut [T], step: usize, twiddles: &[S], butterfly: F)
38    where F: Fn(&mut T, &mut T, &mut T, &S, &S) + Clone
39{
40    assert_eq!(ZZ.pow(3, log3_n) as usize, data.len());
41    assert!(step < log3_n);
42    assert_eq!(2 * ZZ.pow(3, step) as usize, twiddles.len());
43    
44    let stride = ZZ.pow(3, log3_n - step - 1) as usize;
45    assert!(data.len() % (3 * stride) == 0);
46    assert_eq!(twiddles.as_chunks::<2>().0.len(), data.chunks_mut(3 * stride).len());
47
48    if stride == 1 {
49        for ([twiddle1, twiddle2], butterfly_data) in twiddles.as_chunks::<2>().0.iter().zip(data.as_chunks_mut::<3>().0.iter_mut()) {
50            let [a, b, c] = butterfly_data.each_mut();
51            butterfly(a, b, c, twiddle1, twiddle2);
52        }
53    } else {
54        for ([twiddle1, twiddle2], butterfly_data) in twiddles.as_chunks::<2>().0.iter().zip(data.chunks_mut(3 * stride)) {
55            let (first, rest) = butterfly_data.split_at_mut(stride);
56            let (second, third) = rest.split_at_mut(stride);
57            for ((a, b), c) in first.iter_mut().zip(second.iter_mut()).zip(third.iter_mut()) {
58                butterfly(a, b, c, twiddle1, twiddle2);
59            }
60        }
61    }
62}
63
64fn threeadic_reverse(mut number: usize, log3_n: usize) -> usize {
65    debug_assert!((number as i64) < ZZ.pow(3, log3_n));
66    let mut result = 0;
67    for _ in 0..log3_n {
68        let (quo, rem) = (number / 3, number % 3);
69        result = 3 * result + rem;
70        number = quo;
71    }
72    assert_eq!(0, number);
73    return result;
74}
75
76impl<R> CooleyTukeyRadix3FFT<R::Type, R::Type, Identity<R>>
77    where R: RingStore,
78        R::Type: DivisibilityRing
79{
80    ///
81    /// Creates an [`CooleyTukeyRadix3FFT`] for a prime field, assuming it has a characteristic
82    /// congruent to 1 modulo `3^lo32_n`.
83    /// 
84    #[stability::unstable(feature = "enable")]
85    pub fn for_zn(ring: R, log3_n: usize) -> Option<Self>
86        where R::Type: ZnRing
87    {
88        let n = ZZ.pow(3, log3_n);
89        let as_field = (&ring).as_field().ok().unwrap();
90        let root_of_unity = as_field.get_ring().unwrap_element(get_prim_root_of_unity(&as_field, n as usize)?);
91        return Some(Self::new_with_hom(ring.into_identity(), root_of_unity, log3_n));
92    }
93}
94
95impl<R_main, R_twiddle, H> CooleyTukeyRadix3FFT<R_main, R_twiddle, H>
96    where R_main: ?Sized + RingBase,
97        R_twiddle: ?Sized + RingBase + DivisibilityRing,
98        H: Homomorphism<R_twiddle, R_main>
99{
100    ///
101    /// Creates an [`CooleyTukeyRadix3FFT`] for the given rings, using the given root of unity.
102    /// 
103    /// Instead of a ring, this function takes a homomorphism `R -> S`. Twiddle factors that are
104    /// precomputed will be stored as elements of `R`, while the main FFT computations will be 
105    /// performed in `S`. This allows both implicit ring conversions, and using patterns like 
106    /// [`zn_64::ZnFastmul`] to precompute some data for better performance.
107    /// 
108    /// Do not use this for approximate rings, as computing the powers of `root_of_unity`
109    /// will incur avoidable precision loss.
110    /// 
111    #[stability::unstable(feature = "enable")]
112    pub fn new_with_hom(hom: H, zeta: R_twiddle::Element, log3_n: usize) -> Self {
113        let ring = hom.domain();
114        let pow_zeta = |i: i64| if i < 0 {
115            ring.invert(&ring.pow(ring.clone_el(&zeta), (-i).try_into().unwrap())).unwrap()
116        } else {
117            ring.pow(ring.clone_el(&zeta), i.try_into().unwrap())
118        };
119        let result = CooleyTukeyRadix3FFT::create(&hom, pow_zeta, log3_n, Global);
120        return Self {
121            allocator: result.allocator,
122            inv_twiddles: result.inv_twiddles,
123            log3_n: result.log3_n,
124            main_root_of_unity: result.main_root_of_unity,
125            third_root_of_unity: result.third_root_of_unity,
126            twiddles: result.twiddles,
127            hom: hom
128        };
129    }
130}
131
132impl<R_main, R_twiddle, H, A> CooleyTukeyRadix3FFT<R_main, R_twiddle, H, A>
133    where R_main: ?Sized + RingBase,
134        R_twiddle: ?Sized + RingBase + DivisibilityRing,
135        H: Homomorphism<R_twiddle, R_main>,
136        A: Allocator
137{
138    ///
139    /// Most general way to create a [`CooleyTukeyRadix3FFT`].
140    /// 
141    /// The given closure should, on input `i`, return `z^i` for a primitive
142    /// `3^log3_n`-th root of unity. The given allocator is used to copy the input
143    /// data in cases where the input data layout is not optimal for the algorithm
144    /// 
145    #[stability::unstable(feature = "enable")]
146    pub fn create<F>(hom: H, mut root_of_unity_pow: F, log3_n: usize, allocator: A) -> Self 
147        where F: FnMut(i64) -> R_twiddle::Element
148    {
149        let n = ZZ.pow(3, log3_n) as usize;
150        assert!(hom.codomain().get_ring().is_approximate() || is_prim_root_of_unity(hom.codomain(), &hom.map(root_of_unity_pow(1)), n));
151        
152        return Self {
153            main_root_of_unity: hom.map(root_of_unity_pow(1)),
154            log3_n: log3_n, 
155            twiddles: Self::create_twiddle_list(hom.domain(), log3_n, &mut root_of_unity_pow), 
156            inv_twiddles: Self::create_inv_twiddle_list(hom.domain(), log3_n, &mut root_of_unity_pow),
157            third_root_of_unity: root_of_unity_pow(2 * n as i64 / 3),
158            hom: hom,
159            allocator: allocator
160        };
161    }
162    
163    ///
164    /// Replaces the ring that this object can compute FFTs over, assuming that the current
165    /// twiddle factors can be mapped into the new ring with the given homomorphism.
166    /// 
167    /// In particular, this function does not recompute twiddles, but uses a different
168    /// homomorphism to map the current twiddles into a new ring. Hence, it is extremely
169    /// cheap. 
170    /// 
171    #[stability::unstable(feature = "enable")]
172    pub fn change_ring<R_new: ?Sized + RingBase, H_new: Homomorphism<R_twiddle, R_new>>(self, new_hom: H_new) -> (CooleyTukeyRadix3FFT<R_new, R_twiddle, H_new, A>, H) {
173        let ring = new_hom.codomain();
174        let root_of_unity = if self.log3_n == 0 {
175            new_hom.codomain().one()
176        } else if self.log3_n == 1 {
177            let root_of_unity = self.hom.domain().pow(self.hom.domain().clone_el(&self.third_root_of_unity), 2);
178            debug_assert!(self.ring().eq_el(&self.hom.map_ref(&root_of_unity), self.root_of_unity(self.hom.codomain())));
179            new_hom.map(root_of_unity)
180        } else {
181            let root_of_unity = &self.inv_twiddles[self.log3_n - 1][threeadic_reverse(1, self.log3_n - 1)];
182            debug_assert!(self.ring().eq_el(&self.hom.map_ref(root_of_unity), self.root_of_unity(self.hom.codomain())));
183            new_hom.map_ref(root_of_unity)
184        };
185        assert!(ring.is_commutative());
186        assert!(ring.get_ring().is_approximate() || is_prim_root_of_unity(&ring, &root_of_unity, self.len()));
187
188        return (
189            CooleyTukeyRadix3FFT {
190                twiddles: self.twiddles,
191                inv_twiddles: self.inv_twiddles,
192                main_root_of_unity: root_of_unity, 
193                third_root_of_unity: self.third_root_of_unity,
194                hom: new_hom, 
195                log3_n: self.log3_n, 
196                allocator: self.allocator
197            },
198            self.hom
199        );
200    }
201
202    ///
203    /// Returns the ring over which this object can compute FFTs.
204    /// 
205    #[stability::unstable(feature = "enable")]
206    pub fn ring<'a>(&'a self) -> &'a <H as Homomorphism<R_twiddle, R_main>>::CodomainStore {
207        self.hom.codomain()
208    }
209
210    ///
211    /// Returns a reference to the allocator currently used for temporary allocations by this FFT.
212    /// 
213    #[stability::unstable(feature = "enable")]
214    pub fn allocator(&self) -> &A {
215        &self.allocator
216    }
217
218    ///
219    /// Replaces the allocator used for temporary allocations by this FFT.
220    /// 
221    #[stability::unstable(feature = "enable")]
222    pub fn with_allocator<A_new: Allocator>(self, allocator: A_new) -> CooleyTukeyRadix3FFT<R_main, R_twiddle, H, A_new> {
223        CooleyTukeyRadix3FFT {
224            twiddles: self.twiddles,
225            inv_twiddles: self.inv_twiddles,
226            main_root_of_unity: self.main_root_of_unity, 
227            third_root_of_unity: self.third_root_of_unity,
228            hom: self.hom, 
229            log3_n: self.log3_n, 
230            allocator: allocator
231        }
232    }
233
234    ///
235    /// Returns a reference to the homomorphism that is used to map the stored twiddle
236    /// factors into main ring, over which FFTs are computed.
237    /// 
238    #[stability::unstable(feature = "enable")]
239    pub fn hom<'a>(&'a self) -> &'a H {
240        &self.hom
241    }
242
243    fn create_twiddle_list<F>(ring: &H::DomainStore, log3_n: usize, mut pow_zeta: F) -> Vec<Vec<R_twiddle::Element>>
244        where F: FnMut(i64) -> R_twiddle::Element
245    {
246        let n = ZZ.pow(3, log3_n);
247        let third_root_of_unity = pow_zeta(-(n / 3));
248        let mut result: Vec<_> = (0..log3_n).map(|_| Vec::new()).collect();
249        for i in 0..log3_n {
250            let current = &mut result[i];
251            for j in 0..ZZ.pow(3, i) {
252                let base_twiddle = pow_zeta(-(threeadic_reverse(j as usize, log3_n - 1) as i64));
253                current.push(ring.clone_el(&base_twiddle));
254                current.push(ring.pow(ring.mul_ref_snd(base_twiddle, &third_root_of_unity), 2));
255            }
256        }
257        return result;
258    }
259    
260    fn create_inv_twiddle_list<F>(ring: &H::DomainStore, log3_n: usize, mut pow_zeta: F) -> Vec<Vec<R_twiddle::Element>>
261        where F: FnMut(i64) -> R_twiddle::Element
262    {
263        let mut result: Vec<_> = (0..log3_n).map(|_| Vec::new()).collect();
264        for i in 0..log3_n {
265            let current = &mut result[i];
266            for j in 0..ZZ.pow(3, i) {
267                let base_twiddle = pow_zeta(threeadic_reverse(j as usize, log3_n - 1) as i64);
268                current.push(ring.clone_el(&base_twiddle));
269                current.push(ring.pow(base_twiddle, 2));
270            }
271        }
272        return result;
273    }
274
275    fn butterfly_step_main<const INV: bool>(&self, data: &mut [R_main::Element], step: usize) {
276        let twiddles = if INV {
277            &self.inv_twiddles[step]
278        } else {
279            &self.twiddles[step]
280        };
281        let third_root_of_unity = &self.third_root_of_unity;
282        // let start = std::time::Instant::now();
283        butterfly_loop(self.log3_n, data, step, twiddles, |x, y, z, twiddle1, twiddle2| if INV {
284            <R_main as CooleyTukeyRadix3Butterfly<R_twiddle>>::inv_butterfly(&self.hom, x, y, z, &third_root_of_unity, twiddle1, twiddle2)
285        } else {
286            <R_main as CooleyTukeyRadix3Butterfly<R_twiddle>>::butterfly(&self.hom, x, y, z, &third_root_of_unity, twiddle1, twiddle2)
287        });
288        // let end = std::time::Instant::now();
289        // BUTTERFLY_RADIX3_TIMES[step].fetch_add((end - start).as_micros() as usize, std::sync::atomic::Ordering::Relaxed);
290    }
291
292    fn fft_impl(&self, data: &mut [R_main::Element]) {
293        for i in 0..data.len() {
294            <R_main as CooleyTukeyRadix3Butterfly<R_twiddle>>::prepare_for_fft(self.hom.codomain().get_ring(), &mut data[i]);
295        }
296        for step in 0..self.log3_n {
297            self.butterfly_step_main::<false>(data, step);
298        }
299    }
300
301    fn inv_fft_impl(&self, data: &mut [R_main::Element]) {
302        for i in 0..data.len() {
303            <R_main as CooleyTukeyRadix3Butterfly<R_twiddle>>::prepare_for_inv_fft(self.hom.codomain().get_ring(), &mut data[i]);
304        }
305        for step in (0..self.log3_n).rev() {
306            self.butterfly_step_main::<true>(data, step);
307        }
308        let n_inv = self.hom.domain().invert(&self.hom.domain().int_hom().map(self.len() as i32)).unwrap();
309        for i in 0..data.len() {
310            self.hom.mul_assign_ref_map(&mut data[i], &n_inv);
311        }
312    }
313}
314
315impl<R_main, R_twiddle, H, A> Clone for CooleyTukeyRadix3FFT<R_main, R_twiddle, H, A> 
316    where R_main: ?Sized + RingBase,
317        R_twiddle: ?Sized + RingBase + DivisibilityRing,
318        H: Homomorphism<R_twiddle, R_main> + Clone,
319        A: Allocator + Clone
320{
321    fn clone(&self) -> Self {
322        Self {
323            hom: self.hom.clone(),
324            inv_twiddles: self.inv_twiddles.iter().map(|list| list.iter().map(|x| self.hom.domain().clone_el(x)).collect()).collect(),
325            twiddles: self.twiddles.iter().map(|list| list.iter().map(|x| self.hom.domain().clone_el(x)).collect()).collect(),
326            main_root_of_unity: self.hom.codomain().clone_el(&self.main_root_of_unity),
327            third_root_of_unity: self.hom.domain().clone_el(&self.third_root_of_unity),
328            log3_n: self.log3_n,
329            allocator: self.allocator.clone()
330        }
331    }
332}
333
334impl<R_main, R_twiddle, H, A> FFTAlgorithm<R_main> for CooleyTukeyRadix3FFT<R_main, R_twiddle, H, A>
335    where R_main: ?Sized + RingBase,
336        R_twiddle: ?Sized + RingBase + DivisibilityRing,
337        H: Homomorphism<R_twiddle, R_main>,
338        A: Allocator
339{
340    fn len(&self) -> usize {
341        if self.log3_n == 0 {
342            return 1;
343        }
344        let result = self.twiddles[self.log3_n - 1].len() / 2 * 3;
345        debug_assert_eq!(ZZ.pow(3, self.log3_n) as usize, result);
346        return result;
347    }
348
349    fn unordered_fft<V, S>(&self, mut values: V, ring: S)
350        where V: SwappableVectorViewMut<R_main::Element>,
351            S: RingStore<Type = R_main> + Copy
352    {
353        assert!(ring.get_ring() == self.hom.codomain().get_ring(), "unsupported ring");
354        assert_eq!(self.len(), values.len());
355        if let Some(data) = values.as_slice_mut() {
356            self.fft_impl(data);
357        } else {
358            let mut data = Vec::with_capacity_in(self.len(), &self.allocator);
359            data.extend(values.clone_ring_els(ring).iter());
360            self.fft_impl(&mut data);
361            for (i, x) in data.into_iter().enumerate() {
362                *values.at_mut(i) = x;
363            }
364        }
365    }
366
367    fn unordered_inv_fft<V, S>(&self, mut values: V, ring: S)
368        where V: SwappableVectorViewMut<R_main::Element>,
369            S: RingStore<Type = R_main> + Copy
370    {
371        assert!(ring.get_ring() == self.hom.codomain().get_ring(), "unsupported ring");
372        assert_eq!(self.len(), values.len());
373        if let Some(data) = values.as_slice_mut() {
374            self.inv_fft_impl(data);
375        } else {
376            let mut data = Vec::with_capacity_in(self.len(), &self.allocator);
377            data.extend(values.clone_ring_els(ring).iter());
378            self.inv_fft_impl(&mut data);
379            for (i, x) in data.into_iter().enumerate() {
380                *values.at_mut(i) = x;
381            }
382        }
383    }
384
385    fn root_of_unity<S>(&self, ring: S) -> &R_main::Element
386        where S: RingStore<Type = R_main> + Copy
387    {
388        assert!(ring.get_ring() == self.hom.codomain().get_ring(), "unsupported ring");   
389        &self.main_root_of_unity 
390    }
391
392    fn unordered_fft_permutation(&self, i: usize) -> usize {
393        threeadic_reverse(i, self.log3_n)
394    }
395
396    fn unordered_fft_permutation_inv(&self, i: usize) -> usize {
397        threeadic_reverse(i, self.log3_n)
398    }
399}
400
401#[stability::unstable(feature = "enable")]
402pub trait CooleyTukeyRadix3Butterfly<S: ?Sized + RingBase>: RingBase {
403
404    ///
405    /// Should compute `(a, b, c) := (a + t b + t^2 c, a + t z b + t^2 z^2 c, a + t z^2 b + t^2 z c)`.
406    /// 
407    /// Here `z` is a third root of unity (i.e. `z^2 + z + 1 = 0`) and `t` is the twiddle factor.
408    /// The function should be given `z, t, t^2 z^2`.
409    /// 
410    /// It is guaranteed that the input elements are either outputs of
411    /// [`CooleyTukeyRadix3Butterfly::butterfly()`] or of [`CooleyTukeyRadix3Butterfly::prepare_for_fft()`].
412    /// 
413    fn butterfly<H: Homomorphism<S, Self>>(
414        hom: H, 
415        a: &mut Self::Element, 
416        b: &mut Self::Element, 
417        c: &mut Self::Element, 
418        z: &S::Element,
419        t: &S::Element,
420        t_sqr_z_sqr: &S::Element
421    );
422    
423    ///
424    /// Should compute `(a, b, c) := (a + b + c, t (a + z^2 b + z c), t^2 (a + z b + z^2 c))`.
425    /// 
426    /// Here `z` is a third root of unity (i.e. `z^2 + z + 1 = 0`) and `t` is the twiddle factor.
427    /// The function should be given `z, t, t^2`.
428    /// 
429    /// It is guaranteed that the input elements are either outputs of
430    /// [`CooleyTukeyRadix3Butterfly::inv_butterfly()`] or of [`CooleyTukeyRadix3Butterfly::prepare_for_inv_fft()`].
431    /// 
432    fn inv_butterfly<H: Homomorphism<S, Self>>(
433        hom: H, 
434        a: &mut Self::Element, 
435        b: &mut Self::Element,
436        c: &mut Self::Element,
437        z: &S::Element,
438        t: &S::Element,
439        t_sqr: &S::Element
440    );
441
442    ///
443    /// Possibly pre-processes elements before the FFT starts. Here you can
444    /// bring ring element into a certain form, and assume during [`CooleyTukeyRadix3Butterfly::butterfly()`]
445    /// that the inputs are in this form.
446    /// 
447    #[inline(always)]
448    fn prepare_for_fft(&self, _value: &mut Self::Element) {}
449    
450    ///
451    /// Possibly pre-processes elements before the inverse FFT starts. Here you can
452    /// bring ring element into a certain form, and assume during [`CooleyTukeyRadix3Butterfly::inv_butterfly()`]
453    /// that the inputs are in this form.
454    /// 
455    #[inline(always)]
456    fn prepare_for_inv_fft(&self, _value: &mut Self::Element) {}
457}
458
459impl<R: ?Sized + RingBase, S: ?Sized + RingBase> CooleyTukeyRadix3Butterfly<S> for R {
460
461    default fn butterfly<H: Homomorphism<S, Self>>(
462        hom: H, 
463        a: &mut Self::Element, 
464        b: &mut Self::Element, 
465        c: &mut Self::Element, 
466        z: &S::Element,
467        t: &S::Element,
468        t_sqr_z_sqr: &S::Element
469    ) {
470        let ring = hom.codomain();
471        hom.mul_assign_ref_map(b, t); // this is now `t b`
472        hom.mul_assign_ref_map(c, t_sqr_z_sqr); // this is now `t^2 z^2 c`
473        let b_ = hom.mul_ref_map(b, z); // this is now `t z b`
474        let c_ = hom.mul_ref_map(c, z); // this is now `t^2 c z`
475        let s1 = ring.add_ref(b, &c_); // this is now `t b + t^2 c`
476        let s2 = ring.add_ref(&b_, c); // this is now `t z b + t^2 z^2 c`
477        let s3 = ring.add_ref(&s1, &s2); // this is now `-(t z^2 b + t^2 z c)`
478        *b = ring.add_ref_fst(a, s2); // this is now `a + t z b + t^2 z^2 c`
479        *c = ring.sub_ref_fst(a, s3); // this is now `a + t z^2 b + t^2 z c`
480        ring.add_assign(a, s1); // this is now `a + t b + t^2 c`
481    }
482    
483    default fn inv_butterfly<H: Homomorphism<S, Self>>(
484        hom: H, 
485        a: &mut Self::Element, 
486        b: &mut Self::Element,
487        c: &mut Self::Element,
488        z: &S::Element,
489        t: &S::Element,
490        t_sqr: &S::Element
491    ) {
492        let ring = hom.codomain();
493        let b_ = hom.mul_ref_map(b, z); // this is now `z b`
494        let s1 = ring.add_ref(b, c); // this is now `b + c`
495        let s2 = ring.add_ref(&b_, &c); // this is now `z b + c`
496        let s2_ = hom.mul_ref_snd_map(s2, z); // this is now `z^2 b + z c`
497        let s3 = ring.add_ref(&s1, &s2_); // this is now `-(z b + z^2 c)`
498        *b = ring.add_ref(a, &s2_); // this is now `a + z^2 b + z c`
499        *c = ring.sub_ref(a, &s3); // this is now `a + z b + z^2 c`
500        ring.add_assign(a, s1); // this is now `a + b + c`
501        hom.mul_assign_ref_map(b, t); // this is now `t (a + z^2 b + z c)`
502        hom.mul_assign_ref_map(c, t_sqr); // this is now `t^2 (a + z b + z^2 c`
503    }
504
505    ///
506    /// Possibly pre-processes elements before the FFT starts. Here you can bring ring element 
507    /// into a certain form, and assume during [`CooleyTukeyRadix3Butterfly::butterfly()`]
508    /// that the inputs are in this form.
509    /// 
510    #[inline(always)]
511    default fn prepare_for_fft(&self, _value: &mut Self::Element) {}
512    
513    ///
514    /// Possibly pre-processes elements before the inverse FFT starts. Here you can bring ring element
515    /// into a certain form, and assume during [`CooleyTukeyRadix3Butterfly::inv_butterfly()`]
516    /// that the inputs are in this form.
517    /// 
518    #[inline(always)]
519    default fn prepare_for_inv_fft(&self, _value: &mut Self::Element) {}
520}
521
522impl<H, A> FFTErrorEstimate for CooleyTukeyRadix3FFT<Complex64Base, Complex64Base, H, A> 
523    where H: Homomorphism<Complex64Base, Complex64Base>,
524        A: Allocator
525{
526    fn expected_absolute_error(&self, input_bound: f64, input_error: f64) -> f64 {
527        // the butterfly performs two multiplications with roots of unity, and then two additions
528        let multiply_absolute_error = 2. * input_bound * root_of_unity_error() + input_bound * f64::EPSILON;
529        let addition_absolute_error = 2. * input_bound * f64::EPSILON;
530        let butterfly_absolute_error = multiply_absolute_error + addition_absolute_error;
531        // the operator inf-norm of the FFT is its length
532        return 2. * self.len() as f64 * butterfly_absolute_error + self.len() as f64 * input_error;
533    }
534}
535
536#[cfg(test)]
537use std::array::from_fn;
538#[cfg(test)]
539use crate::rings::finite::FiniteRingStore;
540#[cfg(test)]
541use crate::assert_el_eq;
542#[cfg(test)]
543use crate::rings::zn::zn_64::*;
544#[cfg(test)]
545use crate::rings::zn::zn_static::Fp;
546
547#[test]
548fn test_radix3_butterflies() {
549    let log3_n = 3;
550    let ring = Zn::new(109);
551    let ring_fastmul = ZnFastmul::new(ring).unwrap();
552    let int_hom = ring.int_hom();
553    let i = |x| int_hom.map(x);
554    let zeta = i(97);
555    let zeta_inv = ring.invert(&zeta).unwrap();
556    let fft = CooleyTukeyRadix3FFT::new_with_hom(ring.into_can_hom(ring_fastmul).ok().unwrap(), ring_fastmul.coerce(&ring, zeta), log3_n);
557
558    const LEN: usize = 27;
559    let data: [_; LEN] = from_fn(|j| i(j as i32));
560    let expected_std_order = |step: usize, group_idx: usize, value_idx: usize| ring.sum(
561        (0..ZZ.pow(3, step)).map(|k| ring.mul(
562            ring.pow(zeta_inv, value_idx * (k * ZZ.pow(3, log3_n - step)) as usize),
563            data[group_idx + (k * ZZ.pow(3, log3_n - step)) as usize]
564        ))
565    );
566    let expected_threeadic_reverse = |step: usize| from_fn(|i| expected_std_order(
567        step,
568        i % ZZ.pow(3, log3_n - step) as usize,
569        threeadic_reverse(i / ZZ.pow(3, log3_n - step) as usize, step)
570    ));
571    let begin = expected_threeadic_reverse(0);
572    for (a, e) in data.iter().zip(begin.iter()) {
573        assert_el_eq!(ring, a, e);
574    }
575
576    let mut actual = data;
577    for i in 0..log3_n {
578        fft.butterfly_step_main::<false>(&mut actual, i);
579        let expected: [ZnEl; LEN] = expected_threeadic_reverse(i + 1);
580        for (a, e) in actual.iter().zip(expected.iter()) {
581            assert_el_eq!(ring, a, e);
582        }
583    }
584}
585
586#[test]
587fn test_radix3_inv_fft() {
588    let log3_n = 3;
589    let ring = Zn::new(109);
590    let ring_fastmul = ZnFastmul::new(ring).unwrap();
591    let zeta = ring.int_hom().map(97);
592    let fft = CooleyTukeyRadix3FFT::new_with_hom(ring.into_can_hom(ring_fastmul).ok().unwrap(), ring_fastmul.coerce(&ring, zeta), log3_n);
593
594    let data = (0..ZZ.pow(3, log3_n)).map(|x| ring.int_hom().map(x as i32)).collect::<Vec<_>>();
595    let mut actual = data.clone();
596    fft.unordered_fft(&mut actual, &ring);
597    fft.unordered_inv_fft(&mut actual, &ring);
598
599    for i in 0..data.len() {
600        assert_el_eq!(&ring, &data[i], &actual[i]);
601    }
602}
603
604#[test]
605fn test_size_1_fft() {
606    let ring = Fp::<17>::RING;
607    let fft = CooleyTukeyRadix3FFT::for_zn(&ring, 0).unwrap().change_ring(ring.identity()).0;
608    let values: [u64; 1] = [3];
609    let mut work = values;
610    fft.unordered_fft(&mut work, ring);
611    assert_eq!(&work, &values);
612    fft.unordered_inv_fft(&mut work, ring);
613    assert_eq!(&work, &values);
614    assert_eq!(0, fft.unordered_fft_permutation(0));
615    assert_eq!(0, fft.unordered_fft_permutation_inv(0));
616}
617
618#[cfg(any(test, feature = "generic_tests"))]
619pub mod generic_tests {
620    use super::*;
621
622    pub fn test_cooley_tuckey_radix3_butterfly<R: RingStore, S: RingStore, I: Iterator<Item = El<R>>>(ring: R, base: S, edge_case_elements: I, test_zeta: &El<S>, test_twiddle: &El<S>)
623        where R::Type: CanHomFrom<S::Type>,
624            S::Type: DivisibilityRing
625    {
626        assert!(base.is_zero(&base.sum([base.one(), base.clone_el(&test_zeta), base.pow(base.clone_el(&test_zeta), 2)])));
627        let test_inv_twiddle = base.invert(&test_twiddle).unwrap();
628        let elements = edge_case_elements.collect::<Vec<_>>();
629        let hom = ring.can_hom(&base).unwrap();
630
631        for a in &elements {
632            for b in &elements {
633                for c in &elements {
634                    
635                    let [mut x, mut y, mut z] = [ring.clone_el(a), ring.clone_el(b), ring.clone_el(c)];
636                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::prepare_for_fft(ring.get_ring(), &mut x);
637                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::prepare_for_fft(ring.get_ring(), &mut y);
638                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::prepare_for_fft(ring.get_ring(), &mut z);
639                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::butterfly(
640                        &hom, 
641                        &mut x, 
642                        &mut y, 
643                        &mut z, 
644                        &test_zeta, 
645                        &test_twiddle, 
646                        &base.pow(base.mul_ref(&test_twiddle, &test_zeta), 2)
647                    );
648                    let mut t = hom.map_ref(&test_twiddle);
649                    assert_el_eq!(ring, ring.add_ref_fst(a, ring.mul_ref_snd(ring.add_ref_fst(b, ring.mul_ref(c, &t)), &t)), &x);
650                    ring.mul_assign(&mut t, hom.map_ref(&test_zeta));
651                    assert_el_eq!(ring, ring.add_ref_fst(a, ring.mul_ref_snd(ring.add_ref_fst(b, ring.mul_ref(c, &t)), &t)), &y);
652                    ring.mul_assign(&mut t, hom.map_ref(&test_zeta));
653                    assert_el_eq!(ring, ring.add_ref_fst(a, ring.mul_ref_snd(ring.add_ref_fst(b, ring.mul_ref(c, &t)), &t)), &z);
654                    
655                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::prepare_for_inv_fft(ring.get_ring(), &mut x);
656                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::prepare_for_inv_fft(ring.get_ring(), &mut y);
657                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::prepare_for_inv_fft(ring.get_ring(), &mut z);
658                    <R::Type as CooleyTukeyRadix3Butterfly<S::Type>>::inv_butterfly(
659                        &hom, 
660                        &mut x, 
661                        &mut y, 
662                        &mut z, 
663                        &test_zeta, 
664                        &test_inv_twiddle, 
665                        &base.pow(base.clone_el(&test_inv_twiddle), 2)
666                    );
667                    assert_el_eq!(ring, ring.int_hom().mul_ref_fst_map(a, 3), &x);
668                    assert_el_eq!(ring, ring.int_hom().mul_ref_fst_map(b, 3), &y);
669                    assert_el_eq!(ring, ring.int_hom().mul_ref_fst_map(c, 3), &z);
670                }
671            }
672        }
673    }
674}
675
676#[test]
677fn test_butterfly() {
678    let ring = Fp::<109>::RING;
679    generic_tests::test_cooley_tuckey_radix3_butterfly(
680        ring,
681        ring,
682        ring.elements().step_by(10),
683        &63,
684        &97,
685    );
686}