rustfft/
plan.rs

1use num_integer::gcd;
2use std::collections::HashMap;
3use std::sync::Arc;
4
5use crate::common::RadixFactor;
6use crate::wasm_simd::wasm_simd_planner::FftPlannerWasmSimd;
7use crate::{common::FftNum, fft_cache::FftCache, FftDirection};
8
9use crate::algorithm::butterflies::*;
10use crate::algorithm::*;
11use crate::Fft;
12
13use crate::FftPlannerAvx;
14use crate::FftPlannerNeon;
15use crate::FftPlannerSse;
16
17use crate::math_utils::PrimeFactors;
18
19enum ChosenFftPlanner<T: FftNum> {
20    Scalar(FftPlannerScalar<T>),
21    Avx(FftPlannerAvx<T>),
22    Sse(FftPlannerSse<T>),
23    Neon(FftPlannerNeon<T>),
24    WasmSimd(FftPlannerWasmSimd<T>),
25    // todo: If we add NEON, avx-512 etc support, add more enum variants for them here
26}
27
28/// The FFT planner creates new FFT algorithm instances.
29///
30/// RustFFT has several FFT algorithms available. For a given FFT size, the `FftPlanner` decides which of the
31/// available FFT algorithms to use and then initializes them.
32///
33/// ~~~
34/// // Perform a forward Fft of size 1234
35/// use std::sync::Arc;
36/// use rustfft::{FftPlanner, num_complex::Complex};
37///
38/// let mut planner = FftPlanner::new();
39/// let fft = planner.plan_fft_forward(1234);
40///
41/// let mut buffer = vec![Complex{ re: 0.0f32, im: 0.0f32 }; 1234];
42/// fft.process(&mut buffer);
43///
44/// // The FFT instance returned by the planner has the type `Arc<dyn Fft<T>>`,
45/// // where T is the numeric type, ie f32 or f64, so it's cheap to clone
46/// let fft_clone = Arc::clone(&fft);
47/// ~~~
48///
49/// If you plan on creating multiple FFT instances, it is recommended to reuse the same planner for all of them. This
50/// is because the planner re-uses internal data across FFT instances wherever possible, saving memory and reducing
51/// setup time. (FFT instances created with one planner will never re-use data and buffers with FFT instances created
52/// by a different planner)
53///
54/// Each FFT instance owns [`Arc`s](std::sync::Arc) to its internal data, rather than borrowing it from the planner, so it's perfectly
55/// safe to drop the planner after creating Fft instances.
56///
57/// In the constructor, the FftPlanner will detect available CPU features. If AVX, SSE, Neon, or WASM SIMD are available, it will set itself up to plan FFTs with the fastest available instruction set.
58/// If no SIMD instruction sets are available, the planner will seamlessly fall back to planning non-SIMD FFTs.
59///
60/// If you'd prefer not to compute a FFT at all if a certain SIMD instruction set isn't available, or otherwise specify your own custom fallback, RustFFT exposes dedicated planners for each instruction set:
61///  - [`FftPlannerAvx`](crate::FftPlannerAvx)
62///  - [`FftPlannerSse`](crate::FftPlannerSse)
63///  - [`FftPlannerNeon`](crate::FftPlannerNeon)
64///  - [`FftPlannerWasmSimd`](crate::FftPlannerWasmSimd)
65///
66/// If you'd prefer to opt out of SIMD algorithms, consider creating a [`FftPlannerScalar`](crate::FftPlannerScalar) instead.
67pub struct FftPlanner<T: FftNum> {
68    chosen_planner: ChosenFftPlanner<T>,
69}
70impl<T: FftNum> FftPlanner<T> {
71    /// Creates a new `FftPlanner` instance.
72    pub fn new() -> Self {
73        if let Ok(avx_planner) = FftPlannerAvx::new() {
74            Self {
75                chosen_planner: ChosenFftPlanner::Avx(avx_planner),
76            }
77        } else if let Ok(sse_planner) = FftPlannerSse::new() {
78            Self {
79                chosen_planner: ChosenFftPlanner::Sse(sse_planner),
80            }
81        } else if let Ok(neon_planner) = FftPlannerNeon::new() {
82            Self {
83                chosen_planner: ChosenFftPlanner::Neon(neon_planner),
84            }
85        } else if let Ok(wasm_simd_planner) = FftPlannerWasmSimd::new() {
86            Self {
87                chosen_planner: ChosenFftPlanner::WasmSimd(wasm_simd_planner),
88            }
89        } else {
90            Self {
91                chosen_planner: ChosenFftPlanner::Scalar(FftPlannerScalar::new()),
92            }
93        }
94    }
95
96    /// Returns a `Fft` instance which computes FFTs of size `len`.
97    ///
98    /// If the provided `direction` is `FftDirection::Forward`, the returned instance will compute forward FFTs. If it's `FftDirection::Inverse`, it will compute inverse FFTs.
99    ///
100    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
101    pub fn plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>> {
102        match &mut self.chosen_planner {
103            ChosenFftPlanner::Scalar(scalar_planner) => scalar_planner.plan_fft(len, direction),
104            ChosenFftPlanner::Avx(avx_planner) => avx_planner.plan_fft(len, direction),
105            ChosenFftPlanner::Sse(sse_planner) => sse_planner.plan_fft(len, direction),
106            ChosenFftPlanner::Neon(neon_planner) => neon_planner.plan_fft(len, direction),
107            ChosenFftPlanner::WasmSimd(wasm_simd_planner) => {
108                wasm_simd_planner.plan_fft(len, direction)
109            }
110        }
111    }
112
113    /// Returns a `Fft` instance which computes forward FFTs of size `len`
114    ///
115    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
116    pub fn plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>> {
117        self.plan_fft(len, FftDirection::Forward)
118    }
119
120    /// Returns a `Fft` instance which computes inverse FFTs of size `len`
121    ///
122    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
123    pub fn plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>> {
124        self.plan_fft(len, FftDirection::Inverse)
125    }
126}
127
128const MAX_RADIXN_FACTOR: usize = 7; // The largest blutterfly factor that the RadixN algorithm can handle
129const MAX_RADER_PRIME_FACTOR: usize = 23; // don't use Raders if the inner fft length has prime factor larger than this
130
131/// A Recipe is a structure that describes the design of a FFT, without actually creating it.
132/// It is used as a middle step in the planning process.
133#[derive(Debug, PartialEq, Clone)]
134pub(crate) enum Recipe {
135    Dft(usize),
136    MixedRadix {
137        left_fft: Arc<Recipe>,
138        right_fft: Arc<Recipe>,
139    },
140    #[allow(dead_code)]
141    GoodThomasAlgorithm {
142        left_fft: Arc<Recipe>,
143        right_fft: Arc<Recipe>,
144    },
145    MixedRadixSmall {
146        left_fft: Arc<Recipe>,
147        right_fft: Arc<Recipe>,
148    },
149    GoodThomasAlgorithmSmall {
150        left_fft: Arc<Recipe>,
151        right_fft: Arc<Recipe>,
152    },
153    RadersAlgorithm {
154        inner_fft: Arc<Recipe>,
155    },
156    BluesteinsAlgorithm {
157        len: usize,
158        inner_fft: Arc<Recipe>,
159    },
160    RadixN {
161        factors: Box<[RadixFactor]>,
162        base_fft: Arc<Recipe>,
163    },
164    Radix4 {
165        k: u32,
166        base_fft: Arc<Recipe>,
167    },
168    Butterfly2,
169    Butterfly3,
170    Butterfly4,
171    Butterfly5,
172    Butterfly6,
173    Butterfly7,
174    Butterfly8,
175    Butterfly9,
176    Butterfly11,
177    Butterfly12,
178    Butterfly13,
179    Butterfly16,
180    Butterfly17,
181    Butterfly19,
182    Butterfly23,
183    Butterfly24,
184    Butterfly27,
185    Butterfly29,
186    Butterfly31,
187    Butterfly32,
188}
189
190impl Recipe {
191    pub fn len(&self) -> usize {
192        match self {
193            Recipe::Dft(length) => *length,
194            Recipe::RadixN { factors, base_fft } => {
195                base_fft.len() * factors.iter().map(|f| f.radix()).product::<usize>()
196            }
197            Recipe::Radix4 { k, base_fft } => base_fft.len() * (1 << (k * 2)),
198            Recipe::Butterfly2 => 2,
199            Recipe::Butterfly3 => 3,
200            Recipe::Butterfly4 => 4,
201            Recipe::Butterfly5 => 5,
202            Recipe::Butterfly6 => 6,
203            Recipe::Butterfly7 => 7,
204            Recipe::Butterfly8 => 8,
205            Recipe::Butterfly9 => 9,
206            Recipe::Butterfly11 => 11,
207            Recipe::Butterfly12 => 12,
208            Recipe::Butterfly13 => 13,
209            Recipe::Butterfly16 => 16,
210            Recipe::Butterfly17 => 17,
211            Recipe::Butterfly19 => 19,
212            Recipe::Butterfly23 => 23,
213            Recipe::Butterfly24 => 24,
214            Recipe::Butterfly27 => 27,
215            Recipe::Butterfly29 => 29,
216            Recipe::Butterfly31 => 31,
217            Recipe::Butterfly32 => 32,
218            Recipe::MixedRadix {
219                left_fft,
220                right_fft,
221            } => left_fft.len() * right_fft.len(),
222            Recipe::GoodThomasAlgorithm {
223                left_fft,
224                right_fft,
225            } => left_fft.len() * right_fft.len(),
226            Recipe::MixedRadixSmall {
227                left_fft,
228                right_fft,
229            } => left_fft.len() * right_fft.len(),
230            Recipe::GoodThomasAlgorithmSmall {
231                left_fft,
232                right_fft,
233            } => left_fft.len() * right_fft.len(),
234            Recipe::RadersAlgorithm { inner_fft } => inner_fft.len() + 1,
235            Recipe::BluesteinsAlgorithm { len, .. } => *len,
236        }
237    }
238}
239
240/// The Scalar FFT planner creates new FFT algorithm instances using non-SIMD algorithms.
241///
242/// RustFFT has several FFT algorithms available. For a given FFT size, the `FftPlannerScalar` decides which of the
243/// available FFT algorithms to use and then initializes them.
244///
245/// Use `FftPlannerScalar` instead of [`FftPlanner`](crate::FftPlanner) or [`FftPlannerAvx`](crate::FftPlannerAvx) when you want to explicitly opt out of using any SIMD-accelerated algorithms.
246///
247/// ~~~
248/// // Perform a forward Fft of size 1234
249/// use std::sync::Arc;
250/// use rustfft::{FftPlannerScalar, num_complex::Complex};
251///
252/// let mut planner = FftPlannerScalar::new();
253/// let fft = planner.plan_fft_forward(1234);
254///
255/// let mut buffer = vec![Complex{ re: 0.0f32, im: 0.0f32 }; 1234];
256/// fft.process(&mut buffer);
257///
258/// // The FFT instance returned by the planner has the type `Arc<dyn Fft<T>>`,
259/// // where T is the numeric type, ie f32 or f64, so it's cheap to clone
260/// let fft_clone = Arc::clone(&fft);
261/// ~~~
262///
263/// If you plan on creating multiple FFT instances, it is recommended to reuse the same planner for all of them. This
264/// is because the planner re-uses internal data across FFT instances wherever possible, saving memory and reducing
265/// setup time. (FFT instances created with one planner will never re-use data and buffers with FFT instances created
266/// by a different planner)
267///
268/// Each FFT instance owns [`Arc`s](std::sync::Arc) to its internal data, rather than borrowing it from the planner, so it's perfectly
269/// safe to drop the planner after creating Fft instances.
270pub struct FftPlannerScalar<T: FftNum> {
271    algorithm_cache: FftCache<T>,
272    recipe_cache: HashMap<usize, Arc<Recipe>>,
273}
274
275impl<T: FftNum> FftPlannerScalar<T> {
276    /// Creates a new `FftPlannerScalar` instance.
277    pub fn new() -> Self {
278        Self {
279            algorithm_cache: FftCache::new(),
280            recipe_cache: HashMap::new(),
281        }
282    }
283
284    /// Returns a `Fft` instance which computes FFTs of size `len`.
285    ///
286    /// If the provided `direction` is `FftDirection::Forward`, the returned instance will compute forward FFTs. If it's `FftDirection::Inverse`, it will compute inverse FFTs.
287    ///
288    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
289    pub fn plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>> {
290        // Step 1: Create a "recipe" for this FFT, which will tell us exactly which combination of algorithms to use
291        let recipe = self.design_fft_for_len(len);
292
293        // Step 2: Use our recipe to construct a Fft trait object
294        self.build_fft(&recipe, direction)
295    }
296
297    /// Returns a `Fft` instance which computes forward FFTs of size `len`
298    ///
299    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
300    pub fn plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>> {
301        self.plan_fft(len, FftDirection::Forward)
302    }
303
304    /// Returns a `Fft` instance which computes inverse FFTs of size `len`
305    ///
306    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
307    pub fn plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>> {
308        self.plan_fft(len, FftDirection::Inverse)
309    }
310
311    // Make a recipe for a length
312    fn design_fft_for_len(&mut self, len: usize) -> Arc<Recipe> {
313        if len < 2 {
314            Arc::new(Recipe::Dft(len))
315        } else if let Some(recipe) = self.recipe_cache.get(&len) {
316            Arc::clone(&recipe)
317        } else {
318            let factors = PrimeFactors::compute(len);
319            let recipe = self.design_fft_with_factors(len, factors);
320            self.recipe_cache.insert(len, Arc::clone(&recipe));
321            recipe
322        }
323    }
324
325    // Create the fft from a recipe, take from cache if possible
326    fn build_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> {
327        let len = recipe.len();
328        if let Some(instance) = self.algorithm_cache.get(len, direction) {
329            instance
330        } else {
331            let fft = self.build_new_fft(recipe, direction);
332            self.algorithm_cache.insert(&fft);
333            fft
334        }
335    }
336
337    // Create a new fft from a recipe
338    fn build_new_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> {
339        match recipe {
340            Recipe::Dft(len) => Arc::new(Dft::new(*len, direction)) as Arc<dyn Fft<T>>,
341            Recipe::RadixN { factors, base_fft } => {
342                let base_fft = self.build_fft(base_fft, direction);
343                Arc::new(RadixN::new(factors, base_fft)) as Arc<dyn Fft<T>>
344            }
345            Recipe::Radix4 { k, base_fft } => {
346                let base_fft = self.build_fft(base_fft, direction);
347                Arc::new(Radix4::new_with_base(*k, base_fft)) as Arc<dyn Fft<T>>
348            }
349            Recipe::Butterfly2 => Arc::new(Butterfly2::new(direction)) as Arc<dyn Fft<T>>,
350            Recipe::Butterfly3 => Arc::new(Butterfly3::new(direction)) as Arc<dyn Fft<T>>,
351            Recipe::Butterfly4 => Arc::new(Butterfly4::new(direction)) as Arc<dyn Fft<T>>,
352            Recipe::Butterfly5 => Arc::new(Butterfly5::new(direction)) as Arc<dyn Fft<T>>,
353            Recipe::Butterfly6 => Arc::new(Butterfly6::new(direction)) as Arc<dyn Fft<T>>,
354            Recipe::Butterfly7 => Arc::new(Butterfly7::new(direction)) as Arc<dyn Fft<T>>,
355            Recipe::Butterfly8 => Arc::new(Butterfly8::new(direction)) as Arc<dyn Fft<T>>,
356            Recipe::Butterfly9 => Arc::new(Butterfly9::new(direction)) as Arc<dyn Fft<T>>,
357            Recipe::Butterfly11 => Arc::new(Butterfly11::new(direction)) as Arc<dyn Fft<T>>,
358            Recipe::Butterfly12 => Arc::new(Butterfly12::new(direction)) as Arc<dyn Fft<T>>,
359            Recipe::Butterfly13 => Arc::new(Butterfly13::new(direction)) as Arc<dyn Fft<T>>,
360            Recipe::Butterfly16 => Arc::new(Butterfly16::new(direction)) as Arc<dyn Fft<T>>,
361            Recipe::Butterfly17 => Arc::new(Butterfly17::new(direction)) as Arc<dyn Fft<T>>,
362            Recipe::Butterfly19 => Arc::new(Butterfly19::new(direction)) as Arc<dyn Fft<T>>,
363            Recipe::Butterfly23 => Arc::new(Butterfly23::new(direction)) as Arc<dyn Fft<T>>,
364            Recipe::Butterfly24 => Arc::new(Butterfly24::new(direction)) as Arc<dyn Fft<T>>,
365            Recipe::Butterfly27 => Arc::new(Butterfly27::new(direction)) as Arc<dyn Fft<T>>,
366            Recipe::Butterfly29 => Arc::new(Butterfly29::new(direction)) as Arc<dyn Fft<T>>,
367            Recipe::Butterfly31 => Arc::new(Butterfly31::new(direction)) as Arc<dyn Fft<T>>,
368            Recipe::Butterfly32 => Arc::new(Butterfly32::new(direction)) as Arc<dyn Fft<T>>,
369            Recipe::MixedRadix {
370                left_fft,
371                right_fft,
372            } => {
373                let left_fft = self.build_fft(&left_fft, direction);
374                let right_fft = self.build_fft(&right_fft, direction);
375                Arc::new(MixedRadix::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
376            }
377            Recipe::GoodThomasAlgorithm {
378                left_fft,
379                right_fft,
380            } => {
381                let left_fft = self.build_fft(&left_fft, direction);
382                let right_fft = self.build_fft(&right_fft, direction);
383                Arc::new(GoodThomasAlgorithm::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
384            }
385            Recipe::MixedRadixSmall {
386                left_fft,
387                right_fft,
388            } => {
389                let left_fft = self.build_fft(&left_fft, direction);
390                let right_fft = self.build_fft(&right_fft, direction);
391                Arc::new(MixedRadixSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
392            }
393            Recipe::GoodThomasAlgorithmSmall {
394                left_fft,
395                right_fft,
396            } => {
397                let left_fft = self.build_fft(&left_fft, direction);
398                let right_fft = self.build_fft(&right_fft, direction);
399                Arc::new(GoodThomasAlgorithmSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
400            }
401            Recipe::RadersAlgorithm { inner_fft } => {
402                let inner_fft = self.build_fft(&inner_fft, direction);
403                Arc::new(RadersAlgorithm::new(inner_fft)) as Arc<dyn Fft<T>>
404            }
405            Recipe::BluesteinsAlgorithm { len, inner_fft } => {
406                let inner_fft = self.build_fft(&inner_fft, direction);
407                Arc::new(BluesteinsAlgorithm::new(*len, inner_fft)) as Arc<dyn Fft<T>>
408            }
409        }
410    }
411
412    fn design_fft_with_factors(&mut self, len: usize, factors: PrimeFactors) -> Arc<Recipe> {
413        if let Some(fft_instance) = self.design_butterfly_algorithm(len) {
414            fft_instance
415        } else if factors.is_prime() {
416            self.design_prime(len)
417        } else if let Some(butterfly_product) = self.design_butterfly_product(len) {
418            butterfly_product
419        } else if factors.has_factors_leq(MAX_RADIXN_FACTOR) {
420            self.design_radixn(factors)
421        } else {
422            let (left_factors, right_factors) = factors.partition_factors();
423            self.design_mixed_radix(left_factors, right_factors)
424        }
425    }
426
427    fn design_butterfly_product(&mut self, len: usize) -> Option<Arc<Recipe>> {
428        if len > 992 || len.is_power_of_two() {
429            return None;
430        } // 31*32 = 992. if we're above this size, don't bother. anddon't bother for powers of 2 because radix4 is fast
431
432        let limit = (len as f64).sqrt().ceil() as usize + 1;
433        let butterflies = [
434            2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 16, 17, 19, 23, 24, 27, 29, 31, 32,
435        ];
436
437        // search through our butterflies. if we find one that divides the length, see of the quotient is also a butterfly
438        // if it is, we have a butterfly product
439        // if there are multiple valid pairs, take the one with the smallest sum - we want the values to be as close together as possible
440        // ie 32 x 2, sum = 34, 16 x 4, sum = 20, 8 x 8, sum = 16, so even though 32,2 and 16x4 are valid, we want 8x8
441
442        let mut min_sum = usize::MAX;
443        let mut found_butterflies = None;
444        for left in butterflies.iter().take_while(|n| **n < limit) {
445            let right = len / left;
446            if left * right == len && butterflies.contains(&right) {
447                let sum = left + right;
448                if sum < min_sum {
449                    min_sum = sum;
450                    found_butterflies = Some((*left, right))
451                }
452            }
453        }
454
455        // if we found a valid pair of butterflies, construct a recipe for them
456        found_butterflies.map(|(left_len, right_len)| {
457            let left_fft = self.design_fft_for_len(left_len);
458            let right_fft = self.design_fft_for_len(right_len);
459
460            if gcd(left_len, right_len) == 1 {
461                Arc::new(Recipe::GoodThomasAlgorithmSmall {
462                    left_fft,
463                    right_fft,
464                })
465            } else {
466                Arc::new(Recipe::MixedRadixSmall {
467                    left_fft,
468                    right_fft,
469                })
470            }
471        })
472    }
473
474    fn design_mixed_radix(
475        &mut self,
476        left_factors: PrimeFactors,
477        right_factors: PrimeFactors,
478    ) -> Arc<Recipe> {
479        let left_len = left_factors.get_product();
480        let right_len = right_factors.get_product();
481
482        //neither size is a butterfly, so go with the normal algorithm
483        let left_fft = self.design_fft_with_factors(left_len, left_factors);
484        let right_fft = self.design_fft_with_factors(right_len, right_factors);
485
486        //if both left_len and right_len are small, use algorithms optimized for small FFTs
487        if left_len < 31 && right_len < 31 {
488            // for small FFTs, if gcd is 1, good-thomas is faster
489            if gcd(left_len, right_len) == 1 {
490                Arc::new(Recipe::GoodThomasAlgorithmSmall {
491                    left_fft,
492                    right_fft,
493                })
494            } else {
495                Arc::new(Recipe::MixedRadixSmall {
496                    left_fft,
497                    right_fft,
498                })
499            }
500        } else {
501            Arc::new(Recipe::MixedRadix {
502                left_fft,
503                right_fft,
504            })
505        }
506    }
507
508    fn design_radixn(&mut self, factors: PrimeFactors) -> Arc<Recipe> {
509        let p2 = factors.get_power_of_two();
510        let p3 = factors.get_power_of_three();
511        let p5 = factors
512            .get_other_factors()
513            .iter()
514            .find_map(|f| if f.value == 5 { Some(f.count) } else { None }) // if we had rustc 1.62, we could use (f.value == 5).then_some(f.count)
515            .unwrap_or(0);
516        let p7 = factors
517            .get_other_factors()
518            .iter()
519            .find_map(|f| if f.value == 7 { Some(f.count) } else { None })
520            .unwrap_or(0);
521
522        let base_len: usize = if factors.has_factors_gt(MAX_RADIXN_FACTOR) {
523            // If we have factors larger than RadixN can handle, we *must* use the product of those factors as our base
524            factors.product_above(MAX_RADIXN_FACTOR)
525        } else if p7 == 0 && p5 == 0 && p3 < 2 {
526            // here we handle pure powers of 2 and 3 times a power of 2 - we want to hand these to radix4, so we need to consume the correct number of factors to leave us with 4^k
527            if p3 == 0 {
528                // pure power of 2
529                assert!(p2 > 5); // butterflies should have caught this
530                if p2 % 2 == 1 {
531                    8
532                } else {
533                    16
534                }
535            } else {
536                // 3 times a power of 2
537                assert!(p2 > 3); // butterflies should have caught this
538                if p2 % 2 == 1 {
539                    24
540                } else {
541                    12
542                }
543            }
544        } else if p2 > 0 && p3 > 0 {
545            // we have a mixed bag of 2s and 3s
546            // todo: if we have way more 3s than 2s, benchmark using butterfly27 as the base
547            let excess_p2 = p2.saturating_sub(p3);
548            match excess_p2 {
549                0 => 6,
550                1 => 12,
551                _ => 24,
552            }
553        } else if p3 > 2 {
554            27
555        } else if p3 > 1 {
556            9
557        } else if p7 > 0 {
558            7
559        } else {
560            assert!(p5 > 0);
561            5
562        };
563
564        // now that we know the base length, divide it out get what radix4 needs to compute
565        let base_fft = self.design_fft_for_len(base_len);
566        let mut cross_len = factors.get_product() / base_len;
567
568        // see if we can use radix4
569        let cross_bits = cross_len.trailing_zeros();
570        if cross_len.is_power_of_two() && cross_bits % 2 == 0 {
571            let k = cross_bits / 2;
572            return Arc::new(Recipe::Radix4 { k, base_fft });
573        }
574
575        // we weren't able to use radix4, so fall back to RadixN
576        // theoretically we could do this with the p2, p3, p5 etc values above, but our choice of base knocked them out of sync
577        let mut factors = Vec::new();
578        while cross_len % 7 == 0 {
579            cross_len /= 7;
580            factors.push(RadixFactor::Factor7);
581        }
582        while cross_len % 6 == 0 {
583            cross_len /= 6;
584            factors.push(RadixFactor::Factor6);
585        }
586        while cross_len % 5 == 0 {
587            cross_len /= 5;
588            factors.push(RadixFactor::Factor5);
589        }
590        while cross_len % 3 == 0 {
591            cross_len /= 3;
592            factors.push(RadixFactor::Factor3);
593        }
594        assert!(cross_len.is_power_of_two());
595
596        // benchmarking suggests that we want to add the 4s *last*, i suspect because 4 is a better-than-usual value for the transpose
597        let cross_bits = cross_len.trailing_zeros();
598        if cross_bits % 2 == 1 {
599            factors.push(RadixFactor::Factor2);
600        }
601        factors.extend(std::iter::repeat(RadixFactor::Factor4).take(cross_bits as usize / 2));
602
603        Arc::new(Recipe::RadixN {
604            factors: factors.into_boxed_slice(),
605            base_fft,
606        })
607    }
608
609    // Returns Some(instance) if we have a butterfly available for this size. Returns None if there is no butterfly available for this size
610    fn design_butterfly_algorithm(&mut self, len: usize) -> Option<Arc<Recipe>> {
611        match len {
612            2 => Some(Arc::new(Recipe::Butterfly2)),
613            3 => Some(Arc::new(Recipe::Butterfly3)),
614            4 => Some(Arc::new(Recipe::Butterfly4)),
615            5 => Some(Arc::new(Recipe::Butterfly5)),
616            6 => Some(Arc::new(Recipe::Butterfly6)),
617            7 => Some(Arc::new(Recipe::Butterfly7)),
618            8 => Some(Arc::new(Recipe::Butterfly8)),
619            9 => Some(Arc::new(Recipe::Butterfly9)),
620            11 => Some(Arc::new(Recipe::Butterfly11)),
621            12 => Some(Arc::new(Recipe::Butterfly12)),
622            13 => Some(Arc::new(Recipe::Butterfly13)),
623            16 => Some(Arc::new(Recipe::Butterfly16)),
624            17 => Some(Arc::new(Recipe::Butterfly17)),
625            19 => Some(Arc::new(Recipe::Butterfly19)),
626            23 => Some(Arc::new(Recipe::Butterfly23)),
627            24 => Some(Arc::new(Recipe::Butterfly24)),
628            27 => Some(Arc::new(Recipe::Butterfly27)),
629            29 => Some(Arc::new(Recipe::Butterfly29)),
630            31 => Some(Arc::new(Recipe::Butterfly31)),
631            32 => Some(Arc::new(Recipe::Butterfly32)),
632            _ => None,
633        }
634    }
635
636    fn design_prime(&mut self, len: usize) -> Arc<Recipe> {
637        let inner_fft_len_rader = len - 1;
638        let raders_factors = PrimeFactors::compute(inner_fft_len_rader);
639        // If any of the prime factors is too large, Rader's gets slow and Bluestein's is the better choice
640        if raders_factors
641            .get_other_factors()
642            .iter()
643            .any(|val| val.value > MAX_RADER_PRIME_FACTOR)
644        {
645            // we want to use bluestein's algorithm. we have a free choice of which inner FFT length to use
646            // the only restriction is that it has to be (2 * len - 1) or larger. So we want the fastest FFT we can compute at or above that size.
647
648            // the most obvious choice is the next-highest power of two, but there's one trick we can pull to get a smaller fft that we can be 100% certain will be faster
649            let min_inner_len = 2 * len - 1;
650            let inner_len_pow2 = min_inner_len.checked_next_power_of_two().unwrap();
651            let inner_len_factor3 = inner_len_pow2 / 4 * 3;
652
653            let inner_len = if inner_len_factor3 >= min_inner_len {
654                inner_len_factor3
655            } else {
656                inner_len_pow2
657            };
658            let inner_fft = self.design_fft_for_len(inner_len);
659
660            Arc::new(Recipe::BluesteinsAlgorithm { len, inner_fft })
661        } else {
662            let inner_fft = self.design_fft_with_factors(inner_fft_len_rader, raders_factors);
663            Arc::new(Recipe::RadersAlgorithm { inner_fft })
664        }
665    }
666}
667
668#[cfg(test)]
669mod unit_tests {
670    use super::*;
671
672    fn is_mixedradixsmall(plan: &Recipe) -> bool {
673        match plan {
674            &Recipe::MixedRadixSmall { .. } => true,
675            _ => false,
676        }
677    }
678
679    fn is_goodthomassmall(plan: &Recipe) -> bool {
680        match plan {
681            &Recipe::GoodThomasAlgorithmSmall { .. } => true,
682            _ => false,
683        }
684    }
685
686    fn is_raders(plan: &Recipe) -> bool {
687        match plan {
688            &Recipe::RadersAlgorithm { .. } => true,
689            _ => false,
690        }
691    }
692
693    fn is_bluesteins(plan: &Recipe) -> bool {
694        match plan {
695            &Recipe::BluesteinsAlgorithm { .. } => true,
696            _ => false,
697        }
698    }
699
700    #[test]
701    fn test_plan_scalar_trivial() {
702        // Length 0 and 1 should use Dft
703        let mut planner = FftPlannerScalar::<f64>::new();
704        for len in 0..2 {
705            let plan = planner.design_fft_for_len(len);
706            assert_eq!(*plan, Recipe::Dft(len));
707            assert_eq!(plan.len(), len, "Recipe reports wrong length");
708        }
709    }
710
711    #[test]
712    fn test_plan_scalar_largepoweroftwo() {
713        // Powers of 2 above 64 should use Radix4
714        let mut planner = FftPlannerScalar::<f64>::new();
715        for pow in 6..32 {
716            let len = 1 << pow;
717            let plan = planner.design_fft_for_len(len);
718            assert!(matches!(*plan, Recipe::Radix4 { k: _, base_fft: _ }));
719            assert_eq!(plan.len(), len, "Recipe reports wrong length");
720        }
721    }
722
723    #[test]
724    fn test_plan_scalar_butterflies() {
725        // Check that all butterflies are used
726        let mut planner = FftPlannerScalar::<f64>::new();
727        assert_eq!(*planner.design_fft_for_len(2), Recipe::Butterfly2);
728        assert_eq!(*planner.design_fft_for_len(3), Recipe::Butterfly3);
729        assert_eq!(*planner.design_fft_for_len(4), Recipe::Butterfly4);
730        assert_eq!(*planner.design_fft_for_len(5), Recipe::Butterfly5);
731        assert_eq!(*planner.design_fft_for_len(6), Recipe::Butterfly6);
732        assert_eq!(*planner.design_fft_for_len(7), Recipe::Butterfly7);
733        assert_eq!(*planner.design_fft_for_len(8), Recipe::Butterfly8);
734        assert_eq!(*planner.design_fft_for_len(11), Recipe::Butterfly11);
735        assert_eq!(*planner.design_fft_for_len(12), Recipe::Butterfly12);
736        assert_eq!(*planner.design_fft_for_len(13), Recipe::Butterfly13);
737        assert_eq!(*planner.design_fft_for_len(16), Recipe::Butterfly16);
738        assert_eq!(*planner.design_fft_for_len(17), Recipe::Butterfly17);
739        assert_eq!(*planner.design_fft_for_len(19), Recipe::Butterfly19);
740        assert_eq!(*planner.design_fft_for_len(23), Recipe::Butterfly23);
741        assert_eq!(*planner.design_fft_for_len(24), Recipe::Butterfly24);
742        assert_eq!(*planner.design_fft_for_len(29), Recipe::Butterfly29);
743        assert_eq!(*planner.design_fft_for_len(31), Recipe::Butterfly31);
744        assert_eq!(*planner.design_fft_for_len(32), Recipe::Butterfly32);
745    }
746
747    #[test]
748    fn test_plan_scalar_radixn() {
749        // Products of several different small primes should become RadixN
750        let mut planner = FftPlannerScalar::<f64>::new();
751        for pow2 in 2..5 {
752            for pow3 in 2..5 {
753                for pow5 in 2..5 {
754                    for pow7 in 2..5 {
755                        let len = 2usize.pow(pow2)
756                            * 3usize.pow(pow3)
757                            * 5usize.pow(pow5)
758                            * 7usize.pow(pow7);
759                        let plan = planner.design_fft_for_len(len);
760                        assert!(
761                            matches!(
762                                *plan,
763                                Recipe::RadixN {
764                                    factors: _,
765                                    base_fft: _
766                                }
767                            ),
768                            "Expected MixedRadix, got {:?}",
769                            plan
770                        );
771                        assert_eq!(plan.len(), len, "Recipe reports wrong length");
772                    }
773                }
774            }
775        }
776    }
777
778    #[test]
779    fn test_plan_scalar_mixedradixsmall() {
780        // Products of two "small" butterflies < 31 that have a common divisor >1, and isn't a power of 2 should be MixedRadixSmall
781        let mut planner = FftPlannerScalar::<f64>::new();
782        for len in [12 * 3, 6 * 27].iter() {
783            let plan = planner.design_fft_for_len(*len);
784            assert!(
785                is_mixedradixsmall(&plan),
786                "Expected MixedRadixSmall, got {:?}",
787                plan
788            );
789            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
790        }
791    }
792
793    #[test]
794    fn test_plan_scalar_goodthomasbutterfly() {
795        let mut planner = FftPlannerScalar::<f64>::new();
796        for len in [3 * 5, 3 * 7, 5 * 7, 11 * 13].iter() {
797            let plan = planner.design_fft_for_len(*len);
798            assert!(
799                is_goodthomassmall(&plan),
800                "Expected GoodThomasAlgorithmSmall, got {:?}",
801                plan
802            );
803            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
804        }
805    }
806
807    #[test]
808    fn test_plan_scalar_bluestein_vs_rader() {
809        let difficultprimes: [usize; 11] = [59, 83, 107, 149, 167, 173, 179, 359, 719, 1439, 2879];
810        let easyprimes: [usize; 24] = [
811            53, 61, 67, 71, 73, 79, 89, 97, 101, 103, 109, 113, 127, 131, 137, 139, 151, 157, 163,
812            181, 191, 193, 197, 199,
813        ];
814
815        let mut planner = FftPlannerScalar::<f64>::new();
816        for len in difficultprimes.iter() {
817            let plan = planner.design_fft_for_len(*len);
818            assert!(
819                is_bluesteins(&plan),
820                "Expected BluesteinsAlgorithm, got {:?}",
821                plan
822            );
823            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
824        }
825        for len in easyprimes.iter() {
826            let plan = planner.design_fft_for_len(*len);
827            assert!(is_raders(&plan), "Expected RadersAlgorithm, got {:?}", plan);
828            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
829        }
830    }
831
832    #[test]
833    fn test_scalar_fft_cache() {
834        {
835            // Check that FFTs are reused if they're both forward
836            let mut planner = FftPlannerScalar::<f64>::new();
837            let fft_a = planner.plan_fft(1234, FftDirection::Forward);
838            let fft_b = planner.plan_fft(1234, FftDirection::Forward);
839            assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused");
840        }
841        {
842            // Check that FFTs are reused if they're both inverse
843            let mut planner = FftPlannerScalar::<f64>::new();
844            let fft_a = planner.plan_fft(1234, FftDirection::Inverse);
845            let fft_b = planner.plan_fft(1234, FftDirection::Inverse);
846            assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused");
847        }
848        {
849            // Check that FFTs are NOT resued if they don't both have the same direction
850            let mut planner = FftPlannerScalar::<f64>::new();
851            let fft_a = planner.plan_fft(1234, FftDirection::Forward);
852            let fft_b = planner.plan_fft(1234, FftDirection::Inverse);
853            assert!(
854                !Arc::ptr_eq(&fft_a, &fft_b),
855                "Existing fft was reused, even though directions don't match"
856            );
857        }
858    }
859
860    #[test]
861    fn test_scalar_recipe_cache() {
862        // Check that all butterflies are used
863        let mut planner = FftPlannerScalar::<f64>::new();
864        let fft_a = planner.design_fft_for_len(1234);
865        let fft_b = planner.design_fft_for_len(1234);
866        assert!(
867            Arc::ptr_eq(&fft_a, &fft_b),
868            "Existing recipe was not reused"
869        );
870    }
871
872    // We don't need to actually compute anything for a FFT size of zero, but we do need to verify that it doesn't explode
873    #[test]
874    fn test_plan_zero_scalar() {
875        let mut planner32 = FftPlannerScalar::<f32>::new();
876        let fft_zero32 = planner32.plan_fft_forward(0);
877        fft_zero32.process(&mut []);
878
879        let mut planner64 = FftPlannerScalar::<f64>::new();
880        let fft_zero64 = planner64.plan_fft_forward(0);
881        fft_zero64.process(&mut []);
882    }
883
884    // This test is not designed to be run, only to compile.
885    // We cannot make it #[test] since there is a generic parameter.
886    #[allow(dead_code)]
887    fn test_impl_fft_planner_send<T: FftNum>() {
888        fn is_send<T: Send>() {}
889        is_send::<FftPlanner<T>>();
890        is_send::<FftPlannerScalar<T>>();
891        is_send::<FftPlannerSse<T>>();
892        is_send::<FftPlannerAvx<T>>();
893    }
894}