Skip to main content

zaft/
lib.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#![allow(
30    clippy::assign_op_pattern,
31    clippy::only_used_in_recursion,
32    clippy::too_many_arguments,
33    clippy::type_complexity,
34    clippy::modulo_one
35)]
36#![cfg_attr(docsrs, feature(doc_cfg))]
37#![cfg_attr(
38    all(feature = "fcma", target_arch = "aarch64"),
39    feature(stdarch_neon_fcma)
40)]
41#[cfg(all(target_arch = "x86_64", feature = "avx"))]
42mod avx;
43mod bluestein;
44mod butterflies;
45mod complex_fma;
46mod dft;
47mod err;
48mod factory;
49mod factory64;
50mod fast_divider;
51mod good_thomas;
52mod mixed_radix;
53mod mla;
54#[cfg(all(target_arch = "aarch64", feature = "neon"))]
55mod neon;
56mod prime_factors;
57mod r2c;
58mod raders;
59mod radix10;
60mod radix11;
61mod radix13;
62mod radix3;
63mod radix4;
64mod radix5;
65mod radix6;
66mod radix7;
67mod spectrum_arithmetic;
68mod store;
69mod td;
70mod traits;
71mod transpose;
72mod transpose_arbitrary;
73mod util;
74
75#[allow(unused_imports)]
76use radix3::Radix3;
77#[allow(unused_imports)]
78use radix4::Radix4;
79#[allow(unused_imports)]
80use radix5::Radix5;
81#[allow(unused_imports)]
82use radix6::Radix6;
83#[allow(unused_imports)]
84use radix7::Radix7;
85#[allow(unused_imports)]
86use radix10::Radix10;
87#[allow(unused_imports)]
88use radix11::Radix11;
89#[allow(unused_imports)]
90use radix13::Radix13;
91use std::collections::HashMap;
92
93use crate::factory::AlgorithmFactory;
94use crate::prime_factors::{
95    PrimeFactors, can_be_two_factors, split_factors_closest, try_greedy_pure_power_split,
96};
97use crate::r2c::{
98    C2RAlgorithmFactory, C2ROddExpanderFactory, R2CAlgorithmFactory, R2CTwiddlesFactory,
99    strategy_c2r, strategy_r2c,
100};
101use crate::spectrum_arithmetic::ComplexArithFactory;
102use crate::td::{TwoDimensionalC2C, TwoDimensionalC2R, TwoDimensionalR2C};
103use crate::traits::FftTrigonometry;
104use crate::transpose::TransposeFactory;
105use crate::util::{
106    ALWAYS_BLUESTEIN_1000, ALWAYS_BLUESTEIN_2000, ALWAYS_BLUESTEIN_3000, ALWAYS_BLUESTEIN_4000,
107    ALWAYS_BLUESTEIN_5000, ALWAYS_BLUESTEIN_6000,
108};
109pub use err::ZaftError;
110use num_complex::Complex;
111use num_traits::{AsPrimitive, Float, MulAdd};
112pub use r2c::{C2RFftExecutor, R2CFftExecutor};
113use std::fmt::{Debug, Display, Formatter};
114use std::sync::{Arc, OnceLock, RwLock};
115pub use td::{TwoDimensionalExecutorC2R, TwoDimensionalExecutorR2C, TwoDimensionalFftExecutor};
116
117pub(crate) trait FftSample:
118    AlgorithmFactory<Self>
119    + FftTrigonometry
120    + Float
121    + 'static
122    + Send
123    + Sync
124    + MulAdd<Self, Output = Self>
125    + ComplexArithFactory<Self>
126    + TransposeFactory<Self>
127    + Copy
128    + Display
129    + FftPrimeCache<Self>
130    + Default
131    + Debug
132    + R2CAlgorithmFactory<Self>
133    + C2RAlgorithmFactory<Self>
134    + R2CTwiddlesFactory<Self>
135    + C2ROddExpanderFactory
136{
137    const HALF: Self;
138    const SQRT_3_OVER_2: Self;
139}
140
141impl FftSample for f64 {
142    const HALF: Self = 0.5;
143    // from sage.all import *
144    // import struct
145    //
146    // R = RealField(90)
147    //
148    // def float_to_hex(f):
149    //     packed = struct.pack('>f', float(f))
150    //     return '0x' + packed.hex()
151    //
152    // value = R(3).sqrt() / R(2)
153    //
154    // print(float_to_hex(value))
155    //
156    // def double_to_hex(f):
157    //         packed = struct.pack('>d', float(f))
158    //         return '0x' + packed.hex()
159    //
160    // print(double_to_hex(value))
161    const SQRT_3_OVER_2: Self = f64::from_bits(0x3febb67ae8584caa);
162}
163impl FftSample for f32 {
164    const HALF: Self = 0.5;
165    // from sage.all import *
166    // import struct
167    //
168    // R = RealField(90)
169    //
170    // def float_to_hex(f):
171    //     packed = struct.pack('>f', float(f))
172    //     return '0x' + packed.hex()
173    //
174    // value = R(3).sqrt() / R(2)
175    //
176    // print(float_to_hex(value))
177    //
178    // def double_to_hex(f):
179    //         packed = struct.pack('>d', float(f))
180    //         return '0x' + packed.hex()
181    //
182    // print(double_to_hex(value))
183    const SQRT_3_OVER_2: Self = f32::from_bits(0x3f5db3d7);
184}
185
186pub trait FftExecutor<T> {
187    /// Executes the Complex-to-Complex FFT operation **in-place**.
188    ///
189    /// The input/output slice `in_place` must have a length equal to `self.length()`.
190    /// The direction of the transform (Forward or Inverse) is determined by the executor's
191    /// pre-configured state, accessible via `self.direction()`.
192    ///
193    /// # Parameters
194    /// * `in_place`: The mutable slice containing the complex-valued input data. Upon completion,
195    ///   it will contain the complex-valued frequency-domain result (for a Forward transform)
196    ///   or the time-domain result (for an Inverse transform).
197    ///
198    /// # Errors
199    /// Returns a `ZaftError` if the execution fails (e.g., due to an incorrect slice length).
200    fn execute(&self, in_place: &mut [Complex<T>]) -> Result<(), ZaftError>;
201    /// Executes the FFT operation **in-place**, using caller-provided scratch memory.
202    ///
203    /// This variant avoids internal allocations and may improve performance
204    /// in repeated executions.
205    fn execute_with_scratch(
206        &self,
207        in_place: &mut [Complex<T>],
208        scratch: &mut [Complex<T>],
209    ) -> Result<(), ZaftError>;
210    /// Executes the FFT operation **out-of-place**.
211    ///
212    /// The input slice `src` is left unmodified. The result is written
213    /// into `dst`.
214    fn execute_out_of_place(
215        &self,
216        src: &[Complex<T>],
217        dst: &mut [Complex<T>],
218    ) -> Result<(), ZaftError>;
219    /// Executes the FFT operation **out-of-place**, using caller-provided scratch memory.
220    ///
221    /// This allows reuse of scratch memory across multiple FFT calls
222    /// to avoid repeated allocations.
223    fn execute_out_of_place_with_scratch(
224        &self,
225        src: &[Complex<T>],
226        dst: &mut [Complex<T>],
227        scratch: &mut [Complex<T>],
228    ) -> Result<(), ZaftError>;
229    /// Executes the FFT operation using a **destructive input strategy**.
230    ///
231    /// The `src` buffer may be overwritten during computation and should
232    /// not be assumed to retain its original contents after the call.
233    /// The final transform result is written to `dst`.
234    ///
235    /// This variant may enable more memory-efficient algorithms.
236    fn execute_destructive_with_scratch(
237        &self,
238        src: &mut [Complex<T>],
239        dst: &mut [Complex<T>],
240        scratch: &mut [Complex<T>],
241    ) -> Result<(), ZaftError>;
242    /// Returns the **direction** of the transform this executor is configured to perform.
243    ///
244    /// The direction is typically either `FftDirection::Forward` (Time to Frequency) or
245    /// `FftDirection::Inverse` (Frequency to Time).
246    fn direction(&self) -> FftDirection;
247    /// Returns the **length** (size N) of the input and output complex vectors.
248    ///
249    /// This is the number of complex elements that the executor is designed to process.
250    fn length(&self) -> usize;
251    /// Returns the required scratch buffer length for
252    /// [`execute_with_scratch`].
253    /// The returned size is **not a stable constant** and may change
254    /// between crate versions or algorithm implementations.
255    /// Always query this value dynamically.
256    fn scratch_length(&self) -> usize;
257    /// Returns the required scratch buffer length for
258    /// [`execute_out_of_place_with_scratch`].
259    /// The returned size is **not a stable constant** and may change
260    /// between crate versions or algorithm implementations.
261    /// Always query this value dynamically.
262    fn out_of_place_scratch_length(&self) -> usize;
263    /// Returns the required scratch buffer length for
264    /// [`execute_destructive_with_scratch`].
265    /// The returned size is **not a stable constant** and may change
266    /// between crate versions or algorithm implementations.
267    /// Always query this value dynamically.
268    fn destructive_scratch_length(&self) -> usize;
269}
270
271static PRIME_CACHE_F: OnceLock<RwLock<HashMap<usize, Arc<dyn FftExecutor<f32> + Send + Sync>>>> =
272    OnceLock::new();
273
274static PRIME_CACHE_B: OnceLock<RwLock<HashMap<usize, Arc<dyn FftExecutor<f32> + Send + Sync>>>> =
275    OnceLock::new();
276
277static PRIME_CACHE_DF: OnceLock<RwLock<HashMap<usize, Arc<dyn FftExecutor<f64> + Send + Sync>>>> =
278    OnceLock::new();
279
280static PRIME_CACHE_DB: OnceLock<RwLock<HashMap<usize, Arc<dyn FftExecutor<f64> + Send + Sync>>>> =
281    OnceLock::new();
282
283pub(crate) trait FftPrimeCache<T> {
284    fn has_cached_prime(
285        n: usize,
286        fft_direction: FftDirection,
287    ) -> Option<Arc<dyn FftExecutor<T> + Send + Sync>>;
288    fn put_prime_to_cache(fft_direction: FftDirection, fft: Arc<dyn FftExecutor<T> + Send + Sync>);
289}
290
291impl FftPrimeCache<f32> for f32 {
292    fn has_cached_prime(
293        n: usize,
294        fft_direction: FftDirection,
295    ) -> Option<Arc<dyn FftExecutor<f32> + Send + Sync>> {
296        if n >= 4000 {
297            return None;
298        }
299        let cache = (match fft_direction {
300            FftDirection::Forward => &PRIME_CACHE_F,
301            FftDirection::Inverse => &PRIME_CACHE_B,
302        })
303        .get_or_init(|| RwLock::new(HashMap::new()));
304        cache.read().ok()?.get(&n).cloned()
305    }
306
307    fn put_prime_to_cache(
308        fft_direction: FftDirection,
309        fft: Arc<dyn FftExecutor<f32> + Send + Sync>,
310    ) {
311        let length = fft.length();
312        if length > 4000 {
313            return;
314        }
315        let cache = (match fft_direction {
316            FftDirection::Forward => &PRIME_CACHE_F,
317            FftDirection::Inverse => &PRIME_CACHE_B,
318        })
319        .get_or_init(|| RwLock::new(HashMap::new()));
320        _ = cache.write().ok().and_then(|mut x| x.insert(length, fft));
321    }
322}
323
324impl FftPrimeCache<f64> for f64 {
325    fn has_cached_prime(
326        n: usize,
327        fft_direction: FftDirection,
328    ) -> Option<Arc<dyn FftExecutor<f64> + Send + Sync>> {
329        if n >= 4000 {
330            return None;
331        }
332        let cache = (match fft_direction {
333            FftDirection::Forward => &PRIME_CACHE_DF,
334            FftDirection::Inverse => &PRIME_CACHE_DB,
335        })
336        .get_or_init(|| RwLock::new(HashMap::new()));
337        cache.read().ok()?.get(&n).cloned()
338    }
339
340    fn put_prime_to_cache(
341        fft_direction: FftDirection,
342        fft: Arc<dyn FftExecutor<f64> + Send + Sync>,
343    ) {
344        let length = fft.length();
345        if length > 4000 {
346            return;
347        }
348        let cache = (match fft_direction {
349            FftDirection::Forward => &PRIME_CACHE_DF,
350            FftDirection::Inverse => &PRIME_CACHE_DB,
351        })
352        .get_or_init(|| RwLock::new(HashMap::new()));
353        _ = cache.write().ok().and_then(|mut x| x.insert(length, fft));
354    }
355}
356
357pub struct Zaft {}
358
359impl Zaft {
360    fn could_do_split_mixed_radix() -> bool {
361        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
362        if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma")
363        {
364            return true;
365        }
366        #[cfg(all(target_arch = "aarch64", feature = "neon"))]
367        {
368            true
369        }
370        #[cfg(not(all(target_arch = "aarch64", feature = "neon")))]
371        {
372            false
373        }
374    }
375
376    fn try_split_mixed_radix_butterflies<T: FftSample>(
377        _n_length: u64,
378        _q_length: u64,
379        _direction: FftDirection,
380    ) -> Result<Option<Arc<dyn FftExecutor<T> + Send + Sync>>, ZaftError>
381    where
382        f64: AsPrimitive<T>,
383    {
384        #[cfg(not(any(
385            all(target_arch = "aarch64", feature = "neon"),
386            all(target_arch = "x86_64", feature = "avx")
387        )))]
388        {
389            Ok(None)
390        }
391        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
392        if !std::arch::is_x86_feature_detected!("avx2")
393            || !std::arch::is_x86_feature_detected!("fma")
394        {
395            return Ok(None);
396        }
397        #[cfg(any(
398            all(target_arch = "aarch64", feature = "neon"),
399            all(target_arch = "x86_64", feature = "avx")
400        ))]
401        {
402            let min_length = _n_length.min(_q_length);
403            let max_length = _n_length.max(_q_length);
404
405            if !(2..=13).contains(&min_length) {
406                // If no butterfly exists, return None
407                return Ok(None);
408            }
409
410            // 1. Get the initial FFT strategy regardless of the butterfly size.
411            let q_fft = Zaft::strategy(max_length as usize, _direction)?;
412
413            let q_fft_opt = match min_length {
414                2 => T::mixed_radix_butterfly2(q_fft),
415                3 => T::mixed_radix_butterfly3(q_fft),
416                4 => T::mixed_radix_butterfly4(q_fft),
417                5 => T::mixed_radix_butterfly5(q_fft),
418                6 => T::mixed_radix_butterfly6(q_fft),
419                7 => T::mixed_radix_butterfly7(q_fft),
420                8 => T::mixed_radix_butterfly8(q_fft),
421                9 => T::mixed_radix_butterfly9(q_fft),
422                10 => T::mixed_radix_butterfly10(q_fft),
423                11 => T::mixed_radix_butterfly11(q_fft),
424                12 => T::mixed_radix_butterfly12(q_fft),
425                13 => T::mixed_radix_butterfly13(q_fft),
426                // This arm is covered by the early exit, but is required if the early exit is removed.
427                _ => unreachable!("min_length is outside the supported range [2, 13]."),
428            };
429
430            // 3. Handle the result once.
431            if let Some(q_fft_opt) = q_fft_opt? {
432                Ok(Some(q_fft_opt))
433            } else {
434                Ok(None)
435            }
436        }
437    }
438
439    fn make_mixed_radix<T: FftSample>(
440        direction: FftDirection,
441        prime_factors: PrimeFactors,
442    ) -> Result<Arc<dyn FftExecutor<T> + Send + Sync>, ZaftError>
443    where
444        f64: AsPrimitive<T>,
445    {
446        let factorization = &prime_factors.factorization;
447        let product = factorization.iter().map(|&x| x.0.pow(x.1)).product::<u64>();
448
449        let (n_length, q_length) = if product <= 529 {
450            match can_be_two_factors(factorization) {
451                None => match try_greedy_pure_power_split(factorization) {
452                    None => split_factors_closest(factorization),
453                    Some(values) => values,
454                },
455                Some(factors) => factors,
456            }
457        } else {
458            match try_greedy_pure_power_split(factorization) {
459                None => split_factors_closest(factorization),
460                Some(values) => values,
461            }
462        };
463
464        macro_rules! try_mixed_radix {
465            ($q: expr, $p: expr) => {{
466                return if let Some(executor) =
467                    Zaft::try_split_mixed_radix_butterflies($q as u64, $p as u64, direction)?
468                {
469                    Ok(executor)
470                } else {
471                    let p_fft = Zaft::strategy($q as usize, direction)?;
472                    let q_fft = Zaft::strategy($p as usize, direction)?;
473                    if $q < $p {
474                        T::mixed_radix(p_fft, q_fft)
475                    } else {
476                        T::mixed_radix(q_fft, p_fft)
477                    }
478                };
479            }};
480        }
481
482        if prime_factors.is_power_of_two_and_three() {
483            let product2 = prime_factors
484                .factorization
485                .iter()
486                .find(|x| x.0 == 2)
487                .map(|x| x.0.pow(x.1))
488                .expect("Factor of 2 must present in 2^n*3^m branch");
489            let product3 = prime_factors
490                .factorization
491                .iter()
492                .find(|x| x.0 == 3)
493                .map(|x| x.0.pow(x.1))
494                .expect("Factor of 3 must present in 2^n*3^m branch");
495
496            let factor2 = prime_factors.factor_of_2();
497            let factor3 = prime_factors.factor_of_3();
498
499            if factor2 == 1 && factor3 > 3 && T::butterfly54(direction).is_some() {
500                try_mixed_radix!(54, product / 54)
501            }
502
503            if product == 1536 {
504                try_mixed_radix!(8, 192)
505            }
506
507            if factor3 >= 1 && factor2 >= 4 {
508                if product.is_multiple_of(36)
509                    && product / 36 > 1
510                    && product / 36 <= 16
511                    && T::butterfly36(direction).is_some()
512                {
513                    try_mixed_radix!(36, product / 36)
514                }
515                if factor2 > factor3 {
516                    let mut factors_diff = 2u64.pow(factor2 - factor3);
517                    let mut remainder_factor = product / factors_diff;
518                    if remainder_factor <= 8 {
519                        remainder_factor *= 2;
520                        factors_diff /= 2;
521                    }
522                    try_mixed_radix!(factors_diff, remainder_factor)
523                }
524                if product.is_multiple_of(48)
525                    && product / 48 > 1
526                    && product / 48 <= 16
527                    && T::butterfly48(direction).is_some()
528                {
529                    try_mixed_radix!(48, product / 48)
530                }
531            }
532
533            return if let Some(executor) =
534                Zaft::try_split_mixed_radix_butterflies(product2, product3, direction)?
535            {
536                Ok(executor)
537            } else {
538                let p_fft = Zaft::strategy(product2 as usize, direction)?;
539                let q_fft = Zaft::strategy(product3 as usize, direction)?;
540                T::mixed_radix(p_fft, q_fft)
541            };
542        } else if prime_factors.is_power_of_two_and_five() {
543            let factor_of_5 = prime_factors.factor_of_5();
544            let factor_of_2 = prime_factors.factor_of_2();
545            if factor_of_5 == 1 {
546                if factor_of_2 >= 8 && factor_of_2 != 9 {
547                    try_mixed_radix!(5, product / 5)
548                }
549                if (2..=6).contains(&factor_of_2) {
550                    try_mixed_radix!(20, product / 20)
551                }
552            } else if factor_of_5 == 2 {
553                if factor_of_2 >= 3 && T::butterfly100(direction).is_some() {
554                    #[cfg(all(target_arch = "aarch64", feature = "neon"))]
555                    {
556                        try_mixed_radix!(product / 100, 100)
557                    }
558                    #[cfg(all(target_arch = "x86_64", feature = "avx"))]
559                    {
560                        use crate::util::has_valid_avx;
561                        if has_valid_avx() {
562                            try_mixed_radix!(product / 100, 100)
563                        }
564                    }
565                }
566            } else if factor_of_5 == 3 {
567                #[cfg(any(
568                    all(target_arch = "aarch64", feature = "neon"),
569                    all(target_arch = "x86_64", feature = "avx")
570                ))]
571                if product == 500 {
572                    try_mixed_radix!(5, 100)
573                }
574            }
575        } else if prime_factors.is_power_of_three_and_five() {
576            let factor_of_5 = prime_factors.factor_of_5();
577            let factor_of_3 = prime_factors.factor_of_3();
578            if factor_of_5 == 1 && factor_of_3 > 1 {
579                try_mixed_radix!(5, product / 5)
580            } else if factor_of_5 == 2 && factor_of_5 > 1 && factor_of_3 > 2 && factor_of_3 < 10 {
581                // 225 is more effective with mixed radix [9,25]
582                try_mixed_radix!(25, product / 25)
583            }
584        } else if prime_factors.is_power_of_two_and_seven() {
585            let factor_of_7 = prime_factors.factor_of_7();
586            let factor_of_2 = prime_factors.factor_of_2();
587            if factor_of_2 > 1 && factor_of_7 == 1 {
588                try_mixed_radix!(14, product / 14)
589            }
590        } else if prime_factors.has_power_of_five_and_seven() {
591            let factor_of_7 = prime_factors.factor_of_7();
592            let factor_of_5 = prime_factors.factor_of_5();
593            #[allow(clippy::collapsible_if)]
594            if factor_of_7 == 1 || factor_of_5 == 1 {
595                if product == 560 || product == 2240 {
596                    if T::butterfly35(direction).is_some() {
597                        try_mixed_radix!(35, product / 35)
598                    }
599                }
600            }
601            if (product == 210
602                || product == 280
603                || product == 315
604                || product == 350
605                || product == 420)
606                && T::butterfly35(direction).is_some()
607            {
608                try_mixed_radix!(35, product / 35)
609            }
610            if prime_factors.is_power_of_five_and_seven() && (factor_of_7 > 2 && factor_of_5 > 2) {
611                let product7 = prime_factors
612                    .factorization
613                    .iter()
614                    .find(|x| x.0 == 7)
615                    .map(|x| x.0.pow(x.1))
616                    .expect("Power of 7 should exist if factor of 5^n*7^m branch");
617                let product5 = prime_factors
618                    .factorization
619                    .iter()
620                    .find(|x| x.0 == 5)
621                    .map(|x| x.0.pow(x.1))
622                    .expect("Power of 5 should exist if factor of 5^n*7^m branch");
623                let p_fft = Zaft::strategy(product5 as usize, direction)?;
624                let q_fft = Zaft::strategy(product7 as usize, direction)?;
625                return if product5 < product7 {
626                    T::mixed_radix(p_fft, q_fft)
627                } else {
628                    T::mixed_radix(q_fft, p_fft)
629                };
630            }
631        } else if prime_factors.has_power_of_two_and_three() {
632            #[allow(clippy::collapsible_if)]
633            if (product == 84
634                || product == 294
635                || product == 252
636                || product == 378
637                || product == 504
638                || product == 672
639                || product == 756)
640                && T::butterfly42(direction).is_some()
641            {
642                try_mixed_radix!(42, product / 42)
643            }
644            let factor_of_2 = prime_factors.factor_of_2();
645            let factor_of_3 = prime_factors.factor_of_3();
646            let factor_of_5 = prime_factors.factor_of_5();
647
648            if (factor_of_2 == 1 && factor_of_3 == 1 && factor_of_5 > 1)
649                && T::butterfly30(direction).is_some()
650            {
651                // factor out 30
652                try_mixed_radix!(30, product / 30)
653            }
654
655            if ((product.is_multiple_of(144) && (product / 144) <= 16)
656                || product == 5040
657                || product == 4896
658                || product == 8496
659                || product == 8352)
660                && T::butterfly144(direction).is_some()
661            {
662                // factor out 144
663                try_mixed_radix!(product / 144, 144)
664            }
665
666            if ((product.is_multiple_of(72) && (product / 72) <= 16)
667                || product == 2088
668                || product == 3240
669                || product == 3816
670                || product == 4248)
671                && T::butterfly72(direction).is_some()
672            {
673                // factor out 72
674                try_mixed_radix!(product / 72, 72)
675            }
676
677            if product == 858 && T::butterfly66(direction).is_some() {
678                // factor out 66
679                try_mixed_radix!(66, product / 66)
680            }
681        }
682
683        if product.is_multiple_of(63)
684            && product / 63 <= 16
685            && product != 126
686            && T::butterfly64(direction).is_some()
687        {
688            // factor out 63
689            try_mixed_radix!(63, product / 63)
690        }
691
692        if (product == 147 || product == 315 || product == 378 || product == 399)
693            && T::butterfly21(direction).is_some()
694        {
695            try_mixed_radix!(21, product / 21)
696        }
697
698        #[cfg(any(
699            all(target_arch = "aarch64", feature = "neon"),
700            all(target_arch = "x86_64", feature = "avx")
701        ))]
702        {
703            macro_rules! get_mixed_butterflies {
704                ($q: expr, $p: expr) => {{
705                    if let Some(executor) =
706                        Zaft::try_split_mixed_radix_butterflies($q as u64, $p as u64, direction)?
707                    {
708                        return Ok(executor);
709                    }
710                }};
711            }
712            let factor_2 = prime_factors.factor_of_2();
713            let rem2_8 = factor_2 % 3;
714            if product.is_multiple_of(10) {
715                get_mixed_butterflies!(10, product / 10)
716            }
717            if factor_2 > 0 {
718                if rem2_8 == 1 {
719                    get_mixed_butterflies!(2, product / 2)
720                }
721                if rem2_8 == 2 {
722                    get_mixed_butterflies!(4, product / 4)
723                }
724                get_mixed_butterflies!(8, product / 8)
725            }
726            if product.is_multiple_of(12) {
727                get_mixed_butterflies!(12, product / 12)
728            }
729            if product.is_multiple_of(9) {
730                get_mixed_butterflies!(9, product / 9)
731            }
732            if product.is_multiple_of(7) {
733                get_mixed_butterflies!(7, product / 7)
734            }
735            if product.is_multiple_of(13) {
736                get_mixed_butterflies!(13, product / 13)
737            }
738            if product.is_multiple_of(11) {
739                get_mixed_butterflies!(11, product / 11)
740            }
741            if product.is_multiple_of(5) {
742                get_mixed_butterflies!(5, product / 5)
743            }
744            if product.is_multiple_of(3) {
745                get_mixed_butterflies!(3, product / 3)
746            }
747            match Zaft::try_split_mixed_radix_butterflies(n_length, q_length, direction) {
748                Ok(value) => match value {
749                    None => {}
750                    Some(executor) => return Ok(executor),
751                },
752                Err(err) => return Err(err),
753            }
754        }
755
756        let p_fft = Zaft::strategy(n_length as usize, direction)?;
757        let q_fft = Zaft::strategy(q_length as usize, direction)?;
758        if num_integer::gcd(q_length, n_length) == 1 && q_length < 33 && n_length <= 33 {
759            T::good_thomas(p_fft, q_fft)
760        } else {
761            T::mixed_radix(p_fft, q_fft)
762        }
763    }
764
765    fn make_prime<T: FftSample>(
766        n: usize,
767        direction: FftDirection,
768    ) -> Result<Arc<dyn FftExecutor<T> + Send + Sync>, ZaftError>
769    where
770        f64: AsPrimitive<T>,
771    {
772        if let Some(cached) = T::has_cached_prime(n, direction) {
773            return Ok(cached);
774        }
775        let convolve_prime = PrimeFactors::from_number(n as u64 - 1);
776        if n <= 6000 {
777            let bluesteins = [
778                ALWAYS_BLUESTEIN_1000.as_slice(),
779                ALWAYS_BLUESTEIN_2000.as_slice(),
780                ALWAYS_BLUESTEIN_3000.as_slice(),
781                ALWAYS_BLUESTEIN_4000.as_slice(),
782                ALWAYS_BLUESTEIN_5000.as_slice(),
783                ALWAYS_BLUESTEIN_6000.as_slice(),
784            ];
785            let subset = bluesteins[n / 1000];
786            if subset.contains(&n) {
787                return Zaft::make_bluestein(n, direction);
788            }
789            return Zaft::make_raders(n, direction);
790        }
791        // n-1 may result in Cunningham chain, and we want to avoid compute multiple prime numbers FFT at once
792        let big_factor = convolve_prime.factorization.iter().any(|x| x.0 > 31);
793        let new_prime = if !big_factor {
794            Zaft::make_raders(n, direction)
795        } else {
796            Zaft::make_bluestein(n, direction)
797        };
798        let fft_executor = new_prime?;
799        T::put_prime_to_cache(direction, fft_executor.clone());
800        Ok(fft_executor)
801    }
802
803    fn make_raders<T: FftSample>(
804        n: usize,
805        direction: FftDirection,
806    ) -> Result<Arc<dyn FftExecutor<T> + Send + Sync>, ZaftError>
807    where
808        f64: AsPrimitive<T>,
809    {
810        let convolve_fft = Zaft::strategy(n - 1, direction);
811        T::raders(convolve_fft?, n, direction)
812    }
813
814    fn make_bluestein<T: FftSample>(
815        n: usize,
816        direction: FftDirection,
817    ) -> Result<Arc<dyn FftExecutor<T> + Send + Sync>, ZaftError>
818    where
819        f64: AsPrimitive<T>,
820    {
821        // we want to use bluestein's algorithm. we have a free choice of which inner FFT length to use
822        // the only restriction is that it has to be (2 * len - 1) or larger. So we want the fastest FFT we can compute at or above that size.
823
824        // the most obvious choice is the next-highest power of two, but there's one trick we can pull to get a smaller fft that we can be 100% certain will be faster
825        let min_inner_len = 2 * n - 1;
826        let inner_len_pow2 = min_inner_len.checked_next_power_of_two().unwrap();
827        let inner_len_factor3 = inner_len_pow2 / 4 * 3;
828
829        let inner_len = if inner_len_factor3 >= min_inner_len {
830            inner_len_factor3
831        } else {
832            inner_len_pow2
833        };
834        let convolve_fft = Zaft::strategy(inner_len, direction)?;
835        T::bluestein(convolve_fft, n, direction)
836    }
837
838    fn plan_butterfly<T: FftSample>(
839        n: usize,
840        fft_direction: FftDirection,
841    ) -> Option<Result<Arc<dyn FftExecutor<T> + Send + Sync>, ZaftError>> {
842        match n {
843            1 => return Some(T::butterfly1(fft_direction)),
844            2 => return Some(T::butterfly2(fft_direction)),
845            3 => return Some(T::butterfly3(fft_direction)),
846            4 => return Some(T::butterfly4(fft_direction)),
847            5 => return Some(T::butterfly5(fft_direction)),
848            6 => return Some(T::butterfly6(fft_direction)),
849            7 => return Some(T::butterfly7(fft_direction)),
850            8 => return Some(T::butterfly8(fft_direction)),
851            9 => return Some(T::butterfly9(fft_direction)),
852            10 => return Some(T::butterfly10(fft_direction)),
853            11 => return Some(T::butterfly11(fft_direction)),
854            12 => return Some(T::butterfly12(fft_direction)),
855            13 => return Some(T::butterfly13(fft_direction)),
856            14 => return Some(T::butterfly14(fft_direction)),
857            15 => return Some(T::butterfly15(fft_direction)),
858            16 => return Some(T::butterfly16(fft_direction)),
859            17 => return Some(T::butterfly17(fft_direction)),
860            18 => return Some(T::butterfly18(fft_direction)),
861            19 => return Some(T::butterfly19(fft_direction)),
862            20 => return Some(T::butterfly20(fft_direction)),
863            21 => {
864                return T::butterfly21(fft_direction).map(Ok);
865            }
866            23 => return Some(T::butterfly23(fft_direction)),
867            24 => {
868                return T::butterfly24(fft_direction).map(Ok);
869            }
870            25 => return Some(T::butterfly25(fft_direction)),
871            27 => return Some(T::butterfly27(fft_direction)),
872            28 => return T::butterfly28(fft_direction).map(Ok),
873            29 => return Some(T::butterfly29(fft_direction)),
874            30 => {
875                return T::butterfly30(fft_direction).map(Ok);
876            }
877            31 => return Some(T::butterfly31(fft_direction)),
878            32 => return Some(T::butterfly32(fft_direction)),
879            35 => {
880                return T::butterfly35(fft_direction).map(Ok);
881            }
882            36 => {
883                return T::butterfly36(fft_direction).map(Ok);
884            }
885            37 => return Some(T::butterfly37(fft_direction)),
886            40 => {
887                return T::butterfly40(fft_direction).map(Ok);
888            }
889            41 => return Some(T::butterfly41(fft_direction)),
890            42 => {
891                return T::butterfly42(fft_direction).map(Ok);
892            }
893            48 => {
894                return T::butterfly48(fft_direction).map(Ok);
895            }
896            49 => {
897                return T::butterfly49(fft_direction).map(Ok);
898            }
899            54 => {
900                return T::butterfly54(fft_direction).map(Ok);
901            }
902            63 => {
903                return T::butterfly63(fft_direction).map(Ok);
904            }
905            64 => {
906                return T::butterfly64(fft_direction).map(Ok);
907            }
908            66 => {
909                return T::butterfly66(fft_direction).map(Ok);
910            }
911            70 => {
912                return T::butterfly70(fft_direction).map(Ok);
913            }
914            72 => {
915                return T::butterfly72(fft_direction).map(Ok);
916            }
917            78 => {
918                return T::butterfly78(fft_direction).map(Ok);
919            }
920            81 => {
921                return T::butterfly81(fft_direction).map(Ok);
922            }
923            88 => {
924                return T::butterfly88(fft_direction).map(Ok);
925            }
926            96 => {
927                return T::butterfly96(fft_direction).map(Ok);
928            }
929            100 => {
930                return T::butterfly100(fft_direction).map(Ok);
931            }
932            108 => {
933                return T::butterfly108(fft_direction).map(Ok);
934            }
935            121 => {
936                return T::butterfly121(fft_direction).map(Ok);
937            }
938            125 => {
939                return T::butterfly125(fft_direction).map(Ok);
940            }
941            128 => {
942                return T::butterfly128(fft_direction).map(Ok);
943            }
944            144 => {
945                return T::butterfly144(fft_direction).map(Ok);
946            }
947            169 => {
948                return T::butterfly169(fft_direction).map(Ok);
949            }
950            192 => {
951                return T::butterfly192(fft_direction).map(Ok);
952            }
953            216 => {
954                return T::butterfly216(fft_direction).map(Ok);
955            }
956            243 => {
957                return T::butterfly243(fft_direction).map(Ok);
958            }
959            256 => {
960                return T::butterfly256(fft_direction).map(Ok);
961            }
962            512 => {
963                return T::butterfly512(fft_direction).map(Ok);
964            }
965            1024 => {
966                return T::butterfly1024(fft_direction).map(Ok);
967            }
968            _ => {}
969        }
970        None
971    }
972
973    pub(crate) fn strategy<T: FftSample>(
974        n: usize,
975        fft_direction: FftDirection,
976    ) -> Result<Arc<dyn FftExecutor<T> + Send + Sync>, ZaftError>
977    where
978        f64: AsPrimitive<T>,
979    {
980        if n == 0 {
981            return Err(ZaftError::ZeroSizedFft);
982        }
983        if n <= 512 || n == 1024 {
984            if let Some(bf) = Zaft::plan_butterfly(n, fft_direction) {
985                return bf;
986            }
987        }
988        let prime_factors = PrimeFactors::from_number(n as u64);
989        if prime_factors.is_power_of_three {
990            // Use Radix-3 if divisible by 3
991            T::radix3(n, fft_direction)
992        } else if prime_factors.is_power_of_five {
993            // Use Radix-5 if power of 5
994            T::radix5(n, fft_direction)
995        } else if prime_factors.is_power_of_two {
996            // Use Radix-4 if a power of 2
997            if Zaft::could_do_split_mixed_radix() {
998                if n == 2048 {
999                    if let Some(bf) =
1000                        T::mixed_radix_butterfly8(Zaft::strategy(n / 8, fft_direction)?)?
1001                    {
1002                        return Ok(bf);
1003                    }
1004                }
1005                let rem3 = prime_factors.factor_of_2() % 3;
1006                if rem3 == 2 {
1007                    if let Some(bf) =
1008                        T::mixed_radix_butterfly4(Zaft::strategy(n / 4, fft_direction)?)?
1009                    {
1010                        return Ok(bf);
1011                    }
1012                } else if rem3 == 1 {
1013                    let has1024 = T::butterfly1024(fft_direction).is_some();
1014                    if has1024 {
1015                        if let Some(bf) =
1016                            T::mixed_radix_butterfly8(Zaft::strategy(n / 8, fft_direction)?)?
1017                        {
1018                            return Ok(bf);
1019                        }
1020                    }
1021                    if let Some(bf) =
1022                        T::mixed_radix_butterfly2(Zaft::strategy(n / 2, fft_direction)?)?
1023                    {
1024                        return Ok(bf);
1025                    }
1026                }
1027                if let Some(bf) = T::mixed_radix_butterfly8(Zaft::strategy(n / 8, fft_direction)?)?
1028                {
1029                    return Ok(bf);
1030                }
1031            }
1032            T::radix4(n, fft_direction)
1033        } else if prime_factors.is_power_of_six {
1034            T::radix6(n, fft_direction)
1035        } else if prime_factors.is_power_of_seven {
1036            T::radix7(n, fft_direction)
1037        } else if prime_factors.is_power_of_ten {
1038            T::radix10(n, fft_direction)
1039        } else if prime_factors.is_power_of_eleven {
1040            T::radix11(n, fft_direction)
1041        } else if prime_factors.is_power_of_thirteen {
1042            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
1043            {
1044                if Zaft::could_do_split_mixed_radix() {
1045                    let r = n / 13;
1046                    if r == 13 {
1047                        let right_fft = T::butterfly13(fft_direction)?;
1048                        if let Ok(Some(v)) = T::mixed_radix_butterfly13(right_fft) {
1049                            return Ok(v);
1050                        }
1051                    }
1052                    let right_fft = T::radix13(r, fft_direction)?;
1053                    if let Ok(Some(v)) = T::mixed_radix_butterfly13(right_fft) {
1054                        return Ok(v);
1055                    }
1056                }
1057            }
1058            T::radix13(n, fft_direction)
1059        } else if prime_factors.may_be_represented_in_mixed_radix() {
1060            Zaft::make_mixed_radix(fft_direction, prime_factors)
1061        } else if prime_factors.is_prime() {
1062            Zaft::make_prime(n, fft_direction)
1063        } else {
1064            T::dft(n, fft_direction)
1065        }
1066    }
1067
1068    /// Creates a Real-to-Complex (R2C) FFT plan executor for single-precision floating-point numbers (`f32`).
1069    ///
1070    /// This plan transforms a real-valued input array of length `n` into a complex
1071    /// output array of length `n/2 + 1` (or `n` if odd, with a special handling for the last complex element).
1072    ///
1073    /// # Parameters
1074    /// * `n`: The **length** of the real-valued input vector.
1075    ///
1076    /// # Returns
1077    /// A `Result` containing an `Arc` to a dynamically dispatched `R2CFftExecutor<f32>` plan,
1078    /// or a `ZaftError` if the plan cannot be generated.
1079    pub fn make_r2c_fft_f32(
1080        n: usize,
1081    ) -> Result<Arc<dyn R2CFftExecutor<f32> + Send + Sync>, ZaftError> {
1082        strategy_r2c(n)
1083    }
1084
1085    /// Creates a Complex-to-Real (C2R) Inverse FFT plan executor for single-precision floating-point numbers (`f32`).
1086    ///
1087    /// This plan transforms a complex input array (the result of an R2C FFT)
1088    /// back into a real-valued output array of length `n`.
1089    ///
1090    /// # Parameters
1091    /// * `n`: The **length** of the final real-valued output vector.
1092    ///
1093    /// # Returns
1094    /// A `Result` containing an `Arc` to a dynamically dispatched `C2RFftExecutor<f32>` plan,
1095    /// or a `ZaftError` if the plan cannot be generated.
1096    pub fn make_c2r_fft_f32(
1097        n: usize,
1098    ) -> Result<Arc<dyn C2RFftExecutor<f32> + Send + Sync>, ZaftError> {
1099        strategy_c2r(n)
1100    }
1101
1102    /// Creates a standard Complex-to-Complex Forward FFT plan executor for single-precision floating-point numbers (`f32`).
1103    ///
1104    /// This is used for a standard Discrete Fourier Transform (DFT) where both input and output are complex.
1105    ///
1106    /// # Parameters
1107    /// * `n`: The **length** of the input/output complex vectors.
1108    ///
1109    /// # Returns
1110    /// A `Result` containing an `Arc` to a dynamically dispatched `FftExecutor<f32>` plan,
1111    /// or a `ZaftError` if the plan cannot be generated.
1112    pub fn make_forward_fft_f32(
1113        n: usize,
1114    ) -> Result<Arc<dyn FftExecutor<f32> + Send + Sync>, ZaftError> {
1115        Zaft::strategy(n, FftDirection::Forward)
1116    }
1117
1118    /// Creates a standard Complex-to-Complex Forward FFT plan executor for double-precision floating-point numbers (`f64`).
1119    ///
1120    /// # Parameters
1121    /// * `n`: The **length** of the input/output complex vectors.
1122    ///
1123    /// # Returns
1124    /// A `Result` containing an `Arc` to a dynamically dispatched `FftExecutor<f64>` plan,
1125    /// or a `ZaftError` if the plan cannot be generated.
1126    pub fn make_forward_fft_f64(
1127        n: usize,
1128    ) -> Result<Arc<dyn FftExecutor<f64> + Send + Sync>, ZaftError> {
1129        Zaft::strategy(n, FftDirection::Forward)
1130    }
1131
1132    /// Creates a Complex-to-Real (C2R) Inverse FFT plan executor for double-precision floating-point numbers (`f64`).
1133    ///
1134    /// This is the double-precision version of `make_c2r_fft_f32`.
1135    ///
1136    /// # Parameters
1137    /// * `n`: The **length** of the final real-valued output vector.
1138    ///
1139    /// # Returns
1140    /// A `Result` containing an `Arc` to a dynamically dispatched `C2RFftExecutor<f64>` plan,
1141    /// or a `ZaftError` if the plan cannot be generated.
1142    pub fn make_c2r_fft_f64(
1143        n: usize,
1144    ) -> Result<Arc<dyn C2RFftExecutor<f64> + Send + Sync>, ZaftError> {
1145        strategy_c2r(n)
1146    }
1147
1148    /// Creates a Real-to-Complex (R2C) FFT plan executor for double-precision floating-point numbers (`f64`).
1149    ///
1150    /// This is the double-precision version of `make_r2c_fft_f32`.
1151    ///
1152    /// # Parameters
1153    /// * `n`: The **length** of the real-valued input vector.
1154    ///
1155    /// # Returns
1156    /// A `Result` containing an `Arc` to a dynamically dispatched `R2CFftExecutor<f64>` plan,
1157    /// or a `ZaftError` if the plan cannot be generated.
1158    pub fn make_r2c_fft_f64(
1159        n: usize,
1160    ) -> Result<Arc<dyn R2CFftExecutor<f64> + Send + Sync>, ZaftError> {
1161        strategy_r2c(n)
1162    }
1163
1164    /// Creates a standard Complex-to-Complex Inverse FFT plan executor for single-precision floating-point numbers (`f32`).
1165    ///
1166    /// This is the inverse transformation for the standard DFT, used to convert frequency-domain data
1167    /// back into the time domain.
1168    ///
1169    /// # Parameters
1170    /// * `n`: The **length** of the input/output complex vectors.
1171    ///
1172    /// # Returns
1173    /// A `Result` containing an `Arc` to a dynamically dispatched `FftExecutor<f32>` plan,
1174    /// or a `ZaftError` if the plan cannot be generated.
1175    pub fn make_inverse_fft_f32(
1176        n: usize,
1177    ) -> Result<Arc<dyn FftExecutor<f32> + Send + Sync>, ZaftError> {
1178        Zaft::strategy(n, FftDirection::Inverse)
1179    }
1180
1181    /// Creates a standard Complex-to-Complex Inverse FFT plan executor for double-precision floating-point numbers (`f64`).
1182    ///
1183    /// # Parameters
1184    /// * `n`: The **length** of the input/output complex vectors.
1185    ///
1186    /// # Returns
1187    /// A `Result` containing an `Arc` to a dynamically dispatched `FftExecutor<f64>` plan,
1188    /// or a `ZaftError` if the plan cannot be generated.
1189    pub fn make_inverse_fft_f64(
1190        n: usize,
1191    ) -> Result<Arc<dyn FftExecutor<f64> + Send + Sync>, ZaftError> {
1192        Zaft::strategy(n, FftDirection::Inverse)
1193    }
1194
1195    /// Creates a high-performance, two-dimensional Real-to-Complex (R2C) FFT plan executor for single-precision floating-point numbers (`f32`).
1196    ///
1197    /// This function constructs a plan for transforming a **real-valued** 2D input array (with dimensions `height x width`)
1198    /// into its frequency-domain representation. The executor is optimized for parallel
1199    /// execution across the specified number of threads.
1200    ///
1201    /// **The R2C 2D FFT is typically performed in two steps:**
1202    /// 1. A 1D R2C FFT is performed across the **rows** (or columns) of the input.
1203    /// 2. A 1D Complex-to-Complex (C2C) FFT is performed across the **columns** (or rows) of the intermediate complex data.
1204    ///
1205    /// # Parameters
1206    /// * `width`: The number of elements in the X-dimension (columns) of the 2D input array.
1207    /// * `height`: The number of elements in the Y-dimension (rows) of the 2D input array.
1208    /// * `thread_count`: The maximum number of threads the executor is allowed to use during execution.
1209    ///
1210    /// # Returns
1211    /// A `Result` containing an `Arc` to a dynamically dispatched `TwoDimensionalExecutorR2C<f32>` plan,
1212    /// or a `ZaftError` if the plan cannot be generated (e.g., due to invalid dimensions).
1213    ///
1214    /// # Note on Output Size
1215    /// The resulting frequency-domain complex data will typically have dimensions of `height x (width/2 + 1)`
1216    /// complex elements, leveraging the Hermitian symmetry property of real-input FFTs.
1217    pub fn make_2d_r2c_fft_f32(
1218        width: usize,
1219        height: usize,
1220        thread_count: usize,
1221    ) -> Result<Arc<dyn TwoDimensionalExecutorR2C<f32> + Send + Sync>, ZaftError> {
1222        if width * height == 0 {
1223            return Err(ZaftError::ZeroSizedFft);
1224        }
1225        let width_fft = Zaft::make_r2c_fft_f32(width)?;
1226        let height_fft = Zaft::make_forward_fft_f32(height)?;
1227        let width_scratch_length = width_fft.complex_scratch_length();
1228        let height_scratch_length = height_fft.scratch_length();
1229        Ok(Arc::new(TwoDimensionalR2C {
1230            width_r2c_executor: width_fft,
1231            height_c2c_executor: height_fft,
1232            thread_count: thread_count.max(1),
1233            width,
1234            height,
1235            transpose_width_to_height: f32::transpose_strategy((width / 2) + 1, height),
1236            width_scratch_length,
1237            height_scratch_length,
1238        }))
1239    }
1240
1241    /// Creates a high-performance, two-dimensional Real-to-Complex (R2C) FFT plan executor for **double-precision floating-point numbers** (`f64`).
1242    ///
1243    /// This function constructs a plan for transforming a **real-valued** 2D input array (with dimensions `height x width`)
1244    /// into its frequency-domain representation. The executor is highly optimized for parallel
1245    /// execution across the specified number of threads.
1246    ///
1247    /// **The R2C 2D FFT typically follows a row-column or column-row decomposition:**
1248    /// 1. A 1D R2C FFT is performed across the elements of one axis.
1249    /// 2. A 1D Complex-to-Complex (C2C) FFT is performed across the elements of the other axis on the intermediate complex data.
1250    ///
1251    /// This process efficiently computes the 2D transform while leveraging the computational savings offered
1252    /// by the real-valued nature of the input data.
1253    ///
1254    ///
1255    /// # Parameters
1256    /// * `width`: The number of elements in the X-dimension (**columns**) of the 2D input array.
1257    /// * `height`: The number of elements in the Y-dimension (**rows**) of the 2D input array.
1258    /// * `thread_count`: The maximum number of threads the executor is allowed to use during the parallel computation.
1259    ///
1260    /// # Returns
1261    /// A `Result` containing an `Arc` to a dynamically dispatched `TwoDimensionalExecutorR2C<f64>` plan,
1262    /// which is safe to use across threads (`Send + Sync`), or a `ZaftError` if the plan cannot be generated.
1263    ///
1264    /// # Note on Output Size
1265    /// Due to the resulting frequency-domain complex data will only store approximately half
1266    /// the data required for a full C2C transform. Specifically, the output complex array will typically have dimensions
1267    /// of `height x (width/2 + 1)` complex elements.
1268    pub fn make_2d_r2c_fft_f64(
1269        width: usize,
1270        height: usize,
1271        thread_count: usize,
1272    ) -> Result<Arc<dyn TwoDimensionalExecutorR2C<f64> + Send + Sync>, ZaftError> {
1273        if width * height == 0 {
1274            return Err(ZaftError::ZeroSizedFft);
1275        }
1276        let width_fft = Zaft::make_r2c_fft_f64(width)?;
1277        let height_fft = Zaft::make_forward_fft_f64(height)?;
1278        let width_scratch_length = width_fft.complex_scratch_length();
1279        let height_scratch_length = height_fft.scratch_length();
1280        Ok(Arc::new(TwoDimensionalR2C {
1281            width_r2c_executor: width_fft,
1282            height_c2c_executor: height_fft,
1283            thread_count: thread_count.max(1),
1284            width,
1285            height,
1286            transpose_width_to_height: f64::transpose_strategy((width / 2) + 1, height),
1287            width_scratch_length,
1288            height_scratch_length,
1289        }))
1290    }
1291
1292    /// Creates a 2D complex-to-complex FFT executor for f32 inputs.
1293    ///
1294    /// This function constructs a two-dimensional FFT executor that can perform
1295    /// forward or inverse FFTs on a width × height grid of complex f32 values.
1296    /// The executor internally manages separate FFTs along the width and height dimensions,
1297    /// and uses a transpose strategy for efficient computation.
1298    ///
1299    /// # Parameters
1300    ///
1301    /// * width - The number of columns in the input data grid.
1302    /// * height - The number of rows in the input data grid.
1303    /// * fft_direction - The direction of the FFT (Forward or Inverse).
1304    /// * thread_count - Number of threads to use for parallel computation (minimum 1).
1305    ///
1306    /// # Returns
1307    ///
1308    /// Returns a Result containing an Arc to a type implementing
1309    /// [TwoDimensionalFftExecutor<f32>], or a [ZaftError] if FFT creation fails.
1310    ///
1311    /// # Notes
1312    ///
1313    /// The internal width and height of the FFT executors are swapped for inverse FFTs
1314    /// to correctly handle the transformation.
1315    pub fn make_2d_c2c_fft_f32(
1316        width: usize,
1317        height: usize,
1318        fft_direction: FftDirection,
1319        thread_count: usize,
1320    ) -> Result<Arc<dyn TwoDimensionalFftExecutor<f32> + Send + Sync>, ZaftError> {
1321        if width * height == 0 {
1322            return Err(ZaftError::ZeroSizedFft);
1323        }
1324        let fft_width = match fft_direction {
1325            FftDirection::Forward => width,
1326            FftDirection::Inverse => height,
1327        };
1328        let fft_height = match fft_direction {
1329            FftDirection::Forward => height,
1330            FftDirection::Inverse => width,
1331        };
1332        let width_fft = match fft_direction {
1333            FftDirection::Forward => Zaft::make_forward_fft_f32(fft_width)?,
1334            FftDirection::Inverse => Zaft::make_inverse_fft_f32(fft_width)?,
1335        };
1336        let height_fft = match fft_direction {
1337            FftDirection::Forward => Zaft::make_forward_fft_f32(fft_height)?,
1338            FftDirection::Inverse => Zaft::make_inverse_fft_f32(fft_height)?,
1339        };
1340        let oof_scratch_width = width_fft.out_of_place_scratch_length();
1341        let inplace_scratch_height = height_fft.scratch_length();
1342        Ok(Arc::new(TwoDimensionalC2C {
1343            width_c2c_executor: width_fft,
1344            height_c2c_executor: height_fft,
1345            thread_count: thread_count.max(1),
1346            width: fft_width,
1347            height: fft_height,
1348            transpose_width_to_height: f32::transpose_strategy(fft_width, fft_height),
1349            oof_width_scratch_size: oof_scratch_width,
1350            height_scratch_size: inplace_scratch_height,
1351        }))
1352    }
1353
1354    /// Creates a 2D complex-to-complex FFT executor for f64 inputs.
1355    ///
1356    /// This function constructs a two-dimensional FFT executor that can perform
1357    /// forward or inverse FFTs on a width × height grid of complex f64 values.
1358    /// The executor internally manages separate FFTs along the width and height dimensions,
1359    /// and uses a transpose strategy for efficient computation.
1360    ///
1361    /// # Parameters
1362    ///
1363    /// * width - The number of columns in the input data grid.
1364    /// * height - The number of rows in the input data grid.
1365    /// * fft_direction - The direction of the FFT (Forward or Inverse).
1366    /// * thread_count - Number of threads to use for parallel computation (minimum 1).
1367    ///
1368    /// # Returns
1369    ///
1370    /// Returns a Result containing an Arc to a type implementing
1371    /// [TwoDimensionalFftExecutor<f64>], or a [ZaftError] if FFT creation fails.
1372    ///
1373    /// # Notes
1374    ///
1375    /// The internal width and height of the FFT executors are swapped for inverse FFTs
1376    /// to correctly handle the transformation.
1377    pub fn make_2d_c2c_fft_f64(
1378        width: usize,
1379        height: usize,
1380        fft_direction: FftDirection,
1381        thread_count: usize,
1382    ) -> Result<Arc<dyn TwoDimensionalFftExecutor<f64> + Send + Sync>, ZaftError> {
1383        if width * height == 0 {
1384            return Err(ZaftError::ZeroSizedFft);
1385        }
1386        let fft_width = match fft_direction {
1387            FftDirection::Forward => width,
1388            FftDirection::Inverse => height,
1389        };
1390        let fft_height = match fft_direction {
1391            FftDirection::Forward => height,
1392            FftDirection::Inverse => width,
1393        };
1394        let width_fft = match fft_direction {
1395            FftDirection::Forward => Zaft::make_forward_fft_f64(fft_width)?,
1396            FftDirection::Inverse => Zaft::make_inverse_fft_f64(fft_width)?,
1397        };
1398        let height_fft = match fft_direction {
1399            FftDirection::Forward => Zaft::make_forward_fft_f64(fft_height)?,
1400            FftDirection::Inverse => Zaft::make_inverse_fft_f64(fft_height)?,
1401        };
1402        let oof_scratch_width = width_fft.out_of_place_scratch_length();
1403        let inplace_scratch_height = height_fft.scratch_length();
1404        Ok(Arc::new(TwoDimensionalC2C {
1405            width_c2c_executor: width_fft,
1406            height_c2c_executor: height_fft,
1407            thread_count: thread_count.max(1),
1408            width: fft_width,
1409            height: fft_height,
1410            transpose_width_to_height: f64::transpose_strategy(fft_width, fft_height),
1411            oof_width_scratch_size: oof_scratch_width,
1412            height_scratch_size: inplace_scratch_height,
1413        }))
1414    }
1415
1416    /// Creates two-dimensional Complex-to-Real (C2R) Inverse FFT plan executor
1417    /// for **single-precision floating-point numbers** (`f32`).
1418    ///
1419    /// This plan transforms a **Hermitian symmetric complex** frequency-domain input array (the output of an R2C FFT)
1420    /// back into a **real-valued** time-domain output array of size `height x width`. The executor is configured
1421    /// for parallel execution across the specified number of threads.
1422    ///
1423    /// # Parameters
1424    /// * `width`: The number of elements in the X-dimension (**columns**) of the final real output.
1425    /// * `height`: The number of elements in the Y-dimension (**rows**) of the final real output.
1426    /// * `thread_count`: The maximum number of threads the executor is allowed to use during the parallel computation.
1427    ///
1428    /// # Returns
1429    /// A `Result` containing an `Arc` to a dynamically dispatched `TwoDimensionalExecutorC2R<f32>` plan,
1430    /// or a `ZaftError` if the plan cannot be generated (e.g., `ZeroSizedFft`).
1431    ///
1432    /// # Errors
1433    /// Returns `ZaftError::ZeroSizedFft` if `width * height` is zero.
1434    pub fn make_2d_c2r_fft_f32(
1435        width: usize,
1436        height: usize,
1437        thread_count: usize,
1438    ) -> Result<Arc<dyn TwoDimensionalExecutorC2R<f32> + Send + Sync>, ZaftError> {
1439        if width * height == 0 {
1440            return Err(ZaftError::ZeroSizedFft);
1441        }
1442        let width_fft = Zaft::make_c2r_fft_f32(width)?;
1443        let height_fft = Zaft::make_inverse_fft_f32(height)?;
1444        let width_scratch_length = width_fft.complex_scratch_length();
1445        let height_scratch_length = height_fft.scratch_length();
1446        Ok(Arc::new(TwoDimensionalC2R {
1447            width_c2r_executor: width_fft,
1448            height_c2c_executor: height_fft,
1449            thread_count: thread_count.max(1),
1450            width,
1451            height,
1452            transpose_height_to_width: f32::transpose_strategy(height, (width / 2) + 1),
1453            width_scratch_length,
1454            height_scratch_length,
1455        }))
1456    }
1457
1458    /// Creates two-dimensional Complex-to-Real (C2R) Inverse FFT plan executor
1459    /// for **double-precision floating-point numbers** (`f64`).
1460    ///
1461    /// This is the double-precision equivalent of `make_2d_c2r_fft_f32`. It transforms the complex,
1462    /// frequency-domain input back into a real-valued array of size `height x width`.
1463    ///
1464    /// # Parameters
1465    /// * `width`: The number of elements in the X-dimension (**columns**) of the final real output.
1466    /// * `height`: The number of elements in the Y-dimension (**rows**) of the final real output.
1467    /// * `thread_count`: The maximum number of threads the executor is allowed to use during the parallel computation.
1468    ///
1469    /// # Returns
1470    /// A `Result` containing an `Arc` to a dynamically dispatched `TwoDimensionalExecutorC2R<f64>` plan,
1471    /// or a `ZaftError` if the plan cannot be generated.
1472    ///
1473    /// # Errors
1474    /// Returns `ZaftError::ZeroSizedFft` if `width * height` is zero.
1475    pub fn make_2d_c2r_fft_f64(
1476        width: usize,
1477        height: usize,
1478        thread_count: usize,
1479    ) -> Result<Arc<dyn TwoDimensionalExecutorC2R<f64> + Send + Sync>, ZaftError> {
1480        if width * height == 0 {
1481            return Err(ZaftError::ZeroSizedFft);
1482        }
1483        let width_fft = Zaft::make_c2r_fft_f64(width)?;
1484        let height_fft = Zaft::make_inverse_fft_f64(height)?;
1485        let width_scratch_length = width_fft.complex_scratch_length();
1486        let height_scratch_length = height_fft.scratch_length();
1487        Ok(Arc::new(TwoDimensionalC2R {
1488            width_c2r_executor: width_fft,
1489            height_c2c_executor: height_fft,
1490            thread_count: thread_count.max(1),
1491            width,
1492            height,
1493            transpose_height_to_width: f64::transpose_strategy(height, (width / 2) + 1),
1494            width_scratch_length,
1495            height_scratch_length,
1496        }))
1497    }
1498}
1499
1500/// Specifies the direction of a Fast Fourier Transform (FFT) operation.
1501///
1502/// This enum is used to configure FFT executors, indicating whether the transform should
1503/// map from the time domain to the frequency domain, or vice versa.
1504#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
1505pub enum FftDirection {
1506    /// Represents the **Forward** transform, which typically maps data from the **time or spatial domain**
1507    /// into the **frequency domain**.
1508    Forward,
1509    /// Represents the **Inverse** transform, which maps data back from the **frequency domain**
1510    /// into the **time or spatial domain**.
1511    Inverse,
1512}
1513
1514impl FftDirection {
1515    pub fn inverse(self) -> FftDirection {
1516        match self {
1517            FftDirection::Forward => FftDirection::Inverse,
1518            FftDirection::Inverse => FftDirection::Forward,
1519        }
1520    }
1521}
1522
1523impl Display for FftDirection {
1524    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1525        match self {
1526            FftDirection::Forward => f.write_str("FftDirection::Forward"),
1527            FftDirection::Inverse => f.write_str("FftDirection::Inverse"),
1528        }
1529    }
1530}
1531
1532#[cfg(test)]
1533mod tests {
1534    use crate::Zaft;
1535    use num_complex::Complex;
1536    use num_traits::Zero;
1537
1538    #[test]
1539    fn power_of_four() {
1540        fn is_power_of_four(n: u64) -> bool {
1541            n != 0 && (n & (n - 1)) == 0 && (n & 0x5555_5555_5555_5555) != 0
1542        }
1543        assert_eq!(is_power_of_four(4), true);
1544        assert_eq!(is_power_of_four(8), false);
1545        assert_eq!(is_power_of_four(16), true);
1546        assert_eq!(is_power_of_four(20), false);
1547    }
1548
1549    #[test]
1550    fn test_everything_f32() {
1551        for i in 1..2010 {
1552            let mut data = vec![Complex::new(0.0019528865, 0.); i];
1553            for (i, chunk) in data.iter_mut().enumerate() {
1554                *chunk = Complex::new(
1555                    -0.19528865 + i as f32 * 0.001,
1556                    0.0019528865 - i as f32 * 0.001,
1557                );
1558            }
1559            let zaft_exec = Zaft::make_forward_fft_f32(data.len()).expect("Failed to make FFT!");
1560            let zaft_inverse = Zaft::make_inverse_fft_f32(data.len()).expect("Failed to make FFT!");
1561            let reference_clone = data.clone();
1562            zaft_exec
1563                .execute(&mut data)
1564                .expect(&format!("Failed to execute forward FFT for size {i}!"));
1565            zaft_inverse
1566                .execute(&mut data)
1567                .expect(&format!("Failed to execute inverse FFT for size {i}!"));
1568            let data_len = 1. / data.len() as f32;
1569            for i in data.iter_mut() {
1570                *i *= data_len;
1571            }
1572            data.iter()
1573                .zip(reference_clone)
1574                .enumerate()
1575                .for_each(|(idx, (a, b))| {
1576                    assert!(
1577                        (a.re - b.re).abs() < 1e-2,
1578                        "a_re {}, b_re {} at {idx}, for size {i}",
1579                        a.re,
1580                        b.re
1581                    );
1582                    assert!(
1583                        (a.im - b.im).abs() < 1e-2,
1584                        "a_re {}, b_re {} at {idx}, for size {i}",
1585                        a.im,
1586                        b.im
1587                    );
1588                });
1589        }
1590    }
1591
1592    #[test]
1593    fn test_everything_oof_f32() {
1594        for i in 1..2010 {
1595            let mut data = vec![Complex::new(0.0019528865, 0.); i];
1596            let mut scratch = data.to_vec();
1597            for (i, chunk) in data.iter_mut().enumerate() {
1598                *chunk = Complex::new(
1599                    -0.19528865 + i as f32 * 0.001,
1600                    0.0019528865 - i as f32 * 0.001,
1601                );
1602            }
1603            let zaft_exec = Zaft::make_forward_fft_f32(data.len()).expect("Failed to make FFT!");
1604            let zaft_inverse = Zaft::make_inverse_fft_f32(data.len()).expect("Failed to make FFT!");
1605            let reference_clone = data.clone();
1606            zaft_exec
1607                .execute_out_of_place(&data, &mut scratch)
1608                .expect(&format!("Failed to execute forward FFT for size {i}!"));
1609            zaft_inverse
1610                .execute_out_of_place(&scratch, &mut data)
1611                .expect(&format!("Failed to execute inverse FFT for size {i}!"));
1612            let data_len = 1. / data.len() as f32;
1613            for i in data.iter_mut() {
1614                *i *= data_len;
1615            }
1616            data.iter()
1617                .zip(reference_clone)
1618                .enumerate()
1619                .for_each(|(idx, (a, b))| {
1620                    assert!(
1621                        (a.re - b.re).abs() < 1e-2,
1622                        "a_re {}, b_re {} at {idx}, for size {i}",
1623                        a.re,
1624                        b.re
1625                    );
1626                    assert!(
1627                        (a.im - b.im).abs() < 1e-2,
1628                        "a_re {}, b_re {} at {idx}, for size {i}",
1629                        a.im,
1630                        b.im
1631                    );
1632                });
1633        }
1634    }
1635
1636    #[test]
1637    fn test_everything_f64() {
1638        for i in 1..1900 {
1639            let mut data = vec![Complex::new(0.0019528865, 0.); i];
1640            for (i, chunk) in data.iter_mut().enumerate() {
1641                *chunk = Complex::new(
1642                    -0.19528865 + i as f64 * 0.001,
1643                    0.0019528865 - i as f64 * 0.001,
1644                );
1645            }
1646            let zaft_exec = Zaft::make_forward_fft_f64(data.len()).expect("Failed to make FFT!");
1647            let zaft_inverse = Zaft::make_inverse_fft_f64(data.len()).expect("Failed to make FFT!");
1648            let rust_fft_clone = data.clone();
1649            zaft_exec
1650                .execute(&mut data)
1651                .expect(&format!("Failed to execute forward FFT for size {i}!"));
1652            zaft_inverse
1653                .execute(&mut data)
1654                .expect(&format!("Failed to execute inverse FFT for size {i}!"));
1655            let data_len = 1. / data.len() as f64;
1656            for i in data.iter_mut() {
1657                *i *= data_len;
1658            }
1659            data.iter()
1660                .zip(rust_fft_clone)
1661                .enumerate()
1662                .for_each(|(idx, (a, b))| {
1663                    assert!(
1664                        (a.re - b.re).abs() < 1e-6,
1665                        "a_re {}, b_re {} at {idx}, for size {i}",
1666                        a.re,
1667                        b.re
1668                    );
1669                    assert!(
1670                        (a.im - b.im).abs() < 1e-6,
1671                        "a_im {}, b_im {} at {idx}, for size {i}",
1672                        a.im,
1673                        b.im
1674                    );
1675                });
1676        }
1677    }
1678
1679    #[test]
1680    fn test_everything_oof_f64() {
1681        for i in 1..1900 {
1682            let mut data = vec![Complex::new(0.0019528865, 0.); i];
1683            let mut scratch = data.clone();
1684            for (i, chunk) in data.iter_mut().enumerate() {
1685                *chunk = Complex::new(
1686                    -0.19528865 + i as f64 * 0.001,
1687                    0.0019528865 - i as f64 * 0.001,
1688                );
1689            }
1690            let zaft_exec = Zaft::make_forward_fft_f64(data.len()).expect("Failed to make FFT!");
1691            let zaft_inverse = Zaft::make_inverse_fft_f64(data.len()).expect("Failed to make FFT!");
1692            let rust_fft_clone = data.clone();
1693            zaft_exec
1694                .execute_out_of_place(&data, &mut scratch)
1695                .expect(&format!("Failed to execute forward FFT for size {i}!"));
1696            zaft_inverse
1697                .execute_out_of_place(&scratch, &mut data)
1698                .expect(&format!("Failed to execute inverse FFT for size {i}!"));
1699            let data_len = 1. / data.len() as f64;
1700            for i in data.iter_mut() {
1701                *i *= data_len;
1702            }
1703            data.iter()
1704                .zip(rust_fft_clone)
1705                .enumerate()
1706                .for_each(|(idx, (a, b))| {
1707                    assert!(
1708                        (a.re - b.re).abs() < 1e-6,
1709                        "a_re {}, b_re {} at {idx}, for size {i}",
1710                        a.re,
1711                        b.re
1712                    );
1713                    assert!(
1714                        (a.im - b.im).abs() < 1e-6,
1715                        "a_im {}, b_im {} at {idx}, for size {i}",
1716                        a.im,
1717                        b.im
1718                    );
1719                });
1720        }
1721    }
1722
1723    #[test]
1724    fn test_destructive_everything_f64() {
1725        for i in 1..1900 {
1726            let mut data = vec![Complex::new(0.0019528865, 0.); i];
1727            for (i, chunk) in data.iter_mut().enumerate() {
1728                *chunk = Complex::new(
1729                    -0.19528865 + i as f64 * 0.001,
1730                    0.0019528865 - i as f64 * 0.001,
1731                );
1732            }
1733            let zaft_exec = Zaft::make_forward_fft_f64(data.len()).expect("Failed to make FFT!");
1734            let zaft_inverse = Zaft::make_inverse_fft_f64(data.len()).expect("Failed to make FFT!");
1735            let rust_fft_clone = data.clone();
1736            let mut fwd = vec![Complex::zero(); data.len()];
1737            let mut scratch = vec![Complex::zero(); zaft_exec.destructive_scratch_length()];
1738            zaft_exec
1739                .execute_destructive_with_scratch(&mut data, &mut fwd, &mut scratch)
1740                .expect(&format!("Failed to execute forward FFT for size {i}!"));
1741            zaft_inverse
1742                .execute_destructive_with_scratch(&mut fwd, &mut data, &mut scratch)
1743                .expect(&format!("Failed to execute inverse FFT for size {i}!"));
1744            let data_len = 1. / data.len() as f64;
1745            for i in data.iter_mut() {
1746                *i *= data_len;
1747            }
1748            data.iter()
1749                .zip(rust_fft_clone)
1750                .enumerate()
1751                .for_each(|(idx, (a, b))| {
1752                    assert!(
1753                        (a.re - b.re).abs() < 1e-6,
1754                        "a_re {}, b_re {} at {idx}, for size {i}",
1755                        a.re,
1756                        b.re
1757                    );
1758                    assert!(
1759                        (a.im - b.im).abs() < 1e-6,
1760                        "a_im {}, b_im {} at {idx}, for size {i}",
1761                        a.im,
1762                        b.im
1763                    );
1764                });
1765        }
1766    }
1767
1768    #[test]
1769    fn test_destructive_everything_oof_f32() {
1770        for i in 1..2010 {
1771            let mut data = vec![Complex::new(0.0019528865, 0.); i];
1772            for (i, chunk) in data.iter_mut().enumerate() {
1773                *chunk = Complex::new(
1774                    -0.19528865 + i as f32 * 0.001,
1775                    0.0019528865 - i as f32 * 0.001,
1776                );
1777            }
1778            let zaft_exec = Zaft::make_forward_fft_f32(data.len()).expect("Failed to make FFT!");
1779            let zaft_inverse = Zaft::make_inverse_fft_f32(data.len()).expect("Failed to make FFT!");
1780            let reference_clone = data.clone();
1781            let mut scratch = vec![Complex::zero(); zaft_exec.destructive_scratch_length()];
1782            let mut target = vec![Complex::zero(); data.len()];
1783            zaft_exec
1784                .execute_destructive_with_scratch(&mut data, &mut target, &mut scratch)
1785                .expect(&format!("Failed to execute forward FFT for size {i}!"));
1786            zaft_inverse
1787                .execute_destructive_with_scratch(&mut target, &mut data, &mut scratch)
1788                .expect(&format!("Failed to execute inverse FFT for size {i}!"));
1789            let data_len = 1. / data.len() as f32;
1790            for i in data.iter_mut() {
1791                *i *= data_len;
1792            }
1793            data.iter()
1794                .zip(reference_clone)
1795                .enumerate()
1796                .for_each(|(idx, (a, b))| {
1797                    assert!(
1798                        (a.re - b.re).abs() < 1e-2,
1799                        "a_re {}, b_re {} at {idx}, for size {i}",
1800                        a.re,
1801                        b.re
1802                    );
1803                    assert!(
1804                        (a.im - b.im).abs() < 1e-2,
1805                        "a_re {}, b_re {} at {idx}, for size {i}",
1806                        a.im,
1807                        b.im
1808                    );
1809                });
1810        }
1811    }
1812}