simd_kernels/kernels/scientific/
fft.rs

1// Copyright Peter Bower 2025. All Rights Reserved.
2// Licensed under Mozilla Public License (MPL) 2.0.
3
4//! # **Fast Fourier Transform Module** - *High-Performance Frequency Domain Analysis*
5//!
6//! This module implements optimised Fast Fourier Transform (FFT) algorithms for efficient
7//! frequency domain analysis and signal processing applications. It provides both small-scale
8//! radix-optimised transforms and large-scale blocked implementations for scientific
9//! computing and digital signal processing workflows.
10//!
11//! ## Use cases
12//!
13//! The Fast Fourier Transform is fundamental to numerous computational domains:
14//! - **Digital Signal Processing**: Spectral analysis, filtering, and convolution
15//! - **Image Processing**: Frequency domain transformations and enhancement
16//! - **Scientific Computing**: Numerical solution of PDEs via spectral methods
17//! - **Audio Processing**: Frequency analysis and synthesis
18//! - **Telecommunications**: Modulation, demodulation, and channel analysis
19//! - **Machine Learning**: Feature extraction and data preprocessing
20
21use crate::errors::KernelError;
22use minarrow::{FloatArray, Vec64};
23use num_complex::Complex64;
24
25#[inline(always)]
26pub fn butterfly_radix8(buf: &mut [Complex64]) {
27    debug_assert_eq!(buf.len(), 8);
28
29    // Split into even/odd halves and reuse temporaries to minimise loads
30    let (x0, x1, x2, x3, x4, x5, x6, x7) = (
31        buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
32    );
33
34    // First layer (radix-2 butterflies)
35    let a04 = x0 + x4;
36    let s04 = x0 - x4;
37    let a26 = x2 + x6;
38    let s26 = x2 - x6;
39    let a15 = x1 + x5;
40    let s15 = x1 - x5;
41    let a37 = x3 + x7;
42    let s37 = x3 - x7;
43
44    // Second layer (radix-4)
45    let a04a26 = a04 + a26;
46    let a04s26 = a04 - a26;
47    let a15a37 = a15 + a37;
48    let a15s37 = a15 - a37;
49
50    // Calculate ±i·(something) once
51    const J: Complex64 = Complex64 { re: 0.0, im: 1.0 };
52
53    // Radix-8 output
54    buf[0] = a04a26 + a15a37;
55    buf[4] = a04a26 - a15a37;
56
57    let t0 = s04 + J * s26;
58    let t1 = s15 + J * s37;
59    buf[2] = t0 + Complex64::new(0.0, -1.0) * t1; //  e^{-jπ/2}
60    buf[6] = t0 + Complex64::new(0.0, 1.0) * t1; //  e^{ jπ/2}
61
62    let u0 = a04s26;
63    let u1 = Complex64::new(0.0, -1.0) * a15s37;
64    buf[1] = u0 + u1; //  e^{-jπ/4} merged factor
65    buf[5] = u0 - u1; //  e^{ 3jπ/4}
66
67    let v0 = s04 - J * s26;
68    let v1 = s15 - J * s37;
69    buf[3] = v0 - Complex64::new(0.0, 1.0) * v1; //  e^{ jπ/2}
70    buf[7] = v0 - Complex64::new(0.0, -1.0) * v1; //  e^{-jπ/2}
71}
72
73// In-place radix-4 DIT for 4 points.
74#[inline(always)]
75fn fft4_in_place(x: &mut [Complex64; 4]) {
76    let x0 = x[0]; let x1 = x[1]; let x2 = x[2]; let x3 = x[3];
77
78    let a = x0 + x2;          // (0)+(2)
79    let b = x0 - x2;          // (0)-(2)
80    let c = x1 + x3;          // (1)+(3)
81    let d = (x1 - x3) * Complex64::new(0.0, -1.0); // (1)-(3) times -j
82
83    x[0] = a + c;             // k=0
84    x[2] = a - c;             // k=2
85    x[1] = b + d;             // k=1
86    x[3] = b - d;             // k=3
87}
88
89/// 8-point FFT
90/// 8-point FFT (radix-2/4 DIT): split evens/odds -> FFT4 each -> twiddle & combine.
91#[inline(always)]
92pub fn fft8_radix(
93    buf: &mut [Complex64; 8],
94) -> Result<(FloatArray<f64>, FloatArray<f64>), KernelError> {
95    // Split into evens and odds
96    let mut even = [buf[0], buf[2], buf[4], buf[6]];
97    let mut odd  = [buf[1], buf[3], buf[5], buf[7]];
98
99    // 4-point FFTs
100    fft4_in_place(&mut even);
101    fft4_in_place(&mut odd);
102
103    // Twiddles W8^k = exp(-j*2π*k/8)
104    // W8^0 = 1 + 0j
105    // W8^1 =  √2/2 - j√2/2
106    // W8^2 =  0   - j
107    // W8^3 = -√2/2 - j√2/2
108    let s = std::f64::consts::FRAC_1_SQRT_2;
109    let w1 = Complex64::new( s, -s);
110    let w2 = Complex64::new( 0.0, -1.0);
111    let w3 = Complex64::new(-s, -s);
112
113    let t0 = odd[0];         // W8^0 * odd[0]
114    let t1 = w1 * odd[1];    // W8^1 * odd[1]
115    let t2 = w2 * odd[2];    // W8^2 * odd[2]
116    let t3 = w3 * odd[3];    // W8^3 * odd[3]
117
118    buf[0] = even[0] + t0;
119    buf[4] = even[0] - t0;
120
121    buf[1] = even[1] + t1;
122    buf[5] = even[1] - t1;
123
124    buf[2] = even[2] + t2;
125    buf[6] = even[2] - t2;
126
127    buf[3] = even[3] + t3;
128    buf[7] = even[3] - t3;
129
130    // Package outputs (same as your function did)
131    let mut real = Vec64::with_capacity(8);
132    let mut imag = Vec64::with_capacity(8);
133    for &z in buf.iter() {
134        real.push(z.re);
135        imag.push(z.im);
136    }
137    Ok((FloatArray::new(real, None), FloatArray::new(imag, None)))
138}
139
140/// Power-of-two, in-place FFT (≥8, radix-2 stages, radix-8 leaf).
141#[inline]
142pub fn block_fft(
143    data: &mut [Complex64],
144) -> Result<(FloatArray<f64>, FloatArray<f64>), KernelError> {
145    let n = data.len();
146    if n < 2 || (n & (n - 1)) != 0 {
147        return Err(KernelError::InvalidArguments(
148            "block_fft: N must be power-of-two and ≥2".into(),
149        ));
150    }
151
152    // bit-reversal permutation
153    let bits = n.trailing_zeros();
154    for i in 0..n {
155        let rev = i.reverse_bits() >> (usize::BITS - bits);
156        if i < rev { data.swap(i, rev); }
157    }
158
159    // iterative radix-2 DIT
160    let mut m = 2;
161    while m <= n {
162        let half = m / 2;
163        let theta = -2.0 * std::f64::consts::PI / (m as f64);
164        let w_m = Complex64::from_polar(1.0, theta);
165
166        for k in (0..n).step_by(m) {
167            let mut w = Complex64::new(1.0, 0.0);
168            for j in 0..half {
169                let t = w * data[k + j + half];
170                let u = data[k + j];
171                data[k + j]        = u + t;
172                data[k + j + half] = u - t;
173                w *= w_m;
174            }
175        }
176        m <<= 1;
177    }
178
179    let mut real = Vec64::with_capacity(n);
180    let mut imag = Vec64::with_capacity(n);
181    for &z in data.iter() {
182        real.push(z.re);
183        imag.push(z.im);
184    }
185    Ok((FloatArray::new(real, None), FloatArray::new(imag, None)))
186}
187
188
189#[cfg(test)]
190mod tests {
191    use super::*;
192    use num_complex::Complex64;
193    use rand::Rng;
194
195
196    // ---- SciPy/NumPy FFT references ----
197
198    fn scipy_fft_ref_8_seq_0_7() -> [Complex64; 8] {
199        [
200            Complex64::new(28.0, 0.0),
201            Complex64::new(-4.0, 9.6568542494923797),
202            Complex64::new(-4.0, 4.0),
203            Complex64::new(-4.0, 1.6568542494923806),
204            Complex64::new(-4.0, 0.0),
205            Complex64::new(-4.0, -1.6568542494923806),
206            Complex64::new(-4.0, -4.0),
207            Complex64::new(-4.0, -9.6568542494923797),
208        ]
209    }
210
211    fn scipy_fft_ref_16_seq_0_15() -> [Complex64; 16] {
212        [
213            Complex64::new(120.0, 0.0),
214            Complex64::new(-7.9999999999999991, 40.218715937006785),
215            Complex64::new(-8.0, 19.313708498984759),
216            Complex64::new(-7.9999999999999991, 11.972846101323913),
217            Complex64::new(-8.0, 8.0),
218            Complex64::new(-8.0, 5.345429103354391),
219            Complex64::new(-8.0, 3.3137084989847612),
220            Complex64::new(-8.0, 1.5912989390372658),
221            Complex64::new(-8.0, 0.0),
222            Complex64::new(-7.9999999999999991, -1.5912989390372658),
223            Complex64::new(-8.0, -3.3137084989847612),
224            Complex64::new(-7.9999999999999991, -5.3454291033543946),
225            Complex64::new(-8.0, -8.0),
226            Complex64::new(-8.0, -11.97284610132391),
227            Complex64::new(-8.0, -19.313708498984759),
228            Complex64::new(-8.0, -40.218715937006785),
229        ]
230    }
231
232    #[test]
233    fn butterfly_radix8_impulse_all_ones() {
234        let mut buf = [
235            Complex64::new(1.0, 0.0),
236            Complex64::new(0.0, 0.0),
237            Complex64::new(0.0, 0.0),
238            Complex64::new(0.0, 0.0),
239            Complex64::new(0.0, 0.0),
240            Complex64::new(0.0, 0.0),
241            Complex64::new(0.0, 0.0),
242            Complex64::new(0.0, 0.0),
243        ];
244        butterfly_radix8(&mut buf);
245        let ones = [Complex64::new(1.0, 0.0); 8];
246        assert_vec_close(&buf, &ones, 1e-15);
247    }
248
249    #[test]
250    fn fft8_radix_matches_scipy_seq0_7() {
251        let mut buf = [
252            Complex64::new(0.0, 0.0),
253            Complex64::new(1.0, 0.0),
254            Complex64::new(2.0, 0.0),
255            Complex64::new(3.0, 0.0),
256            Complex64::new(4.0, 0.0),
257            Complex64::new(5.0, 0.0),
258            Complex64::new(6.0, 0.0),
259            Complex64::new(7.0, 0.0),
260        ];
261        let (_re, _im) = fft8_radix(&mut buf).unwrap();
262        let ref_out = scipy_fft_ref_8_seq_0_7();
263        assert_vec_close(&buf, &ref_out, 1e-12);
264    }
265
266    #[test]
267    fn block_fft_matches_scipy_seq0_7() {
268        let mut data = (0..8).map(|v| Complex64::new(v as f64, 0.0)).collect::<Vec<_>>();
269        let (_re, _im) = block_fft(&mut data).unwrap();
270        let ref_out = scipy_fft_ref_8_seq_0_7();
271        assert_vec_close(&data, &ref_out, 1e-12);
272    }
273
274    #[test]
275    fn block_fft_matches_scipy_seq0_15() {
276        let mut data = (0..16).map(|v| Complex64::new(v as f64, 0.0)).collect::<Vec<_>>();
277        let (_re, _im) = block_fft(&mut data).unwrap();
278        let ref_out = scipy_fft_ref_16_seq_0_15();
279        assert_vec_close(&data, &ref_out, 1e-11);
280    }
281
282    // Basic DFT for validation
283    fn dft_naive(x: &[Complex64]) -> Vec<Complex64> {
284        let n = x.len() as f64;
285        (0..x.len())
286            .map(|k| {
287                let mut sum = Complex64::new(0.0, 0.0);
288                for (n_idx, &val) in x.iter().enumerate() {
289                    let angle = -2.0 * std::f64::consts::PI * (k as f64) * (n_idx as f64) / n;
290                    sum += val * Complex64::from_polar(1.0, angle);
291                }
292                sum
293            })
294            .collect()
295    }
296
297    fn assert_vec_close(a: &[Complex64], b: &[Complex64], eps: f64) {
298        assert_eq!(a.len(), b.len());
299        for (x, y) in a.iter().zip(b) {
300            assert!((x - y).norm() < eps, "mismatch: x={:?}, y={:?}", x, y);
301        }
302    }
303
304    #[test]
305    fn radix8_exact() {
306        let mut buf = [
307            Complex64::new(0.0, 0.0),
308            Complex64::new(1.0, 0.0),
309            Complex64::new(2.0, 0.0),
310            Complex64::new(3.0, 0.0),
311            Complex64::new(4.0, 0.0),
312            Complex64::new(5.0, 0.0),
313            Complex64::new(6.0, 0.0),
314            Complex64::new(7.0, 0.0),
315        ];
316        let (_, _) = fft8_radix(&mut buf).unwrap();
317        let ref_out = dft_naive(&[
318            Complex64::new(0.0, 0.0),
319            Complex64::new(1.0, 0.0),
320            Complex64::new(2.0, 0.0),
321            Complex64::new(3.0, 0.0),
322            Complex64::new(4.0, 0.0),
323            Complex64::new(5.0, 0.0),
324            Complex64::new(6.0, 0.0),
325            Complex64::new(7.0, 0.0),
326        ]);
327        assert_vec_close(&buf, &ref_out, 1e-12);
328    }
329
330    #[test]
331    fn block_fft_random_lengths() {
332        let mut rng = rand::rng();
333        for &n in &[8, 16, 32, 64, 128, 256, 512, 1024] {
334            let mut data: Vec<Complex64> = (0..n)
335                .map(|_| Complex64::new(rng.random(), rng.random()))
336                .collect();
337            let ref_data = data.clone();
338            let (_, _) = block_fft(&mut data).unwrap();
339            let ref_out = dft_naive(&ref_data);
340            assert_vec_close(&data, &ref_out, 1e-9); // generous for large n
341        }
342    }
343
344    #[test]
345    fn block_fft_power_of_two_check() {
346        let mut bad = vec![Complex64::new(0.0, 0.0); 12]; // not power of two
347        assert!(block_fft(&mut bad).is_err());
348    }
349}
simd_kernels/kernels/scientific/fft.rs

simd_kernels/kernels/scientific/
fft.rs