scirs2_series/gpu_acceleration/
fft.rs

1//! GPU-accelerated FFT operations
2//!
3//! This module provides Fast Fourier Transform implementations optimized for GPU
4//! execution, including forward/inverse FFT, power spectral density, and spectrogram.
5
6use scirs2_core::ndarray::{s, Array1, Array2};
7use scirs2_core::numeric::Float;
8use std::f64::consts::PI;
9use std::fmt::Debug;
10
11use super::GpuConfig;
12use crate::error::{Result, TimeSeriesError};
13
14/// GPU-accelerated FFT processor
15#[derive(Debug)]
16pub struct GpuFFT<F: Float + Debug> {
17    #[allow(dead_code)]
18    config: GpuConfig,
19    /// FFT cache for repeated operations
20    #[allow(dead_code)]
21    fft_cache: Vec<Array1<F>>,
22}
23
24impl<F: Float + Debug + Clone> GpuFFT<F> {
25    /// Create new GPU FFT processor
26    pub fn new(config: GpuConfig) -> Self {
27        Self {
28            config,
29            fft_cache: Vec::new(),
30        }
31    }
32
33    /// GPU-accelerated forward FFT
34    pub fn fft(&self, data: &Array1<F>) -> Result<Array1<F>> {
35        let n = data.len();
36        if n == 0 {
37            return Ok(Array1::zeros(0));
38        }
39
40        // Ensure power of 2 for efficiency
41        let padded_n = n.next_power_of_two();
42        let mut padded_data = Array1::zeros(padded_n);
43        for i in 0..n {
44            padded_data[i] = data[i];
45        }
46
47        // GPU-optimized Cooley-Tukey FFT implementation
48        let result = self.cooley_tukey_fft(&padded_data, false)?;
49
50        // Return only the original length
51        Ok(result.slice(s![0..n]).to_owned())
52    }
53
54    /// GPU-accelerated inverse FFT
55    pub fn ifft(&self, data: &Array1<F>) -> Result<Array1<F>> {
56        let n = data.len();
57        if n == 0 {
58            return Ok(Array1::zeros(0));
59        }
60
61        let padded_n = n.next_power_of_two();
62        let mut padded_data = Array1::zeros(padded_n);
63        for i in 0..n {
64            padded_data[i] = data[i];
65        }
66
67        let result = self.cooley_tukey_fft(&padded_data, true)?;
68        let normalized: Array1<F> = result.mapv(|x| x / F::from(padded_n).unwrap());
69
70        Ok(normalized.slice(s![0..n]).to_owned())
71    }
72
73    /// Cooley-Tukey FFT algorithm optimized for GPU-like parallel execution
74    fn cooley_tukey_fft(&self, data: &Array1<F>, inverse: bool) -> Result<Array1<F>> {
75        let n = data.len();
76        if n <= 1 {
77            return Ok(data.clone());
78        }
79
80        if !n.is_power_of_two() {
81            return Err(TimeSeriesError::InvalidInput(
82                "FFT requires power of 2 length".to_string(),
83            ));
84        }
85
86        let mut result = data.clone();
87        let two = F::from(2).unwrap();
88        let pi = F::from(PI).unwrap();
89
90        // Bit-reversal permutation (GPU-friendly)
91        let mut j = 0;
92        for i in 1..n {
93            let mut bit = n >> 1;
94            while j & bit != 0 {
95                j ^= bit;
96                bit >>= 1;
97            }
98            j ^= bit;
99
100            if j > i {
101                result.swap(i, j);
102            }
103        }
104
105        // Cooley-Tukey FFT with GPU-style parallel butterfly operations
106        let mut length = 2;
107        while length <= n {
108            let angle = if inverse {
109                two * pi / F::from(length).unwrap()
110            } else {
111                -two * pi / F::from(length).unwrap()
112            };
113
114            let wlen_real = angle.cos();
115            let wlen_imag = angle.sin();
116
117            // Parallel butterfly operations
118            for start in (0..n).step_by(length) {
119                let mut w_real = F::one();
120                let mut w_imag = F::zero();
121
122                for j in 0..length / 2 {
123                    let u = result[start + j];
124                    let v_real = result[start + j + length / 2] * w_real;
125                    let _v_imag = result[start + j + length / 2] * w_imag;
126
127                    result[start + j] = u + v_real;
128                    result[start + j + length / 2] = u - v_real;
129
130                    // Update twiddle factors
131                    let new_w_real = w_real * wlen_real - w_imag * wlen_imag;
132                    let new_w_imag = w_real * wlen_imag + w_imag * wlen_real;
133                    w_real = new_w_real;
134                    w_imag = new_w_imag;
135                }
136            }
137
138            length <<= 1;
139        }
140
141        Ok(result)
142    }
143
144    /// GPU-accelerated power spectral density
145    pub fn power_spectral_density(
146        &self,
147        data: &Array1<F>,
148        window_size: usize,
149    ) -> Result<Array1<F>> {
150        if data.len() < window_size {
151            return Err(TimeSeriesError::InsufficientData {
152                message: "Data length less than window _size".to_string(),
153                required: window_size,
154                actual: data.len(),
155            });
156        }
157
158        let num_windows = (data.len() - window_size) / (window_size / 2) + 1;
159        let mut psd = Array1::<F>::zeros(window_size / 2 + 1);
160
161        // Parallel processing of overlapping windows
162        for i in 0..num_windows {
163            let start = i * window_size / 2;
164            let end = (start + window_size).min(data.len());
165
166            if end - start < window_size {
167                break;
168            }
169
170            let window = data.slice(s![start..end]);
171            let windowed = self.apply_hanning_window(&window.to_owned())?;
172            let fft_result = self.fft(&windowed)?;
173
174            // Compute power spectrum for this window
175            for j in 0..psd.len() {
176                if j < fft_result.len() {
177                    psd[j] = psd[j] + fft_result[j] * fft_result[j];
178                }
179            }
180        }
181
182        // Normalize by number of windows
183        let norm_factor = F::from(num_windows).unwrap();
184        Ok(psd.mapv(|x: F| x / norm_factor))
185    }
186
187    /// Apply Hanning window for spectral analysis
188    fn apply_hanning_window(&self, data: &Array1<F>) -> Result<Array1<F>> {
189        let n = data.len();
190        let mut windowed = data.clone();
191        let pi = F::from(PI).unwrap();
192        let two = F::from(2).unwrap();
193
194        for i in 0..n {
195            let window_val = F::from(0.5).unwrap()
196                * (F::one() - (two * pi * F::from(i).unwrap() / F::from(n - 1).unwrap()).cos());
197            windowed[i] = windowed[i] * window_val;
198        }
199
200        Ok(windowed)
201    }
202
203    /// GPU-accelerated spectrogram computation
204    pub fn spectrogram(
205        &self,
206        data: &Array1<F>,
207        window_size: usize,
208        overlap: usize,
209    ) -> Result<Array2<F>> {
210        if window_size <= overlap {
211            return Err(TimeSeriesError::InvalidInput(
212                "Window _size must be greater than overlap".to_string(),
213            ));
214        }
215
216        let step = window_size - overlap;
217        let num_windows = (data.len() - window_size) / step + 1;
218        let freq_bins = window_size / 2 + 1;
219
220        let mut spectrogram = Array2::zeros((freq_bins, num_windows));
221
222        // Parallel spectrogram computation
223        for (window_idx, start) in (0..data.len() - window_size + 1).step_by(step).enumerate() {
224            if window_idx >= num_windows {
225                break;
226            }
227
228            let window = data.slice(s![start..start + window_size]);
229            let windowed = self.apply_hanning_window(&window.to_owned())?;
230            let fft_result = self.fft(&windowed)?;
231
232            // Store magnitude spectrum
233            for freq_idx in 0..freq_bins {
234                if freq_idx < fft_result.len() {
235                    let magnitude = fft_result[freq_idx].abs();
236                    spectrogram[[freq_idx, window_idx]] = magnitude;
237                }
238            }
239        }
240
241        Ok(spectrogram)
242    }
243}