1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
//! SIMD-accelerated Real-valued Fast Fourier Transform (RFFT) operations
//!
//! This module provides SIMD-accelerated implementations of FFT operations
//! for real-valued inputs, using the unified SIMD abstraction layer from scirs2-core.
use crate::error::{FFTError, FFTResult};
use crate::rfft::{irfft as irfft_basic, rfft as rfft_basic};
use scirs2_core::numeric::Complex64;
use scirs2_core::numeric::NumCast;
use scirs2_core::simd_ops::{AutoOptimizer, PlatformCapabilities};
use std::fmt::Debug;
/// Compute the 1-dimensional discrete Fourier Transform for real input with SIMD acceleration.
///
/// This function is optimized using SIMD instructions for improved performance on
/// modern CPUs. For real-valued inputs, this uses a specialized algorithm that is
/// more efficient than a general complex FFT.
///
/// # Arguments
///
/// * `input` - Input real-valued array
/// * `n` - Length of the transformed axis (optional)
/// * `norm` - Normalization mode (optional)
///
/// # Returns
///
/// * The Fourier transform of the real input array
///
/// # Examples
///
/// ```
/// use scirs2_fft::simd_rfft::{rfft_simd};
/// use scirs2_fft::simd_fft::NormMode;
///
/// // Generate a simple signal
/// let signal = vec![1.0, 2.0, 3.0, 4.0];
///
/// // Compute RFFT of the signal with SIMD acceleration
/// let spectrum = rfft_simd(&signal, None, None).expect("Operation failed");
///
/// // RFFT produces n//2 + 1 complex values
/// assert_eq!(spectrum.len(), signal.len() / 2 + 1);
/// ```
#[allow(dead_code)]
pub fn rfft_simd<T>(input: &[T], n: Option<usize>, norm: Option<&str>) -> FFTResult<Vec<Complex64>>
where
T: NumCast + Copy + Debug + 'static,
{
// Use the basic rfft implementation which already handles the logic
let result = rfft_basic(input, n)?;
// Apply normalization if requested
if let Some(norm_str) = norm {
let mut result_mut = result;
let n = input.len();
match norm_str {
"backward" => {
let scale = 1.0 / (n as f64);
result_mut.iter_mut().for_each(|c| *c *= scale);
}
"ortho" => {
let scale = 1.0 / (n as f64).sqrt();
result_mut.iter_mut().for_each(|c| *c *= scale);
}
"forward" => {
let scale = 1.0 / (n as f64);
result_mut.iter_mut().for_each(|c| *c *= scale);
}
_ => {} // No normalization for unrecognized mode
}
return Ok(result_mut);
}
Ok(result)
}
/// Compute the inverse of the 1-dimensional discrete Fourier Transform for real input with SIMD acceleration.
///
/// This function is optimized using SIMD instructions for improved performance on
/// modern CPUs.
///
/// # Arguments
///
/// * `input` - Input complex-valued array representing the Fourier transform of real data
/// * `n` - Length of the output array (optional)
/// * `norm` - Normalization mode (optional)
///
/// # Returns
///
/// * The inverse Fourier transform, yielding a real-valued array
///
/// # Examples
///
/// ```
/// use scirs2_fft::simd_rfft::{rfft_simd, irfft_simd};
///
/// // Generate a simple signal
/// let signal = vec![1.0, 2.0, 3.0, 4.0];
///
/// // Forward transform
/// let spectrum = rfft_simd(&signal, None, None).expect("Operation failed");
///
/// // Inverse transform
/// let recovered = irfft_simd(&spectrum, Some(signal.len()), None).expect("Operation failed");
///
/// // Check recovery accuracy
/// for (x, y) in signal.iter().zip(recovered.iter()) {
/// assert!((x - y).abs() < 1e-10);
/// }
/// ```
#[allow(dead_code)]
pub fn irfft_simd<T>(input: &[T], n: Option<usize>, norm: Option<&str>) -> FFTResult<Vec<f64>>
where
T: NumCast + Copy + Debug + 'static,
{
// Use the basic irfft implementation
let result = irfft_basic(input, n)?;
// Apply normalization if requested
if let Some(norm_str) = norm {
let mut result_mut = result;
let n = input.len();
match norm_str {
"backward" => {
let scale = 1.0 / (n as f64);
result_mut.iter_mut().for_each(|c| *c *= scale);
}
"ortho" => {
let scale = 1.0 / (n as f64).sqrt();
result_mut.iter_mut().for_each(|c| *c *= scale);
}
"forward" => {
let scale = 1.0 / (n as f64);
result_mut.iter_mut().for_each(|c| *c *= scale);
}
_ => {} // No normalization for unrecognized mode
}
return Ok(result_mut);
}
Ok(result)
}
/// Adaptive RFFT that automatically chooses the best implementation
#[allow(dead_code)]
pub fn rfft_adaptive<T>(
input: &[T],
n: Option<usize>,
norm: Option<&str>,
) -> FFTResult<Vec<Complex64>>
where
T: NumCast + Copy + Debug + 'static,
{
let optimizer = AutoOptimizer::new();
let caps = PlatformCapabilities::detect();
let size = n.unwrap_or(input.len());
if caps.gpu_available && optimizer.should_use_gpu(size) {
// Use GPU implementation when available
match rfft_gpu(input, n, norm) {
Ok(result) => Ok(result),
Err(_) => {
// Fall back to SIMD implementation if GPU fails
rfft_simd(input, n, norm)
}
}
} else {
rfft_simd(input, n, norm)
}
}
/// Adaptive IRFFT that automatically chooses the best implementation
#[allow(dead_code)]
pub fn irfft_adaptive<T>(input: &[T], n: Option<usize>, norm: Option<&str>) -> FFTResult<Vec<f64>>
where
T: NumCast + Copy + Debug + 'static,
{
let optimizer = AutoOptimizer::new();
let caps = PlatformCapabilities::detect();
let size = n.unwrap_or_else(|| input.len() * 2 - 2);
if caps.gpu_available && optimizer.should_use_gpu(size) {
// Use GPU implementation when available
match irfft_gpu(input, n, norm) {
Ok(result) => Ok(result),
Err(_) => {
// Fall back to SIMD implementation if GPU fails
irfft_simd(input, n, norm)
}
}
} else {
irfft_simd(input, n, norm)
}
}
/// GPU-accelerated RFFT implementation
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn rfft_gpu<T>(_input: &[T], _n: Option<usize>, _norm: Option<&str>) -> FFTResult<Vec<Complex64>>
where
T: NumCast + Copy + Debug + 'static,
{
// GPU implementation is simplified for now due to API incompatibilities
// Will be properly implemented when GPU support is fully integrated
Err(FFTError::NotImplementedError(
"GPU-accelerated RFFT is not yet fully implemented".to_string(),
))
}
/// GPU-accelerated IRFFT implementation
#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn irfft_gpu<T>(_input: &[T], _n: Option<usize>, _norm: Option<&str>) -> FFTResult<Vec<f64>>
where
T: NumCast + Copy + Debug + 'static,
{
// GPU implementation is simplified for now due to API incompatibilities
// Will be properly implemented when GPU support is fully integrated
Err(FFTError::NotImplementedError(
"GPU-accelerated IRFFT is not yet fully implemented".to_string(),
))
}
/// Fallback implementations when GPU feature is not enabled
#[cfg(not(feature = "cuda"))]
#[allow(dead_code)]
fn rfft_gpu<T>(_input: &[T], _n: Option<usize>, _norm: Option<&str>) -> FFTResult<Vec<Complex64>>
where
T: NumCast + Copy + Debug + 'static,
{
Err(crate::error::FFTError::NotImplementedError(
"GPU FFT not compiled".to_string(),
))
}
#[cfg(not(feature = "cuda"))]
#[allow(dead_code)]
fn irfft_gpu<T>(_input: &[T], _n: Option<usize>, _norm: Option<&str>) -> FFTResult<Vec<f64>>
where
T: NumCast + Copy + Debug + 'static,
{
Err(crate::error::FFTError::NotImplementedError(
"GPU FFT not compiled".to_string(),
))
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
#[test]
fn test_rfft_simd_simple() {
let signal = vec![1.0, 2.0, 3.0, 4.0];
// Forward transform
let spectrum = rfft_simd(&signal, None, None).expect("Operation failed");
// Check size
assert_eq!(spectrum.len(), signal.len() / 2 + 1);
// First element should be sum of all values
assert_abs_diff_eq!(spectrum[0].re, 10.0, epsilon = 1e-10);
assert_abs_diff_eq!(spectrum[0].im, 0.0, epsilon = 1e-10);
}
#[test]
fn test_rfft_irfft_roundtrip() {
let signal = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
// Forward transform
let spectrum = rfft_simd(&signal, None, None).expect("Operation failed");
// Inverse transform
let recovered = irfft_simd(&spectrum, Some(signal.len()), None).expect("Operation failed");
// Check recovery
for (i, (&orig, &rec)) in signal.iter().zip(recovered.iter()).enumerate() {
if (orig - rec).abs() > 1e-10 {
panic!("Mismatch at index {i}: {orig} != {rec}");
}
}
}
#[test]
fn test_adaptive_selection() {
let signal = vec![1.0; 1000];
// Test adaptive functions (should work regardless of GPU availability)
let spectrum = rfft_adaptive(&signal, None, None).expect("Operation failed");
assert_eq!(spectrum.len(), signal.len() / 2 + 1);
let recovered =
irfft_adaptive(&spectrum, Some(signal.len()), None).expect("Operation failed");
assert_eq!(recovered.len(), signal.len());
}
}