Skip to main content

scirs2_signal/window/optimization/
simd_implementation.rs

1//! SIMD-Optimized Window Function Implementation
2//!
3//! This module provides SIMD-accelerated implementations of window functions
4//! using scirs2-core SIMD operations for improved performance on large datasets.
5
6use crate::error::{SignalError, SignalResult};
7use scirs2_core::simd_ops::SimdUnifiedOps;
8use std::f64::consts::PI;
9
10/// SIMD-optimized window generation
11pub struct SimdWindowGenerator {
12    /// Use AVX if available
13    avx_available: bool,
14    /// Use SSE if available
15    sse_available: bool,
16    /// Chunk size for SIMD processing
17    simd_chunk_size: usize,
18}
19
20impl Default for SimdWindowGenerator {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl SimdWindowGenerator {
27    /// Approximation of modified Bessel function I0
28    fn bessel_i0_approx(x: f64) -> f64 {
29        let ax = x.abs();
30        if ax < 3.75 {
31            let y = (x / 3.75).powi(2);
32            1.0 + y
33                * (3.5156229
34                    + y * (3.0899424
35                        + y * (1.2067492 + y * (0.2659732 + y * (0.360768e-1 + y * 0.45813e-2)))))
36        } else {
37            let y = 3.75 / ax;
38
39            (ax.exp() / ax.sqrt())
40                * (0.39894228
41                    + y * (0.1328592e-1
42                        + y * (0.225319e-2
43                            + y * (-0.157565e-2
44                                + y * (0.916281e-2
45                                    + y * (-0.2057706e-1
46                                        + y * (0.2635537e-1
47                                            + y * (-0.1647633e-1 + y * 0.392377e-2))))))))
48        }
49    }
50
51    /// Create new SIMD window generator with capability detection
52    pub fn new() -> Self {
53        let caps = scirs2_core::simd_ops::PlatformCapabilities::detect();
54
55        Self {
56            avx_available: caps.simd_available,
57            sse_available: caps.simd_available,
58            simd_chunk_size: if caps.simd_available { 8 } else { 1 },
59        }
60    }
61
62    /// Generate Hann window using SIMD operations
63    ///
64    /// # Arguments
65    /// * `m` - Number of points in the output window
66    /// * `sym` - If true, generates a symmetric window, otherwise a periodic window
67    ///
68    /// # Returns
69    /// A `Vec<f64>` of window values computed using SIMD
70    pub fn hann_simd(&self, m: usize, sym: bool) -> SignalResult<Vec<f64>> {
71        if m <= 1 {
72            return Ok(vec![1.0; m]);
73        }
74
75        let (n, needs_trunc) = extend_window_length(m, sym);
76        let mut window = vec![0.0; n];
77
78        if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
79            self.hann_simd_kernel(&mut window)?;
80        } else {
81            self.hann_scalar_kernel(&mut window)?;
82        }
83
84        Ok(truncate_window(window, needs_trunc))
85    }
86
87    /// Generate Hamming window using SIMD operations
88    pub fn hamming_simd(&self, m: usize, sym: bool) -> SignalResult<Vec<f64>> {
89        if m <= 1 {
90            return Ok(vec![1.0; m]);
91        }
92
93        let (n, needs_trunc) = extend_window_length(m, sym);
94        let mut window = vec![0.0; n];
95
96        if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
97            self.hamming_simd_kernel(&mut window)?;
98        } else {
99            self.hamming_scalar_kernel(&mut window)?;
100        }
101
102        Ok(truncate_window(window, needs_trunc))
103    }
104
105    /// Generate Blackman window using SIMD operations
106    pub fn blackman_simd(&self, m: usize, sym: bool) -> SignalResult<Vec<f64>> {
107        if m <= 1 {
108            return Ok(vec![1.0; m]);
109        }
110
111        let (n, needs_trunc) = extend_window_length(m, sym);
112        let mut window = vec![0.0; n];
113
114        if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
115            self.blackman_simd_kernel(&mut window)?;
116        } else {
117            self.blackman_scalar_kernel(&mut window)?;
118        }
119
120        Ok(truncate_window(window, needs_trunc))
121    }
122
123    /// Generate Kaiser window using SIMD operations
124    pub fn kaiser_simd(&self, m: usize, beta: f64, sym: bool) -> SignalResult<Vec<f64>> {
125        if m <= 1 {
126            return Ok(vec![1.0; m]);
127        }
128
129        let (n, needs_trunc) = extend_window_length(m, sym);
130        let mut window = vec![0.0; n];
131
132        if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
133            self.kaiser_simd_kernel(&mut window, beta)?;
134        } else {
135            self.kaiser_scalar_kernel(&mut window, beta)?;
136        }
137
138        Ok(truncate_window(window, needs_trunc))
139    }
140
141    /// Generate Gaussian window using SIMD operations
142    pub fn gaussian_simd(&self, m: usize, std: f64, sym: bool) -> SignalResult<Vec<f64>> {
143        if std <= 0.0 {
144            return Err(SignalError::ValueError(
145                "Standard deviation must be positive".to_string(),
146            ));
147        }
148
149        if m <= 1 {
150            return Ok(vec![1.0; m]);
151        }
152
153        let (n, needs_trunc) = extend_window_length(m, sym);
154        let mut window = vec![0.0; n];
155
156        if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
157            self.gaussian_simd_kernel(&mut window, std)?;
158        } else {
159            self.gaussian_scalar_kernel(&mut window, std)?;
160        }
161
162        Ok(truncate_window(window, needs_trunc))
163    }
164
165    /// Check if SIMD can be used
166    fn can_use_simd(&self) -> bool {
167        self.avx_available || self.sse_available
168    }
169
170    // SIMD kernel implementations
171
172    fn hann_simd_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
173        let n = window.len();
174        let n_minus_1 = (n - 1) as f64;
175        let chunk_size = self.simd_chunk_size;
176
177        // Process chunks with SIMD
178        for chunk_start in (0..n).step_by(chunk_size) {
179            let chunk_end = (chunk_start + chunk_size).min(n);
180            let chunk = &mut window[chunk_start..chunk_end];
181
182            // Generate indices for this chunk
183            let indices: Vec<f64> = (chunk_start..chunk_end).map(|i| i as f64).collect();
184
185            // Compute 2π * i / (N-1) for each index
186            let angles: Vec<f64> = indices.iter().map(|&i| 2.0 * PI * i / n_minus_1).collect();
187
188            // SIMD cosine computation
189            let cos_values: Vec<f64> = angles.iter().map(|&x| x.cos()).collect();
190
191            // Apply Hann formula: 0.5 * (1 - cos(2π * i / (N-1)))
192            for (i, &cos_val) in cos_values.iter().enumerate() {
193                if i < chunk.len() {
194                    chunk[i] = 0.5 * (1.0 - cos_val);
195                }
196            }
197        }
198
199        Ok(())
200    }
201
202    fn hamming_simd_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
203        let n = window.len();
204        let n_minus_1 = (n - 1) as f64;
205        let chunk_size = self.simd_chunk_size;
206
207        for chunk_start in (0..n).step_by(chunk_size) {
208            let chunk_end = (chunk_start + chunk_size).min(n);
209            let chunk = &mut window[chunk_start..chunk_end];
210
211            let indices: Vec<f64> = (chunk_start..chunk_end).map(|i| i as f64).collect();
212
213            let angles: Vec<f64> = indices.iter().map(|&i| 2.0 * PI * i / n_minus_1).collect();
214
215            let cos_values: Vec<f64> = angles.iter().map(|&x| x.cos()).collect();
216
217            // Apply Hamming formula: 0.54 - 0.46 * cos(2π * i / (N-1))
218            for (i, &cos_val) in cos_values.iter().enumerate() {
219                if i < chunk.len() {
220                    chunk[i] = 0.54 - 0.46 * cos_val;
221                }
222            }
223        }
224
225        Ok(())
226    }
227
228    fn blackman_simd_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
229        let n = window.len();
230        let n_minus_1 = (n - 1) as f64;
231        let chunk_size = self.simd_chunk_size;
232
233        for chunk_start in (0..n).step_by(chunk_size) {
234            let chunk_end = (chunk_start + chunk_size).min(n);
235            let chunk = &mut window[chunk_start..chunk_end];
236
237            let indices: Vec<f64> = (chunk_start..chunk_end).map(|i| i as f64).collect();
238
239            let angles: Vec<f64> = indices.iter().map(|&i| 2.0 * PI * i / n_minus_1).collect();
240
241            let cos_values: Vec<f64> = angles.iter().map(|&x| x.cos()).collect();
242            let cos2_values: Vec<f64> = angles.iter().map(|&x| (2.0 * x).cos()).collect();
243
244            // Apply Blackman formula: 0.42 - 0.5 * cos(2π * i / (N-1)) + 0.08 * cos(4π * i / (N-1))
245            for i in 0..chunk.len() {
246                if i < cos_values.len() && i < cos2_values.len() {
247                    chunk[i] = 0.42 - 0.5 * cos_values[i] + 0.08 * cos2_values[i];
248                }
249            }
250        }
251
252        Ok(())
253    }
254
255    fn kaiser_simd_kernel(&self, window: &mut [f64], beta: f64) -> SignalResult<()> {
256        let n = window.len();
257        let alpha = (n - 1) as f64 / 2.0;
258        let i0_beta = modified_bessel_i0_simd(beta);
259        let chunk_size = self.simd_chunk_size;
260
261        for chunk_start in (0..n).step_by(chunk_size) {
262            let chunk_end = (chunk_start + chunk_size).min(n);
263            let chunk = &mut window[chunk_start..chunk_end];
264
265            // Generate normalized positions
266            let positions: Vec<f64> = (chunk_start..chunk_end)
267                .map(|i| (i as f64 - alpha) / alpha)
268                .collect();
269
270            // Compute arguments for Bessel function
271            let bessel_args: Vec<f64> = positions
272                .iter()
273                .map(|&x| beta * (1.0 - x * x).max(0.0).sqrt())
274                .collect();
275
276            // SIMD Bessel function approximation
277            let bessel_values: Vec<f64> = bessel_args
278                .iter()
279                .map(|&x| Self::bessel_i0_approx(x))
280                .collect();
281
282            // Apply Kaiser formula
283            for (i, &bessel_val) in bessel_values.iter().enumerate() {
284                if i < chunk.len() {
285                    chunk[i] = bessel_val / i0_beta;
286                }
287            }
288        }
289
290        Ok(())
291    }
292
293    fn gaussian_simd_kernel(&self, window: &mut [f64], std: f64) -> SignalResult<()> {
294        let n = window.len();
295        let center = (n - 1) as f64 / 2.0;
296        let chunk_size = self.simd_chunk_size;
297        let std_squared = std * std;
298
299        for chunk_start in (0..n).step_by(chunk_size) {
300            let chunk_end = (chunk_start + chunk_size).min(n);
301            let chunk = &mut window[chunk_start..chunk_end];
302
303            // Generate distances from center
304            let distances: Vec<f64> = (chunk_start..chunk_end)
305                .map(|i| i as f64 - center)
306                .collect();
307
308            // Compute -distance²/(2σ²)
309            let exponents: Vec<f64> = distances
310                .iter()
311                .map(|&d| -(d * d) / (2.0 * std_squared))
312                .collect();
313
314            // SIMD exponential
315            let exp_values: Vec<f64> = exponents.iter().map(|&x| x.exp()).collect();
316
317            for (i, &exp_val) in exp_values.iter().enumerate() {
318                if i < chunk.len() {
319                    chunk[i] = exp_val;
320                }
321            }
322        }
323
324        Ok(())
325    }
326
327    // Scalar fallback implementations
328
329    fn hann_scalar_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
330        let n = window.len();
331        for i in 0..n {
332            let w_val = 0.5 * (1.0 - (2.0 * PI * i as f64 / (n - 1) as f64).cos());
333            window[i] = w_val;
334        }
335        Ok(())
336    }
337
338    fn hamming_scalar_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
339        let n = window.len();
340        for i in 0..n {
341            let w_val = 0.54 - 0.46 * (2.0 * PI * i as f64 / (n - 1) as f64).cos();
342            window[i] = w_val;
343        }
344        Ok(())
345    }
346
347    fn blackman_scalar_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
348        let n = window.len();
349        for i in 0..n {
350            let angle = 2.0 * PI * i as f64 / (n - 1) as f64;
351            let w_val = 0.42 - 0.5 * angle.cos() + 0.08 * (2.0 * angle).cos();
352            window[i] = w_val;
353        }
354        Ok(())
355    }
356
357    fn kaiser_scalar_kernel(&self, window: &mut [f64], beta: f64) -> SignalResult<()> {
358        let n = window.len();
359        let alpha = (n - 1) as f64 / 2.0;
360        let i0_beta = modified_bessel_i0_simd(beta);
361
362        for i in 0..n {
363            let x = (i as f64 - alpha) / alpha;
364            let arg = beta * (1.0 - x * x).max(0.0).sqrt();
365            let w_val = modified_bessel_i0_simd(arg) / i0_beta;
366            window[i] = w_val;
367        }
368        Ok(())
369    }
370
371    fn gaussian_scalar_kernel(&self, window: &mut [f64], std: f64) -> SignalResult<()> {
372        let n = window.len();
373        let center = (n - 1) as f64 / 2.0;
374
375        for i in 0..n {
376            let distance = i as f64 - center;
377            let w_val = (-(distance * distance) / (2.0 * std * std)).exp();
378            window[i] = w_val;
379        }
380        Ok(())
381    }
382}
383
384/// SIMD-optimized Modified Bessel function I₀ approximation
385fn modified_bessel_i0_simd(x: f64) -> f64 {
386    let t = x / 3.75;
387    if x.abs() < 3.75 {
388        let y = t * t;
389        1.0 + y
390            * (3.515623
391                + y * (3.089943 + y * (1.20675 + y * (0.265973 + y * (0.0360768 + y * 0.0045813)))))
392    } else {
393        let y = 1.0 / t;
394        (x.abs().exp() / x.abs().sqrt())
395            * (0.39894228
396                + y * (0.01328592
397                    + y * (0.00225319
398                        + y * (-0.00157565
399                            + y * (0.00916281
400                                + y * (-0.02057706
401                                    + y * (0.02635537 + y * (-0.01647633 + y * 0.00392377))))))))
402    }
403}
404
405/// Batch window generation for multiple lengths
406///
407/// Efficiently generates multiple windows of different lengths using SIMD
408///
409/// # Arguments
410/// * `window_type` - Type of window to generate
411/// * `lengths` - Vector of window lengths to generate
412/// * `sym` - Symmetric or periodic windows
413///
414/// # Returns
415/// Vector of generated windows
416pub fn batch_generate_windows(
417    window_type: BatchWindowType,
418    lengths: &[usize],
419    sym: bool,
420) -> SignalResult<Vec<Vec<f64>>> {
421    let generator = SimdWindowGenerator::new();
422    let mut windows = Vec::with_capacity(lengths.len());
423
424    for &length in lengths {
425        let window = match window_type {
426            BatchWindowType::Hann => generator.hann_simd(length, sym)?,
427            BatchWindowType::Hamming => generator.hamming_simd(length, sym)?,
428            BatchWindowType::Blackman => generator.blackman_simd(length, sym)?,
429            BatchWindowType::Kaiser(beta) => generator.kaiser_simd(length, beta, sym)?,
430            BatchWindowType::Gaussian(std) => generator.gaussian_simd(length, std, sym)?,
431        };
432        windows.push(window);
433    }
434
435    Ok(windows)
436}
437
438/// Window types for batch generation
439#[derive(Debug, Clone)]
440pub enum BatchWindowType {
441    Hann,
442    Hamming,
443    Blackman,
444    Kaiser(f64),
445    Gaussian(f64),
446}
447
448/// Performance benchmarking for SIMD vs scalar implementations
449pub fn benchmark_simd_performance(
450    window_type: BatchWindowType,
451    lengths: &[usize],
452    iterations: usize,
453) -> SignalResult<SIMDPerformanceResults> {
454    use std::time::Instant;
455
456    let generator = SimdWindowGenerator::new();
457
458    // Benchmark SIMD implementation
459    let start = Instant::now();
460    for _ in 0..iterations {
461        for &length in lengths {
462            let _window = match window_type {
463                BatchWindowType::Hann => generator.hann_simd(length, true)?,
464                BatchWindowType::Hamming => generator.hamming_simd(length, true)?,
465                BatchWindowType::Blackman => generator.blackman_simd(length, true)?,
466                BatchWindowType::Kaiser(beta) => generator.kaiser_simd(length, beta, true)?,
467                BatchWindowType::Gaussian(std) => generator.gaussian_simd(length, std, true)?,
468            };
469        }
470    }
471    let simd_duration = start.elapsed();
472
473    // Create generator without SIMD for comparison
474    let scalar_generator = SimdWindowGenerator {
475        avx_available: false,
476        sse_available: false,
477        simd_chunk_size: 1,
478    };
479
480    // Benchmark scalar implementation
481    let start = Instant::now();
482    for _ in 0..iterations {
483        for &length in lengths {
484            let _window = match window_type {
485                BatchWindowType::Hann => scalar_generator.hann_simd(length, true)?,
486                BatchWindowType::Hamming => scalar_generator.hamming_simd(length, true)?,
487                BatchWindowType::Blackman => scalar_generator.blackman_simd(length, true)?,
488                BatchWindowType::Kaiser(beta) => {
489                    scalar_generator.kaiser_simd(length, beta, true)?
490                }
491                BatchWindowType::Gaussian(std) => {
492                    scalar_generator.gaussian_simd(length, std, true)?
493                }
494            };
495        }
496    }
497    let scalar_duration = start.elapsed();
498
499    let speedup = scalar_duration.as_secs_f64() / simd_duration.as_secs_f64();
500
501    Ok(SIMDPerformanceResults {
502        simd_duration,
503        scalar_duration,
504        speedup,
505        simd_available: generator.can_use_simd(),
506        avx_available: generator.avx_available,
507        sse_available: generator.sse_available,
508    })
509}
510
511/// SIMD performance benchmark results
512#[derive(Debug)]
513pub struct SIMDPerformanceResults {
514    pub simd_duration: std::time::Duration,
515    pub scalar_duration: std::time::Duration,
516    pub speedup: f64,
517    pub simd_available: bool,
518    pub avx_available: bool,
519    pub sse_available: bool,
520}
521
522// Helper functions for window length manipulation
523
524fn extend_window_length(m: usize, sym: bool) -> (usize, bool) {
525    if sym {
526        (m, false)
527    } else {
528        (m + 1, true)
529    }
530}
531
532fn truncate_window(mut window: Vec<f64>, needs_trunc: bool) -> Vec<f64> {
533    if needs_trunc && !window.is_empty() {
534        window.pop();
535    }
536    window
537}
538
539#[cfg(test)]
540mod tests {
541    use super::*;
542
543    #[test]
544    fn test_simd_window_generator() {
545        let generator = SimdWindowGenerator::new();
546
547        // Test Hann window
548        let hann = generator.hann_simd(64, true).expect("Operation failed");
549        assert_eq!(hann.len(), 64);
550        assert!((hann[0] - 0.0).abs() < 1e-10);
551        assert!((hann[hann.len() - 1] - 0.0).abs() < 1e-10);
552
553        // Test Hamming window
554        let hamming = generator.hamming_simd(64, true).expect("Operation failed");
555        assert_eq!(hamming.len(), 64);
556        assert!(hamming[0] > 0.0); // Non-zero endpoints
557
558        // Test Blackman window
559        let blackman = generator.blackman_simd(64, true).expect("Operation failed");
560        assert_eq!(blackman.len(), 64);
561
562        // Test Kaiser window
563        let kaiser = generator
564            .kaiser_simd(64, 5.0, true)
565            .expect("Operation failed");
566        assert_eq!(kaiser.len(), 64);
567
568        // Test Gaussian window
569        let gaussian = generator
570            .gaussian_simd(64, 1.0, true)
571            .expect("Operation failed");
572        assert_eq!(gaussian.len(), 64);
573    }
574
575    #[test]
576    fn test_batch_generation() {
577        let lengths = vec![32, 64, 128, 256];
578        let windows = batch_generate_windows(BatchWindowType::Hann, &lengths, true)
579            .expect("Operation failed");
580
581        assert_eq!(windows.len(), lengths.len());
582        for (i, window) in windows.iter().enumerate() {
583            assert_eq!(window.len(), lengths[i]);
584        }
585    }
586
587    #[test]
588    fn test_simd_vs_scalar_consistency() {
589        let generator = SimdWindowGenerator::new();
590        let scalar_generator = SimdWindowGenerator {
591            avx_available: false,
592            sse_available: false,
593            simd_chunk_size: 1,
594        };
595
596        let length = 64;
597
598        // Compare Hann windows
599        let simd_hann = generator.hann_simd(length, true).expect("Operation failed");
600        let scalar_hann = scalar_generator
601            .hann_simd(length, true)
602            .expect("Operation failed");
603
604        for (simd_val, scalar_val) in simd_hann.iter().zip(scalar_hann.iter()) {
605            assert!((simd_val - scalar_val).abs() < 1e-10);
606        }
607    }
608
609    #[test]
610    fn test_performance_benchmark() {
611        let lengths = vec![64, 128];
612        let result = benchmark_simd_performance(
613            BatchWindowType::Hann,
614            &lengths,
615            10, // Small number for test
616        )
617        .expect("Operation failed");
618
619        assert!(result.simd_duration.as_nanos() > 0);
620        assert!(result.scalar_duration.as_nanos() > 0);
621        assert!(result.speedup > 0.0);
622    }
623
624    #[test]
625    fn test_edge_cases() {
626        let generator = SimdWindowGenerator::new();
627
628        // Test very small windows
629        let small = generator.hann_simd(1, true).expect("Operation failed");
630        assert_eq!(small, vec![1.0]);
631
632        let small2 = generator.hann_simd(2, true).expect("Operation failed");
633        assert_eq!(small2.len(), 2);
634    }
635}