scirs2_signal/window/optimization/
simd_implementation.rs1use crate::error::{SignalError, SignalResult};
7use scirs2_core::simd_ops::SimdUnifiedOps;
8use std::f64::consts::PI;
9
10pub struct SimdWindowGenerator {
12 avx_available: bool,
14 sse_available: bool,
16 simd_chunk_size: usize,
18}
19
20impl Default for SimdWindowGenerator {
21 fn default() -> Self {
22 Self::new()
23 }
24}
25
26impl SimdWindowGenerator {
27 fn bessel_i0_approx(x: f64) -> f64 {
29 let ax = x.abs();
30 if ax < 3.75 {
31 let y = (x / 3.75).powi(2);
32 1.0 + y
33 * (3.5156229
34 + y * (3.0899424
35 + y * (1.2067492 + y * (0.2659732 + y * (0.360768e-1 + y * 0.45813e-2)))))
36 } else {
37 let y = 3.75 / ax;
38
39 (ax.exp() / ax.sqrt())
40 * (0.39894228
41 + y * (0.1328592e-1
42 + y * (0.225319e-2
43 + y * (-0.157565e-2
44 + y * (0.916281e-2
45 + y * (-0.2057706e-1
46 + y * (0.2635537e-1
47 + y * (-0.1647633e-1 + y * 0.392377e-2))))))))
48 }
49 }
50
51 pub fn new() -> Self {
53 let caps = scirs2_core::simd_ops::PlatformCapabilities::detect();
54
55 Self {
56 avx_available: caps.simd_available,
57 sse_available: caps.simd_available,
58 simd_chunk_size: if caps.simd_available { 8 } else { 1 },
59 }
60 }
61
62 pub fn hann_simd(&self, m: usize, sym: bool) -> SignalResult<Vec<f64>> {
71 if m <= 1 {
72 return Ok(vec![1.0; m]);
73 }
74
75 let (n, needs_trunc) = extend_window_length(m, sym);
76 let mut window = vec![0.0; n];
77
78 if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
79 self.hann_simd_kernel(&mut window)?;
80 } else {
81 self.hann_scalar_kernel(&mut window)?;
82 }
83
84 Ok(truncate_window(window, needs_trunc))
85 }
86
87 pub fn hamming_simd(&self, m: usize, sym: bool) -> SignalResult<Vec<f64>> {
89 if m <= 1 {
90 return Ok(vec![1.0; m]);
91 }
92
93 let (n, needs_trunc) = extend_window_length(m, sym);
94 let mut window = vec![0.0; n];
95
96 if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
97 self.hamming_simd_kernel(&mut window)?;
98 } else {
99 self.hamming_scalar_kernel(&mut window)?;
100 }
101
102 Ok(truncate_window(window, needs_trunc))
103 }
104
105 pub fn blackman_simd(&self, m: usize, sym: bool) -> SignalResult<Vec<f64>> {
107 if m <= 1 {
108 return Ok(vec![1.0; m]);
109 }
110
111 let (n, needs_trunc) = extend_window_length(m, sym);
112 let mut window = vec![0.0; n];
113
114 if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
115 self.blackman_simd_kernel(&mut window)?;
116 } else {
117 self.blackman_scalar_kernel(&mut window)?;
118 }
119
120 Ok(truncate_window(window, needs_trunc))
121 }
122
123 pub fn kaiser_simd(&self, m: usize, beta: f64, sym: bool) -> SignalResult<Vec<f64>> {
125 if m <= 1 {
126 return Ok(vec![1.0; m]);
127 }
128
129 let (n, needs_trunc) = extend_window_length(m, sym);
130 let mut window = vec![0.0; n];
131
132 if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
133 self.kaiser_simd_kernel(&mut window, beta)?;
134 } else {
135 self.kaiser_scalar_kernel(&mut window, beta)?;
136 }
137
138 Ok(truncate_window(window, needs_trunc))
139 }
140
141 pub fn gaussian_simd(&self, m: usize, std: f64, sym: bool) -> SignalResult<Vec<f64>> {
143 if std <= 0.0 {
144 return Err(SignalError::ValueError(
145 "Standard deviation must be positive".to_string(),
146 ));
147 }
148
149 if m <= 1 {
150 return Ok(vec![1.0; m]);
151 }
152
153 let (n, needs_trunc) = extend_window_length(m, sym);
154 let mut window = vec![0.0; n];
155
156 if self.can_use_simd() && n >= self.simd_chunk_size * 2 {
157 self.gaussian_simd_kernel(&mut window, std)?;
158 } else {
159 self.gaussian_scalar_kernel(&mut window, std)?;
160 }
161
162 Ok(truncate_window(window, needs_trunc))
163 }
164
165 fn can_use_simd(&self) -> bool {
167 self.avx_available || self.sse_available
168 }
169
170 fn hann_simd_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
173 let n = window.len();
174 let n_minus_1 = (n - 1) as f64;
175 let chunk_size = self.simd_chunk_size;
176
177 for chunk_start in (0..n).step_by(chunk_size) {
179 let chunk_end = (chunk_start + chunk_size).min(n);
180 let chunk = &mut window[chunk_start..chunk_end];
181
182 let indices: Vec<f64> = (chunk_start..chunk_end).map(|i| i as f64).collect();
184
185 let angles: Vec<f64> = indices.iter().map(|&i| 2.0 * PI * i / n_minus_1).collect();
187
188 let cos_values: Vec<f64> = angles.iter().map(|&x| x.cos()).collect();
190
191 for (i, &cos_val) in cos_values.iter().enumerate() {
193 if i < chunk.len() {
194 chunk[i] = 0.5 * (1.0 - cos_val);
195 }
196 }
197 }
198
199 Ok(())
200 }
201
202 fn hamming_simd_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
203 let n = window.len();
204 let n_minus_1 = (n - 1) as f64;
205 let chunk_size = self.simd_chunk_size;
206
207 for chunk_start in (0..n).step_by(chunk_size) {
208 let chunk_end = (chunk_start + chunk_size).min(n);
209 let chunk = &mut window[chunk_start..chunk_end];
210
211 let indices: Vec<f64> = (chunk_start..chunk_end).map(|i| i as f64).collect();
212
213 let angles: Vec<f64> = indices.iter().map(|&i| 2.0 * PI * i / n_minus_1).collect();
214
215 let cos_values: Vec<f64> = angles.iter().map(|&x| x.cos()).collect();
216
217 for (i, &cos_val) in cos_values.iter().enumerate() {
219 if i < chunk.len() {
220 chunk[i] = 0.54 - 0.46 * cos_val;
221 }
222 }
223 }
224
225 Ok(())
226 }
227
228 fn blackman_simd_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
229 let n = window.len();
230 let n_minus_1 = (n - 1) as f64;
231 let chunk_size = self.simd_chunk_size;
232
233 for chunk_start in (0..n).step_by(chunk_size) {
234 let chunk_end = (chunk_start + chunk_size).min(n);
235 let chunk = &mut window[chunk_start..chunk_end];
236
237 let indices: Vec<f64> = (chunk_start..chunk_end).map(|i| i as f64).collect();
238
239 let angles: Vec<f64> = indices.iter().map(|&i| 2.0 * PI * i / n_minus_1).collect();
240
241 let cos_values: Vec<f64> = angles.iter().map(|&x| x.cos()).collect();
242 let cos2_values: Vec<f64> = angles.iter().map(|&x| (2.0 * x).cos()).collect();
243
244 for i in 0..chunk.len() {
246 if i < cos_values.len() && i < cos2_values.len() {
247 chunk[i] = 0.42 - 0.5 * cos_values[i] + 0.08 * cos2_values[i];
248 }
249 }
250 }
251
252 Ok(())
253 }
254
255 fn kaiser_simd_kernel(&self, window: &mut [f64], beta: f64) -> SignalResult<()> {
256 let n = window.len();
257 let alpha = (n - 1) as f64 / 2.0;
258 let i0_beta = modified_bessel_i0_simd(beta);
259 let chunk_size = self.simd_chunk_size;
260
261 for chunk_start in (0..n).step_by(chunk_size) {
262 let chunk_end = (chunk_start + chunk_size).min(n);
263 let chunk = &mut window[chunk_start..chunk_end];
264
265 let positions: Vec<f64> = (chunk_start..chunk_end)
267 .map(|i| (i as f64 - alpha) / alpha)
268 .collect();
269
270 let bessel_args: Vec<f64> = positions
272 .iter()
273 .map(|&x| beta * (1.0 - x * x).max(0.0).sqrt())
274 .collect();
275
276 let bessel_values: Vec<f64> = bessel_args
278 .iter()
279 .map(|&x| Self::bessel_i0_approx(x))
280 .collect();
281
282 for (i, &bessel_val) in bessel_values.iter().enumerate() {
284 if i < chunk.len() {
285 chunk[i] = bessel_val / i0_beta;
286 }
287 }
288 }
289
290 Ok(())
291 }
292
293 fn gaussian_simd_kernel(&self, window: &mut [f64], std: f64) -> SignalResult<()> {
294 let n = window.len();
295 let center = (n - 1) as f64 / 2.0;
296 let chunk_size = self.simd_chunk_size;
297 let std_squared = std * std;
298
299 for chunk_start in (0..n).step_by(chunk_size) {
300 let chunk_end = (chunk_start + chunk_size).min(n);
301 let chunk = &mut window[chunk_start..chunk_end];
302
303 let distances: Vec<f64> = (chunk_start..chunk_end)
305 .map(|i| i as f64 - center)
306 .collect();
307
308 let exponents: Vec<f64> = distances
310 .iter()
311 .map(|&d| -(d * d) / (2.0 * std_squared))
312 .collect();
313
314 let exp_values: Vec<f64> = exponents.iter().map(|&x| x.exp()).collect();
316
317 for (i, &exp_val) in exp_values.iter().enumerate() {
318 if i < chunk.len() {
319 chunk[i] = exp_val;
320 }
321 }
322 }
323
324 Ok(())
325 }
326
327 fn hann_scalar_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
330 let n = window.len();
331 for i in 0..n {
332 let w_val = 0.5 * (1.0 - (2.0 * PI * i as f64 / (n - 1) as f64).cos());
333 window[i] = w_val;
334 }
335 Ok(())
336 }
337
338 fn hamming_scalar_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
339 let n = window.len();
340 for i in 0..n {
341 let w_val = 0.54 - 0.46 * (2.0 * PI * i as f64 / (n - 1) as f64).cos();
342 window[i] = w_val;
343 }
344 Ok(())
345 }
346
347 fn blackman_scalar_kernel(&self, window: &mut [f64]) -> SignalResult<()> {
348 let n = window.len();
349 for i in 0..n {
350 let angle = 2.0 * PI * i as f64 / (n - 1) as f64;
351 let w_val = 0.42 - 0.5 * angle.cos() + 0.08 * (2.0 * angle).cos();
352 window[i] = w_val;
353 }
354 Ok(())
355 }
356
357 fn kaiser_scalar_kernel(&self, window: &mut [f64], beta: f64) -> SignalResult<()> {
358 let n = window.len();
359 let alpha = (n - 1) as f64 / 2.0;
360 let i0_beta = modified_bessel_i0_simd(beta);
361
362 for i in 0..n {
363 let x = (i as f64 - alpha) / alpha;
364 let arg = beta * (1.0 - x * x).max(0.0).sqrt();
365 let w_val = modified_bessel_i0_simd(arg) / i0_beta;
366 window[i] = w_val;
367 }
368 Ok(())
369 }
370
371 fn gaussian_scalar_kernel(&self, window: &mut [f64], std: f64) -> SignalResult<()> {
372 let n = window.len();
373 let center = (n - 1) as f64 / 2.0;
374
375 for i in 0..n {
376 let distance = i as f64 - center;
377 let w_val = (-(distance * distance) / (2.0 * std * std)).exp();
378 window[i] = w_val;
379 }
380 Ok(())
381 }
382}
383
384fn modified_bessel_i0_simd(x: f64) -> f64 {
386 let t = x / 3.75;
387 if x.abs() < 3.75 {
388 let y = t * t;
389 1.0 + y
390 * (3.515623
391 + y * (3.089943 + y * (1.20675 + y * (0.265973 + y * (0.0360768 + y * 0.0045813)))))
392 } else {
393 let y = 1.0 / t;
394 (x.abs().exp() / x.abs().sqrt())
395 * (0.39894228
396 + y * (0.01328592
397 + y * (0.00225319
398 + y * (-0.00157565
399 + y * (0.00916281
400 + y * (-0.02057706
401 + y * (0.02635537 + y * (-0.01647633 + y * 0.00392377))))))))
402 }
403}
404
405pub fn batch_generate_windows(
417 window_type: BatchWindowType,
418 lengths: &[usize],
419 sym: bool,
420) -> SignalResult<Vec<Vec<f64>>> {
421 let generator = SimdWindowGenerator::new();
422 let mut windows = Vec::with_capacity(lengths.len());
423
424 for &length in lengths {
425 let window = match window_type {
426 BatchWindowType::Hann => generator.hann_simd(length, sym)?,
427 BatchWindowType::Hamming => generator.hamming_simd(length, sym)?,
428 BatchWindowType::Blackman => generator.blackman_simd(length, sym)?,
429 BatchWindowType::Kaiser(beta) => generator.kaiser_simd(length, beta, sym)?,
430 BatchWindowType::Gaussian(std) => generator.gaussian_simd(length, std, sym)?,
431 };
432 windows.push(window);
433 }
434
435 Ok(windows)
436}
437
438#[derive(Debug, Clone)]
440pub enum BatchWindowType {
441 Hann,
442 Hamming,
443 Blackman,
444 Kaiser(f64),
445 Gaussian(f64),
446}
447
448pub fn benchmark_simd_performance(
450 window_type: BatchWindowType,
451 lengths: &[usize],
452 iterations: usize,
453) -> SignalResult<SIMDPerformanceResults> {
454 use std::time::Instant;
455
456 let generator = SimdWindowGenerator::new();
457
458 let start = Instant::now();
460 for _ in 0..iterations {
461 for &length in lengths {
462 let _window = match window_type {
463 BatchWindowType::Hann => generator.hann_simd(length, true)?,
464 BatchWindowType::Hamming => generator.hamming_simd(length, true)?,
465 BatchWindowType::Blackman => generator.blackman_simd(length, true)?,
466 BatchWindowType::Kaiser(beta) => generator.kaiser_simd(length, beta, true)?,
467 BatchWindowType::Gaussian(std) => generator.gaussian_simd(length, std, true)?,
468 };
469 }
470 }
471 let simd_duration = start.elapsed();
472
473 let scalar_generator = SimdWindowGenerator {
475 avx_available: false,
476 sse_available: false,
477 simd_chunk_size: 1,
478 };
479
480 let start = Instant::now();
482 for _ in 0..iterations {
483 for &length in lengths {
484 let _window = match window_type {
485 BatchWindowType::Hann => scalar_generator.hann_simd(length, true)?,
486 BatchWindowType::Hamming => scalar_generator.hamming_simd(length, true)?,
487 BatchWindowType::Blackman => scalar_generator.blackman_simd(length, true)?,
488 BatchWindowType::Kaiser(beta) => {
489 scalar_generator.kaiser_simd(length, beta, true)?
490 }
491 BatchWindowType::Gaussian(std) => {
492 scalar_generator.gaussian_simd(length, std, true)?
493 }
494 };
495 }
496 }
497 let scalar_duration = start.elapsed();
498
499 let speedup = scalar_duration.as_secs_f64() / simd_duration.as_secs_f64();
500
501 Ok(SIMDPerformanceResults {
502 simd_duration,
503 scalar_duration,
504 speedup,
505 simd_available: generator.can_use_simd(),
506 avx_available: generator.avx_available,
507 sse_available: generator.sse_available,
508 })
509}
510
511#[derive(Debug)]
513pub struct SIMDPerformanceResults {
514 pub simd_duration: std::time::Duration,
515 pub scalar_duration: std::time::Duration,
516 pub speedup: f64,
517 pub simd_available: bool,
518 pub avx_available: bool,
519 pub sse_available: bool,
520}
521
522fn extend_window_length(m: usize, sym: bool) -> (usize, bool) {
525 if sym {
526 (m, false)
527 } else {
528 (m + 1, true)
529 }
530}
531
532fn truncate_window(mut window: Vec<f64>, needs_trunc: bool) -> Vec<f64> {
533 if needs_trunc && !window.is_empty() {
534 window.pop();
535 }
536 window
537}
538
539#[cfg(test)]
540mod tests {
541 use super::*;
542
543 #[test]
544 fn test_simd_window_generator() {
545 let generator = SimdWindowGenerator::new();
546
547 let hann = generator.hann_simd(64, true).expect("Operation failed");
549 assert_eq!(hann.len(), 64);
550 assert!((hann[0] - 0.0).abs() < 1e-10);
551 assert!((hann[hann.len() - 1] - 0.0).abs() < 1e-10);
552
553 let hamming = generator.hamming_simd(64, true).expect("Operation failed");
555 assert_eq!(hamming.len(), 64);
556 assert!(hamming[0] > 0.0); let blackman = generator.blackman_simd(64, true).expect("Operation failed");
560 assert_eq!(blackman.len(), 64);
561
562 let kaiser = generator
564 .kaiser_simd(64, 5.0, true)
565 .expect("Operation failed");
566 assert_eq!(kaiser.len(), 64);
567
568 let gaussian = generator
570 .gaussian_simd(64, 1.0, true)
571 .expect("Operation failed");
572 assert_eq!(gaussian.len(), 64);
573 }
574
575 #[test]
576 fn test_batch_generation() {
577 let lengths = vec![32, 64, 128, 256];
578 let windows = batch_generate_windows(BatchWindowType::Hann, &lengths, true)
579 .expect("Operation failed");
580
581 assert_eq!(windows.len(), lengths.len());
582 for (i, window) in windows.iter().enumerate() {
583 assert_eq!(window.len(), lengths[i]);
584 }
585 }
586
587 #[test]
588 fn test_simd_vs_scalar_consistency() {
589 let generator = SimdWindowGenerator::new();
590 let scalar_generator = SimdWindowGenerator {
591 avx_available: false,
592 sse_available: false,
593 simd_chunk_size: 1,
594 };
595
596 let length = 64;
597
598 let simd_hann = generator.hann_simd(length, true).expect("Operation failed");
600 let scalar_hann = scalar_generator
601 .hann_simd(length, true)
602 .expect("Operation failed");
603
604 for (simd_val, scalar_val) in simd_hann.iter().zip(scalar_hann.iter()) {
605 assert!((simd_val - scalar_val).abs() < 1e-10);
606 }
607 }
608
609 #[test]
610 fn test_performance_benchmark() {
611 let lengths = vec![64, 128];
612 let result = benchmark_simd_performance(
613 BatchWindowType::Hann,
614 &lengths,
615 10, )
617 .expect("Operation failed");
618
619 assert!(result.simd_duration.as_nanos() > 0);
620 assert!(result.scalar_duration.as_nanos() > 0);
621 assert!(result.speedup > 0.0);
622 }
623
624 #[test]
625 fn test_edge_cases() {
626 let generator = SimdWindowGenerator::new();
627
628 let small = generator.hann_simd(1, true).expect("Operation failed");
630 assert_eq!(small, vec![1.0]);
631
632 let small2 = generator.hann_simd(2, true).expect("Operation failed");
633 assert_eq!(small2.len(), 2);
634 }
635}