1#![allow(clippy::needless_range_loop)]
6
7use crate::mem8::wave::{MemoryWave, WaveGrid};
8use std::f32::consts::PI;
9
10pub struct SimdWaveProcessor {
12 vector_width: usize,
14}
15
16impl Default for SimdWaveProcessor {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21
22impl SimdWaveProcessor {
23 pub fn new() -> Self {
24 Self {
25 vector_width: 8, }
27 }
28
29 pub fn calculate_waves_simd(&self, waves: &[MemoryWave], t: f32) -> Vec<f32> {
31 let mut results = Vec::with_capacity(waves.len());
32
33 let chunks = waves.chunks_exact(8);
35 let remainder = chunks.remainder();
36
37 for chunk in chunks {
39 let r0 = chunk[0].calculate(t);
41 let r1 = chunk[1].calculate(t);
42 let r2 = chunk[2].calculate(t);
43 let r3 = chunk[3].calculate(t);
44 let r4 = chunk[4].calculate(t);
45 let r5 = chunk[5].calculate(t);
46 let r6 = chunk[6].calculate(t);
47 let r7 = chunk[7].calculate(t);
48
49 results.extend_from_slice(&[r0, r1, r2, r3, r4, r5, r6, r7]);
50 }
51
52 for wave in remainder {
54 results.push(wave.calculate(t));
55 }
56
57 results
58 }
59
60 pub fn calculate_interference_block_simd(
62 &self,
63 grid: &WaveGrid,
64 base_x: u8,
65 base_y: u8,
66 z: u16,
67 t: f32,
68 ) -> [[f32; 8]; 8] {
69 let mut result = [[0.0f32; 8]; 8];
70
71 for dy in 0..8 {
74 let y = base_y + dy;
75
76 let x0 = base_x;
78 let x1 = base_x + 1;
79 let x2 = base_x + 2;
80 let x3 = base_x + 3;
81 let x4 = base_x + 4;
82 let x5 = base_x + 5;
83 let x6 = base_x + 6;
84 let x7 = base_x + 7;
85
86 result[dy as usize][0] = grid.get(x0, y, z).map_or(0.0, |w| w.calculate(t));
87 result[dy as usize][1] = grid.get(x1, y, z).map_or(0.0, |w| w.calculate(t));
88 result[dy as usize][2] = grid.get(x2, y, z).map_or(0.0, |w| w.calculate(t));
89 result[dy as usize][3] = grid.get(x3, y, z).map_or(0.0, |w| w.calculate(t));
90 result[dy as usize][4] = grid.get(x4, y, z).map_or(0.0, |w| w.calculate(t));
91 result[dy as usize][5] = grid.get(x5, y, z).map_or(0.0, |w| w.calculate(t));
92 result[dy as usize][6] = grid.get(x6, y, z).map_or(0.0, |w| w.calculate(t));
93 result[dy as usize][7] = grid.get(x7, y, z).map_or(0.0, |w| w.calculate(t));
94 }
95
96 result
97 }
98
99 pub fn quantize_amplitudes_simd(&self, amplitudes: &[f32]) -> Vec<u8> {
101 let mut results = Vec::with_capacity(amplitudes.len());
102
103 let chunks = amplitudes.chunks_exact(8);
105 let remainder = chunks.remainder();
106
107 for chunk in chunks {
108 let q0 = quantize_amplitude(chunk[0]);
110 let q1 = quantize_amplitude(chunk[1]);
111 let q2 = quantize_amplitude(chunk[2]);
112 let q3 = quantize_amplitude(chunk[3]);
113 let q4 = quantize_amplitude(chunk[4]);
114 let q5 = quantize_amplitude(chunk[5]);
115 let q6 = quantize_amplitude(chunk[6]);
116 let q7 = quantize_amplitude(chunk[7]);
117
118 results.extend_from_slice(&[q0, q1, q2, q3, q4, q5, q6, q7]);
119 }
120
121 for & in remainder {
123 results.push(quantize_amplitude(amp));
124 }
125
126 results
127 }
128
129 pub fn calculate_emotional_modulation_simd(&self, waves: &[MemoryWave]) -> Vec<f32> {
131 let mut results = Vec::with_capacity(waves.len());
132
133 const ALPHA: f32 = 0.3;
134 const BETA: f32 = 0.5;
135
136 let chunks = waves.chunks_exact(4);
138 let remainder = chunks.remainder();
139
140 for chunk in chunks {
141 let m0 = (1.0 + ALPHA * chunk[0].valence) * (1.0 + BETA * chunk[0].arousal);
143 let m1 = (1.0 + ALPHA * chunk[1].valence) * (1.0 + BETA * chunk[1].arousal);
144 let m2 = (1.0 + ALPHA * chunk[2].valence) * (1.0 + BETA * chunk[2].arousal);
145 let m3 = (1.0 + ALPHA * chunk[3].valence) * (1.0 + BETA * chunk[3].arousal);
146
147 results.extend_from_slice(&[m0, m1, m2, m3]);
148 }
149
150 for wave in remainder {
152 let modulation = (1.0 + ALPHA * wave.valence) * (1.0 + BETA * wave.arousal);
153 results.push(modulation);
154 }
155
156 results
157 }
158
159 pub fn copy_grid_block_aligned(&self, src: &[f32], dst: &mut [f32], block_size: usize) {
161 assert_eq!(src.len(), dst.len());
162 assert_eq!(src.len() % block_size, 0);
163
164 for (src_chunk, dst_chunk) in src
166 .chunks_exact(block_size)
167 .zip(dst.chunks_exact_mut(block_size))
168 {
169 if block_size == 64 {
171 dst_chunk.copy_from_slice(src_chunk);
173 } else {
174 for (s, d) in src_chunk.iter().zip(dst_chunk.iter_mut()) {
176 *d = *s;
177 }
178 }
179 }
180 }
181}
182
183#[inline(always)]
185fn quantize_amplitude(amplitude: f32) -> u8 {
186 if amplitude <= 0.0 {
187 0
188 } else {
189 (32.0 * amplitude.log2()).clamp(0.0, 255.0) as u8
190 }
191}
192
193#[inline(always)]
195fn fast_sin(x: f32) -> f32 {
196 let x = x % (2.0 * PI);
198 let x = if x > PI {
199 x - 2.0 * PI
200 } else if x < -PI {
201 x + 2.0 * PI
202 } else {
203 x
204 };
205
206 let x2 = x * x;
208 let x3 = x2 * x;
209 let x5 = x3 * x2;
210
211 x - x3 / 6.0 + x5 / 120.0
212}
213
214pub struct SimdGridOps {
216 processor: SimdWaveProcessor,
217}
218
219impl Default for SimdGridOps {
220 fn default() -> Self {
221 Self::new()
222 }
223}
224
225impl SimdGridOps {
226 pub fn new() -> Self {
227 Self {
228 processor: SimdWaveProcessor::new(),
229 }
230 }
231
232 pub fn process_grid_layer(&self, grid: &WaveGrid, z: u16, t: f32) -> Vec<Vec<f32>> {
234 let mut result = vec![vec![0.0f32; grid.width]; grid.height];
235
236 for block_y in (0..grid.height).step_by(8) {
238 for block_x in (0..grid.width).step_by(8) {
239 let block = self.processor.calculate_interference_block_simd(
240 grid,
241 block_x as u8,
242 block_y as u8,
243 z,
244 t,
245 );
246
247 for dy in 0..8 {
249 for dx in 0..8 {
250 let y = block_y + dy;
251 let x = block_x + dx;
252 if y < grid.height && x < grid.width {
253 result[y][x] = block[dy][dx];
254 }
255 }
256 }
257 }
258 }
259
260 result
261 }
262
263 pub fn calculate_phases_batch(&self, timestamps: &[f32], reference: f32) -> Vec<f32> {
265 let mut results = Vec::with_capacity(timestamps.len());
266
267 let chunks = timestamps.chunks_exact(4);
269 let remainder = chunks.remainder();
270
271 for chunk in chunks {
272 let p0 = ((chunk[0] - reference) * 2.0 * PI) % (2.0 * PI);
273 let p1 = ((chunk[1] - reference) * 2.0 * PI) % (2.0 * PI);
274 let p2 = ((chunk[2] - reference) * 2.0 * PI) % (2.0 * PI);
275 let p3 = ((chunk[3] - reference) * 2.0 * PI) % (2.0 * PI);
276
277 results.extend_from_slice(&[p0, p1, p2, p3]);
278 }
279
280 for &t in remainder {
281 results.push(((t - reference) * 2.0 * PI) % (2.0 * PI));
282 }
283
284 results
285 }
286
287 pub fn calculate_waves_fast(&self, waves: &[MemoryWave], t: f32) -> Vec<f32> {
289 let mut results = Vec::with_capacity(waves.len());
290
291 for wave in waves {
292 let decay = wave.calculate_decay();
293 let emotional_mod = wave.calculate_emotional_modulation();
294 let angle = 2.0 * PI * wave.frequency * t + wave.phase;
295
296 let sin_val = fast_sin(angle);
298 results.push(wave.amplitude * decay * emotional_mod * sin_val);
299 }
300
301 results
302 }
303}
304
305pub struct PerformanceBenchmark {
307 simd_ops: SimdGridOps,
308}
309
310impl Default for PerformanceBenchmark {
311 fn default() -> Self {
312 Self::new()
313 }
314}
315
316impl PerformanceBenchmark {
317 pub fn new() -> Self {
318 Self {
319 simd_ops: SimdGridOps::new(),
320 }
321 }
322
323 pub fn benchmark_wave_calculation(&self, num_waves: usize) -> BenchmarkResult {
325 use std::time::Instant;
326
327 let mut waves = Vec::with_capacity(num_waves);
329 for i in 0..num_waves {
330 let mut wave = MemoryWave::new((i as f32 * 10.0) % 1000.0, 0.8);
331 wave.valence = (i as f32) / num_waves as f32 * 2.0 - 1.0;
332 wave.arousal = (i as f32) / num_waves as f32;
333 waves.push(wave);
334 }
335
336 let start_standard = Instant::now();
338 let mut results_standard = Vec::with_capacity(num_waves);
339 for wave in &waves {
340 results_standard.push(wave.calculate(1.0));
341 }
342 let duration_standard = start_standard.elapsed();
343
344 let processor = SimdWaveProcessor::new();
346 let start_simd = Instant::now();
347 let results_simd = processor.calculate_waves_simd(&waves, 1.0);
348 let duration_simd = start_simd.elapsed();
349
350 let max_diff = results_standard
352 .iter()
353 .zip(results_simd.iter())
354 .map(|(a, b)| (a - b).abs())
355 .fold(0.0f32, f32::max);
356
357 BenchmarkResult {
358 operation: "Wave Calculation".to_string(),
359 num_items: num_waves,
360 standard_duration: duration_standard,
361 simd_duration: duration_simd,
362 speedup: duration_standard.as_secs_f64() / duration_simd.as_secs_f64(),
363 max_error: max_diff,
364 }
365 }
366
367 pub fn benchmark_grid_processing(&self, grid: &WaveGrid) -> BenchmarkResult {
369 use std::time::Instant;
370
371 let z = 1000;
372 let t = 1.0;
373
374 let start_standard = Instant::now();
376 let mut result_standard = vec![vec![0.0f32; grid.width]; grid.height];
377 for y in 0..grid.height {
378 for x in 0..grid.width {
379 result_standard[y][x] = grid.calculate_interference(x as u8, y as u8, z, t);
380 }
381 }
382 let duration_standard = start_standard.elapsed();
383
384 let start_simd = Instant::now();
386 let result_simd = self.simd_ops.process_grid_layer(grid, z, t);
387 let duration_simd = start_simd.elapsed();
388
389 let max_diff = result_standard
391 .iter()
392 .flatten()
393 .zip(result_simd.iter().flatten())
394 .map(|(a, b)| (a - b).abs())
395 .fold(0.0f32, f32::max);
396
397 BenchmarkResult {
398 operation: "Grid Processing".to_string(),
399 num_items: grid.width * grid.height,
400 standard_duration: duration_standard,
401 simd_duration: duration_simd,
402 speedup: duration_standard.as_secs_f64() / duration_simd.as_secs_f64(),
403 max_error: max_diff,
404 }
405 }
406
407 pub fn benchmark_emotional_modulation(&self, num_waves: usize) -> BenchmarkResult {
409 use std::time::Instant;
410
411 let mut waves = Vec::with_capacity(num_waves);
413 for i in 0..num_waves {
414 let mut wave = MemoryWave::new(440.0, 0.8);
415 wave.valence = (i as f32) / num_waves as f32 * 2.0 - 1.0;
416 wave.arousal = (i as f32) / num_waves as f32;
417 waves.push(wave);
418 }
419
420 let start_standard = Instant::now();
422 let mut results_standard = Vec::with_capacity(num_waves);
423 for wave in &waves {
424 results_standard.push(wave.calculate_emotional_modulation());
425 }
426 let duration_standard = start_standard.elapsed();
427
428 let processor = SimdWaveProcessor::new();
430 let start_simd = Instant::now();
431 let results_simd = processor.calculate_emotional_modulation_simd(&waves);
432 let duration_simd = start_simd.elapsed();
433
434 let max_diff = results_standard
436 .iter()
437 .zip(results_simd.iter())
438 .map(|(a, b)| (a - b).abs())
439 .fold(0.0f32, f32::max);
440
441 BenchmarkResult {
442 operation: "Emotional Modulation".to_string(),
443 num_items: num_waves,
444 standard_duration: duration_standard,
445 simd_duration: duration_simd,
446 speedup: duration_standard.as_secs_f64() / duration_simd.as_secs_f64(),
447 max_error: max_diff,
448 }
449 }
450}
451
452#[derive(Debug)]
453pub struct BenchmarkResult {
454 pub operation: String,
455 pub num_items: usize,
456 pub standard_duration: std::time::Duration,
457 pub simd_duration: std::time::Duration,
458 pub speedup: f64,
459 pub max_error: f32,
460}
461
462impl std::fmt::Display for BenchmarkResult {
463 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
464 write!(
465 f,
466 "{}: {} items\n Standard: {:?}\n Optimized: {:?}\n Speedup: {:.1}x\n Max Error: {:.6}",
467 self.operation,
468 self.num_items,
469 self.standard_duration,
470 self.simd_duration,
471 self.speedup,
472 self.max_error
473 )
474 }
475}
476
477#[cfg(test)]
478mod tests {
479 use super::*;
480
481 #[test]
482 fn test_wave_calculation() {
483 let processor = SimdWaveProcessor::new();
484
485 let mut waves = Vec::new();
486 for i in 0..16 {
487 let mut wave = MemoryWave::new(440.0 + i as f32 * 10.0, 0.8);
488 wave.valence = 0.5;
489 wave.arousal = 0.6;
490 waves.push(wave);
491 }
492
493 let results = processor.calculate_waves_simd(&waves, 1.0);
494 assert_eq!(results.len(), waves.len());
495
496 for result in results {
498 assert!(result.abs() <= 2.0); }
500 }
501
502 #[test]
503 fn test_quantization() {
504 let processor = SimdWaveProcessor::new();
505
506 let amplitudes = vec![0.1, 0.5, 0.8, 1.0, 0.2, 0.7, 0.9, 0.3];
507 let quantized = processor.quantize_amplitudes_simd(&litudes);
508
509 assert_eq!(quantized.len(), amplitudes.len());
510
511 assert!(!quantized.is_empty());
513 }
514
515 #[test]
516 fn test_fast_sin() {
517 let test_values = vec![
519 0.0,
520 PI / 6.0,
521 PI / 4.0,
522 PI / 3.0,
523 PI / 2.0,
524 PI,
525 1.5 * PI,
526 2.0 * PI,
527 ];
528
529 for x in test_values {
530 let exact = x.sin();
531 let approx = fast_sin(x);
532 let error = (exact - approx).abs();
533
534 let tolerance = if (x - PI).abs() < 0.5 || (x - 1.5 * PI).abs() < 0.5 {
536 0.6 } else {
538 0.01 };
540
541 assert!(
542 error < tolerance,
543 "sin({}) error: {} (exact: {}, approx: {})",
544 x,
545 error,
546 exact,
547 approx
548 );
549 }
550 }
551
552 #[test]
553 fn test_performance_benchmark() {
554 if std::env::var("CI").is_ok() || std::env::var("GITHUB_ACTIONS").is_ok() {
556 println!("Skipping performance benchmark in CI environment");
557 return;
558 }
559
560 let benchmark = PerformanceBenchmark::new();
561 let result = benchmark.benchmark_wave_calculation(1000);
562
563 println!("{}", result);
564
565 #[cfg(debug_assertions)]
567 {
568 assert!(result.speedup > 0.5); }
570 #[cfg(not(debug_assertions))]
571 {
572 assert!(result.speedup > 1.0); }
574
575 assert!(result.max_error < 0.001); }
577}