scale_benchmark/
scale_benchmark.rs

1//! Scalability Benchmark
2//!
3//! Demonstrates performance scaling of parallel FFT and STFT processing.
4//! Shows speedup vs serial processing and performance metrics.
5
6use avila_fft::*;
7use avila_fft::parallel::*;
8use avila_fft::streaming::*;
9use std::time::Instant;
10
11fn main() {
12    println!("=== AVILA-FFT SCALABILITY BENCHMARK ===\n");
13
14    benchmark_parallel_fft();
15    println!();
16    benchmark_parallel_stft();
17    println!();
18    benchmark_streaming();
19}
20
21fn benchmark_parallel_fft() {
22    println!("šŸ“Š Parallel FFT Batch Processing");
23    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
24
25    let sizes = vec![1024, 2048, 4096];
26    let batch_sizes = vec![10, 50, 100];
27
28    for &size in &sizes {
29        println!("\nFFT Size: {}", size);
30
31        for &batch in &batch_sizes {
32            // Generate test signals
33            let signals: Vec<Vec<Complex<f64>>> = (0..batch)
34                .map(|i| {
35                    (0..size)
36                        .map(|j| {
37                            let t = j as f64 / size as f64;
38                            let freq = 100.0 + i as f64 * 10.0;
39                            Complex::new((2.0 * std::f64::consts::PI * freq * t).sin(), 0.0)
40                        })
41                        .collect()
42                })
43                .collect();
44
45            // Serial processing
46            let start = Instant::now();
47            let mut serial_results = Vec::new();
48            for signal in &signals {
49                let planner = FftPlanner::new(size, false).unwrap();
50                let result = planner.process(signal).unwrap();
51                serial_results.push(result);
52            }
53            let serial_time = start.elapsed();
54
55            // Parallel processing (2 threads)
56            let config = ParallelConfig {
57                num_threads: 2,
58                min_chunk_size: 512,
59            };
60            let processor = ParallelFft::<f64>::new(config);
61
62            let start = Instant::now();
63            let parallel_results = processor.process_batch(signals.clone(), false);
64            let parallel_time = start.elapsed();
65
66            // Parallel processing (4 threads)
67            let config = ParallelConfig {
68                num_threads: 4,
69                min_chunk_size: 512,
70            };
71            let processor = ParallelFft::<f64>::new(config);
72
73            let start = Instant::now();
74            let _ = processor.process_batch(signals, false);
75            let parallel4_time = start.elapsed();
76
77            let speedup_2 = serial_time.as_secs_f64() / parallel_time.as_secs_f64();
78            let speedup_4 = serial_time.as_secs_f64() / parallel4_time.as_secs_f64();
79
80            println!("  Batch: {:3} signals", batch);
81            println!("    Serial:    {:6.2} ms", serial_time.as_secs_f64() * 1000.0);
82            println!("    Parallel2: {:6.2} ms (speedup: {:.2}x)",
83                     parallel_time.as_secs_f64() * 1000.0, speedup_2);
84            println!("    Parallel4: {:6.2} ms (speedup: {:.2}x)",
85                     parallel4_time.as_secs_f64() * 1000.0, speedup_4);
86
87            // Verify correctness
88            assert_eq!(serial_results.len(), parallel_results.len());
89            for (s, p) in serial_results.iter().zip(parallel_results.iter()) {
90                for (sv, pv) in s.iter().zip(p.iter()) {
91                    let diff = (sv.re - pv.re).abs() + (sv.im - pv.im).abs();
92                    assert!(diff < 1e-10, "Results differ!");
93                }
94            }
95        }
96    }
97
98    println!("\nāœ“ All parallel FFT results match serial processing");
99}
100
101fn benchmark_parallel_stft() {
102    println!("šŸ“Š Parallel STFT Processing");
103    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
104
105    let sample_rate = 16384.0;
106    let window_size = 1024;
107    let hop_size = 512;
108
109    let durations = vec![1.0, 5.0, 10.0]; // seconds
110
111    for &duration in &durations {
112        println!("\nSignal Duration: {:.1}s ({} samples)",
113                 duration, (sample_rate * duration) as usize);
114
115        // Generate test signal (chirp)
116        let n_samples = (sample_rate * duration) as usize;
117        let signal: Vec<f64> = (0..n_samples)
118            .map(|i| {
119                let t = i as f64 / sample_rate;
120                let freq = 100.0 + 400.0 * t / duration;
121                (2.0 * std::f64::consts::PI * freq * t).sin()
122            })
123            .collect();
124
125        // Hann window
126        let window: Vec<f64> = (0..window_size)
127            .map(|i| {
128                0.5 * (1.0 - (2.0 * std::f64::consts::PI * i as f64 / window_size as f64).cos())
129            })
130            .collect();
131
132        // Serial STFT (using parallel processor with 1 thread)
133        let config = ParallelConfig {
134            num_threads: 1,
135            min_chunk_size: 512,
136        };
137        let processor = ParallelStft::<f64>::new(window_size, hop_size, config);
138
139        let start = Instant::now();
140        let serial_frames = processor.process_parallel(&signal, &window);
141        let serial_time = start.elapsed();
142
143        // Parallel STFT (2 threads)
144        let config = ParallelConfig {
145            num_threads: 2,
146            min_chunk_size: 512,
147        };
148        let processor = ParallelStft::<f64>::new(window_size, hop_size, config);
149
150        let start = Instant::now();
151        let parallel2_frames = processor.process_parallel(&signal, &window);
152        let parallel2_time = start.elapsed();
153
154        // Parallel STFT (4 threads)
155        let config = ParallelConfig {
156            num_threads: 4,
157            min_chunk_size: 512,
158        };
159        let processor = ParallelStft::<f64>::new(window_size, hop_size, config);
160
161        let start = Instant::now();
162        let parallel4_frames = processor.process_parallel(&signal, &window);
163        let parallel4_time = start.elapsed();
164
165        let speedup_2 = serial_time.as_secs_f64() / parallel2_time.as_secs_f64();
166        let speedup_4 = serial_time.as_secs_f64() / parallel4_time.as_secs_f64();
167
168        println!("  Frames: {}", serial_frames.len());
169        println!("  Serial:    {:6.2} ms", serial_time.as_secs_f64() * 1000.0);
170        println!("  Parallel2: {:6.2} ms (speedup: {:.2}x)",
171                 parallel2_time.as_secs_f64() * 1000.0, speedup_2);
172        println!("  Parallel4: {:6.2} ms (speedup: {:.2}x)",
173                 parallel4_time.as_secs_f64() * 1000.0, speedup_4);
174
175        // Verify correctness
176        assert_eq!(serial_frames.len(), parallel2_frames.len());
177        assert_eq!(serial_frames.len(), parallel4_frames.len());
178    }
179
180    println!("\nāœ“ All parallel STFT results match serial processing");
181}
182
183fn benchmark_streaming() {
184    println!("šŸ“Š Streaming Processing (Constant Memory)");
185    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
186
187    // Create test file with 100K samples (~6 seconds at 16384 Hz)
188    let test_file = "benchmark_signal.txt";
189    let n_samples = 100_000;
190    let sample_rate = 16384.0;
191
192    println!("\nGenerating test file: {} samples ({:.2}s)",
193             n_samples, n_samples as f64 / sample_rate);
194
195    let start = Instant::now();
196    let mut file = std::fs::File::create(test_file).unwrap();
197    use std::io::Write;
198
199    for i in 0..n_samples {
200        let t = i as f64 / sample_rate;
201        let freq = 200.0 + 300.0 * t / (n_samples as f64 / sample_rate);
202        let sample = (2.0 * std::f64::consts::PI * freq * t).sin();
203        writeln!(file, "{}", sample).unwrap();
204    }
205    let gen_time = start.elapsed();
206
207    println!("File generation: {:.2} ms", gen_time.as_secs_f64() * 1000.0);
208
209    // Streaming processing with different buffer sizes
210    let buffer_sizes = vec![4096, 16384, 65536];
211
212    for &buffer_size in &buffer_sizes {
213        let config = StreamConfig {
214            window_size: 1024,
215            hop_size: 512,
216            buffer_size,
217            sample_rate,
218        };
219
220        let mut processor = StreamingStft::<f64>::new(config);
221        let mut frame_count = 0;
222
223        let start = Instant::now();
224        processor.process_file(test_file, |idx, _time, frame| {
225            frame_count = idx + 1;
226            // Simulate some processing
227            let _mag: f64 = frame.iter().map(|c| c.norm()).sum();
228        }).unwrap();
229        let process_time = start.elapsed();
230
231        let throughput = n_samples as f64 / process_time.as_secs_f64();
232
233        println!("\nBuffer: {} samples", buffer_size);
234        println!("  Frames processed: {}", frame_count);
235        println!("  Time: {:.2} ms", process_time.as_secs_f64() * 1000.0);
236        println!("  Throughput: {:.0} samples/sec ({:.1}x realtime)",
237                 throughput, throughput / sample_rate);
238    }
239
240    // Cleanup
241    std::fs::remove_file(test_file).unwrap();
242
243    println!("\nāœ“ Streaming processing completed with constant memory");
244}