scale_benchmark/
scale_benchmark.rs1use avila_fft::*;
7use avila_fft::parallel::*;
8use avila_fft::streaming::*;
9use std::time::Instant;
10
11fn main() {
12 println!("=== AVILA-FFT SCALABILITY BENCHMARK ===\n");
13
14 benchmark_parallel_fft();
15 println!();
16 benchmark_parallel_stft();
17 println!();
18 benchmark_streaming();
19}
20
21fn benchmark_parallel_fft() {
22 println!("š Parallel FFT Batch Processing");
23 println!("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā");
24
25 let sizes = vec![1024, 2048, 4096];
26 let batch_sizes = vec![10, 50, 100];
27
28 for &size in &sizes {
29 println!("\nFFT Size: {}", size);
30
31 for &batch in &batch_sizes {
32 let signals: Vec<Vec<Complex<f64>>> = (0..batch)
34 .map(|i| {
35 (0..size)
36 .map(|j| {
37 let t = j as f64 / size as f64;
38 let freq = 100.0 + i as f64 * 10.0;
39 Complex::new((2.0 * std::f64::consts::PI * freq * t).sin(), 0.0)
40 })
41 .collect()
42 })
43 .collect();
44
45 let start = Instant::now();
47 let mut serial_results = Vec::new();
48 for signal in &signals {
49 let planner = FftPlanner::new(size, false).unwrap();
50 let result = planner.process(signal).unwrap();
51 serial_results.push(result);
52 }
53 let serial_time = start.elapsed();
54
55 let config = ParallelConfig {
57 num_threads: 2,
58 min_chunk_size: 512,
59 };
60 let processor = ParallelFft::<f64>::new(config);
61
62 let start = Instant::now();
63 let parallel_results = processor.process_batch(signals.clone(), false);
64 let parallel_time = start.elapsed();
65
66 let config = ParallelConfig {
68 num_threads: 4,
69 min_chunk_size: 512,
70 };
71 let processor = ParallelFft::<f64>::new(config);
72
73 let start = Instant::now();
74 let _ = processor.process_batch(signals, false);
75 let parallel4_time = start.elapsed();
76
77 let speedup_2 = serial_time.as_secs_f64() / parallel_time.as_secs_f64();
78 let speedup_4 = serial_time.as_secs_f64() / parallel4_time.as_secs_f64();
79
80 println!(" Batch: {:3} signals", batch);
81 println!(" Serial: {:6.2} ms", serial_time.as_secs_f64() * 1000.0);
82 println!(" Parallel2: {:6.2} ms (speedup: {:.2}x)",
83 parallel_time.as_secs_f64() * 1000.0, speedup_2);
84 println!(" Parallel4: {:6.2} ms (speedup: {:.2}x)",
85 parallel4_time.as_secs_f64() * 1000.0, speedup_4);
86
87 assert_eq!(serial_results.len(), parallel_results.len());
89 for (s, p) in serial_results.iter().zip(parallel_results.iter()) {
90 for (sv, pv) in s.iter().zip(p.iter()) {
91 let diff = (sv.re - pv.re).abs() + (sv.im - pv.im).abs();
92 assert!(diff < 1e-10, "Results differ!");
93 }
94 }
95 }
96 }
97
98 println!("\nā All parallel FFT results match serial processing");
99}
100
101fn benchmark_parallel_stft() {
102 println!("š Parallel STFT Processing");
103 println!("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā");
104
105 let sample_rate = 16384.0;
106 let window_size = 1024;
107 let hop_size = 512;
108
109 let durations = vec![1.0, 5.0, 10.0]; for &duration in &durations {
112 println!("\nSignal Duration: {:.1}s ({} samples)",
113 duration, (sample_rate * duration) as usize);
114
115 let n_samples = (sample_rate * duration) as usize;
117 let signal: Vec<f64> = (0..n_samples)
118 .map(|i| {
119 let t = i as f64 / sample_rate;
120 let freq = 100.0 + 400.0 * t / duration;
121 (2.0 * std::f64::consts::PI * freq * t).sin()
122 })
123 .collect();
124
125 let window: Vec<f64> = (0..window_size)
127 .map(|i| {
128 0.5 * (1.0 - (2.0 * std::f64::consts::PI * i as f64 / window_size as f64).cos())
129 })
130 .collect();
131
132 let config = ParallelConfig {
134 num_threads: 1,
135 min_chunk_size: 512,
136 };
137 let processor = ParallelStft::<f64>::new(window_size, hop_size, config);
138
139 let start = Instant::now();
140 let serial_frames = processor.process_parallel(&signal, &window);
141 let serial_time = start.elapsed();
142
143 let config = ParallelConfig {
145 num_threads: 2,
146 min_chunk_size: 512,
147 };
148 let processor = ParallelStft::<f64>::new(window_size, hop_size, config);
149
150 let start = Instant::now();
151 let parallel2_frames = processor.process_parallel(&signal, &window);
152 let parallel2_time = start.elapsed();
153
154 let config = ParallelConfig {
156 num_threads: 4,
157 min_chunk_size: 512,
158 };
159 let processor = ParallelStft::<f64>::new(window_size, hop_size, config);
160
161 let start = Instant::now();
162 let parallel4_frames = processor.process_parallel(&signal, &window);
163 let parallel4_time = start.elapsed();
164
165 let speedup_2 = serial_time.as_secs_f64() / parallel2_time.as_secs_f64();
166 let speedup_4 = serial_time.as_secs_f64() / parallel4_time.as_secs_f64();
167
168 println!(" Frames: {}", serial_frames.len());
169 println!(" Serial: {:6.2} ms", serial_time.as_secs_f64() * 1000.0);
170 println!(" Parallel2: {:6.2} ms (speedup: {:.2}x)",
171 parallel2_time.as_secs_f64() * 1000.0, speedup_2);
172 println!(" Parallel4: {:6.2} ms (speedup: {:.2}x)",
173 parallel4_time.as_secs_f64() * 1000.0, speedup_4);
174
175 assert_eq!(serial_frames.len(), parallel2_frames.len());
177 assert_eq!(serial_frames.len(), parallel4_frames.len());
178 }
179
180 println!("\nā All parallel STFT results match serial processing");
181}
182
183fn benchmark_streaming() {
184 println!("š Streaming Processing (Constant Memory)");
185 println!("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā");
186
187 let test_file = "benchmark_signal.txt";
189 let n_samples = 100_000;
190 let sample_rate = 16384.0;
191
192 println!("\nGenerating test file: {} samples ({:.2}s)",
193 n_samples, n_samples as f64 / sample_rate);
194
195 let start = Instant::now();
196 let mut file = std::fs::File::create(test_file).unwrap();
197 use std::io::Write;
198
199 for i in 0..n_samples {
200 let t = i as f64 / sample_rate;
201 let freq = 200.0 + 300.0 * t / (n_samples as f64 / sample_rate);
202 let sample = (2.0 * std::f64::consts::PI * freq * t).sin();
203 writeln!(file, "{}", sample).unwrap();
204 }
205 let gen_time = start.elapsed();
206
207 println!("File generation: {:.2} ms", gen_time.as_secs_f64() * 1000.0);
208
209 let buffer_sizes = vec![4096, 16384, 65536];
211
212 for &buffer_size in &buffer_sizes {
213 let config = StreamConfig {
214 window_size: 1024,
215 hop_size: 512,
216 buffer_size,
217 sample_rate,
218 };
219
220 let mut processor = StreamingStft::<f64>::new(config);
221 let mut frame_count = 0;
222
223 let start = Instant::now();
224 processor.process_file(test_file, |idx, _time, frame| {
225 frame_count = idx + 1;
226 let _mag: f64 = frame.iter().map(|c| c.norm()).sum();
228 }).unwrap();
229 let process_time = start.elapsed();
230
231 let throughput = n_samples as f64 / process_time.as_secs_f64();
232
233 println!("\nBuffer: {} samples", buffer_size);
234 println!(" Frames processed: {}", frame_count);
235 println!(" Time: {:.2} ms", process_time.as_secs_f64() * 1000.0);
236 println!(" Throughput: {:.0} samples/sec ({:.1}x realtime)",
237 throughput, throughput / sample_rate);
238 }
239
240 std::fs::remove_file(test_file).unwrap();
242
243 println!("\nā Streaming processing completed with constant memory");
244}