1use crate::algorithm_selector::{
26 AlgorithmRecommendation, AlgorithmSelector, FftAlgorithm, InputCharacteristics,
27 PerformanceEntry, SelectionConfig,
28};
29use crate::error::{FFTError, FFTResult};
30
31use oxifft::{Complex as OxiComplex, Direction};
34
35use scirs2_core::numeric::Complex64;
36use serde::{Deserialize, Serialize};
37use std::collections::HashMap;
38use std::time::{Duration, Instant};
39
40#[derive(Debug, Clone)]
42pub struct ProfileConfig {
43 pub warmup_iterations: usize,
45 pub benchmark_iterations: usize,
47 pub track_memory: bool,
49 pub sizes: Option<Vec<usize>>,
51 pub algorithms: Option<Vec<FftAlgorithm>>,
53 pub include_inverse: bool,
55 pub timeout_ms: u64,
57}
58
59impl Default for ProfileConfig {
60 fn default() -> Self {
61 Self {
62 warmup_iterations: 5,
63 benchmark_iterations: 20,
64 track_memory: true,
65 sizes: None,
66 algorithms: None,
67 include_inverse: true,
68 timeout_ms: 30000, }
70 }
71}
72
73const DEFAULT_SIZES: &[usize] = &[
75 64, 128, 256, 512, 1024, 2048, 4096, 8192, 100, 360, 480, 1000, 2000, 5000, 97, 101, 127, 251, 509, 1009, 2003, 1200, 3600, 7200, 65536, 131072, 262144, ];
81
82#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct Measurement {
85 pub time_ns: u64,
87 pub peak_memory_bytes: usize,
89 pub allocated_bytes: usize,
91}
92
93#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct ProfileResult {
96 pub size: usize,
98 pub algorithm: FftAlgorithm,
100 pub forward: bool,
102 pub measurements: Vec<Measurement>,
104 pub avg_time: Duration,
106 pub min_time: Duration,
108 pub max_time: Duration,
110 pub std_dev: Duration,
112 pub median_time: Duration,
114 pub p90_time: Duration,
116 pub p99_time: Duration,
118 pub ops_per_second: f64,
120 pub avg_peak_memory: usize,
122 pub elements_per_second: f64,
124 pub input_characteristics: Option<InputCharacteristics>,
126}
127
128impl ProfileResult {
129 pub fn from_measurements(
131 size: usize,
132 algorithm: FftAlgorithm,
133 forward: bool,
134 measurements: Vec<Measurement>,
135 input_characteristics: Option<InputCharacteristics>,
136 ) -> Self {
137 if measurements.is_empty() {
138 return Self {
139 size,
140 algorithm,
141 forward,
142 measurements: Vec::new(),
143 avg_time: Duration::ZERO,
144 min_time: Duration::ZERO,
145 max_time: Duration::ZERO,
146 std_dev: Duration::ZERO,
147 median_time: Duration::ZERO,
148 p90_time: Duration::ZERO,
149 p99_time: Duration::ZERO,
150 ops_per_second: 0.0,
151 avg_peak_memory: 0,
152 elements_per_second: 0.0,
153 input_characteristics,
154 };
155 }
156
157 let times_ns: Vec<u64> = measurements.iter().map(|m| m.time_ns).collect();
158 let n = times_ns.len();
159
160 let sum: u64 = times_ns.iter().sum();
162 let avg_ns = sum / n as u64;
163
164 let min_ns = times_ns.iter().min().copied().unwrap_or(0);
165 let max_ns = times_ns.iter().max().copied().unwrap_or(0);
166
167 let variance: f64 = times_ns
169 .iter()
170 .map(|&t| {
171 let diff = t as f64 - avg_ns as f64;
172 diff * diff
173 })
174 .sum::<f64>()
175 / n as f64;
176 let std_dev_ns = variance.sqrt() as u64;
177
178 let mut sorted_times = times_ns.clone();
180 sorted_times.sort_unstable();
181
182 let median_ns = if n % 2 == 0 {
183 (sorted_times[n / 2 - 1] + sorted_times[n / 2]) / 2
184 } else {
185 sorted_times[n / 2]
186 };
187
188 let p90_idx = (n as f64 * 0.90).ceil() as usize - 1;
189 let p99_idx = (n as f64 * 0.99).ceil() as usize - 1;
190 let p90_ns = sorted_times.get(p90_idx.min(n - 1)).copied().unwrap_or(0);
191 let p99_ns = sorted_times.get(p99_idx.min(n - 1)).copied().unwrap_or(0);
192
193 let ops_per_second = if avg_ns > 0 {
195 1_000_000_000.0 / avg_ns as f64
196 } else {
197 0.0
198 };
199
200 let elements_per_second = ops_per_second * size as f64;
201
202 let avg_peak_memory = if !measurements.is_empty() {
204 measurements
205 .iter()
206 .map(|m| m.peak_memory_bytes)
207 .sum::<usize>()
208 / measurements.len()
209 } else {
210 0
211 };
212
213 Self {
214 size,
215 algorithm,
216 forward,
217 measurements,
218 avg_time: Duration::from_nanos(avg_ns),
219 min_time: Duration::from_nanos(min_ns),
220 max_time: Duration::from_nanos(max_ns),
221 std_dev: Duration::from_nanos(std_dev_ns),
222 median_time: Duration::from_nanos(median_ns),
223 p90_time: Duration::from_nanos(p90_ns),
224 p99_time: Duration::from_nanos(p99_ns),
225 ops_per_second,
226 avg_peak_memory,
227 elements_per_second,
228 input_characteristics,
229 }
230 }
231
232 pub fn as_table_row(&self) -> String {
234 format!(
235 "{:>8} | {:>16} | {:>12?} | {:>12?} | {:>12?} | {:>12.2} | {:>10}",
236 self.size,
237 self.algorithm.to_string(),
238 self.avg_time,
239 self.min_time,
240 self.max_time,
241 self.ops_per_second,
242 format_bytes(self.avg_peak_memory)
243 )
244 }
245}
246
247#[derive(Debug, Clone, Serialize, Deserialize)]
249pub struct AlgorithmComparison {
250 pub size: usize,
252 pub results: HashMap<FftAlgorithm, ProfileResult>,
254 pub best_algorithm: FftAlgorithm,
256 pub speedup: f64,
258 pub recommendation: String,
260}
261
262#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct PerformanceReport {
265 pub results: HashMap<(usize, FftAlgorithm), ProfileResult>,
267 pub best_by_size: HashMap<usize, FftAlgorithm>,
269 pub comparisons: Vec<AlgorithmComparison>,
271 pub total_time: Duration,
273 pub config_summary: String,
275}
276
277impl PerformanceReport {
278 pub fn to_text_report(&self) -> String {
280 let mut report = String::new();
281
282 report.push_str("=== FFT Performance Report ===\n\n");
283 report.push_str(&format!("Total profiling time: {:?}\n", self.total_time));
284 report.push_str(&format!("Configuration: {}\n\n", self.config_summary));
285
286 report.push_str("--- Results by Size ---\n\n");
287 report.push_str(&format!(
288 "{:>8} | {:>16} | {:>12} | {:>12} | {:>12} | {:>12} | {:>10}\n",
289 "Size", "Algorithm", "Avg Time", "Min Time", "Max Time", "Ops/sec", "Memory"
290 ));
291 report.push_str(&"-".repeat(100));
292 report.push('\n');
293
294 let mut sizes: Vec<usize> = self.best_by_size.keys().copied().collect();
295 sizes.sort_unstable();
296
297 for size in sizes {
298 if let Some(algo) = self.best_by_size.get(&size) {
299 if let Some(result) = self.results.get(&(size, *algo)) {
300 report.push_str(&result.as_table_row());
301 report.push('\n');
302 }
303 }
304 }
305
306 report.push_str("\n--- Recommendations ---\n\n");
307 for comparison in &self.comparisons {
308 report.push_str(&format!(
309 "Size {:>8}: {} (speedup: {:.2}x)\n",
310 comparison.size, comparison.recommendation, comparison.speedup
311 ));
312 }
313
314 report
315 }
316}
317
318pub struct PerformanceProfiler {
320 config: ProfileConfig,
322 selector: AlgorithmSelector,
324}
325
326impl Default for PerformanceProfiler {
327 fn default() -> Self {
328 Self::new()
329 }
330}
331
332impl PerformanceProfiler {
333 pub fn new() -> Self {
335 Self::with_config(ProfileConfig::default())
336 }
337
338 pub fn with_config(config: ProfileConfig) -> Self {
340 Self {
341 config,
342 selector: AlgorithmSelector::new(),
343 }
344 }
345
346 pub fn profile_size(&self, size: usize, forward: bool) -> FFTResult<ProfileResult> {
348 let recommendation = self.selector.select_algorithm(size, forward)?;
349 self.profile_size_with_algorithm(size, recommendation.algorithm, forward)
350 }
351
352 pub fn profile_size_with_algorithm(
354 &self,
355 size: usize,
356 algorithm: FftAlgorithm,
357 forward: bool,
358 ) -> FFTResult<ProfileResult> {
359 if size == 0 {
361 return Err(FFTError::ValueError("Size must be positive".to_string()));
362 }
363
364 let data: Vec<OxiComplex<f64>> = (0..size)
366 .map(|i| OxiComplex::new(i as f64, (i * 2) as f64))
367 .collect();
368
369 let direction = if forward {
371 Direction::Forward
372 } else {
373 Direction::Backward
374 };
375 let plan = oxifft::Plan::dft_1d(size, direction, oxifft::Flags::ESTIMATE)
376 .ok_or_else(|| FFTError::ComputationError("Failed to create FFT plan".to_string()))?;
377
378 for _ in 0..self.config.warmup_iterations {
380 let work_data = data.clone();
381 let mut out = vec![OxiComplex::new(0.0, 0.0); size];
382 plan.execute(&work_data, &mut out);
383 }
384
385 let mut measurements = Vec::with_capacity(self.config.benchmark_iterations);
387
388 for _ in 0..self.config.benchmark_iterations {
389 let mut work_data = data.clone();
390
391 let memory_before = if self.config.track_memory {
393 estimate_current_allocation()
394 } else {
395 0
396 };
397
398 let start = Instant::now();
400 let mut out = vec![OxiComplex::new(0.0, 0.0); size];
401 plan.execute(&work_data, &mut out);
402 let elapsed = start.elapsed();
403
404 let memory_after = if self.config.track_memory {
406 estimate_current_allocation()
407 } else {
408 0
409 };
410
411 measurements.push(Measurement {
412 time_ns: elapsed.as_nanos() as u64,
413 peak_memory_bytes: size * 16, allocated_bytes: memory_after.saturating_sub(memory_before),
415 });
416 }
417
418 let chars = InputCharacteristics::analyze(size, &self.selector.hardware().cache_info);
420
421 Ok(ProfileResult::from_measurements(
422 size,
423 algorithm,
424 forward,
425 measurements,
426 Some(chars),
427 ))
428 }
429
430 pub fn profile_sizes(&self, sizes: &[usize], forward: bool) -> FFTResult<Vec<ProfileResult>> {
432 let mut results = Vec::with_capacity(sizes.len());
433
434 for &size in sizes {
435 match self.profile_size(size, forward) {
436 Ok(result) => results.push(result),
437 Err(e) => {
438 eprintln!("Failed to profile size {}: {}", size, e);
440 }
441 }
442 }
443
444 Ok(results)
445 }
446
447 pub fn compare_algorithms(
449 &self,
450 size: usize,
451 algorithms: &[FftAlgorithm],
452 forward: bool,
453 ) -> FFTResult<AlgorithmComparison> {
454 let mut results = HashMap::new();
455
456 for &algorithm in algorithms {
457 match self.profile_size_with_algorithm(size, algorithm, forward) {
458 Ok(result) => {
459 results.insert(algorithm, result);
460 }
461 Err(e) => {
462 eprintln!("Failed to profile {} for size {}: {}", algorithm, size, e);
463 }
464 }
465 }
466
467 if results.is_empty() {
468 return Err(FFTError::ComputationError(
469 "No algorithms could be profiled".to_string(),
470 ));
471 }
472
473 let best_algorithm = results
475 .iter()
476 .min_by_key(|(_, r)| r.avg_time)
477 .map(|(&algo, _)| algo)
478 .unwrap_or(FftAlgorithm::MixedRadix);
479
480 let best_time = results
481 .get(&best_algorithm)
482 .map(|r| r.avg_time)
483 .unwrap_or(Duration::ZERO);
484
485 let worst_time = results
486 .values()
487 .map(|r| r.avg_time)
488 .max()
489 .unwrap_or(Duration::ZERO);
490
491 let speedup = if best_time.as_nanos() > 0 {
492 worst_time.as_nanos() as f64 / best_time.as_nanos() as f64
493 } else {
494 1.0
495 };
496
497 let recommendation = format!("{} is fastest ({:?})", best_algorithm, best_time);
498
499 Ok(AlgorithmComparison {
500 size,
501 results,
502 best_algorithm,
503 speedup,
504 recommendation,
505 })
506 }
507
508 pub fn run_comprehensive_profile(&self) -> FFTResult<PerformanceReport> {
510 let start = Instant::now();
511
512 let sizes = self.config.sizes.as_deref().unwrap_or(DEFAULT_SIZES);
513
514 let algorithms = self.config.algorithms.as_deref().unwrap_or(&[
515 FftAlgorithm::CooleyTukeyRadix2,
516 FftAlgorithm::SplitRadix,
517 FftAlgorithm::MixedRadix,
518 FftAlgorithm::Bluestein,
519 ]);
520
521 let mut results = HashMap::new();
522 let mut best_by_size = HashMap::new();
523 let mut comparisons = Vec::new();
524
525 for &size in sizes {
526 let comparison = self.compare_algorithms(size, algorithms, true)?;
528
529 for (algo, result) in &comparison.results {
530 results.insert((size, *algo), result.clone());
531 }
532
533 best_by_size.insert(size, comparison.best_algorithm);
534 comparisons.push(comparison);
535
536 if self.config.include_inverse {
538 if let Ok(inv_comparison) = self.compare_algorithms(size, algorithms, false) {
539 let _ = inv_comparison;
542 }
543 }
544 }
545
546 let total_time = start.elapsed();
547
548 let config_summary = format!(
549 "warmup={}, iterations={}, sizes={}, algorithms={}",
550 self.config.warmup_iterations,
551 self.config.benchmark_iterations,
552 sizes.len(),
553 algorithms.len()
554 );
555
556 Ok(PerformanceReport {
557 results,
558 best_by_size,
559 comparisons,
560 total_time,
561 config_summary,
562 })
563 }
564
565 pub fn auto_tune(&mut self, sizes: &[usize]) -> FFTResult<HashMap<usize, FftAlgorithm>> {
567 let algorithms = [
568 FftAlgorithm::CooleyTukeyRadix2,
569 FftAlgorithm::SplitRadix,
570 FftAlgorithm::MixedRadix,
571 FftAlgorithm::Bluestein,
572 ];
573
574 let mut best_algorithms = HashMap::new();
575
576 for &size in sizes {
577 let comparison = self.compare_algorithms(size, &algorithms, true)?;
578 best_algorithms.insert(size, comparison.best_algorithm);
579
580 if let Some(result) = comparison.results.get(&comparison.best_algorithm) {
582 let entry = PerformanceEntry {
583 size,
584 algorithm: comparison.best_algorithm,
585 forward: true,
586 execution_time_ns: result.avg_time.as_nanos() as u64,
587 peak_memory_bytes: result.avg_peak_memory,
588 timestamp: std::time::SystemTime::now()
589 .duration_since(std::time::UNIX_EPOCH)
590 .map(|d| d.as_secs())
591 .unwrap_or(0),
592 hardware_hash: 0,
593 };
594 let _ = self.selector.record_performance(entry);
595 }
596 }
597
598 Ok(best_algorithms)
599 }
600
601 pub fn selector(&self) -> &AlgorithmSelector {
603 &self.selector
604 }
605
606 pub fn selector_mut(&mut self) -> &mut AlgorithmSelector {
608 &mut self.selector
609 }
610}
611
612pub struct MemoryProfiler {
614 enabled: bool,
616 peak_memory: usize,
618 current_allocation: usize,
620}
621
622impl Default for MemoryProfiler {
623 fn default() -> Self {
624 Self::new()
625 }
626}
627
628impl MemoryProfiler {
629 pub fn new() -> Self {
631 Self {
632 enabled: true,
633 peak_memory: 0,
634 current_allocation: 0,
635 }
636 }
637
638 pub fn record_allocation(&mut self, bytes: usize) {
640 if self.enabled {
641 self.current_allocation += bytes;
642 if self.current_allocation > self.peak_memory {
643 self.peak_memory = self.current_allocation;
644 }
645 }
646 }
647
648 pub fn record_deallocation(&mut self, bytes: usize) {
650 if self.enabled {
651 self.current_allocation = self.current_allocation.saturating_sub(bytes);
652 }
653 }
654
655 pub fn reset(&mut self) {
657 self.peak_memory = 0;
658 self.current_allocation = 0;
659 }
660
661 pub fn peak_memory(&self) -> usize {
663 self.peak_memory
664 }
665
666 pub fn current_allocation(&self) -> usize {
668 self.current_allocation
669 }
670
671 pub fn set_enabled(&mut self, enabled: bool) {
673 self.enabled = enabled;
674 }
675}
676
677pub fn estimate_fft_memory(size: usize, algorithm: FftAlgorithm) -> usize {
679 let base_memory = size * 16; let overhead = match algorithm {
683 FftAlgorithm::CooleyTukeyRadix2 => base_memory / 4, FftAlgorithm::Radix4 => base_memory / 4,
685 FftAlgorithm::SplitRadix => base_memory / 3,
686 FftAlgorithm::MixedRadix => base_memory / 2,
687 FftAlgorithm::Bluestein => base_memory * 2, FftAlgorithm::Rader => base_memory,
689 FftAlgorithm::Winograd => base_memory / 2,
690 FftAlgorithm::GoodThomas => base_memory / 3,
691 FftAlgorithm::Streaming => base_memory / 8, FftAlgorithm::CacheOblivious => base_memory / 4,
693 FftAlgorithm::InPlace => 0, FftAlgorithm::SimdOptimized => base_memory / 4,
695 FftAlgorithm::Parallel => base_memory, FftAlgorithm::Hybrid => base_memory / 2,
697 };
698
699 base_memory + overhead
700}
701
702fn estimate_current_allocation() -> usize {
704 0
707}
708
709fn format_bytes(bytes: usize) -> String {
711 if bytes >= 1024 * 1024 * 1024 {
712 format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
713 } else if bytes >= 1024 * 1024 {
714 format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
715 } else if bytes >= 1024 {
716 format!("{:.2} KB", bytes as f64 / 1024.0)
717 } else {
718 format!("{} B", bytes)
719 }
720}
721
722#[cfg(test)]
723mod tests {
724 use super::*;
725
726 #[test]
727 fn test_profile_result_statistics() {
728 let measurements = vec![
729 Measurement {
730 time_ns: 1000,
731 peak_memory_bytes: 1000,
732 allocated_bytes: 100,
733 },
734 Measurement {
735 time_ns: 1200,
736 peak_memory_bytes: 1000,
737 allocated_bytes: 100,
738 },
739 Measurement {
740 time_ns: 1100,
741 peak_memory_bytes: 1000,
742 allocated_bytes: 100,
743 },
744 Measurement {
745 time_ns: 1050,
746 peak_memory_bytes: 1000,
747 allocated_bytes: 100,
748 },
749 ];
750
751 let result = ProfileResult::from_measurements(
752 256,
753 FftAlgorithm::MixedRadix,
754 true,
755 measurements,
756 None,
757 );
758
759 assert_eq!(result.size, 256);
760 assert!(result.avg_time.as_nanos() > 0);
761 assert!(result.min_time <= result.avg_time);
762 assert!(result.max_time >= result.avg_time);
763 assert!(result.ops_per_second > 0.0);
764 }
765
766 #[test]
767 fn test_profile_single_size() {
768 let profiler = PerformanceProfiler::new();
769 let result = profiler.profile_size(256, true);
770
771 assert!(result.is_ok());
772 let profile = result.expect("Profiling failed");
773 assert_eq!(profile.size, 256);
774 assert!(profile.avg_time.as_nanos() > 0);
775 }
776
777 #[test]
778 fn test_compare_algorithms() {
779 let profiler = PerformanceProfiler::new();
780 let algorithms = [FftAlgorithm::MixedRadix, FftAlgorithm::Bluestein];
781
782 let result = profiler.compare_algorithms(256, &algorithms, true);
783
784 assert!(result.is_ok());
785 let comparison = result.expect("Comparison failed");
786 assert_eq!(comparison.size, 256);
787 assert!(!comparison.results.is_empty());
788 assert!(comparison.speedup >= 1.0);
789 }
790
791 #[test]
792 fn test_memory_profiler() {
793 let mut mp = MemoryProfiler::new();
794
795 mp.record_allocation(1000);
796 assert_eq!(mp.current_allocation(), 1000);
797 assert_eq!(mp.peak_memory(), 1000);
798
799 mp.record_allocation(500);
800 assert_eq!(mp.current_allocation(), 1500);
801 assert_eq!(mp.peak_memory(), 1500);
802
803 mp.record_deallocation(1000);
804 assert_eq!(mp.current_allocation(), 500);
805 assert_eq!(mp.peak_memory(), 1500);
806
807 mp.reset();
808 assert_eq!(mp.current_allocation(), 0);
809 assert_eq!(mp.peak_memory(), 0);
810 }
811
812 #[test]
813 fn test_estimate_fft_memory() {
814 let size = 1024;
815
816 let inplace_mem = estimate_fft_memory(size, FftAlgorithm::InPlace);
817 let bluestein_mem = estimate_fft_memory(size, FftAlgorithm::Bluestein);
818
819 assert!(bluestein_mem > inplace_mem);
821 }
822
823 #[test]
824 fn test_format_bytes() {
825 assert_eq!(format_bytes(512), "512 B");
826 assert_eq!(format_bytes(1024), "1.00 KB");
827 assert_eq!(format_bytes(1024 * 1024), "1.00 MB");
828 assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
829 }
830
831 #[test]
832 fn test_profile_config_default() {
833 let config = ProfileConfig::default();
834 assert!(config.warmup_iterations > 0);
835 assert!(config.benchmark_iterations > 0);
836 assert!(config.track_memory);
837 }
838}