scirs2_fft/
performance_profiler.rs

1//! FFT Performance Profiling Module
2//!
3//! This module provides comprehensive performance profiling for FFT operations,
4//! including execution time measurement, memory usage tracking, and performance
5//! analysis across different FFT sizes and algorithms.
6//!
7//! # Features
8//!
9//! - **Execution Time Profiling**: High-resolution timing for FFT operations
10//! - **Memory Usage Tracking**: Monitor peak memory usage and allocation patterns
11//! - **Algorithm Comparison**: Compare performance across different FFT algorithms
12//! - **Statistical Analysis**: Mean, std dev, percentiles for benchmark results
13//! - **Performance Reporting**: Generate human-readable performance reports
14//!
15//! # Example
16//!
17//! ```rust,no_run
18//! use scirs2_fft::performance_profiler::{PerformanceProfiler, ProfileConfig};
19//!
20//! let profiler = PerformanceProfiler::new();
21//! let report = profiler.profile_size(1024, true).expect("Profiling failed");
22//! println!("Average time: {:?}", report.avg_time);
23//! ```
24
25use crate::algorithm_selector::{
26    AlgorithmRecommendation, AlgorithmSelector, FftAlgorithm, InputCharacteristics,
27    PerformanceEntry, SelectionConfig,
28};
29use crate::error::{FFTError, FFTResult};
30
31// Backend-specific imports
32#[cfg(feature = "rustfft-backend")]
33use rustfft::FftPlanner;
34
35#[cfg(not(feature = "rustfft-backend"))]
36use oxifft::{Complex as OxiComplex, Direction};
37
38use scirs2_core::numeric::Complex64;
39use serde::{Deserialize, Serialize};
40use std::collections::HashMap;
41use std::time::{Duration, Instant};
42
43/// Configuration for performance profiling
44#[derive(Debug, Clone)]
45pub struct ProfileConfig {
46    /// Number of warm-up iterations (not timed)
47    pub warmup_iterations: usize,
48    /// Number of benchmark iterations
49    pub benchmark_iterations: usize,
50    /// Whether to track memory usage
51    pub track_memory: bool,
52    /// Sizes to benchmark (if None, use default sizes)
53    pub sizes: Option<Vec<usize>>,
54    /// Algorithms to compare (if None, use all)
55    pub algorithms: Option<Vec<FftAlgorithm>>,
56    /// Whether to include inverse FFT in profiling
57    pub include_inverse: bool,
58    /// Timeout per benchmark in milliseconds
59    pub timeout_ms: u64,
60}
61
62impl Default for ProfileConfig {
63    fn default() -> Self {
64        Self {
65            warmup_iterations: 5,
66            benchmark_iterations: 20,
67            track_memory: true,
68            sizes: None,
69            algorithms: None,
70            include_inverse: true,
71            timeout_ms: 30000, // 30 seconds
72        }
73    }
74}
75
76/// Default benchmark sizes covering common use cases
77const DEFAULT_SIZES: &[usize] = &[
78    64, 128, 256, 512, 1024, 2048, 4096, 8192, // Powers of 2
79    100, 360, 480, 1000, 2000, 5000, // Smooth numbers
80    97, 101, 127, 251, 509, 1009, 2003, // Primes
81    1200, 3600, 7200, // Common signal sizes
82    65536, 131072, 262144, // Large powers of 2
83];
84
85/// Single benchmark measurement
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct Measurement {
88    /// Execution time in nanoseconds
89    pub time_ns: u64,
90    /// Peak memory usage in bytes
91    pub peak_memory_bytes: usize,
92    /// Allocated memory in bytes
93    pub allocated_bytes: usize,
94}
95
96/// Profiling result for a single (size, algorithm) combination
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct ProfileResult {
99    /// FFT size
100    pub size: usize,
101    /// Algorithm used
102    pub algorithm: FftAlgorithm,
103    /// Whether this was a forward transform
104    pub forward: bool,
105    /// Individual measurements
106    pub measurements: Vec<Measurement>,
107    /// Average execution time
108    pub avg_time: Duration,
109    /// Minimum execution time
110    pub min_time: Duration,
111    /// Maximum execution time
112    pub max_time: Duration,
113    /// Standard deviation
114    pub std_dev: Duration,
115    /// Median execution time
116    pub median_time: Duration,
117    /// 90th percentile execution time
118    pub p90_time: Duration,
119    /// 99th percentile execution time
120    pub p99_time: Duration,
121    /// Operations per second (throughput)
122    pub ops_per_second: f64,
123    /// Average peak memory usage
124    pub avg_peak_memory: usize,
125    /// Throughput in elements per second
126    pub elements_per_second: f64,
127    /// Input characteristics
128    pub input_characteristics: Option<InputCharacteristics>,
129}
130
131impl ProfileResult {
132    /// Create a profile result from measurements
133    pub fn from_measurements(
134        size: usize,
135        algorithm: FftAlgorithm,
136        forward: bool,
137        measurements: Vec<Measurement>,
138        input_characteristics: Option<InputCharacteristics>,
139    ) -> Self {
140        if measurements.is_empty() {
141            return Self {
142                size,
143                algorithm,
144                forward,
145                measurements: Vec::new(),
146                avg_time: Duration::ZERO,
147                min_time: Duration::ZERO,
148                max_time: Duration::ZERO,
149                std_dev: Duration::ZERO,
150                median_time: Duration::ZERO,
151                p90_time: Duration::ZERO,
152                p99_time: Duration::ZERO,
153                ops_per_second: 0.0,
154                avg_peak_memory: 0,
155                elements_per_second: 0.0,
156                input_characteristics,
157            };
158        }
159
160        let times_ns: Vec<u64> = measurements.iter().map(|m| m.time_ns).collect();
161        let n = times_ns.len();
162
163        // Calculate statistics
164        let sum: u64 = times_ns.iter().sum();
165        let avg_ns = sum / n as u64;
166
167        let min_ns = times_ns.iter().min().copied().unwrap_or(0);
168        let max_ns = times_ns.iter().max().copied().unwrap_or(0);
169
170        // Standard deviation
171        let variance: f64 = times_ns
172            .iter()
173            .map(|&t| {
174                let diff = t as f64 - avg_ns as f64;
175                diff * diff
176            })
177            .sum::<f64>()
178            / n as f64;
179        let std_dev_ns = variance.sqrt() as u64;
180
181        // Percentiles
182        let mut sorted_times = times_ns.clone();
183        sorted_times.sort_unstable();
184
185        let median_ns = if n % 2 == 0 {
186            (sorted_times[n / 2 - 1] + sorted_times[n / 2]) / 2
187        } else {
188            sorted_times[n / 2]
189        };
190
191        let p90_idx = (n as f64 * 0.90).ceil() as usize - 1;
192        let p99_idx = (n as f64 * 0.99).ceil() as usize - 1;
193        let p90_ns = sorted_times.get(p90_idx.min(n - 1)).copied().unwrap_or(0);
194        let p99_ns = sorted_times.get(p99_idx.min(n - 1)).copied().unwrap_or(0);
195
196        // Throughput
197        let ops_per_second = if avg_ns > 0 {
198            1_000_000_000.0 / avg_ns as f64
199        } else {
200            0.0
201        };
202
203        let elements_per_second = ops_per_second * size as f64;
204
205        // Average memory
206        let avg_peak_memory = if !measurements.is_empty() {
207            measurements
208                .iter()
209                .map(|m| m.peak_memory_bytes)
210                .sum::<usize>()
211                / measurements.len()
212        } else {
213            0
214        };
215
216        Self {
217            size,
218            algorithm,
219            forward,
220            measurements,
221            avg_time: Duration::from_nanos(avg_ns),
222            min_time: Duration::from_nanos(min_ns),
223            max_time: Duration::from_nanos(max_ns),
224            std_dev: Duration::from_nanos(std_dev_ns),
225            median_time: Duration::from_nanos(median_ns),
226            p90_time: Duration::from_nanos(p90_ns),
227            p99_time: Duration::from_nanos(p99_ns),
228            ops_per_second,
229            avg_peak_memory,
230            elements_per_second,
231            input_characteristics,
232        }
233    }
234
235    /// Format as a table row
236    pub fn as_table_row(&self) -> String {
237        format!(
238            "{:>8} | {:>16} | {:>12?} | {:>12?} | {:>12?} | {:>12.2} | {:>10}",
239            self.size,
240            self.algorithm.to_string(),
241            self.avg_time,
242            self.min_time,
243            self.max_time,
244            self.ops_per_second,
245            format_bytes(self.avg_peak_memory)
246        )
247    }
248}
249
250/// Comparison result between algorithms
251#[derive(Debug, Clone, Serialize, Deserialize)]
252pub struct AlgorithmComparison {
253    /// FFT size
254    pub size: usize,
255    /// Results for each algorithm
256    pub results: HashMap<FftAlgorithm, ProfileResult>,
257    /// Best algorithm for this size
258    pub best_algorithm: FftAlgorithm,
259    /// Speedup of best vs worst
260    pub speedup: f64,
261    /// Recommendation
262    pub recommendation: String,
263}
264
265/// Performance report for a range of sizes
266#[derive(Debug, Clone, Serialize, Deserialize)]
267pub struct PerformanceReport {
268    /// Results by (size, algorithm)
269    pub results: HashMap<(usize, FftAlgorithm), ProfileResult>,
270    /// Best algorithm for each size
271    pub best_by_size: HashMap<usize, FftAlgorithm>,
272    /// Algorithm comparisons
273    pub comparisons: Vec<AlgorithmComparison>,
274    /// Total profiling time
275    pub total_time: Duration,
276    /// Configuration used
277    pub config_summary: String,
278}
279
280impl PerformanceReport {
281    /// Generate a text report
282    pub fn to_text_report(&self) -> String {
283        let mut report = String::new();
284
285        report.push_str("=== FFT Performance Report ===\n\n");
286        report.push_str(&format!("Total profiling time: {:?}\n", self.total_time));
287        report.push_str(&format!("Configuration: {}\n\n", self.config_summary));
288
289        report.push_str("--- Results by Size ---\n\n");
290        report.push_str(&format!(
291            "{:>8} | {:>16} | {:>12} | {:>12} | {:>12} | {:>12} | {:>10}\n",
292            "Size", "Algorithm", "Avg Time", "Min Time", "Max Time", "Ops/sec", "Memory"
293        ));
294        report.push_str(&"-".repeat(100));
295        report.push('\n');
296
297        let mut sizes: Vec<usize> = self.best_by_size.keys().copied().collect();
298        sizes.sort_unstable();
299
300        for size in sizes {
301            if let Some(algo) = self.best_by_size.get(&size) {
302                if let Some(result) = self.results.get(&(size, *algo)) {
303                    report.push_str(&result.as_table_row());
304                    report.push('\n');
305                }
306            }
307        }
308
309        report.push_str("\n--- Recommendations ---\n\n");
310        for comparison in &self.comparisons {
311            report.push_str(&format!(
312                "Size {:>8}: {} (speedup: {:.2}x)\n",
313                comparison.size, comparison.recommendation, comparison.speedup
314            ));
315        }
316
317        report
318    }
319}
320
321/// Performance profiler for FFT operations
322pub struct PerformanceProfiler {
323    /// Configuration
324    config: ProfileConfig,
325    /// Algorithm selector for recommendations
326    selector: AlgorithmSelector,
327}
328
329impl Default for PerformanceProfiler {
330    fn default() -> Self {
331        Self::new()
332    }
333}
334
335impl PerformanceProfiler {
336    /// Create a new performance profiler with default configuration
337    pub fn new() -> Self {
338        Self::with_config(ProfileConfig::default())
339    }
340
341    /// Create a new performance profiler with custom configuration
342    pub fn with_config(config: ProfileConfig) -> Self {
343        Self {
344            config,
345            selector: AlgorithmSelector::new(),
346        }
347    }
348
349    /// Profile a single size with the recommended algorithm
350    pub fn profile_size(&self, size: usize, forward: bool) -> FFTResult<ProfileResult> {
351        let recommendation = self.selector.select_algorithm(size, forward)?;
352        self.profile_size_with_algorithm(size, recommendation.algorithm, forward)
353    }
354
355    /// Profile a single size with a specific algorithm (RustFFT backend)
356    #[cfg(feature = "rustfft-backend")]
357    pub fn profile_size_with_algorithm(
358        &self,
359        size: usize,
360        algorithm: FftAlgorithm,
361        forward: bool,
362    ) -> FFTResult<ProfileResult> {
363        // Validate size
364        if size == 0 {
365            return Err(FFTError::ValueError("Size must be positive".to_string()));
366        }
367
368        // Create test data
369        let mut data: Vec<Complex64> = (0..size)
370            .map(|i| Complex64::new(i as f64, (i * 2) as f64))
371            .collect();
372
373        // Create FFT planner
374        let mut planner = FftPlanner::new();
375        let fft = if forward {
376            planner.plan_fft_forward(size)
377        } else {
378            planner.plan_fft_inverse(size)
379        };
380
381        // Warm-up iterations
382        for _ in 0..self.config.warmup_iterations {
383            let mut work_data = data.clone();
384            fft.process(&mut work_data);
385        }
386
387        // Benchmark iterations
388        let mut measurements = Vec::with_capacity(self.config.benchmark_iterations);
389
390        for _ in 0..self.config.benchmark_iterations {
391            let mut work_data = data.clone();
392
393            // Track memory before
394            let memory_before = if self.config.track_memory {
395                estimate_current_allocation()
396            } else {
397                0
398            };
399
400            // Time the FFT
401            let start = Instant::now();
402            fft.process(&mut work_data);
403            let elapsed = start.elapsed();
404
405            // Track memory after
406            let memory_after = if self.config.track_memory {
407                estimate_current_allocation()
408            } else {
409                0
410            };
411
412            measurements.push(Measurement {
413                time_ns: elapsed.as_nanos() as u64,
414                peak_memory_bytes: size * 16, // Complex64 = 16 bytes
415                allocated_bytes: memory_after.saturating_sub(memory_before),
416            });
417        }
418
419        // Get input characteristics
420        let chars = InputCharacteristics::analyze(size, &self.selector.hardware().cache_info);
421
422        Ok(ProfileResult::from_measurements(
423            size,
424            algorithm,
425            forward,
426            measurements,
427            Some(chars),
428        ))
429    }
430
431    /// Profile a single size with a specific algorithm (OxiFFT backend)
432    #[cfg(not(feature = "rustfft-backend"))]
433    pub fn profile_size_with_algorithm(
434        &self,
435        size: usize,
436        algorithm: FftAlgorithm,
437        forward: bool,
438    ) -> FFTResult<ProfileResult> {
439        // Validate size
440        if size == 0 {
441            return Err(FFTError::ValueError("Size must be positive".to_string()));
442        }
443
444        // Create test data (convert to OxiFFT format)
445        let data: Vec<OxiComplex<f64>> = (0..size)
446            .map(|i| OxiComplex::new(i as f64, (i * 2) as f64))
447            .collect();
448
449        // Create FFT plan using OxiFFT
450        let mut plan = if forward {
451            oxifft::Plan::new(Direction::Forward, size, oxifft::Flags::Estimate).map_err(|e| {
452                FFTError::ComputationError(format!("Failed to create FFT plan: {:?}", e))
453            })?
454        } else {
455            oxifft::Plan::new(Direction::Backward, size, oxifft::Flags::Estimate).map_err(|e| {
456                FFTError::ComputationError(format!("Failed to create FFT plan: {:?}", e))
457            })?
458        };
459
460        // Warm-up iterations
461        for _ in 0..self.config.warmup_iterations {
462            let mut work_data = data.clone();
463            plan.execute_c2c(&mut work_data).map_err(|e| {
464                FFTError::ComputationError(format!("FFT execution failed: {:?}", e))
465            })?;
466        }
467
468        // Benchmark iterations
469        let mut measurements = Vec::with_capacity(self.config.benchmark_iterations);
470
471        for _ in 0..self.config.benchmark_iterations {
472            let mut work_data = data.clone();
473
474            // Track memory before
475            let memory_before = if self.config.track_memory {
476                estimate_current_allocation()
477            } else {
478                0
479            };
480
481            // Time the FFT
482            let start = Instant::now();
483            plan.execute_c2c(&mut work_data).map_err(|e| {
484                FFTError::ComputationError(format!("FFT execution failed: {:?}", e))
485            })?;
486            let elapsed = start.elapsed();
487
488            // Track memory after
489            let memory_after = if self.config.track_memory {
490                estimate_current_allocation()
491            } else {
492                0
493            };
494
495            measurements.push(Measurement {
496                time_ns: elapsed.as_nanos() as u64,
497                peak_memory_bytes: size * 16, // Complex64 = 16 bytes
498                allocated_bytes: memory_after.saturating_sub(memory_before),
499            });
500        }
501
502        // Get input characteristics
503        let chars = InputCharacteristics::analyze(size, &self.selector.hardware().cache_info);
504
505        Ok(ProfileResult::from_measurements(
506            size,
507            algorithm,
508            forward,
509            measurements,
510            Some(chars),
511        ))
512    }
513
514    /// Profile multiple sizes
515    pub fn profile_sizes(&self, sizes: &[usize], forward: bool) -> FFTResult<Vec<ProfileResult>> {
516        let mut results = Vec::with_capacity(sizes.len());
517
518        for &size in sizes {
519            match self.profile_size(size, forward) {
520                Ok(result) => results.push(result),
521                Err(e) => {
522                    // Log error but continue with other sizes
523                    eprintln!("Failed to profile size {}: {}", size, e);
524                }
525            }
526        }
527
528        Ok(results)
529    }
530
531    /// Compare algorithms for a given size
532    pub fn compare_algorithms(
533        &self,
534        size: usize,
535        algorithms: &[FftAlgorithm],
536        forward: bool,
537    ) -> FFTResult<AlgorithmComparison> {
538        let mut results = HashMap::new();
539
540        for &algorithm in algorithms {
541            match self.profile_size_with_algorithm(size, algorithm, forward) {
542                Ok(result) => {
543                    results.insert(algorithm, result);
544                }
545                Err(e) => {
546                    eprintln!("Failed to profile {} for size {}: {}", algorithm, size, e);
547                }
548            }
549        }
550
551        if results.is_empty() {
552            return Err(FFTError::ComputationError(
553                "No algorithms could be profiled".to_string(),
554            ));
555        }
556
557        // Find best and worst
558        let best_algorithm = results
559            .iter()
560            .min_by_key(|(_, r)| r.avg_time)
561            .map(|(&algo, _)| algo)
562            .unwrap_or(FftAlgorithm::MixedRadix);
563
564        let best_time = results
565            .get(&best_algorithm)
566            .map(|r| r.avg_time)
567            .unwrap_or(Duration::ZERO);
568
569        let worst_time = results
570            .values()
571            .map(|r| r.avg_time)
572            .max()
573            .unwrap_or(Duration::ZERO);
574
575        let speedup = if best_time.as_nanos() > 0 {
576            worst_time.as_nanos() as f64 / best_time.as_nanos() as f64
577        } else {
578            1.0
579        };
580
581        let recommendation = format!("{} is fastest ({:?})", best_algorithm, best_time);
582
583        Ok(AlgorithmComparison {
584            size,
585            results,
586            best_algorithm,
587            speedup,
588            recommendation,
589        })
590    }
591
592    /// Run comprehensive profiling
593    pub fn run_comprehensive_profile(&self) -> FFTResult<PerformanceReport> {
594        let start = Instant::now();
595
596        let sizes = self.config.sizes.as_deref().unwrap_or(DEFAULT_SIZES);
597
598        let algorithms = self.config.algorithms.as_deref().unwrap_or(&[
599            FftAlgorithm::CooleyTukeyRadix2,
600            FftAlgorithm::SplitRadix,
601            FftAlgorithm::MixedRadix,
602            FftAlgorithm::Bluestein,
603        ]);
604
605        let mut results = HashMap::new();
606        let mut best_by_size = HashMap::new();
607        let mut comparisons = Vec::new();
608
609        for &size in sizes {
610            // Profile with each algorithm
611            let comparison = self.compare_algorithms(size, algorithms, true)?;
612
613            for (algo, result) in &comparison.results {
614                results.insert((size, *algo), result.clone());
615            }
616
617            best_by_size.insert(size, comparison.best_algorithm);
618            comparisons.push(comparison);
619
620            // Also profile inverse if configured
621            if self.config.include_inverse {
622                if let Ok(inv_comparison) = self.compare_algorithms(size, algorithms, false) {
623                    // Store inverse results with a different key or in a separate structure
624                    // For now, we focus on forward transforms
625                    let _ = inv_comparison;
626                }
627            }
628        }
629
630        let total_time = start.elapsed();
631
632        let config_summary = format!(
633            "warmup={}, iterations={}, sizes={}, algorithms={}",
634            self.config.warmup_iterations,
635            self.config.benchmark_iterations,
636            sizes.len(),
637            algorithms.len()
638        );
639
640        Ok(PerformanceReport {
641            results,
642            best_by_size,
643            comparisons,
644            total_time,
645            config_summary,
646        })
647    }
648
649    /// Auto-tune for specific sizes and record results
650    pub fn auto_tune(&mut self, sizes: &[usize]) -> FFTResult<HashMap<usize, FftAlgorithm>> {
651        let algorithms = [
652            FftAlgorithm::CooleyTukeyRadix2,
653            FftAlgorithm::SplitRadix,
654            FftAlgorithm::MixedRadix,
655            FftAlgorithm::Bluestein,
656        ];
657
658        let mut best_algorithms = HashMap::new();
659
660        for &size in sizes {
661            let comparison = self.compare_algorithms(size, &algorithms, true)?;
662            best_algorithms.insert(size, comparison.best_algorithm);
663
664            // Record to the selector's history
665            if let Some(result) = comparison.results.get(&comparison.best_algorithm) {
666                let entry = PerformanceEntry {
667                    size,
668                    algorithm: comparison.best_algorithm,
669                    forward: true,
670                    execution_time_ns: result.avg_time.as_nanos() as u64,
671                    peak_memory_bytes: result.avg_peak_memory,
672                    timestamp: std::time::SystemTime::now()
673                        .duration_since(std::time::UNIX_EPOCH)
674                        .map(|d| d.as_secs())
675                        .unwrap_or(0),
676                    hardware_hash: 0,
677                };
678                let _ = self.selector.record_performance(entry);
679            }
680        }
681
682        Ok(best_algorithms)
683    }
684
685    /// Get selector reference
686    pub fn selector(&self) -> &AlgorithmSelector {
687        &self.selector
688    }
689
690    /// Get selector mutable reference
691    pub fn selector_mut(&mut self) -> &mut AlgorithmSelector {
692        &mut self.selector
693    }
694}
695
696/// Memory profiler for tracking FFT memory usage
697pub struct MemoryProfiler {
698    /// Tracking enabled
699    enabled: bool,
700    /// Peak memory recorded
701    peak_memory: usize,
702    /// Current allocation estimate
703    current_allocation: usize,
704}
705
706impl Default for MemoryProfiler {
707    fn default() -> Self {
708        Self::new()
709    }
710}
711
712impl MemoryProfiler {
713    /// Create a new memory profiler
714    pub fn new() -> Self {
715        Self {
716            enabled: true,
717            peak_memory: 0,
718            current_allocation: 0,
719        }
720    }
721
722    /// Record an allocation
723    pub fn record_allocation(&mut self, bytes: usize) {
724        if self.enabled {
725            self.current_allocation += bytes;
726            if self.current_allocation > self.peak_memory {
727                self.peak_memory = self.current_allocation;
728            }
729        }
730    }
731
732    /// Record a deallocation
733    pub fn record_deallocation(&mut self, bytes: usize) {
734        if self.enabled {
735            self.current_allocation = self.current_allocation.saturating_sub(bytes);
736        }
737    }
738
739    /// Reset counters
740    pub fn reset(&mut self) {
741        self.peak_memory = 0;
742        self.current_allocation = 0;
743    }
744
745    /// Get peak memory usage
746    pub fn peak_memory(&self) -> usize {
747        self.peak_memory
748    }
749
750    /// Get current allocation
751    pub fn current_allocation(&self) -> usize {
752        self.current_allocation
753    }
754
755    /// Enable/disable tracking
756    pub fn set_enabled(&mut self, enabled: bool) {
757        self.enabled = enabled;
758    }
759}
760
761/// Estimate memory required for FFT of given size
762pub fn estimate_fft_memory(size: usize, algorithm: FftAlgorithm) -> usize {
763    let base_memory = size * 16; // Complex64 = 16 bytes
764
765    // Algorithm-specific overhead
766    let overhead = match algorithm {
767        FftAlgorithm::CooleyTukeyRadix2 => base_memory / 4, // Minimal scratch
768        FftAlgorithm::Radix4 => base_memory / 4,
769        FftAlgorithm::SplitRadix => base_memory / 3,
770        FftAlgorithm::MixedRadix => base_memory / 2,
771        FftAlgorithm::Bluestein => base_memory * 2, // Needs convolution buffer
772        FftAlgorithm::Rader => base_memory,
773        FftAlgorithm::Winograd => base_memory / 2,
774        FftAlgorithm::GoodThomas => base_memory / 3,
775        FftAlgorithm::Streaming => base_memory / 8, // Minimal memory
776        FftAlgorithm::CacheOblivious => base_memory / 4,
777        FftAlgorithm::InPlace => 0, // No extra memory
778        FftAlgorithm::SimdOptimized => base_memory / 4,
779        FftAlgorithm::Parallel => base_memory, // Thread buffers
780        FftAlgorithm::Hybrid => base_memory / 2,
781    };
782
783    base_memory + overhead
784}
785
786/// Estimate current memory allocation (simplified)
787fn estimate_current_allocation() -> usize {
788    // This is a simplified estimation
789    // In a real implementation, this would query the allocator
790    0
791}
792
793/// Format bytes in human-readable form
794fn format_bytes(bytes: usize) -> String {
795    if bytes >= 1024 * 1024 * 1024 {
796        format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
797    } else if bytes >= 1024 * 1024 {
798        format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
799    } else if bytes >= 1024 {
800        format!("{:.2} KB", bytes as f64 / 1024.0)
801    } else {
802        format!("{} B", bytes)
803    }
804}
805
806#[cfg(test)]
807mod tests {
808    use super::*;
809
810    #[test]
811    fn test_profile_result_statistics() {
812        let measurements = vec![
813            Measurement {
814                time_ns: 1000,
815                peak_memory_bytes: 1000,
816                allocated_bytes: 100,
817            },
818            Measurement {
819                time_ns: 1200,
820                peak_memory_bytes: 1000,
821                allocated_bytes: 100,
822            },
823            Measurement {
824                time_ns: 1100,
825                peak_memory_bytes: 1000,
826                allocated_bytes: 100,
827            },
828            Measurement {
829                time_ns: 1050,
830                peak_memory_bytes: 1000,
831                allocated_bytes: 100,
832            },
833        ];
834
835        let result = ProfileResult::from_measurements(
836            256,
837            FftAlgorithm::MixedRadix,
838            true,
839            measurements,
840            None,
841        );
842
843        assert_eq!(result.size, 256);
844        assert!(result.avg_time.as_nanos() > 0);
845        assert!(result.min_time <= result.avg_time);
846        assert!(result.max_time >= result.avg_time);
847        assert!(result.ops_per_second > 0.0);
848    }
849
850    #[test]
851    fn test_profile_single_size() {
852        let profiler = PerformanceProfiler::new();
853        let result = profiler.profile_size(256, true);
854
855        assert!(result.is_ok());
856        let profile = result.expect("Profiling failed");
857        assert_eq!(profile.size, 256);
858        assert!(profile.avg_time.as_nanos() > 0);
859    }
860
861    #[test]
862    fn test_compare_algorithms() {
863        let profiler = PerformanceProfiler::new();
864        let algorithms = [FftAlgorithm::MixedRadix, FftAlgorithm::Bluestein];
865
866        let result = profiler.compare_algorithms(256, &algorithms, true);
867
868        assert!(result.is_ok());
869        let comparison = result.expect("Comparison failed");
870        assert_eq!(comparison.size, 256);
871        assert!(!comparison.results.is_empty());
872        assert!(comparison.speedup >= 1.0);
873    }
874
875    #[test]
876    fn test_memory_profiler() {
877        let mut mp = MemoryProfiler::new();
878
879        mp.record_allocation(1000);
880        assert_eq!(mp.current_allocation(), 1000);
881        assert_eq!(mp.peak_memory(), 1000);
882
883        mp.record_allocation(500);
884        assert_eq!(mp.current_allocation(), 1500);
885        assert_eq!(mp.peak_memory(), 1500);
886
887        mp.record_deallocation(1000);
888        assert_eq!(mp.current_allocation(), 500);
889        assert_eq!(mp.peak_memory(), 1500);
890
891        mp.reset();
892        assert_eq!(mp.current_allocation(), 0);
893        assert_eq!(mp.peak_memory(), 0);
894    }
895
896    #[test]
897    fn test_estimate_fft_memory() {
898        let size = 1024;
899
900        let inplace_mem = estimate_fft_memory(size, FftAlgorithm::InPlace);
901        let bluestein_mem = estimate_fft_memory(size, FftAlgorithm::Bluestein);
902
903        // Bluestein should use more memory than in-place
904        assert!(bluestein_mem > inplace_mem);
905    }
906
907    #[test]
908    fn test_format_bytes() {
909        assert_eq!(format_bytes(512), "512 B");
910        assert_eq!(format_bytes(1024), "1.00 KB");
911        assert_eq!(format_bytes(1024 * 1024), "1.00 MB");
912        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
913    }
914
915    #[test]
916    fn test_profile_config_default() {
917        let config = ProfileConfig::default();
918        assert!(config.warmup_iterations > 0);
919        assert!(config.benchmark_iterations > 0);
920        assert!(config.track_memory);
921    }
922}
scirs2_fft/performance_profiler.rs

scirs2_fft/
performance_profiler.rs