scirs2_fft/
performance_profiler.rs

1//! FFT Performance Profiling Module
2//!
3//! This module provides comprehensive performance profiling for FFT operations,
4//! including execution time measurement, memory usage tracking, and performance
5//! analysis across different FFT sizes and algorithms.
6//!
7//! # Features
8//!
9//! - **Execution Time Profiling**: High-resolution timing for FFT operations
10//! - **Memory Usage Tracking**: Monitor peak memory usage and allocation patterns
11//! - **Algorithm Comparison**: Compare performance across different FFT algorithms
12//! - **Statistical Analysis**: Mean, std dev, percentiles for benchmark results
13//! - **Performance Reporting**: Generate human-readable performance reports
14//!
15//! # Example
16//!
17//! ```rust,no_run
18//! use scirs2_fft::performance_profiler::{PerformanceProfiler, ProfileConfig};
19//!
20//! let profiler = PerformanceProfiler::new();
21//! let report = profiler.profile_size(1024, true).expect("Profiling failed");
22//! println!("Average time: {:?}", report.avg_time);
23//! ```
24
25use crate::algorithm_selector::{
26    AlgorithmRecommendation, AlgorithmSelector, FftAlgorithm, InputCharacteristics,
27    PerformanceEntry, SelectionConfig,
28};
29use crate::error::{FFTError, FFTResult};
30
31// Backend-specific imports
32
33use oxifft::{Complex as OxiComplex, Direction};
34
35use scirs2_core::numeric::Complex64;
36use serde::{Deserialize, Serialize};
37use std::collections::HashMap;
38use std::time::{Duration, Instant};
39
40/// Configuration for performance profiling
41#[derive(Debug, Clone)]
42pub struct ProfileConfig {
43    /// Number of warm-up iterations (not timed)
44    pub warmup_iterations: usize,
45    /// Number of benchmark iterations
46    pub benchmark_iterations: usize,
47    /// Whether to track memory usage
48    pub track_memory: bool,
49    /// Sizes to benchmark (if None, use default sizes)
50    pub sizes: Option<Vec<usize>>,
51    /// Algorithms to compare (if None, use all)
52    pub algorithms: Option<Vec<FftAlgorithm>>,
53    /// Whether to include inverse FFT in profiling
54    pub include_inverse: bool,
55    /// Timeout per benchmark in milliseconds
56    pub timeout_ms: u64,
57}
58
59impl Default for ProfileConfig {
60    fn default() -> Self {
61        Self {
62            warmup_iterations: 5,
63            benchmark_iterations: 20,
64            track_memory: true,
65            sizes: None,
66            algorithms: None,
67            include_inverse: true,
68            timeout_ms: 30000, // 30 seconds
69        }
70    }
71}
72
73/// Default benchmark sizes covering common use cases
74const DEFAULT_SIZES: &[usize] = &[
75    64, 128, 256, 512, 1024, 2048, 4096, 8192, // Powers of 2
76    100, 360, 480, 1000, 2000, 5000, // Smooth numbers
77    97, 101, 127, 251, 509, 1009, 2003, // Primes
78    1200, 3600, 7200, // Common signal sizes
79    65536, 131072, 262144, // Large powers of 2
80];
81
82/// Single benchmark measurement
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct Measurement {
85    /// Execution time in nanoseconds
86    pub time_ns: u64,
87    /// Peak memory usage in bytes
88    pub peak_memory_bytes: usize,
89    /// Allocated memory in bytes
90    pub allocated_bytes: usize,
91}
92
93/// Profiling result for a single (size, algorithm) combination
94#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct ProfileResult {
96    /// FFT size
97    pub size: usize,
98    /// Algorithm used
99    pub algorithm: FftAlgorithm,
100    /// Whether this was a forward transform
101    pub forward: bool,
102    /// Individual measurements
103    pub measurements: Vec<Measurement>,
104    /// Average execution time
105    pub avg_time: Duration,
106    /// Minimum execution time
107    pub min_time: Duration,
108    /// Maximum execution time
109    pub max_time: Duration,
110    /// Standard deviation
111    pub std_dev: Duration,
112    /// Median execution time
113    pub median_time: Duration,
114    /// 90th percentile execution time
115    pub p90_time: Duration,
116    /// 99th percentile execution time
117    pub p99_time: Duration,
118    /// Operations per second (throughput)
119    pub ops_per_second: f64,
120    /// Average peak memory usage
121    pub avg_peak_memory: usize,
122    /// Throughput in elements per second
123    pub elements_per_second: f64,
124    /// Input characteristics
125    pub input_characteristics: Option<InputCharacteristics>,
126}
127
128impl ProfileResult {
129    /// Create a profile result from measurements
130    pub fn from_measurements(
131        size: usize,
132        algorithm: FftAlgorithm,
133        forward: bool,
134        measurements: Vec<Measurement>,
135        input_characteristics: Option<InputCharacteristics>,
136    ) -> Self {
137        if measurements.is_empty() {
138            return Self {
139                size,
140                algorithm,
141                forward,
142                measurements: Vec::new(),
143                avg_time: Duration::ZERO,
144                min_time: Duration::ZERO,
145                max_time: Duration::ZERO,
146                std_dev: Duration::ZERO,
147                median_time: Duration::ZERO,
148                p90_time: Duration::ZERO,
149                p99_time: Duration::ZERO,
150                ops_per_second: 0.0,
151                avg_peak_memory: 0,
152                elements_per_second: 0.0,
153                input_characteristics,
154            };
155        }
156
157        let times_ns: Vec<u64> = measurements.iter().map(|m| m.time_ns).collect();
158        let n = times_ns.len();
159
160        // Calculate statistics
161        let sum: u64 = times_ns.iter().sum();
162        let avg_ns = sum / n as u64;
163
164        let min_ns = times_ns.iter().min().copied().unwrap_or(0);
165        let max_ns = times_ns.iter().max().copied().unwrap_or(0);
166
167        // Standard deviation
168        let variance: f64 = times_ns
169            .iter()
170            .map(|&t| {
171                let diff = t as f64 - avg_ns as f64;
172                diff * diff
173            })
174            .sum::<f64>()
175            / n as f64;
176        let std_dev_ns = variance.sqrt() as u64;
177
178        // Percentiles
179        let mut sorted_times = times_ns.clone();
180        sorted_times.sort_unstable();
181
182        let median_ns = if n % 2 == 0 {
183            (sorted_times[n / 2 - 1] + sorted_times[n / 2]) / 2
184        } else {
185            sorted_times[n / 2]
186        };
187
188        let p90_idx = (n as f64 * 0.90).ceil() as usize - 1;
189        let p99_idx = (n as f64 * 0.99).ceil() as usize - 1;
190        let p90_ns = sorted_times.get(p90_idx.min(n - 1)).copied().unwrap_or(0);
191        let p99_ns = sorted_times.get(p99_idx.min(n - 1)).copied().unwrap_or(0);
192
193        // Throughput
194        let ops_per_second = if avg_ns > 0 {
195            1_000_000_000.0 / avg_ns as f64
196        } else {
197            0.0
198        };
199
200        let elements_per_second = ops_per_second * size as f64;
201
202        // Average memory
203        let avg_peak_memory = if !measurements.is_empty() {
204            measurements
205                .iter()
206                .map(|m| m.peak_memory_bytes)
207                .sum::<usize>()
208                / measurements.len()
209        } else {
210            0
211        };
212
213        Self {
214            size,
215            algorithm,
216            forward,
217            measurements,
218            avg_time: Duration::from_nanos(avg_ns),
219            min_time: Duration::from_nanos(min_ns),
220            max_time: Duration::from_nanos(max_ns),
221            std_dev: Duration::from_nanos(std_dev_ns),
222            median_time: Duration::from_nanos(median_ns),
223            p90_time: Duration::from_nanos(p90_ns),
224            p99_time: Duration::from_nanos(p99_ns),
225            ops_per_second,
226            avg_peak_memory,
227            elements_per_second,
228            input_characteristics,
229        }
230    }
231
232    /// Format as a table row
233    pub fn as_table_row(&self) -> String {
234        format!(
235            "{:>8} | {:>16} | {:>12?} | {:>12?} | {:>12?} | {:>12.2} | {:>10}",
236            self.size,
237            self.algorithm.to_string(),
238            self.avg_time,
239            self.min_time,
240            self.max_time,
241            self.ops_per_second,
242            format_bytes(self.avg_peak_memory)
243        )
244    }
245}
246
247/// Comparison result between algorithms
248#[derive(Debug, Clone, Serialize, Deserialize)]
249pub struct AlgorithmComparison {
250    /// FFT size
251    pub size: usize,
252    /// Results for each algorithm
253    pub results: HashMap<FftAlgorithm, ProfileResult>,
254    /// Best algorithm for this size
255    pub best_algorithm: FftAlgorithm,
256    /// Speedup of best vs worst
257    pub speedup: f64,
258    /// Recommendation
259    pub recommendation: String,
260}
261
262/// Performance report for a range of sizes
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct PerformanceReport {
265    /// Results by (size, algorithm)
266    pub results: HashMap<(usize, FftAlgorithm), ProfileResult>,
267    /// Best algorithm for each size
268    pub best_by_size: HashMap<usize, FftAlgorithm>,
269    /// Algorithm comparisons
270    pub comparisons: Vec<AlgorithmComparison>,
271    /// Total profiling time
272    pub total_time: Duration,
273    /// Configuration used
274    pub config_summary: String,
275}
276
277impl PerformanceReport {
278    /// Generate a text report
279    pub fn to_text_report(&self) -> String {
280        let mut report = String::new();
281
282        report.push_str("=== FFT Performance Report ===\n\n");
283        report.push_str(&format!("Total profiling time: {:?}\n", self.total_time));
284        report.push_str(&format!("Configuration: {}\n\n", self.config_summary));
285
286        report.push_str("--- Results by Size ---\n\n");
287        report.push_str(&format!(
288            "{:>8} | {:>16} | {:>12} | {:>12} | {:>12} | {:>12} | {:>10}\n",
289            "Size", "Algorithm", "Avg Time", "Min Time", "Max Time", "Ops/sec", "Memory"
290        ));
291        report.push_str(&"-".repeat(100));
292        report.push('\n');
293
294        let mut sizes: Vec<usize> = self.best_by_size.keys().copied().collect();
295        sizes.sort_unstable();
296
297        for size in sizes {
298            if let Some(algo) = self.best_by_size.get(&size) {
299                if let Some(result) = self.results.get(&(size, *algo)) {
300                    report.push_str(&result.as_table_row());
301                    report.push('\n');
302                }
303            }
304        }
305
306        report.push_str("\n--- Recommendations ---\n\n");
307        for comparison in &self.comparisons {
308            report.push_str(&format!(
309                "Size {:>8}: {} (speedup: {:.2}x)\n",
310                comparison.size, comparison.recommendation, comparison.speedup
311            ));
312        }
313
314        report
315    }
316}
317
318/// Performance profiler for FFT operations
319pub struct PerformanceProfiler {
320    /// Configuration
321    config: ProfileConfig,
322    /// Algorithm selector for recommendations
323    selector: AlgorithmSelector,
324}
325
326impl Default for PerformanceProfiler {
327    fn default() -> Self {
328        Self::new()
329    }
330}
331
332impl PerformanceProfiler {
333    /// Create a new performance profiler with default configuration
334    pub fn new() -> Self {
335        Self::with_config(ProfileConfig::default())
336    }
337
338    /// Create a new performance profiler with custom configuration
339    pub fn with_config(config: ProfileConfig) -> Self {
340        Self {
341            config,
342            selector: AlgorithmSelector::new(),
343        }
344    }
345
346    /// Profile a single size with the recommended algorithm
347    pub fn profile_size(&self, size: usize, forward: bool) -> FFTResult<ProfileResult> {
348        let recommendation = self.selector.select_algorithm(size, forward)?;
349        self.profile_size_with_algorithm(size, recommendation.algorithm, forward)
350    }
351
352    /// Profile a single size with a specific algorithm (OxiFFT backend)
353    pub fn profile_size_with_algorithm(
354        &self,
355        size: usize,
356        algorithm: FftAlgorithm,
357        forward: bool,
358    ) -> FFTResult<ProfileResult> {
359        // Validate size
360        if size == 0 {
361            return Err(FFTError::ValueError("Size must be positive".to_string()));
362        }
363
364        // Create test data (convert to OxiFFT format)
365        let data: Vec<OxiComplex<f64>> = (0..size)
366            .map(|i| OxiComplex::new(i as f64, (i * 2) as f64))
367            .collect();
368
369        // Create FFT plan using OxiFFT
370        let direction = if forward {
371            Direction::Forward
372        } else {
373            Direction::Backward
374        };
375        let plan = oxifft::Plan::dft_1d(size, direction, oxifft::Flags::ESTIMATE)
376            .ok_or_else(|| FFTError::ComputationError("Failed to create FFT plan".to_string()))?;
377
378        // Warm-up iterations
379        for _ in 0..self.config.warmup_iterations {
380            let work_data = data.clone();
381            let mut out = vec![OxiComplex::new(0.0, 0.0); size];
382            plan.execute(&work_data, &mut out);
383        }
384
385        // Benchmark iterations
386        let mut measurements = Vec::with_capacity(self.config.benchmark_iterations);
387
388        for _ in 0..self.config.benchmark_iterations {
389            let mut work_data = data.clone();
390
391            // Track memory before
392            let memory_before = if self.config.track_memory {
393                estimate_current_allocation()
394            } else {
395                0
396            };
397
398            // Time the FFT
399            let start = Instant::now();
400            let mut out = vec![OxiComplex::new(0.0, 0.0); size];
401            plan.execute(&work_data, &mut out);
402            let elapsed = start.elapsed();
403
404            // Track memory after
405            let memory_after = if self.config.track_memory {
406                estimate_current_allocation()
407            } else {
408                0
409            };
410
411            measurements.push(Measurement {
412                time_ns: elapsed.as_nanos() as u64,
413                peak_memory_bytes: size * 16, // Complex64 = 16 bytes
414                allocated_bytes: memory_after.saturating_sub(memory_before),
415            });
416        }
417
418        // Get input characteristics
419        let chars = InputCharacteristics::analyze(size, &self.selector.hardware().cache_info);
420
421        Ok(ProfileResult::from_measurements(
422            size,
423            algorithm,
424            forward,
425            measurements,
426            Some(chars),
427        ))
428    }
429
430    /// Profile multiple sizes
431    pub fn profile_sizes(&self, sizes: &[usize], forward: bool) -> FFTResult<Vec<ProfileResult>> {
432        let mut results = Vec::with_capacity(sizes.len());
433
434        for &size in sizes {
435            match self.profile_size(size, forward) {
436                Ok(result) => results.push(result),
437                Err(e) => {
438                    // Log error but continue with other sizes
439                    eprintln!("Failed to profile size {}: {}", size, e);
440                }
441            }
442        }
443
444        Ok(results)
445    }
446
447    /// Compare algorithms for a given size
448    pub fn compare_algorithms(
449        &self,
450        size: usize,
451        algorithms: &[FftAlgorithm],
452        forward: bool,
453    ) -> FFTResult<AlgorithmComparison> {
454        let mut results = HashMap::new();
455
456        for &algorithm in algorithms {
457            match self.profile_size_with_algorithm(size, algorithm, forward) {
458                Ok(result) => {
459                    results.insert(algorithm, result);
460                }
461                Err(e) => {
462                    eprintln!("Failed to profile {} for size {}: {}", algorithm, size, e);
463                }
464            }
465        }
466
467        if results.is_empty() {
468            return Err(FFTError::ComputationError(
469                "No algorithms could be profiled".to_string(),
470            ));
471        }
472
473        // Find best and worst
474        let best_algorithm = results
475            .iter()
476            .min_by_key(|(_, r)| r.avg_time)
477            .map(|(&algo, _)| algo)
478            .unwrap_or(FftAlgorithm::MixedRadix);
479
480        let best_time = results
481            .get(&best_algorithm)
482            .map(|r| r.avg_time)
483            .unwrap_or(Duration::ZERO);
484
485        let worst_time = results
486            .values()
487            .map(|r| r.avg_time)
488            .max()
489            .unwrap_or(Duration::ZERO);
490
491        let speedup = if best_time.as_nanos() > 0 {
492            worst_time.as_nanos() as f64 / best_time.as_nanos() as f64
493        } else {
494            1.0
495        };
496
497        let recommendation = format!("{} is fastest ({:?})", best_algorithm, best_time);
498
499        Ok(AlgorithmComparison {
500            size,
501            results,
502            best_algorithm,
503            speedup,
504            recommendation,
505        })
506    }
507
508    /// Run comprehensive profiling
509    pub fn run_comprehensive_profile(&self) -> FFTResult<PerformanceReport> {
510        let start = Instant::now();
511
512        let sizes = self.config.sizes.as_deref().unwrap_or(DEFAULT_SIZES);
513
514        let algorithms = self.config.algorithms.as_deref().unwrap_or(&[
515            FftAlgorithm::CooleyTukeyRadix2,
516            FftAlgorithm::SplitRadix,
517            FftAlgorithm::MixedRadix,
518            FftAlgorithm::Bluestein,
519        ]);
520
521        let mut results = HashMap::new();
522        let mut best_by_size = HashMap::new();
523        let mut comparisons = Vec::new();
524
525        for &size in sizes {
526            // Profile with each algorithm
527            let comparison = self.compare_algorithms(size, algorithms, true)?;
528
529            for (algo, result) in &comparison.results {
530                results.insert((size, *algo), result.clone());
531            }
532
533            best_by_size.insert(size, comparison.best_algorithm);
534            comparisons.push(comparison);
535
536            // Also profile inverse if configured
537            if self.config.include_inverse {
538                if let Ok(inv_comparison) = self.compare_algorithms(size, algorithms, false) {
539                    // Store inverse results with a different key or in a separate structure
540                    // For now, we focus on forward transforms
541                    let _ = inv_comparison;
542                }
543            }
544        }
545
546        let total_time = start.elapsed();
547
548        let config_summary = format!(
549            "warmup={}, iterations={}, sizes={}, algorithms={}",
550            self.config.warmup_iterations,
551            self.config.benchmark_iterations,
552            sizes.len(),
553            algorithms.len()
554        );
555
556        Ok(PerformanceReport {
557            results,
558            best_by_size,
559            comparisons,
560            total_time,
561            config_summary,
562        })
563    }
564
565    /// Auto-tune for specific sizes and record results
566    pub fn auto_tune(&mut self, sizes: &[usize]) -> FFTResult<HashMap<usize, FftAlgorithm>> {
567        let algorithms = [
568            FftAlgorithm::CooleyTukeyRadix2,
569            FftAlgorithm::SplitRadix,
570            FftAlgorithm::MixedRadix,
571            FftAlgorithm::Bluestein,
572        ];
573
574        let mut best_algorithms = HashMap::new();
575
576        for &size in sizes {
577            let comparison = self.compare_algorithms(size, &algorithms, true)?;
578            best_algorithms.insert(size, comparison.best_algorithm);
579
580            // Record to the selector's history
581            if let Some(result) = comparison.results.get(&comparison.best_algorithm) {
582                let entry = PerformanceEntry {
583                    size,
584                    algorithm: comparison.best_algorithm,
585                    forward: true,
586                    execution_time_ns: result.avg_time.as_nanos() as u64,
587                    peak_memory_bytes: result.avg_peak_memory,
588                    timestamp: std::time::SystemTime::now()
589                        .duration_since(std::time::UNIX_EPOCH)
590                        .map(|d| d.as_secs())
591                        .unwrap_or(0),
592                    hardware_hash: 0,
593                };
594                let _ = self.selector.record_performance(entry);
595            }
596        }
597
598        Ok(best_algorithms)
599    }
600
601    /// Get selector reference
602    pub fn selector(&self) -> &AlgorithmSelector {
603        &self.selector
604    }
605
606    /// Get selector mutable reference
607    pub fn selector_mut(&mut self) -> &mut AlgorithmSelector {
608        &mut self.selector
609    }
610}
611
612/// Memory profiler for tracking FFT memory usage
613pub struct MemoryProfiler {
614    /// Tracking enabled
615    enabled: bool,
616    /// Peak memory recorded
617    peak_memory: usize,
618    /// Current allocation estimate
619    current_allocation: usize,
620}
621
622impl Default for MemoryProfiler {
623    fn default() -> Self {
624        Self::new()
625    }
626}
627
628impl MemoryProfiler {
629    /// Create a new memory profiler
630    pub fn new() -> Self {
631        Self {
632            enabled: true,
633            peak_memory: 0,
634            current_allocation: 0,
635        }
636    }
637
638    /// Record an allocation
639    pub fn record_allocation(&mut self, bytes: usize) {
640        if self.enabled {
641            self.current_allocation += bytes;
642            if self.current_allocation > self.peak_memory {
643                self.peak_memory = self.current_allocation;
644            }
645        }
646    }
647
648    /// Record a deallocation
649    pub fn record_deallocation(&mut self, bytes: usize) {
650        if self.enabled {
651            self.current_allocation = self.current_allocation.saturating_sub(bytes);
652        }
653    }
654
655    /// Reset counters
656    pub fn reset(&mut self) {
657        self.peak_memory = 0;
658        self.current_allocation = 0;
659    }
660
661    /// Get peak memory usage
662    pub fn peak_memory(&self) -> usize {
663        self.peak_memory
664    }
665
666    /// Get current allocation
667    pub fn current_allocation(&self) -> usize {
668        self.current_allocation
669    }
670
671    /// Enable/disable tracking
672    pub fn set_enabled(&mut self, enabled: bool) {
673        self.enabled = enabled;
674    }
675}
676
677/// Estimate memory required for FFT of given size
678pub fn estimate_fft_memory(size: usize, algorithm: FftAlgorithm) -> usize {
679    let base_memory = size * 16; // Complex64 = 16 bytes
680
681    // Algorithm-specific overhead
682    let overhead = match algorithm {
683        FftAlgorithm::CooleyTukeyRadix2 => base_memory / 4, // Minimal scratch
684        FftAlgorithm::Radix4 => base_memory / 4,
685        FftAlgorithm::SplitRadix => base_memory / 3,
686        FftAlgorithm::MixedRadix => base_memory / 2,
687        FftAlgorithm::Bluestein => base_memory * 2, // Needs convolution buffer
688        FftAlgorithm::Rader => base_memory,
689        FftAlgorithm::Winograd => base_memory / 2,
690        FftAlgorithm::GoodThomas => base_memory / 3,
691        FftAlgorithm::Streaming => base_memory / 8, // Minimal memory
692        FftAlgorithm::CacheOblivious => base_memory / 4,
693        FftAlgorithm::InPlace => 0, // No extra memory
694        FftAlgorithm::SimdOptimized => base_memory / 4,
695        FftAlgorithm::Parallel => base_memory, // Thread buffers
696        FftAlgorithm::Hybrid => base_memory / 2,
697    };
698
699    base_memory + overhead
700}
701
702/// Estimate current memory allocation (simplified)
703fn estimate_current_allocation() -> usize {
704    // This is a simplified estimation
705    // In a real implementation, this would query the allocator
706    0
707}
708
709/// Format bytes in human-readable form
710fn format_bytes(bytes: usize) -> String {
711    if bytes >= 1024 * 1024 * 1024 {
712        format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
713    } else if bytes >= 1024 * 1024 {
714        format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
715    } else if bytes >= 1024 {
716        format!("{:.2} KB", bytes as f64 / 1024.0)
717    } else {
718        format!("{} B", bytes)
719    }
720}
721
722#[cfg(test)]
723mod tests {
724    use super::*;
725
726    #[test]
727    fn test_profile_result_statistics() {
728        let measurements = vec![
729            Measurement {
730                time_ns: 1000,
731                peak_memory_bytes: 1000,
732                allocated_bytes: 100,
733            },
734            Measurement {
735                time_ns: 1200,
736                peak_memory_bytes: 1000,
737                allocated_bytes: 100,
738            },
739            Measurement {
740                time_ns: 1100,
741                peak_memory_bytes: 1000,
742                allocated_bytes: 100,
743            },
744            Measurement {
745                time_ns: 1050,
746                peak_memory_bytes: 1000,
747                allocated_bytes: 100,
748            },
749        ];
750
751        let result = ProfileResult::from_measurements(
752            256,
753            FftAlgorithm::MixedRadix,
754            true,
755            measurements,
756            None,
757        );
758
759        assert_eq!(result.size, 256);
760        assert!(result.avg_time.as_nanos() > 0);
761        assert!(result.min_time <= result.avg_time);
762        assert!(result.max_time >= result.avg_time);
763        assert!(result.ops_per_second > 0.0);
764    }
765
766    #[test]
767    fn test_profile_single_size() {
768        let profiler = PerformanceProfiler::new();
769        let result = profiler.profile_size(256, true);
770
771        assert!(result.is_ok());
772        let profile = result.expect("Profiling failed");
773        assert_eq!(profile.size, 256);
774        assert!(profile.avg_time.as_nanos() > 0);
775    }
776
777    #[test]
778    fn test_compare_algorithms() {
779        let profiler = PerformanceProfiler::new();
780        let algorithms = [FftAlgorithm::MixedRadix, FftAlgorithm::Bluestein];
781
782        let result = profiler.compare_algorithms(256, &algorithms, true);
783
784        assert!(result.is_ok());
785        let comparison = result.expect("Comparison failed");
786        assert_eq!(comparison.size, 256);
787        assert!(!comparison.results.is_empty());
788        assert!(comparison.speedup >= 1.0);
789    }
790
791    #[test]
792    fn test_memory_profiler() {
793        let mut mp = MemoryProfiler::new();
794
795        mp.record_allocation(1000);
796        assert_eq!(mp.current_allocation(), 1000);
797        assert_eq!(mp.peak_memory(), 1000);
798
799        mp.record_allocation(500);
800        assert_eq!(mp.current_allocation(), 1500);
801        assert_eq!(mp.peak_memory(), 1500);
802
803        mp.record_deallocation(1000);
804        assert_eq!(mp.current_allocation(), 500);
805        assert_eq!(mp.peak_memory(), 1500);
806
807        mp.reset();
808        assert_eq!(mp.current_allocation(), 0);
809        assert_eq!(mp.peak_memory(), 0);
810    }
811
812    #[test]
813    fn test_estimate_fft_memory() {
814        let size = 1024;
815
816        let inplace_mem = estimate_fft_memory(size, FftAlgorithm::InPlace);
817        let bluestein_mem = estimate_fft_memory(size, FftAlgorithm::Bluestein);
818
819        // Bluestein should use more memory than in-place
820        assert!(bluestein_mem > inplace_mem);
821    }
822
823    #[test]
824    fn test_format_bytes() {
825        assert_eq!(format_bytes(512), "512 B");
826        assert_eq!(format_bytes(1024), "1.00 KB");
827        assert_eq!(format_bytes(1024 * 1024), "1.00 MB");
828        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
829    }
830
831    #[test]
832    fn test_profile_config_default() {
833        let config = ProfileConfig::default();
834        assert!(config.warmup_iterations > 0);
835        assert!(config.benchmark_iterations > 0);
836        assert!(config.track_memory);
837    }
838}
scirs2_fft/performance_profiler.rs

scirs2_fft/
performance_profiler.rs