use crate::algorithm_selector::{
AlgorithmRecommendation, AlgorithmSelector, FftAlgorithm, InputCharacteristics,
PerformanceEntry, SelectionConfig,
};
use crate::error::{FFTError, FFTResult};
#[cfg(feature = "rustfft-backend")]
use rustfft::FftPlanner;
#[cfg(not(feature = "rustfft-backend"))]
use oxifft::{Complex as OxiComplex, Direction};
use scirs2_core::numeric::Complex64;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::{Duration, Instant};
#[derive(Debug, Clone)]
pub struct ProfileConfig {
pub warmup_iterations: usize,
pub benchmark_iterations: usize,
pub track_memory: bool,
pub sizes: Option<Vec<usize>>,
pub algorithms: Option<Vec<FftAlgorithm>>,
pub include_inverse: bool,
pub timeout_ms: u64,
}
impl Default for ProfileConfig {
fn default() -> Self {
Self {
warmup_iterations: 5,
benchmark_iterations: 20,
track_memory: true,
sizes: None,
algorithms: None,
include_inverse: true,
timeout_ms: 30000, }
}
}
const DEFAULT_SIZES: &[usize] = &[
64, 128, 256, 512, 1024, 2048, 4096, 8192, 100, 360, 480, 1000, 2000, 5000, 97, 101, 127, 251, 509, 1009, 2003, 1200, 3600, 7200, 65536, 131072, 262144, ];
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Measurement {
pub time_ns: u64,
pub peak_memory_bytes: usize,
pub allocated_bytes: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfileResult {
pub size: usize,
pub algorithm: FftAlgorithm,
pub forward: bool,
pub measurements: Vec<Measurement>,
pub avg_time: Duration,
pub min_time: Duration,
pub max_time: Duration,
pub std_dev: Duration,
pub median_time: Duration,
pub p90_time: Duration,
pub p99_time: Duration,
pub ops_per_second: f64,
pub avg_peak_memory: usize,
pub elements_per_second: f64,
pub input_characteristics: Option<InputCharacteristics>,
}
impl ProfileResult {
pub fn from_measurements(
size: usize,
algorithm: FftAlgorithm,
forward: bool,
measurements: Vec<Measurement>,
input_characteristics: Option<InputCharacteristics>,
) -> Self {
if measurements.is_empty() {
return Self {
size,
algorithm,
forward,
measurements: Vec::new(),
avg_time: Duration::ZERO,
min_time: Duration::ZERO,
max_time: Duration::ZERO,
std_dev: Duration::ZERO,
median_time: Duration::ZERO,
p90_time: Duration::ZERO,
p99_time: Duration::ZERO,
ops_per_second: 0.0,
avg_peak_memory: 0,
elements_per_second: 0.0,
input_characteristics,
};
}
let times_ns: Vec<u64> = measurements.iter().map(|m| m.time_ns).collect();
let n = times_ns.len();
let sum: u64 = times_ns.iter().sum();
let avg_ns = sum / n as u64;
let min_ns = times_ns.iter().min().copied().unwrap_or(0);
let max_ns = times_ns.iter().max().copied().unwrap_or(0);
let variance: f64 = times_ns
.iter()
.map(|&t| {
let diff = t as f64 - avg_ns as f64;
diff * diff
})
.sum::<f64>()
/ n as f64;
let std_dev_ns = variance.sqrt() as u64;
let mut sorted_times = times_ns.clone();
sorted_times.sort_unstable();
let median_ns = if n % 2 == 0 {
(sorted_times[n / 2 - 1] + sorted_times[n / 2]) / 2
} else {
sorted_times[n / 2]
};
let p90_idx = (n as f64 * 0.90).ceil() as usize - 1;
let p99_idx = (n as f64 * 0.99).ceil() as usize - 1;
let p90_ns = sorted_times.get(p90_idx.min(n - 1)).copied().unwrap_or(0);
let p99_ns = sorted_times.get(p99_idx.min(n - 1)).copied().unwrap_or(0);
let ops_per_second = if avg_ns > 0 {
1_000_000_000.0 / avg_ns as f64
} else {
0.0
};
let elements_per_second = ops_per_second * size as f64;
let avg_peak_memory = if !measurements.is_empty() {
measurements
.iter()
.map(|m| m.peak_memory_bytes)
.sum::<usize>()
/ measurements.len()
} else {
0
};
Self {
size,
algorithm,
forward,
measurements,
avg_time: Duration::from_nanos(avg_ns),
min_time: Duration::from_nanos(min_ns),
max_time: Duration::from_nanos(max_ns),
std_dev: Duration::from_nanos(std_dev_ns),
median_time: Duration::from_nanos(median_ns),
p90_time: Duration::from_nanos(p90_ns),
p99_time: Duration::from_nanos(p99_ns),
ops_per_second,
avg_peak_memory,
elements_per_second,
input_characteristics,
}
}
pub fn as_table_row(&self) -> String {
format!(
"{:>8} | {:>16} | {:>12?} | {:>12?} | {:>12?} | {:>12.2} | {:>10}",
self.size,
self.algorithm.to_string(),
self.avg_time,
self.min_time,
self.max_time,
self.ops_per_second,
format_bytes(self.avg_peak_memory)
)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmComparison {
pub size: usize,
pub results: HashMap<FftAlgorithm, ProfileResult>,
pub best_algorithm: FftAlgorithm,
pub speedup: f64,
pub recommendation: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceReport {
pub results: HashMap<(usize, FftAlgorithm), ProfileResult>,
pub best_by_size: HashMap<usize, FftAlgorithm>,
pub comparisons: Vec<AlgorithmComparison>,
pub total_time: Duration,
pub config_summary: String,
}
impl PerformanceReport {
pub fn to_text_report(&self) -> String {
let mut report = String::new();
report.push_str("=== FFT Performance Report ===\n\n");
report.push_str(&format!("Total profiling time: {:?}\n", self.total_time));
report.push_str(&format!("Configuration: {}\n\n", self.config_summary));
report.push_str("--- Results by Size ---\n\n");
report.push_str(&format!(
"{:>8} | {:>16} | {:>12} | {:>12} | {:>12} | {:>12} | {:>10}\n",
"Size", "Algorithm", "Avg Time", "Min Time", "Max Time", "Ops/sec", "Memory"
));
report.push_str(&"-".repeat(100));
report.push('\n');
let mut sizes: Vec<usize> = self.best_by_size.keys().copied().collect();
sizes.sort_unstable();
for size in sizes {
if let Some(algo) = self.best_by_size.get(&size) {
if let Some(result) = self.results.get(&(size, *algo)) {
report.push_str(&result.as_table_row());
report.push('\n');
}
}
}
report.push_str("\n--- Recommendations ---\n\n");
for comparison in &self.comparisons {
report.push_str(&format!(
"Size {:>8}: {} (speedup: {:.2}x)\n",
comparison.size, comparison.recommendation, comparison.speedup
));
}
report
}
}
pub struct PerformanceProfiler {
config: ProfileConfig,
selector: AlgorithmSelector,
}
impl Default for PerformanceProfiler {
fn default() -> Self {
Self::new()
}
}
impl PerformanceProfiler {
pub fn new() -> Self {
Self::with_config(ProfileConfig::default())
}
pub fn with_config(config: ProfileConfig) -> Self {
Self {
config,
selector: AlgorithmSelector::new(),
}
}
pub fn profile_size(&self, size: usize, forward: bool) -> FFTResult<ProfileResult> {
let recommendation = self.selector.select_algorithm(size, forward)?;
self.profile_size_with_algorithm(size, recommendation.algorithm, forward)
}
#[cfg(feature = "rustfft-backend")]
pub fn profile_size_with_algorithm(
&self,
size: usize,
algorithm: FftAlgorithm,
forward: bool,
) -> FFTResult<ProfileResult> {
if size == 0 {
return Err(FFTError::ValueError("Size must be positive".to_string()));
}
let mut data: Vec<Complex64> = (0..size)
.map(|i| Complex64::new(i as f64, (i * 2) as f64))
.collect();
let mut planner = FftPlanner::new();
let fft = if forward {
planner.plan_fft_forward(size)
} else {
planner.plan_fft_inverse(size)
};
for _ in 0..self.config.warmup_iterations {
let mut work_data = data.clone();
fft.process(&mut work_data);
}
let mut measurements = Vec::with_capacity(self.config.benchmark_iterations);
for _ in 0..self.config.benchmark_iterations {
let mut work_data = data.clone();
let memory_before = if self.config.track_memory {
estimate_current_allocation()
} else {
0
};
let start = Instant::now();
fft.process(&mut work_data);
let elapsed = start.elapsed();
let memory_after = if self.config.track_memory {
estimate_current_allocation()
} else {
0
};
measurements.push(Measurement {
time_ns: elapsed.as_nanos() as u64,
peak_memory_bytes: size * 16, allocated_bytes: memory_after.saturating_sub(memory_before),
});
}
let chars = InputCharacteristics::analyze(size, &self.selector.hardware().cache_info);
Ok(ProfileResult::from_measurements(
size,
algorithm,
forward,
measurements,
Some(chars),
))
}
#[cfg(not(feature = "rustfft-backend"))]
pub fn profile_size_with_algorithm(
&self,
size: usize,
algorithm: FftAlgorithm,
forward: bool,
) -> FFTResult<ProfileResult> {
if size == 0 {
return Err(FFTError::ValueError("Size must be positive".to_string()));
}
let data: Vec<OxiComplex<f64>> = (0..size)
.map(|i| OxiComplex::new(i as f64, (i * 2) as f64))
.collect();
let mut plan = if forward {
oxifft::Plan::new(Direction::Forward, size, oxifft::Flags::Estimate).map_err(|e| {
FFTError::ComputationError(format!("Failed to create FFT plan: {:?}", e))
})?
} else {
oxifft::Plan::new(Direction::Backward, size, oxifft::Flags::Estimate).map_err(|e| {
FFTError::ComputationError(format!("Failed to create FFT plan: {:?}", e))
})?
};
for _ in 0..self.config.warmup_iterations {
let mut work_data = data.clone();
plan.execute_c2c(&mut work_data).map_err(|e| {
FFTError::ComputationError(format!("FFT execution failed: {:?}", e))
})?;
}
let mut measurements = Vec::with_capacity(self.config.benchmark_iterations);
for _ in 0..self.config.benchmark_iterations {
let mut work_data = data.clone();
let memory_before = if self.config.track_memory {
estimate_current_allocation()
} else {
0
};
let start = Instant::now();
plan.execute_c2c(&mut work_data).map_err(|e| {
FFTError::ComputationError(format!("FFT execution failed: {:?}", e))
})?;
let elapsed = start.elapsed();
let memory_after = if self.config.track_memory {
estimate_current_allocation()
} else {
0
};
measurements.push(Measurement {
time_ns: elapsed.as_nanos() as u64,
peak_memory_bytes: size * 16, allocated_bytes: memory_after.saturating_sub(memory_before),
});
}
let chars = InputCharacteristics::analyze(size, &self.selector.hardware().cache_info);
Ok(ProfileResult::from_measurements(
size,
algorithm,
forward,
measurements,
Some(chars),
))
}
pub fn profile_sizes(&self, sizes: &[usize], forward: bool) -> FFTResult<Vec<ProfileResult>> {
let mut results = Vec::with_capacity(sizes.len());
for &size in sizes {
match self.profile_size(size, forward) {
Ok(result) => results.push(result),
Err(e) => {
eprintln!("Failed to profile size {}: {}", size, e);
}
}
}
Ok(results)
}
pub fn compare_algorithms(
&self,
size: usize,
algorithms: &[FftAlgorithm],
forward: bool,
) -> FFTResult<AlgorithmComparison> {
let mut results = HashMap::new();
for &algorithm in algorithms {
match self.profile_size_with_algorithm(size, algorithm, forward) {
Ok(result) => {
results.insert(algorithm, result);
}
Err(e) => {
eprintln!("Failed to profile {} for size {}: {}", algorithm, size, e);
}
}
}
if results.is_empty() {
return Err(FFTError::ComputationError(
"No algorithms could be profiled".to_string(),
));
}
let best_algorithm = results
.iter()
.min_by_key(|(_, r)| r.avg_time)
.map(|(&algo, _)| algo)
.unwrap_or(FftAlgorithm::MixedRadix);
let best_time = results
.get(&best_algorithm)
.map(|r| r.avg_time)
.unwrap_or(Duration::ZERO);
let worst_time = results
.values()
.map(|r| r.avg_time)
.max()
.unwrap_or(Duration::ZERO);
let speedup = if best_time.as_nanos() > 0 {
worst_time.as_nanos() as f64 / best_time.as_nanos() as f64
} else {
1.0
};
let recommendation = format!("{} is fastest ({:?})", best_algorithm, best_time);
Ok(AlgorithmComparison {
size,
results,
best_algorithm,
speedup,
recommendation,
})
}
pub fn run_comprehensive_profile(&self) -> FFTResult<PerformanceReport> {
let start = Instant::now();
let sizes = self.config.sizes.as_deref().unwrap_or(DEFAULT_SIZES);
let algorithms = self.config.algorithms.as_deref().unwrap_or(&[
FftAlgorithm::CooleyTukeyRadix2,
FftAlgorithm::SplitRadix,
FftAlgorithm::MixedRadix,
FftAlgorithm::Bluestein,
]);
let mut results = HashMap::new();
let mut best_by_size = HashMap::new();
let mut comparisons = Vec::new();
for &size in sizes {
let comparison = self.compare_algorithms(size, algorithms, true)?;
for (algo, result) in &comparison.results {
results.insert((size, *algo), result.clone());
}
best_by_size.insert(size, comparison.best_algorithm);
comparisons.push(comparison);
if self.config.include_inverse {
if let Ok(inv_comparison) = self.compare_algorithms(size, algorithms, false) {
let _ = inv_comparison;
}
}
}
let total_time = start.elapsed();
let config_summary = format!(
"warmup={}, iterations={}, sizes={}, algorithms={}",
self.config.warmup_iterations,
self.config.benchmark_iterations,
sizes.len(),
algorithms.len()
);
Ok(PerformanceReport {
results,
best_by_size,
comparisons,
total_time,
config_summary,
})
}
pub fn auto_tune(&mut self, sizes: &[usize]) -> FFTResult<HashMap<usize, FftAlgorithm>> {
let algorithms = [
FftAlgorithm::CooleyTukeyRadix2,
FftAlgorithm::SplitRadix,
FftAlgorithm::MixedRadix,
FftAlgorithm::Bluestein,
];
let mut best_algorithms = HashMap::new();
for &size in sizes {
let comparison = self.compare_algorithms(size, &algorithms, true)?;
best_algorithms.insert(size, comparison.best_algorithm);
if let Some(result) = comparison.results.get(&comparison.best_algorithm) {
let entry = PerformanceEntry {
size,
algorithm: comparison.best_algorithm,
forward: true,
execution_time_ns: result.avg_time.as_nanos() as u64,
peak_memory_bytes: result.avg_peak_memory,
timestamp: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0),
hardware_hash: 0,
};
let _ = self.selector.record_performance(entry);
}
}
Ok(best_algorithms)
}
pub fn selector(&self) -> &AlgorithmSelector {
&self.selector
}
pub fn selector_mut(&mut self) -> &mut AlgorithmSelector {
&mut self.selector
}
}
pub struct MemoryProfiler {
enabled: bool,
peak_memory: usize,
current_allocation: usize,
}
impl Default for MemoryProfiler {
fn default() -> Self {
Self::new()
}
}
impl MemoryProfiler {
pub fn new() -> Self {
Self {
enabled: true,
peak_memory: 0,
current_allocation: 0,
}
}
pub fn record_allocation(&mut self, bytes: usize) {
if self.enabled {
self.current_allocation += bytes;
if self.current_allocation > self.peak_memory {
self.peak_memory = self.current_allocation;
}
}
}
pub fn record_deallocation(&mut self, bytes: usize) {
if self.enabled {
self.current_allocation = self.current_allocation.saturating_sub(bytes);
}
}
pub fn reset(&mut self) {
self.peak_memory = 0;
self.current_allocation = 0;
}
pub fn peak_memory(&self) -> usize {
self.peak_memory
}
pub fn current_allocation(&self) -> usize {
self.current_allocation
}
pub fn set_enabled(&mut self, enabled: bool) {
self.enabled = enabled;
}
}
pub fn estimate_fft_memory(size: usize, algorithm: FftAlgorithm) -> usize {
let base_memory = size * 16;
let overhead = match algorithm {
FftAlgorithm::CooleyTukeyRadix2 => base_memory / 4, FftAlgorithm::Radix4 => base_memory / 4,
FftAlgorithm::SplitRadix => base_memory / 3,
FftAlgorithm::MixedRadix => base_memory / 2,
FftAlgorithm::Bluestein => base_memory * 2, FftAlgorithm::Rader => base_memory,
FftAlgorithm::Winograd => base_memory / 2,
FftAlgorithm::GoodThomas => base_memory / 3,
FftAlgorithm::Streaming => base_memory / 8, FftAlgorithm::CacheOblivious => base_memory / 4,
FftAlgorithm::InPlace => 0, FftAlgorithm::SimdOptimized => base_memory / 4,
FftAlgorithm::Parallel => base_memory, FftAlgorithm::Hybrid => base_memory / 2,
};
base_memory + overhead
}
fn estimate_current_allocation() -> usize {
0
}
fn format_bytes(bytes: usize) -> String {
if bytes >= 1024 * 1024 * 1024 {
format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
} else if bytes >= 1024 * 1024 {
format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
} else if bytes >= 1024 {
format!("{:.2} KB", bytes as f64 / 1024.0)
} else {
format!("{} B", bytes)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_profile_result_statistics() {
let measurements = vec![
Measurement {
time_ns: 1000,
peak_memory_bytes: 1000,
allocated_bytes: 100,
},
Measurement {
time_ns: 1200,
peak_memory_bytes: 1000,
allocated_bytes: 100,
},
Measurement {
time_ns: 1100,
peak_memory_bytes: 1000,
allocated_bytes: 100,
},
Measurement {
time_ns: 1050,
peak_memory_bytes: 1000,
allocated_bytes: 100,
},
];
let result = ProfileResult::from_measurements(
256,
FftAlgorithm::MixedRadix,
true,
measurements,
None,
);
assert_eq!(result.size, 256);
assert!(result.avg_time.as_nanos() > 0);
assert!(result.min_time <= result.avg_time);
assert!(result.max_time >= result.avg_time);
assert!(result.ops_per_second > 0.0);
}
#[test]
fn test_profile_single_size() {
let profiler = PerformanceProfiler::new();
let result = profiler.profile_size(256, true);
assert!(result.is_ok());
let profile = result.expect("Profiling failed");
assert_eq!(profile.size, 256);
assert!(profile.avg_time.as_nanos() > 0);
}
#[test]
fn test_compare_algorithms() {
let profiler = PerformanceProfiler::new();
let algorithms = [FftAlgorithm::MixedRadix, FftAlgorithm::Bluestein];
let result = profiler.compare_algorithms(256, &algorithms, true);
assert!(result.is_ok());
let comparison = result.expect("Comparison failed");
assert_eq!(comparison.size, 256);
assert!(!comparison.results.is_empty());
assert!(comparison.speedup >= 1.0);
}
#[test]
fn test_memory_profiler() {
let mut mp = MemoryProfiler::new();
mp.record_allocation(1000);
assert_eq!(mp.current_allocation(), 1000);
assert_eq!(mp.peak_memory(), 1000);
mp.record_allocation(500);
assert_eq!(mp.current_allocation(), 1500);
assert_eq!(mp.peak_memory(), 1500);
mp.record_deallocation(1000);
assert_eq!(mp.current_allocation(), 500);
assert_eq!(mp.peak_memory(), 1500);
mp.reset();
assert_eq!(mp.current_allocation(), 0);
assert_eq!(mp.peak_memory(), 0);
}
#[test]
fn test_estimate_fft_memory() {
let size = 1024;
let inplace_mem = estimate_fft_memory(size, FftAlgorithm::InPlace);
let bluestein_mem = estimate_fft_memory(size, FftAlgorithm::Bluestein);
assert!(bluestein_mem > inplace_mem);
}
#[test]
fn test_format_bytes() {
assert_eq!(format_bytes(512), "512 B");
assert_eq!(format_bytes(1024), "1.00 KB");
assert_eq!(format_bytes(1024 * 1024), "1.00 MB");
assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
}
#[test]
fn test_profile_config_default() {
let config = ProfileConfig::default();
assert!(config.warmup_iterations > 0);
assert!(config.benchmark_iterations > 0);
assert!(config.track_memory);
}
}