use crate::error::{RusTorchError, RusTorchResult};
use crate::profiler::metrics_collector::{CustomMetric, MetricType, MetricsCollector};
use std::collections::HashMap;
use std::time::{Duration, Instant};
/// Category label attached to every benchmark result.
///
/// The suite groups results by this value: `get_results_by_category` filters
/// on equality and `generate_report` prints one table per distinct category.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum BenchmarkCategory {
/// Tensor operation benchmarks.
TensorOps,
/// Memory benchmarks.
Memory,
/// GPU benchmarks.
Gpu,
/// Neural network benchmarks.
NeuralNetwork,
/// Linear algebra benchmarks.
LinearAlgebra,
/// System-level benchmarks.
System,
/// Caller-defined category identified by an arbitrary name.
Custom(String),
}
/// Tunable settings for a single benchmark run.
///
/// A suite stores one instance as its default; `AdvancedBenchmarkSuite::benchmark`
/// also accepts a per-call override.
#[derive(Debug, Clone)]
pub struct BenchmarkConfiguration {
/// Number of untimed calls made before measurement; their results are discarded.
pub warmup_iterations: usize,
/// Number of timed calls attempted during the measurement phase.
pub measurement_iterations: usize,
/// Minimum run duration in milliseconds (per the field name).
/// NOTE(review): not read by any code visible in this file — confirm intended use.
pub min_duration_ms: u64,
/// Maximum run duration in milliseconds (per the field name).
/// NOTE(review): not read by any code visible in this file — confirm intended use.
pub max_duration_ms: u64,
/// Confidence level for the reported interval.
/// NOTE(review): not read in this file; `calculate_statistics` uses a fixed
/// 1.96 multiplier regardless of this value.
pub confidence_level: f64,
/// Largest coefficient of variation for which a result is flagged stable.
pub variance_threshold: f64,
/// When true, a `<name>_memory` metric is registered and memory metrics are
/// attached to the result.
pub enable_memory_profiling: bool,
/// When true, GPU metrics are attached to the result.
pub enable_gpu_profiling: bool,
/// When true, system metrics are attached to the result.
pub enable_system_metrics: bool,
/// NOTE(review): not read by any code visible in this file — confirm intended use.
pub collect_gc_stats: bool,
}
impl Default for BenchmarkConfiguration {
fn default() -> Self {
Self {
warmup_iterations: 10,
measurement_iterations: 100,
min_duration_ms: 1000,
max_duration_ms: 60000,
confidence_level: 0.95,
variance_threshold: 0.1,
enable_memory_profiling: true,
enable_gpu_profiling: true,
enable_system_metrics: true,
collect_gc_stats: false,
}
}
}
/// Outcome of one benchmark, stored in the suite under the benchmark's name.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
/// Benchmark name; also the key under which the suite stores this result.
pub name: String,
/// Category supplied by the caller.
pub category: BenchmarkCategory,
/// Configuration that was in effect for this run.
pub config: BenchmarkConfiguration,
/// One timing in milliseconds per successful measured iteration; empty for
/// a failed benchmark.
pub timings_ms: Vec<f64>,
/// Summary statistics over `timings_ms`; all-zero defaults for a failed benchmark.
pub statistics: BenchmarkStatistics,
/// Present when memory profiling was enabled and the benchmark succeeded.
pub memory_metrics: Option<MemoryBenchmarkMetrics>,
/// Present when GPU profiling was enabled and the benchmark succeeded.
pub gpu_metrics: Option<GpuBenchmarkMetrics>,
/// Present when system metrics were enabled and the benchmark succeeded.
pub system_metrics: Option<SystemBenchmarkMetrics>,
/// `Some(message)` when the benchmark was rejected because too many measured
/// iterations failed; `None` on success.
pub error: Option<String>,
/// Instant captured when the benchmark started.
pub timestamp: Instant,
}
/// Summary statistics over a set of timings, as produced by
/// `AdvancedBenchmarkSuite::calculate_statistics`.
#[derive(Debug, Clone)]
pub struct BenchmarkStatistics {
/// Number of timings summarized.
pub sample_count: usize,
/// Arithmetic mean of the timings.
pub mean_ms: f64,
/// Median; the average of the two middle values for an even sample count.
pub median_ms: f64,
/// Population standard deviation (the variance divides by the sample count).
pub std_dev_ms: f64,
/// Smallest timing.
pub min_ms: f64,
/// Largest timing.
pub max_ms: f64,
/// Sorted timing at index `floor(count * 0.95)`, clamped to the last element.
pub p95_ms: f64,
/// Sorted timing at index `floor(count * 0.99)`, clamped to the last element.
pub p99_ms: f64,
/// `std_dev_ms / mean_ms`, or 0 when the mean is not positive.
pub coefficient_of_variation: f64,
/// `1000 / mean_ms`, or 0 when the mean is not positive.
pub throughput_ops_per_sec: f64,
/// `(mean - margin, mean + margin)` with `margin = 1.96 * std_dev / sqrt(count)`.
pub confidence_interval_ms: (f64, f64),
/// True when the coefficient of variation does not exceed the configured
/// `variance_threshold`.
pub is_stable: bool,
}
/// Memory figures attached to a benchmark result.
///
/// In this file the only producer, `collect_memory_metrics`, fills every field
/// with zero as a placeholder.
#[derive(Debug, Clone)]
pub struct MemoryBenchmarkMetrics {
/// Peak memory, in bytes.
pub peak_memory_bytes: u64,
/// Average memory, in bytes.
pub avg_memory_bytes: u64,
/// Number of allocations.
pub allocations: usize,
/// Number of deallocations.
pub deallocations: usize,
/// Total bytes allocated.
pub total_allocated_bytes: u64,
/// Total bytes deallocated.
pub total_deallocated_bytes: u64,
/// Fragmentation score; its scale is not defined in this file.
pub fragmentation_score: f64,
}
/// GPU figures attached to a benchmark result.
///
/// In this file the only producer, `collect_gpu_metrics`, fills the numeric
/// fields with zero and the optional fields with `None` as placeholders.
#[derive(Debug, Clone)]
pub struct GpuBenchmarkMetrics {
/// GPU utilization, as a percentage.
pub gpu_utilization_percent: f64,
/// GPU memory utilization, as a percentage.
pub memory_utilization_percent: f64,
/// GPU memory in use, in bytes.
pub gpu_memory_used_bytes: u64,
/// Number of kernel launches.
pub kernel_launches: usize,
/// Total kernel time, in milliseconds.
pub total_kernel_time_ms: f64,
/// Memory transfer time, in milliseconds.
pub memory_transfer_time_ms: f64,
/// GPU temperature in degrees Celsius, when available.
pub gpu_temperature_celsius: Option<f32>,
/// Power consumption in watts, when available.
pub power_consumption_watts: Option<f32>,
}
/// System-level figures attached to a benchmark result.
///
/// In this file the only producer, `collect_system_metrics`, fills every field
/// with zero as a placeholder.
#[derive(Debug, Clone)]
pub struct SystemBenchmarkMetrics {
/// CPU utilization, as a percentage.
pub cpu_utilization_percent: f64,
/// System memory, in bytes.
pub system_memory_bytes: u64,
/// Number of disk I/O operations.
pub disk_io_operations: usize,
/// Network I/O volume, in bytes.
pub network_io_bytes: u64,
/// Load average.
pub load_average: f64,
/// Number of context switches.
pub context_switches: usize,
}
/// A named collection of benchmarks that runs operations, stores their results
/// and renders a text report.
#[derive(Debug)]
pub struct AdvancedBenchmarkSuite {
/// Suite name, printed in the report header.
pub name: String,
/// Configuration used when `benchmark` is called without an override.
pub default_config: BenchmarkConfiguration,
/// Collector that receives the per-benchmark timing and memory metrics.
metrics_collector: MetricsCollector,
/// Results keyed by benchmark name; running the same name again replaces
/// the earlier entry.
results: HashMap<String, BenchmarkResult>,
/// Counters and host information shown in the report summary.
pub suite_metadata: SuiteMetadata,
}
/// Suite-level counters and host information shown in the report summary.
#[derive(Debug, Clone)]
pub struct SuiteMetadata {
/// Execution time printed as "Total Execution Time" in the report; starts at zero.
pub total_execution_time: Duration,
/// Incremented each time a benchmark completes successfully.
pub benchmarks_run: usize,
/// Incremented each time a benchmark is rejected for too many failed iterations.
pub benchmarks_failed: usize,
/// Host description captured when the suite is constructed.
pub system_info: SystemInfo,
}
/// Description of the host, filled in by `collect_system_info` when a suite is
/// constructed and printed in the report's "System Information" section.
#[derive(Debug, Clone)]
pub struct SystemInfo {
/// CPU model name.
pub cpu_model: String,
/// Number of CPU cores.
pub cpu_cores: usize,
/// Total memory in bytes; the report prints it in GB.
pub total_memory_bytes: u64,
/// Operating system identifier.
pub os_version: String,
/// Rust version string.
pub rust_version: String,
/// GPU description; the report prints a GPU line only when this is `Some`.
pub gpu_info: Option<String>,
}
impl AdvancedBenchmarkSuite {
/// Creates an empty suite called `name`.
///
/// The suite starts with the default configuration, a fresh metrics
/// collector, no results, zeroed counters and freshly collected system
/// information.
pub fn new(name: String) -> Self {
    let default_config = BenchmarkConfiguration::default();
    let metrics_collector = MetricsCollector::new();
    let results = HashMap::new();
    let suite_metadata = SuiteMetadata {
        total_execution_time: Duration::ZERO,
        benchmarks_run: 0,
        benchmarks_failed: 0,
        system_info: Self::collect_system_info(),
    };

    Self {
        name,
        default_config,
        metrics_collector,
        results,
        suite_metadata,
    }
}
/// Builder-style setter: consumes the suite and returns it with `config`
/// installed as the default configuration.
pub fn with_config(self, config: BenchmarkConfiguration) -> Self {
    let mut suite = self;
    suite.default_config = config;
    suite
}
/// Runs `operation` as the benchmark `name` and records the outcome in the suite.
///
/// Phases:
/// 1. Registers a `<name>_timing` metric (and a `<name>_memory` metric when
///    memory profiling is enabled) with the metrics collector.
/// 2. Calls `operation` `warmup_iterations` times and discards the results.
/// 3. Calls `operation` `measurement_iterations` times, timing each call;
///    only successful calls contribute a sample.
/// 4. Computes statistics and the enabled memory/GPU/system metrics, then
///    stores a [`BenchmarkResult`] under `name`, replacing any earlier entry.
///
/// `config` overrides the suite's default configuration for this call when
/// it is `Some`. The wall-clock time of the whole call is added to
/// `suite_metadata.total_execution_time` on both the success and the
/// rejected path.
///
/// # Errors
///
/// - Propagates any error returned by the metrics collector or by the
///   metric collection helpers.
/// - Returns `RusTorchError::Profiling` when more than half of the measured
///   iterations fail. In that case a failed [`BenchmarkResult`] (empty
///   timings, default statistics, `error` set) is stored first and
///   `benchmarks_failed` is incremented.
pub fn benchmark<F, T>(
    &mut self,
    name: &str,
    category: BenchmarkCategory,
    config: Option<BenchmarkConfiguration>,
    mut operation: F,
) -> RusTorchResult<()>
where
    F: FnMut() -> RusTorchResult<T>,
{
    let config = config.unwrap_or_else(|| self.default_config.clone());
    let start_time = Instant::now();
    println!("🏁 Running benchmark: {}", name);

    // Metric names are built once and reused for every iteration.
    let timing_key = format!("{}_timing", name);
    let memory_key = format!("{}_memory", name);

    let timing_metric = CustomMetric::new(timing_key.clone(), MetricType::TimingMs);
    self.metrics_collector.register_metric(timing_metric)?;
    if config.enable_memory_profiling {
        let memory_metric = CustomMetric::new(memory_key.clone(), MetricType::MemoryBytes);
        self.metrics_collector.register_metric(memory_metric)?;
    }

    println!(
        " 🔥 Warmup phase ({} iterations)...",
        config.warmup_iterations
    );
    for _ in 0..config.warmup_iterations {
        // Warmup outcomes are deliberately discarded.
        let _ = operation();
    }

    println!(
        " 📊 Measurement phase ({} iterations)...",
        config.measurement_iterations
    );
    let mut timings = Vec::with_capacity(config.measurement_iterations);
    let mut failed_iterations = 0usize;
    // Print progress roughly every 10% of the iterations (at least every one).
    let progress_step = (config.measurement_iterations / 10).max(1);
    for i in 0..config.measurement_iterations {
        let iteration_start = Instant::now();
        match operation() {
            Ok(_) => {
                let elapsed = iteration_start.elapsed();
                let elapsed_ms = elapsed.as_secs_f64() * 1000.0;
                timings.push(elapsed_ms);
                self.metrics_collector.record_timing(&timing_key, elapsed)?;
                if config.enable_memory_profiling {
                    self.metrics_collector.update_metric(&memory_key, 0.0)?;
                }
            }
            Err(e) => {
                failed_iterations += 1;
                println!(" ❌ Iteration {} failed: {}", i + 1, e);
            }
        }
        if (i + 1) % progress_step == 0 {
            let progress = ((i + 1) as f64 / config.measurement_iterations as f64) * 100.0;
            println!(" Progress: {:.1}%", progress);
        }
    }

    // Reject the run when more than half of the iterations failed. Comparing
    // the failure count against `iterations / 2` avoids the truncation of
    // `successes < iterations / 2`, which accepted 0-of-1 or 1-of-3 successes.
    if failed_iterations > config.measurement_iterations / 2 {
        let error_msg = format!(
            "Too many failed iterations: {}/{}",
            failed_iterations, config.measurement_iterations
        );
        self.results.insert(
            name.to_string(),
            BenchmarkResult {
                name: name.to_string(),
                category,
                config,
                timings_ms: Vec::new(),
                statistics: BenchmarkStatistics::default(),
                memory_metrics: None,
                gpu_metrics: None,
                system_metrics: None,
                error: Some(error_msg.clone()),
                timestamp: start_time,
            },
        );
        self.suite_metadata.benchmarks_failed += 1;
        self.suite_metadata.total_execution_time += start_time.elapsed();
        return Err(RusTorchError::Profiling { message: error_msg });
    }

    let statistics = Self::calculate_statistics(&timings, &config);
    // Copy the figures needed for the closing summary before `statistics`
    // is moved into the stored result.
    let (mean_ms, median_ms, std_dev_ms) = (
        statistics.mean_ms,
        statistics.median_ms,
        statistics.std_dev_ms,
    );

    let memory_metrics = if config.enable_memory_profiling {
        Some(self.collect_memory_metrics(name)?)
    } else {
        None
    };
    let gpu_metrics = if config.enable_gpu_profiling {
        Some(self.collect_gpu_metrics(name)?)
    } else {
        None
    };
    let system_metrics = if config.enable_system_metrics {
        Some(self.collect_system_metrics()?)
    } else {
        None
    };

    let result = BenchmarkResult {
        name: name.to_string(),
        category,
        config,
        timings_ms: timings,
        statistics,
        memory_metrics,
        gpu_metrics,
        system_metrics,
        error: None,
        timestamp: start_time,
    };
    self.results.insert(name.to_string(), result);
    self.suite_metadata.benchmarks_run += 1;

    let total_time = start_time.elapsed();
    self.suite_metadata.total_execution_time += total_time;
    println!(
        " ✅ Benchmark completed in {:.2}s",
        total_time.as_secs_f64()
    );
    println!(
        " Mean: {:.3}ms, Median: {:.3}ms, StdDev: {:.3}ms",
        mean_ms, median_ms, std_dev_ms
    );
    Ok(())
}
pub fn benchmark_default<F, T>(
&mut self,
name: &str,
category: BenchmarkCategory,
operation: F,
) -> RusTorchResult<()>
where
F: FnMut() -> RusTorchResult<T>,
{
self.benchmark(name, category, None, operation)
}
/// Looks up the stored result for the benchmark called `name`.
///
/// Returns `None` when no benchmark with that name has been recorded.
pub fn get_result(&self, name: &str) -> Option<&BenchmarkResult> {
    let stored = &self.results;
    stored.get(name)
}
/// Borrows the full map of stored results, keyed by benchmark name.
pub fn get_all_results(&self) -> &HashMap<String, BenchmarkResult> {
    let all_results = &self.results;
    all_results
}
/// Collects references to every stored result whose category equals
/// `category`, in the map's iteration order.
pub fn get_results_by_category(&self, category: &BenchmarkCategory) -> Vec<&BenchmarkResult> {
    let mut matching = Vec::new();
    for candidate in self.results.values() {
        if candidate.category == *category {
            matching.push(candidate);
        }
    }
    matching
}
pub fn generate_report(&self) -> String {
let mut report = String::new();
report.push_str(&format!("📊 Benchmark Suite Report: {}\n", self.name));
report.push_str(&format!("{}", "=".repeat(50)));
report.push_str("\n\n");
report.push_str("🏆 Suite Summary:\n");
report.push_str(&format!(
" Benchmarks Run: {}\n",
self.suite_metadata.benchmarks_run
));
report.push_str(&format!(
" Benchmarks Failed: {}\n",
self.suite_metadata.benchmarks_failed
));
report.push_str(&format!(
" Success Rate: {:.1}%\n",
if self.suite_metadata.benchmarks_run > 0 {
(self.suite_metadata.benchmarks_run - self.suite_metadata.benchmarks_failed) as f64
/ self.suite_metadata.benchmarks_run as f64
* 100.0
} else {
0.0
}
));
report.push_str(&format!(
" Total Execution Time: {:.2}s\n\n",
self.suite_metadata.total_execution_time.as_secs_f64()
));
report.push_str("💻 System Information:\n");
report.push_str(&format!(
" CPU: {}\n",
self.suite_metadata.system_info.cpu_model
));
report.push_str(&format!(
" Cores: {}\n",
self.suite_metadata.system_info.cpu_cores
));
report.push_str(&format!(
" Memory: {:.2} GB\n",
self.suite_metadata.system_info.total_memory_bytes as f64 / (1024.0 * 1024.0 * 1024.0)
));
report.push_str(&format!(
" OS: {}\n",
self.suite_metadata.system_info.os_version
));
if let Some(ref gpu_info) = self.suite_metadata.system_info.gpu_info {
report.push_str(&format!(" GPU: {}\n", gpu_info));
}
report.push_str("\n");
let categories: std::collections::HashSet<_> =
self.results.values().map(|r| &r.category).collect();
for category in categories {
let category_results = self.get_results_by_category(category);
if !category_results.is_empty() {
report.push_str(&format!("📈 {:?} Results:\n", category));
report.push_str(&format!(
"{:<30} {:>10} {:>10} {:>10} {:>10} {:>15}\n",
"Benchmark",
"Mean(ms)",
"Median(ms)",
"StdDev(ms)",
"P99(ms)",
"Throughput(ops/s)"
));
report.push_str(&"-".repeat(100));
report.push_str("\n");
for result in category_results {
if result.error.is_none() {
report.push_str(&format!(
"{:<30} {:>10.3} {:>10.3} {:>10.3} {:>10.3} {:>15.2}\n",
if result.name.len() > 29 {
&result.name[..29]
} else {
&result.name
},
result.statistics.mean_ms,
result.statistics.median_ms,
result.statistics.std_dev_ms,
result.statistics.p99_ms,
result.statistics.throughput_ops_per_sec
));
} else {
report.push_str(&format!("{:<30} {:>50}\n", result.name, "❌ FAILED"));
}
}
report.push_str("\n");
}
}
report.push_str("💡 Performance Insights:\n");
self.generate_insights(&mut report);
report
}
/// Placeholder JSON export.
///
/// Currently always returns the empty JSON object `{}`; no stored result is
/// serialized and the call never fails.
pub fn export_json(&self) -> RusTorchResult<String> {
    let empty_object = String::from("{}");
    Ok(empty_object)
}
pub fn clear_results(&mut self) {
self.results.clear();
self.suite_metadata.benchmarks_run = 0;
self.suite_metadata.benchmarks_failed = 0;
let _ = self.metrics_collector.clear_metrics();
}
/// Summarizes `timings` (milliseconds) into a [`BenchmarkStatistics`].
///
/// Returns the all-zero default when `timings` is empty. Otherwise:
/// - the median averages the two middle values for an even sample count;
/// - the standard deviation is the population form (variance divides by the
///   sample count);
/// - the percentiles pick the sorted element at index `floor(count * p)`,
///   clamped to the last element;
/// - the confidence interval is `mean ± 1.96 * std_dev / sqrt(count)`.
///   NOTE(review): the 1.96 multiplier is fixed; `config.confidence_level`
///   is not consulted — confirm whether that is intended.
/// - `is_stable` is true when the coefficient of variation does not exceed
///   `config.variance_threshold`.
///
/// Sorting uses `f64::total_cmp`, so a NaN sample cannot panic; it
/// propagates into the mean and the values derived from it.
fn calculate_statistics(
    timings: &[f64],
    config: &BenchmarkConfiguration,
) -> BenchmarkStatistics {
    if timings.is_empty() {
        return BenchmarkStatistics::default();
    }

    let sample_count = timings.len();
    let count = sample_count as f64;

    let mut sorted_timings = timings.to_vec();
    // Total ordering: never panics, unlike `partial_cmp(..).unwrap()` on NaN.
    sorted_timings.sort_unstable_by(f64::total_cmp);

    let sum: f64 = timings.iter().sum();
    let mean_ms = sum / count;

    let middle = sample_count / 2;
    let median_ms = if sample_count % 2 == 0 {
        (sorted_timings[middle - 1] + sorted_timings[middle]) / 2.0
    } else {
        sorted_timings[middle]
    };

    let variance = timings.iter().map(|&t| (t - mean_ms).powi(2)).sum::<f64>() / count;
    let std_dev_ms = variance.sqrt();

    let last = sample_count - 1;
    let min_ms = sorted_timings[0];
    let max_ms = sorted_timings[last];
    // Element at index floor(count * fraction), clamped to the last sample.
    let percentile = |fraction: f64| sorted_timings[((count * fraction) as usize).min(last)];
    let p95_ms = percentile(0.95);
    let p99_ms = percentile(0.99);

    let coefficient_of_variation = if mean_ms > 0.0 {
        std_dev_ms / mean_ms
    } else {
        0.0
    };
    let throughput_ops_per_sec = if mean_ms > 0.0 { 1000.0 / mean_ms } else { 0.0 };

    // Fixed multiplier for a 95% interval under a normal approximation.
    let t_value = 1.96;
    let margin_of_error = t_value * std_dev_ms / count.sqrt();
    let confidence_interval_ms = (mean_ms - margin_of_error, mean_ms + margin_of_error);
    let is_stable = coefficient_of_variation <= config.variance_threshold;

    BenchmarkStatistics {
        sample_count,
        mean_ms,
        median_ms,
        std_dev_ms,
        min_ms,
        max_ms,
        p95_ms,
        p99_ms,
        coefficient_of_variation,
        throughput_ops_per_sec,
        confidence_interval_ms,
        is_stable,
    }
}
/// Placeholder memory collection: ignores `_name` and always succeeds with
/// every field set to zero.
fn collect_memory_metrics(&self, _name: &str) -> RusTorchResult<MemoryBenchmarkMetrics> {
    let zeroed = MemoryBenchmarkMetrics {
        peak_memory_bytes: 0,
        avg_memory_bytes: 0,
        allocations: 0,
        deallocations: 0,
        total_allocated_bytes: 0,
        total_deallocated_bytes: 0,
        fragmentation_score: 0.0,
    };
    Ok(zeroed)
}
/// Placeholder GPU collection: ignores `_name` and always succeeds with
/// zeroed numeric fields and no temperature or power reading.
fn collect_gpu_metrics(&self, _name: &str) -> RusTorchResult<GpuBenchmarkMetrics> {
    let zeroed = GpuBenchmarkMetrics {
        gpu_utilization_percent: 0.0,
        memory_utilization_percent: 0.0,
        gpu_memory_used_bytes: 0,
        kernel_launches: 0,
        total_kernel_time_ms: 0.0,
        memory_transfer_time_ms: 0.0,
        gpu_temperature_celsius: None,
        power_consumption_watts: None,
    };
    Ok(zeroed)
}
/// Placeholder system collection: always succeeds with every field set to
/// zero.
fn collect_system_metrics(&self) -> RusTorchResult<SystemBenchmarkMetrics> {
    let zeroed = SystemBenchmarkMetrics {
        cpu_utilization_percent: 0.0,
        system_memory_bytes: 0,
        disk_io_operations: 0,
        network_io_bytes: 0,
        load_average: 0.0,
        context_switches: 0,
    };
    Ok(zeroed)
}
/// Builds the host description stored in the suite metadata.
///
/// Only the core count (from `num_cpus::get`) and the OS identifier (from
/// `std::env::consts::OS`) are actually queried; the CPU model, total
/// memory, Rust version and GPU fields are fixed placeholder values.
fn collect_system_info() -> SystemInfo {
    let cpu_model = String::from("Unknown CPU");
    let cpu_cores = num_cpus::get();
    let os_version = String::from(std::env::consts::OS);
    let rust_version = String::from("Rust 1.70+");

    SystemInfo {
        cpu_model,
        cpu_cores,
        total_memory_bytes: 0,
        os_version,
        rust_version,
        gpu_info: None,
    }
}
fn generate_insights(&self, report: &mut String) {
let successful_results: Vec<_> = self
.results
.values()
.filter(|r| r.error.is_none())
.collect();
if successful_results.is_empty() {
report.push_str(" No successful benchmarks to analyze.\n\n");
return;
}
if let (Some(fastest), Some(slowest)) = (
successful_results.iter().min_by(|a, b| {
a.statistics
.mean_ms
.partial_cmp(&b.statistics.mean_ms)
.unwrap()
}),
successful_results.iter().max_by(|a, b| {
a.statistics
.mean_ms
.partial_cmp(&b.statistics.mean_ms)
.unwrap()
}),
) {
report.push_str(&format!(
" 🚀 Fastest: {} ({:.3}ms)\n",
fastest.name, fastest.statistics.mean_ms
));
report.push_str(&format!(
" 🐌 Slowest: {} ({:.3}ms)\n",
slowest.name, slowest.statistics.mean_ms
));
if fastest.statistics.mean_ms > 0.0 {
let speedup = slowest.statistics.mean_ms / fastest.statistics.mean_ms;
report.push_str(&format!(
" 📊 Performance Range: {:.1}x difference\n",
speedup
));
}
}
let unstable_count = successful_results
.iter()
.filter(|r| !r.statistics.is_stable)
.count();
if unstable_count > 0 {
report.push_str(&format!(
" ⚠️ {} benchmarks show high variance (>{}%)\n",
unstable_count,
self.default_config.variance_threshold * 100.0
));
}
report.push_str("\n");
}
}
impl Default for BenchmarkStatistics {
fn default() -> Self {
Self {
sample_count: 0,
mean_ms: 0.0,
median_ms: 0.0,
std_dev_ms: 0.0,
min_ms: 0.0,
max_ms: 0.0,
p95_ms: 0.0,
p99_ms: 0.0,
coefficient_of_variation: 0.0,
throughput_ops_per_sec: 0.0,
confidence_interval_ms: (0.0, 0.0),
is_stable: false,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    /// Default configuration with only the iteration counts overridden, so
    /// the tests finish quickly.
    fn quick_config(warmup: usize, measured: usize) -> BenchmarkConfiguration {
        BenchmarkConfiguration {
            warmup_iterations: warmup,
            measurement_iterations: measured,
            ..Default::default()
        }
    }

    /// A new suite keeps its name and starts without results.
    #[test]
    fn test_benchmark_suite_creation() {
        let created = AdvancedBenchmarkSuite::new(String::from("test_suite"));
        assert_eq!(created.name, "test_suite");
        assert_eq!(created.results.len(), 0);
    }

    /// A sleeping operation is recorded with one sample per measured
    /// iteration and a mean no shorter than the sleep.
    #[test]
    fn test_simple_benchmark() {
        let mut suite = AdvancedBenchmarkSuite::new("test".to_string());
        let sleepy = || -> RusTorchResult<()> {
            thread::sleep(Duration::from_millis(10));
            Ok(())
        };

        let outcome = suite.benchmark(
            "sleep_test",
            BenchmarkCategory::System,
            Some(quick_config(2, 5)),
            sleepy,
        );
        assert!(outcome.is_ok());

        let recorded = suite.get_result("sleep_test").unwrap();
        assert_eq!(recorded.name, "sleep_test");
        assert!(recorded.error.is_none());
        assert!(recorded.statistics.mean_ms >= 10.0);
        assert_eq!(recorded.statistics.sample_count, 5);
    }

    /// Statistics over a fixed sample set match hand-computed values.
    #[test]
    fn test_benchmark_statistics() {
        let samples = vec![10.0, 12.0, 11.0, 13.0, 10.5, 11.5, 12.5];
        let defaults = BenchmarkConfiguration::default();
        let summary = AdvancedBenchmarkSuite::calculate_statistics(&samples, &defaults);

        assert_eq!(summary.sample_count, 7);
        assert!((summary.mean_ms - 11.5).abs() < 0.1);
        assert_eq!(summary.min_ms, 10.0);
        assert_eq!(summary.max_ms, 13.0);
        assert!(summary.std_dev_ms > 0.0);
    }

    /// Results can be retrieved per category.
    #[test]
    fn test_benchmark_categories() {
        let mut suite = AdvancedBenchmarkSuite::new("category_test".to_string());

        suite
            .benchmark(
                "tensor_op",
                BenchmarkCategory::TensorOps,
                Some(quick_config(1, 2)),
                || Ok(()),
            )
            .unwrap();
        suite
            .benchmark(
                "memory_op",
                BenchmarkCategory::Memory,
                Some(quick_config(1, 2)),
                || Ok(()),
            )
            .unwrap();

        let tensor_hits = suite.get_results_by_category(&BenchmarkCategory::TensorOps);
        let memory_hits = suite.get_results_by_category(&BenchmarkCategory::Memory);
        assert_eq!(tensor_hits.len(), 1);
        assert_eq!(memory_hits.len(), 1);
        assert_eq!(tensor_hits[0].name, "tensor_op");
        assert_eq!(memory_hits[0].name, "memory_op");
    }

    /// An operation that always fails yields an error, a stored failed
    /// result and an incremented failure counter.
    #[test]
    fn test_failed_benchmark() {
        let mut suite = AdvancedBenchmarkSuite::new("fail_test".to_string());
        let always_fails = || -> RusTorchResult<()> {
            Err(RusTorchError::Profiling {
                message: "Intentional failure".to_string(),
            })
        };

        let outcome = suite.benchmark(
            "failing_test",
            BenchmarkCategory::System,
            Some(quick_config(1, 3)),
            always_fails,
        );
        assert!(outcome.is_err());

        let recorded = suite.get_result("failing_test").unwrap();
        assert!(recorded.error.is_some());
        assert_eq!(suite.suite_metadata.benchmarks_failed, 1);
    }

    /// The report mentions the header, the benchmark and the system section.
    #[test]
    fn test_report_generation() {
        let mut suite = AdvancedBenchmarkSuite::new("report_test".to_string());
        suite
            .benchmark(
                "test1",
                BenchmarkCategory::System,
                Some(quick_config(1, 2)),
                || Ok(()),
            )
            .unwrap();

        let rendered = suite.generate_report();
        for expected in ["Benchmark Suite Report", "test1", "System Information"] {
            assert!(rendered.contains(expected));
        }
    }
}