use crate::optim::common::*;
use crate::optim::utils::{OptimizerMetrics, OptimizerUtils};
use crate::optim::*;
use crate::tensor::Tensor;
use std::collections::HashMap;
use std::time::{Duration, Instant};
/// Benchmark harness that runs optimizers against a list of parameter-shape
/// scenarios and collects timing statistics.
///
/// `Debug` is derived so the harness can be inspected in logs and test
/// failures; both field types already implement it.
#[derive(Debug)]
pub struct OptimizerBenchmark {
    /// Scenarios iterated by the `run_*` methods.
    test_configs: Vec<BenchmarkConfig>,
    /// Result storage keyed by name. Initialised empty by `new`.
    // NOTE(review): no method visible in this file writes to this map — confirm
    // whether it is still needed.
    results: HashMap<String, BenchmarkResult>,
}
/// Parameters describing a single benchmark scenario.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
/// Scenario name; used as the key in result maps and in printed output.
pub name: String,
/// Shape of the parameter tensor and of every synthetic gradient.
pub param_shape: Vec<usize>,
/// Number of individually timed optimizer steps per iteration.
pub num_steps: usize,
/// Number of untimed optimizer steps executed before the timed ones in each iteration.
pub warmup_steps: usize,
/// How many times the warm-up + timed sequence is repeated.
pub num_iterations: usize,
/// Learning rate handed to each optimizer's constructor.
pub learning_rate: f32,
}
/// Summary statistics produced by benchmarking one optimizer on one scenario.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
/// Mean duration of one optimizer step in microseconds, taken over the timed
/// steps of the first iteration only.
pub avg_step_time_us: f64,
/// Population standard deviation of those step durations, in microseconds.
pub step_time_std_us: f64,
/// Memory estimate in MiB computed from the parameter element count; it is
/// not a measured value.
pub peak_memory_mb: usize,
/// Convergence indicator. `benchmark_optimizer` currently fills in a fixed 0.95.
pub convergence_rate: f32,
/// Wall-clock time of the whole benchmark run in milliseconds, warm-up included.
pub total_duration_ms: u64,
/// Timed steps across all iterations divided by the total wall-clock seconds.
pub steps_per_second: f64,
}
impl OptimizerBenchmark {
pub fn new() -> Self {
let test_configs = vec![
BenchmarkConfig {
name: "small_dense".to_string(),
param_shape: vec![1000],
num_steps: 1000,
warmup_steps: 100,
num_iterations: 5,
learning_rate: 1e-3,
},
BenchmarkConfig {
name: "medium_dense".to_string(),
param_shape: vec![10000],
num_steps: 1000,
warmup_steps: 100,
num_iterations: 5,
learning_rate: 1e-3,
},
BenchmarkConfig {
name: "large_dense".to_string(),
param_shape: vec![100000],
num_steps: 500,
warmup_steps: 50,
num_iterations: 3,
learning_rate: 1e-4,
},
BenchmarkConfig {
name: "matrix_2d".to_string(),
param_shape: vec![512, 512],
num_steps: 500,
warmup_steps: 50,
num_iterations: 3,
learning_rate: 1e-3,
},
BenchmarkConfig {
name: "tensor_3d".to_string(),
param_shape: vec![64, 64, 64],
num_steps: 200,
warmup_steps: 20,
num_iterations: 3,
learning_rate: 1e-3,
},
];
Self {
test_configs,
results: HashMap::new(),
}
}
pub fn run_adam_comparison(&mut self) -> HashMap<String, HashMap<String, BenchmarkResult>> {
let mut all_results = HashMap::new();
for config in &self.test_configs.clone() {
println!(
"🚀 ベンチマーク実行中: {} (形状: {:?})",
config.name, config.param_shape
);
let mut config_results = HashMap::new();
if let Ok(nadam) = nadam::NAdam::default_params(config.learning_rate) {
println!(" 📊 NAdam をベンチマーク中...");
let result = self.benchmark_optimizer(config, Box::new(nadam));
config_results.insert("NAdam".to_string(), result);
}
if let Ok(radam) = radam::RAdam::default_params(config.learning_rate) {
println!(" 📊 RAdam をベンチマーク中...");
let result = self.benchmark_optimizer(config, Box::new(radam));
config_results.insert("RAdam".to_string(), result);
}
if let Ok(adamax) = adamax::Adamax::default_params(config.learning_rate) {
println!(" 📊 Adamax をベンチマーク中...");
let result = self.benchmark_optimizer(config, Box::new(adamax));
config_results.insert("Adamax".to_string(), result);
}
all_results.insert(config.name.clone(), config_results);
}
all_results
}
fn benchmark_optimizer(
&self,
config: &BenchmarkConfig,
mut optimizer: Box<dyn Optimizer>,
) -> BenchmarkResult {
let mut step_times = Vec::new();
let mut metrics = OptimizerMetrics::new(config.num_steps + config.warmup_steps);
let start_total = Instant::now();
for iteration in 0..config.num_iterations {
let param = Tensor::ones(&config.param_shape);
let mut grad_norms = Vec::new();
for step in 0..config.warmup_steps {
let grad = self.generate_synthetic_gradient(&config.param_shape, step);
let grad_norm = OptimizerUtils::l2_norm(&grad);
optimizer.step(¶m, &grad);
grad_norms.push(grad_norm);
}
for step in 0..config.num_steps {
let grad = self.generate_synthetic_gradient(&config.param_shape, step);
let grad_norm = OptimizerUtils::l2_norm(&grad);
let step_start = Instant::now();
optimizer.step(¶m, &grad);
let step_time = step_start.elapsed();
if iteration == 0 {
step_times.push(step_time.as_micros() as f64);
}
metrics.record_step(
grad_norm,
0.1, optimizer.learning_rate(),
step_time.as_secs_f32(),
);
grad_norms.push(grad_norm);
}
}
let total_duration = start_total.elapsed();
let avg_step_time = step_times.iter().sum::<f64>() / step_times.len() as f64;
let variance = step_times
.iter()
.map(|&t| (t - avg_step_time).powi(2))
.sum::<f64>()
/ step_times.len() as f64;
let std_dev = variance.sqrt();
let total_elements = config.param_shape.iter().product::<usize>();
let estimated_memory_mb = (total_elements * 4 * 3) / (1024 * 1024);
BenchmarkResult {
avg_step_time_us: avg_step_time,
step_time_std_us: std_dev,
peak_memory_mb: estimated_memory_mb,
convergence_rate: 0.95, total_duration_ms: total_duration.as_millis() as u64,
steps_per_second: (config.num_steps as f64) / total_duration.as_secs_f64()
* config.num_iterations as f64,
}
}
/// Builds a deterministic pseudo-gradient tensor of the given `shape`.
///
/// Element `i` is `(0.1 + 0.1 * sin(0.123 * i + 0.456 * step))` scaled by
/// `1 / (1 + 0.001 * step)`, so the values depend only on the element index
/// and `step`, and their magnitude shrinks as `step` grows.
fn generate_synthetic_gradient(&self, shape: &[usize], step: usize) -> Tensor<f32> {
    let element_count: usize = shape.iter().product();
    let decay = 1.0 / (1.0 + step as f32 * 0.001);

    let values: Vec<f32> = (0..element_count)
        .map(|index| {
            let wobble = (index as f32 * 0.123 + step as f32 * 0.456).sin() * 0.1;
            (0.1 + wobble) * decay
        })
        .collect();

    Tensor::from_vec(values, shape.to_vec())
}
/// Benchmarks L-BFGS on every configured scenario that is small enough.
///
/// Scenarios whose parameter tensor has more than 50 000 elements are skipped
/// (presumably because L-BFGS is too costly at that size — TODO confirm).
/// A progress line is printed to stdout for each scenario that is attempted.
///
/// Returns a map from scenario name to its [`BenchmarkResult`]. A scenario has
/// no entry when it was skipped or when `LBFGS::new` returned `Err`.
pub fn run_lbfgs_benchmark(&mut self) -> HashMap<String, BenchmarkResult> {
    /// Largest parameter element count that is still benchmarked.
    const MAX_LBFGS_ELEMENTS: usize = 50_000;

    let mut results = HashMap::new();

    // Borrow the scenarios instead of cloning the whole list;
    // `benchmark_optimizer` only needs `&self`.
    let eligible = self
        .test_configs
        .iter()
        .filter(|config| config.param_shape.iter().product::<usize>() <= MAX_LBFGS_ELEMENTS);

    for config in eligible {
        println!("🔬 L-BFGS ベンチマーク: {}", config.name);
        if let Ok(lbfgs) = lbfgs::LBFGS::new(config.learning_rate) {
            let result = self.benchmark_optimizer(config, Box::new(lbfgs));
            results.insert(config.name.clone(), result);
        }
    }

    results
}
/// Renders benchmark results as a Markdown report.
///
/// `results` maps scenario name to (optimizer name to result), i.e. the shape
/// returned by `run_adam_comparison`.
///
/// The report contains one table per scenario, followed by a summary listing
/// the fastest and the least memory-hungry optimizer of each scenario.
/// Ordering is deterministic: scenarios are sorted by name, table rows by
/// average step time (ties broken by optimizer name), and memory ties are
/// broken by optimizer name. A NaN step time sorts after every real number, so
/// it never panics and never wins the speed ranking over a measured value.
pub fn generate_report(
    &self,
    results: &HashMap<String, HashMap<String, BenchmarkResult>>,
) -> String {
    // Total order on step times: `partial_cmp` is `None` only when a NaN is
    // involved, in which case NaN is ranked last (false < true).
    let by_step_time = |a: f64, b: f64| {
        a.partial_cmp(&b)
            .unwrap_or_else(|| a.is_nan().cmp(&b.is_nan()))
    };

    // HashMap iteration order varies between runs; sort for a stable report.
    let mut configs: Vec<_> = results.iter().collect();
    configs.sort_by(|a, b| a.0.cmp(b.0));

    let mut report = String::from("# 🚀 RusTorch フェーズ2 最適化器パフォーマンスレポート\n\n");
    // Summary bullets are collected during the same pass and appended at the end.
    let mut speed_lines = String::new();
    let mut memory_lines = String::new();

    for &(config_name, optimizer_results) in &configs {
        report.push_str(&format!("## 📊 テスト設定: {}\n\n", config_name));
        report.push_str("| 最適化器 | 平均ステップ時間(μs) | 標準偏差(μs) | メモリ使用量(MB) | ステップ/秒 |\n");
        report.push_str(
            "|----------|---------------------|-------------|----------------|----------|\n",
        );

        let mut rows: Vec<_> = optimizer_results.iter().collect();
        rows.sort_by(|a, b| {
            by_step_time(a.1.avg_step_time_us, b.1.avg_step_time_us).then_with(|| a.0.cmp(b.0))
        });

        for (optimizer_name, result) in &rows {
            report.push_str(&format!(
                "| {} | {:.2} | {:.2} | {} | {:.1} |\n",
                optimizer_name,
                result.avg_step_time_us,
                result.step_time_std_us,
                result.peak_memory_mb,
                result.steps_per_second
            ));
        }
        report.push('\n');

        // Rows are sorted fastest-first, so the head is the speed champion.
        if let Some((optimizer, result)) = rows.first() {
            speed_lines.push_str(&format!(
                "- **{}**: {} ({:.2}μs/ステップ)\n",
                config_name, optimizer, result.avg_step_time_us
            ));
        }

        let leanest = rows.iter().min_by(|a, b| {
            a.1.peak_memory_mb
                .cmp(&b.1.peak_memory_mb)
                .then_with(|| a.0.cmp(b.0))
        });
        if let Some((optimizer, result)) = leanest {
            memory_lines.push_str(&format!(
                "- **{}**: {} ({}MB)\n",
                config_name, optimizer, result.peak_memory_mb
            ));
        }
    }

    report.push_str("## 🎯 パフォーマンス要約\n\n");
    report.push_str("### ⚡ 速度チャンピオン:\n");
    report.push_str(&speed_lines);
    report.push_str("\n### 💾 メモリ効率チャンピオン:\n");
    report.push_str(&memory_lines);
    report
}
}
/// Runs a short Adam-family comparison on a single 1000-element scenario
/// (100 timed steps, 10 warm-up steps, 2 iterations) and prints the average
/// step time and throughput of each optimizer to stdout.
pub fn quick_performance_test() {
    println!("🔥 クイックパフォーマンステスト開始");

    // Swap the default presets for one small scenario.
    let mut runner = OptimizerBenchmark::new();
    runner.test_configs = vec![BenchmarkConfig {
        name: "quick_test".to_string(),
        param_shape: vec![1000],
        num_steps: 100,
        warmup_steps: 10,
        num_iterations: 2,
        learning_rate: 1e-3,
    }];

    let comparison = runner.run_adam_comparison();

    println!("\n📈 結果:");
    for (scenario, per_optimizer) in &comparison {
        println!("\n🎯 {}", scenario);
        per_optimizer.iter().for_each(|(label, outcome)| {
            println!(
                " {} : {:.2}μs/step, {:.1} steps/sec",
                label, outcome.avg_step_time_us, outcome.steps_per_second
            );
        });
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A new benchmark ships with preset scenarios and no stored results.
    #[test]
    fn test_benchmark_config_creation() {
        let bench = OptimizerBenchmark::new();
        assert!(!bench.test_configs.is_empty());
        assert_eq!(bench.results.len(), 0);
    }

    /// Synthetic gradients have the requested shape and small, finite values.
    #[test]
    fn test_synthetic_gradient_generation() {
        let bench = OptimizerBenchmark::new();
        let dims = vec![10, 10];

        let gradient = bench.generate_synthetic_gradient(&dims, 0);
        assert_eq!(gradient.shape(), &dims);

        let values = gradient.as_slice().unwrap();
        assert_eq!(values.len(), 100);
        for value in values.iter() {
            assert!(value.abs() < 1.0);
            assert!(value.is_finite());
        }
    }

    /// `BenchmarkResult` stores the values it was constructed with.
    #[test]
    fn test_benchmark_result_fields() {
        let sample = BenchmarkResult {
            avg_step_time_us: 100.0,
            step_time_std_us: 5.0,
            peak_memory_mb: 10,
            convergence_rate: 0.95,
            total_duration_ms: 1000,
            steps_per_second: 500.0,
        };

        assert_eq!(sample.avg_step_time_us, 100.0);
        assert_eq!(sample.peak_memory_mb, 10);
        assert!(sample.steps_per_second > 0.0);
    }

    /// The default suite contains exactly five preset scenarios.
    #[test]
    fn test_performance_test_execution() {
        let preset_count = OptimizerBenchmark::new().test_configs.len();
        assert_eq!(preset_count, 5);
    }
}