use scirs2_datasets::{
benchmarks::{BenchmarkRunner, BenchmarkSuite},
load_boston, load_breast_cancer, load_digits, load_iris, load_wine, make_classification,
make_regression, Dataset,
};
use std::collections::HashMap;
use std::process::Command;
use std::time::{Duration, Instant};
/// Entry point: runs the SciRS2 benchmark suites, prints the per-suite
/// analyses, attempts the scikit-learn comparison, and prints a summary.
///
/// # Errors
///
/// The signature allows any boxed error, but no step called here is
/// propagated with `?`, so the function currently always returns `Ok(())`.
#[allow(dead_code)]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 SciRS2 vs Scikit-Learn Performance Benchmarks");
    println!("================================================\n");

    // 5 iterations, 2 warm-up runs, memory measurement switched off.
    let runner = BenchmarkRunner::new()
        .with_iterations(5)
        .with_warmup(2)
        .with_memory_measurement(false);
    let scirs2suites = runner.run_comprehensive_benchmarks();

    // Blank line followed by a 60-character rule, without building a
    // temporary `String` by concatenation.
    println!("\n{}", "=".repeat(60));
    println!("DETAILED ANALYSIS");
    println!("{}", "=".repeat(60));

    analyze_toy_dataset_performance(&scirs2suites);
    analyze_data_generation_performance(&scirs2suites);
    run_python_comparison_benchmarks();
    generate_performance_report(&scirs2suites);

    println!("\n🎉 Benchmark suite completed successfully!");
    println!("Check the generated performance report for detailed analysis.");
    Ok(())
}
/// Prints per-dataset load timings and an aggregate summary for the suite
/// named `"Toy Datasets"`.
///
/// Does nothing when no such suite exists. When the suite has no successful
/// results, a short notice is printed instead of a summary, so the output
/// never contains a NaN throughput or a placeholder "fastest" entry.
#[allow(dead_code)]
fn analyze_toy_dataset_performance(suites: &[BenchmarkSuite]) {
    let Some(toy_suite) = suites.iter().find(|s| s.name == "Toy Datasets") else {
        return;
    };

    println!("\n📊 TOY DATASET LOADING ANALYSIS");
    println!("{}", "-".repeat(40));

    let mut total_loading_time = Duration::ZERO;
    let mut total_samples = 0;
    // `None` until the first successful result has been seen.
    let mut fastest: Option<(&str, Duration)> = None;
    let mut slowest: Option<(&str, Duration)> = None;

    for result in toy_suite.successful_results() {
        total_loading_time += result.duration;
        total_samples += result.samples;

        let name: &str = &result.operation;
        // Strict comparisons: on a tie the earlier result is kept.
        if fastest.map_or(true, |(_, best)| result.duration < best) {
            fastest = Some((name, result.duration));
        }
        if slowest.map_or(true, |(_, worst)| result.duration > worst) {
            slowest = Some((name, result.duration));
        }

        println!(
            " {}: {} ({} samples, {:.1} samples/s)",
            result.operation.replace("load_", ""),
            result.formatted_duration(),
            result.samples,
            result.throughput
        );
    }

    let (Some(fastest), Some(slowest)) = (fastest, slowest) else {
        println!(" No successful toy dataset results to analyze");
        return;
    };

    println!("\n Summary:");
    println!(
        " Total loading time: {:.2}s",
        total_loading_time.as_secs_f64()
    );
    println!(" Total samples loaded: {total_samples}");

    // Avoid dividing by a zero total duration.
    let total_secs = total_loading_time.as_secs_f64();
    if total_secs > 0.0 {
        println!(
            " Average throughput: {:.1} samples/s",
            total_samples as f64 / total_secs
        );
    } else {
        println!(" Average throughput: n/a");
    }

    println!(
        " Fastest: {} ({})",
        fastest.0,
        format_duration(fastest.1)
    );
    println!(
        " Slowest: {} ({})",
        slowest.0,
        format_duration(slowest.1)
    );
}
/// Breaks the `"Data Generation"` suite down by generator family and prints
/// an analysis of each, followed by a scaling analysis of the whole suite.
///
/// Successful results are bucketed by substring of their operation name:
/// `"classification"`, then `"regression"`, then `"blobs"` (reported as
/// clustering). Results matching none of these are left out of the
/// per-family analysis. Does nothing when the suite is absent.
#[allow(dead_code)]
fn analyze_data_generation_performance(suites: &[BenchmarkSuite]) {
    let Some(gen_suite) = suites.iter().find(|s| s.name == "Data Generation") else {
        return;
    };

    println!("\n🔬 DATA GENERATION ANALYSIS");
    println!("{}", "-".repeat(40));

    let mut classification_results = Vec::new();
    let mut regression_results = Vec::new();
    let mut clustering_results = Vec::new();

    for result in gen_suite.successful_results() {
        let operation = &result.operation;
        if operation.contains("classification") {
            classification_results.push(result);
        } else if operation.contains("regression") {
            regression_results.push(result);
        } else if operation.contains("blobs") {
            clustering_results.push(result);
        }
    }

    analyze_generation_type("Classification", &classification_results);
    analyze_generation_type("Regression", &regression_results);
    analyze_generation_type("Clustering", &clustering_results);
    analyze_scaling_performance(gen_suite);
}
/// Prints aggregate statistics for one family of data-generation results.
///
/// * `gen_type` - label used in the heading (e.g. "Classification").
/// * `results`  - the results belonging to that family; nothing is printed
///   when the slice is empty.
///
/// The average throughput is total samples divided by total duration. The
/// best and worst configurations are chosen by each result's own
/// `throughput` field and labelled with the last `_`-separated segment of
/// the operation name.
#[allow(dead_code)]
fn analyze_generation_type(
    gen_type: &str,
    results: &[&scirs2_datasets::benchmarks::BenchmarkResult],
) {
    if results.is_empty() {
        return;
    }

    println!("\n {gen_type} Generation:");

    let total_samples: usize = results.iter().map(|r| r.samples).sum();
    let total_duration: Duration = results.iter().map(|r| r.duration).sum();
    let avg_throughput = total_samples as f64 / total_duration.as_secs_f64();

    println!(" Configurations tested: {}", results.len());
    println!(" Total samples generated: {total_samples}");
    println!(" Average throughput: {avg_throughput:.1} samples/s");

    // `total_cmp` is a total order over f64, so a NaN throughput cannot
    // panic the report the way `partial_cmp(..).expect(..)` would.
    let best = results
        .iter()
        .max_by(|a, b| a.throughput.total_cmp(&b.throughput));
    let worst = results
        .iter()
        .min_by(|a, b| a.throughput.total_cmp(&b.throughput));

    if let (Some(best), Some(worst)) = (best, worst) {
        println!(
            " Best: {} ({:.1} samples/s)",
            best.operation.split('_').next_back().unwrap_or("unknown"),
            best.throughput
        );
        println!(
            " Worst: {} ({:.1} samples/s)",
            worst.operation.split('_').next_back().unwrap_or("unknown"),
            worst.throughput
        );
    }
}
/// Prints how throughput changes with dataset size for one suite.
///
/// Successful results are grouped by their `samples` count. For each size,
/// in ascending order, the mean throughput and mean duration are printed.
/// When at least two sizes exist, the ratio of the largest size's mean
/// throughput to the smallest size's is reported as "scaling efficiency" and
/// classified as good (> 0.8), moderate (> 0.5) or poor.
#[allow(dead_code)]
fn analyze_scaling_performance(suite: &BenchmarkSuite) {
    println!("\n 📈 SCALING ANALYSIS:");

    let mut size_groups: HashMap<usize, Vec<_>> = HashMap::new();
    for result in suite.successful_results() {
        size_groups.entry(result.samples).or_default().push(result);
    }

    // One (size, mean throughput, mean duration in seconds) entry per size,
    // computed once and reused by both the listing and the efficiency check.
    // Groups are created by `push`, so none is empty and the divisions are
    // well-defined.
    let mut stats: Vec<(usize, f64, f64)> = size_groups
        .iter()
        .map(|(&size, results)| {
            let count = results.len() as f64;
            let avg_throughput = results.iter().map(|r| r.throughput).sum::<f64>() / count;
            let avg_duration = results
                .iter()
                .map(|r| r.duration.as_secs_f64())
                .sum::<f64>()
                / count;
            (size, avg_throughput, avg_duration)
        })
        .collect();
    stats.sort_unstable_by_key(|&(size, _, _)| size);

    for &(size, avg_throughput, avg_duration) in &stats {
        println!(" {size} samples: {avg_throughput:.1} samples/s (avg {avg_duration:.2}s)");
    }

    // The `[first, .., last]` pattern only matches with two or more sizes.
    if let [(small_size, small_avg, _), .., (large_size, large_avg, _)] = stats.as_slice() {
        if *small_avg > 0.0 {
            let efficiency = large_avg / small_avg;
            let size_ratio = *large_size as f64 / *small_size as f64;
            println!(" Scaling efficiency: {efficiency:.2}x (size increased {size_ratio:.1}x)");
            if efficiency > 0.8 {
                println!(" ✅ Good scaling performance");
            } else if efficiency > 0.5 {
                println!(" ⚠️ Moderate scaling performance");
            } else {
                println!(" ❌ Poor scaling performance");
            }
        } else {
            // A zero baseline would make the ratio infinite or NaN.
            println!(" Scaling efficiency: n/a (baseline throughput is zero)");
        }
    }
}
/// Runs the scikit-learn comparison benchmarks when `python3` can import
/// `sklearn`, and prints an installation hint otherwise.
///
/// Availability is probed by spawning `python3 -c "import sklearn; …"`. Any
/// failure — the interpreter cannot be started, or it exits with a
/// non-success status — is treated as "scikit-learn not available" and the
/// comparison is skipped.
#[allow(dead_code)]
fn run_python_comparison_benchmarks() {
    println!("\n🐍 PYTHON SCIKIT-LEARN COMPARISON");
    println!("{}", "-".repeat(40));

    let python_check = Command::new("python3")
        .arg("-c")
        .arg("import sklearn; print('scikit-learn', sklearn.__version__)")
        .output();

    match python_check {
        Ok(output) if output.status.success() => {
            // The probe prints "scikit-learn <version>" on stdout.
            let version = String::from_utf8_lossy(&output.stdout);
            println!(" ✅ Found {}", version.trim());
            run_sklearn_toy_dataset_comparison();
            run_sklearn_generation_comparison();
        }
        _ => {
            println!(" ❌ Python scikit-learn not available");
            println!(" Install with: pip install scikit-learn");
            println!(" Skipping Python comparison benchmarks");
        }
    }
}
/// Times each toy-dataset loader in scikit-learn (through a `python3 -c`
/// subprocess) and in SciRS2 (in-process), printing one comparison line per
/// dataset.
///
/// The Python script measures only the `load_*()` call, inside the
/// interpreter, so interpreter start-up and the `sklearn` import are not
/// part of the reported figure. A dataset is reported as failed when the
/// subprocess cannot be run, exits unsuccessfully, or prints something that
/// does not parse as a number, and likewise when the SciRS2 loader returns
/// an error.
#[allow(dead_code)]
fn run_sklearn_toy_dataset_comparison() {
    println!("\n 📊 Toy Dataset Loading Comparison:");

    // Each name maps to `sklearn.datasets.load_<name>` and to the SciRS2
    // loader selected in the `match` below.
    // NOTE(review): scikit-learn 1.2 removed `load_boston`, so on current
    // installs the "boston" row is expected to report a Python failure.
    let datasets = ["iris", "boston", "digits", "wine", "breast_cancer"];

    for name in datasets {
        // `{{` / `}}` are the `format!` escapes for the braces of the Python
        // f-string, which must contain the expression being formatted.
        let script = format!(
            "import time; from sklearn.datasets import load_{name}; \
             start=time.perf_counter(); load_{name}(); \
             print(f'{{time.perf_counter()-start:.6f}}')"
        );

        // Elapsed seconds as printed by the script, or `None` on any failure.
        let python_time = Command::new("python3")
            .arg("-c")
            .arg(&script)
            .output()
            .ok()
            .filter(|output| output.status.success())
            .and_then(|output| {
                String::from_utf8_lossy(&output.stdout)
                    .trim()
                    .parse::<f64>()
                    .ok()
            });
        let Some(python_time) = python_time else {
            println!(" {name}: Failed to benchmark Python version");
            continue;
        };

        let scirs2_start = Instant::now();
        let scirs2_result = match name {
            "iris" => load_iris().map(|_| ()),
            "boston" => load_boston().map(|_| ()),
            "digits" => load_digits().map(|_| ()),
            "wine" => load_wine().map(|_| ()),
            "breast_cancer" => load_breast_cancer().map(|_| ()),
            _ => Ok(()),
        };
        let scirs2_time = scirs2_start.elapsed().as_secs_f64();

        // A failed load would otherwise be reported as a (fast) success.
        if scirs2_result.is_err() {
            println!(" {name}: Failed to benchmark SciRS2 version");
            continue;
        }

        // > 1 means SciRS2 took less time than scikit-learn.
        let speedup = python_time / scirs2_time;
        let status = if speedup > 1.2 {
            "🚀 FASTER"
        } else if speedup > 0.8 {
            "≈ SIMILAR"
        } else {
            "🐌 SLOWER"
        };

        println!(
            " {}: SciRS2 {:.2}ms vs sklearn {:.2}ms ({:.1}x {})",
            name,
            scirs2_time * 1000.0,
            python_time * 1000.0,
            speedup,
            status
        );
    }
}
#[allow(dead_code)]
fn run_sklearn_generation_comparison() {
println!("\n ðŽ Data Generation Comparison:");
let configs = vec![
(1000, 10, "classification"),
(5000, 20, "classification"),
(1000, 10, "regression"),
(5000, 20, "regression"),
];
for (n_samples, n_features, gen_type) in configs {
#[allow(clippy::type_complexity)]
let (python_code, scirs2_fn): (&str, Box<dyn Fn() -> Result<Dataset, Box<dyn std::error::Error>>>) = match gen_type {
"classification" => (
&format!("from sklearn.datasets import make_classification; make_classification(n_samples={n_samples}, n_features={n_features}, random_state=42)"),
Box::new(move || make_classification(n_samples, n_features, 3, 2, 4, Some(42)).map_err(|e| Box::new(e) as Box<dyn std::error::Error>))
),
"regression" => (
&format!("from sklearn.datasets import make_regression; make_regression(n_samples={n_samples}, n_features={n_features}, random_state=42)"),
Box::new(move || make_regression(n_samples, n_features, 3, 0.1, Some(42)).map_err(|e| Box::new(e) as Box<dyn std::error::Error>))
),
_ => continue,
};
let python_result = Command::new("python3")
.arg("-c")
.arg(format!(
"import time; start=time.time(); {python_code}; print(f'{{:.4f}}', time.time()-start)"
))
.output();
match python_result {
Ok(output) if output.status.success() => {
let python_time = String::from_utf8_lossy(&output.stdout)
.trim()
.parse::<f64>()
.unwrap_or(0.0);
let scirs2_start = Instant::now();
let _scirs2_result = scirs2_fn();
let scirs2_time = scirs2_start.elapsed().as_secs_f64();
let speedup = python_time / scirs2_time;
let status = if speedup > 1.2 {
"ð FASTER"
} else if speedup > 0.8 {
"â SIMILAR"
} else {
"ð SLOWER"
};
println!(
" {} {}x{}: SciRS2 {:.2}ms vs sklearn {:.2}ms ({:.1}x {})",
gen_type,
n_samples,
n_features,
scirs2_time * 1000.0,
python_time * 1000.0,
speedup,
status
);
}
_ => {
println!(
" {gen_type} {n_samples}x{n_features}: Failed to benchmark Python version"
);
}
}
}
}
/// Prints the closing summary: totals across all suites, an overall
/// throughput rating, and a list of recommendations.
///
/// Overall throughput is total samples divided by total benchmark time. When
/// no time was recorded (for example, `suites` is empty) the throughput is
/// shown as "n/a" and no rating is given, rather than rating a NaN.
#[allow(dead_code)]
fn generate_performance_report(suites: &[BenchmarkSuite]) {
    println!("\n📋 PERFORMANCE SUMMARY REPORT");
    println!("{}", "=".repeat(60));

    let mut total_operations = 0;
    let mut total_samples = 0;
    let mut total_duration = Duration::ZERO;
    for suite in suites {
        total_operations += suite.results.len();
        total_samples += suite.total_samples();
        total_duration += suite.total_duration;
    }

    let total_secs = total_duration.as_secs_f64();
    // `None` when there is no elapsed time to divide by.
    let overall_throughput = (total_secs > 0.0).then(|| total_samples as f64 / total_secs);

    println!(" Total operations benchmarked: {total_operations}");
    println!(" Total samples processed: {total_samples}");
    println!(" Total benchmark time: {total_secs:.2}s");
    match overall_throughput {
        Some(throughput) => println!(" Overall throughput: {throughput:.1} samples/s"),
        None => println!(" Overall throughput: n/a"),
    }

    println!("\n 🎯 PERFORMANCE ASSESSMENT:");
    match overall_throughput {
        // NOTE(review): the glyph on this line was reconstructed from
        // corrupted bytes; confirm it is the intended one.
        Some(t) if t > 50000.0 => {
            println!(" ⭐ EXCELLENT - High-performance implementation");
        }
        Some(t) if t > 10000.0 => {
            println!(" ✅ GOOD - Solid performance for scientific computing");
        }
        Some(t) if t > 1000.0 => {
            println!(" ⚠️ MODERATE - Acceptable for most use cases");
        }
        Some(_) => println!(" ❌ SLOW - May need optimization"),
        None => println!(" No timing data available to assess"),
    }

    println!("\n 💡 RECOMMENDATIONS:");
    if let Some(gen_suite) = suites.iter().find(|s| s.name == "Data Generation") {
        let successful = gen_suite.successful_results();
        let failed = gen_suite.failed_results();

        if !failed.is_empty() {
            println!(
                " • Fix {} failed data generation operations",
                failed.len()
            );
        }

        if !successful.is_empty() {
            // Mean of the per-result throughputs (not samples / total time).
            let avg_gen_throughput =
                successful.iter().map(|r| r.throughput).sum::<f64>() / successful.len() as f64;
            if avg_gen_throughput < 1000.0 {
                println!(" • Consider optimizing data generation algorithms");
                println!(" • Implement SIMD operations for numeric computations");
                println!(" • Use parallel processing for large datasets");
            }
        }
    }

    // General suggestions, printed regardless of the measured numbers.
    println!(" • Consider GPU acceleration for large-scale operations");
    println!(" • Implement streaming for memory-efficient processing");
    println!(" • Add caching for frequently accessed datasets");
}
/// Formats a duration using the largest unit that is non-zero.
///
/// * one second or more: seconds with two decimals, e.g. `"2.50s"`
/// * one millisecond or more: whole milliseconds, e.g. `"42ms"`
/// * anything shorter: whole microseconds, e.g. `"7μs"` (truncating, so
///   sub-microsecond durations render as `"0μs"`)
#[allow(dead_code)]
fn format_duration(duration: Duration) -> String {
    if duration.as_secs() > 0 {
        format!("{:.2}s", duration.as_secs_f64())
    } else if duration.as_millis() > 0 {
        format!("{}ms", duration.as_millis())
    } else {
        // Greek small letter mu (U+03BC) as the "micro" prefix.
        format!("{}μs", duration.as_micros())
    }
}