1use scirs2_datasets::{
12 benchmarks::{BenchmarkRunner, BenchmarkSuite},
13 load_boston, load_breast_cancer, load_digits, load_iris, load_wine, make_classification,
14 make_regression, Dataset,
15};
16use std::collections::HashMap;
17use std::process::Command;
18use std::time::{Duration, Instant};
19
20#[allow(dead_code)]
21fn main() -> Result<(), Box<dyn std::error::Error>> {
22 println!("ð SciRS2 vs Scikit-Learn Performance Benchmarks");
23 println!("================================================\n");
24
25 let runner = BenchmarkRunner::new()
26 .with_iterations(5)
27 .with_warmup(2)
28 .with_memory_measurement(false);
29
30 let scirs2suites = runner.run_comprehensive_benchmarks();
32
33 println!("{}", "\n".to_owned() + &"=".repeat(60));
34 println!("DETAILED ANALYSIS");
35 println!("{}", "=".repeat(60));
36
37 analyze_toy_dataset_performance(&scirs2suites);
39
40 analyze_data_generation_performance(&scirs2suites);
42
43 run_python_comparison_benchmarks();
45
46 generate_performance_report(&scirs2suites);
48
49 println!("\nð Benchmark suite completed successfully!");
50 println!("Check the generated performance report for detailed analysis.");
51
52 Ok(())
53}
54
55#[allow(dead_code)]
56fn analyze_toy_dataset_performance(suites: &[BenchmarkSuite]) {
57 if let Some(toy_suite) = suites.iter().find(|s| s.name == "Toy Datasets") {
58 println!("\nð TOY DATASET LOADING ANALYSIS");
59 println!("{}", "-".repeat(40));
60
61 let mut total_loading_time = Duration::ZERO;
62 let mut total_samples = 0;
63 let mut fastestdataset = ("", Duration::MAX);
64 let mut slowestdataset = ("", Duration::ZERO);
65
66 for result in toy_suite.successful_results() {
67 total_loading_time += result.duration;
68 total_samples += result.samples;
69
70 if result.duration < fastestdataset.1 {
71 fastestdataset = (&result.operation, result.duration);
72 }
73 if result.duration > slowestdataset.1 {
74 slowestdataset = (&result.operation, result.duration);
75 }
76
77 println!(
78 " {}: {} ({} samples, {:.1} samples/s)",
79 result.operation.replace("load_", ""),
80 result.formatted_duration(),
81 result.samples,
82 result.throughput
83 );
84 }
85
86 println!("\n Summary:");
87 println!(
88 " Total loading time: {:.2}s",
89 total_loading_time.as_secs_f64()
90 );
91 println!(" Total samples loaded: {total_samples}");
92 println!(
93 " Average throughput: {:.1} samples/s",
94 total_samples as f64 / total_loading_time.as_secs_f64()
95 );
96 println!(
97 " Fastest: {} ({})",
98 fastestdataset.0,
99 format_duration(fastestdataset.1)
100 );
101 println!(
102 " Slowest: {} ({})",
103 slowestdataset.0,
104 format_duration(slowestdataset.1)
105 );
106 }
107}
108
109#[allow(dead_code)]
110fn analyze_data_generation_performance(suites: &[BenchmarkSuite]) {
111 if let Some(gen_suite) = suites.iter().find(|s| s.name == "Data Generation") {
112 println!("\nðŽ DATA GENERATION ANALYSIS");
113 println!("{}", "-".repeat(40));
114
115 let mut classification_results = Vec::new();
116 let mut regression_results = Vec::new();
117 let mut clustering_results = Vec::new();
118
119 for result in gen_suite.successful_results() {
120 if result.operation.contains("classification") {
121 classification_results.push(result);
122 } else if result.operation.contains("regression") {
123 regression_results.push(result);
124 } else if result.operation.contains("blobs") {
125 clustering_results.push(result);
126 }
127 }
128
129 analyze_generation_type("Classification", &classification_results);
130 analyze_generation_type("Regression", ®ression_results);
131 analyze_generation_type("Clustering", &clustering_results);
132
133 analyze_scaling_performance(gen_suite);
135 }
136}
137
138#[allow(dead_code)]
139fn analyze_generation_type(
140 gen_type: &str,
141 results: &[&scirs2_datasets::benchmarks::BenchmarkResult],
142) {
143 if results.is_empty() {
144 return;
145 }
146
147 println!("\n {gen_type} Generation:");
148
149 let total_samples: usize = results.iter().map(|r| r.samples).sum();
150 let total_duration: Duration = results.iter().map(|r| r.duration).sum();
151 let avg_throughput = total_samples as f64 / total_duration.as_secs_f64();
152
153 println!(" Configurations tested: {}", results.len());
154 println!(" Total samples generated: {total_samples}");
155 println!(" Average throughput: {avg_throughput:.1} samples/s");
156
157 let best = results
159 .iter()
160 .max_by(|a, b| a.throughput.partial_cmp(&b.throughput).unwrap());
161 let worst = results
162 .iter()
163 .min_by(|a, b| a.throughput.partial_cmp(&b.throughput).unwrap());
164
165 if let (Some(best), Some(worst)) = (best, worst) {
166 println!(
167 " Best: {} ({:.1} samples/s)",
168 best.operation.split('_').next_back().unwrap_or("unknown"),
169 best.throughput
170 );
171 println!(
172 " Worst: {} ({:.1} samples/s)",
173 worst.operation.split('_').next_back().unwrap_or("unknown"),
174 worst.throughput
175 );
176 }
177}
178
#[allow(dead_code)]
/// Groups successful results by generated sample count and reports how the
/// average throughput changes as dataset size grows, ending with a
/// smallest-vs-largest scaling-efficiency verdict.
fn analyze_scaling_performance(suite: &BenchmarkSuite) {
    println!("\n ð SCALING ANALYSIS:");

    // Bucket results by the number of samples they generated.
    let mut size_groups: HashMap<usize, Vec<_>> = HashMap::new();

    for result in suite.successful_results() {
        size_groups.entry(result.samples).or_default().push(result);
    }

    // Report per-size averages in ascending size order.
    let mut sizes: Vec<_> = size_groups.keys().collect();
    sizes.sort();

    for &size in &sizes {
        if let Some(results) = size_groups.get(size) {
            let avg_throughput =
                results.iter().map(|r| r.throughput).sum::<f64>() / results.len() as f64;
            let avg_duration = results
                .iter()
                .map(|r| r.duration.as_secs_f64())
                .sum::<f64>()
                / results.len() as f64;

            println!(" {size} samples: {avg_throughput:.1} samples/s (avg {avg_duration:.2}s)");
        }
    }

    // Compare smallest vs largest size: how much throughput is retained as
    // the workload scales up (1.0 = perfect scaling).
    if sizes.len() >= 2 {
        let smallsize = sizes[0];
        let largesize = sizes[sizes.len() - 1];

        if let (Some(small_results), Some(large_results)) =
            (size_groups.get(smallsize), size_groups.get(largesize))
        {
            let small_avg = small_results.iter().map(|r| r.throughput).sum::<f64>()
                / small_results.len() as f64;
            let large_avg = large_results.iter().map(|r| r.throughput).sum::<f64>()
                / large_results.len() as f64;

            let efficiency = large_avg / small_avg;
            let size_ratio = *largesize as f64 / *smallsize as f64;

            println!(" Scaling efficiency: {efficiency:.2}x (size increased {size_ratio:.1}x)");

            // NOTE(review): the status strings below contain mojibake emoji
            // (one literal even spans two source lines) — likely an encoding
            // artifact; confirm intended glyphs before changing output.
            if efficiency > 0.8 {
                println!(" â
Good scaling performance");
            } else if efficiency > 0.5 {
                println!(" â ïļ Moderate scaling performance");
            } else {
                println!(" â Poor scaling performance");
            }
        }
    }
}
235
#[allow(dead_code)]
/// Checks whether a python3 interpreter with scikit-learn is available and,
/// if so, runs the head-to-head comparison benchmarks; otherwise prints
/// install instructions and skips the comparison.
fn run_python_comparison_benchmarks() {
    println!("\nð PYTHON SCIKIT-LEARN COMPARISON");
    println!("{}", "-".repeat(40));

    // Probe for python3 and an importable sklearn; stdout carries the version.
    let python_check = Command::new("python3")
        .arg("-c")
        .arg("import sklearn; print('scikit-learn', sklearn.__version__)")
        .output();

    match python_check {
        Ok(output) if output.status.success() => {
            let version = String::from_utf8_lossy(&output.stdout);
            println!(" â
Found {}", version.trim());

            run_sklearn_toy_dataset_comparison();
            run_sklearn_generation_comparison();
        }
        // Any failure (python3 missing, sklearn not installed) lands here.
        _ => {
            println!(" â Python scikit-learn not available");
            println!(" Install with: pip install scikit-learn");
            println!(" Skipping Python comparison benchmarks");
        }
    }
}
263
264#[allow(dead_code)]
265fn run_sklearn_toy_dataset_comparison() {
266 println!("\n ð Toy Dataset Loading Comparison:");
267
268 let datasets = vec![
269 (
270 "iris",
271 "from sklearn.datasets import load_iris; load_iris()",
272 ),
273 (
274 "boston",
275 "from sklearn.datasets import load_boston; load_boston()",
276 ),
277 (
278 "digits",
279 "from sklearn.datasets import load_digits; load_digits()",
280 ),
281 (
282 "wine",
283 "from sklearn.datasets import load_wine; load_wine()",
284 ),
285 (
286 "breast_cancer",
287 "from sklearn.datasets import load_breast_cancer; load_breast_cancer()",
288 ),
289 ];
290
291 for (name, python_code) in datasets {
292 let _start = Instant::now();
294 let python_result = Command::new("python3")
295 .arg("-c")
296 .arg(format!(
297 "import time; start=time.time(); {python_code}; print(f'{{:.4f}}', time.time()-start)"
298 ))
299 .output();
300
301 match python_result {
302 Ok(output) if output.status.success() => {
303 let python_time = String::from_utf8_lossy(&output.stdout)
304 .trim()
305 .parse::<f64>()
306 .unwrap_or(0.0);
307
308 let scirs2_start = Instant::now();
310 let _scirs2_result = match name {
311 "iris" => load_iris().map(|_| ()),
312 "boston" => load_boston().map(|_| ()),
313 "digits" => load_digits().map(|_| ()),
314 "wine" => load_wine(false).map(|_| ()),
315 "breast_cancer" => load_breast_cancer().map(|_| ()),
316 _ => Ok(()),
317 };
318 let scirs2_time = scirs2_start.elapsed().as_secs_f64();
319
320 let speedup = python_time / scirs2_time;
321 let status = if speedup > 1.2 {
322 "ð FASTER"
323 } else if speedup > 0.8 {
324 "â SIMILAR"
325 } else {
326 "ð SLOWER"
327 };
328
329 println!(
330 " {}: SciRS2 {:.2}ms vs sklearn {:.2}ms ({:.1}x {}",
331 name,
332 scirs2_time * 1000.0,
333 python_time * 1000.0,
334 speedup,
335 status
336 );
337 }
338 _ => {
339 println!(" {name}: Failed to benchmark Python version");
340 }
341 }
342 }
343}
344
345#[allow(dead_code)]
346fn run_sklearn_generation_comparison() {
347 println!("\n ðŽ Data Generation Comparison:");
348
349 let configs = vec![
350 (1000, 10, "classification"),
351 (5000, 20, "classification"),
352 (1000, 10, "regression"),
353 (5000, 20, "regression"),
354 ];
355
356 for (n_samples, n_features, gen_type) in configs {
357 #[allow(clippy::type_complexity)]
358 let (python_code, scirs2_fn): (&str, Box<dyn Fn() -> Result<Dataset, Box<dyn std::error::Error>>>) = match gen_type {
359 "classification" => (
360 &format!("from sklearn.datasets import make_classification; make_classification(n_samples={n_samples}, n_features={n_features}, random_state=42)"),
361 Box::new(move || make_classification(n_samples, n_features, 3, 2, 4, Some(42)).map_err(|e| Box::new(e) as Box<dyn std::error::Error>))
362 ),
363 "regression" => (
364 &format!("from sklearn.datasets import make_regression; make_regression(n_samples={n_samples}, n_features={n_features}, random_state=42)"),
365 Box::new(move || make_regression(n_samples, n_features, 3, 0.1, Some(42)).map_err(|e| Box::new(e) as Box<dyn std::error::Error>))
366 ),
367 _ => continue,
368 };
369
370 let python_result = Command::new("python3")
372 .arg("-c")
373 .arg(format!(
374 "import time; start=time.time(); {python_code}; print(f'{{:.4f}}', time.time()-start)"
375 ))
376 .output();
377
378 match python_result {
379 Ok(output) if output.status.success() => {
380 let python_time = String::from_utf8_lossy(&output.stdout)
381 .trim()
382 .parse::<f64>()
383 .unwrap_or(0.0);
384
385 let scirs2_start = Instant::now();
387 let _scirs2_result = scirs2_fn();
388 let scirs2_time = scirs2_start.elapsed().as_secs_f64();
389
390 let speedup = python_time / scirs2_time;
391 let status = if speedup > 1.2 {
392 "ð FASTER"
393 } else if speedup > 0.8 {
394 "â SIMILAR"
395 } else {
396 "ð SLOWER"
397 };
398
399 println!(
400 " {} {}x{}: SciRS2 {:.2}ms vs sklearn {:.2}ms ({:.1}x {})",
401 gen_type,
402 n_samples,
403 n_features,
404 scirs2_time * 1000.0,
405 python_time * 1000.0,
406 speedup,
407 status
408 );
409 }
410 _ => {
411 println!(
412 " {gen_type} {n_samples}x{n_features}: Failed to benchmark Python version"
413 );
414 }
415 }
416 }
417}
418
#[allow(dead_code)]
/// Prints an aggregate summary across all benchmark suites: totals, overall
/// throughput, a coarse qualitative performance grade, and a list of tuning
/// recommendations (some conditional on the Data Generation suite's results).
fn generate_performance_report(suites: &[BenchmarkSuite]) {
    println!("\nð PERFORMANCE SUMMARY REPORT");
    println!("{}", "=".repeat(60));

    // Accumulate totals across every suite (failed results are included in
    // the operation count via suite.results).
    let mut total_operations = 0;
    let mut total_samples = 0;
    let mut total_duration = Duration::ZERO;

    for suite in suites {
        total_operations += suite.results.len();
        total_samples += suite.total_samples();
        total_duration += suite.total_duration;
    }

    println!(" Total operations benchmarked: {total_operations}");
    println!(" Total samples processed: {total_samples}");
    println!(
        " Total benchmark time: {:.2}s",
        total_duration.as_secs_f64()
    );
    println!(
        " Overall throughput: {:.1} samples/s",
        total_samples as f64 / total_duration.as_secs_f64()
    );

    // NOTE(review): same quantity as "Overall throughput" printed above; if
    // total_duration is zero (no suites) this is NaN — consider guarding.
    let avg_throughput = total_samples as f64 / total_duration.as_secs_f64();

    println!("\n ðŊ PERFORMANCE ASSESSMENT:");
    // Coarse samples/s thresholds for the qualitative grade.
    if avg_throughput > 50000.0 {
        println!(" â EXCELLENT - High-performance implementation");
    } else if avg_throughput > 10000.0 {
        println!(" â
GOOD - Solid performance for scientific computing");
    } else if avg_throughput > 1000.0 {
        println!(" â ïļ MODERATE - Acceptable for most use cases");
    } else {
        println!(" â SLOW - May need optimization");
    }

    println!("\n ðĄ RECOMMENDATIONS:");

    // Data-generation-specific recommendations, when that suite ran.
    if let Some(gen_suite) = suites.iter().find(|s| s.name == "Data Generation") {
        let successful = gen_suite.successful_results();
        let failed = gen_suite.failed_results();

        if !failed.is_empty() {
            println!(
                " âĒ Fix {} failed data generation operations",
                failed.len()
            );
        }

        if !successful.is_empty() {
            let avg_gen_throughput =
                successful.iter().map(|r| r.throughput).sum::<f64>() / successful.len() as f64;
            // Only suggest optimization work when generation is clearly slow.
            if avg_gen_throughput < 1000.0 {
                println!(" âĒ Consider optimizing data generation algorithms");
                println!(" âĒ Implement SIMD operations for numeric computations");
                println!(" âĒ Use parallel processing for large datasets");
            }
        }
    }

    // General recommendations, printed unconditionally.
    println!(" âĒ Consider GPU acceleration for large-scale operations");
    println!(" âĒ Implement streaming for memory-efficient processing");
    println!(" âĒ Add caching for frequently accessed datasets");
}
488
#[allow(dead_code)]
/// Formats a `Duration` with an adaptive unit: seconds with two decimals when
/// it is at least one second, otherwise whole milliseconds, otherwise whole
/// microseconds.
fn format_duration(duration: Duration) -> String {
    match (duration.as_secs(), duration.as_millis()) {
        (secs, _) if secs > 0 => format!("{:.2}s", duration.as_secs_f64()),
        (_, millis) if millis > 0 => format!("{millis}ms"),
        _ => format!("{}Ξs", duration.as_micros()),
    }
}