use crate::generators::*;
use crate::loaders::{load_csv, load_csv_parallel, CsvConfig, StreamingConfig};
use crate::sample::load_wine;
use crate::toy::{load_diabetes, *};
use std::collections::HashMap;
use std::path::Path;
use std::time::{Duration, Instant};

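/// Result of a single benchmark run, including timing and throughput metrics.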
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the operation that was benchmarked.
    pub operation: String,
    /// Key-value parameters describing the benchmark configuration.
    pub parameters: HashMap<String, String>,
    /// Average duration of a single iteration.
    pub duration: Duration,
    /// Memory used in bytes, if measured.
    pub memory_used: Option<usize>,
    /// Number of samples processed.
    pub samples: usize,
    /// Number of features per sample.
    pub features: usize,
    /// Throughput in samples per second.
    pub throughput: f64,
    /// Whether the benchmark completed successfully.
    pub success: bool,
    /// Error message if the benchmark failed.
    pub error: Option<String>,
}

impl BenchmarkResult {
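    /// Create an empty result for the given operation name and parameter set.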
    pub fn new(operation: String, parameters: HashMap<String, String>) -> Self {
        Self {
            operation,
            parameters,
            duration: Duration::ZERO,
            memory_used: None,
            samples: 0,
            features: 0,
            throughput: 0.0,
            success: false,
            error: None,
        }
    }

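    /// Mark the result as successful, recording the duration and deriving throughput.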
    pub fn success(mut self, duration: Duration, samples: usize, features: usize) -> Self {
        self.duration = duration;
        self.samples = samples;
        self.features = features;
        self.throughput = if duration.as_secs_f64() > 0.0 {
            samples as f64 / duration.as_secs_f64()
        } else {
            0.0
        };
        self.success = true;
        self
    }

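    /// Mark the result as failed with the given error message.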
    pub fn failure(mut self, error: String) -> Self {
        self.success = false;
        self.error = Some(error);
        self
    }

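    /// Attach a memory-usage measurement (in bytes) to the result.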
    pub fn with_memory(mut self, memory_used: usize) -> Self {
        self.memory_used = Some(memory_used);
        self
    }

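    /// Format the duration with an appropriate unit (s, ms, or µs).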
    pub fn formatted_duration(&self) -> String {
        if self.duration.as_secs() > 0 {
            format!("{:.2}s", self.duration.as_secs_f64())
        } else if self.duration.as_millis() > 0 {
            format!("{}ms", self.duration.as_millis())
        } else {
            format!("{}µs", self.duration.as_micros())
        }
    }

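    /// Format the throughput in samples per second, using a K suffix above 1000.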
    pub fn formatted_throughput(&self) -> String {
        if self.throughput >= 1000.0 {
            format!("{:.1}K samples/s", self.throughput / 1000.0)
        } else {
            format!("{:.1} samples/s", self.throughput)
        }
    }

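    /// Format the memory usage with an appropriate unit, or "N/A" if not measured.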
    pub fn formatted_memory(&self) -> String {
        match self.memory_used {
            Some(bytes) => {
                if bytes >= 1024 * 1024 * 1024 {
                    format!("{:.1} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
                } else if bytes >= 1024 * 1024 {
                    format!("{:.1} MB", bytes as f64 / (1024.0 * 1024.0))
                } else if bytes >= 1024 {
                    format!("{:.1} KB", bytes as f64 / 1024.0)
                } else {
                    format!("{bytes} B")
                }
            }
            None => "N/A".to_string(),
        }
    }
}

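/// A named collection of benchmark results with aggregate statistics.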
#[derive(Debug, Clone)]
pub struct BenchmarkSuite {
    /// Name of the suite.
    pub name: String,
    /// All collected results.
    pub results: Vec<BenchmarkResult>,
    /// Sum of the average durations of all results.
    pub total_duration: Duration,
}

impl BenchmarkSuite {
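    /// Create an empty suite with the given name.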
    pub fn new(name: String) -> Self {
        Self {
            name,
            results: Vec::new(),
            total_duration: Duration::ZERO,
        }
    }

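    /// Add a result to the suite, accumulating its duration into the total.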
    pub fn add_result(&mut self, result: BenchmarkResult) {
        self.total_duration += result.duration;
        self.results.push(result);
    }

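    /// All results that completed successfully.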
    pub fn successful_results(&self) -> Vec<&BenchmarkResult> {
        self.results.iter().filter(|r| r.success).collect()
    }

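    /// All results that failed.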
    pub fn failed_results(&self) -> Vec<&BenchmarkResult> {
        self.results.iter().filter(|r| !r.success).collect()
    }

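    /// Mean throughput (samples/s) across successful results, or 0.0 if there are none.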
    pub fn average_throughput(&self) -> f64 {
        let successful = self.successful_results();
        if successful.is_empty() {
            0.0
        } else {
            successful.iter().map(|r| r.throughput).sum::<f64>() / successful.len() as f64
        }
    }

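    /// Total number of samples processed across all successful results.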
    pub fn total_samples(&self) -> usize {
        self.successful_results().iter().map(|r| r.samples).sum()
    }

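    /// Print a human-readable summary, including failures and per-result details.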
    pub fn print_summary(&self) {
        println!("=== Benchmark Suite: {} ===", self.name);
        println!("Total duration: {:.2}s", self.total_duration.as_secs_f64());
        println!(
            "Successful benchmarks: {}/{}",
            self.successful_results().len(),
            self.results.len()
        );
        println!("Total samples processed: {}", self.total_samples());
        println!(
            "Average throughput: {:.1} samples/s",
            self.average_throughput()
        );

        if !self.failed_results().is_empty() {
            println!("\nFailed benchmarks:");
            for result in self.failed_results() {
                println!(
                    " - {}: {}",
                    result.operation,
                    result
                        .error
                        .as_ref()
                        .unwrap_or(&"Unknown error".to_string())
                );
            }
        }

        println!("\nDetailed results:");
        for result in &self.results {
            if result.success {
                println!(
                    " {} - {} ({} samples, {} features) - {}",
                    result.operation,
                    result.formatted_duration(),
                    result.samples,
                    result.features,
                    result.formatted_throughput()
                );
            }
        }
    }
}

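/// Configurable runner that executes benchmark closures with warmup and averaging.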
pub struct BenchmarkRunner {
    /// Number of timed iterations per benchmark.
    pub iterations: usize,
    /// Whether to measure memory usage.
    pub measure_memory: bool,
    /// Number of untimed warmup iterations.
    pub warmup_iterations: usize,
}

impl Default for BenchmarkRunner {
    fn default() -> Self {
        Self {
            iterations: 5,
            measure_memory: false,
            warmup_iterations: 1,
        }
    }
}

impl BenchmarkRunner {
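    /// Create a runner with the default settings (5 iterations, 1 warmup).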
    pub fn new() -> Self {
        Self::default()
    }

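    /// Set the number of timed iterations.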
    pub fn with_iterations(mut self, iterations: usize) -> Self {
        self.iterations = iterations;
        self
    }

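    /// Enable or disable memory measurement.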
    pub fn with_memory_measurement(mut self, measure: bool) -> Self {
        self.measure_memory = measure;
        self
    }

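    /// Set the number of untimed warmup iterations.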
    pub fn with_warmup(mut self, warmup_iterations: usize) -> Self {
        self.warmup_iterations = warmup_iterations;
        self
    }

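    /// Run a benchmark closure, averaging its duration over the configured iterations.
    ///
    /// The closure returns `(samples, features)` on success or an error string on failure.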
    pub fn run_benchmark<F>(
        &self,
        name: &str,
        parameters: HashMap<String, String>,
        mut benchmark_fn: F,
    ) -> BenchmarkResult
    where
        F: FnMut() -> std::result::Result<(usize, usize), String>,
    {
        // Untimed warmup runs.
        for _ in 0..self.warmup_iterations {
            let _ = benchmark_fn();
        }

        let mut durations = Vec::new();
        let mut last_samples = 0;
        let mut last_features = 0;
        let mut last_error = None;

        // Timed runs; stop early on the first failure.
        for _ in 0..self.iterations {
            let start = Instant::now();
            match benchmark_fn() {
                Ok((samples, features)) => {
                    let duration = start.elapsed();
                    durations.push(duration);
                    last_samples = samples;
                    last_features = features;
                }
                Err(e) => {
                    last_error = Some(e);
                    break;
                }
            }
        }

        if let Some(error) = last_error {
            return BenchmarkResult::new(name.to_string(), parameters).failure(error);
        }

        if durations.is_empty() {
            return BenchmarkResult::new(name.to_string(), parameters)
                .failure("No successful runs".to_string());
        }

        // Report the mean duration across all timed runs.
        let total_duration: Duration = durations.iter().sum();
        let avg_duration = total_duration / durations.len() as u32;

        BenchmarkResult::new(name.to_string(), parameters).success(
            avg_duration,
            last_samples,
            last_features,
        )
    }

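    /// Benchmark loading each of the bundled toy datasets.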
    pub fn benchmark_toy_datasets(&self) -> BenchmarkSuite {
        let mut suite = BenchmarkSuite::new("Toy Datasets".to_string());

        let iris_params = HashMap::from([("dataset".to_string(), "iris".to_string())]);
        let iris_result = self.run_benchmark("load_iris", iris_params, || match load_iris() {
            Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
            Err(e) => Err(format!("Failed to load iris: {e}")),
        });
        suite.add_result(iris_result);

        let boston_params = HashMap::from([("dataset".to_string(), "boston".to_string())]);
        let boston_result =
            self.run_benchmark("load_boston", boston_params, || match load_boston() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load boston: {e}")),
            });
        suite.add_result(boston_result);

        let digits_params = HashMap::from([("dataset".to_string(), "digits".to_string())]);
        let digits_result =
            self.run_benchmark("load_digits", digits_params, || match load_digits() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load digits: {e}")),
            });
        suite.add_result(digits_result);

        let wine_params = HashMap::from([("dataset".to_string(), "wine".to_string())]);
        let wine_result = self.run_benchmark("load_wine", wine_params, || match load_wine(false) {
            Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
            Err(e) => Err(format!("Failed to load wine: {e}")),
        });
        suite.add_result(wine_result);

        let bc_params = HashMap::from([("dataset".to_string(), "breast_cancer".to_string())]);
        let bc_result = self.run_benchmark(
            "load_breast_cancer",
            bc_params,
            || match load_breast_cancer() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load breast_cancer: {e}")),
            },
        );
        suite.add_result(bc_result);

        let diabetes_params = HashMap::from([("dataset".to_string(), "diabetes".to_string())]);
        let diabetes_result =
            self.run_benchmark("load_diabetes", diabetes_params, || match load_diabetes() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load diabetes: {e}")),
            });
        suite.add_result(diabetes_result);

        suite
    }

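    /// Benchmark the synthetic data generators across a grid of sample and feature counts.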
    pub fn benchmark_data_generation(&self) -> BenchmarkSuite {
        let mut suite = BenchmarkSuite::new("Data Generation".to_string());

        let sizes = vec![100, 1000, 10000];
        let features = vec![5, 20, 100];

        for &n_samples in &sizes {
            for &n_features in &features {
                let class_params = HashMap::from([
                    ("type".to_string(), "classification".to_string()),
                    ("samples".to_string(), n_samples.to_string()),
                    ("features".to_string(), n_features.to_string()),
                ]);
                let class_result = self.run_benchmark(
                    &format!("make_classification_{n_samples}x{n_features}"),
                    class_params,
                    || match make_classification(n_samples, n_features, 3, 2, 4, Some(42)) {
                        Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                        Err(e) => Err(format!("Failed to generate classification data: {e}")),
                    },
                );
                suite.add_result(class_result);

                let reg_params = HashMap::from([
                    ("type".to_string(), "regression".to_string()),
                    ("samples".to_string(), n_samples.to_string()),
                    ("features".to_string(), n_features.to_string()),
                ]);
                let reg_result = self.run_benchmark(
                    &format!("make_regression_{n_samples}x{n_features}"),
                    reg_params,
                    || match make_regression(n_samples, n_features, 3, 0.1, Some(42)) {
                        Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                        Err(e) => Err(format!("Failed to generate regression data: {e}")),
                    },
                );
                suite.add_result(reg_result);

                if n_features <= 10 {
                    let blob_params = HashMap::from([
                        ("type".to_string(), "blobs".to_string()),
                        ("samples".to_string(), n_samples.to_string()),
                        ("features".to_string(), n_features.to_string()),
                    ]);
                    let blob_result = self.run_benchmark(
                        &format!("make_blobs_{n_samples}x{n_features}"),
                        blob_params,
                        || match make_blobs(n_samples, n_features, 4, 1.0, Some(42)) {
                            Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                            Err(e) => Err(format!("Failed to generate blob data: {e}")),
                        },
                    );
                    suite.add_result(blob_result);
                }
            }
        }

        suite
    }

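    /// Benchmark standard and parallel CSV loading for the file at `csv_path`.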
    pub fn benchmark_csv_loading<P: AsRef<Path>>(&self, csv_path: P) -> BenchmarkSuite {
        let mut suite = BenchmarkSuite::new("CSV Loading".to_string());
        let path = csv_path.as_ref();

        if !path.exists() {
            let result = BenchmarkResult::new("csv_loading".to_string(), HashMap::new())
                .failure("CSV file not found".to_string());
            suite.add_result(result);
            return suite;
        }

        let std_params = HashMap::from([
            ("method".to_string(), "standard".to_string()),
            ("file".to_string(), path.to_string_lossy().to_string()),
        ]);
        let std_result = self.run_benchmark("csv_standard", std_params, || {
            let config = CsvConfig::default().with_header(true);
            match load_csv(path, config) {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load CSV: {e}")),
            }
        });
        suite.add_result(std_result);

        let par_params = HashMap::from([
            ("method".to_string(), "parallel".to_string()),
            ("file".to_string(), path.to_string_lossy().to_string()),
        ]);
        let par_result = self.run_benchmark("csv_parallel", par_params, || {
            let csv_config = CsvConfig::default().with_header(true);
            let streaming_config = StreamingConfig::default()
                .with_parallel(true)
                .with_chunk_size(1000);
            match load_csv_parallel(path, csv_config, streaming_config) {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load CSV in parallel: {e}")),
            }
        });
        suite.add_result(par_result);

        suite
    }

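    /// Run the full benchmark collection and print a summary for each suite.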
    pub fn run_comprehensive_benchmarks(&self) -> Vec<BenchmarkSuite> {
        println!("Running comprehensive SciRS2 performance benchmarks...\n");

        let mut suites = Vec::new();

        println!("Benchmarking toy datasets...");
        let toy_suite = self.benchmark_toy_datasets();
        toy_suite.print_summary();
        suites.push(toy_suite);
        println!();

        println!("Benchmarking data generation...");
        let gen_suite = self.benchmark_data_generation();
        gen_suite.print_summary();
        suites.push(gen_suite);
        println!();

        suites
    }
}

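/// Compares two benchmark suites (a baseline and a current run) operation by operation.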
pub struct PerformanceComparison {
    /// Results from the reference (baseline) run.
    pub baseline: BenchmarkSuite,
    /// Results from the current run.
    pub current: BenchmarkSuite,
}

impl PerformanceComparison {
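    /// Create a comparison between a baseline suite and a current suite.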
    pub fn new(baseline: BenchmarkSuite, current: BenchmarkSuite) -> Self {
        Self { baseline, current }
    }

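    /// Per-operation speedups, computed as baseline duration divided by current duration.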
    pub fn calculate_speedups(&self) -> HashMap<String, f64> {
        let mut speedups = HashMap::new();

        for current_result in &self.current.results {
            if let Some(baseline_result) = self
                .baseline
                .results
                .iter()
                .find(|r| r.operation == current_result.operation)
            {
                if baseline_result.success && current_result.success {
                    let speedup = baseline_result.duration.as_secs_f64()
                        / current_result.duration.as_secs_f64();
                    speedups.insert(current_result.operation.clone(), speedup);
                }
            }
        }

        speedups
    }

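    /// Print a speedup analysis with per-operation status and an overall assessment.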
    pub fn print_comparison(&self) {
        println!("=== Performance Comparison ===");
        println!("Baseline: {}", self.baseline.name);
        println!("Current: {}", self.current.name);
        println!();

        let speedups = self.calculate_speedups();

        if speedups.is_empty() {
            println!("No matching operations found for comparison.");
            return;
        }

        let mut improvements = 0;
        let mut regressions = 0;
        let mut total_speedup = 0.0;

        println!("Speedup Analysis:");
        for (operation, speedup) in &speedups {
            let status = if *speedup > 1.1 {
                improvements += 1;
                "📈 FASTER"
            } else if *speedup < 0.9 {
                regressions += 1;
                "📉 SLOWER"
            } else {
                "≈ SAME"
            };

            println!(" {operation}: {speedup:.2}x {status}");
            total_speedup += speedup;
        }

        let avg_speedup = total_speedup / speedups.len() as f64;

        println!();
        println!("Summary:");
        println!(" Improvements: {improvements}");
        println!(" Regressions: {regressions}");
        println!(
            " Unchanged: {}",
            speedups.len() - improvements - regressions
        );
        println!(" Average speedup: {avg_speedup:.2}x");

        if avg_speedup > 1.1 {
            println!(" Overall assessment: 🚀 SIGNIFICANT IMPROVEMENT");
        } else if avg_speedup > 1.0 {
            println!(" Overall assessment: ✅ MINOR IMPROVEMENT");
        } else if avg_speedup > 0.9 {
            println!(" Overall assessment: ≈ COMPARABLE PERFORMANCE");
        } else {
            println!(" Overall assessment: ⚠️ PERFORMANCE REGRESSION");
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_result() {
        let params = HashMap::from([("test".to_string(), "value".to_string())]);
        let result = BenchmarkResult::new("test_op".to_string(), params).success(
            Duration::from_millis(100),
            1000,
            10,
        );

        assert!(result.success);
        assert_eq!(result.samples, 1000);
        assert_eq!(result.features, 10);
        assert!(result.throughput > 0.0);
    }

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_suite() {
        let mut suite = BenchmarkSuite::new("test_suite".to_string());

        let result1 = BenchmarkResult::new("op1".to_string(), HashMap::new()).success(
            Duration::from_millis(50),
            500,
            5,
        );
        let result2 = BenchmarkResult::new("op2".to_string(), HashMap::new())
            .failure("test error".to_string());

        suite.add_result(result1);
        suite.add_result(result2);

        assert_eq!(suite.results.len(), 2);
        assert_eq!(suite.successful_results().len(), 1);
        assert_eq!(suite.failed_results().len(), 1);
        assert_eq!(suite.total_samples(), 500);
    }

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_runner() {
        let runner = BenchmarkRunner::new().with_iterations(3).with_warmup(1);

        let params = HashMap::new();
        let result = runner.run_benchmark("test", params, || {
            std::thread::sleep(Duration::from_millis(1));
            Ok((100, 10))
        });

        assert!(result.success);
        assert_eq!(result.samples, 100);
        assert_eq!(result.features, 10);
    }

    #[test]
    #[ignore = "timeout"]
    fn test_toy_datasets_benchmark() {
        let runner = BenchmarkRunner::new().with_iterations(1);
        let suite = runner.benchmark_toy_datasets();

        assert!(!suite.results.is_empty());
        assert!(!suite.successful_results().is_empty());
    }

    #[test]
    #[ignore = "timeout"]
    fn test_data_generation_benchmark() {
        let runner = BenchmarkRunner::new().with_iterations(1);
        let suite = runner.benchmark_data_generation();

        assert!(!suite.results.is_empty());
        assert!(!suite.successful_results().is_empty());
    }
}
687}