scirs2_datasets/benchmarks.rs

//! Performance benchmarking utilities
//!
//! This module provides tools for benchmarking dataset operations against scikit-learn
//! and other reference implementations to measure performance improvements.

use crate::generators::*;
use crate::loaders::{load_csv, load_csv_parallel, CsvConfig, StreamingConfig};
use crate::sample::load_wine;
use crate::toy::{load_diabetes, *};
use std::collections::HashMap;
use std::path::Path;
use std::time::{Duration, Instant};

/// Benchmark result for a single operation
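///
/// # Examples
///
/// A minimal sketch of the builder-style API (the import path assumes this
/// module is exposed as `scirs2_datasets::benchmarks`):
///
/// ```
/// use std::collections::HashMap;
/// use std::time::Duration;
/// use scirs2_datasets::benchmarks::BenchmarkResult;
///
/// // Record a run that processed 1_000 samples of 4 features in 10 ms.
/// let result = BenchmarkResult::new("demo".to_string(), HashMap::new())
///     .success(Duration::from_millis(10), 1_000, 4)
///     .with_memory(4 * 1024);
/// assert!(result.success);
/// assert_eq!(result.formatted_memory(), "4.0 KB");
/// ```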
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the operation
    pub operation: String,
    /// Parameters used in the benchmark
    pub parameters: HashMap<String, String>,
    /// Total execution time
    pub duration: Duration,
    /// Memory usage in bytes (if measured)
    pub memory_used: Option<usize>,
    /// Number of samples processed
    pub samples: usize,
    /// Number of features
    pub features: usize,
    /// Throughput (samples per second)
    pub throughput: f64,
    /// Success/failure status
    pub success: bool,
    /// Error message if failed
    pub error: Option<String>,
}

impl BenchmarkResult {
    /// Create a new benchmark result
    pub fn new(operation: String, parameters: HashMap<String, String>) -> Self {
        Self {
            operation,
            parameters,
            duration: Duration::ZERO,
            memory_used: None,
            samples: 0,
            features: 0,
            throughput: 0.0,
            success: false,
            error: None,
        }
    }

    /// Mark the benchmark as successful with timing information
    pub fn success(mut self, duration: Duration, samples: usize, features: usize) -> Self {
        self.duration = duration;
        self.samples = samples;
        self.features = features;
        self.throughput = if duration.as_secs_f64() > 0.0 {
            samples as f64 / duration.as_secs_f64()
        } else {
            0.0
        };
        self.success = true;
        self
    }

    /// Mark the benchmark as failed with error message
    pub fn failure(mut self, error: String) -> Self {
        self.success = false;
        self.error = Some(error);
        self
    }

    /// Set memory usage
    pub fn with_memory(mut self, memory_used: usize) -> Self {
        self.memory_used = Some(memory_used);
        self
    }

    /// Get formatted duration string
    pub fn formatted_duration(&self) -> String {
        if self.duration.as_secs() > 0 {
            format!("{:.2}s", self.duration.as_secs_f64())
        } else if self.duration.as_millis() > 0 {
            format!("{}ms", self.duration.as_millis())
        } else {
            format!("{}µs", self.duration.as_micros())
        }
    }

    /// Get formatted throughput string
    pub fn formatted_throughput(&self) -> String {
        if self.throughput >= 1000.0 {
            format!("{:.1}K samples/s", self.throughput / 1000.0)
        } else {
            format!("{:.1} samples/s", self.throughput)
        }
    }

    /// Get formatted memory usage string
    pub fn formatted_memory(&self) -> String {
        match self.memory_used {
            Some(bytes) => {
                if bytes >= 1024 * 1024 * 1024 {
                    format!("{:.1} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
                } else if bytes >= 1024 * 1024 {
                    format!("{:.1} MB", bytes as f64 / (1024.0 * 1024.0))
                } else if bytes >= 1024 {
                    format!("{:.1} KB", bytes as f64 / 1024.0)
                } else {
                    format!("{bytes} B")
                }
            }
            None => "N/A".to_string(),
        }
    }
}

/// Collection of benchmark results
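///
/// # Examples
///
/// A minimal sketch (import path as assumed above):
///
/// ```
/// use std::collections::HashMap;
/// use std::time::Duration;
/// use scirs2_datasets::benchmarks::{BenchmarkResult, BenchmarkSuite};
///
/// let mut suite = BenchmarkSuite::new("demo".to_string());
/// suite.add_result(
///     BenchmarkResult::new("op".to_string(), HashMap::new())
///         .success(Duration::from_millis(5), 500, 8),
/// );
/// assert_eq!(suite.total_samples(), 500);
/// assert_eq!(suite.successful_results().len(), 1);
/// ```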
#[derive(Debug, Clone)]
pub struct BenchmarkSuite {
    /// Name of the benchmark suite
    pub name: String,
    /// Individual benchmark results
    pub results: Vec<BenchmarkResult>,
    /// Total time for the entire suite
    pub total_duration: Duration,
}

impl BenchmarkSuite {
    /// Create a new benchmark suite
    pub fn new(name: String) -> Self {
        Self {
            name,
            results: Vec::new(),
            total_duration: Duration::ZERO,
        }
    }

    /// Add a benchmark result
    pub fn add_result(&mut self, result: BenchmarkResult) {
        self.total_duration += result.duration;
        self.results.push(result);
    }

    /// Get successful results only
    pub fn successful_results(&self) -> Vec<&BenchmarkResult> {
        self.results.iter().filter(|r| r.success).collect()
    }

    /// Get failed results only
    pub fn failed_results(&self) -> Vec<&BenchmarkResult> {
        self.results.iter().filter(|r| !r.success).collect()
    }

    /// Calculate average throughput
    pub fn average_throughput(&self) -> f64 {
        let successful = self.successful_results();
        if successful.is_empty() {
            0.0
        } else {
            successful.iter().map(|r| r.throughput).sum::<f64>() / successful.len() as f64
        }
    }

    /// Get total samples processed
    pub fn total_samples(&self) -> usize {
        self.successful_results().iter().map(|r| r.samples).sum()
    }

    /// Print a summary report
    pub fn print_summary(&self) {
        println!("=== Benchmark Suite: {} ===", self.name);
        println!("Total duration: {:.2}s", self.total_duration.as_secs_f64());
        println!(
            "Successful benchmarks: {}/{}",
            self.successful_results().len(),
            self.results.len()
        );
        println!("Total samples processed: {}", self.total_samples());
        println!(
            "Average throughput: {:.1} samples/s",
            self.average_throughput()
        );

        if !self.failed_results().is_empty() {
            println!("\nFailed benchmarks:");
            for result in self.failed_results() {
                println!(
                    "  - {}: {}",
                    result.operation,
                    result
                        .error
                        .as_ref()
                        .unwrap_or(&"Unknown error".to_string())
                );
            }
        }

        println!("\nDetailed results:");
        for result in &self.results {
            if result.success {
                println!(
                    "  {} - {} ({} samples, {} features) - {}",
                    result.operation,
                    result.formatted_duration(),
                    result.samples,
                    result.features,
                    result.formatted_throughput()
                );
            }
        }
    }
}

/// Benchmark runner for dataset operations
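///
/// # Examples
///
/// A minimal sketch: configure the runner and time a closure that reports
/// `(samples, features)` on success (import path as assumed above):
///
/// ```
/// use std::collections::HashMap;
/// use scirs2_datasets::benchmarks::BenchmarkRunner;
///
/// let runner = BenchmarkRunner::new().with_iterations(3).with_warmup(1);
/// let result = runner.run_benchmark("noop", HashMap::new(), || Ok((100, 10)));
/// assert!(result.success);
/// ```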
pub struct BenchmarkRunner {
    /// Number of iterations for each benchmark
    pub iterations: usize,
    /// Whether to include memory measurements
    pub measure_memory: bool,
    /// Warmup iterations before actual benchmarks
    pub warmup_iterations: usize,
}

impl Default for BenchmarkRunner {
    fn default() -> Self {
        Self {
            iterations: 5,
            measure_memory: false,
            warmup_iterations: 1,
        }
    }
}

impl BenchmarkRunner {
    /// Create a new benchmark runner
    pub fn new() -> Self {
        Self::default()
    }

    /// Set number of iterations
    pub fn with_iterations(mut self, iterations: usize) -> Self {
        self.iterations = iterations;
        self
    }

    /// Enable memory measurement
    pub fn with_memory_measurement(mut self, measure: bool) -> Self {
        self.measure_memory = measure;
        self
    }

    /// Set warmup iterations
    pub fn with_warmup(mut self, warmup_iterations: usize) -> Self {
        self.warmup_iterations = warmup_iterations;
        self
    }

    /// Run a benchmark function multiple times and return the average result
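    ///
    /// A single `Err` from the closure aborts the remaining iterations and
    /// marks the result as failed. A minimal sketch of the failure path
    /// (import path as assumed above):
    ///
    /// ```
    /// # use std::collections::HashMap;
    /// # use scirs2_datasets::benchmarks::BenchmarkRunner;
    /// let runner = BenchmarkRunner::new().with_iterations(2);
    /// // The closure's return type is pinned by the trait bound, so plain
    /// // `Err(...)` infers to `Result<(usize, usize), String>`.
    /// let failed = runner.run_benchmark("bad", HashMap::new(), || Err("boom".to_string()));
    /// assert!(!failed.success);
    /// ```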
    pub fn run_benchmark<F>(
        &self,
        name: &str,
        parameters: HashMap<String, String>,
        mut benchmark_fn: F,
    ) -> BenchmarkResult
    where
        F: FnMut() -> std::result::Result<(usize, usize), String>,
    {
        // Warmup runs
        for _ in 0..self.warmup_iterations {
            let _ = benchmark_fn();
        }

        let mut durations = Vec::new();
        let mut last_samples = 0;
        let mut last_features = 0;
        let mut last_error = None;

        // Actual benchmark runs
        for _ in 0..self.iterations {
            let start = Instant::now();
            match benchmark_fn() {
                Ok((samples, features)) => {
                    let duration = start.elapsed();
                    durations.push(duration);
                    last_samples = samples;
                    last_features = features;
                }
                Err(e) => {
                    last_error = Some(e);
                    break;
                }
            }
        }

        if let Some(error) = last_error {
            return BenchmarkResult::new(name.to_string(), parameters).failure(error);
        }

        if durations.is_empty() {
            return BenchmarkResult::new(name.to_string(), parameters)
                .failure("No successful runs".to_string());
        }

        // Calculate average duration
        let total_duration: Duration = durations.iter().sum();
        let avg_duration = total_duration / durations.len() as u32;

        BenchmarkResult::new(name.to_string(), parameters).success(
            avg_duration,
            last_samples,
            last_features,
        )
    }

    /// Benchmark toy dataset loading
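    ///
    /// A short sketch (`no_run` since it exercises the bundled datasets):
    ///
    /// ```no_run
    /// # use scirs2_datasets::benchmarks::BenchmarkRunner;
    /// let suite = BenchmarkRunner::new().with_iterations(1).benchmark_toy_datasets();
    /// suite.print_summary();
    /// ```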
    pub fn benchmark_toy_datasets(&self) -> BenchmarkSuite {
        let mut suite = BenchmarkSuite::new("Toy Datasets".to_string());

        // Benchmark iris dataset
        let iris_params = HashMap::from([("dataset".to_string(), "iris".to_string())]);
        let iris_result = self.run_benchmark("load_iris", iris_params, || match load_iris() {
            Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
            Err(e) => Err(format!("Failed to load iris: {e}")),
        });
        suite.add_result(iris_result);

        // Benchmark boston dataset
        let boston_params = HashMap::from([("dataset".to_string(), "boston".to_string())]);
        let boston_result =
            self.run_benchmark("load_boston", boston_params, || match load_boston() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load boston: {e}")),
            });
        suite.add_result(boston_result);

        // Benchmark digits dataset
        let digits_params = HashMap::from([("dataset".to_string(), "digits".to_string())]);
        let digits_result =
            self.run_benchmark("load_digits", digits_params, || match load_digits() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load digits: {e}")),
            });
        suite.add_result(digits_result);

        // Benchmark wine dataset
        let wine_params = HashMap::from([("dataset".to_string(), "wine".to_string())]);
        let wine_result = self.run_benchmark("load_wine", wine_params, || match load_wine(false) {
            Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
            Err(e) => Err(format!("Failed to load wine: {e}")),
        });
        suite.add_result(wine_result);

        // Benchmark breast cancer dataset
        let bc_params = HashMap::from([("dataset".to_string(), "breast_cancer".to_string())]);
        let bc_result =
            self.run_benchmark(
                "load_breast_cancer",
                bc_params,
                || match load_breast_cancer() {
                    Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                    Err(e) => Err(format!("Failed to load breast cancer: {e}")),
                },
            );
        suite.add_result(bc_result);

        // Benchmark diabetes dataset
        let diabetes_params = HashMap::from([("dataset".to_string(), "diabetes".to_string())]);
        let diabetes_result =
            self.run_benchmark("load_diabetes", diabetes_params, || match load_diabetes() {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load diabetes: {e}")),
            });
        suite.add_result(diabetes_result);

        suite
    }

    /// Benchmark synthetic data generation
    pub fn benchmark_data_generation(&self) -> BenchmarkSuite {
        let mut suite = BenchmarkSuite::new("Data Generation".to_string());

        // Test different dataset sizes
        let sizes = vec![100, 1000, 10000];
        let features = vec![5, 20, 100];

        for &n_samples in &sizes {
            for &n_features in &features {
                // Classification benchmark
                let class_params = HashMap::from([
                    ("type".to_string(), "classification".to_string()),
                    ("samples".to_string(), n_samples.to_string()),
                    ("features".to_string(), n_features.to_string()),
                ]);
                let class_result = self.run_benchmark(
                    &format!("make_classification_{n_samples}x{n_features}"),
                    class_params,
                    || match make_classification(n_samples, n_features, 3, 2, 4, Some(42)) {
                        Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                        Err(e) => Err(format!("Failed to generate classification data: {e}")),
                    },
                );
                suite.add_result(class_result);

                // Regression benchmark
                let reg_params = HashMap::from([
                    ("type".to_string(), "regression".to_string()),
                    ("samples".to_string(), n_samples.to_string()),
                    ("features".to_string(), n_features.to_string()),
                ]);
                let reg_result = self.run_benchmark(
                    &format!("make_regression_{n_samples}x{n_features}"),
                    reg_params,
                    || match make_regression(n_samples, n_features, 3, 0.1, Some(42)) {
                        Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                        Err(e) => Err(format!("Failed to generate regression data: {e}")),
                    },
                );
                suite.add_result(reg_result);

                // Clustering benchmark (only for low-dimensional settings for now)
                if n_features <= 10 {
                    let blob_params = HashMap::from([
                        ("type".to_string(), "blobs".to_string()),
                        ("samples".to_string(), n_samples.to_string()),
                        ("features".to_string(), n_features.to_string()),
                    ]);
                    let blob_result = self.run_benchmark(
                        &format!("make_blobs_{n_samples}x{n_features}"),
                        blob_params,
                        || match make_blobs(n_samples, n_features, 4, 1.0, Some(42)) {
                            Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                            Err(e) => Err(format!("Failed to generate blob data: {e}")),
                        },
                    );
                    suite.add_result(blob_result);
                }
            }
        }

        suite
    }

    /// Benchmark CSV loading performance
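    ///
    /// A short sketch; `data.csv` is a hypothetical path, and a missing file
    /// is recorded as a failed result rather than a panic:
    ///
    /// ```no_run
    /// # use scirs2_datasets::benchmarks::BenchmarkRunner;
    /// let suite = BenchmarkRunner::new().benchmark_csv_loading("data.csv");
    /// suite.print_summary();
    /// ```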
    pub fn benchmark_csv_loading<P: AsRef<Path>>(&self, csv_path: P) -> BenchmarkSuite {
        let mut suite = BenchmarkSuite::new("CSV Loading".to_string());
        let path = csv_path.as_ref();

        if !path.exists() {
            let mut result = BenchmarkResult::new("csv_loading".to_string(), HashMap::new());
            result = result.failure("CSV file not found".to_string());
            suite.add_result(result);
            return suite;
        }

        // Standard CSV loading
        let std_params = HashMap::from([
            ("method".to_string(), "standard".to_string()),
            ("file".to_string(), path.to_string_lossy().to_string()),
        ]);
        let std_result = self.run_benchmark("csv_standard", std_params, || {
            let config = CsvConfig::default().with_header(true);
            match load_csv(path, config) {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load CSV: {e}")),
            }
        });
        suite.add_result(std_result);

        // Parallel CSV loading
        let par_params = HashMap::from([
            ("method".to_string(), "parallel".to_string()),
            ("file".to_string(), path.to_string_lossy().to_string()),
        ]);
        let par_result = self.run_benchmark("csv_parallel", par_params, || {
            let csv_config = CsvConfig::default().with_header(true);
            let streaming_config = StreamingConfig::default()
                .with_parallel(true)
                .with_chunk_size(1000);
            match load_csv_parallel(path, csv_config, streaming_config) {
                Ok(dataset) => Ok((dataset.n_samples(), dataset.n_features())),
                Err(e) => Err(format!("Failed to load CSV in parallel: {e}")),
            }
        });
        suite.add_result(par_result);

        suite
    }

    /// Run comprehensive benchmarks comparing SciRS2 performance
    pub fn run_comprehensive_benchmarks(&self) -> Vec<BenchmarkSuite> {
        println!("Running comprehensive SciRS2 performance benchmarks...\n");

        let mut suites = Vec::new();

        // Toy datasets benchmark
        println!("Benchmarking toy datasets...");
        let toy_suite = self.benchmark_toy_datasets();
        toy_suite.print_summary();
        suites.push(toy_suite);
        println!();

        // Data generation benchmark
        println!("Benchmarking data generation...");
        let gen_suite = self.benchmark_data_generation();
        gen_suite.print_summary();
        suites.push(gen_suite);
        println!();

        suites
    }
}

/// Performance comparison utilities
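///
/// # Examples
///
/// A minimal sketch comparing two suites that share an operation name
/// (import path as assumed above):
///
/// ```
/// use std::collections::HashMap;
/// use std::time::Duration;
/// use scirs2_datasets::benchmarks::{BenchmarkResult, BenchmarkSuite, PerformanceComparison};
///
/// let mut baseline = BenchmarkSuite::new("baseline".to_string());
/// baseline.add_result(
///     BenchmarkResult::new("op".to_string(), HashMap::new())
///         .success(Duration::from_millis(200), 1_000, 4),
/// );
/// let mut current = BenchmarkSuite::new("current".to_string());
/// current.add_result(
///     BenchmarkResult::new("op".to_string(), HashMap::new())
///         .success(Duration::from_millis(100), 1_000, 4),
/// );
///
/// let speedups = PerformanceComparison::new(baseline, current).calculate_speedups();
/// assert!((speedups["op"] - 2.0).abs() < 1e-9);
/// ```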
pub struct PerformanceComparison {
    /// Reference (baseline) results
    pub baseline: BenchmarkSuite,
    /// Current implementation results
    pub current: BenchmarkSuite,
}

impl PerformanceComparison {
    /// Create a new performance comparison
    pub fn new(baseline: BenchmarkSuite, current: BenchmarkSuite) -> Self {
        Self { baseline, current }
    }

    /// Calculate speedup ratios for matching operations
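    ///
    /// The ratio is `baseline_duration / current_duration`, so values above
    /// 1.0 mean the current implementation is faster: a 200 ms baseline
    /// against a 100 ms current run yields a speedup of 2.0.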
    pub fn calculate_speedups(&self) -> HashMap<String, f64> {
        let mut speedups = HashMap::new();

        for current_result in &self.current.results {
            if let Some(baseline_result) = self
                .baseline
                .results
                .iter()
                .find(|r| r.operation == current_result.operation)
            {
                if baseline_result.success && current_result.success {
                    let speedup = baseline_result.duration.as_secs_f64()
                        / current_result.duration.as_secs_f64();
                    speedups.insert(current_result.operation.clone(), speedup);
                }
            }
        }

        speedups
    }

    /// Print comparison report
    pub fn print_comparison(&self) {
        println!("=== Performance Comparison ===");
        println!("Baseline: {}", self.baseline.name);
        println!("Current:  {}", self.current.name);
        println!();

        let speedups = self.calculate_speedups();

        if speedups.is_empty() {
            println!("No matching operations found for comparison.");
            return;
        }

        let mut improvements = 0;
        let mut regressions = 0;
        let mut total_speedup = 0.0;

        println!("Speedup Analysis:");
        for (operation, speedup) in &speedups {
            let status = if *speedup > 1.1 {
                improvements += 1;
                "🚀 FASTER"
            } else if *speedup < 0.9 {
                regressions += 1;
                "🐌 SLOWER"
            } else {
                "≈ SAME"
            };

            println!("  {operation}: {speedup:.2}x {status}");
            total_speedup += speedup;
        }

        let avg_speedup = total_speedup / speedups.len() as f64;

        println!();
        println!("Summary:");
        println!("  Improvements: {improvements}");
        println!("  Regressions:  {regressions}");
        println!(
            "  Unchanged:    {}",
            speedups.len() - improvements - regressions
        );
        println!("  Average speedup: {avg_speedup:.2}x");

        if avg_speedup > 1.1 {
            println!("  Overall assessment: 🎉 SIGNIFICANT IMPROVEMENT");
        } else if avg_speedup > 1.0 {
            println!("  Overall assessment: ✅ MINOR IMPROVEMENT");
        } else if avg_speedup > 0.9 {
            println!("  Overall assessment: ≈ COMPARABLE PERFORMANCE");
        } else {
            println!("  Overall assessment: ⚠️ PERFORMANCE REGRESSION");
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_result() {
        let params = HashMap::from([("test".to_string(), "value".to_string())]);
        let result = BenchmarkResult::new("test_op".to_string(), params).success(
            Duration::from_millis(100),
            1000,
            10,
        );

        assert!(result.success);
        assert_eq!(result.samples, 1000);
        assert_eq!(result.features, 10);
        assert!(result.throughput > 0.0);
    }

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_suite() {
        let mut suite = BenchmarkSuite::new("test_suite".to_string());

        let result1 = BenchmarkResult::new("op1".to_string(), HashMap::new()).success(
            Duration::from_millis(50),
            500,
            5,
        );
        let result2 = BenchmarkResult::new("op2".to_string(), HashMap::new())
            .failure("test error".to_string());

        suite.add_result(result1);
        suite.add_result(result2);

        assert_eq!(suite.results.len(), 2);
        assert_eq!(suite.successful_results().len(), 1);
        assert_eq!(suite.failed_results().len(), 1);
        assert_eq!(suite.total_samples(), 500);
    }

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_runner() {
        let runner = BenchmarkRunner::new().with_iterations(3).with_warmup(1);

        let params = HashMap::new();
        let result = runner.run_benchmark("test", params, || {
            std::thread::sleep(Duration::from_millis(1));
            Ok((100, 10))
        });

        assert!(result.success);
        assert_eq!(result.samples, 100);
        assert_eq!(result.features, 10);
    }

    #[test]
    #[ignore = "timeout"]
    fn test_toy_datasets_benchmark() {
        let runner = BenchmarkRunner::new().with_iterations(1);
        let suite = runner.benchmark_toy_datasets();

        assert!(!suite.results.is_empty());
        assert!(!suite.successful_results().is_empty());
    }

    #[test]
    #[ignore = "timeout"]
    fn test_data_generation_benchmark() {
        let runner = BenchmarkRunner::new().with_iterations(1);
        let suite = runner.benchmark_data_generation();

        assert!(!suite.results.is_empty());
        // Allow some failures due to parameter combinations
        assert!(!suite.successful_results().is_empty());
    }
}
687}