1use scirs2_core::ndarray::{Array1, Array2, Axis};
7use scirs2_core::random::essentials::Uniform as RandUniform;
8use scirs2_core::random::rngs::StdRng as RealStdRng;
9use scirs2_core::random::RngExt;
10use scirs2_core::random::SeedableRng;
11use sklears_core::prelude::Result;
12use std::collections::HashMap;
13use std::time::{Duration, Instant};
14
15#[derive(Debug, Clone)]
17pub struct KernelApproximationBenchmark {
19 config: BenchmarkConfig,
20 datasets: Vec<BenchmarkDataset>,
21 results: Vec<BenchmarkResult>,
22}
23
24#[derive(Debug, Clone)]
26pub struct BenchmarkConfig {
28 pub repetitions: usize,
30 pub quality_metrics: Vec<QualityMetric>,
32 pub performance_metrics: Vec<PerformanceMetric>,
34 pub approximation_dimensions: Vec<usize>,
36 pub random_state: Option<u64>,
38 pub warmup_iterations: usize,
40 pub timeout_seconds: Option<u64>,
42}
43
44impl Default for BenchmarkConfig {
45 fn default() -> Self {
46 Self {
47 repetitions: 5,
48 quality_metrics: vec![
49 QualityMetric::KernelAlignment,
50 QualityMetric::FrobeniusError,
51 QualityMetric::SpectralError,
52 QualityMetric::RelativeError,
53 ],
54 performance_metrics: vec![
55 PerformanceMetric::FitTime,
56 PerformanceMetric::TransformTime,
57 PerformanceMetric::MemoryUsage,
58 PerformanceMetric::Throughput,
59 ],
60 approximation_dimensions: vec![50, 100, 200, 500, 1000],
61 random_state: Some(42),
62 warmup_iterations: 3,
63 timeout_seconds: Some(300),
64 }
65 }
66}
67
68#[derive(Debug, Clone, PartialEq, Eq, Hash)]
70pub enum QualityMetric {
72 KernelAlignment,
74 FrobeniusError,
76 SpectralError,
78 RelativeError,
80 NuclearError,
82 EffectiveRank,
84 EigenvalueError,
86}
87
88#[derive(Debug, Clone, PartialEq, Eq, Hash)]
90pub enum PerformanceMetric {
92 FitTime,
94 TransformTime,
96 MemoryUsage,
98 Throughput,
100 TotalTime,
102 MemoryEfficiency,
104}
105
106#[derive(Debug, Clone)]
108pub struct BenchmarkDataset {
110 pub name: String,
112 pub data: Array2<f64>,
114 pub target: Option<Array1<f64>>,
116 pub true_kernel: Option<Array2<f64>>,
118 pub description: String,
120}
121
122impl BenchmarkDataset {
123 pub fn gaussian(n_samples: usize, n_features: usize, noise: f64, seed: u64) -> Self {
125 let mut rng = RealStdRng::seed_from_u64(seed);
126
127 let normal_sample = |rng: &mut RealStdRng, mean: f64, std_dev: f64| -> f64 {
129 let u1: f64 = rng.random();
130 let u2: f64 = rng.random();
131 let z = (-2.0_f64 * u1.ln()).sqrt() * (2.0_f64 * std::f64::consts::PI * u2).cos();
132 mean + z * std_dev
133 };
134
135 let mut data = Array2::zeros((n_samples, n_features));
136 for i in 0..n_samples {
137 for j in 0..n_features {
138 data[[i, j]] =
139 normal_sample(&mut rng, 0.0, 1.0) + normal_sample(&mut rng, 0.0, noise);
140 }
141 }
142
143 let mut target = Array1::zeros(n_samples);
145 let weights = Array1::from_shape_fn(n_features, |_| normal_sample(&mut rng, 0.0, 1.0));
146
147 for (i, sample) in data.axis_iter(Axis(0)).enumerate() {
148 target[i] = sample.dot(&weights) + normal_sample(&mut rng, 0.0, noise);
149 }
150
151 Self {
152 name: format!("Gaussian_{}x{}_noise{}", n_samples, n_features, noise),
153 data,
154 target: Some(target),
155 true_kernel: None,
156 description: format!(
157 "Gaussian dataset with {} samples, {} features, and noise level {}",
158 n_samples, n_features, noise
159 ),
160 }
161 }
162
163 pub fn polynomial(n_samples: usize, n_features: usize, degree: usize, seed: u64) -> Self {
165 let mut rng = RealStdRng::seed_from_u64(seed);
166 let uniform = RandUniform::new(-1.0, 1.0).expect("operation should succeed");
167
168 let mut data = Array2::zeros((n_samples, n_features));
169 for i in 0..n_samples {
170 for j in 0..n_features {
171 data[[i, j]] = rng.sample(uniform);
172 }
173 }
174
175 let mut target = Array1::zeros(n_samples);
177 for (i, sample) in data.axis_iter(Axis(0)).enumerate() {
178 let mut poly_value = 0.0;
179 for &x in sample.iter() {
180 let x: f64 = x;
181 poly_value += x.powi(degree as i32);
182 }
183 target[i] = poly_value / n_features as f64;
184 }
185
186 Self {
187 name: format!("Polynomial_{}x{}_deg{}", n_samples, n_features, degree),
188 data,
189 target: Some(target),
190 true_kernel: None,
191 description: format!(
192 "Polynomial dataset with {} samples, {} features, and degree {}",
193 n_samples, n_features, degree
194 ),
195 }
196 }
197
198 pub fn classification_clusters(
200 n_samples: usize,
201 n_features: usize,
202 n_clusters: usize,
203 seed: u64,
204 ) -> Self {
205 let mut rng = RealStdRng::seed_from_u64(seed);
206
207 let normal_sample = |rng: &mut RealStdRng, mean: f64, std_dev: f64| -> f64 {
209 let u1: f64 = rng.random();
210 let u2: f64 = rng.random();
211 let z = (-2.0_f64 * u1.ln()).sqrt() * (2.0_f64 * std::f64::consts::PI * u2).cos();
212 mean + z * std_dev
213 };
214
215 let samples_per_cluster = n_samples / n_clusters;
216 let mut data = Array2::zeros((n_samples, n_features));
217 let mut target = Array1::zeros(n_samples);
218
219 let cluster_separation = 3.0;
221 let mut centers = Array2::zeros((n_clusters, n_features));
222 for i in 0..n_clusters {
223 for j in 0..n_features {
224 centers[[i, j]] =
225 (i as f64 * cluster_separation) * if j % 2 == 0 { 1.0 } else { -1.0 };
226 }
227 }
228
229 let mut sample_idx = 0;
231 for cluster in 0..n_clusters {
232 for _ in 0..samples_per_cluster {
233 if sample_idx >= n_samples {
234 break;
235 }
236
237 for j in 0..n_features {
238 data[[sample_idx, j]] =
239 centers[[cluster, j]] + normal_sample(&mut rng, 0.0, 0.5);
240 }
241 target[sample_idx] = cluster as f64;
242 sample_idx += 1;
243 }
244 }
245
246 Self {
247 name: format!("Clusters_{}x{}_k{}", n_samples, n_features, n_clusters),
248 data,
249 target: Some(target),
250 true_kernel: None,
251 description: format!(
252 "Classification dataset with {} samples, {} features, and {} clusters",
253 n_samples, n_features, n_clusters
254 ),
255 }
256 }
257}
258
259#[derive(Debug, Clone)]
261pub struct BenchmarkResult {
263 pub method_name: String,
265 pub dataset_name: String,
267 pub approximation_dimension: usize,
269 pub quality_scores: HashMap<QualityMetric, f64>,
271 pub performance_scores: HashMap<PerformanceMetric, Duration>,
273 pub memory_usage_mb: f64,
275 pub success: bool,
277 pub error_message: Option<String>,
279 pub repetition: usize,
281}
282
283#[derive(Debug, Clone)]
285pub struct BenchmarkSummary {
287 pub method_name: String,
289 pub dataset_name: String,
291 pub approximation_dimension: usize,
293 pub quality_means: HashMap<QualityMetric, f64>,
295 pub quality_stds: HashMap<QualityMetric, f64>,
297 pub performance_means: HashMap<PerformanceMetric, Duration>,
299 pub performance_stds: HashMap<PerformanceMetric, Duration>,
301 pub success_rate: f64,
303 pub memory_usage_mean_mb: f64,
305 pub memory_usage_std_mb: f64,
307}
308
309pub trait BenchmarkableKernelMethod {
311 fn method_name(&self) -> String;
313
314 fn benchmark_fit(
316 &self,
317 data: &Array2<f64>,
318 target: Option<&Array1<f64>>,
319 ) -> Result<(Box<dyn BenchmarkableTransformer>, Duration)>;
320
321 fn clone_method(&self) -> Box<dyn BenchmarkableKernelMethod>;
323}
324
325pub trait BenchmarkableTransformer {
327 fn benchmark_transform(&self, data: &Array2<f64>) -> Result<(Array2<f64>, Duration)>;
329
330 fn approximation_dimension(&self) -> usize;
332
333 fn memory_usage_mb(&self) -> f64;
335}
336
337impl KernelApproximationBenchmark {
338 pub fn new(config: BenchmarkConfig) -> Self {
340 Self {
341 config,
342 datasets: Vec::new(),
343 results: Vec::new(),
344 }
345 }
346
347 pub fn add_dataset(&mut self, dataset: BenchmarkDataset) {
349 self.datasets.push(dataset);
350 }
351
352 pub fn add_synthetic_datasets(&mut self, seed: u64) {
354 self.add_dataset(BenchmarkDataset::gaussian(100, 10, 0.1, seed));
356 self.add_dataset(BenchmarkDataset::polynomial(100, 5, 2, seed + 1));
357 self.add_dataset(BenchmarkDataset::classification_clusters(
358 120,
359 8,
360 3,
361 seed + 2,
362 ));
363
364 self.add_dataset(BenchmarkDataset::gaussian(500, 20, 0.2, seed + 3));
366 self.add_dataset(BenchmarkDataset::polynomial(500, 15, 3, seed + 4));
367 self.add_dataset(BenchmarkDataset::classification_clusters(
368 600,
369 25,
370 4,
371 seed + 5,
372 ));
373
374 self.add_dataset(BenchmarkDataset::gaussian(1000, 50, 0.1, seed + 6));
376 self.add_dataset(BenchmarkDataset::polynomial(1000, 30, 2, seed + 7));
377 self.add_dataset(BenchmarkDataset::classification_clusters(
378 1200,
379 40,
380 5,
381 seed + 8,
382 ));
383 }
384
385 pub fn benchmark_method(
387 &mut self,
388 method: &dyn BenchmarkableKernelMethod,
389 ) -> Result<Vec<BenchmarkResult>> {
390 let mut method_results = Vec::new();
391 let _method_name = method.method_name();
392
393 for dataset in &self.datasets {
394 for &n_components in &self.config.approximation_dimensions {
395 for repetition in 0..self.config.repetitions {
396 let result =
397 self.run_single_benchmark(method, dataset, n_components, repetition)?;
398
399 method_results.push(result.clone());
400 self.results.push(result);
401 }
402 }
403 }
404
405 Ok(method_results)
406 }
407
408 fn run_single_benchmark(
409 &self,
410 method: &dyn BenchmarkableKernelMethod,
411 dataset: &BenchmarkDataset,
412 n_components: usize,
413 repetition: usize,
414 ) -> Result<BenchmarkResult> {
415 let start_time = Instant::now();
416
417 if let Some(timeout) = self.config.timeout_seconds {
419 if start_time.elapsed().as_secs() > timeout {
420 return Ok(BenchmarkResult {
421 method_name: method.method_name(),
422 dataset_name: dataset.name.clone(),
423 approximation_dimension: n_components,
424 quality_scores: HashMap::new(),
425 performance_scores: HashMap::new(),
426 memory_usage_mb: 0.0,
427 success: false,
428 error_message: Some("Timeout exceeded".to_string()),
429 repetition,
430 });
431 }
432 }
433
434 for _ in 0..self.config.warmup_iterations {
436 let _ = method.benchmark_fit(&dataset.data, dataset.target.as_ref());
437 }
438
439 let _fit_start = Instant::now();
441 let (fitted_method, fit_time) =
442 match method.benchmark_fit(&dataset.data, dataset.target.as_ref()) {
443 Ok(result) => result,
444 Err(e) => {
445 return Ok(BenchmarkResult {
446 method_name: method.method_name(),
447 dataset_name: dataset.name.clone(),
448 approximation_dimension: n_components,
449 quality_scores: HashMap::new(),
450 performance_scores: HashMap::new(),
451 memory_usage_mb: 0.0,
452 success: false,
453 error_message: Some(format!("Fit error: {}", e)),
454 repetition,
455 });
456 }
457 };
458
459 let (transformed_data, transform_time) =
460 match fitted_method.benchmark_transform(&dataset.data) {
461 Ok(result) => result,
462 Err(e) => {
463 return Ok(BenchmarkResult {
464 method_name: method.method_name(),
465 dataset_name: dataset.name.clone(),
466 approximation_dimension: n_components,
467 quality_scores: HashMap::new(),
468 performance_scores: HashMap::new(),
469 memory_usage_mb: 0.0,
470 success: false,
471 error_message: Some(format!("Transform error: {}", e)),
472 repetition,
473 });
474 }
475 };
476
477 let memory_usage = fitted_method.memory_usage_mb();
478 let total_time = fit_time + transform_time;
479 let transform_secs = transform_time.as_secs_f64();
480 let throughput_time = if transform_secs > 0.0 {
481 Duration::from_secs_f64(dataset.data.nrows() as f64 / transform_secs)
482 } else {
483 Duration::from_secs(0)
484 };
485
486 let mut quality_scores = HashMap::new();
488
489 if let Some(ref true_kernel) = dataset.true_kernel {
490 let approx_kernel = self.compute_approximate_kernel(&transformed_data)?;
492
493 for metric in &self.config.quality_metrics {
494 let score = self.compute_quality_metric(metric, true_kernel, &approx_kernel)?;
495 quality_scores.insert(metric.clone(), score);
496 }
497 } else {
498 for metric in &self.config.quality_metrics {
500 let score = self.compute_approximate_quality_metric(
501 metric,
502 &dataset.data,
503 &transformed_data,
504 )?;
505 quality_scores.insert(metric.clone(), score);
506 }
507 }
508
509 let mut performance_scores = HashMap::new();
511 performance_scores.insert(PerformanceMetric::FitTime, fit_time);
512 performance_scores.insert(PerformanceMetric::TransformTime, transform_time);
513 performance_scores.insert(PerformanceMetric::TotalTime, total_time);
514 performance_scores.insert(PerformanceMetric::Throughput, throughput_time);
515 performance_scores.insert(
516 PerformanceMetric::MemoryUsage,
517 Duration::from_secs_f64(memory_usage),
518 );
519 performance_scores.insert(
520 PerformanceMetric::MemoryEfficiency,
521 Duration::from_secs_f64(
522 quality_scores
523 .get(&QualityMetric::KernelAlignment)
524 .unwrap_or(&0.0)
525 / memory_usage.max(1.0),
526 ),
527 );
528
529 Ok(BenchmarkResult {
530 method_name: method.method_name(),
531 dataset_name: dataset.name.clone(),
532 approximation_dimension: n_components,
533 quality_scores,
534 performance_scores,
535 memory_usage_mb: memory_usage,
536 success: true,
537 error_message: None,
538 repetition,
539 })
540 }
541
542 fn compute_approximate_kernel(&self, features: &Array2<f64>) -> Result<Array2<f64>> {
543 let n_samples = features.nrows();
544 let mut kernel = Array2::zeros((n_samples, n_samples));
545
546 for i in 0..n_samples {
547 for j in i..n_samples {
548 let similarity = features.row(i).dot(&features.row(j));
549 kernel[[i, j]] = similarity;
550 kernel[[j, i]] = similarity;
551 }
552 }
553
554 Ok(kernel)
555 }
556
557 fn compute_quality_metric(
558 &self,
559 metric: &QualityMetric,
560 true_kernel: &Array2<f64>,
561 approx_kernel: &Array2<f64>,
562 ) -> Result<f64> {
563 match metric {
564 QualityMetric::KernelAlignment => {
565 Ok(self.compute_kernel_alignment(true_kernel, approx_kernel)?)
566 }
567 QualityMetric::FrobeniusError => {
568 let diff = true_kernel - approx_kernel;
569 let frobenius_norm = diff.mapv(|x| x * x).sum().sqrt();
570 Ok(frobenius_norm)
571 }
572 QualityMetric::SpectralError => {
573 let diff = true_kernel - approx_kernel;
575 let max_abs = diff
576 .mapv(|x: f64| x.abs())
577 .fold(0.0_f64, |acc, &x| acc.max(x));
578 Ok(max_abs)
579 }
580 QualityMetric::RelativeError => {
581 let diff = true_kernel - approx_kernel;
582 let frobenius_diff = diff.mapv(|x| x * x).sum().sqrt();
583 let frobenius_true = true_kernel.mapv(|x| x * x).sum().sqrt();
584 Ok(frobenius_diff / frobenius_true.max(1e-8))
585 }
586 QualityMetric::NuclearError => {
587 let diff = true_kernel - approx_kernel;
589 let nuclear_norm = diff.mapv(|x| x.abs()).sum();
590 Ok(nuclear_norm)
591 }
592 QualityMetric::EffectiveRank => {
593 let eigenvalues = self.compute_approximate_eigenvalues(approx_kernel)?;
595 let sum_eigenvalues = eigenvalues.sum();
596 let sum_squared_eigenvalues = eigenvalues.mapv(|x| x * x).sum();
597 Ok(sum_eigenvalues * sum_eigenvalues / sum_squared_eigenvalues.max(1e-8))
598 }
599 QualityMetric::EigenvalueError => {
600 let true_eigenvalues = self.compute_approximate_eigenvalues(true_kernel)?;
601 let approx_eigenvalues = self.compute_approximate_eigenvalues(approx_kernel)?;
602
603 let min_len = true_eigenvalues.len().min(approx_eigenvalues.len());
604 let mut error = 0.0;
605 for i in 0..min_len {
606 error += (true_eigenvalues[i] - approx_eigenvalues[i]).abs();
607 }
608 Ok(error / min_len as f64)
609 }
610 }
611 }
612
613 fn compute_approximate_quality_metric(
614 &self,
615 metric: &QualityMetric,
616 original_data: &Array2<f64>,
617 features: &Array2<f64>,
618 ) -> Result<f64> {
619 match metric {
620 QualityMetric::KernelAlignment => {
621 let rbf_kernel = self.compute_rbf_kernel(original_data, 1.0)?;
623 let approx_kernel = self.compute_approximate_kernel(features)?;
624 Ok(self.compute_kernel_alignment(&rbf_kernel, &approx_kernel)?)
625 }
626 QualityMetric::EffectiveRank => {
627 let approx_kernel = self.compute_approximate_kernel(features)?;
628 let eigenvalues = self.compute_approximate_eigenvalues(&approx_kernel)?;
629 let sum_eigenvalues = eigenvalues.sum();
630 let sum_squared_eigenvalues = eigenvalues.mapv(|x| x * x).sum();
631 Ok(sum_eigenvalues * sum_eigenvalues / sum_squared_eigenvalues.max(1e-8))
632 }
633 _ => {
634 Ok(features.ncols() as f64)
636 }
637 }
638 }
639
640 fn compute_kernel_alignment(&self, k1: &Array2<f64>, k2: &Array2<f64>) -> Result<f64> {
641 let n = k1.nrows();
642
643 let row_means_k1 = k1.mean_axis(Axis(1)).expect("operation should succeed");
645 let row_means_k2 = k2.mean_axis(Axis(1)).expect("operation should succeed");
646 let total_mean_k1 = row_means_k1.mean().expect("operation should succeed");
647 let total_mean_k2 = row_means_k2.mean().expect("operation should succeed");
648
649 let mut k1_centered = k1.clone();
650 let mut k2_centered = k2.clone();
651
652 for i in 0..n {
653 for j in 0..n {
654 k1_centered[[i, j]] -= row_means_k1[i] + row_means_k1[j] - total_mean_k1;
655 k2_centered[[i, j]] -= row_means_k2[i] + row_means_k2[j] - total_mean_k2;
656 }
657 }
658
659 let numerator = (&k1_centered * &k2_centered).sum();
661 let denom1 = (&k1_centered * &k1_centered).sum().sqrt();
662 let denom2 = (&k2_centered * &k2_centered).sum().sqrt();
663
664 Ok(numerator / (denom1 * denom2).max(1e-8))
665 }
666
667 fn compute_rbf_kernel(&self, data: &Array2<f64>, gamma: f64) -> Result<Array2<f64>> {
668 let n_samples = data.nrows();
669 let mut kernel = Array2::zeros((n_samples, n_samples));
670
671 for i in 0..n_samples {
672 for j in i..n_samples {
673 let diff = &data.row(i) - &data.row(j);
674 let dist_sq = diff.mapv(|x| x * x).sum();
675 let similarity = (-gamma * dist_sq).exp();
676 kernel[[i, j]] = similarity;
677 kernel[[j, i]] = similarity;
678 }
679 }
680
681 Ok(kernel)
682 }
683
684 fn compute_approximate_eigenvalues(&self, matrix: &Array2<f64>) -> Result<Array1<f64>> {
685 let n = matrix.nrows();
687 let mut eigenvalues = Array1::zeros(n);
688
689 for i in 0..n {
690 eigenvalues[i] = matrix[[i, i]];
691 }
692
693 let mut eigenvalues_vec: Vec<f64> = eigenvalues.to_vec();
695 eigenvalues_vec.sort_by(|a, b| b.partial_cmp(a).expect("operation should succeed"));
696
697 Ok(Array1::from_vec(eigenvalues_vec))
698 }
699
700 pub fn summarize_results(&self) -> Vec<BenchmarkSummary> {
702 let mut summaries = Vec::new();
703 let mut grouped_results: HashMap<(String, String, usize), Vec<&BenchmarkResult>> =
704 HashMap::new();
705
706 for result in &self.results {
708 let key = (
709 result.method_name.clone(),
710 result.dataset_name.clone(),
711 result.approximation_dimension,
712 );
713 grouped_results.entry(key).or_default().push(result);
714 }
715
716 for ((method_name, dataset_name, approximation_dimension), results) in grouped_results {
718 let total_results = results.len();
719 let successful_results: Vec<_> = results.into_iter().filter(|r| r.success).collect();
720
721 if successful_results.is_empty() {
722 continue;
723 }
724
725 let mut quality_means = HashMap::new();
726 let mut quality_stds = HashMap::new();
727 let mut performance_means = HashMap::new();
728 let mut performance_stds = HashMap::new();
729
730 for metric in &self.config.quality_metrics {
732 let values: Vec<f64> = successful_results
733 .iter()
734 .filter_map(|r| r.quality_scores.get(metric))
735 .cloned()
736 .collect();
737
738 if !values.is_empty() {
739 let mean = values.iter().sum::<f64>() / values.len() as f64;
740 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
741 / values.len() as f64;
742 let std = variance.sqrt();
743
744 quality_means.insert(metric.clone(), mean);
745 quality_stds.insert(metric.clone(), std);
746 }
747 }
748
749 for metric in &self.config.performance_metrics {
751 let values: Vec<Duration> = successful_results
752 .iter()
753 .filter_map(|r| r.performance_scores.get(metric))
754 .cloned()
755 .collect();
756
757 if !values.is_empty() {
758 let mean_secs: f64 =
759 values.iter().map(|d| d.as_secs_f64()).sum::<f64>() / values.len() as f64;
760 let variance: f64 = values
761 .iter()
762 .map(|d| (d.as_secs_f64() - mean_secs).powi(2))
763 .sum::<f64>()
764 / values.len() as f64;
765 let std_secs = variance.sqrt();
766
767 performance_means.insert(metric.clone(), Duration::from_secs_f64(mean_secs));
768 performance_stds.insert(metric.clone(), Duration::from_secs_f64(std_secs));
769 }
770 }
771
772 let memory_values: Vec<f64> = successful_results
774 .iter()
775 .map(|r| r.memory_usage_mb)
776 .collect();
777
778 let memory_mean = memory_values.iter().sum::<f64>() / memory_values.len() as f64;
779 let memory_variance = memory_values
780 .iter()
781 .map(|x| (x - memory_mean).powi(2))
782 .sum::<f64>()
783 / memory_values.len() as f64;
784 let memory_std = memory_variance.sqrt();
785
786 let success_rate = successful_results.len() as f64 / total_results as f64;
787
788 summaries.push(BenchmarkSummary {
789 method_name,
790 dataset_name,
791 approximation_dimension,
792 quality_means,
793 quality_stds,
794 performance_means,
795 performance_stds,
796 success_rate,
797 memory_usage_mean_mb: memory_mean,
798 memory_usage_std_mb: memory_std,
799 });
800 }
801
802 summaries
803 }
804
805 pub fn get_results(&self) -> &[BenchmarkResult] {
807 &self.results
808 }
809
810 pub fn clear_results(&mut self) {
812 self.results.clear();
813 }
814
815 pub fn export_results_csv(&self) -> String {
817 let mut csv = String::new();
818
819 csv.push_str("method,dataset,n_components,repetition,success,");
821 csv.push_str("kernel_alignment,frobenius_error,spectral_error,relative_error,");
822 csv.push_str("fit_time_ms,transform_time_ms,total_time_ms,memory_mb\n");
823
824 for result in &self.results {
826 csv.push_str(&format!(
827 "{},{},{},{},{},",
828 result.method_name,
829 result.dataset_name,
830 result.approximation_dimension,
831 result.repetition,
832 result.success
833 ));
834
835 let kernel_alignment = result
837 .quality_scores
838 .get(&QualityMetric::KernelAlignment)
839 .map(|x| x.to_string())
840 .unwrap_or_else(|| "".to_string());
841 let frobenius_error = result
842 .quality_scores
843 .get(&QualityMetric::FrobeniusError)
844 .map(|x| x.to_string())
845 .unwrap_or_else(|| "".to_string());
846 let spectral_error = result
847 .quality_scores
848 .get(&QualityMetric::SpectralError)
849 .map(|x| x.to_string())
850 .unwrap_or_else(|| "".to_string());
851 let relative_error = result
852 .quality_scores
853 .get(&QualityMetric::RelativeError)
854 .map(|x| x.to_string())
855 .unwrap_or_else(|| "".to_string());
856
857 csv.push_str(&format!(
858 "{},{},{},{},",
859 kernel_alignment, frobenius_error, spectral_error, relative_error
860 ));
861
862 let fit_time = result
864 .performance_scores
865 .get(&PerformanceMetric::FitTime)
866 .map(|d| d.as_millis().to_string())
867 .unwrap_or_else(|| "".to_string());
868 let transform_time = result
869 .performance_scores
870 .get(&PerformanceMetric::TransformTime)
871 .map(|d| d.as_millis().to_string())
872 .unwrap_or_else(|| "".to_string());
873 let total_time = result
874 .performance_scores
875 .get(&PerformanceMetric::TotalTime)
876 .map(|d| d.as_millis().to_string())
877 .unwrap_or_else(|| "".to_string());
878
879 csv.push_str(&format!(
880 "{},{},{},{}\n",
881 fit_time, transform_time, total_time, result.memory_usage_mb
882 ));
883 }
884
885 csv
886 }
887}
888
889#[allow(non_snake_case)]
890#[cfg(test)]
891mod tests {
892 use super::*;
893 struct MockRBFMethod {
895 n_components: usize,
896 }
897
898 impl BenchmarkableKernelMethod for MockRBFMethod {
899 fn method_name(&self) -> String {
900 "MockRBF".to_string()
901 }
902
903 fn benchmark_fit(
904 &self,
905 _data: &Array2<f64>,
906 _target: Option<&Array1<f64>>,
907 ) -> Result<(Box<dyn BenchmarkableTransformer>, Duration)> {
908 let start = Instant::now();
909
910 let duration = start.elapsed();
912 Ok((
913 Box::new(MockFittedRBF {
914 n_components: self.n_components,
915 }),
916 duration,
917 ))
918 }
919
920 fn clone_method(&self) -> Box<dyn BenchmarkableKernelMethod> {
921 Box::new(MockRBFMethod {
922 n_components: self.n_components,
923 })
924 }
925 }
926
927 struct MockFittedRBF {
928 n_components: usize,
929 }
930
931 impl BenchmarkableTransformer for MockFittedRBF {
932 fn benchmark_transform(&self, data: &Array2<f64>) -> Result<(Array2<f64>, Duration)> {
933 let start = Instant::now();
934 let result = Array2::zeros((data.nrows(), self.n_components));
936 let duration = start.elapsed();
937 Ok((result, duration))
938 }
939
940 fn approximation_dimension(&self) -> usize {
941 self.n_components
942 }
943
944 fn memory_usage_mb(&self) -> f64 {
945 (self.n_components * 10 * 8) as f64 / (1024.0 * 1024.0)
947 }
948 }
949
950 #[test]
951 fn test_benchmark_dataset_creation() {
952 let dataset = BenchmarkDataset::gaussian(100, 10, 0.1, 42);
953 assert_eq!(dataset.data.shape(), &[100, 10]);
954 assert!(dataset.target.is_some());
955 assert_eq!(
956 dataset
957 .target
958 .as_ref()
959 .expect("operation should succeed")
960 .len(),
961 100
962 );
963 }
964
965 #[test]
966 fn test_benchmark_execution() {
967 let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
968 repetitions: 2,
969 approximation_dimensions: vec![10, 20],
970 ..Default::default()
971 });
972
973 let dataset = BenchmarkDataset::gaussian(50, 5, 0.1, 42);
974 benchmark.add_dataset(dataset);
975
976 let method = MockRBFMethod { n_components: 10 };
977 let results = benchmark
978 .benchmark_method(&method)
979 .expect("operation should succeed");
980
981 assert_eq!(results.len(), 4); assert!(results.iter().all(|r| r.success));
983 }
984
985 #[test]
986 fn test_benchmark_summary() {
987 let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
988 repetitions: 3,
989 approximation_dimensions: vec![10],
990 ..Default::default()
991 });
992
993 let dataset = BenchmarkDataset::gaussian(50, 5, 0.1, 42);
994 benchmark.add_dataset(dataset);
995
996 let method = MockRBFMethod { n_components: 10 };
997 benchmark
998 .benchmark_method(&method)
999 .expect("operation should succeed");
1000
1001 let summaries = benchmark.summarize_results();
1002 assert_eq!(summaries.len(), 1);
1003 assert_eq!(summaries[0].success_rate, 1.0);
1004 }
1005
1006 #[test]
1007 fn test_csv_export() {
1008 let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
1009 repetitions: 1,
1010 approximation_dimensions: vec![5],
1011 ..Default::default()
1012 });
1013
1014 let dataset = BenchmarkDataset::gaussian(20, 3, 0.1, 42);
1015 benchmark.add_dataset(dataset);
1016
1017 let method = MockRBFMethod { n_components: 5 };
1018 benchmark
1019 .benchmark_method(&method)
1020 .expect("operation should succeed");
1021
1022 let csv = benchmark.export_results_csv();
1023 assert!(csv.contains("method,dataset"));
1024 assert!(csv.contains("MockRBF"));
1025 assert!(csv.contains("Gaussian"));
1026 }
1027}