use scirs2_core::ndarray::{Array1, Array2, Axis};
use scirs2_core::random::essentials::Uniform as RandUniform;
use scirs2_core::random::rngs::StdRng as RealStdRng;
use scirs2_core::random::Rng;
use scirs2_core::random::SeedableRng;
use sklears_core::prelude::Result;
use std::collections::HashMap;
use std::time::{Duration, Instant};

#[derive(Debug, Clone)]
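/// Benchmarking harness for comparing kernel approximation methods across
/// datasets, approximation dimensions, and repetitions.
///
/// The example below is an illustrative sketch (marked `ignore`, so it is not
/// compiled as a doctest); `MyMethod` is a placeholder for any type
/// implementing [`BenchmarkableKernelMethod`].
///
/// ```ignore
/// let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig::default());
/// benchmark.add_synthetic_datasets(42);
/// let results = benchmark.benchmark_method(&MyMethod::default())?;
/// let summaries = benchmark.summarize_results();
/// std::fs::write("results.csv", benchmark.export_results_csv())?;
/// ```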
pub struct KernelApproximationBenchmark {
    /// Benchmark configuration (metrics, dimensions, repetitions, ...).
    config: BenchmarkConfig,
    /// Datasets to benchmark against.
    datasets: Vec<BenchmarkDataset>,
    /// Raw results accumulated across all benchmark runs.
    results: Vec<BenchmarkResult>,
}

#[derive(Debug, Clone)]
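/// Configuration controlling which metrics are collected and how benchmark
/// runs are repeated.
///
/// A minimal sketch of overriding a few fields (marked `ignore`, so it is not
/// compiled as a doctest):
///
/// ```ignore
/// let config = BenchmarkConfig {
///     repetitions: 3,
///     approximation_dimensions: vec![64, 128],
///     ..Default::default()
/// };
/// let benchmark = KernelApproximationBenchmark::new(config);
/// ```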
pub struct BenchmarkConfig {
    /// Number of repetitions per (method, dataset, dimension) combination.
    pub repetitions: usize,
    /// Quality metrics to evaluate for each run.
    pub quality_metrics: Vec<QualityMetric>,
    /// Performance metrics to record for each run.
    pub performance_metrics: Vec<PerformanceMetric>,
    /// Approximation dimensions (numbers of components) to benchmark.
    pub approximation_dimensions: Vec<usize>,
    /// Random seed for reproducibility.
    pub random_state: Option<u64>,
    /// Number of warmup iterations run before the timed fit.
    pub warmup_iterations: usize,
    /// Optional timeout in seconds for a single benchmark run.
    pub timeout_seconds: Option<u64>,
}

impl Default for BenchmarkConfig {
    fn default() -> Self {
        Self {
            repetitions: 5,
            quality_metrics: vec![
                QualityMetric::KernelAlignment,
                QualityMetric::FrobeniusError,
                QualityMetric::SpectralError,
                QualityMetric::RelativeError,
            ],
            performance_metrics: vec![
                PerformanceMetric::FitTime,
                PerformanceMetric::TransformTime,
                PerformanceMetric::MemoryUsage,
                PerformanceMetric::Throughput,
            ],
            approximation_dimensions: vec![50, 100, 200, 500, 1000],
            random_state: Some(42),
            warmup_iterations: 3,
            timeout_seconds: Some(300),
        }
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Quality metrics measuring how well an approximation matches the exact kernel.
pub enum QualityMetric {
    /// Centered kernel alignment between the exact and approximate kernels.
    KernelAlignment,
    /// Frobenius norm of the difference between the kernels.
    FrobeniusError,
    /// Spectral error, approximated by the maximum absolute entry of the difference.
    SpectralError,
    /// Frobenius error relative to the Frobenius norm of the exact kernel.
    RelativeError,
    /// Nuclear error, approximated by the sum of absolute entries of the difference.
    NuclearError,
    /// Effective rank of the approximate kernel.
    EffectiveRank,
    /// Mean absolute error between the leading eigenvalues of both kernels.
    EigenvalueError,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Performance metrics recorded for each benchmark run.
pub enum PerformanceMetric {
    /// Time spent fitting the approximation.
    FitTime,
    /// Time spent transforming the data.
    TransformTime,
    /// Memory usage of the fitted transformer (megabytes encoded as seconds).
    MemoryUsage,
    /// Transform throughput (samples per second encoded as seconds).
    Throughput,
    /// Total fit plus transform time.
    TotalTime,
    /// Kernel alignment achieved per megabyte of memory (encoded as seconds).
    MemoryEfficiency,
}

#[derive(Debug, Clone)]
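/// A dataset used for benchmarking, with optional targets and an optional
/// precomputed exact kernel matrix.
///
/// A minimal sketch of building the synthetic datasets (marked `ignore`, so it
/// is not compiled as a doctest):
///
/// ```ignore
/// let gaussian = BenchmarkDataset::gaussian(200, 10, 0.1, 42);
/// let polynomial = BenchmarkDataset::polynomial(200, 5, 3, 43);
/// let clusters = BenchmarkDataset::classification_clusters(240, 8, 3, 44);
/// ```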
pub struct BenchmarkDataset {
    /// Human-readable dataset name.
    pub name: String,
    /// Feature matrix of shape (n_samples, n_features).
    pub data: Array2<f64>,
    /// Optional target values, one per sample.
    pub target: Option<Array1<f64>>,
    /// Optional precomputed exact kernel matrix for quality evaluation.
    pub true_kernel: Option<Array2<f64>>,
    /// Short description of how the dataset was generated.
    pub description: String,
}

impl BenchmarkDataset {
    /// Generate a Gaussian dataset with a noisy linear target.
    pub fn gaussian(n_samples: usize, n_features: usize, noise: f64, seed: u64) -> Self {
        let mut rng = RealStdRng::seed_from_u64(seed);

        // Box-Muller transform for sampling from a normal distribution.
        let normal_sample = |rng: &mut RealStdRng, mean: f64, std_dev: f64| -> f64 {
            let u1: f64 = rng.gen();
            let u2: f64 = rng.gen();
            let z = (-2.0_f64 * u1.ln()).sqrt() * (2.0_f64 * std::f64::consts::PI * u2).cos();
            mean + z * std_dev
        };

        let mut data = Array2::zeros((n_samples, n_features));
        for i in 0..n_samples {
            for j in 0..n_features {
                data[[i, j]] =
                    normal_sample(&mut rng, 0.0, 1.0) + normal_sample(&mut rng, 0.0, noise);
            }
        }

        // Targets are a noisy linear combination of the features.
        let mut target = Array1::zeros(n_samples);
        let weights = Array1::from_shape_fn(n_features, |_| normal_sample(&mut rng, 0.0, 1.0));

        for (i, sample) in data.axis_iter(Axis(0)).enumerate() {
            target[i] = sample.dot(&weights) + normal_sample(&mut rng, 0.0, noise);
        }

        Self {
            name: format!("Gaussian_{}x{}_noise{}", n_samples, n_features, noise),
            data,
            target: Some(target),
            true_kernel: None,
            description: format!(
                "Gaussian dataset with {} samples, {} features, and noise level {}",
                n_samples, n_features, noise
            ),
        }
    }

    /// Generate a dataset with uniform features and a polynomial target.
    pub fn polynomial(n_samples: usize, n_features: usize, degree: usize, seed: u64) -> Self {
        let mut rng = RealStdRng::seed_from_u64(seed);
        let uniform = RandUniform::new(-1.0, 1.0).unwrap();

        let mut data = Array2::zeros((n_samples, n_features));
        for i in 0..n_samples {
            for j in 0..n_features {
                data[[i, j]] = rng.sample(uniform);
            }
        }

        // Targets are the mean of per-feature monomials of the given degree.
        let mut target = Array1::zeros(n_samples);
        for (i, sample) in data.axis_iter(Axis(0)).enumerate() {
            let mut poly_value = 0.0;
            for &x in sample.iter() {
                poly_value += x.powi(degree as i32);
            }
            target[i] = poly_value / n_features as f64;
        }

        Self {
            name: format!("Polynomial_{}x{}_deg{}", n_samples, n_features, degree),
            data,
            target: Some(target),
            true_kernel: None,
            description: format!(
                "Polynomial dataset with {} samples, {} features, and degree {}",
                n_samples, n_features, degree
            ),
        }
    }

    /// Generate a classification dataset of Gaussian clusters with
    /// well-separated centers.
    pub fn classification_clusters(
        n_samples: usize,
        n_features: usize,
        n_clusters: usize,
        seed: u64,
    ) -> Self {
        let mut rng = RealStdRng::seed_from_u64(seed);

        // Box-Muller transform for sampling from a normal distribution.
        let normal_sample = |rng: &mut RealStdRng, mean: f64, std_dev: f64| -> f64 {
            let u1: f64 = rng.gen();
            let u2: f64 = rng.gen();
            let z = (-2.0_f64 * u1.ln()).sqrt() * (2.0_f64 * std::f64::consts::PI * u2).cos();
            mean + z * std_dev
        };

        let samples_per_cluster = n_samples / n_clusters;
        let mut data = Array2::zeros((n_samples, n_features));
        let mut target = Array1::zeros(n_samples);

        // Place cluster centers on an alternating-sign grid.
        let cluster_separation = 3.0;
        let mut centers = Array2::zeros((n_clusters, n_features));
        for i in 0..n_clusters {
            for j in 0..n_features {
                centers[[i, j]] =
                    (i as f64 * cluster_separation) * if j % 2 == 0 { 1.0 } else { -1.0 };
            }
        }

        // Draw samples around each center with small isotropic noise.
        let mut sample_idx = 0;
        for cluster in 0..n_clusters {
            for _ in 0..samples_per_cluster {
                if sample_idx >= n_samples {
                    break;
                }

                for j in 0..n_features {
                    data[[sample_idx, j]] =
                        centers[[cluster, j]] + normal_sample(&mut rng, 0.0, 0.5);
                }
                target[sample_idx] = cluster as f64;
                sample_idx += 1;
            }
        }

        Self {
            name: format!("Clusters_{}x{}_k{}", n_samples, n_features, n_clusters),
            data,
            target: Some(target),
            true_kernel: None,
            description: format!(
                "Classification dataset with {} samples, {} features, and {} clusters",
                n_samples, n_features, n_clusters
            ),
        }
    }
}

#[derive(Debug, Clone)]
/// Result of a single benchmark run.
pub struct BenchmarkResult {
    /// Name of the benchmarked method.
    pub method_name: String,
    /// Name of the dataset used.
    pub dataset_name: String,
    /// Number of approximation components.
    pub approximation_dimension: usize,
    /// Quality metric values for this run.
    pub quality_scores: HashMap<QualityMetric, f64>,
    /// Performance metric values for this run.
    pub performance_scores: HashMap<PerformanceMetric, Duration>,
    /// Memory usage of the fitted transformer in megabytes.
    pub memory_usage_mb: f64,
    /// Whether the run completed without errors.
    pub success: bool,
    /// Error message if the run failed.
    pub error_message: Option<String>,
    /// Index of the repetition this result belongs to.
    pub repetition: usize,
}

#[derive(Debug, Clone)]
/// Aggregated statistics over all repetitions of a (method, dataset, dimension) combination.
pub struct BenchmarkSummary {
    /// Name of the benchmarked method.
    pub method_name: String,
    /// Name of the dataset used.
    pub dataset_name: String,
    /// Number of approximation components.
    pub approximation_dimension: usize,
    /// Mean of each quality metric over successful runs.
    pub quality_means: HashMap<QualityMetric, f64>,
    /// Standard deviation of each quality metric over successful runs.
    pub quality_stds: HashMap<QualityMetric, f64>,
    /// Mean of each performance metric over successful runs.
    pub performance_means: HashMap<PerformanceMetric, Duration>,
    /// Standard deviation of each performance metric over successful runs.
    pub performance_stds: HashMap<PerformanceMetric, Duration>,
    /// Fraction of runs that completed successfully.
    pub success_rate: f64,
    /// Mean memory usage in megabytes over successful runs.
    pub memory_usage_mean_mb: f64,
    /// Standard deviation of memory usage in megabytes.
    pub memory_usage_std_mb: f64,
}

/// A kernel approximation method that can be benchmarked.
pub trait BenchmarkableKernelMethod {
    /// Human-readable name of the method.
    fn method_name(&self) -> String;

    /// Fit the method on the data and return the fitted transformer together
    /// with the time spent fitting.
    fn benchmark_fit(
        &self,
        data: &Array2<f64>,
        target: Option<&Array1<f64>>,
    ) -> Result<(Box<dyn BenchmarkableTransformer>, Duration)>;

    /// Clone the method into a boxed trait object.
    fn clone_method(&self) -> Box<dyn BenchmarkableKernelMethod>;
}

/// A fitted kernel approximation transformer that can be benchmarked.
pub trait BenchmarkableTransformer {
    /// Transform the data into the approximate feature space and return the
    /// features together with the time spent transforming.
    fn benchmark_transform(&self, data: &Array2<f64>) -> Result<(Array2<f64>, Duration)>;

    /// Number of approximation components produced by the transformer.
    fn approximation_dimension(&self) -> usize;

    /// Estimated memory usage of the fitted transformer in megabytes.
    fn memory_usage_mb(&self) -> f64;
}

impl KernelApproximationBenchmark {
    /// Create a new benchmark with the given configuration.
    pub fn new(config: BenchmarkConfig) -> Self {
        Self {
            config,
            datasets: Vec::new(),
            results: Vec::new(),
        }
    }

    /// Add a dataset to the benchmark.
    pub fn add_dataset(&mut self, dataset: BenchmarkDataset) {
        self.datasets.push(dataset);
    }

    /// Add a standard collection of small, medium, and large synthetic datasets.
    pub fn add_synthetic_datasets(&mut self, seed: u64) {
        // Small datasets
        self.add_dataset(BenchmarkDataset::gaussian(100, 10, 0.1, seed));
        self.add_dataset(BenchmarkDataset::polynomial(100, 5, 2, seed + 1));
        self.add_dataset(BenchmarkDataset::classification_clusters(
            120,
            8,
            3,
            seed + 2,
        ));

        // Medium datasets
        self.add_dataset(BenchmarkDataset::gaussian(500, 20, 0.2, seed + 3));
        self.add_dataset(BenchmarkDataset::polynomial(500, 15, 3, seed + 4));
        self.add_dataset(BenchmarkDataset::classification_clusters(
            600,
            25,
            4,
            seed + 5,
        ));

        // Large datasets
        self.add_dataset(BenchmarkDataset::gaussian(1000, 50, 0.1, seed + 6));
        self.add_dataset(BenchmarkDataset::polynomial(1000, 30, 2, seed + 7));
        self.add_dataset(BenchmarkDataset::classification_clusters(
            1200,
            40,
            5,
            seed + 8,
        ));
    }

    /// Benchmark a single method across all datasets, approximation
    /// dimensions, and repetitions, returning the results for this method.
    pub fn benchmark_method(
        &mut self,
        method: &dyn BenchmarkableKernelMethod,
    ) -> Result<Vec<BenchmarkResult>> {
        let mut method_results = Vec::new();
        let _method_name = method.method_name();

        for dataset in &self.datasets {
            for &n_components in &self.config.approximation_dimensions {
                for repetition in 0..self.config.repetitions {
                    let result =
                        self.run_single_benchmark(method, dataset, n_components, repetition)?;

                    method_results.push(result.clone());
                    self.results.push(result);
                }
            }
        }

        Ok(method_results)
    }

    /// Run one benchmark repetition for a method on a dataset at a given
    /// approximation dimension.
    fn run_single_benchmark(
        &self,
        method: &dyn BenchmarkableKernelMethod,
        dataset: &BenchmarkDataset,
        n_components: usize,
        repetition: usize,
    ) -> Result<BenchmarkResult> {
        let start_time = Instant::now();

        // Coarse timeout check at the start of the run.
        if let Some(timeout) = self.config.timeout_seconds {
            if start_time.elapsed().as_secs() > timeout {
                return Ok(BenchmarkResult {
                    method_name: method.method_name(),
                    dataset_name: dataset.name.clone(),
                    approximation_dimension: n_components,
                    quality_scores: HashMap::new(),
                    performance_scores: HashMap::new(),
                    memory_usage_mb: 0.0,
                    success: false,
                    error_message: Some("Timeout exceeded".to_string()),
                    repetition,
                });
            }
        }

        // Warmup iterations (results are discarded).
        for _ in 0..self.config.warmup_iterations {
            let _ = method.benchmark_fit(&dataset.data, dataset.target.as_ref());
        }

        // Timed fit.
        let _fit_start = Instant::now();
        let (fitted_method, fit_time) =
            match method.benchmark_fit(&dataset.data, dataset.target.as_ref()) {
                Ok(result) => result,
                Err(e) => {
                    return Ok(BenchmarkResult {
                        method_name: method.method_name(),
                        dataset_name: dataset.name.clone(),
                        approximation_dimension: n_components,
                        quality_scores: HashMap::new(),
                        performance_scores: HashMap::new(),
                        memory_usage_mb: 0.0,
                        success: false,
                        error_message: Some(format!("Fit error: {}", e)),
                        repetition,
                    });
                }
            };

        // Timed transform.
        let (transformed_data, transform_time) =
            match fitted_method.benchmark_transform(&dataset.data) {
                Ok(result) => result,
                Err(e) => {
                    return Ok(BenchmarkResult {
                        method_name: method.method_name(),
                        dataset_name: dataset.name.clone(),
                        approximation_dimension: n_components,
                        quality_scores: HashMap::new(),
                        performance_scores: HashMap::new(),
                        memory_usage_mb: 0.0,
                        success: false,
                        error_message: Some(format!("Transform error: {}", e)),
                        repetition,
                    });
                }
            };

        let memory_usage = fitted_method.memory_usage_mb();
        let total_time = fit_time + transform_time;
        // Throughput is encoded as a Duration whose seconds value equals the
        // number of samples processed per second; guard against a zero
        // transform time.
        let throughput_time = Duration::from_secs_f64(
            dataset.data.nrows() as f64 / transform_time.as_secs_f64().max(1e-9),
        );

        // Quality metrics: use the exact kernel when available, otherwise
        // compare against an RBF kernel on the original data.
        let mut quality_scores = HashMap::new();

        if let Some(ref true_kernel) = dataset.true_kernel {
            let approx_kernel = self.compute_approximate_kernel(&transformed_data)?;

            for metric in &self.config.quality_metrics {
                let score = self.compute_quality_metric(metric, true_kernel, &approx_kernel)?;
                quality_scores.insert(metric.clone(), score);
            }
        } else {
            for metric in &self.config.quality_metrics {
                let score = self.compute_approximate_quality_metric(
                    metric,
                    &dataset.data,
                    &transformed_data,
                )?;
                quality_scores.insert(metric.clone(), score);
            }
        }

        // Performance metrics. Non-time quantities (memory, throughput,
        // efficiency) are encoded as Durations so they fit in the same map.
        let mut performance_scores = HashMap::new();
        performance_scores.insert(PerformanceMetric::FitTime, fit_time);
        performance_scores.insert(PerformanceMetric::TransformTime, transform_time);
        performance_scores.insert(PerformanceMetric::TotalTime, total_time);
        performance_scores.insert(PerformanceMetric::Throughput, throughput_time);
        performance_scores.insert(
            PerformanceMetric::MemoryUsage,
            Duration::from_secs_f64(memory_usage),
        );
        performance_scores.insert(
            PerformanceMetric::MemoryEfficiency,
            // Durations cannot be negative, so clamp the efficiency score at zero.
            Duration::from_secs_f64(
                (quality_scores
                    .get(&QualityMetric::KernelAlignment)
                    .unwrap_or(&0.0)
                    / memory_usage.max(1.0))
                .max(0.0),
            ),
        );

        Ok(BenchmarkResult {
            method_name: method.method_name(),
            dataset_name: dataset.name.clone(),
            approximation_dimension: n_components,
            quality_scores,
            performance_scores,
            memory_usage_mb: memory_usage,
            success: true,
            error_message: None,
            repetition,
        })
    }

    /// Compute the approximate kernel matrix as the Gram matrix of the
    /// transformed features.
    fn compute_approximate_kernel(&self, features: &Array2<f64>) -> Result<Array2<f64>> {
        let n_samples = features.nrows();
        let mut kernel = Array2::zeros((n_samples, n_samples));

        for i in 0..n_samples {
            for j in i..n_samples {
                let similarity = features.row(i).dot(&features.row(j));
                kernel[[i, j]] = similarity;
                kernel[[j, i]] = similarity;
            }
        }

        Ok(kernel)
    }

    /// Compute a quality metric comparing the exact and approximate kernels.
    fn compute_quality_metric(
        &self,
        metric: &QualityMetric,
        true_kernel: &Array2<f64>,
        approx_kernel: &Array2<f64>,
    ) -> Result<f64> {
        match metric {
            QualityMetric::KernelAlignment => {
                Ok(self.compute_kernel_alignment(true_kernel, approx_kernel)?)
            }
            QualityMetric::FrobeniusError => {
                let diff = true_kernel - approx_kernel;
                let frobenius_norm = diff.mapv(|x| x * x).sum().sqrt();
                Ok(frobenius_norm)
            }
            QualityMetric::SpectralError => {
                // The spectral norm is approximated by the maximum absolute entry.
                let diff = true_kernel - approx_kernel;
                let max_abs = diff
                    .mapv(|x: f64| x.abs())
                    .fold(0.0_f64, |acc, &x| acc.max(x));
                Ok(max_abs)
            }
            QualityMetric::RelativeError => {
                let diff = true_kernel - approx_kernel;
                let frobenius_diff = diff.mapv(|x| x * x).sum().sqrt();
                let frobenius_true = true_kernel.mapv(|x| x * x).sum().sqrt();
                Ok(frobenius_diff / frobenius_true.max(1e-8))
            }
            QualityMetric::NuclearError => {
                // The nuclear norm is approximated by the sum of absolute entries.
                let diff = true_kernel - approx_kernel;
                let nuclear_norm = diff.mapv(|x| x.abs()).sum();
                Ok(nuclear_norm)
            }
            QualityMetric::EffectiveRank => {
                // Effective rank = (sum of eigenvalues)^2 / (sum of squared eigenvalues).
                let eigenvalues = self.compute_approximate_eigenvalues(approx_kernel)?;
                let sum_eigenvalues = eigenvalues.sum();
                let sum_squared_eigenvalues = eigenvalues.mapv(|x| x * x).sum();
                Ok(sum_eigenvalues * sum_eigenvalues / sum_squared_eigenvalues.max(1e-8))
            }
            QualityMetric::EigenvalueError => {
                let true_eigenvalues = self.compute_approximate_eigenvalues(true_kernel)?;
                let approx_eigenvalues = self.compute_approximate_eigenvalues(approx_kernel)?;

                let min_len = true_eigenvalues.len().min(approx_eigenvalues.len());
                let mut error = 0.0;
                for i in 0..min_len {
                    error += (true_eigenvalues[i] - approx_eigenvalues[i]).abs();
                }
                Ok(error / min_len as f64)
            }
        }
    }

    /// Compute a quality metric when no exact kernel is available, using an
    /// RBF kernel on the original data as the reference.
    fn compute_approximate_quality_metric(
        &self,
        metric: &QualityMetric,
        original_data: &Array2<f64>,
        features: &Array2<f64>,
    ) -> Result<f64> {
        match metric {
            QualityMetric::KernelAlignment => {
                let rbf_kernel = self.compute_rbf_kernel(original_data, 1.0)?;
                let approx_kernel = self.compute_approximate_kernel(features)?;
                Ok(self.compute_kernel_alignment(&rbf_kernel, &approx_kernel)?)
            }
            QualityMetric::EffectiveRank => {
                let approx_kernel = self.compute_approximate_kernel(features)?;
                let eigenvalues = self.compute_approximate_eigenvalues(&approx_kernel)?;
                let sum_eigenvalues = eigenvalues.sum();
                let sum_squared_eigenvalues = eigenvalues.mapv(|x| x * x).sum();
                Ok(sum_eigenvalues * sum_eigenvalues / sum_squared_eigenvalues.max(1e-8))
            }
            _ => {
                // Without a reference kernel, the remaining metrics fall back
                // to reporting the feature dimension.
                Ok(features.ncols() as f64)
            }
        }
    }

    /// Compute the centered kernel alignment between two kernel matrices:
    /// both kernels are double-centered and compared with a normalized
    /// Frobenius inner product.
    fn compute_kernel_alignment(&self, k1: &Array2<f64>, k2: &Array2<f64>) -> Result<f64> {
        let n = k1.nrows();

        let row_means_k1 = k1.mean_axis(Axis(1)).unwrap();
        let row_means_k2 = k2.mean_axis(Axis(1)).unwrap();
        let total_mean_k1 = row_means_k1.mean().unwrap();
        let total_mean_k2 = row_means_k2.mean().unwrap();

        // Double-center both kernels.
        let mut k1_centered = k1.clone();
        let mut k2_centered = k2.clone();

        for i in 0..n {
            for j in 0..n {
                k1_centered[[i, j]] -= row_means_k1[i] + row_means_k1[j] - total_mean_k1;
                k2_centered[[i, j]] -= row_means_k2[i] + row_means_k2[j] - total_mean_k2;
            }
        }

        // Normalized Frobenius inner product of the centered kernels.
        let numerator = (&k1_centered * &k2_centered).sum();
        let denom1 = (&k1_centered * &k1_centered).sum().sqrt();
        let denom2 = (&k2_centered * &k2_centered).sum().sqrt();

        Ok(numerator / (denom1 * denom2).max(1e-8))
    }

    /// Compute an RBF (Gaussian) kernel matrix: k(x, y) = exp(-gamma * ||x - y||^2).
    fn compute_rbf_kernel(&self, data: &Array2<f64>, gamma: f64) -> Result<Array2<f64>> {
        let n_samples = data.nrows();
        let mut kernel = Array2::zeros((n_samples, n_samples));

        for i in 0..n_samples {
            for j in i..n_samples {
                let diff = &data.row(i) - &data.row(j);
                let dist_sq = diff.mapv(|x| x * x).sum();
                let similarity = (-gamma * dist_sq).exp();
                kernel[[i, j]] = similarity;
                kernel[[j, i]] = similarity;
            }
        }

        Ok(kernel)
    }

    /// Approximate the eigenvalues of a kernel matrix by its sorted diagonal
    /// entries (a cheap proxy that avoids a full eigendecomposition).
    fn compute_approximate_eigenvalues(&self, matrix: &Array2<f64>) -> Result<Array1<f64>> {
        let n = matrix.nrows();
        let mut eigenvalues = Array1::zeros(n);

        for i in 0..n {
            eigenvalues[i] = matrix[[i, i]];
        }

        // Sort in descending order.
        let mut eigenvalues_vec: Vec<f64> = eigenvalues.to_vec();
        eigenvalues_vec.sort_by(|a, b| b.partial_cmp(a).unwrap());

        Ok(Array1::from_vec(eigenvalues_vec))
    }

    /// Aggregate raw results into per-(method, dataset, dimension) summaries
    /// with means and standard deviations.
    pub fn summarize_results(&self) -> Vec<BenchmarkSummary> {
        let mut summaries = Vec::new();
        let mut grouped_results: HashMap<(String, String, usize), Vec<&BenchmarkResult>> =
            HashMap::new();

        // Group results by (method, dataset, approximation dimension).
        for result in &self.results {
            let key = (
                result.method_name.clone(),
                result.dataset_name.clone(),
                result.approximation_dimension,
            );
            grouped_results.entry(key).or_default().push(result);
        }

        for ((method_name, dataset_name, approximation_dimension), results) in grouped_results {
            let total_results = results.len();
            let successful_results: Vec<_> = results.into_iter().filter(|r| r.success).collect();

            if successful_results.is_empty() {
                continue;
            }

            let mut quality_means = HashMap::new();
            let mut quality_stds = HashMap::new();
            let mut performance_means = HashMap::new();
            let mut performance_stds = HashMap::new();

            // Mean and standard deviation of each quality metric.
            for metric in &self.config.quality_metrics {
                let values: Vec<f64> = successful_results
                    .iter()
                    .filter_map(|r| r.quality_scores.get(metric))
                    .cloned()
                    .collect();

                if !values.is_empty() {
                    let mean = values.iter().sum::<f64>() / values.len() as f64;
                    let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
                        / values.len() as f64;
                    let std = variance.sqrt();

                    quality_means.insert(metric.clone(), mean);
                    quality_stds.insert(metric.clone(), std);
                }
            }

            // Mean and standard deviation of each performance metric.
            for metric in &self.config.performance_metrics {
                let values: Vec<Duration> = successful_results
                    .iter()
                    .filter_map(|r| r.performance_scores.get(metric))
                    .cloned()
                    .collect();

                if !values.is_empty() {
                    let mean_secs: f64 =
                        values.iter().map(|d| d.as_secs_f64()).sum::<f64>() / values.len() as f64;
                    let variance: f64 = values
                        .iter()
                        .map(|d| (d.as_secs_f64() - mean_secs).powi(2))
                        .sum::<f64>()
                        / values.len() as f64;
                    let std_secs = variance.sqrt();

                    performance_means.insert(metric.clone(), Duration::from_secs_f64(mean_secs));
                    performance_stds.insert(metric.clone(), Duration::from_secs_f64(std_secs));
                }
            }

            // Memory usage statistics.
            let memory_values: Vec<f64> = successful_results
                .iter()
                .map(|r| r.memory_usage_mb)
                .collect();

            let memory_mean = memory_values.iter().sum::<f64>() / memory_values.len() as f64;
            let memory_variance = memory_values
                .iter()
                .map(|x| (x - memory_mean).powi(2))
                .sum::<f64>()
                / memory_values.len() as f64;
            let memory_std = memory_variance.sqrt();

            let success_rate = successful_results.len() as f64 / total_results as f64;

            summaries.push(BenchmarkSummary {
                method_name,
                dataset_name,
                approximation_dimension,
                quality_means,
                quality_stds,
                performance_means,
                performance_stds,
                success_rate,
                memory_usage_mean_mb: memory_mean,
                memory_usage_std_mb: memory_std,
            });
        }

        summaries
    }

    /// Return all raw benchmark results collected so far.
    pub fn get_results(&self) -> &[BenchmarkResult] {
        &self.results
    }

    /// Clear all stored benchmark results.
    pub fn clear_results(&mut self) {
        self.results.clear();
    }

    /// Export all results as a CSV string with one row per benchmark run.
    pub fn export_results_csv(&self) -> String {
        let mut csv = String::new();

        // Header row.
        csv.push_str("method,dataset,n_components,repetition,success,");
        csv.push_str("kernel_alignment,frobenius_error,spectral_error,relative_error,");
        csv.push_str("fit_time_ms,transform_time_ms,total_time_ms,memory_mb\n");

        for result in &self.results {
            // Identification columns.
            csv.push_str(&format!(
                "{},{},{},{},{},",
                result.method_name,
                result.dataset_name,
                result.approximation_dimension,
                result.repetition,
                result.success
            ));

            // Quality columns (empty when a metric was not computed).
            let kernel_alignment = result
                .quality_scores
                .get(&QualityMetric::KernelAlignment)
                .map(|x| x.to_string())
                .unwrap_or_else(|| "".to_string());
            let frobenius_error = result
                .quality_scores
                .get(&QualityMetric::FrobeniusError)
                .map(|x| x.to_string())
                .unwrap_or_else(|| "".to_string());
            let spectral_error = result
                .quality_scores
                .get(&QualityMetric::SpectralError)
                .map(|x| x.to_string())
                .unwrap_or_else(|| "".to_string());
            let relative_error = result
                .quality_scores
                .get(&QualityMetric::RelativeError)
                .map(|x| x.to_string())
                .unwrap_or_else(|| "".to_string());

            csv.push_str(&format!(
                "{},{},{},{},",
                kernel_alignment, frobenius_error, spectral_error, relative_error
            ));

            // Timing and memory columns.
            let fit_time = result
                .performance_scores
                .get(&PerformanceMetric::FitTime)
                .map(|d| d.as_millis().to_string())
                .unwrap_or_else(|| "".to_string());
            let transform_time = result
                .performance_scores
                .get(&PerformanceMetric::TransformTime)
                .map(|d| d.as_millis().to_string())
                .unwrap_or_else(|| "".to_string());
            let total_time = result
                .performance_scores
                .get(&PerformanceMetric::TotalTime)
                .map(|d| d.as_millis().to_string())
                .unwrap_or_else(|| "".to_string());

            csv.push_str(&format!(
                "{},{},{},{}\n",
                fit_time, transform_time, total_time, result.memory_usage_mb
            ));
        }

        csv
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal mock method used to exercise the benchmark harness.
    struct MockRBFMethod {
        n_components: usize,
    }

    impl BenchmarkableKernelMethod for MockRBFMethod {
        fn method_name(&self) -> String {
            "MockRBF".to_string()
        }

        fn benchmark_fit(
            &self,
            _data: &Array2<f64>,
            _target: Option<&Array1<f64>>,
        ) -> Result<(Box<dyn BenchmarkableTransformer>, Duration)> {
            let start = Instant::now();
            // No actual fitting; the mock only reports the elapsed time.
            let duration = start.elapsed();
            Ok((
                Box::new(MockFittedRBF {
                    n_components: self.n_components,
                }),
                duration,
            ))
        }

        fn clone_method(&self) -> Box<dyn BenchmarkableKernelMethod> {
            Box::new(MockRBFMethod {
                n_components: self.n_components,
            })
        }
    }

    /// Mock fitted transformer that returns zero-valued features.
    struct MockFittedRBF {
        n_components: usize,
    }

    impl BenchmarkableTransformer for MockFittedRBF {
        fn benchmark_transform(&self, data: &Array2<f64>) -> Result<(Array2<f64>, Duration)> {
            let start = Instant::now();
            let result = Array2::zeros((data.nrows(), self.n_components));
            let duration = start.elapsed();
            Ok((result, duration))
        }

        fn approximation_dimension(&self) -> usize {
            self.n_components
        }

        fn memory_usage_mb(&self) -> f64 {
            // Rough estimate: n_components x 10 f64 values.
            (self.n_components * 10 * 8) as f64 / (1024.0 * 1024.0)
        }
    }

    #[test]
    fn test_benchmark_dataset_creation() {
        let dataset = BenchmarkDataset::gaussian(100, 10, 0.1, 42);
        assert_eq!(dataset.data.shape(), &[100, 10]);
        assert!(dataset.target.is_some());
        assert_eq!(dataset.target.as_ref().unwrap().len(), 100);
    }

    #[test]
    fn test_benchmark_execution() {
        let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
            repetitions: 2,
            approximation_dimensions: vec![10, 20],
            ..Default::default()
        });

        let dataset = BenchmarkDataset::gaussian(50, 5, 0.1, 42);
        benchmark.add_dataset(dataset);

        let method = MockRBFMethod { n_components: 10 };
        let results = benchmark.benchmark_method(&method).unwrap();

        // 1 dataset x 2 dimensions x 2 repetitions = 4 results.
        assert_eq!(results.len(), 4);
        assert!(results.iter().all(|r| r.success));
    }

    #[test]
    fn test_benchmark_summary() {
        let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
            repetitions: 3,
            approximation_dimensions: vec![10],
            ..Default::default()
        });

        let dataset = BenchmarkDataset::gaussian(50, 5, 0.1, 42);
        benchmark.add_dataset(dataset);

        let method = MockRBFMethod { n_components: 10 };
        benchmark.benchmark_method(&method).unwrap();

        let summaries = benchmark.summarize_results();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].success_rate, 1.0);
    }

    #[test]
    fn test_csv_export() {
        let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
            repetitions: 1,
            approximation_dimensions: vec![5],
            ..Default::default()
        });

        let dataset = BenchmarkDataset::gaussian(20, 3, 0.1, 42);
        benchmark.add_dataset(dataset);

        let method = MockRBFMethod { n_components: 5 };
        benchmark.benchmark_method(&method).unwrap();

        let csv = benchmark.export_results_csv();
        assert!(csv.contains("method,dataset"));
        assert!(csv.contains("MockRBF"));
        assert!(csv.contains("Gaussian"));
    }
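
    #[test]
    fn test_kernel_alignment_self_similarity() {
        // Sanity check: the centered kernel alignment of a kernel with itself
        // should be (approximately) 1.
        let benchmark = KernelApproximationBenchmark::new(BenchmarkConfig::default());
        let dataset = BenchmarkDataset::gaussian(30, 4, 0.1, 7);
        let kernel = benchmark.compute_rbf_kernel(&dataset.data, 1.0).unwrap();
        let alignment = benchmark.compute_kernel_alignment(&kernel, &kernel).unwrap();
        assert!((alignment - 1.0).abs() < 1e-6);
    }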
}