sklears_kernel_approximation/
benchmarking.rs

1//! Comprehensive Benchmarking Framework for Kernel Approximation Methods
2//!
3//! This module provides tools for benchmarking and comparing different kernel approximation
4//! methods in terms of approximation quality, computational performance, and memory usage.
5
6use scirs2_core::ndarray::{Array1, Array2, Axis};
7use scirs2_core::random::essentials::Uniform as RandUniform;
8use scirs2_core::random::rngs::StdRng as RealStdRng;
9use scirs2_core::random::Rng;
10use scirs2_core::random::SeedableRng;
11use sklears_core::prelude::Result;
12use std::collections::HashMap;
13use std::time::{Duration, Instant};
14
/// Benchmarking suite for kernel approximation methods
#[derive(Debug, Clone)]
/// Owns the benchmark configuration, the datasets to evaluate, and every
/// raw per-run result accumulated so far; summaries are derived on demand.
pub struct KernelApproximationBenchmark {
    // Sweep/repetition/timeout settings shared by all runs.
    config: BenchmarkConfig,
    // Datasets each method is benchmarked against, in insertion order.
    datasets: Vec<BenchmarkDataset>,
    // Flat log of every run; aggregated by `summarize_results`.
    results: Vec<BenchmarkResult>,
}
23
/// Configuration for benchmarking
#[derive(Debug, Clone)]
/// Controls which metrics are collected, how many measured repetitions and
/// unmeasured warmup iterations are run, which approximation dimensions are
/// swept, and the optional per-run timeout.
pub struct BenchmarkConfig {
    /// Number of measured repetitions per (dataset, dimension) cell.
    pub repetitions: usize,
    /// Approximation-quality metrics computed for every successful run.
    pub quality_metrics: Vec<QualityMetric>,
    /// Computational metrics summarized across runs.
    pub performance_metrics: Vec<PerformanceMetric>,
    /// Feature dimensions (n_components) swept for each method.
    pub approximation_dimensions: Vec<usize>,
    /// Seed intended for reproducibility.
    /// NOTE(review): not read anywhere in this module's visible code — confirm callers use it.
    pub random_state: Option<u64>,
    /// Unmeasured fit calls executed before the timed run.
    pub warmup_iterations: usize,
    /// Per-run wall-clock budget in seconds; `None` disables the check.
    pub timeout_seconds: Option<u64>,
}
43
44impl Default for BenchmarkConfig {
45    fn default() -> Self {
46        Self {
47            repetitions: 5,
48            quality_metrics: vec![
49                QualityMetric::KernelAlignment,
50                QualityMetric::FrobeniusError,
51                QualityMetric::SpectralError,
52                QualityMetric::RelativeError,
53            ],
54            performance_metrics: vec![
55                PerformanceMetric::FitTime,
56                PerformanceMetric::TransformTime,
57                PerformanceMetric::MemoryUsage,
58                PerformanceMetric::Throughput,
59            ],
60            approximation_dimensions: vec![50, 100, 200, 500, 1000],
61            random_state: Some(42),
62            warmup_iterations: 3,
63            timeout_seconds: Some(300),
64        }
65    }
66}
67
/// Quality metrics for evaluating approximation quality
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Each variant selects one way of scoring how closely the approximate
/// kernel matches a reference kernel; `Eq + Hash` let metrics key the
/// per-run score maps.
pub enum QualityMetric {
    /// Centered kernel alignment (cosine similarity of centered kernels; higher is better)
    KernelAlignment,
    /// Frobenius norm error of the kernel difference
    FrobeniusError,
    /// Spectral norm error (approximated here by the max absolute entry difference)
    SpectralError,
    /// Relative approximation error (Frobenius error / Frobenius norm of true kernel)
    RelativeError,
    /// Nuclear norm error (approximated here by the entrywise L1 difference)
    NuclearError,
    /// Effective rank comparison (participation ratio (Σλ)² / Σλ²)
    EffectiveRank,
    /// Eigenvalue preservation (mean absolute eigenvalue difference)
    EigenvalueError,
}
87
/// Performance metrics for computational evaluation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Each variant keys one timing/resource measurement in the per-run maps.
/// All values are stored as `Duration`s; non-time metrics (memory,
/// throughput, efficiency) encode their raw value in the seconds field.
pub enum PerformanceMetric {
    /// Time to fit the approximation
    FitTime,
    /// Time to transform data
    TransformTime,
    /// Peak memory usage (MB, encoded in the Duration's seconds)
    MemoryUsage,
    /// Throughput (samples per second, encoded in the Duration's seconds)
    Throughput,
    /// Total computational cost (fit time + transform time)
    TotalTime,
    /// Memory efficiency (kernel alignment per MB, encoded in the Duration's seconds)
    MemoryEfficiency,
}
105
/// Benchmark dataset specification
#[derive(Debug, Clone)]
/// A named data matrix with optional per-sample targets and an optional
/// precomputed ground-truth kernel for exact quality metrics.
pub struct BenchmarkDataset {
    /// Human-readable identifier used in results and CSV export.
    pub name: String,
    /// Sample matrix, shape (n_samples, n_features).
    pub data: Array2<f64>,
    /// Optional per-sample targets (regression value or cluster label).
    pub target: Option<Array1<f64>>,
    /// Optional exact kernel matrix; when present, quality metrics compare
    /// against it instead of heuristic proxies.
    pub true_kernel: Option<Array2<f64>>,
    /// Free-form description of how the dataset was generated.
    pub description: String,
}
121
122impl BenchmarkDataset {
123    /// Create a synthetic Gaussian dataset
124    pub fn gaussian(n_samples: usize, n_features: usize, noise: f64, seed: u64) -> Self {
125        let mut rng = RealStdRng::seed_from_u64(seed);
126
127        // Helper function to generate normal distribution samples using Box-Muller
128        let normal_sample = |rng: &mut RealStdRng, mean: f64, std_dev: f64| -> f64 {
129            let u1: f64 = rng.gen();
130            let u2: f64 = rng.gen();
131            let z = (-2.0_f64 * u1.ln()).sqrt() * (2.0_f64 * std::f64::consts::PI * u2).cos();
132            mean + z * std_dev
133        };
134
135        let mut data = Array2::zeros((n_samples, n_features));
136        for i in 0..n_samples {
137            for j in 0..n_features {
138                data[[i, j]] =
139                    normal_sample(&mut rng, 0.0, 1.0) + normal_sample(&mut rng, 0.0, noise);
140            }
141        }
142
143        // Generate a simple target based on linear combination
144        let mut target = Array1::zeros(n_samples);
145        let weights = Array1::from_shape_fn(n_features, |_| normal_sample(&mut rng, 0.0, 1.0));
146
147        for (i, sample) in data.axis_iter(Axis(0)).enumerate() {
148            target[i] = sample.dot(&weights) + normal_sample(&mut rng, 0.0, noise);
149        }
150
151        Self {
152            name: format!("Gaussian_{}x{}_noise{}", n_samples, n_features, noise),
153            data,
154            target: Some(target),
155            true_kernel: None,
156            description: format!(
157                "Gaussian dataset with {} samples, {} features, and noise level {}",
158                n_samples, n_features, noise
159            ),
160        }
161    }
162
163    /// Create a polynomial dataset
164    pub fn polynomial(n_samples: usize, n_features: usize, degree: usize, seed: u64) -> Self {
165        let mut rng = RealStdRng::seed_from_u64(seed);
166        let uniform = RandUniform::new(-1.0, 1.0).unwrap();
167
168        let mut data = Array2::zeros((n_samples, n_features));
169        for i in 0..n_samples {
170            for j in 0..n_features {
171                data[[i, j]] = rng.sample(uniform);
172            }
173        }
174
175        // Generate polynomial target
176        let mut target = Array1::zeros(n_samples);
177        for (i, sample) in data.axis_iter(Axis(0)).enumerate() {
178            let mut poly_value = 0.0;
179            for &x in sample.iter() {
180                let x: f64 = x;
181                poly_value += x.powi(degree as i32);
182            }
183            target[i] = poly_value / n_features as f64;
184        }
185
186        Self {
187            name: format!("Polynomial_{}x{}_deg{}", n_samples, n_features, degree),
188            data,
189            target: Some(target),
190            true_kernel: None,
191            description: format!(
192                "Polynomial dataset with {} samples, {} features, and degree {}",
193                n_samples, n_features, degree
194            ),
195        }
196    }
197
198    /// Create a classification dataset with distinct clusters
199    pub fn classification_clusters(
200        n_samples: usize,
201        n_features: usize,
202        n_clusters: usize,
203        seed: u64,
204    ) -> Self {
205        let mut rng = RealStdRng::seed_from_u64(seed);
206
207        // Helper function to generate normal distribution samples using Box-Muller
208        let normal_sample = |rng: &mut RealStdRng, mean: f64, std_dev: f64| -> f64 {
209            let u1: f64 = rng.gen();
210            let u2: f64 = rng.gen();
211            let z = (-2.0_f64 * u1.ln()).sqrt() * (2.0_f64 * std::f64::consts::PI * u2).cos();
212            mean + z * std_dev
213        };
214
215        let samples_per_cluster = n_samples / n_clusters;
216        let mut data = Array2::zeros((n_samples, n_features));
217        let mut target = Array1::zeros(n_samples);
218
219        // Generate cluster centers
220        let cluster_separation = 3.0;
221        let mut centers = Array2::zeros((n_clusters, n_features));
222        for i in 0..n_clusters {
223            for j in 0..n_features {
224                centers[[i, j]] =
225                    (i as f64 * cluster_separation) * if j % 2 == 0 { 1.0 } else { -1.0 };
226            }
227        }
228
229        // Generate samples around centers
230        let mut sample_idx = 0;
231        for cluster in 0..n_clusters {
232            for _ in 0..samples_per_cluster {
233                if sample_idx >= n_samples {
234                    break;
235                }
236
237                for j in 0..n_features {
238                    data[[sample_idx, j]] =
239                        centers[[cluster, j]] + normal_sample(&mut rng, 0.0, 0.5);
240                }
241                target[sample_idx] = cluster as f64;
242                sample_idx += 1;
243            }
244        }
245
246        Self {
247            name: format!("Clusters_{}x{}_k{}", n_samples, n_features, n_clusters),
248            data,
249            target: Some(target),
250            true_kernel: None,
251            description: format!(
252                "Classification dataset with {} samples, {} features, and {} clusters",
253                n_samples, n_features, n_clusters
254            ),
255        }
256    }
257}
258
/// Result of a benchmark run
#[derive(Debug, Clone)]
/// One (method, dataset, dimension, repetition) measurement. On failure the
/// score maps are empty, `memory_usage_mb` is 0.0, and `error_message`
/// describes what went wrong.
pub struct BenchmarkResult {
    /// Name reported by the benchmarked method.
    pub method_name: String,
    /// Name of the dataset the run used.
    pub dataset_name: String,
    /// Requested approximation dimension (n_components) recorded for this run.
    /// NOTE(review): the visible benchmark loop records this value but never
    /// passes it to the method's fit — confirm methods are configured elsewhere.
    pub approximation_dimension: usize,
    /// Quality score per metric; empty when the run failed.
    pub quality_scores: HashMap<QualityMetric, f64>,
    /// Per-metric measurements; non-time metrics are stored as `Duration`s
    /// whose seconds field carries the raw value.
    pub performance_scores: HashMap<PerformanceMetric, Duration>,
    /// Estimated memory usage of the fitted transformer, in MB.
    pub memory_usage_mb: f64,
    /// Whether fit and transform both completed.
    pub success: bool,
    /// Failure description when `success` is false.
    pub error_message: Option<String>,
    /// Zero-based repetition index within the benchmark cell.
    pub repetition: usize,
}
282
/// Summary statistics across multiple benchmark runs
#[derive(Debug, Clone)]
/// Aggregates all repetitions of one (method, dataset, dimension) cell;
/// produced by `KernelApproximationBenchmark::summarize_results`. Means and
/// standard deviations are computed over successful runs only, and the
/// deviations are population (divide-by-N) values.
pub struct BenchmarkSummary {
    /// Name reported by the benchmarked method.
    pub method_name: String,
    /// Name of the dataset the cell used.
    pub dataset_name: String,
    /// Requested approximation dimension for the cell.
    pub approximation_dimension: usize,
    /// Mean of each quality metric over successful runs.
    pub quality_means: HashMap<QualityMetric, f64>,
    /// Population standard deviation of each quality metric.
    pub quality_stds: HashMap<QualityMetric, f64>,
    /// Mean of each performance metric over successful runs.
    pub performance_means: HashMap<PerformanceMetric, Duration>,
    /// Population standard deviation of each performance metric.
    pub performance_stds: HashMap<PerformanceMetric, Duration>,
    /// Fraction of the cell's runs that succeeded (0.0–1.0).
    pub success_rate: f64,
    /// Mean estimated memory usage over successful runs, in MB.
    pub memory_usage_mean_mb: f64,
    /// Population standard deviation of the memory usage, in MB.
    pub memory_usage_std_mb: f64,
}
308
/// Trait for benchmarkable kernel approximation methods
///
/// Implemented by unfitted approximators so the suite can fit them on each
/// dataset and time the operation.
pub trait BenchmarkableKernelMethod {
    /// Get the name of the method (used as the `method_name` key in results)
    fn method_name(&self) -> String;

    /// Fit the approximation method and return timing
    ///
    /// On success yields the fitted transformer together with the fit's
    /// wall-clock duration as measured by the implementation.
    fn benchmark_fit(
        &self,
        data: &Array2<f64>,
        target: Option<&Array1<f64>>,
    ) -> Result<(Box<dyn BenchmarkableTransformer>, Duration)>;

    /// Clone the method for multiple repetitions
    ///
    /// NOTE(review): not called within this module's visible code; retained
    /// for callers needing an owned copy of a `dyn` method.
    fn clone_method(&self) -> Box<dyn BenchmarkableKernelMethod>;
}
324
/// Trait for fitted kernel approximation methods that can transform data
pub trait BenchmarkableTransformer {
    /// Transform data and return timing
    ///
    /// Returns the approximate feature map of `data` (one row per sample)
    /// plus the transform's wall-clock duration.
    fn benchmark_transform(&self, data: &Array2<f64>) -> Result<(Array2<f64>, Duration)>;

    /// Get approximation dimension
    ///
    /// NOTE(review): not read by the benchmark loop in this module; the
    /// recorded dimension comes from the configuration sweep instead.
    fn approximation_dimension(&self) -> usize;

    /// Estimate memory usage in MB
    fn memory_usage_mb(&self) -> f64;
}
336
337impl KernelApproximationBenchmark {
338    /// Create a new benchmark suite
339    pub fn new(config: BenchmarkConfig) -> Self {
340        Self {
341            config,
342            datasets: Vec::new(),
343            results: Vec::new(),
344        }
345    }
346
    /// Add a dataset to benchmark
    ///
    /// Datasets are evaluated in insertion order by `benchmark_method`.
    pub fn add_dataset(&mut self, dataset: BenchmarkDataset) {
        self.datasets.push(dataset);
    }
351
352    /// Add multiple synthetic datasets
353    pub fn add_synthetic_datasets(&mut self, seed: u64) {
354        // Small datasets for quick testing
355        self.add_dataset(BenchmarkDataset::gaussian(100, 10, 0.1, seed));
356        self.add_dataset(BenchmarkDataset::polynomial(100, 5, 2, seed + 1));
357        self.add_dataset(BenchmarkDataset::classification_clusters(
358            120,
359            8,
360            3,
361            seed + 2,
362        ));
363
364        // Medium datasets
365        self.add_dataset(BenchmarkDataset::gaussian(500, 20, 0.2, seed + 3));
366        self.add_dataset(BenchmarkDataset::polynomial(500, 15, 3, seed + 4));
367        self.add_dataset(BenchmarkDataset::classification_clusters(
368            600,
369            25,
370            4,
371            seed + 5,
372        ));
373
374        // Large datasets
375        self.add_dataset(BenchmarkDataset::gaussian(1000, 50, 0.1, seed + 6));
376        self.add_dataset(BenchmarkDataset::polynomial(1000, 30, 2, seed + 7));
377        self.add_dataset(BenchmarkDataset::classification_clusters(
378            1200,
379            40,
380            5,
381            seed + 8,
382        ));
383    }
384
385    /// Run benchmark for a specific method
386    pub fn benchmark_method(
387        &mut self,
388        method: &dyn BenchmarkableKernelMethod,
389    ) -> Result<Vec<BenchmarkResult>> {
390        let mut method_results = Vec::new();
391        let _method_name = method.method_name();
392
393        for dataset in &self.datasets {
394            for &n_components in &self.config.approximation_dimensions {
395                for repetition in 0..self.config.repetitions {
396                    let result =
397                        self.run_single_benchmark(method, dataset, n_components, repetition)?;
398
399                    method_results.push(result.clone());
400                    self.results.push(result);
401                }
402            }
403        }
404
405        Ok(method_results)
406    }
407
408    fn run_single_benchmark(
409        &self,
410        method: &dyn BenchmarkableKernelMethod,
411        dataset: &BenchmarkDataset,
412        n_components: usize,
413        repetition: usize,
414    ) -> Result<BenchmarkResult> {
415        let start_time = Instant::now();
416
417        // Check timeout
418        if let Some(timeout) = self.config.timeout_seconds {
419            if start_time.elapsed().as_secs() > timeout {
420                return Ok(BenchmarkResult {
421                    method_name: method.method_name(),
422                    dataset_name: dataset.name.clone(),
423                    approximation_dimension: n_components,
424                    quality_scores: HashMap::new(),
425                    performance_scores: HashMap::new(),
426                    memory_usage_mb: 0.0,
427                    success: false,
428                    error_message: Some("Timeout exceeded".to_string()),
429                    repetition,
430                });
431            }
432        }
433
434        // Warmup iterations
435        for _ in 0..self.config.warmup_iterations {
436            let _ = method.benchmark_fit(&dataset.data, dataset.target.as_ref());
437        }
438
439        // Actual benchmark run
440        let _fit_start = Instant::now();
441        let (fitted_method, fit_time) =
442            match method.benchmark_fit(&dataset.data, dataset.target.as_ref()) {
443                Ok(result) => result,
444                Err(e) => {
445                    return Ok(BenchmarkResult {
446                        method_name: method.method_name(),
447                        dataset_name: dataset.name.clone(),
448                        approximation_dimension: n_components,
449                        quality_scores: HashMap::new(),
450                        performance_scores: HashMap::new(),
451                        memory_usage_mb: 0.0,
452                        success: false,
453                        error_message: Some(format!("Fit error: {}", e)),
454                        repetition,
455                    });
456                }
457            };
458
459        let (transformed_data, transform_time) =
460            match fitted_method.benchmark_transform(&dataset.data) {
461                Ok(result) => result,
462                Err(e) => {
463                    return Ok(BenchmarkResult {
464                        method_name: method.method_name(),
465                        dataset_name: dataset.name.clone(),
466                        approximation_dimension: n_components,
467                        quality_scores: HashMap::new(),
468                        performance_scores: HashMap::new(),
469                        memory_usage_mb: 0.0,
470                        success: false,
471                        error_message: Some(format!("Transform error: {}", e)),
472                        repetition,
473                    });
474                }
475            };
476
477        let memory_usage = fitted_method.memory_usage_mb();
478        let total_time = fit_time + transform_time;
479        let throughput_time =
480            Duration::from_secs_f64(dataset.data.nrows() as f64 / transform_time.as_secs_f64());
481
482        // Compute quality metrics
483        let mut quality_scores = HashMap::new();
484
485        if let Some(ref true_kernel) = dataset.true_kernel {
486            // Use provided true kernel for comparison
487            let approx_kernel = self.compute_approximate_kernel(&transformed_data)?;
488
489            for metric in &self.config.quality_metrics {
490                let score = self.compute_quality_metric(metric, true_kernel, &approx_kernel)?;
491                quality_scores.insert(metric.clone(), score);
492            }
493        } else {
494            // Compute approximate quality metrics without true kernel
495            for metric in &self.config.quality_metrics {
496                let score = self.compute_approximate_quality_metric(
497                    metric,
498                    &dataset.data,
499                    &transformed_data,
500                )?;
501                quality_scores.insert(metric.clone(), score);
502            }
503        }
504
505        // Collect performance metrics
506        let mut performance_scores = HashMap::new();
507        performance_scores.insert(PerformanceMetric::FitTime, fit_time);
508        performance_scores.insert(PerformanceMetric::TransformTime, transform_time);
509        performance_scores.insert(PerformanceMetric::TotalTime, total_time);
510        performance_scores.insert(PerformanceMetric::Throughput, throughput_time);
511        performance_scores.insert(
512            PerformanceMetric::MemoryUsage,
513            Duration::from_secs_f64(memory_usage),
514        );
515        performance_scores.insert(
516            PerformanceMetric::MemoryEfficiency,
517            Duration::from_secs_f64(
518                quality_scores
519                    .get(&QualityMetric::KernelAlignment)
520                    .unwrap_or(&0.0)
521                    / memory_usage.max(1.0),
522            ),
523        );
524
525        Ok(BenchmarkResult {
526            method_name: method.method_name(),
527            dataset_name: dataset.name.clone(),
528            approximation_dimension: n_components,
529            quality_scores,
530            performance_scores,
531            memory_usage_mb: memory_usage,
532            success: true,
533            error_message: None,
534            repetition,
535        })
536    }
537
538    fn compute_approximate_kernel(&self, features: &Array2<f64>) -> Result<Array2<f64>> {
539        let n_samples = features.nrows();
540        let mut kernel = Array2::zeros((n_samples, n_samples));
541
542        for i in 0..n_samples {
543            for j in i..n_samples {
544                let similarity = features.row(i).dot(&features.row(j));
545                kernel[[i, j]] = similarity;
546                kernel[[j, i]] = similarity;
547            }
548        }
549
550        Ok(kernel)
551    }
552
553    fn compute_quality_metric(
554        &self,
555        metric: &QualityMetric,
556        true_kernel: &Array2<f64>,
557        approx_kernel: &Array2<f64>,
558    ) -> Result<f64> {
559        match metric {
560            QualityMetric::KernelAlignment => {
561                Ok(self.compute_kernel_alignment(true_kernel, approx_kernel)?)
562            }
563            QualityMetric::FrobeniusError => {
564                let diff = true_kernel - approx_kernel;
565                let frobenius_norm = diff.mapv(|x| x * x).sum().sqrt();
566                Ok(frobenius_norm)
567            }
568            QualityMetric::SpectralError => {
569                // Simplified spectral norm approximation
570                let diff = true_kernel - approx_kernel;
571                let max_abs = diff
572                    .mapv(|x: f64| x.abs())
573                    .fold(0.0_f64, |acc, &x| acc.max(x));
574                Ok(max_abs)
575            }
576            QualityMetric::RelativeError => {
577                let diff = true_kernel - approx_kernel;
578                let frobenius_diff = diff.mapv(|x| x * x).sum().sqrt();
579                let frobenius_true = true_kernel.mapv(|x| x * x).sum().sqrt();
580                Ok(frobenius_diff / frobenius_true.max(1e-8))
581            }
582            QualityMetric::NuclearError => {
583                // Simplified nuclear norm (sum of absolute values as approximation)
584                let diff = true_kernel - approx_kernel;
585                let nuclear_norm = diff.mapv(|x| x.abs()).sum();
586                Ok(nuclear_norm)
587            }
588            QualityMetric::EffectiveRank => {
589                // Simplified effective rank computation
590                let eigenvalues = self.compute_approximate_eigenvalues(approx_kernel)?;
591                let sum_eigenvalues = eigenvalues.sum();
592                let sum_squared_eigenvalues = eigenvalues.mapv(|x| x * x).sum();
593                Ok(sum_eigenvalues * sum_eigenvalues / sum_squared_eigenvalues.max(1e-8))
594            }
595            QualityMetric::EigenvalueError => {
596                let true_eigenvalues = self.compute_approximate_eigenvalues(true_kernel)?;
597                let approx_eigenvalues = self.compute_approximate_eigenvalues(approx_kernel)?;
598
599                let min_len = true_eigenvalues.len().min(approx_eigenvalues.len());
600                let mut error = 0.0;
601                for i in 0..min_len {
602                    error += (true_eigenvalues[i] - approx_eigenvalues[i]).abs();
603                }
604                Ok(error / min_len as f64)
605            }
606        }
607    }
608
609    fn compute_approximate_quality_metric(
610        &self,
611        metric: &QualityMetric,
612        original_data: &Array2<f64>,
613        features: &Array2<f64>,
614    ) -> Result<f64> {
615        match metric {
616            QualityMetric::KernelAlignment => {
617                // Compute alignment with RBF kernel on original data
618                let rbf_kernel = self.compute_rbf_kernel(original_data, 1.0)?;
619                let approx_kernel = self.compute_approximate_kernel(features)?;
620                Ok(self.compute_kernel_alignment(&rbf_kernel, &approx_kernel)?)
621            }
622            QualityMetric::EffectiveRank => {
623                let approx_kernel = self.compute_approximate_kernel(features)?;
624                let eigenvalues = self.compute_approximate_eigenvalues(&approx_kernel)?;
625                let sum_eigenvalues = eigenvalues.sum();
626                let sum_squared_eigenvalues = eigenvalues.mapv(|x| x * x).sum();
627                Ok(sum_eigenvalues * sum_eigenvalues / sum_squared_eigenvalues.max(1e-8))
628            }
629            _ => {
630                // For other metrics without ground truth, return approximation dimension as proxy
631                Ok(features.ncols() as f64)
632            }
633        }
634    }
635
636    fn compute_kernel_alignment(&self, k1: &Array2<f64>, k2: &Array2<f64>) -> Result<f64> {
637        let n = k1.nrows();
638
639        // Center the kernels
640        let row_means_k1 = k1.mean_axis(Axis(1)).unwrap();
641        let row_means_k2 = k2.mean_axis(Axis(1)).unwrap();
642        let total_mean_k1 = row_means_k1.mean().unwrap();
643        let total_mean_k2 = row_means_k2.mean().unwrap();
644
645        let mut k1_centered = k1.clone();
646        let mut k2_centered = k2.clone();
647
648        for i in 0..n {
649            for j in 0..n {
650                k1_centered[[i, j]] -= row_means_k1[i] + row_means_k1[j] - total_mean_k1;
651                k2_centered[[i, j]] -= row_means_k2[i] + row_means_k2[j] - total_mean_k2;
652            }
653        }
654
655        // Compute centered kernel alignment
656        let numerator = (&k1_centered * &k2_centered).sum();
657        let denom1 = (&k1_centered * &k1_centered).sum().sqrt();
658        let denom2 = (&k2_centered * &k2_centered).sum().sqrt();
659
660        Ok(numerator / (denom1 * denom2).max(1e-8))
661    }
662
663    fn compute_rbf_kernel(&self, data: &Array2<f64>, gamma: f64) -> Result<Array2<f64>> {
664        let n_samples = data.nrows();
665        let mut kernel = Array2::zeros((n_samples, n_samples));
666
667        for i in 0..n_samples {
668            for j in i..n_samples {
669                let diff = &data.row(i) - &data.row(j);
670                let dist_sq = diff.mapv(|x| x * x).sum();
671                let similarity = (-gamma * dist_sq).exp();
672                kernel[[i, j]] = similarity;
673                kernel[[j, i]] = similarity;
674            }
675        }
676
677        Ok(kernel)
678    }
679
680    fn compute_approximate_eigenvalues(&self, matrix: &Array2<f64>) -> Result<Array1<f64>> {
681        // Simplified eigenvalue computation (diagonal elements as approximation)
682        let n = matrix.nrows();
683        let mut eigenvalues = Array1::zeros(n);
684
685        for i in 0..n {
686            eigenvalues[i] = matrix[[i, i]];
687        }
688
689        // Sort in descending order
690        let mut eigenvalues_vec: Vec<f64> = eigenvalues.to_vec();
691        eigenvalues_vec.sort_by(|a, b| b.partial_cmp(a).unwrap());
692
693        Ok(Array1::from_vec(eigenvalues_vec))
694    }
695
    /// Generate summary statistics from benchmark results
    ///
    /// Results are grouped by (method, dataset, approximation dimension);
    /// each group with at least one successful run yields a
    /// `BenchmarkSummary` with per-metric means and population (divide-by-N)
    /// standard deviations over the successful runs. Groups in which every
    /// repetition failed are skipped entirely, so callers cannot distinguish
    /// "all failed" from "never run" through the summaries alone.
    pub fn summarize_results(&self) -> Vec<BenchmarkSummary> {
        let mut summaries = Vec::new();
        let mut grouped_results: HashMap<(String, String, usize), Vec<&BenchmarkResult>> =
            HashMap::new();

        // Group results by method, dataset, and approximation dimension
        for result in &self.results {
            let key = (
                result.method_name.clone(),
                result.dataset_name.clone(),
                result.approximation_dimension,
            );
            grouped_results.entry(key).or_default().push(result);
        }

        // Compute summary statistics for each group
        for ((method_name, dataset_name, approximation_dimension), results) in grouped_results {
            // Pre-filter count so the success rate reflects failed runs.
            let total_results = results.len();
            let successful_results: Vec<_> = results.into_iter().filter(|r| r.success).collect();

            if successful_results.is_empty() {
                continue;
            }

            let mut quality_means = HashMap::new();
            let mut quality_stds = HashMap::new();
            let mut performance_means = HashMap::new();
            let mut performance_stds = HashMap::new();

            // Compute quality metric statistics
            for metric in &self.config.quality_metrics {
                // Runs missing this metric are silently skipped by filter_map.
                let values: Vec<f64> = successful_results
                    .iter()
                    .filter_map(|r| r.quality_scores.get(metric))
                    .cloned()
                    .collect();

                if !values.is_empty() {
                    let mean = values.iter().sum::<f64>() / values.len() as f64;
                    // Population variance (divide by N, not N-1).
                    let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
                        / values.len() as f64;
                    let std = variance.sqrt();

                    quality_means.insert(metric.clone(), mean);
                    quality_stds.insert(metric.clone(), std);
                }
            }

            // Compute performance metric statistics
            for metric in &self.config.performance_metrics {
                let values: Vec<Duration> = successful_results
                    .iter()
                    .filter_map(|r| r.performance_scores.get(metric))
                    .cloned()
                    .collect();

                if !values.is_empty() {
                    // Statistics are computed on the f64 seconds values and
                    // converted back to Durations for storage.
                    let mean_secs: f64 =
                        values.iter().map(|d| d.as_secs_f64()).sum::<f64>() / values.len() as f64;
                    let variance: f64 = values
                        .iter()
                        .map(|d| (d.as_secs_f64() - mean_secs).powi(2))
                        .sum::<f64>()
                        / values.len() as f64;
                    let std_secs = variance.sqrt();

                    performance_means.insert(metric.clone(), Duration::from_secs_f64(mean_secs));
                    performance_stds.insert(metric.clone(), Duration::from_secs_f64(std_secs));
                }
            }

            // Compute memory usage statistics
            let memory_values: Vec<f64> = successful_results
                .iter()
                .map(|r| r.memory_usage_mb)
                .collect();

            // Non-empty: the all-failed case was skipped above.
            let memory_mean = memory_values.iter().sum::<f64>() / memory_values.len() as f64;
            let memory_variance = memory_values
                .iter()
                .map(|x| (x - memory_mean).powi(2))
                .sum::<f64>()
                / memory_values.len() as f64;
            let memory_std = memory_variance.sqrt();

            let success_rate = successful_results.len() as f64 / total_results as f64;

            summaries.push(BenchmarkSummary {
                method_name,
                dataset_name,
                approximation_dimension,
                quality_means,
                quality_stds,
                performance_means,
                performance_stds,
                success_rate,
                memory_usage_mean_mb: memory_mean,
                memory_usage_std_mb: memory_std,
            });
        }

        summaries
    }
800
    /// Get all benchmark results
    ///
    /// Returns every raw per-run result recorded so far, in execution order.
    pub fn get_results(&self) -> &[BenchmarkResult] {
        &self.results
    }
805
    /// Clear all results
    ///
    /// Registered datasets and the configuration are retained.
    pub fn clear_results(&mut self) {
        self.results.clear();
    }
810
811    /// Export results to CSV format
812    pub fn export_results_csv(&self) -> String {
813        let mut csv = String::new();
814
815        // Header
816        csv.push_str("method,dataset,n_components,repetition,success,");
817        csv.push_str("kernel_alignment,frobenius_error,spectral_error,relative_error,");
818        csv.push_str("fit_time_ms,transform_time_ms,total_time_ms,memory_mb\n");
819
820        // Data rows
821        for result in &self.results {
822            csv.push_str(&format!(
823                "{},{},{},{},{},",
824                result.method_name,
825                result.dataset_name,
826                result.approximation_dimension,
827                result.repetition,
828                result.success
829            ));
830
831            // Quality metrics
832            let kernel_alignment = result
833                .quality_scores
834                .get(&QualityMetric::KernelAlignment)
835                .map(|x| x.to_string())
836                .unwrap_or_else(|| "".to_string());
837            let frobenius_error = result
838                .quality_scores
839                .get(&QualityMetric::FrobeniusError)
840                .map(|x| x.to_string())
841                .unwrap_or_else(|| "".to_string());
842            let spectral_error = result
843                .quality_scores
844                .get(&QualityMetric::SpectralError)
845                .map(|x| x.to_string())
846                .unwrap_or_else(|| "".to_string());
847            let relative_error = result
848                .quality_scores
849                .get(&QualityMetric::RelativeError)
850                .map(|x| x.to_string())
851                .unwrap_or_else(|| "".to_string());
852
853            csv.push_str(&format!(
854                "{},{},{},{},",
855                kernel_alignment, frobenius_error, spectral_error, relative_error
856            ));
857
858            // Performance metrics
859            let fit_time = result
860                .performance_scores
861                .get(&PerformanceMetric::FitTime)
862                .map(|d| d.as_millis().to_string())
863                .unwrap_or_else(|| "".to_string());
864            let transform_time = result
865                .performance_scores
866                .get(&PerformanceMetric::TransformTime)
867                .map(|d| d.as_millis().to_string())
868                .unwrap_or_else(|| "".to_string());
869            let total_time = result
870                .performance_scores
871                .get(&PerformanceMetric::TotalTime)
872                .map(|d| d.as_millis().to_string())
873                .unwrap_or_else(|| "".to_string());
874
875            csv.push_str(&format!(
876                "{},{},{},{}\n",
877                fit_time, transform_time, total_time, result.memory_usage_mb
878            ));
879        }
880
881        csv
882    }
883}
884
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal stand-in for a real kernel approximation method so the
    /// benchmark harness can be exercised without any heavy computation.
    struct MockRBFMethod {
        n_components: usize,
    }

    impl BenchmarkableKernelMethod for MockRBFMethod {
        fn method_name(&self) -> String {
            "MockRBF".to_string()
        }

        fn benchmark_fit(
            &self,
            _data: &Array2<f64>,
            _target: Option<&Array1<f64>>,
        ) -> Result<(Box<dyn BenchmarkableTransformer>, Duration)> {
            let start = Instant::now();

            // No actual fitting work: the mock only reports a (near-zero)
            // elapsed duration and a fitted transformer with the configured
            // output dimensionality.
            let duration = start.elapsed();
            Ok((
                Box::new(MockFittedRBF {
                    n_components: self.n_components,
                }),
                duration,
            ))
        }

        fn clone_method(&self) -> Box<dyn BenchmarkableKernelMethod> {
            Box::new(MockRBFMethod {
                n_components: self.n_components,
            })
        }
    }

    /// Fitted counterpart of `MockRBFMethod`; maps any input to zeros.
    struct MockFittedRBF {
        n_components: usize,
    }

    impl BenchmarkableTransformer for MockFittedRBF {
        fn benchmark_transform(&self, data: &Array2<f64>) -> Result<(Array2<f64>, Duration)> {
            let start = Instant::now();
            // Mock transformation: an all-zero matrix of the expected shape.
            let result = Array2::zeros((data.nrows(), self.n_components));
            let duration = start.elapsed();
            Ok((result, duration))
        }

        fn approximation_dimension(&self) -> usize {
            self.n_components
        }

        fn memory_usage_mb(&self) -> f64 {
            // Rough estimate: n_components * n_features * 8 bytes, assuming
            // 10 features (a fixed placeholder for the mock).
            (self.n_components * 10 * 8) as f64 / (1024.0 * 1024.0)
        }
    }

    /// A generated Gaussian dataset has the requested shape and a target.
    #[test]
    fn test_benchmark_dataset_creation() {
        let dataset = BenchmarkDataset::gaussian(100, 10, 0.1, 42);
        assert_eq!(dataset.data.shape(), &[100, 10]);
        assert!(dataset.target.is_some());
        assert_eq!(dataset.target.as_ref().unwrap().len(), 100);
    }

    /// Running a method produces one result per
    /// dataset x approximation dimension x repetition.
    #[test]
    fn test_benchmark_execution() {
        let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
            repetitions: 2,
            approximation_dimensions: vec![10, 20],
            ..Default::default()
        });

        let dataset = BenchmarkDataset::gaussian(50, 5, 0.1, 42);
        benchmark.add_dataset(dataset);

        let method = MockRBFMethod { n_components: 10 };
        let results = benchmark.benchmark_method(&method).unwrap();

        assert_eq!(results.len(), 4); // 1 dataset * 2 dimensions * 2 repetitions
        assert!(results.iter().all(|r| r.success));
    }

    /// Summaries aggregate repetitions into a single entry with the
    /// expected success rate.
    #[test]
    fn test_benchmark_summary() {
        let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
            repetitions: 3,
            approximation_dimensions: vec![10],
            ..Default::default()
        });

        let dataset = BenchmarkDataset::gaussian(50, 5, 0.1, 42);
        benchmark.add_dataset(dataset);

        let method = MockRBFMethod { n_components: 10 };
        benchmark.benchmark_method(&method).unwrap();

        let summaries = benchmark.summarize_results();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].success_rate, 1.0);
    }

    /// CSV export contains the header and the method/dataset identifiers.
    #[test]
    fn test_csv_export() {
        let mut benchmark = KernelApproximationBenchmark::new(BenchmarkConfig {
            repetitions: 1,
            approximation_dimensions: vec![5],
            ..Default::default()
        });

        let dataset = BenchmarkDataset::gaussian(20, 3, 0.1, 42);
        benchmark.add_dataset(dataset);

        let method = MockRBFMethod { n_components: 5 };
        benchmark.benchmark_method(&method).unwrap();

        let csv = benchmark.export_results_csv();
        assert!(csv.contains("method,dataset"));
        assert!(csv.contains("MockRBF"));
        assert!(csv.contains("Gaussian"));
    }
}