sklears_kernel_approximation/advanced_testing.rs

//! Advanced testing and validation for kernel approximations
//!
//! This module provides comprehensive testing frameworks for convergence analysis,
//! approximation error bounds testing, and quality assessment of kernel approximation methods.

use scirs2_core::ndarray::{Array1, Array2, Axis};
use scirs2_core::random::essentials::{Normal as RandNormal, Uniform as RandUniform};
use scirs2_core::random::rngs::StdRng as RealStdRng;
use scirs2_core::random::Distribution;
use scirs2_core::random::{thread_rng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};

use sklears_core::error::{Result, SklearsError};
use std::collections::HashMap;
use std::f64::consts::PI;
/// Convergence rate analysis for kernel approximation methods
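///
/// A minimal usage sketch (illustrative; `x` stands for any dense
/// `Array2<f64>` data matrix and the gamma value is arbitrary):
///
/// ```ignore
/// let analyzer = ConvergenceAnalyzer::new(100)
///     .n_trials(5)
///     .convergence_tolerance(1e-3);
/// let result = analyzer.analyze_rbf_convergence(&x, 0.5).unwrap();
/// println!("estimated convergence rate: {}", result.convergence_rate);
/// ```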
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvergenceAnalyzer {
    /// Largest number of components considered by the default schedule
    pub max_components: usize,
    /// Component counts at which the approximation error is measured
    pub component_steps: Vec<usize>,
    /// Number of random trials averaged per component count
    pub n_trials: usize,
    /// Error threshold below which the approximation is considered converged
    pub convergence_tolerance: f64,
    /// Reference kernel used as ground truth
    pub reference_method: ReferenceMethod,
}

/// Reference kernel used as ground truth when measuring approximation error
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ReferenceMethod {
    /// Exact kernel matrix computed in closed form
    ExactKernel,
    /// High-precision random-feature approximation with a fixed component count
    HighPrecisionApproximation { n_components: usize },
    /// Average over repeated random-feature approximations
    MonteCarloEstimate { n_samples: usize },
}

impl ConvergenceAnalyzer {
    /// Create an analyzer that measures error at ten evenly spaced component
    /// counts up to `max_components`.
    pub fn new(max_components: usize) -> Self {
        let component_steps = (1..=10)
            .map(|i| (max_components * i) / 10)
            .filter(|&x| x > 0)
            .collect();

        Self {
            max_components,
            component_steps,
            n_trials: 10,
            convergence_tolerance: 1e-6,
            reference_method: ReferenceMethod::ExactKernel,
        }
    }

    /// Override the component counts at which error is measured.
    pub fn component_steps(mut self, steps: Vec<usize>) -> Self {
        self.component_steps = steps;
        self
    }

    /// Set the number of random trials per component count.
    pub fn n_trials(mut self, n_trials: usize) -> Self {
        self.n_trials = n_trials;
        self
    }

    /// Set the error threshold used to decide convergence.
    pub fn convergence_tolerance(mut self, tolerance: f64) -> Self {
        self.convergence_tolerance = tolerance;
        self
    }

    /// Choose the reference kernel used as ground truth.
    pub fn reference_method(mut self, method: ReferenceMethod) -> Self {
        self.reference_method = method;
        self
    }

    /// Measure how the RFF approximation error of the RBF kernel
    /// `k(x, y) = exp(-gamma * ||x - y||^2)` decays as components are added.
    pub fn analyze_rbf_convergence(
        &self,
        x: &Array2<f64>,
        gamma: f64,
    ) -> Result<ConvergenceResult> {
        let mut component_counts = Vec::new();
        let mut approximation_errors = Vec::new();

        // Compute reference kernel matrix
        let reference_kernel = self.compute_reference_kernel(x, x, gamma)?;

        for &n_components in &self.component_steps {
            let mut trial_errors = Vec::new();

            for _ in 0..self.n_trials {
                // Generate RBF approximation
                let approximated_kernel =
                    self.compute_rbf_approximation(x, x, gamma, n_components)?;

                // Compute approximation error
                let error =
                    self.compute_approximation_error(&reference_kernel, &approximated_kernel)?;
                trial_errors.push(error);
            }

            let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
            let std_error = {
                let variance = trial_errors
                    .iter()
                    .map(|&e| (e - mean_error).powi(2))
                    .sum::<f64>()
                    / trial_errors.len() as f64;
                variance.sqrt()
            };

            component_counts.push(n_components);
            approximation_errors.push(ApproximationError {
                mean: mean_error,
                std: std_error,
                min: trial_errors.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
                max: trial_errors
                    .iter()
                    .fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
            });
        }

        // Fit a power law to estimate the convergence rate
        let convergence_rate =
            self.estimate_convergence_rate(&component_counts, &approximation_errors)?;
        let is_converged = approximation_errors
            .last()
            .map_or(false, |e| e.mean < self.convergence_tolerance);

        Ok(ConvergenceResult {
            component_counts,
            approximation_errors,
            convergence_rate,
            is_converged,
        })
    }

    fn compute_reference_kernel(
        &self,
        x: &Array2<f64>,
        y: &Array2<f64>,
        gamma: f64,
    ) -> Result<Array2<f64>> {
        match &self.reference_method {
            ReferenceMethod::ExactKernel => self.compute_exact_rbf_kernel(x, y, gamma),
            ReferenceMethod::HighPrecisionApproximation { n_components } => {
                self.compute_rbf_approximation(x, y, gamma, *n_components)
            }
            ReferenceMethod::MonteCarloEstimate { n_samples } => {
                self.compute_monte_carlo_estimate(x, y, gamma, *n_samples)
            }
        }
    }

    fn compute_exact_rbf_kernel(
        &self,
        x: &Array2<f64>,
        y: &Array2<f64>,
        gamma: f64,
    ) -> Result<Array2<f64>> {
        let n_x = x.nrows();
        let n_y = y.nrows();
        let mut kernel_matrix = Array2::zeros((n_x, n_y));

        // k(x_i, y_j) = exp(-gamma * ||x_i - y_j||^2)
        for i in 0..n_x {
            for j in 0..n_y {
                let diff = &x.row(i) - &y.row(j);
                let squared_norm = diff.dot(&diff);
                kernel_matrix[[i, j]] = (-gamma * squared_norm).exp();
            }
        }

        Ok(kernel_matrix)
    }

    fn compute_rbf_approximation(
        &self,
        x: &Array2<f64>,
        y: &Array2<f64>,
        gamma: f64,
        n_components: usize,
    ) -> Result<Array2<f64>> {
        // Random Fourier features: draw W ~ N(0, 2*gamma*I) and b ~ U[0, 2*pi),
        // so that z(x) = sqrt(2/D) * cos(Wx + b) satisfies E[z(x)·z(y)] = k(x, y).
        let mut rng = RealStdRng::from_seed(thread_rng().gen());
        let normal = RandNormal::new(0.0, (2.0 * gamma).sqrt()).unwrap();
        let uniform = RandUniform::new(0.0, 2.0 * PI).unwrap();

        let input_dim = x.ncols();

        // Generate random weights and biases
        let mut weights = Array2::zeros((n_components, input_dim));
        let mut biases = Array1::zeros(n_components);

        for i in 0..n_components {
            for j in 0..input_dim {
                weights[[i, j]] = rng.sample(normal);
            }
            biases[i] = rng.sample(uniform);
        }

        // Compute features for x and y
        let features_x = self.compute_rff_features(x, &weights, &biases, n_components)?;
        let features_y = self.compute_rff_features(y, &weights, &biases, n_components)?;

        // Approximate the kernel matrix by inner products of the feature maps
        let n_x = x.nrows();
        let n_y = y.nrows();
        let mut kernel_matrix = Array2::zeros((n_x, n_y));

        for i in 0..n_x {
            for j in 0..n_y {
                kernel_matrix[[i, j]] = features_x.row(i).dot(&features_y.row(j));
            }
        }

        Ok(kernel_matrix)
    }

    fn compute_rff_features(
        &self,
        x: &Array2<f64>,
        weights: &Array2<f64>,
        biases: &Array1<f64>,
        n_components: usize,
    ) -> Result<Array2<f64>> {
        let n_samples = x.nrows();
        let mut features = Array2::zeros((n_samples, n_components));
        // z_j(x) = sqrt(2/D) * cos(w_j · x + b_j)
        let scaling = (2.0 / n_components as f64).sqrt();

        for i in 0..n_samples {
            for j in 0..n_components {
                let mut dot_product = 0.0;
                for k in 0..x.ncols() {
                    dot_product += x[[i, k]] * weights[[j, k]];
                }
                dot_product += biases[j];
                features[[i, j]] = scaling * dot_product.cos();
            }
        }

        Ok(features)
    }

    fn compute_monte_carlo_estimate(
        &self,
        x: &Array2<f64>,
        y: &Array2<f64>,
        gamma: f64,
        n_samples: usize,
    ) -> Result<Array2<f64>> {
        // Monte Carlo estimation: average several independent RFF approximations
        // (each using a fixed 1000 components) to reduce variance
        let mut kernel_sum = Array2::zeros((x.nrows(), y.nrows()));

        for _ in 0..n_samples {
            let approx = self.compute_rbf_approximation(x, y, gamma, 1000)?;
            kernel_sum = kernel_sum + approx;
        }

        Ok(kernel_sum / n_samples as f64)
    }

    fn compute_approximation_error(
        &self,
        reference: &Array2<f64>,
        approximation: &Array2<f64>,
    ) -> Result<f64> {
        if reference.shape() != approximation.shape() {
            return Err(SklearsError::InvalidInput(
                "Matrix dimensions don't match".to_string(),
            ));
        }

        // Frobenius norm of the difference
        let diff = reference - approximation;
        let frobenius_norm = diff.mapv(|x| x * x).sum().sqrt();

        // Normalized by reference norm
        let reference_norm = reference.mapv(|x| x * x).sum().sqrt();

        if reference_norm > 0.0 {
            Ok(frobenius_norm / reference_norm)
        } else {
            Ok(frobenius_norm)
        }
    }

    /// Fit the power law `error ≈ C * n^(-rate)` by linear regression in
    /// log-log space and return the estimated rate.
    fn estimate_convergence_rate(
        &self,
        components: &[usize],
        errors: &[ApproximationError],
    ) -> Result<f64> {
        if components.len() < 2 || errors.len() < 2 {
            return Ok(0.0);
        }

        let mut log_components = Vec::new();
        let mut log_errors = Vec::new();

        for (i, error) in errors.iter().enumerate() {
            if error.mean > 0.0 {
                log_components.push((components[i] as f64).ln());
                log_errors.push(error.mean.ln());
            }
        }

        if log_components.len() < 2 {
            return Ok(0.0);
        }

        // Simple linear regression in log space
        let n = log_components.len() as f64;
        let sum_x = log_components.iter().sum::<f64>();
        let sum_y = log_errors.iter().sum::<f64>();
        let sum_xy = log_components
            .iter()
            .zip(log_errors.iter())
            .map(|(x, y)| x * y)
            .sum::<f64>();
        let sum_x2 = log_components.iter().map(|x| x * x).sum::<f64>();

        // Guard against a degenerate design (all component counts identical)
        let denominator = n * sum_x2 - sum_x * sum_x;
        if denominator.abs() < 1e-12 {
            return Ok(0.0);
        }
        let slope = (n * sum_xy - sum_x * sum_y) / denominator;

        Ok(-slope) // Negative because we expect error to decrease
    }
}

/// Summary statistics of the approximation error across trials
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApproximationError {
    /// Mean error across trials
    pub mean: f64,
    /// Standard deviation of the error across trials
    pub std: f64,
    /// Smallest observed error
    pub min: f64,
    /// Largest observed error
    pub max: f64,
}

/// Outcome of a convergence analysis run
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvergenceResult {
    /// Component counts at which error was measured
    pub component_counts: Vec<usize>,
    /// Error statistics for each component count
    pub approximation_errors: Vec<ApproximationError>,
    /// Estimated power-law convergence rate
    pub convergence_rate: f64,
    /// Whether the final mean error fell below the tolerance
    pub is_converged: bool,
}

/// Error bounds testing framework
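///
/// A minimal usage sketch (illustrative; `x` stands for any dense
/// `Array2<f64>` data matrix):
///
/// ```ignore
/// let validator = ErrorBoundsValidator::new()
///     .confidence_level(0.95)
///     .n_bootstrap_samples(200);
/// let report = validator.validate_rff_bounds(&x, 1.0, 50).unwrap();
/// for (bound_type, validation) in &report.bound_validations {
///     println!("{:?}: valid = {}", bound_type, validation.is_valid);
/// }
/// ```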
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorBoundsValidator {
    /// Confidence level for the bounds (e.g. 0.95)
    pub confidence_level: f64,
    /// Number of bootstrap approximations used to build the empirical error distribution
    pub n_bootstrap_samples: usize,
    /// Theoretical bound families to validate
    pub bound_types: Vec<BoundType>,
}

/// Families of concentration bounds that can be checked against the empirical error
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum BoundType {
    /// Hoeffding inequality for bounded random variables
    Hoeffding,
    /// McDiarmid inequality for functions with bounded differences
    McDiarmid,
    /// Azuma-Hoeffding inequality for martingale differences
    Azuma,
    /// Bernstein inequality using a variance term
    Bernstein,
    /// Data-driven bound based on Rademacher complexity
    Empirical,
}

impl ErrorBoundsValidator {
    /// Create a validator with a 95% confidence level, 1000 bootstrap samples,
    /// and a default set of bound families.
    pub fn new() -> Self {
        Self {
            confidence_level: 0.95,
            n_bootstrap_samples: 1000,
            bound_types: vec![
                BoundType::Hoeffding,
                BoundType::McDiarmid,
                BoundType::Bernstein,
                BoundType::Empirical,
            ],
        }
    }

    /// Set the confidence level for the bounds.
    pub fn confidence_level(mut self, level: f64) -> Self {
        self.confidence_level = level;
        self
    }

    /// Set the number of bootstrap approximations.
    pub fn n_bootstrap_samples(mut self, n_samples: usize) -> Self {
        self.n_bootstrap_samples = n_samples;
        self
    }

    /// Choose which bound families to validate.
    pub fn bound_types(mut self, bounds: Vec<BoundType>) -> Self {
        self.bound_types = bounds;
        self
    }

    /// Validate theoretical error bounds for random Fourier features against an
    /// empirical error distribution built by repeated approximation.
    pub fn validate_rff_bounds(
        &self,
        x: &Array2<f64>,
        gamma: f64,
        n_components: usize,
    ) -> Result<ErrorBoundsResult> {
        let mut bound_results = HashMap::new();

        // Compute exact kernel for comparison
        let exact_kernel = self.compute_exact_kernel(x, gamma)?;

        // Generate multiple approximations for the empirical distribution
        let mut approximation_errors = Vec::new();

        for _ in 0..self.n_bootstrap_samples {
            let approx_kernel = self.compute_rff_approximation(x, gamma, n_components)?;
            let error = self.compute_relative_error(&exact_kernel, &approx_kernel)?;
            approximation_errors.push(error);
        }

        approximation_errors.sort_by(|a, b| a.partial_cmp(b).unwrap());

        let empirical_mean =
            approximation_errors.iter().sum::<f64>() / approximation_errors.len() as f64;
        let empirical_variance = approximation_errors
            .iter()
            .map(|&e| (e - empirical_mean).powi(2))
            .sum::<f64>()
            / approximation_errors.len() as f64;

        // Compare each theoretical bound against the empirical upper quantile at
        // the same confidence level: a bound at confidence c should dominate at
        // least a fraction c of the sampled errors.
        for bound_type in &self.bound_types {
            let bound = self.compute_theoretical_bound(
                bound_type,
                n_components,
                x.ncols(),
                self.confidence_level,
            )?;
            let empirical_quantile_idx =
                (self.confidence_level * approximation_errors.len() as f64) as usize;
            let empirical_bound =
                approximation_errors[empirical_quantile_idx.min(approximation_errors.len() - 1)];

            bound_results.insert(
                bound_type.clone(),
                BoundValidation {
                    theoretical_bound: bound,
                    empirical_bound,
                    is_valid: bound >= empirical_bound,
                    tightness_ratio: if bound > 0.0 {
                        empirical_bound / bound
                    } else {
                        0.0
                    },
                },
            );
        }

        Ok(ErrorBoundsResult {
            empirical_mean,
            empirical_variance,
            empirical_quantiles: self.compute_quantiles(&approximation_errors),
            bound_validations: bound_results,
        })
    }

    fn compute_exact_kernel(&self, x: &Array2<f64>, gamma: f64) -> Result<Array2<f64>> {
        let n = x.nrows();
        let mut kernel = Array2::zeros((n, n));

        for i in 0..n {
            for j in 0..n {
                let diff = &x.row(i) - &x.row(j);
                let squared_norm = diff.dot(&diff);
                kernel[[i, j]] = (-gamma * squared_norm).exp();
            }
        }

        Ok(kernel)
    }

    fn compute_rff_approximation(
        &self,
        x: &Array2<f64>,
        gamma: f64,
        n_components: usize,
    ) -> Result<Array2<f64>> {
        // Same random Fourier feature construction as in ConvergenceAnalyzer:
        // W ~ N(0, 2*gamma*I), b ~ U[0, 2*pi), z(x) = sqrt(2/D) * cos(Wx + b)
        let mut rng = RealStdRng::from_seed(thread_rng().gen());
        let normal = RandNormal::new(0.0, (2.0 * gamma).sqrt()).unwrap();
        let uniform = RandUniform::new(0.0, 2.0 * PI).unwrap();

        let input_dim = x.ncols();
        let n_samples = x.nrows();

        // Generate random features
        let mut weights = Array2::zeros((n_components, input_dim));
        let mut biases = Array1::zeros(n_components);

        for i in 0..n_components {
            for j in 0..input_dim {
                weights[[i, j]] = rng.sample(normal);
            }
            biases[i] = rng.sample(uniform);
        }

        // Compute features
        let mut features = Array2::zeros((n_samples, n_components));
        let scaling = (2.0 / n_components as f64).sqrt();

        for i in 0..n_samples {
            for j in 0..n_components {
                let mut dot_product = 0.0;
                for k in 0..input_dim {
                    dot_product += x[[i, k]] * weights[[j, k]];
                }
                dot_product += biases[j];
                features[[i, j]] = scaling * dot_product.cos();
            }
        }

        // Compute the approximate kernel as inner products of feature rows
        let mut kernel = Array2::zeros((n_samples, n_samples));
        for i in 0..n_samples {
            for j in 0..n_samples {
                kernel[[i, j]] = features.row(i).dot(&features.row(j));
            }
        }

        Ok(kernel)
    }

    fn compute_relative_error(&self, exact: &Array2<f64>, approx: &Array2<f64>) -> Result<f64> {
        let diff = exact - approx;
        let frobenius_error = diff.mapv(|x| x * x).sum().sqrt();
        let exact_norm = exact.mapv(|x| x * x).sum().sqrt();

        if exact_norm > 0.0 {
            Ok(frobenius_error / exact_norm)
        } else {
            Ok(frobenius_error)
        }
    }

    fn compute_theoretical_bound(
        &self,
        bound_type: &BoundType,
        n_components: usize,
        input_dim: usize,
        confidence: f64,
    ) -> Result<f64> {
        // Failure probability; `delta.ln().abs()` below equals ln(1/delta)
        let delta = 1.0 - confidence;

        match bound_type {
            BoundType::Hoeffding => {
                // Hoeffding bound for the RFF approximation
                let c = 2.0; // Bounded by assumption
                Ok(c * (2.0 * delta.ln().abs() / n_components as f64).sqrt())
            }
            BoundType::McDiarmid => {
                // McDiarmid bound with bounded differences of order 1/sqrt(D)
                let c = 4.0 / (n_components as f64).sqrt();
                Ok(c * (2.0 * delta.ln().abs() / n_components as f64).sqrt())
            }
            BoundType::Bernstein => {
                // Bernstein bound (simplified variance and range terms)
                let variance_bound = 1.0 / n_components as f64;
                let range_bound = 2.0 / (n_components as f64).sqrt();
                let term1 = (2.0 * variance_bound * delta.ln().abs() / n_components as f64).sqrt();
                let term2 = 2.0 * range_bound * delta.ln().abs() / (3.0 * n_components as f64);
                Ok(term1 + term2)
            }
            BoundType::Azuma => {
                // Azuma-Hoeffding for martingale differences
                let c = 1.0 / (n_components as f64).sqrt();
                Ok(c * (2.0 * delta.ln().abs()).sqrt())
            }
            BoundType::Empirical => {
                // Empirical bound based on Rademacher complexity
                let rademacher_complexity = (input_dim as f64 / n_components as f64).sqrt();
                Ok(2.0 * rademacher_complexity
                    + (delta.ln().abs() / (2.0 * n_components as f64)).sqrt())
            }
        }
    }

    fn compute_quantiles(&self, data: &[f64]) -> HashMap<String, f64> {
        let mut quantiles = HashMap::new();
        let n = data.len();

        // Nearest-rank quantiles of the (already sorted) error sample
        for &p in &[0.05, 0.25, 0.5, 0.75, 0.95, 0.99] {
            let idx = ((p * n as f64) as usize).min(n - 1);
            quantiles.insert(format!("q{}", (p * 100.0) as u8), data[idx]);
        }

        quantiles
    }
}

/// Comparison of one theoretical bound against the empirical error distribution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoundValidation {
    /// Value of the theoretical bound
    pub theoretical_bound: f64,
    /// Empirical error quantile at the configured confidence level
    pub empirical_bound: f64,
    /// Whether the theoretical bound dominates the empirical bound
    pub is_valid: bool,
    /// Ratio of empirical to theoretical bound (closer to 1 means tighter)
    pub tightness_ratio: f64,
}

/// Aggregate results of error-bounds validation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorBoundsResult {
    /// Mean of the empirical error distribution
    pub empirical_mean: f64,
    /// Variance of the empirical error distribution
    pub empirical_variance: f64,
    /// Selected quantiles of the empirical error distribution
    pub empirical_quantiles: HashMap<String, f64>,
    /// Validation outcome per bound family
    pub bound_validations: HashMap<BoundType, BoundValidation>,
}

/// Quality assessment framework for kernel approximations
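///
/// A minimal usage sketch (illustrative; both kernel matrices must share the
/// same shape):
///
/// ```ignore
/// let assessment = QualityAssessment::new();
/// let result = assessment
///     .assess_approximation(&exact_kernel, &approx_kernel)
///     .unwrap();
/// println!("overall quality: {:.3}", result.overall_score);
/// ```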
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityAssessment {
    /// Metrics to evaluate
    pub metrics: Vec<QualityMetric>,
    /// Baselines against which methods can be compared
    pub baseline_methods: Vec<BaselineMethod>,
}

/// Metrics for comparing an approximate kernel matrix to an exact one
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum QualityMetric {
    /// Centered kernel alignment (cosine similarity of centered kernels)
    KernelAlignment,
    /// Largest singular value of the difference matrix
    SpectralError,
    /// Frobenius norm of the difference matrix
    FrobeniusError,
    /// Nuclear norm of the difference matrix
    NuclearNormError,
    /// Operator norm of the difference matrix
    OperatorNormError,
    /// Frobenius error normalized by the exact kernel norm
    RelativeError,
    /// Effective rank of the approximate kernel
    EffectiveRank,
}

/// Baseline methods for comparison
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BaselineMethod {
    /// Random sampling baseline
    RandomSampling,
    /// Uniform sampling baseline
    UniformSampling,
    /// Exact kernel computation
    ExactMethod,
    /// Best previously recorded method
    PreviousBest { method_name: String },
}

impl QualityAssessment {
    /// Create an assessment with a default set of metrics and baselines.
    pub fn new() -> Self {
        Self {
            metrics: vec![
                QualityMetric::KernelAlignment,
                QualityMetric::SpectralError,
                QualityMetric::FrobeniusError,
                QualityMetric::RelativeError,
            ],
            baseline_methods: vec![BaselineMethod::RandomSampling, BaselineMethod::ExactMethod],
        }
    }

    /// Evaluate every configured metric on the pair of kernel matrices and
    /// combine them into a single overall score in [0, 1].
    pub fn assess_approximation(
        &self,
        exact_kernel: &Array2<f64>,
        approx_kernel: &Array2<f64>,
    ) -> Result<QualityResult> {
        let mut metric_scores = HashMap::new();

        for metric in &self.metrics {
            let score = self.compute_metric(metric, exact_kernel, approx_kernel)?;
            metric_scores.insert(metric.clone(), score);
        }

        let overall_score = self.compute_overall_score(&metric_scores);
        Ok(QualityResult {
            metric_scores,
            overall_score,
        })
    }

    fn compute_metric(
        &self,
        metric: &QualityMetric,
        exact: &Array2<f64>,
        approx: &Array2<f64>,
    ) -> Result<f64> {
        match metric {
            QualityMetric::KernelAlignment => {
                // Centered kernel alignment: <Kc, K'c>_F / (||Kc||_F * ||K'c||_F)
                let exact_centered = self.center_kernel(exact)?;
                let approx_centered = self.center_kernel(approx)?;

                let numerator = self.frobenius_inner_product(&exact_centered, &approx_centered)?;
                let exact_norm = self.frobenius_norm(&exact_centered)?;
                let approx_norm = self.frobenius_norm(&approx_centered)?;

                if exact_norm > 0.0 && approx_norm > 0.0 {
                    Ok(numerator / (exact_norm * approx_norm))
                } else {
                    Ok(0.0)
                }
            }
            QualityMetric::FrobeniusError => {
                let diff = exact - approx;
                Ok(self.frobenius_norm(&diff)?)
            }
            QualityMetric::RelativeError => {
                let diff = exact - approx;
                let error_norm = self.frobenius_norm(&diff)?;
                let exact_norm = self.frobenius_norm(exact)?;

                if exact_norm > 0.0 {
                    Ok(error_norm / exact_norm)
                } else {
                    Ok(error_norm)
                }
            }
            QualityMetric::SpectralError => {
                // Largest singular value of the difference, via power iteration
                let diff = exact - approx;
                self.largest_singular_value(&diff)
            }
            QualityMetric::NuclearNormError => {
                let diff = exact - approx;
                self.nuclear_norm(&diff)
            }
            QualityMetric::OperatorNormError => {
                let diff = exact - approx;
                self.operator_norm(&diff)
            }
            QualityMetric::EffectiveRank => self.effective_rank(approx),
        }
    }

    fn center_kernel(&self, kernel: &Array2<f64>) -> Result<Array2<f64>> {
        // Double centering: Kc[i, j] = K[i, j] - row_mean[i] - col_mean[j] + overall_mean
        let n = kernel.nrows();
        let row_means = kernel.mean_axis(Axis(1)).unwrap();
        let col_means = kernel.mean_axis(Axis(0)).unwrap();
        let overall_mean = kernel.mean().unwrap();

        let mut centered = kernel.clone();

        for i in 0..n {
            for j in 0..n {
                centered[[i, j]] = kernel[[i, j]] - row_means[i] - col_means[j] + overall_mean;
            }
        }

        Ok(centered)
    }

    fn frobenius_inner_product(&self, a: &Array2<f64>, b: &Array2<f64>) -> Result<f64> {
        if a.shape() != b.shape() {
            return Err(SklearsError::InvalidInput(
                "Matrix dimensions don't match".to_string(),
            ));
        }

        Ok(a.iter().zip(b.iter()).map(|(x, y)| x * y).sum())
    }

    fn frobenius_norm(&self, matrix: &Array2<f64>) -> Result<f64> {
        Ok(matrix.mapv(|x| x * x).sum().sqrt())
    }

    fn largest_singular_value(&self, matrix: &Array2<f64>) -> Result<f64> {
        // Power iteration on A^T A: repeatedly map v -> A^T A v and normalize;
        // v converges to the top right-singular vector and ||A v|| to sigma_max.
        let n = matrix.nrows();
        let m = matrix.ncols();

        if n == 0 || m == 0 {
            return Ok(0.0);
        }

        let mut v = Array1::from_vec(vec![1.0; m]);
        v /= v.dot(&v).sqrt();

        for _ in 0..50 {
            let mut av: Array1<f64> = Array1::zeros(n);
            for i in 0..n {
                for j in 0..m {
                    av[i] += matrix[[i, j]] * v[j];
                }
            }

            let mut ata_v: Array1<f64> = Array1::zeros(m);
            for j in 0..m {
                for i in 0..n {
                    ata_v[j] += matrix[[i, j]] * av[i];
                }
            }

            let norm = ata_v.dot(&ata_v).sqrt();
            if norm > 1e-12 {
                v = ata_v / norm;
            } else {
                break;
            }
        }

        let mut av: Array1<f64> = Array1::zeros(n);
        for i in 0..n {
            for j in 0..m {
                av[i] += matrix[[i, j]] * v[j];
            }
        }

        Ok(av.dot(&av).sqrt())
    }

    fn nuclear_norm(&self, matrix: &Array2<f64>) -> Result<f64> {
        // Rough proxy for the nuclear norm: sum of absolute diagonal entries.
        // A full implementation would sum the singular values from an SVD.
        let trace = (0..matrix.nrows().min(matrix.ncols()))
            .map(|i| matrix[[i, i]].abs())
            .sum::<f64>();
        Ok(trace)
    }

    fn operator_norm(&self, matrix: &Array2<f64>) -> Result<f64> {
        // The operator (spectral) norm is the largest singular value
        self.largest_singular_value(matrix)
    }

    fn effective_rank(&self, matrix: &Array2<f64>) -> Result<f64> {
        // Simplified effective rank (tr K)^2 / ||K||_F^2; for a PSD kernel this
        // equals (sum of eigenvalues)^2 / (sum of squared eigenvalues).
        let trace = (0..matrix.nrows().min(matrix.ncols()))
            .map(|i| matrix[[i, i]])
            .sum::<f64>();
        let frobenius_squared = matrix.mapv(|x| x * x).sum();

        if frobenius_squared > 0.0 {
            Ok(trace * trace / frobenius_squared)
        } else {
            Ok(0.0)
        }
    }

    fn compute_overall_score(&self, metric_scores: &HashMap<QualityMetric, f64>) -> f64 {
        // Simple weighted average (in practice, use domain-specific weights)
        let weights = [
            (QualityMetric::KernelAlignment, 0.4),
            (QualityMetric::RelativeError, 0.3),
            (QualityMetric::FrobeniusError, 0.2),
            (QualityMetric::EffectiveRank, 0.1),
        ];

        let mut weighted_sum = 0.0;
        let mut total_weight = 0.0;

        for (metric, weight) in &weights {
            if let Some(&score) = metric_scores.get(metric) {
                let normalized_score = match metric {
                    QualityMetric::KernelAlignment => score, // Higher is better
                    QualityMetric::EffectiveRank => score / matrix_size_as_float(metric_scores),
                    _ => 1.0 / (1.0 + score), // Lower is better, transform to 0-1 scale
                };
                weighted_sum += weight * normalized_score;
                total_weight += weight;
            }
        }

        if total_weight > 0.0 {
            weighted_sum / total_weight
        } else {
            0.0
        }
    }
}

fn matrix_size_as_float(_metric_scores: &HashMap<QualityMetric, f64>) -> f64 {
    // Placeholder for matrix size normalization
    100.0
}

/// Metric values and combined score from a quality assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityResult {
    /// Score per metric
    pub metric_scores: HashMap<QualityMetric, f64>,
    /// Weighted overall score in [0, 1]
    pub overall_score: f64,
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::essentials::Normal;

    use scirs2_core::ndarray::{Array, Array2};
    use scirs2_core::random::thread_rng;

    #[test]
    fn test_convergence_analyzer() {
        let mut rng = thread_rng();
        let normal = Normal::new(0.0, 1.0).unwrap();
        let x: Array2<f64> = Array::from_shape_fn((20, 5), |_| rng.sample(&normal));
        let analyzer = ConvergenceAnalyzer::new(50)
            .component_steps(vec![10, 20, 30, 40, 50])
            .n_trials(3);

        let result = analyzer.analyze_rbf_convergence(&x, 1.0).unwrap();

        assert_eq!(result.component_counts.len(), 5);
        assert!(result.convergence_rate >= 0.0);
    }
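
    // Sanity check: the normalized Frobenius error has a closed form for
    // simple matrices, so we can pin it down exactly.
    #[test]
    fn test_approximation_error_known_matrices() {
        let analyzer = ConvergenceAnalyzer::new(10);
        let reference: Array2<f64> = Array::eye(5);
        let approximation: Array2<f64> = Array::zeros((5, 5));

        // ||I - 0||_F / ||I||_F = 1 exactly
        let error = analyzer
            .compute_approximation_error(&reference, &approximation)
            .unwrap();
        assert!((error - 1.0).abs() < 1e-12);

        // Mismatched shapes must be rejected
        let wrong_shape: Array2<f64> = Array::zeros((4, 5));
        assert!(analyzer
            .compute_approximation_error(&reference, &wrong_shape)
            .is_err());
    }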

    #[test]
    fn test_error_bounds_validator() {
        let mut rng = thread_rng();
        let normal = Normal::new(0.0, 1.0).unwrap();
        let x: Array2<f64> = Array::from_shape_fn((15, 4), |_| rng.sample(&normal));
        let validator = ErrorBoundsValidator::new()
            .confidence_level(0.9)
            .n_bootstrap_samples(50);

        let result = validator.validate_rff_bounds(&x, 1.0, 20).unwrap();

        assert!(result.empirical_mean >= 0.0);
        assert!(result.empirical_variance >= 0.0);
        assert!(!result.bound_validations.is_empty());
    }
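
    // The simplified concentration bounds should shrink as the number of
    // random features grows; this checks that monotonicity directly.
    #[test]
    fn test_theoretical_bounds_shrink_with_components() {
        let validator = ErrorBoundsValidator::new();
        for bound_type in [BoundType::Hoeffding, BoundType::Azuma, BoundType::Bernstein] {
            let loose = validator
                .compute_theoretical_bound(&bound_type, 10, 4, 0.95)
                .unwrap();
            let tight = validator
                .compute_theoretical_bound(&bound_type, 1000, 4, 0.95)
                .unwrap();
            assert!(tight <= loose, "{:?} bound should shrink", bound_type);
        }
    }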

    #[test]
    fn test_quality_assessment() {
        let exact = Array::eye(10);
        let mut approx = Array::eye(10);
        approx[[0, 0]] = 0.9; // Small perturbation

        let assessment = QualityAssessment::new();
        let result = assessment.assess_approximation(&exact, &approx).unwrap();

        assert!(result.overall_score > 0.0);
        assert!(result.overall_score <= 1.0);
        assert!(!result.metric_scores.is_empty());
    }
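
    // estimate_convergence_rate fits error ~ n^(-rate) in log-log space; on a
    // synthetic error sequence that follows the power law exactly, the fitted
    // rate should match the true exponent.
    #[test]
    fn test_estimate_convergence_rate_power_law() {
        let analyzer = ConvergenceAnalyzer::new(100);
        let components = vec![10usize, 20, 40, 80];
        let errors: Vec<ApproximationError> = components
            .iter()
            .map(|&n| {
                let e = (n as f64).powf(-0.5);
                ApproximationError {
                    mean: e,
                    std: 0.0,
                    min: e,
                    max: e,
                }
            })
            .collect();

        let rate = analyzer
            .estimate_convergence_rate(&components, &errors)
            .unwrap();
        assert!((rate - 0.5).abs() < 1e-10);
    }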
}