sklears_kernel_approximation/
error_bounded.rs

1//! Error-bounded kernel approximation methods
2//!
3//! This module provides kernel approximation methods with explicit error bounds
4//! and adaptive algorithms that guarantee approximation quality.
5
6use crate::{Nystroem, RBFSampler};
7use scirs2_core::ndarray::ndarray_linalg::Norm;
8use scirs2_core::ndarray::Array2;
9use sklears_core::traits::Fit;
10use sklears_core::{error::Result, traits::Transform};
11
12/// Error bound computation methods
13#[derive(Debug, Clone)]
14/// ErrorBoundMethod
15pub enum ErrorBoundMethod {
16    /// Theoretical spectral bound
17    SpectralBound,
18    /// Frobenius norm bound
19    FrobeniusBound,
20    /// Empirical error estimation
21    EmpiricalBound,
22    /// Probabilistic error bound
23    ProbabilisticBound { confidence: f64 },
24    /// Perturbation-based bound
25    PerturbationBound,
26    /// Cross-validation based bound
27    CVBound { n_folds: usize },
28}
29
30/// Error bound result
31#[derive(Debug, Clone)]
32/// ErrorBound
33pub struct ErrorBound {
34    /// Upper bound on approximation error
35    pub upper_bound: f64,
36    /// Lower bound on approximation error
37    pub lower_bound: f64,
38    /// Confidence level (for probabilistic bounds)
39    pub confidence: Option<f64>,
40    /// Method used for bound computation
41    pub method: ErrorBoundMethod,
42}
43
44/// Configuration for error-bounded approximation
45#[derive(Debug, Clone)]
46/// ErrorBoundedConfig
47pub struct ErrorBoundedConfig {
48    /// Maximum allowed approximation error
49    pub max_error: f64,
50    /// Confidence level for probabilistic bounds
51    pub confidence_level: f64,
52    /// Error bound computation method
53    pub bound_method: ErrorBoundMethod,
54    /// Minimum number of components
55    pub min_components: usize,
56    /// Maximum number of components
57    pub max_components: usize,
58    /// Step size for component search
59    pub step_size: usize,
60    /// Number of trials for error estimation
61    pub n_trials: usize,
62    /// Random seed for reproducibility
63    pub random_seed: Option<u64>,
64}
65
66impl Default for ErrorBoundedConfig {
67    fn default() -> Self {
68        Self {
69            max_error: 0.1,
70            confidence_level: 0.95,
71            bound_method: ErrorBoundMethod::SpectralBound,
72            min_components: 10,
73            max_components: 1000,
74            step_size: 10,
75            n_trials: 5,
76            random_seed: None,
77        }
78    }
79}
80
81/// Error-bounded RBF sampler
82#[derive(Debug, Clone)]
83/// ErrorBoundedRBFSampler
84pub struct ErrorBoundedRBFSampler {
85    gamma: f64,
86    config: ErrorBoundedConfig,
87}
88
89impl ErrorBoundedRBFSampler {
90    /// Create a new error-bounded RBF sampler
91    pub fn new() -> Self {
92        Self {
93            gamma: 1.0,
94            config: ErrorBoundedConfig::default(),
95        }
96    }
97
98    /// Set gamma parameter
99    pub fn gamma(mut self, gamma: f64) -> Self {
100        self.gamma = gamma;
101        self
102    }
103
104    /// Set configuration
105    pub fn config(mut self, config: ErrorBoundedConfig) -> Self {
106        self.config = config;
107        self
108    }
109
110    /// Set maximum allowed error
111    pub fn max_error(mut self, max_error: f64) -> Self {
112        self.config.max_error = max_error;
113        self
114    }
115
116    /// Set confidence level
117    pub fn confidence_level(mut self, confidence_level: f64) -> Self {
118        self.config.confidence_level = confidence_level;
119        self
120    }
121
122    /// Find minimum number of components that satisfies error bound
123    pub fn find_min_components(&self, x: &Array2<f64>) -> Result<(usize, ErrorBound)> {
124        let n_samples = x.nrows();
125        let n_features = x.ncols();
126
127        // Create test-validation split
128        let split_idx = (n_samples as f64 * 0.8) as usize;
129        let x_train = x
130            .slice(scirs2_core::ndarray::s![..split_idx, ..])
131            .to_owned();
132        let x_test = x
133            .slice(scirs2_core::ndarray::s![split_idx.., ..])
134            .to_owned();
135
136        // Compute exact kernel matrix for small subset (for bound computation)
137        let k_exact = self.compute_exact_kernel_matrix(&x_train)?;
138
139        // Test different numbers of components
140        for n_components in
141            (self.config.min_components..=self.config.max_components).step_by(self.config.step_size)
142        {
143            let mut trial_errors = Vec::new();
144
145            // Run multiple trials
146            for trial in 0..self.config.n_trials {
147                let seed = self.config.random_seed.map(|s| s + trial as u64);
148                let sampler = if let Some(s) = seed {
149                    RBFSampler::new(n_components)
150                        .gamma(self.gamma)
151                        .random_state(s)
152                } else {
153                    RBFSampler::new(n_components).gamma(self.gamma)
154                };
155
156                let fitted = sampler.fit(&x_train, &())?;
157                let x_train_transformed = fitted.transform(&x_train)?;
158
159                // Compute approximation error
160                let error =
161                    self.compute_approximation_error(&k_exact, &x_train_transformed, &x_train)?;
162
163                trial_errors.push(error);
164            }
165
166            // Compute error bound
167            let error_bound = self.compute_error_bound(&trial_errors, n_components)?;
168
169            // Check if error bound is satisfied
170            if error_bound.upper_bound <= self.config.max_error {
171                return Ok((n_components, error_bound));
172            }
173        }
174
175        // If no configuration satisfies the bound, return the best one
176        let n_components = self.config.max_components;
177        let sampler = if let Some(seed) = self.config.random_seed {
178            RBFSampler::new(n_components)
179                .gamma(self.gamma)
180                .random_state(seed)
181        } else {
182            RBFSampler::new(n_components).gamma(self.gamma)
183        };
184        let fitted = sampler.fit(&x_train, &())?;
185        let x_train_transformed = fitted.transform(&x_train)?;
186
187        let error = self.compute_approximation_error(&k_exact, &x_train_transformed, &x_train)?;
188
189        let error_bound = self.compute_error_bound(&[error], n_components)?;
190
191        Ok((n_components, error_bound))
192    }
193
194    /// Compute exact kernel matrix for a subset of data
195    fn compute_exact_kernel_matrix(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
196        let n_samples = x.nrows().min(200); // Limit for computational efficiency
197        let x_subset = x.slice(scirs2_core::ndarray::s![..n_samples, ..]);
198
199        let mut k_exact = Array2::zeros((n_samples, n_samples));
200
201        for i in 0..n_samples {
202            for j in 0..n_samples {
203                let diff = &x_subset.row(i) - &x_subset.row(j);
204                let squared_norm = diff.dot(&diff);
205                k_exact[[i, j]] = (-self.gamma * squared_norm).exp();
206            }
207        }
208
209        Ok(k_exact)
210    }
211
212    /// Compute approximation error between exact and approximate kernel matrices
213    fn compute_approximation_error(
214        &self,
215        k_exact: &Array2<f64>,
216        x_transformed: &Array2<f64>,
217        x: &Array2<f64>,
218    ) -> Result<f64> {
219        let n_samples = k_exact.nrows().min(x_transformed.nrows());
220        let x_subset = x_transformed.slice(scirs2_core::ndarray::s![..n_samples, ..]);
221
222        // Compute approximate kernel matrix
223        let k_approx = x_subset.dot(&x_subset.t());
224
225        // Compute error based on Frobenius norm
226        let diff = k_exact - &k_approx.slice(scirs2_core::ndarray::s![..n_samples, ..n_samples]);
227        let error = diff.norm_l2();
228
229        Ok(error)
230    }
231
232    /// Compute error bound from trial errors
233    fn compute_error_bound(&self, trial_errors: &[f64], n_components: usize) -> Result<ErrorBound> {
234        match &self.config.bound_method {
235            ErrorBoundMethod::SpectralBound => {
236                self.compute_spectral_bound(trial_errors, n_components)
237            }
238            ErrorBoundMethod::FrobeniusBound => {
239                self.compute_frobenius_bound(trial_errors, n_components)
240            }
241            ErrorBoundMethod::EmpiricalBound => {
242                self.compute_empirical_bound(trial_errors, n_components)
243            }
244            ErrorBoundMethod::ProbabilisticBound { confidence } => {
245                self.compute_probabilistic_bound(trial_errors, *confidence, n_components)
246            }
247            ErrorBoundMethod::PerturbationBound => {
248                self.compute_perturbation_bound(trial_errors, n_components)
249            }
250            ErrorBoundMethod::CVBound { n_folds } => {
251                self.compute_cv_bound(trial_errors, *n_folds, n_components)
252            }
253        }
254    }
255
256    /// Compute spectral error bound
257    fn compute_spectral_bound(
258        &self,
259        trial_errors: &[f64],
260        n_components: usize,
261    ) -> Result<ErrorBound> {
262        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
263        let variance = trial_errors
264            .iter()
265            .map(|&e| (e - mean_error).powi(2))
266            .sum::<f64>()
267            / trial_errors.len() as f64;
268
269        // Theoretical spectral bound for RBF kernel approximation
270        let theoretical_bound = (2.0 * self.gamma / n_components as f64).sqrt();
271
272        let upper_bound = mean_error + 2.0 * variance.sqrt() + theoretical_bound;
273        let lower_bound = (mean_error - 2.0 * variance.sqrt()).max(0.0);
274
275        Ok(ErrorBound {
276            upper_bound,
277            lower_bound,
278            confidence: Some(0.95),
279            method: ErrorBoundMethod::SpectralBound,
280        })
281    }
282
283    /// Compute Frobenius norm error bound
284    fn compute_frobenius_bound(
285        &self,
286        trial_errors: &[f64],
287        n_components: usize,
288    ) -> Result<ErrorBound> {
289        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
290        let max_error = trial_errors.iter().fold(0.0f64, |acc, &e| acc.max(e));
291
292        // Frobenius bound typically grows with sqrt(rank)
293        let theoretical_bound = (mean_error * n_components as f64).sqrt();
294
295        let upper_bound = max_error + theoretical_bound;
296        let lower_bound = mean_error * 0.5;
297
298        Ok(ErrorBound {
299            upper_bound,
300            lower_bound,
301            confidence: None,
302            method: ErrorBoundMethod::FrobeniusBound,
303        })
304    }
305
306    /// Compute empirical error bound
307    fn compute_empirical_bound(
308        &self,
309        trial_errors: &[f64],
310        _n_components: usize,
311    ) -> Result<ErrorBound> {
312        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
313        let std_error = {
314            let variance = trial_errors
315                .iter()
316                .map(|&e| (e - mean_error).powi(2))
317                .sum::<f64>()
318                / trial_errors.len() as f64;
319            variance.sqrt()
320        };
321
322        let upper_bound = mean_error + 2.0 * std_error;
323        let lower_bound = (mean_error - 2.0 * std_error).max(0.0);
324
325        Ok(ErrorBound {
326            upper_bound,
327            lower_bound,
328            confidence: Some(0.95),
329            method: ErrorBoundMethod::EmpiricalBound,
330        })
331    }
332
333    /// Compute probabilistic error bound
334    fn compute_probabilistic_bound(
335        &self,
336        trial_errors: &[f64],
337        confidence: f64,
338        _n_components: usize,
339    ) -> Result<ErrorBound> {
340        let mut sorted_errors = trial_errors.to_vec();
341        sorted_errors.sort_by(|a, b| a.partial_cmp(b).unwrap());
342
343        let n = sorted_errors.len();
344        let alpha = 1.0 - confidence;
345
346        let lower_idx = ((alpha / 2.0) * n as f64) as usize;
347        let upper_idx = ((1.0 - alpha / 2.0) * n as f64) as usize;
348
349        let lower_bound = sorted_errors[lower_idx.min(n - 1)];
350        let upper_bound = sorted_errors[upper_idx.min(n - 1)];
351
352        Ok(ErrorBound {
353            upper_bound,
354            lower_bound,
355            confidence: Some(confidence),
356            method: ErrorBoundMethod::ProbabilisticBound { confidence },
357        })
358    }
359
360    /// Compute perturbation-based error bound
361    fn compute_perturbation_bound(
362        &self,
363        trial_errors: &[f64],
364        n_components: usize,
365    ) -> Result<ErrorBound> {
366        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
367        let std_error = {
368            let variance = trial_errors
369                .iter()
370                .map(|&e| (e - mean_error).powi(2))
371                .sum::<f64>()
372                / trial_errors.len() as f64;
373            variance.sqrt()
374        };
375
376        // Perturbation bound considers the stability of the approximation
377        let perturbation_factor = 1.0 + (std_error / mean_error.max(1e-10));
378        let theoretical_bound = mean_error * perturbation_factor;
379
380        let upper_bound = theoretical_bound + std_error;
381        let lower_bound = (mean_error / perturbation_factor - std_error).max(0.0);
382
383        Ok(ErrorBound {
384            upper_bound,
385            lower_bound,
386            confidence: Some(0.9),
387            method: ErrorBoundMethod::PerturbationBound,
388        })
389    }
390
391    /// Compute cross-validation based error bound
392    fn compute_cv_bound(
393        &self,
394        trial_errors: &[f64],
395        n_folds: usize,
396        _n_components: usize,
397    ) -> Result<ErrorBound> {
398        // Use cross-validation to estimate error bound
399        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
400        let std_error = {
401            let variance = trial_errors
402                .iter()
403                .map(|&e| (e - mean_error).powi(2))
404                .sum::<f64>()
405                / trial_errors.len() as f64;
406            variance.sqrt()
407        };
408
409        // CV bound includes adjustment for number of folds
410        let cv_adjustment = (n_folds as f64).sqrt() / (n_folds - 1) as f64;
411        let cv_std = std_error * cv_adjustment;
412
413        let upper_bound = mean_error + 2.0 * cv_std;
414        let lower_bound = (mean_error - 2.0 * cv_std).max(0.0);
415
416        Ok(ErrorBound {
417            upper_bound,
418            lower_bound,
419            confidence: Some(0.95),
420            method: ErrorBoundMethod::CVBound { n_folds },
421        })
422    }
423}
424
425/// Fitted error-bounded RBF sampler
426pub struct FittedErrorBoundedRBFSampler {
427    fitted_rbf: crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
428    n_components: usize,
429    error_bound: ErrorBound,
430}
431
432impl Fit<Array2<f64>, ()> for ErrorBoundedRBFSampler {
433    type Fitted = FittedErrorBoundedRBFSampler;
434
435    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
436        // Find minimum components that satisfy error bound
437        let (n_components, error_bound) = self.find_min_components(x)?;
438
439        // Fit RBF sampler with selected components
440        let rbf_sampler = if let Some(seed) = self.config.random_seed {
441            RBFSampler::new(n_components)
442                .gamma(self.gamma)
443                .random_state(seed)
444        } else {
445            RBFSampler::new(n_components).gamma(self.gamma)
446        };
447        let fitted_rbf = rbf_sampler.fit(x, &())?;
448
449        Ok(FittedErrorBoundedRBFSampler {
450            fitted_rbf,
451            n_components,
452            error_bound,
453        })
454    }
455}
456
457impl Transform<Array2<f64>, Array2<f64>> for FittedErrorBoundedRBFSampler {
458    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
459        self.fitted_rbf.transform(x)
460    }
461}
462
463impl FittedErrorBoundedRBFSampler {
464    /// Get the error bound
465    pub fn error_bound(&self) -> &ErrorBound {
466        &self.error_bound
467    }
468
469    /// Get the number of components
470    pub fn n_components(&self) -> usize {
471        self.n_components
472    }
473
474    /// Check if error bound is satisfied
475    pub fn is_bound_satisfied(&self, tolerance: f64) -> bool {
476        self.error_bound.upper_bound <= tolerance
477    }
478}
479
480/// Error-bounded Nyström method
481#[derive(Debug, Clone)]
482/// ErrorBoundedNystroem
483pub struct ErrorBoundedNystroem {
484    kernel: crate::nystroem::Kernel,
485    config: ErrorBoundedConfig,
486}
487
488impl ErrorBoundedNystroem {
489    /// Create a new error-bounded Nyström method
490    pub fn new() -> Self {
491        Self {
492            kernel: crate::nystroem::Kernel::Rbf { gamma: 1.0 },
493            config: ErrorBoundedConfig::default(),
494        }
495    }
496
497    /// Set gamma parameter (for RBF kernel)
498    pub fn gamma(mut self, gamma: f64) -> Self {
499        self.kernel = crate::nystroem::Kernel::Rbf { gamma };
500        self
501    }
502
503    /// Set kernel type
504    pub fn kernel(mut self, kernel: crate::nystroem::Kernel) -> Self {
505        self.kernel = kernel;
506        self
507    }
508
509    /// Set configuration
510    pub fn config(mut self, config: ErrorBoundedConfig) -> Self {
511        self.config = config;
512        self
513    }
514
515    /// Find minimum number of components that satisfies error bound
516    pub fn find_min_components(&self, x: &Array2<f64>) -> Result<(usize, ErrorBound)> {
517        let n_samples = x.nrows();
518
519        // Test different numbers of components
520        for n_components in
521            (self.config.min_components..=self.config.max_components).step_by(self.config.step_size)
522        {
523            let mut trial_errors = Vec::new();
524
525            // Run multiple trials
526            for trial in 0..self.config.n_trials {
527                let seed = self.config.random_seed.map(|s| s + trial as u64);
528                let nystroem = if let Some(s) = seed {
529                    Nystroem::new(self.kernel.clone(), n_components).random_state(s)
530                } else {
531                    Nystroem::new(self.kernel.clone(), n_components)
532                };
533
534                let fitted = nystroem.fit(x, &())?;
535                let x_transformed = fitted.transform(x)?;
536
537                // Compute approximation error using spectral norm
538                let error = self.compute_nystroem_error(&x_transformed, x)?;
539                trial_errors.push(error);
540            }
541
542            // Compute error bound
543            let error_bound = self.compute_error_bound(&trial_errors, n_components)?;
544
545            // Check if error bound is satisfied
546            if error_bound.upper_bound <= self.config.max_error {
547                return Ok((n_components, error_bound));
548            }
549        }
550
551        // If no configuration satisfies the bound, return the best one
552        let n_components = self.config.max_components;
553        let nystroem = Nystroem::new(self.kernel.clone(), n_components);
554        let fitted = nystroem.fit(x, &())?;
555        let x_transformed = fitted.transform(x)?;
556
557        let error = self.compute_nystroem_error(&x_transformed, x)?;
558        let error_bound = self.compute_error_bound(&[error], n_components)?;
559
560        Ok((n_components, error_bound))
561    }
562
563    /// Compute Nyström approximation error
564    fn compute_nystroem_error(&self, x_transformed: &Array2<f64>, x: &Array2<f64>) -> Result<f64> {
565        let n_samples = x.nrows().min(100); // Limit for computational efficiency
566        let x_subset = x.slice(scirs2_core::ndarray::s![..n_samples, ..]);
567
568        // Compute exact kernel matrix for subset
569        let k_exact = self
570            .kernel
571            .compute_kernel(&x_subset.to_owned(), &x_subset.to_owned());
572
573        // Compute approximate kernel matrix
574        let x_transformed_subset = x_transformed.slice(scirs2_core::ndarray::s![..n_samples, ..]);
575        let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
576
577        // Compute Frobenius norm error
578        let diff = k_exact - &k_approx;
579        let error = diff.norm_l2();
580
581        Ok(error)
582    }
583
584    /// Compute error bound (using same method as RBF sampler)
585    fn compute_error_bound(&self, trial_errors: &[f64], n_components: usize) -> Result<ErrorBound> {
586        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
587        let std_error = {
588            let variance = trial_errors
589                .iter()
590                .map(|&e| (e - mean_error).powi(2))
591                .sum::<f64>()
592                / trial_errors.len() as f64;
593            variance.sqrt()
594        };
595
596        let upper_bound = mean_error + 2.0 * std_error;
597        let lower_bound = (mean_error - 2.0 * std_error).max(0.0);
598
599        Ok(ErrorBound {
600            upper_bound,
601            lower_bound,
602            confidence: Some(0.95),
603            method: self.config.bound_method.clone(),
604        })
605    }
606}
607
608/// Fitted error-bounded Nyström method
609pub struct FittedErrorBoundedNystroem {
610    fitted_nystroem: crate::nystroem::Nystroem<sklears_core::traits::Trained>,
611    n_components: usize,
612    error_bound: ErrorBound,
613}
614
615impl Fit<Array2<f64>, ()> for ErrorBoundedNystroem {
616    type Fitted = FittedErrorBoundedNystroem;
617
618    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
619        // Find minimum components that satisfy error bound
620        let (n_components, error_bound) = self.find_min_components(x)?;
621
622        // Fit Nyström method with selected components
623        let nystroem = Nystroem::new(self.kernel, n_components);
624        let fitted_nystroem = nystroem.fit(x, &())?;
625
626        Ok(FittedErrorBoundedNystroem {
627            fitted_nystroem,
628            n_components,
629            error_bound,
630        })
631    }
632}
633
634impl Transform<Array2<f64>, Array2<f64>> for FittedErrorBoundedNystroem {
635    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
636        self.fitted_nystroem.transform(x)
637    }
638}
639
640impl FittedErrorBoundedNystroem {
641    /// Get the error bound
642    pub fn error_bound(&self) -> &ErrorBound {
643        &self.error_bound
644    }
645
646    /// Get the number of components
647    pub fn n_components(&self) -> usize {
648        self.n_components
649    }
650
651    /// Check if error bound is satisfied
652    pub fn is_bound_satisfied(&self, tolerance: f64) -> bool {
653        self.error_bound.upper_bound <= tolerance
654    }
655}
656
657#[allow(non_snake_case)]
658#[cfg(test)]
659mod tests {
660    use super::*;
661
662    #[test]
663    fn test_error_bounded_rbf_sampler() {
664        let x = Array2::from_shape_vec((100, 4), (0..400).map(|i| (i as f64) * 0.01).collect())
665            .unwrap();
666
667        let config = ErrorBoundedConfig {
668            max_error: 0.5,
669            min_components: 10,
670            max_components: 50,
671            step_size: 10,
672            n_trials: 2,
673            ..Default::default()
674        };
675
676        let sampler = ErrorBoundedRBFSampler::new().gamma(0.5).config(config);
677
678        let fitted = sampler.fit(&x, &()).unwrap();
679        let transformed = fitted.transform(&x).unwrap();
680
681        assert_eq!(transformed.nrows(), 100);
682        assert!(fitted.n_components() >= 10);
683        assert!(fitted.n_components() <= 50);
684        assert!(fitted.error_bound().upper_bound >= 0.0);
685        assert!(fitted.error_bound().lower_bound >= 0.0);
686    }
687
688    #[test]
689    fn test_error_bounded_nystroem() {
690        let x =
691            Array2::from_shape_vec((80, 3), (0..240).map(|i| (i as f64) * 0.02).collect()).unwrap();
692
693        let config = ErrorBoundedConfig {
694            max_error: 0.3,
695            min_components: 5,
696            max_components: 25,
697            step_size: 5,
698            n_trials: 2,
699            ..Default::default()
700        };
701
702        let nystroem = ErrorBoundedNystroem::new().gamma(1.0).config(config);
703
704        let fitted = nystroem.fit(&x, &()).unwrap();
705        let transformed = fitted.transform(&x).unwrap();
706
707        assert_eq!(transformed.nrows(), 80);
708        assert!(fitted.n_components() >= 5);
709        assert!(fitted.n_components() <= 25);
710        assert!(fitted.error_bound().upper_bound >= 0.0);
711    }
712
713    #[test]
714    fn test_error_bound_methods() {
715        let x =
716            Array2::from_shape_vec((50, 2), (0..100).map(|i| (i as f64) * 0.05).collect()).unwrap();
717
718        let methods = vec![
719            ErrorBoundMethod::SpectralBound,
720            ErrorBoundMethod::FrobeniusBound,
721            ErrorBoundMethod::EmpiricalBound,
722            ErrorBoundMethod::ProbabilisticBound { confidence: 0.95 },
723            ErrorBoundMethod::PerturbationBound,
724            ErrorBoundMethod::CVBound { n_folds: 3 },
725        ];
726
727        for method in methods {
728            let config = ErrorBoundedConfig {
729                max_error: 0.4,
730                bound_method: method,
731                min_components: 5,
732                max_components: 20,
733                step_size: 5,
734                n_trials: 2,
735                ..Default::default()
736            };
737
738            let sampler = ErrorBoundedRBFSampler::new().gamma(0.8).config(config);
739
740            let fitted = sampler.fit(&x, &()).unwrap();
741            let error_bound = fitted.error_bound();
742
743            assert!(error_bound.upper_bound >= error_bound.lower_bound);
744            assert!(error_bound.upper_bound >= 0.0);
745            assert!(error_bound.lower_bound >= 0.0);
746        }
747    }
748
749    #[test]
750    fn test_bound_satisfaction() {
751        let x =
752            Array2::from_shape_vec((40, 3), (0..120).map(|i| (i as f64) * 0.03).collect()).unwrap();
753
754        let config = ErrorBoundedConfig {
755            max_error: 0.2,
756            min_components: 5,
757            max_components: 30,
758            step_size: 5,
759            n_trials: 2,
760            ..Default::default()
761        };
762
763        let sampler = ErrorBoundedRBFSampler::new().gamma(0.3).config(config);
764
765        let fitted = sampler.fit(&x, &()).unwrap();
766
767        // The fitted sampler should have computed an error bound
768        let actual_bound = fitted.error_bound().upper_bound;
769
770        // Error bound should be reasonable (not infinite or NaN)
771        assert!(actual_bound.is_finite());
772        assert!(actual_bound > 0.0);
773
774        // The fitted sampler should satisfy a bound larger than its computed bound
775        assert!(fitted.is_bound_satisfied(actual_bound + 0.1));
776
777        // But not a much stricter bound
778        assert!(!fitted.is_bound_satisfied(0.01));
779    }
780
781    #[test]
782    fn test_min_components_search() {
783        let x =
784            Array2::from_shape_vec((60, 4), (0..240).map(|i| (i as f64) * 0.01).collect()).unwrap();
785
786        let sampler = ErrorBoundedRBFSampler::new().gamma(0.5).max_error(0.3);
787
788        let (n_components, error_bound) = sampler.find_min_components(&x).unwrap();
789
790        assert!(n_components >= sampler.config.min_components);
791        assert!(n_components <= sampler.config.max_components);
792        assert!(error_bound.upper_bound >= 0.0);
793        assert!(error_bound.lower_bound >= 0.0);
794        assert!(error_bound.upper_bound >= error_bound.lower_bound);
795    }
796
797    #[test]
798    fn test_reproducibility() {
799        let x =
800            Array2::from_shape_vec((50, 3), (0..150).map(|i| (i as f64) * 0.02).collect()).unwrap();
801
802        let config = ErrorBoundedConfig {
803            max_error: 0.25,
804            min_components: 10,
805            max_components: 30,
806            step_size: 10,
807            n_trials: 3,
808            random_seed: Some(42),
809            ..Default::default()
810        };
811
812        let sampler1 = ErrorBoundedRBFSampler::new()
813            .gamma(0.7)
814            .config(config.clone());
815
816        let sampler2 = ErrorBoundedRBFSampler::new().gamma(0.7).config(config);
817
818        let fitted1 = sampler1.fit(&x, &()).unwrap();
819        let fitted2 = sampler2.fit(&x, &()).unwrap();
820
821        // The key reproducibility requirement is that we get the same number of components
822        assert_eq!(fitted1.n_components(), fitted2.n_components());
823
824        // Error bounds may vary slightly due to numerical precision in the search process
825        // but should be in a similar range (within 50% of each other)
826        let bound1 = fitted1.error_bound().upper_bound;
827        let bound2 = fitted2.error_bound().upper_bound;
828        let ratio = bound1.max(bound2) / bound1.min(bound2).max(1e-10);
829        assert!(
830            ratio < 2.0,
831            "Error bounds too different: {} vs {}, ratio: {}",
832            bound1,
833            bound2,
834            ratio
835        );
836    }
837}
sklears_kernel_approximation/error_bounded.rs

sklears_kernel_approximation/
error_bounded.rs