sklears_kernel_approximation/
error_bounded.rs

1//! Error-bounded kernel approximation methods
2//!
3//! This module provides kernel approximation methods with explicit error bounds
4//! and adaptive algorithms that guarantee approximation quality.
5
6use crate::{Nystroem, RBFSampler};
7use scirs2_core::ndarray::Array2;
8use scirs2_linalg::compat::Norm;
9use sklears_core::traits::Fit;
10use sklears_core::{error::Result, traits::Transform};
11
12/// Error bound computation methods
13#[derive(Debug, Clone)]
14/// ErrorBoundMethod
15pub enum ErrorBoundMethod {
16    /// Theoretical spectral bound
17    SpectralBound,
18    /// Frobenius norm bound
19    FrobeniusBound,
20    /// Empirical error estimation
21    EmpiricalBound,
22    /// Probabilistic error bound
23    ProbabilisticBound { confidence: f64 },
24    /// Perturbation-based bound
25    PerturbationBound,
26    /// Cross-validation based bound
27    CVBound { n_folds: usize },
28}
29
30/// Error bound result
31#[derive(Debug, Clone)]
32/// ErrorBound
33pub struct ErrorBound {
34    /// Upper bound on approximation error
35    pub upper_bound: f64,
36    /// Lower bound on approximation error
37    pub lower_bound: f64,
38    /// Confidence level (for probabilistic bounds)
39    pub confidence: Option<f64>,
40    /// Method used for bound computation
41    pub method: ErrorBoundMethod,
42}
43
44/// Configuration for error-bounded approximation
45#[derive(Debug, Clone)]
46/// ErrorBoundedConfig
47pub struct ErrorBoundedConfig {
48    /// Maximum allowed approximation error
49    pub max_error: f64,
50    /// Confidence level for probabilistic bounds
51    pub confidence_level: f64,
52    /// Error bound computation method
53    pub bound_method: ErrorBoundMethod,
54    /// Minimum number of components
55    pub min_components: usize,
56    /// Maximum number of components
57    pub max_components: usize,
58    /// Step size for component search
59    pub step_size: usize,
60    /// Number of trials for error estimation
61    pub n_trials: usize,
62    /// Random seed for reproducibility
63    pub random_seed: Option<u64>,
64}
65
66impl Default for ErrorBoundedConfig {
67    fn default() -> Self {
68        Self {
69            max_error: 0.1,
70            confidence_level: 0.95,
71            bound_method: ErrorBoundMethod::SpectralBound,
72            min_components: 10,
73            max_components: 1000,
74            step_size: 10,
75            n_trials: 5,
76            random_seed: None,
77        }
78    }
79}
80
81/// Error-bounded RBF sampler
82#[derive(Debug, Clone)]
83/// ErrorBoundedRBFSampler
84pub struct ErrorBoundedRBFSampler {
85    gamma: f64,
86    config: ErrorBoundedConfig,
87}
88
89impl Default for ErrorBoundedRBFSampler {
90    fn default() -> Self {
91        Self::new()
92    }
93}
94
95impl ErrorBoundedRBFSampler {
96    /// Create a new error-bounded RBF sampler
97    pub fn new() -> Self {
98        Self {
99            gamma: 1.0,
100            config: ErrorBoundedConfig::default(),
101        }
102    }
103
104    /// Set gamma parameter
105    pub fn gamma(mut self, gamma: f64) -> Self {
106        self.gamma = gamma;
107        self
108    }
109
110    /// Set configuration
111    pub fn config(mut self, config: ErrorBoundedConfig) -> Self {
112        self.config = config;
113        self
114    }
115
116    /// Set maximum allowed error
117    pub fn max_error(mut self, max_error: f64) -> Self {
118        self.config.max_error = max_error;
119        self
120    }
121
122    /// Set confidence level
123    pub fn confidence_level(mut self, confidence_level: f64) -> Self {
124        self.config.confidence_level = confidence_level;
125        self
126    }
127
128    /// Find minimum number of components that satisfies error bound
129    pub fn find_min_components(&self, x: &Array2<f64>) -> Result<(usize, ErrorBound)> {
130        let n_samples = x.nrows();
131        let _n_features = x.ncols();
132
133        // Create test-validation split
134        let split_idx = (n_samples as f64 * 0.8) as usize;
135        let x_train = x
136            .slice(scirs2_core::ndarray::s![..split_idx, ..])
137            .to_owned();
138        let _x_test = x
139            .slice(scirs2_core::ndarray::s![split_idx.., ..])
140            .to_owned();
141
142        // Compute exact kernel matrix for small subset (for bound computation)
143        let k_exact = self.compute_exact_kernel_matrix(&x_train)?;
144
145        // Test different numbers of components
146        for n_components in
147            (self.config.min_components..=self.config.max_components).step_by(self.config.step_size)
148        {
149            let mut trial_errors = Vec::new();
150
151            // Run multiple trials
152            for trial in 0..self.config.n_trials {
153                let seed = self.config.random_seed.map(|s| s + trial as u64);
154                let sampler = if let Some(s) = seed {
155                    RBFSampler::new(n_components)
156                        .gamma(self.gamma)
157                        .random_state(s)
158                } else {
159                    RBFSampler::new(n_components).gamma(self.gamma)
160                };
161
162                let fitted = sampler.fit(&x_train, &())?;
163                let x_train_transformed = fitted.transform(&x_train)?;
164
165                // Compute approximation error
166                let error =
167                    self.compute_approximation_error(&k_exact, &x_train_transformed, &x_train)?;
168
169                trial_errors.push(error);
170            }
171
172            // Compute error bound
173            let error_bound = self.compute_error_bound(&trial_errors, n_components)?;
174
175            // Check if error bound is satisfied
176            if error_bound.upper_bound <= self.config.max_error {
177                return Ok((n_components, error_bound));
178            }
179        }
180
181        // If no configuration satisfies the bound, return the best one
182        let n_components = self.config.max_components;
183        let sampler = if let Some(seed) = self.config.random_seed {
184            RBFSampler::new(n_components)
185                .gamma(self.gamma)
186                .random_state(seed)
187        } else {
188            RBFSampler::new(n_components).gamma(self.gamma)
189        };
190        let fitted = sampler.fit(&x_train, &())?;
191        let x_train_transformed = fitted.transform(&x_train)?;
192
193        let error = self.compute_approximation_error(&k_exact, &x_train_transformed, &x_train)?;
194
195        let error_bound = self.compute_error_bound(&[error], n_components)?;
196
197        Ok((n_components, error_bound))
198    }
199
200    /// Compute exact kernel matrix for a subset of data
201    fn compute_exact_kernel_matrix(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
202        let n_samples = x.nrows().min(200); // Limit for computational efficiency
203        let x_subset = x.slice(scirs2_core::ndarray::s![..n_samples, ..]);
204
205        let mut k_exact = Array2::zeros((n_samples, n_samples));
206
207        for i in 0..n_samples {
208            for j in 0..n_samples {
209                let diff = &x_subset.row(i) - &x_subset.row(j);
210                let squared_norm = diff.dot(&diff);
211                k_exact[[i, j]] = (-self.gamma * squared_norm).exp();
212            }
213        }
214
215        Ok(k_exact)
216    }
217
218    /// Compute approximation error between exact and approximate kernel matrices
219    fn compute_approximation_error(
220        &self,
221        k_exact: &Array2<f64>,
222        x_transformed: &Array2<f64>,
223        _x: &Array2<f64>,
224    ) -> Result<f64> {
225        let n_samples = k_exact.nrows().min(x_transformed.nrows());
226        let x_subset = x_transformed.slice(scirs2_core::ndarray::s![..n_samples, ..]);
227
228        // Compute approximate kernel matrix
229        let k_approx = x_subset.dot(&x_subset.t());
230
231        // Compute error based on Frobenius norm
232        let diff = k_exact - &k_approx.slice(scirs2_core::ndarray::s![..n_samples, ..n_samples]);
233        let error = diff.norm_l2();
234
235        Ok(error)
236    }
237
238    /// Compute error bound from trial errors
239    fn compute_error_bound(&self, trial_errors: &[f64], n_components: usize) -> Result<ErrorBound> {
240        match &self.config.bound_method {
241            ErrorBoundMethod::SpectralBound => {
242                self.compute_spectral_bound(trial_errors, n_components)
243            }
244            ErrorBoundMethod::FrobeniusBound => {
245                self.compute_frobenius_bound(trial_errors, n_components)
246            }
247            ErrorBoundMethod::EmpiricalBound => {
248                self.compute_empirical_bound(trial_errors, n_components)
249            }
250            ErrorBoundMethod::ProbabilisticBound { confidence } => {
251                self.compute_probabilistic_bound(trial_errors, *confidence, n_components)
252            }
253            ErrorBoundMethod::PerturbationBound => {
254                self.compute_perturbation_bound(trial_errors, n_components)
255            }
256            ErrorBoundMethod::CVBound { n_folds } => {
257                self.compute_cv_bound(trial_errors, *n_folds, n_components)
258            }
259        }
260    }
261
262    /// Compute spectral error bound
263    fn compute_spectral_bound(
264        &self,
265        trial_errors: &[f64],
266        n_components: usize,
267    ) -> Result<ErrorBound> {
268        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
269        let variance = trial_errors
270            .iter()
271            .map(|&e| (e - mean_error).powi(2))
272            .sum::<f64>()
273            / trial_errors.len() as f64;
274
275        // Theoretical spectral bound for RBF kernel approximation
276        let theoretical_bound = (2.0 * self.gamma / n_components as f64).sqrt();
277
278        let upper_bound = mean_error + 2.0 * variance.sqrt() + theoretical_bound;
279        let lower_bound = (mean_error - 2.0 * variance.sqrt()).max(0.0);
280
281        Ok(ErrorBound {
282            upper_bound,
283            lower_bound,
284            confidence: Some(0.95),
285            method: ErrorBoundMethod::SpectralBound,
286        })
287    }
288
289    /// Compute Frobenius norm error bound
290    fn compute_frobenius_bound(
291        &self,
292        trial_errors: &[f64],
293        n_components: usize,
294    ) -> Result<ErrorBound> {
295        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
296        let max_error = trial_errors.iter().fold(0.0f64, |acc, &e| acc.max(e));
297
298        // Frobenius bound typically grows with sqrt(rank)
299        let theoretical_bound = (mean_error * n_components as f64).sqrt();
300
301        let upper_bound = max_error + theoretical_bound;
302        let lower_bound = mean_error * 0.5;
303
304        Ok(ErrorBound {
305            upper_bound,
306            lower_bound,
307            confidence: None,
308            method: ErrorBoundMethod::FrobeniusBound,
309        })
310    }
311
312    /// Compute empirical error bound
313    fn compute_empirical_bound(
314        &self,
315        trial_errors: &[f64],
316        _n_components: usize,
317    ) -> Result<ErrorBound> {
318        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
319        let std_error = {
320            let variance = trial_errors
321                .iter()
322                .map(|&e| (e - mean_error).powi(2))
323                .sum::<f64>()
324                / trial_errors.len() as f64;
325            variance.sqrt()
326        };
327
328        let upper_bound = mean_error + 2.0 * std_error;
329        let lower_bound = (mean_error - 2.0 * std_error).max(0.0);
330
331        Ok(ErrorBound {
332            upper_bound,
333            lower_bound,
334            confidence: Some(0.95),
335            method: ErrorBoundMethod::EmpiricalBound,
336        })
337    }
338
339    /// Compute probabilistic error bound
340    fn compute_probabilistic_bound(
341        &self,
342        trial_errors: &[f64],
343        confidence: f64,
344        _n_components: usize,
345    ) -> Result<ErrorBound> {
346        let mut sorted_errors = trial_errors.to_vec();
347        sorted_errors.sort_by(|a, b| a.partial_cmp(b).unwrap());
348
349        let n = sorted_errors.len();
350        let alpha = 1.0 - confidence;
351
352        let lower_idx = ((alpha / 2.0) * n as f64) as usize;
353        let upper_idx = ((1.0 - alpha / 2.0) * n as f64) as usize;
354
355        let lower_bound = sorted_errors[lower_idx.min(n - 1)];
356        let upper_bound = sorted_errors[upper_idx.min(n - 1)];
357
358        Ok(ErrorBound {
359            upper_bound,
360            lower_bound,
361            confidence: Some(confidence),
362            method: ErrorBoundMethod::ProbabilisticBound { confidence },
363        })
364    }
365
366    /// Compute perturbation-based error bound
367    fn compute_perturbation_bound(
368        &self,
369        trial_errors: &[f64],
370        _n_components: usize,
371    ) -> Result<ErrorBound> {
372        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
373        let std_error = {
374            let variance = trial_errors
375                .iter()
376                .map(|&e| (e - mean_error).powi(2))
377                .sum::<f64>()
378                / trial_errors.len() as f64;
379            variance.sqrt()
380        };
381
382        // Perturbation bound considers the stability of the approximation
383        let perturbation_factor = 1.0 + (std_error / mean_error.max(1e-10));
384        let theoretical_bound = mean_error * perturbation_factor;
385
386        let upper_bound = theoretical_bound + std_error;
387        let lower_bound = (mean_error / perturbation_factor - std_error).max(0.0);
388
389        Ok(ErrorBound {
390            upper_bound,
391            lower_bound,
392            confidence: Some(0.9),
393            method: ErrorBoundMethod::PerturbationBound,
394        })
395    }
396
397    /// Compute cross-validation based error bound
398    fn compute_cv_bound(
399        &self,
400        trial_errors: &[f64],
401        n_folds: usize,
402        _n_components: usize,
403    ) -> Result<ErrorBound> {
404        // Use cross-validation to estimate error bound
405        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
406        let std_error = {
407            let variance = trial_errors
408                .iter()
409                .map(|&e| (e - mean_error).powi(2))
410                .sum::<f64>()
411                / trial_errors.len() as f64;
412            variance.sqrt()
413        };
414
415        // CV bound includes adjustment for number of folds
416        let cv_adjustment = (n_folds as f64).sqrt() / (n_folds - 1) as f64;
417        let cv_std = std_error * cv_adjustment;
418
419        let upper_bound = mean_error + 2.0 * cv_std;
420        let lower_bound = (mean_error - 2.0 * cv_std).max(0.0);
421
422        Ok(ErrorBound {
423            upper_bound,
424            lower_bound,
425            confidence: Some(0.95),
426            method: ErrorBoundMethod::CVBound { n_folds },
427        })
428    }
429}
430
431/// Fitted error-bounded RBF sampler
432pub struct FittedErrorBoundedRBFSampler {
433    fitted_rbf: crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
434    n_components: usize,
435    error_bound: ErrorBound,
436}
437
438impl Fit<Array2<f64>, ()> for ErrorBoundedRBFSampler {
439    type Fitted = FittedErrorBoundedRBFSampler;
440
441    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
442        // Find minimum components that satisfy error bound
443        let (n_components, error_bound) = self.find_min_components(x)?;
444
445        // Fit RBF sampler with selected components
446        let rbf_sampler = if let Some(seed) = self.config.random_seed {
447            RBFSampler::new(n_components)
448                .gamma(self.gamma)
449                .random_state(seed)
450        } else {
451            RBFSampler::new(n_components).gamma(self.gamma)
452        };
453        let fitted_rbf = rbf_sampler.fit(x, &())?;
454
455        Ok(FittedErrorBoundedRBFSampler {
456            fitted_rbf,
457            n_components,
458            error_bound,
459        })
460    }
461}
462
463impl Transform<Array2<f64>, Array2<f64>> for FittedErrorBoundedRBFSampler {
464    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
465        self.fitted_rbf.transform(x)
466    }
467}
468
469impl FittedErrorBoundedRBFSampler {
470    /// Get the error bound
471    pub fn error_bound(&self) -> &ErrorBound {
472        &self.error_bound
473    }
474
475    /// Get the number of components
476    pub fn n_components(&self) -> usize {
477        self.n_components
478    }
479
480    /// Check if error bound is satisfied
481    pub fn is_bound_satisfied(&self, tolerance: f64) -> bool {
482        self.error_bound.upper_bound <= tolerance
483    }
484}
485
486/// Error-bounded Nyström method
487#[derive(Debug, Clone)]
488/// ErrorBoundedNystroem
489pub struct ErrorBoundedNystroem {
490    kernel: crate::nystroem::Kernel,
491    config: ErrorBoundedConfig,
492}
493
494impl Default for ErrorBoundedNystroem {
495    fn default() -> Self {
496        Self::new()
497    }
498}
499
500impl ErrorBoundedNystroem {
501    /// Create a new error-bounded Nyström method
502    pub fn new() -> Self {
503        Self {
504            kernel: crate::nystroem::Kernel::Rbf { gamma: 1.0 },
505            config: ErrorBoundedConfig::default(),
506        }
507    }
508
509    /// Set gamma parameter (for RBF kernel)
510    pub fn gamma(mut self, gamma: f64) -> Self {
511        self.kernel = crate::nystroem::Kernel::Rbf { gamma };
512        self
513    }
514
515    /// Set kernel type
516    pub fn kernel(mut self, kernel: crate::nystroem::Kernel) -> Self {
517        self.kernel = kernel;
518        self
519    }
520
521    /// Set configuration
522    pub fn config(mut self, config: ErrorBoundedConfig) -> Self {
523        self.config = config;
524        self
525    }
526
527    /// Find minimum number of components that satisfies error bound
528    pub fn find_min_components(&self, x: &Array2<f64>) -> Result<(usize, ErrorBound)> {
529        let _n_samples = x.nrows();
530
531        // Test different numbers of components
532        for n_components in
533            (self.config.min_components..=self.config.max_components).step_by(self.config.step_size)
534        {
535            let mut trial_errors = Vec::new();
536
537            // Run multiple trials
538            for trial in 0..self.config.n_trials {
539                let seed = self.config.random_seed.map(|s| s + trial as u64);
540                let nystroem = if let Some(s) = seed {
541                    Nystroem::new(self.kernel.clone(), n_components).random_state(s)
542                } else {
543                    Nystroem::new(self.kernel.clone(), n_components)
544                };
545
546                let fitted = nystroem.fit(x, &())?;
547                let x_transformed = fitted.transform(x)?;
548
549                // Compute approximation error using spectral norm
550                let error = self.compute_nystroem_error(&x_transformed, x)?;
551                trial_errors.push(error);
552            }
553
554            // Compute error bound
555            let error_bound = self.compute_error_bound(&trial_errors, n_components)?;
556
557            // Check if error bound is satisfied
558            if error_bound.upper_bound <= self.config.max_error {
559                return Ok((n_components, error_bound));
560            }
561        }
562
563        // If no configuration satisfies the bound, return the best one
564        let n_components = self.config.max_components;
565        let nystroem = Nystroem::new(self.kernel.clone(), n_components);
566        let fitted = nystroem.fit(x, &())?;
567        let x_transformed = fitted.transform(x)?;
568
569        let error = self.compute_nystroem_error(&x_transformed, x)?;
570        let error_bound = self.compute_error_bound(&[error], n_components)?;
571
572        Ok((n_components, error_bound))
573    }
574
575    /// Compute Nyström approximation error
576    fn compute_nystroem_error(&self, x_transformed: &Array2<f64>, x: &Array2<f64>) -> Result<f64> {
577        let n_samples = x.nrows().min(100); // Limit for computational efficiency
578        let x_subset = x.slice(scirs2_core::ndarray::s![..n_samples, ..]);
579
580        // Compute exact kernel matrix for subset
581        let k_exact = self
582            .kernel
583            .compute_kernel(&x_subset.to_owned(), &x_subset.to_owned());
584
585        // Compute approximate kernel matrix
586        let x_transformed_subset = x_transformed.slice(scirs2_core::ndarray::s![..n_samples, ..]);
587        let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
588
589        // Compute Frobenius norm error
590        let diff = k_exact - &k_approx;
591        let error = diff.norm_l2();
592
593        Ok(error)
594    }
595
596    /// Compute error bound (using same method as RBF sampler)
597    fn compute_error_bound(
598        &self,
599        trial_errors: &[f64],
600        _n_components: usize,
601    ) -> Result<ErrorBound> {
602        let mean_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
603        let std_error = {
604            let variance = trial_errors
605                .iter()
606                .map(|&e| (e - mean_error).powi(2))
607                .sum::<f64>()
608                / trial_errors.len() as f64;
609            variance.sqrt()
610        };
611
612        let upper_bound = mean_error + 2.0 * std_error;
613        let lower_bound = (mean_error - 2.0 * std_error).max(0.0);
614
615        Ok(ErrorBound {
616            upper_bound,
617            lower_bound,
618            confidence: Some(0.95),
619            method: self.config.bound_method.clone(),
620        })
621    }
622}
623
624/// Fitted error-bounded Nyström method
625pub struct FittedErrorBoundedNystroem {
626    fitted_nystroem: crate::nystroem::Nystroem<sklears_core::traits::Trained>,
627    n_components: usize,
628    error_bound: ErrorBound,
629}
630
631impl Fit<Array2<f64>, ()> for ErrorBoundedNystroem {
632    type Fitted = FittedErrorBoundedNystroem;
633
634    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
635        // Find minimum components that satisfy error bound
636        let (n_components, error_bound) = self.find_min_components(x)?;
637
638        // Fit Nyström method with selected components
639        let nystroem = Nystroem::new(self.kernel, n_components);
640        let fitted_nystroem = nystroem.fit(x, &())?;
641
642        Ok(FittedErrorBoundedNystroem {
643            fitted_nystroem,
644            n_components,
645            error_bound,
646        })
647    }
648}
649
650impl Transform<Array2<f64>, Array2<f64>> for FittedErrorBoundedNystroem {
651    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
652        self.fitted_nystroem.transform(x)
653    }
654}
655
656impl FittedErrorBoundedNystroem {
657    /// Get the error bound
658    pub fn error_bound(&self) -> &ErrorBound {
659        &self.error_bound
660    }
661
662    /// Get the number of components
663    pub fn n_components(&self) -> usize {
664        self.n_components
665    }
666
667    /// Check if error bound is satisfied
668    pub fn is_bound_satisfied(&self, tolerance: f64) -> bool {
669        self.error_bound.upper_bound <= tolerance
670    }
671}
672
673#[allow(non_snake_case)]
674#[cfg(test)]
675mod tests {
676    use super::*;
677
678    #[test]
679    fn test_error_bounded_rbf_sampler() {
680        let x = Array2::from_shape_vec((100, 4), (0..400).map(|i| (i as f64) * 0.01).collect())
681            .unwrap();
682
683        let config = ErrorBoundedConfig {
684            max_error: 0.5,
685            min_components: 10,
686            max_components: 50,
687            step_size: 10,
688            n_trials: 2,
689            ..Default::default()
690        };
691
692        let sampler = ErrorBoundedRBFSampler::new().gamma(0.5).config(config);
693
694        let fitted = sampler.fit(&x, &()).unwrap();
695        let transformed = fitted.transform(&x).unwrap();
696
697        assert_eq!(transformed.nrows(), 100);
698        assert!(fitted.n_components() >= 10);
699        assert!(fitted.n_components() <= 50);
700        assert!(fitted.error_bound().upper_bound >= 0.0);
701        assert!(fitted.error_bound().lower_bound >= 0.0);
702    }
703
704    #[test]
705    fn test_error_bounded_nystroem() {
706        let x =
707            Array2::from_shape_vec((80, 3), (0..240).map(|i| (i as f64) * 0.02).collect()).unwrap();
708
709        let config = ErrorBoundedConfig {
710            max_error: 0.3,
711            min_components: 5,
712            max_components: 25,
713            step_size: 5,
714            n_trials: 2,
715            ..Default::default()
716        };
717
718        let nystroem = ErrorBoundedNystroem::new().gamma(1.0).config(config);
719
720        let fitted = nystroem.fit(&x, &()).unwrap();
721        let transformed = fitted.transform(&x).unwrap();
722
723        assert_eq!(transformed.nrows(), 80);
724        assert!(fitted.n_components() >= 5);
725        assert!(fitted.n_components() <= 25);
726        assert!(fitted.error_bound().upper_bound >= 0.0);
727    }
728
729    #[test]
730    fn test_error_bound_methods() {
731        let x =
732            Array2::from_shape_vec((50, 2), (0..100).map(|i| (i as f64) * 0.05).collect()).unwrap();
733
734        let methods = vec![
735            ErrorBoundMethod::SpectralBound,
736            ErrorBoundMethod::FrobeniusBound,
737            ErrorBoundMethod::EmpiricalBound,
738            ErrorBoundMethod::ProbabilisticBound { confidence: 0.95 },
739            ErrorBoundMethod::PerturbationBound,
740            ErrorBoundMethod::CVBound { n_folds: 3 },
741        ];
742
743        for method in methods {
744            let config = ErrorBoundedConfig {
745                max_error: 0.4,
746                bound_method: method,
747                min_components: 5,
748                max_components: 20,
749                step_size: 5,
750                n_trials: 2,
751                ..Default::default()
752            };
753
754            let sampler = ErrorBoundedRBFSampler::new().gamma(0.8).config(config);
755
756            let fitted = sampler.fit(&x, &()).unwrap();
757            let error_bound = fitted.error_bound();
758
759            assert!(error_bound.upper_bound >= error_bound.lower_bound);
760            assert!(error_bound.upper_bound >= 0.0);
761            assert!(error_bound.lower_bound >= 0.0);
762        }
763    }
764
765    #[test]
766    fn test_bound_satisfaction() {
767        let x =
768            Array2::from_shape_vec((40, 3), (0..120).map(|i| (i as f64) * 0.03).collect()).unwrap();
769
770        let config = ErrorBoundedConfig {
771            max_error: 0.2,
772            min_components: 5,
773            max_components: 30,
774            step_size: 5,
775            n_trials: 2,
776            ..Default::default()
777        };
778
779        let sampler = ErrorBoundedRBFSampler::new().gamma(0.3).config(config);
780
781        let fitted = sampler.fit(&x, &()).unwrap();
782
783        // The fitted sampler should have computed an error bound
784        let actual_bound = fitted.error_bound().upper_bound;
785
786        // Error bound should be reasonable (not infinite or NaN)
787        assert!(actual_bound.is_finite());
788        assert!(actual_bound > 0.0);
789
790        // The fitted sampler should satisfy a bound larger than its computed bound
791        assert!(fitted.is_bound_satisfied(actual_bound + 0.1));
792
793        // But not a much stricter bound
794        assert!(!fitted.is_bound_satisfied(0.01));
795    }
796
797    #[test]
798    fn test_min_components_search() {
799        let x =
800            Array2::from_shape_vec((60, 4), (0..240).map(|i| (i as f64) * 0.01).collect()).unwrap();
801
802        let sampler = ErrorBoundedRBFSampler::new().gamma(0.5).max_error(0.3);
803
804        let (n_components, error_bound) = sampler.find_min_components(&x).unwrap();
805
806        assert!(n_components >= sampler.config.min_components);
807        assert!(n_components <= sampler.config.max_components);
808        assert!(error_bound.upper_bound >= 0.0);
809        assert!(error_bound.lower_bound >= 0.0);
810        assert!(error_bound.upper_bound >= error_bound.lower_bound);
811    }
812
813    #[test]
814    fn test_reproducibility() {
815        let x =
816            Array2::from_shape_vec((50, 3), (0..150).map(|i| (i as f64) * 0.02).collect()).unwrap();
817
818        let config = ErrorBoundedConfig {
819            max_error: 0.25,
820            min_components: 10,
821            max_components: 30,
822            step_size: 10,
823            n_trials: 3,
824            random_seed: Some(42),
825            ..Default::default()
826        };
827
828        let sampler1 = ErrorBoundedRBFSampler::new()
829            .gamma(0.7)
830            .config(config.clone());
831
832        let sampler2 = ErrorBoundedRBFSampler::new().gamma(0.7).config(config);
833
834        let fitted1 = sampler1.fit(&x, &()).unwrap();
835        let fitted2 = sampler2.fit(&x, &()).unwrap();
836
837        // The key reproducibility requirement is that we get the same number of components
838        assert_eq!(fitted1.n_components(), fitted2.n_components());
839
840        // Error bounds may vary slightly due to numerical precision in the search process
841        // but should be in a similar range (within 50% of each other)
842        let bound1 = fitted1.error_bound().upper_bound;
843        let bound2 = fitted2.error_bound().upper_bound;
844        let ratio = bound1.max(bound2) / bound1.min(bound2).max(1e-10);
845        assert!(
846            ratio < 2.0,
847            "Error bounds too different: {} vs {}, ratio: {}",
848            bound1,
849            bound2,
850            ratio
851        );
852    }
853}
sklears_kernel_approximation/error_bounded.rs

sklears_kernel_approximation/
error_bounded.rs