1use crate::RBFSampler;
7use scirs2_core::ndarray::{concatenate, s, Array2, Axis};
8use scirs2_linalg::compat::{Norm, SVD};
9use sklears_core::traits::Fit;
10use sklears_core::{
11 error::{Result, SklearsError},
12 traits::Transform,
13};
14use std::collections::HashMap;
15
/// Metric used to score how well a given number of random features
/// approximates the exact RBF kernel.
#[derive(Debug, Clone)]
pub enum QualityMetric {
    /// Frobenius-norm based approximation quality.
    FrobeniusNorm,
    /// Spectral-norm based quality. NOTE(review): no dedicated
    /// implementation yet; `compute_quality_score` falls back to
    /// kernel alignment for this variant.
    SpectralNorm,
    /// Nuclear-norm based quality. NOTE(review): no dedicated
    /// implementation yet; falls back to kernel alignment.
    NuclearNorm,
    /// Frobenius error relative to the norm of the original data.
    RelativeFrobeniusNorm,
    /// Alignment between the exact and the approximate kernel matrices.
    KernelAlignment,
    /// Entropy-based effective rank of the transformed feature matrix.
    EffectiveRank,
    /// Stability of feature means across cross-validation folds.
    CrossValidation,
    /// Trace-based quality. NOTE(review): no dedicated implementation
    /// yet; falls back to kernel alignment.
    Trace,
}
37
/// Strategy for choosing the optimal dimension from the per-dimension
/// quality scores and approximation errors.
#[derive(Debug, Clone)]
pub enum SelectionStrategy {
    /// Smallest dimension whose averaged approximation error is within
    /// `tolerance`.
    ErrorTolerance { tolerance: f64 },
    /// Dimension maximizing quality-per-feature, subject to a minimum
    /// `efficiency_threshold`.
    QualityEfficiency { efficiency_threshold: f64 },
    /// Elbow of the quality curve detected via the discrete second
    /// derivative; `sensitivity` is the minimum curvature accepted.
    ElbowMethod { sensitivity: f64 },
    /// Cross-validated selection. NOTE(review): not implemented —
    /// `select_optimal_dimension` falls back to best-quality selection.
    CrossValidation { n_folds: usize },
    /// Information-criterion based selection. NOTE(review): not
    /// implemented — falls back to best-quality selection.
    InformationCriteria { criterion: String },
    /// Early stopping on the quality curve. NOTE(review): not
    /// implemented — falls back to best-quality selection.
    EarlyStopping {
        // Number of dimensions without improvement before stopping.
        patience: usize,
        // Minimum quality gain counted as an improvement.
        min_improvement: f64,
    },
}
58
/// Configuration for the adaptive random-feature dimension search.
#[derive(Debug, Clone)]
pub struct AdaptiveDimensionConfig {
    /// Smallest candidate number of random features (inclusive).
    pub min_features: usize,
    /// Largest candidate number of random features (inclusive).
    pub max_features: usize,
    /// Increment between consecutive candidate dimensions.
    pub step_size: usize,
    /// Metric used to score each candidate dimension.
    pub quality_metric: QualityMetric,
    /// Strategy used to pick the final dimension from the scores.
    pub selection_strategy: SelectionStrategy,
    /// Number of independent trials averaged per candidate dimension.
    pub n_trials: usize,
    /// Base RNG seed; trial `t` uses `seed + t` when set.
    pub random_seed: Option<u64>,
    /// Fraction of samples held out for validation (e.g. 0.2 = 20%).
    pub validation_fraction: f64,
}
80
81impl Default for AdaptiveDimensionConfig {
82 fn default() -> Self {
83 Self {
84 min_features: 10,
85 max_features: 1000,
86 step_size: 10,
87 quality_metric: QualityMetric::KernelAlignment,
88 selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.1 },
89 n_trials: 3,
90 random_seed: None,
91 validation_fraction: 0.2,
92 }
93 }
94}
95
/// Outcome of the adaptive dimension search, with per-candidate metrics
/// keyed by the candidate dimension.
#[derive(Debug, Clone)]
pub struct DimensionSelectionResult {
    /// The dimension chosen by the configured selection strategy.
    pub optimal_dimension: usize,
    /// Averaged quality score per candidate dimension.
    pub quality_scores: HashMap<usize, f64>,
    /// Averaged approximation error per candidate dimension.
    pub approximation_errors: HashMap<usize, f64>,
    /// Averaged fit+transform wall-clock time (seconds) per dimension.
    pub computation_times: HashMap<usize, f64>,
    /// Estimated projection-matrix memory (bytes) per dimension.
    pub memory_usage: HashMap<usize, usize>,
}
111
/// RBF feature sampler that picks its number of random features
/// automatically by searching over candidate dimensions.
#[derive(Debug, Clone)]
pub struct AdaptiveRBFSampler {
    // RBF kernel bandwidth: k(x, y) = exp(-gamma * ||x - y||^2).
    gamma: f64,
    // Search-space and scoring configuration.
    config: AdaptiveDimensionConfig,
}
119
120impl Default for AdaptiveRBFSampler {
121 fn default() -> Self {
122 Self::new()
123 }
124}
125
126impl AdaptiveRBFSampler {
127 pub fn new() -> Self {
129 Self {
130 gamma: 1.0,
131 config: AdaptiveDimensionConfig::default(),
132 }
133 }
134
135 pub fn gamma(mut self, gamma: f64) -> Self {
137 self.gamma = gamma;
138 self
139 }
140
141 pub fn config(mut self, config: AdaptiveDimensionConfig) -> Self {
143 self.config = config;
144 self
145 }
146
147 pub fn select_dimension(&self, x: &Array2<f64>) -> Result<DimensionSelectionResult> {
149 let n_samples = x.nrows();
150 let n_features = x.ncols();
151
152 let split_idx = (n_samples as f64 * (1.0 - self.config.validation_fraction)) as usize;
154 let x_train = x.slice(s![..split_idx, ..]).to_owned();
155 let x_val = x.slice(s![split_idx.., ..]).to_owned();
156
157 let mut quality_scores = HashMap::new();
158 let mut approximation_errors = HashMap::new();
159 let mut computation_times = HashMap::new();
160 let mut memory_usage = HashMap::new();
161
162 let dimensions: Vec<usize> = (self.config.min_features..=self.config.max_features)
164 .step_by(self.config.step_size)
165 .collect();
166
167 for &n_components in &dimensions {
168 let mut trial_scores = Vec::new();
169 let mut trial_errors = Vec::new();
170 let mut trial_times = Vec::new();
171
172 for trial in 0..self.config.n_trials {
174 let start_time = std::time::Instant::now();
175
176 let seed = self.config.random_seed.map(|s| s + trial as u64);
178 let sampler = if let Some(s) = seed {
179 RBFSampler::new(n_components)
180 .gamma(self.gamma)
181 .random_state(s)
182 } else {
183 RBFSampler::new(n_components).gamma(self.gamma)
184 };
185
186 let fitted = sampler.fit(&x_train, &())?;
188 let x_train_transformed = fitted.transform(&x_train)?;
189 let x_val_transformed = fitted.transform(&x_val)?;
190
191 let elapsed = start_time.elapsed().as_secs_f64();
192 trial_times.push(elapsed);
193
194 let quality_score = self.compute_quality_score(
196 &x_train,
197 &x_val,
198 &x_train_transformed,
199 &x_val_transformed,
200 &fitted,
201 )?;
202
203 let approximation_error = self.compute_approximation_error(
204 &x_train,
205 &x_val,
206 &x_train_transformed,
207 &x_val_transformed,
208 )?;
209
210 trial_scores.push(quality_score);
211 trial_errors.push(approximation_error);
212 }
213
214 let avg_score = trial_scores.iter().sum::<f64>() / trial_scores.len() as f64;
216 let avg_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
217 let avg_time = trial_times.iter().sum::<f64>() / trial_times.len() as f64;
218
219 quality_scores.insert(n_components, avg_score);
220 approximation_errors.insert(n_components, avg_error);
221 computation_times.insert(n_components, avg_time);
222 memory_usage.insert(n_components, n_components * n_features * 8); }
224
225 let optimal_dimension =
227 self.select_optimal_dimension(&dimensions, &quality_scores, &approximation_errors)?;
228
229 Ok(DimensionSelectionResult {
230 optimal_dimension,
231 quality_scores,
232 approximation_errors,
233 computation_times,
234 memory_usage,
235 })
236 }
237
238 fn compute_quality_score(
239 &self,
240 x_train: &Array2<f64>,
241 _x_val: &Array2<f64>,
242 x_train_transformed: &Array2<f64>,
243 _x_val_transformed: &Array2<f64>,
244 _fitted_sampler: &crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
245 ) -> Result<f64> {
246 match &self.config.quality_metric {
247 QualityMetric::KernelAlignment => {
248 self.compute_kernel_alignment(x_train, x_train_transformed)
249 }
250 QualityMetric::EffectiveRank => self.compute_effective_rank(x_train_transformed),
251 QualityMetric::FrobeniusNorm => {
252 self.compute_frobenius_approximation_quality(x_train, x_train_transformed)
253 }
254 QualityMetric::RelativeFrobeniusNorm => {
255 self.compute_relative_frobenius_quality(x_train, x_train_transformed)
256 }
257 QualityMetric::CrossValidation => {
258 self.compute_cross_validation_score(x_train, x_train_transformed)
259 }
260 _ => {
261 self.compute_kernel_alignment(x_train, x_train_transformed)
263 }
264 }
265 }
266
267 fn compute_kernel_alignment(
268 &self,
269 x: &Array2<f64>,
270 x_transformed: &Array2<f64>,
271 ) -> Result<f64> {
272 let n_samples = x.nrows().min(100); let x_subset = x.slice(s![..n_samples, ..]);
275
276 let mut k_exact = Array2::zeros((n_samples, n_samples));
277 for i in 0..n_samples {
278 for j in 0..n_samples {
279 let diff = &x_subset.row(i) - &x_subset.row(j);
280 let squared_norm = diff.dot(&diff);
281 k_exact[[i, j]] = (-self.gamma * squared_norm).exp();
282 }
283 }
284
285 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
287 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
288
289 let k_exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
291 let k_approx_frobenius = k_approx.iter().map(|&x| x * x).sum::<f64>().sqrt();
292 let k_product = (&k_exact * &k_approx).sum();
293
294 let alignment = k_product / (k_exact_frobenius * k_approx_frobenius);
295 Ok(alignment)
296 }
297
298 fn compute_effective_rank(&self, x_transformed: &Array2<f64>) -> Result<f64> {
299 let (_, s, _) = x_transformed
301 .svd(true)
302 .map_err(|_| SklearsError::InvalidInput("SVD computation failed".to_string()))?;
303
304 let s_sum = s.sum();
306 if s_sum == 0.0 {
307 return Ok(0.0);
308 }
309
310 let s_normalized = &s / s_sum;
311 let entropy = -s_normalized
312 .iter()
313 .filter(|&&x| x > 1e-12)
314 .map(|&x| x * x.ln())
315 .sum::<f64>();
316
317 let effective_rank = entropy.exp();
318 Ok(effective_rank)
319 }
320
321 fn compute_frobenius_approximation_quality(
322 &self,
323 x: &Array2<f64>,
324 x_transformed: &Array2<f64>,
325 ) -> Result<f64> {
326 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
329 Ok(1.0 / (1.0 + reconstruction_error)) }
331
332 fn compute_relative_frobenius_quality(
333 &self,
334 x: &Array2<f64>,
335 x_transformed: &Array2<f64>,
336 ) -> Result<f64> {
337 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
338 let original_norm = x.iter().map(|&x| x * x).sum::<f64>().sqrt();
339 let relative_error = reconstruction_error / original_norm;
340 Ok(1.0 / (1.0 + relative_error))
341 }
342
343 fn compute_cross_validation_score(
344 &self,
345 x: &Array2<f64>,
346 x_transformed: &Array2<f64>,
347 ) -> Result<f64> {
348 let n_samples = x.nrows();
350 let fold_size = n_samples / 5; let mut cv_scores = Vec::new();
352
353 for fold in 0..5 {
354 let start = fold * fold_size;
355 let end = if fold == 4 {
356 n_samples
357 } else {
358 (fold + 1) * fold_size
359 };
360
361 let val_features = x_transformed.slice(s![start..end, ..]);
362 let train_features = if start == 0 {
363 x_transformed.slice(s![end.., ..]).to_owned()
364 } else if end == n_samples {
365 x_transformed.slice(s![..start, ..]).to_owned()
366 } else {
367 let part1 = x_transformed.slice(s![..start, ..]);
368 let part2 = x_transformed.slice(s![end.., ..]);
369 concatenate![Axis(0), part1, part2]
370 };
371
372 let train_mean = train_features
374 .mean_axis(Axis(0))
375 .expect("operation should succeed");
376 let val_mean = val_features
377 .mean_axis(Axis(0))
378 .expect("operation should succeed");
379 let diff = &train_mean - &val_mean;
380 let similarity = 1.0 / (1.0 + diff.dot(&diff).sqrt());
381
382 cv_scores.push(similarity);
383 }
384
385 Ok(cv_scores.iter().sum::<f64>() / cv_scores.len() as f64)
386 }
387
    /// Cheap proxy for the reconstruction error of the feature map.
    ///
    /// NOTE(review): this is a heuristic, not a true Frobenius
    /// reconstruction error. Per sample it compares the mean of that
    /// sample's random features against the dot product of the sample
    /// with the dataset mean, and returns the l2 norm of the
    /// differences. The two quantities live on different scales, so
    /// treat the result as a relative signal only — confirm intent.
    fn compute_reconstruction_error(
        &self,
        x: &Array2<f64>,
        x_transformed: &Array2<f64>,
    ) -> Result<f64> {
        // Panics if `x` has zero rows (no mean exists); callers in this
        // file pass non-empty data.
        let x_mean = x.mean_axis(Axis(0)).expect("operation should succeed");
        // Per-sample mean of the transformed features.
        let x_transformed_projected =
            x_transformed.sum_axis(Axis(1)) / x_transformed.ncols() as f64;

        let mut error = 0.0;
        for (i, &projected_val) in x_transformed_projected.iter().enumerate() {
            // Projection of the original sample onto the dataset mean.
            let original_norm = x.row(i).dot(&x_mean);
            error += (projected_val - original_norm).powi(2);
        }

        Ok(error.sqrt())
    }
407
408 fn compute_approximation_error(
409 &self,
410 _x_train: &Array2<f64>,
411 x_val: &Array2<f64>,
412 _x_train_transformed: &Array2<f64>,
413 x_val_transformed: &Array2<f64>,
414 ) -> Result<f64> {
415 let val_norm = x_val.norm_l2();
420 let transformed_norm = x_val_transformed.norm_l2();
421
422 let error = if val_norm > 1e-12 {
424 (val_norm - transformed_norm).abs() / val_norm
425 } else {
426 (val_norm - transformed_norm).abs()
427 };
428
429 Ok(error)
430 }
431
432 fn select_optimal_dimension(
433 &self,
434 dimensions: &[usize],
435 quality_scores: &HashMap<usize, f64>,
436 approximation_errors: &HashMap<usize, f64>,
437 ) -> Result<usize> {
438 match &self.config.selection_strategy {
439 SelectionStrategy::ErrorTolerance { tolerance } => {
440 for &dim in dimensions {
442 if let Some(&error) = approximation_errors.get(&dim) {
443 if error <= *tolerance {
444 return Ok(dim);
445 }
446 }
447 }
448 self.select_best_quality_dimension(dimensions, quality_scores)
450 }
451 SelectionStrategy::QualityEfficiency {
452 efficiency_threshold,
453 } => {
454 let mut best_efficiency = 0.0;
456 let mut best_dim = dimensions[0];
457
458 for &dim in dimensions {
459 if let Some(&quality) = quality_scores.get(&dim) {
460 let efficiency = quality / dim as f64;
461 if efficiency >= *efficiency_threshold && efficiency > best_efficiency {
462 best_efficiency = efficiency;
463 best_dim = dim;
464 }
465 }
466 }
467 Ok(best_dim)
468 }
469 SelectionStrategy::ElbowMethod { sensitivity } => {
470 self.select_elbow_dimension(dimensions, quality_scores, *sensitivity)
471 }
472 _ => {
473 self.select_best_quality_dimension(dimensions, quality_scores)
475 }
476 }
477 }
478
479 fn select_best_quality_dimension(
480 &self,
481 dimensions: &[usize],
482 quality_scores: &HashMap<usize, f64>,
483 ) -> Result<usize> {
484 let mut best_score = f64::NEG_INFINITY;
485 let mut best_dim = dimensions[0];
486
487 for &dim in dimensions {
488 if let Some(&score) = quality_scores.get(&dim) {
489 if score > best_score {
490 best_score = score;
491 best_dim = dim;
492 }
493 }
494 }
495
496 Ok(best_dim)
497 }
498
499 fn select_elbow_dimension(
500 &self,
501 dimensions: &[usize],
502 quality_scores: &HashMap<usize, f64>,
503 sensitivity: f64,
504 ) -> Result<usize> {
505 if dimensions.len() < 3 {
506 return Ok(dimensions[0]);
507 }
508
509 let mut dim_score_pairs: Vec<_> = dimensions
511 .iter()
512 .filter_map(|&dim| quality_scores.get(&dim).map(|&score| (dim, score)))
513 .collect();
514 dim_score_pairs.sort_by(|a, b| a.0.cmp(&b.0));
515
516 let mut best_elbow_idx = 1;
518 let mut max_curvature = 0.0;
519
520 for i in 1..(dim_score_pairs.len() - 1) {
521 let (d1, s1) = dim_score_pairs[i - 1];
522 let (d2, s2) = dim_score_pairs[i];
523 let (d3, s3) = dim_score_pairs[i + 1];
524
525 let first_deriv1 = (s2 - s1) / (d2 - d1) as f64;
527 let first_deriv2 = (s3 - s2) / (d3 - d2) as f64;
528 let second_deriv = (first_deriv2 - first_deriv1) / ((d3 - d1) as f64 / 2.0);
529
530 let curvature = second_deriv.abs();
531 if curvature > max_curvature && curvature > sensitivity {
532 max_curvature = curvature;
533 best_elbow_idx = i;
534 }
535 }
536
537 Ok(dim_score_pairs[best_elbow_idx].0)
538 }
539}
540
/// Result of fitting an [`AdaptiveRBFSampler`]: the final fitted RBF
/// sampler plus the metrics gathered during the dimension search.
pub struct FittedAdaptiveRBFSampler {
    // RBF sampler fitted on the full data with the selected dimension.
    fitted_rbf: crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
    // Per-dimension metrics and the chosen optimum.
    selection_result: DimensionSelectionResult,
}
546
547impl Fit<Array2<f64>, ()> for AdaptiveRBFSampler {
548 type Fitted = FittedAdaptiveRBFSampler;
549
550 fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
551 let selection_result = self.select_dimension(x)?;
553
554 let rbf_sampler = RBFSampler::new(selection_result.optimal_dimension).gamma(self.gamma);
556
557 let fitted_rbf = rbf_sampler.fit(x, &())?;
558
559 Ok(FittedAdaptiveRBFSampler {
560 fitted_rbf,
561 selection_result,
562 })
563 }
564}
565
impl Transform<Array2<f64>, Array2<f64>> for FittedAdaptiveRBFSampler {
    /// Maps `x` into the random-feature space selected during fitting;
    /// delegates to the wrapped fitted `RBFSampler`.
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        self.fitted_rbf.transform(x)
    }
}
571
impl FittedAdaptiveRBFSampler {
    /// Full per-dimension metrics gathered during the adaptive search.
    pub fn selection_result(&self) -> &DimensionSelectionResult {
        &self.selection_result
    }

    /// Number of random features chosen by the search.
    pub fn optimal_dimension(&self) -> usize {
        self.selection_result.optimal_dimension
    }
}
583
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// End-to-end fit/transform: output shape and chosen dimension must
    /// respect the configured search range.
    #[test]
    fn test_adaptive_rbf_sampler_basic() {
        let x = Array2::from_shape_vec((50, 5), (0..250).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 50,
            step_size: 10,
            n_trials: 2,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let transformed = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(transformed.nrows(), 50);
        assert!(transformed.ncols() >= 10);
        assert!(transformed.ncols() <= 50);

        // The chosen dimension must come from the candidate range.
        let optimal_dim = fitted.optimal_dimension();
        assert!(optimal_dim >= 10);
        assert!(optimal_dim <= 50);
    }

    /// ErrorTolerance strategy: selected dimension stays inside the
    /// candidate range and the metric maps are populated.
    #[test]
    fn test_dimension_selection_error_tolerance() {
        let x = Array2::from_shape_vec((40, 4), (0..160).map(|i| i as f64 * 0.05).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 25,
            step_size: 5,
            selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.2 },
            n_trials: 1,
            validation_fraction: 0.3,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(1.0).config(config);

        let result = sampler
            .select_dimension(&x)
            .expect("operation should succeed");

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 25);
        assert!(!result.quality_scores.is_empty());
        assert!(!result.approximation_errors.is_empty());
    }

    /// QualityEfficiency strategy with the EffectiveRank metric: every
    /// candidate dimension must have been scored.
    #[test]
    fn test_dimension_selection_quality_efficiency() {
        let x = Array2::from_shape_vec((30, 3), (0..90).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 20,
            step_size: 5,
            selection_strategy: SelectionStrategy::QualityEfficiency {
                efficiency_threshold: 0.01,
            },
            quality_metric: QualityMetric::EffectiveRank,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.8).config(config);

        let result = sampler
            .select_dimension(&x)
            .expect("operation should succeed");

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 20);

        // Candidates are 5, 10, 15, 20 (min..=max stepped by 5).
        for &dim in &[5, 10, 15, 20] {
            assert!(result.quality_scores.contains_key(&dim));
        }
    }

    /// ElbowMethod strategy: timing and memory bookkeeping must be
    /// filled in alongside the selection itself.
    #[test]
    fn test_dimension_selection_elbow_method() {
        let x = Array2::from_shape_vec((60, 6), (0..360).map(|i| i as f64 * 0.02).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 40,
            step_size: 10,
            selection_strategy: SelectionStrategy::ElbowMethod { sensitivity: 0.01 },
            quality_metric: QualityMetric::KernelAlignment,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.3).config(config);

        let result = sampler
            .select_dimension(&x)
            .expect("operation should succeed");

        assert!(result.optimal_dimension >= 10);
        assert!(result.optimal_dimension <= 40);

        assert!(!result.computation_times.is_empty());
        assert!(!result.memory_usage.is_empty());
    }

    /// Sanity checks on the individual quality metric helpers.
    #[test]
    fn test_quality_metrics() {
        let x = Array2::from_shape_vec((25, 4), (0..100).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let sampler = AdaptiveRBFSampler::new().gamma(1.0);

        let rbf = RBFSampler::new(15).gamma(1.0);
        let fitted_rbf = rbf.fit(&x, &()).expect("operation should succeed");
        let x_transformed = fitted_rbf.transform(&x).expect("operation should succeed");

        // Kernel alignment is a normalized inner product, hence in [0, 1].
        let alignment = sampler
            .compute_kernel_alignment(&x, &x_transformed)
            .expect("operation should succeed");
        assert!(alignment >= 0.0);
        assert!(alignment <= 1.0);

        // Effective rank is positive and bounded by the feature count.
        let eff_rank = sampler
            .compute_effective_rank(&x_transformed)
            .expect("operation should succeed");
        assert!(eff_rank > 0.0);
        assert!(eff_rank <= x_transformed.ncols() as f64);

        let recon_error = sampler
            .compute_reconstruction_error(&x, &x_transformed)
            .expect("operation should succeed");
        assert!(recon_error >= 0.0);
    }

    /// With a fixed seed, two identically configured samplers must agree
    /// on the chosen dimension and (to 1e-10) on every quality score.
    #[test]
    fn test_adaptive_sampler_reproducibility() {
        let x = Array2::from_shape_vec((40, 5), (0..200).map(|i| i as f64 * 0.08).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 30,
            step_size: 10,
            n_trials: 2,
            random_seed: Some(42),
            ..Default::default()
        };

        let sampler1 = AdaptiveRBFSampler::new().gamma(0.5).config(config.clone());

        let sampler2 = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let result1 = sampler1
            .select_dimension(&x)
            .expect("operation should succeed");
        let result2 = sampler2
            .select_dimension(&x)
            .expect("operation should succeed");

        assert_eq!(result1.optimal_dimension, result2.optimal_dimension);

        for (&dim, &score1) in &result1.quality_scores {
            if let Some(&score2) = result2.quality_scores.get(&dim) {
                assert_abs_diff_eq!(score1, score2, epsilon = 1e-10);
            }
        }
    }

    /// The DimensionSelectionResult maps expose one entry per candidate
    /// dimension, and the memory estimate matches dim * n_features * 8.
    #[test]
    fn test_dimension_selection_result() {
        let x = Array2::from_shape_vec((35, 3), (0..105).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 15,
            step_size: 5,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.7).config(config);

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let result = fitted.selection_result();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 15);
        // Candidates are 5, 10, 15 -> three entries in every map.
        assert_eq!(result.quality_scores.len(), 3);
        assert_eq!(result.approximation_errors.len(), 3);
        assert_eq!(result.computation_times.len(), 3);
        assert_eq!(result.memory_usage.len(), 3);

        for (&dim, &memory) in &result.memory_usage {
            // f64 projection matrix: dim components x 3 features x 8 bytes.
            assert_eq!(memory, dim * 3 * 8);
        }
    }
}