1use crate::RBFSampler;
7use scirs2_core::ndarray::{concatenate, s, Array2, Axis};
8use scirs2_linalg::compat::{Norm, SVD};
9use sklears_core::traits::Fit;
10use sklears_core::{
11 error::{Result, SklearsError},
12 traits::Transform,
13};
14use std::collections::HashMap;
15
/// Metric used to score how well a given number of random features
/// approximates the exact RBF kernel.
#[derive(Debug, Clone)]
pub enum QualityMetric {
    /// Frobenius-norm based approximation quality.
    FrobeniusNorm,
    /// Spectral-norm based quality. NOTE(review): no dedicated
    /// implementation yet; `compute_quality_score` falls back to
    /// kernel alignment for this variant.
    SpectralNorm,
    /// Nuclear-norm based quality. NOTE(review): no dedicated
    /// implementation yet; falls back to kernel alignment.
    NuclearNorm,
    /// Frobenius error relative to the norm of the original data.
    RelativeFrobeniusNorm,
    /// Alignment between the exact and the approximate kernel matrices.
    KernelAlignment,
    /// Entropy-based effective rank of the transformed feature matrix.
    EffectiveRank,
    /// Stability of feature means across cross-validation folds.
    CrossValidation,
    /// Trace-based quality. NOTE(review): no dedicated implementation
    /// yet; falls back to kernel alignment.
    Trace,
}
37
/// Strategy for choosing the optimal dimension from the per-dimension
/// quality scores and approximation errors.
#[derive(Debug, Clone)]
pub enum SelectionStrategy {
    /// Smallest dimension whose averaged approximation error is within
    /// `tolerance`.
    ErrorTolerance { tolerance: f64 },
    /// Dimension maximizing quality-per-feature, subject to a minimum
    /// `efficiency_threshold`.
    QualityEfficiency { efficiency_threshold: f64 },
    /// Elbow of the quality curve detected via the discrete second
    /// derivative; `sensitivity` is the minimum curvature accepted.
    ElbowMethod { sensitivity: f64 },
    /// Cross-validated selection. NOTE(review): not implemented —
    /// `select_optimal_dimension` falls back to best-quality selection.
    CrossValidation { n_folds: usize },
    /// Information-criterion based selection. NOTE(review): not
    /// implemented — falls back to best-quality selection.
    InformationCriteria { criterion: String },
    /// Early stopping on the quality curve. NOTE(review): not
    /// implemented — falls back to best-quality selection.
    EarlyStopping {
        // Number of dimensions without improvement before stopping.
        patience: usize,
        // Minimum quality gain counted as an improvement.
        min_improvement: f64,
    },
}
58
/// Configuration for the adaptive random-feature dimension search.
#[derive(Debug, Clone)]
pub struct AdaptiveDimensionConfig {
    /// Smallest candidate number of random features (inclusive).
    pub min_features: usize,
    /// Largest candidate number of random features (inclusive).
    pub max_features: usize,
    /// Increment between consecutive candidate dimensions.
    pub step_size: usize,
    /// Metric used to score each candidate dimension.
    pub quality_metric: QualityMetric,
    /// Strategy used to pick the final dimension from the scores.
    pub selection_strategy: SelectionStrategy,
    /// Number of independent trials averaged per candidate dimension.
    pub n_trials: usize,
    /// Base RNG seed; trial `t` uses `seed + t` when set.
    pub random_seed: Option<u64>,
    /// Fraction of samples held out for validation (e.g. 0.2 = 20%).
    pub validation_fraction: f64,
}
80
81impl Default for AdaptiveDimensionConfig {
82 fn default() -> Self {
83 Self {
84 min_features: 10,
85 max_features: 1000,
86 step_size: 10,
87 quality_metric: QualityMetric::KernelAlignment,
88 selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.1 },
89 n_trials: 3,
90 random_seed: None,
91 validation_fraction: 0.2,
92 }
93 }
94}
95
/// Outcome of the adaptive dimension search, with per-candidate metrics
/// keyed by the candidate dimension.
#[derive(Debug, Clone)]
pub struct DimensionSelectionResult {
    /// The dimension chosen by the configured selection strategy.
    pub optimal_dimension: usize,
    /// Averaged quality score per candidate dimension.
    pub quality_scores: HashMap<usize, f64>,
    /// Averaged approximation error per candidate dimension.
    pub approximation_errors: HashMap<usize, f64>,
    /// Averaged fit+transform wall-clock time (seconds) per dimension.
    pub computation_times: HashMap<usize, f64>,
    /// Estimated projection-matrix memory (bytes) per dimension.
    pub memory_usage: HashMap<usize, usize>,
}
111
/// RBF feature sampler that picks its number of random features
/// automatically by searching over candidate dimensions.
#[derive(Debug, Clone)]
pub struct AdaptiveRBFSampler {
    // RBF kernel bandwidth: k(x, y) = exp(-gamma * ||x - y||^2).
    gamma: f64,
    // Search-space and scoring configuration.
    config: AdaptiveDimensionConfig,
}
119
120impl Default for AdaptiveRBFSampler {
121 fn default() -> Self {
122 Self::new()
123 }
124}
125
126impl AdaptiveRBFSampler {
127 pub fn new() -> Self {
129 Self {
130 gamma: 1.0,
131 config: AdaptiveDimensionConfig::default(),
132 }
133 }
134
135 pub fn gamma(mut self, gamma: f64) -> Self {
137 self.gamma = gamma;
138 self
139 }
140
141 pub fn config(mut self, config: AdaptiveDimensionConfig) -> Self {
143 self.config = config;
144 self
145 }
146
147 pub fn select_dimension(&self, x: &Array2<f64>) -> Result<DimensionSelectionResult> {
149 let n_samples = x.nrows();
150 let n_features = x.ncols();
151
152 let split_idx = (n_samples as f64 * (1.0 - self.config.validation_fraction)) as usize;
154 let x_train = x.slice(s![..split_idx, ..]).to_owned();
155 let x_val = x.slice(s![split_idx.., ..]).to_owned();
156
157 let mut quality_scores = HashMap::new();
158 let mut approximation_errors = HashMap::new();
159 let mut computation_times = HashMap::new();
160 let mut memory_usage = HashMap::new();
161
162 let dimensions: Vec<usize> = (self.config.min_features..=self.config.max_features)
164 .step_by(self.config.step_size)
165 .collect();
166
167 for &n_components in &dimensions {
168 let mut trial_scores = Vec::new();
169 let mut trial_errors = Vec::new();
170 let mut trial_times = Vec::new();
171
172 for trial in 0..self.config.n_trials {
174 let start_time = std::time::Instant::now();
175
176 let seed = self.config.random_seed.map(|s| s + trial as u64);
178 let sampler = if let Some(s) = seed {
179 RBFSampler::new(n_components)
180 .gamma(self.gamma)
181 .random_state(s)
182 } else {
183 RBFSampler::new(n_components).gamma(self.gamma)
184 };
185
186 let fitted = sampler.fit(&x_train, &())?;
188 let x_train_transformed = fitted.transform(&x_train)?;
189 let x_val_transformed = fitted.transform(&x_val)?;
190
191 let elapsed = start_time.elapsed().as_secs_f64();
192 trial_times.push(elapsed);
193
194 let quality_score = self.compute_quality_score(
196 &x_train,
197 &x_val,
198 &x_train_transformed,
199 &x_val_transformed,
200 &fitted,
201 )?;
202
203 let approximation_error = self.compute_approximation_error(
204 &x_train,
205 &x_val,
206 &x_train_transformed,
207 &x_val_transformed,
208 )?;
209
210 trial_scores.push(quality_score);
211 trial_errors.push(approximation_error);
212 }
213
214 let avg_score = trial_scores.iter().sum::<f64>() / trial_scores.len() as f64;
216 let avg_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
217 let avg_time = trial_times.iter().sum::<f64>() / trial_times.len() as f64;
218
219 quality_scores.insert(n_components, avg_score);
220 approximation_errors.insert(n_components, avg_error);
221 computation_times.insert(n_components, avg_time);
222 memory_usage.insert(n_components, n_components * n_features * 8); }
224
225 let optimal_dimension =
227 self.select_optimal_dimension(&dimensions, &quality_scores, &approximation_errors)?;
228
229 Ok(DimensionSelectionResult {
230 optimal_dimension,
231 quality_scores,
232 approximation_errors,
233 computation_times,
234 memory_usage,
235 })
236 }
237
238 fn compute_quality_score(
239 &self,
240 x_train: &Array2<f64>,
241 _x_val: &Array2<f64>,
242 x_train_transformed: &Array2<f64>,
243 _x_val_transformed: &Array2<f64>,
244 _fitted_sampler: &crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
245 ) -> Result<f64> {
246 match &self.config.quality_metric {
247 QualityMetric::KernelAlignment => {
248 self.compute_kernel_alignment(x_train, x_train_transformed)
249 }
250 QualityMetric::EffectiveRank => self.compute_effective_rank(x_train_transformed),
251 QualityMetric::FrobeniusNorm => {
252 self.compute_frobenius_approximation_quality(x_train, x_train_transformed)
253 }
254 QualityMetric::RelativeFrobeniusNorm => {
255 self.compute_relative_frobenius_quality(x_train, x_train_transformed)
256 }
257 QualityMetric::CrossValidation => {
258 self.compute_cross_validation_score(x_train, x_train_transformed)
259 }
260 _ => {
261 self.compute_kernel_alignment(x_train, x_train_transformed)
263 }
264 }
265 }
266
267 fn compute_kernel_alignment(
268 &self,
269 x: &Array2<f64>,
270 x_transformed: &Array2<f64>,
271 ) -> Result<f64> {
272 let n_samples = x.nrows().min(100); let x_subset = x.slice(s![..n_samples, ..]);
275
276 let mut k_exact = Array2::zeros((n_samples, n_samples));
277 for i in 0..n_samples {
278 for j in 0..n_samples {
279 let diff = &x_subset.row(i) - &x_subset.row(j);
280 let squared_norm = diff.dot(&diff);
281 k_exact[[i, j]] = (-self.gamma * squared_norm).exp();
282 }
283 }
284
285 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
287 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
288
289 let k_exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
291 let k_approx_frobenius = k_approx.iter().map(|&x| x * x).sum::<f64>().sqrt();
292 let k_product = (&k_exact * &k_approx).sum();
293
294 let alignment = k_product / (k_exact_frobenius * k_approx_frobenius);
295 Ok(alignment)
296 }
297
298 fn compute_effective_rank(&self, x_transformed: &Array2<f64>) -> Result<f64> {
299 let (_, s, _) = x_transformed
301 .svd(true)
302 .map_err(|_| SklearsError::InvalidInput("SVD computation failed".to_string()))?;
303
304 let s_sum = s.sum();
306 if s_sum == 0.0 {
307 return Ok(0.0);
308 }
309
310 let s_normalized = &s / s_sum;
311 let entropy = -s_normalized
312 .iter()
313 .filter(|&&x| x > 1e-12)
314 .map(|&x| x * x.ln())
315 .sum::<f64>();
316
317 let effective_rank = entropy.exp();
318 Ok(effective_rank)
319 }
320
321 fn compute_frobenius_approximation_quality(
322 &self,
323 x: &Array2<f64>,
324 x_transformed: &Array2<f64>,
325 ) -> Result<f64> {
326 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
329 Ok(1.0 / (1.0 + reconstruction_error)) }
331
332 fn compute_relative_frobenius_quality(
333 &self,
334 x: &Array2<f64>,
335 x_transformed: &Array2<f64>,
336 ) -> Result<f64> {
337 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
338 let original_norm = x.iter().map(|&x| x * x).sum::<f64>().sqrt();
339 let relative_error = reconstruction_error / original_norm;
340 Ok(1.0 / (1.0 + relative_error))
341 }
342
343 fn compute_cross_validation_score(
344 &self,
345 x: &Array2<f64>,
346 x_transformed: &Array2<f64>,
347 ) -> Result<f64> {
348 let n_samples = x.nrows();
350 let fold_size = n_samples / 5; let mut cv_scores = Vec::new();
352
353 for fold in 0..5 {
354 let start = fold * fold_size;
355 let end = if fold == 4 {
356 n_samples
357 } else {
358 (fold + 1) * fold_size
359 };
360
361 let val_features = x_transformed.slice(s![start..end, ..]);
362 let train_features = if start == 0 {
363 x_transformed.slice(s![end.., ..]).to_owned()
364 } else if end == n_samples {
365 x_transformed.slice(s![..start, ..]).to_owned()
366 } else {
367 let part1 = x_transformed.slice(s![..start, ..]);
368 let part2 = x_transformed.slice(s![end.., ..]);
369 concatenate![Axis(0), part1, part2]
370 };
371
372 let train_mean = train_features
374 .mean_axis(Axis(0))
375 .expect("operation should succeed");
376 let val_mean = val_features
377 .mean_axis(Axis(0))
378 .expect("operation should succeed");
379 let diff = &train_mean - &val_mean;
380 let similarity = 1.0 / (1.0 + diff.dot(&diff).sqrt());
381
382 cv_scores.push(similarity);
383 }
384
385 Ok(cv_scores.iter().sum::<f64>() / cv_scores.len() as f64)
386 }
387
    /// Cheap proxy for the reconstruction error of the feature map.
    ///
    /// NOTE(review): this is a heuristic, not a true Frobenius
    /// reconstruction error. Per sample it compares the mean of that
    /// sample's random features against the dot product of the sample
    /// with the dataset mean, and returns the l2 norm of the
    /// differences. The two quantities live on different scales, so
    /// treat the result as a relative signal only — confirm intent.
    fn compute_reconstruction_error(
        &self,
        x: &Array2<f64>,
        x_transformed: &Array2<f64>,
    ) -> Result<f64> {
        // Panics if `x` has zero rows (no mean exists); callers in this
        // file pass non-empty data.
        let x_mean = x.mean_axis(Axis(0)).expect("operation should succeed");
        // Per-sample mean of the transformed features.
        let x_transformed_projected =
            x_transformed.sum_axis(Axis(1)) / x_transformed.ncols() as f64;

        let mut error = 0.0;
        for (i, &projected_val) in x_transformed_projected.iter().enumerate() {
            // Projection of the original sample onto the dataset mean.
            let original_norm = x.row(i).dot(&x_mean);
            error += (projected_val - original_norm).powi(2);
        }

        Ok(error.sqrt())
    }
407
408 fn compute_approximation_error(
409 &self,
410 _x_train: &Array2<f64>,
411 x_val: &Array2<f64>,
412 _x_train_transformed: &Array2<f64>,
413 x_val_transformed: &Array2<f64>,
414 ) -> Result<f64> {
415 let val_norm = x_val.norm_l2();
420 let transformed_norm = x_val_transformed.norm_l2();
421
422 let error = if val_norm > 1e-12 {
424 (val_norm - transformed_norm).abs() / val_norm
425 } else {
426 (val_norm - transformed_norm).abs()
427 };
428
429 Ok(error)
430 }
431
432 fn select_optimal_dimension(
433 &self,
434 dimensions: &[usize],
435 quality_scores: &HashMap<usize, f64>,
436 approximation_errors: &HashMap<usize, f64>,
437 ) -> Result<usize> {
438 match &self.config.selection_strategy {
439 SelectionStrategy::ErrorTolerance { tolerance } => {
440 for &dim in dimensions {
442 if let Some(&error) = approximation_errors.get(&dim) {
443 if error <= *tolerance {
444 return Ok(dim);
445 }
446 }
447 }
448 self.select_best_quality_dimension(dimensions, quality_scores)
450 }
451 SelectionStrategy::QualityEfficiency {
452 efficiency_threshold,
453 } => {
454 let mut best_efficiency = 0.0;
456 let mut best_dim = dimensions[0];
457
458 for &dim in dimensions {
459 if let Some(&quality) = quality_scores.get(&dim) {
460 let efficiency = quality / dim as f64;
461 if efficiency >= *efficiency_threshold && efficiency > best_efficiency {
462 best_efficiency = efficiency;
463 best_dim = dim;
464 }
465 }
466 }
467 Ok(best_dim)
468 }
469 SelectionStrategy::ElbowMethod { sensitivity } => {
470 self.select_elbow_dimension(dimensions, quality_scores, *sensitivity)
471 }
472 _ => {
473 self.select_best_quality_dimension(dimensions, quality_scores)
475 }
476 }
477 }
478
479 fn select_best_quality_dimension(
480 &self,
481 dimensions: &[usize],
482 quality_scores: &HashMap<usize, f64>,
483 ) -> Result<usize> {
484 let mut best_score = f64::NEG_INFINITY;
485 let mut best_dim = dimensions[0];
486
487 for &dim in dimensions {
488 if let Some(&score) = quality_scores.get(&dim) {
489 if score > best_score {
490 best_score = score;
491 best_dim = dim;
492 }
493 }
494 }
495
496 Ok(best_dim)
497 }
498
499 fn select_elbow_dimension(
500 &self,
501 dimensions: &[usize],
502 quality_scores: &HashMap<usize, f64>,
503 sensitivity: f64,
504 ) -> Result<usize> {
505 if dimensions.len() < 3 {
506 return Ok(dimensions[0]);
507 }
508
509 let mut dim_score_pairs: Vec<_> = dimensions
511 .iter()
512 .filter_map(|&dim| quality_scores.get(&dim).map(|&score| (dim, score)))
513 .collect();
514 dim_score_pairs.sort_by(|a, b| a.0.cmp(&b.0));
515
516 let mut best_elbow_idx = 1;
518 let mut max_curvature = 0.0;
519
520 for i in 1..(dim_score_pairs.len() - 1) {
521 let (d1, s1) = dim_score_pairs[i - 1];
522 let (d2, s2) = dim_score_pairs[i];
523 let (d3, s3) = dim_score_pairs[i + 1];
524
525 let first_deriv1 = (s2 - s1) / (d2 - d1) as f64;
527 let first_deriv2 = (s3 - s2) / (d3 - d2) as f64;
528 let second_deriv = (first_deriv2 - first_deriv1) / ((d3 - d1) as f64 / 2.0);
529
530 let curvature = second_deriv.abs();
531 if curvature > max_curvature && curvature > sensitivity {
532 max_curvature = curvature;
533 best_elbow_idx = i;
534 }
535 }
536
537 Ok(dim_score_pairs[best_elbow_idx].0)
538 }
539}
540
/// Result of fitting an [`AdaptiveRBFSampler`]: the final fitted RBF
/// sampler plus the metrics gathered during the dimension search.
pub struct FittedAdaptiveRBFSampler {
    // RBF sampler fitted on the full data with the selected dimension.
    fitted_rbf: crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
    // Per-dimension metrics and the chosen optimum.
    selection_result: DimensionSelectionResult,
}
546
547impl Fit<Array2<f64>, ()> for AdaptiveRBFSampler {
548 type Fitted = FittedAdaptiveRBFSampler;
549
550 fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
551 let selection_result = self.select_dimension(x)?;
553
554 let rbf_sampler = RBFSampler::new(selection_result.optimal_dimension).gamma(self.gamma);
556
557 let fitted_rbf = rbf_sampler.fit(x, &())?;
558
559 Ok(FittedAdaptiveRBFSampler {
560 fitted_rbf,
561 selection_result,
562 })
563 }
564}
565
impl Transform<Array2<f64>, Array2<f64>> for FittedAdaptiveRBFSampler {
    /// Maps `x` into the random-feature space selected during fitting;
    /// delegates to the wrapped fitted `RBFSampler`.
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        self.fitted_rbf.transform(x)
    }
}
571
impl FittedAdaptiveRBFSampler {
    /// Full per-dimension metrics gathered during the adaptive search.
    pub fn selection_result(&self) -> &DimensionSelectionResult {
        &self.selection_result
    }

    /// Number of random features chosen by the search.
    pub fn optimal_dimension(&self) -> usize {
        self.selection_result.optimal_dimension
    }
}
583
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// End-to-end fit/transform: output shape and chosen dimension must
    /// respect the configured search range.
    #[test]
    fn test_adaptive_rbf_sampler_basic() {
        let x = Array2::from_shape_vec((50, 5), (0..250).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 50,
            step_size: 10,
            n_trials: 2,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let transformed = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(transformed.nrows(), 50);
        assert!(transformed.ncols() >= 10);
        assert!(transformed.ncols() <= 50);

        // The chosen dimension must come from the candidate range.
        let optimal_dim = fitted.optimal_dimension();
        assert!(optimal_dim >= 10);
        assert!(optimal_dim <= 50);
    }

    /// ErrorTolerance strategy: selected dimension stays inside the
    /// candidate range and the metric maps are populated.
    #[test]
    fn test_dimension_selection_error_tolerance() {
        let x = Array2::from_shape_vec((40, 4), (0..160).map(|i| i as f64 * 0.05).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 25,
            step_size: 5,
            selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.2 },
            n_trials: 1,
            validation_fraction: 0.3,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(1.0).config(config);

        let result = sampler
            .select_dimension(&x)
            .expect("operation should succeed");

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 25);
        assert!(!result.quality_scores.is_empty());
        assert!(!result.approximation_errors.is_empty());
    }

    /// QualityEfficiency strategy with the EffectiveRank metric: every
    /// candidate dimension must have been scored.
    #[test]
    fn test_dimension_selection_quality_efficiency() {
        let x = Array2::from_shape_vec((30, 3), (0..90).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 20,
            step_size: 5,
            selection_strategy: SelectionStrategy::QualityEfficiency {
                efficiency_threshold: 0.01,
            },
            quality_metric: QualityMetric::EffectiveRank,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.8).config(config);

        let result = sampler
            .select_dimension(&x)
            .expect("operation should succeed");

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 20);

        // Candidates are 5, 10, 15, 20 (min..=max stepped by 5).
        for &dim in &[5, 10, 15, 20] {
            assert!(result.quality_scores.contains_key(&dim));
        }
    }

    /// ElbowMethod strategy: timing and memory bookkeeping must be
    /// filled in alongside the selection itself.
    #[test]
    fn test_dimension_selection_elbow_method() {
        let x = Array2::from_shape_vec((60, 6), (0..360).map(|i| i as f64 * 0.02).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 40,
            step_size: 10,
            selection_strategy: SelectionStrategy::ElbowMethod { sensitivity: 0.01 },
            quality_metric: QualityMetric::KernelAlignment,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.3).config(config);

        let result = sampler
            .select_dimension(&x)
            .expect("operation should succeed");

        assert!(result.optimal_dimension >= 10);
        assert!(result.optimal_dimension <= 40);

        assert!(!result.computation_times.is_empty());
        assert!(!result.memory_usage.is_empty());
    }

    /// Sanity checks on the individual quality metric helpers.
    #[test]
    fn test_quality_metrics() {
        let x = Array2::from_shape_vec((25, 4), (0..100).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let sampler = AdaptiveRBFSampler::new().gamma(1.0);

        let rbf = RBFSampler::new(15).gamma(1.0);
        let fitted_rbf = rbf.fit(&x, &()).expect("operation should succeed");
        let x_transformed = fitted_rbf.transform(&x).expect("operation should succeed");

        // Kernel alignment is a normalized inner product, hence in [0, 1].
        let alignment = sampler
            .compute_kernel_alignment(&x, &x_transformed)
            .expect("operation should succeed");
        assert!(alignment >= 0.0);
        assert!(alignment <= 1.0);

        // Effective rank is positive and bounded by the feature count.
        let eff_rank = sampler
            .compute_effective_rank(&x_transformed)
            .expect("operation should succeed");
        assert!(eff_rank > 0.0);
        assert!(eff_rank <= x_transformed.ncols() as f64);

        let recon_error = sampler
            .compute_reconstruction_error(&x, &x_transformed)
            .expect("operation should succeed");
        assert!(recon_error >= 0.0);
    }

    /// With a fixed seed, two identically configured samplers must agree
    /// on the chosen dimension and (to 1e-10) on every quality score.
    #[test]
    fn test_adaptive_sampler_reproducibility() {
        let x = Array2::from_shape_vec((40, 5), (0..200).map(|i| i as f64 * 0.08).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 30,
            step_size: 10,
            n_trials: 2,
            random_seed: Some(42),
            ..Default::default()
        };

        let sampler1 = AdaptiveRBFSampler::new().gamma(0.5).config(config.clone());

        let sampler2 = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let result1 = sampler1
            .select_dimension(&x)
            .expect("operation should succeed");
        let result2 = sampler2
            .select_dimension(&x)
            .expect("operation should succeed");

        assert_eq!(result1.optimal_dimension, result2.optimal_dimension);

        for (&dim, &score1) in &result1.quality_scores {
            if let Some(&score2) = result2.quality_scores.get(&dim) {
                assert_abs_diff_eq!(score1, score2, epsilon = 1e-10);
            }
        }
    }

    /// The DimensionSelectionResult maps expose one entry per candidate
    /// dimension, and the memory estimate matches dim * n_features * 8.
    #[test]
    fn test_dimension_selection_result() {
        let x = Array2::from_shape_vec((35, 3), (0..105).map(|i| i as f64 * 0.1).collect())
            .expect("operation should succeed");

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 15,
            step_size: 5,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.7).config(config);

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let result = fitted.selection_result();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 15);
        // Candidates are 5, 10, 15 -> three entries in every map.
        assert_eq!(result.quality_scores.len(), 3);
        assert_eq!(result.approximation_errors.len(), 3);
        assert_eq!(result.computation_times.len(), 3);
        assert_eq!(result.memory_usage.len(), 3);

        for (&dim, &memory) in &result.memory_usage {
            // f64 projection matrix: dim components x 3 features x 8 bytes.
            assert_eq!(memory, dim * 3 * 8);
        }
    }
}