1use crate::RBFSampler;
7use scirs2_core::ndarray::ndarray_linalg::{Norm, SVD};
8use scirs2_core::ndarray::{concatenate, s, Array2, Axis};
9use sklears_core::traits::Fit;
10use sklears_core::{
11 error::{Result, SklearsError},
12 traits::Transform,
13};
14use std::collections::HashMap;
15
/// Metric used to score how well a random-feature map approximates the
/// exact RBF kernel during adaptive dimension selection.
///
/// NOTE(review): only `FrobeniusNorm`, `RelativeFrobeniusNorm`,
/// `KernelAlignment`, `EffectiveRank` and `CrossValidation` have dedicated
/// implementations in `compute_quality_score`; the remaining variants fall
/// back to kernel alignment.
#[derive(Debug, Clone)]
pub enum QualityMetric {
    /// Quality derived from a Frobenius-style reconstruction error.
    FrobeniusNorm,
    /// Spectral norm (currently falls back to kernel alignment).
    SpectralNorm,
    /// Nuclear norm (currently falls back to kernel alignment).
    NuclearNorm,
    /// Reconstruction error relative to the input's Frobenius norm.
    RelativeFrobeniusNorm,
    /// Alignment between the exact and approximate kernel matrices.
    KernelAlignment,
    /// Effective rank (entropy of the singular-value spectrum) of the features.
    EffectiveRank,
    /// Stability of feature means across cross-validation folds.
    CrossValidation,
    /// Kernel trace (currently falls back to kernel alignment).
    Trace,
}
37
/// Strategy for choosing the optimal number of random features from the
/// per-dimension quality/error measurements.
///
/// NOTE(review): `ErrorTolerance`, `QualityEfficiency` and `ElbowMethod`
/// have dedicated implementations in `select_optimal_dimension`; the other
/// variants currently fall back to picking the best-quality dimension.
#[derive(Debug, Clone)]
pub enum SelectionStrategy {
    /// Smallest dimension whose approximation error is within `tolerance`.
    ErrorTolerance { tolerance: f64 },
    /// Best quality-per-dimension ratio at or above `efficiency_threshold`.
    QualityEfficiency { efficiency_threshold: f64 },
    /// Elbow (maximum curvature) of the quality curve, gated by `sensitivity`.
    ElbowMethod { sensitivity: f64 },
    /// Selection via k-fold cross-validation (falls back to best quality).
    CrossValidation { n_folds: usize },
    /// Selection by the information criterion named in `criterion`
    /// (falls back to best quality).
    InformationCriteria { criterion: String },
    /// Stop once quality improvements stall (falls back to best quality).
    EarlyStopping {
        patience: usize,
        min_improvement: f64,
    },
}
58
/// Configuration for adaptive selection of the random-feature dimension.
#[derive(Debug, Clone)]
pub struct AdaptiveDimensionConfig {
    /// Smallest candidate dimension to evaluate.
    pub min_features: usize,
    /// Largest candidate dimension to evaluate (inclusive).
    pub max_features: usize,
    /// Increment between successive candidate dimensions.
    pub step_size: usize,
    /// Metric used to score each candidate dimension.
    pub quality_metric: QualityMetric,
    /// Strategy used to pick the final dimension from the measurements.
    pub selection_strategy: SelectionStrategy,
    /// Number of repeated fits per dimension; metrics are averaged over trials.
    pub n_trials: usize,
    /// Base RNG seed; each trial offsets it by the trial index.
    pub random_seed: Option<u64>,
    /// Fraction of samples held out as the validation split.
    pub validation_fraction: f64,
}
80
impl Default for AdaptiveDimensionConfig {
    /// Defaults: sweep dimensions 10..=1000 in steps of 10, score by kernel
    /// alignment, accept the first dimension with error <= 0.1, average
    /// 3 trials, unseeded, 20% validation split.
    fn default() -> Self {
        Self {
            min_features: 10,
            max_features: 1000,
            step_size: 10,
            quality_metric: QualityMetric::KernelAlignment,
            selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.1 },
            n_trials: 3,
            random_seed: None,
            validation_fraction: 0.2,
        }
    }
}
95
/// Outcome of the dimension-selection sweep; maps are keyed by candidate dimension.
#[derive(Debug, Clone)]
pub struct DimensionSelectionResult {
    /// Dimension chosen by the configured selection strategy.
    pub optimal_dimension: usize,
    /// Average quality score per candidate dimension.
    pub quality_scores: HashMap<usize, f64>,
    /// Average approximation error per candidate dimension.
    pub approximation_errors: HashMap<usize, f64>,
    /// Average fit+transform wall time in seconds per candidate dimension.
    pub computation_times: HashMap<usize, f64>,
    /// Estimated memory footprint in bytes (dimension * n_features * 8).
    pub memory_usage: HashMap<usize, usize>,
}
111
/// RBF feature sampler that picks its output dimension automatically by
/// sweeping candidate dimensions and scoring each one.
#[derive(Debug, Clone)]
pub struct AdaptiveRBFSampler {
    // RBF kernel bandwidth, forwarded to each trial `RBFSampler`.
    gamma: f64,
    // Controls the candidate sweep and the selection strategy.
    config: AdaptiveDimensionConfig,
}
119
120impl AdaptiveRBFSampler {
    /// Create a sampler with `gamma = 1.0` and the default selection config.
    pub fn new() -> Self {
        Self {
            gamma: 1.0,
            config: AdaptiveDimensionConfig::default(),
        }
    }
128
    /// Builder: set the RBF kernel bandwidth `gamma`.
    pub fn gamma(mut self, gamma: f64) -> Self {
        self.gamma = gamma;
        self
    }
134
    /// Builder: replace the dimension-selection configuration.
    pub fn config(mut self, config: AdaptiveDimensionConfig) -> Self {
        self.config = config;
        self
    }
140
141 pub fn select_dimension(&self, x: &Array2<f64>) -> Result<DimensionSelectionResult> {
143 let n_samples = x.nrows();
144 let n_features = x.ncols();
145
146 let split_idx = (n_samples as f64 * (1.0 - self.config.validation_fraction)) as usize;
148 let x_train = x.slice(s![..split_idx, ..]).to_owned();
149 let x_val = x.slice(s![split_idx.., ..]).to_owned();
150
151 let mut quality_scores = HashMap::new();
152 let mut approximation_errors = HashMap::new();
153 let mut computation_times = HashMap::new();
154 let mut memory_usage = HashMap::new();
155
156 let dimensions: Vec<usize> = (self.config.min_features..=self.config.max_features)
158 .step_by(self.config.step_size)
159 .collect();
160
161 for &n_components in &dimensions {
162 let mut trial_scores = Vec::new();
163 let mut trial_errors = Vec::new();
164 let mut trial_times = Vec::new();
165
166 for trial in 0..self.config.n_trials {
168 let start_time = std::time::Instant::now();
169
170 let seed = self.config.random_seed.map(|s| s + trial as u64);
172 let sampler = if let Some(s) = seed {
173 RBFSampler::new(n_components)
174 .gamma(self.gamma)
175 .random_state(s)
176 } else {
177 RBFSampler::new(n_components).gamma(self.gamma)
178 };
179
180 let fitted = sampler.fit(&x_train, &())?;
182 let x_train_transformed = fitted.transform(&x_train)?;
183 let x_val_transformed = fitted.transform(&x_val)?;
184
185 let elapsed = start_time.elapsed().as_secs_f64();
186 trial_times.push(elapsed);
187
188 let quality_score = self.compute_quality_score(
190 &x_train,
191 &x_val,
192 &x_train_transformed,
193 &x_val_transformed,
194 &fitted,
195 )?;
196
197 let approximation_error = self.compute_approximation_error(
198 &x_train,
199 &x_val,
200 &x_train_transformed,
201 &x_val_transformed,
202 )?;
203
204 trial_scores.push(quality_score);
205 trial_errors.push(approximation_error);
206 }
207
208 let avg_score = trial_scores.iter().sum::<f64>() / trial_scores.len() as f64;
210 let avg_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
211 let avg_time = trial_times.iter().sum::<f64>() / trial_times.len() as f64;
212
213 quality_scores.insert(n_components, avg_score);
214 approximation_errors.insert(n_components, avg_error);
215 computation_times.insert(n_components, avg_time);
216 memory_usage.insert(n_components, n_components * n_features * 8); }
218
219 let optimal_dimension =
221 self.select_optimal_dimension(&dimensions, &quality_scores, &approximation_errors)?;
222
223 Ok(DimensionSelectionResult {
224 optimal_dimension,
225 quality_scores,
226 approximation_errors,
227 computation_times,
228 memory_usage,
229 })
230 }
231
    /// Dispatch to the metric implementation selected in the config.
    ///
    /// Only the training split is used by the current metric implementations;
    /// the validation arguments and the fitted sampler are accepted so all
    /// per-trial metric helpers share the same call shape.
    fn compute_quality_score(
        &self,
        x_train: &Array2<f64>,
        x_val: &Array2<f64>,
        x_train_transformed: &Array2<f64>,
        x_val_transformed: &Array2<f64>,
        fitted_sampler: &crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
    ) -> Result<f64> {
        match &self.config.quality_metric {
            QualityMetric::KernelAlignment => {
                self.compute_kernel_alignment(x_train, x_train_transformed)
            }
            QualityMetric::EffectiveRank => self.compute_effective_rank(x_train_transformed),
            QualityMetric::FrobeniusNorm => {
                self.compute_frobenius_approximation_quality(x_train, x_train_transformed)
            }
            QualityMetric::RelativeFrobeniusNorm => {
                self.compute_relative_frobenius_quality(x_train, x_train_transformed)
            }
            QualityMetric::CrossValidation => {
                self.compute_cross_validation_score(x_train, x_train_transformed)
            }
            _ => {
                // SpectralNorm, NuclearNorm and Trace have no dedicated
                // implementation yet; fall back to kernel alignment.
                self.compute_kernel_alignment(x_train, x_train_transformed)
            }
        }
    }
260
261 fn compute_kernel_alignment(
262 &self,
263 x: &Array2<f64>,
264 x_transformed: &Array2<f64>,
265 ) -> Result<f64> {
266 let n_samples = x.nrows().min(100); let x_subset = x.slice(s![..n_samples, ..]);
269
270 let mut k_exact = Array2::zeros((n_samples, n_samples));
271 for i in 0..n_samples {
272 for j in 0..n_samples {
273 let diff = &x_subset.row(i) - &x_subset.row(j);
274 let squared_norm = diff.dot(&diff);
275 k_exact[[i, j]] = (-self.gamma * squared_norm).exp();
276 }
277 }
278
279 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
281 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
282
283 let k_exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
285 let k_approx_frobenius = k_approx.iter().map(|&x| x * x).sum::<f64>().sqrt();
286 let k_product = (&k_exact * &k_approx).sum();
287
288 let alignment = k_product / (k_exact_frobenius * k_approx_frobenius);
289 Ok(alignment)
290 }
291
292 fn compute_effective_rank(&self, x_transformed: &Array2<f64>) -> Result<f64> {
293 let (_, s, _) = x_transformed
295 .svd(true, true)
296 .map_err(|_| SklearsError::InvalidInput("SVD computation failed".to_string()))?;
297
298 let s_sum = s.sum();
300 if s_sum == 0.0 {
301 return Ok(0.0);
302 }
303
304 let s_normalized = &s / s_sum;
305 let entropy = -s_normalized
306 .iter()
307 .filter(|&&x| x > 1e-12)
308 .map(|&x| x * x.ln())
309 .sum::<f64>();
310
311 let effective_rank = entropy.exp();
312 Ok(effective_rank)
313 }
314
315 fn compute_frobenius_approximation_quality(
316 &self,
317 x: &Array2<f64>,
318 x_transformed: &Array2<f64>,
319 ) -> Result<f64> {
320 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
323 Ok(1.0 / (1.0 + reconstruction_error)) }
325
326 fn compute_relative_frobenius_quality(
327 &self,
328 x: &Array2<f64>,
329 x_transformed: &Array2<f64>,
330 ) -> Result<f64> {
331 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
332 let original_norm = x.iter().map(|&x| x * x).sum::<f64>().sqrt();
333 let relative_error = reconstruction_error / original_norm;
334 Ok(1.0 / (1.0 + relative_error))
335 }
336
337 fn compute_cross_validation_score(
338 &self,
339 x: &Array2<f64>,
340 x_transformed: &Array2<f64>,
341 ) -> Result<f64> {
342 let n_samples = x.nrows();
344 let fold_size = n_samples / 5; let mut cv_scores = Vec::new();
346
347 for fold in 0..5 {
348 let start = fold * fold_size;
349 let end = if fold == 4 {
350 n_samples
351 } else {
352 (fold + 1) * fold_size
353 };
354
355 let val_features = x_transformed.slice(s![start..end, ..]);
356 let train_features = if start == 0 {
357 x_transformed.slice(s![end.., ..]).to_owned()
358 } else if end == n_samples {
359 x_transformed.slice(s![..start, ..]).to_owned()
360 } else {
361 let part1 = x_transformed.slice(s![..start, ..]);
362 let part2 = x_transformed.slice(s![end.., ..]);
363 concatenate![Axis(0), part1, part2]
364 };
365
366 let train_mean = train_features.mean_axis(Axis(0)).unwrap();
368 let val_mean = val_features.mean_axis(Axis(0)).unwrap();
369 let diff = &train_mean - &val_mean;
370 let similarity = 1.0 / (1.0 + diff.dot(&diff).sqrt());
371
372 cv_scores.push(similarity);
373 }
374
375 Ok(cv_scores.iter().sum::<f64>() / cv_scores.len() as f64)
376 }
377
378 fn compute_reconstruction_error(
379 &self,
380 x: &Array2<f64>,
381 x_transformed: &Array2<f64>,
382 ) -> Result<f64> {
383 let x_mean = x.mean_axis(Axis(0)).unwrap();
386 let x_transformed_projected =
387 x_transformed.sum_axis(Axis(1)) / x_transformed.ncols() as f64;
388
389 let mut error = 0.0;
390 for (i, &projected_val) in x_transformed_projected.iter().enumerate() {
391 let original_norm = x.row(i).dot(&x_mean);
392 error += (projected_val - original_norm).powi(2);
393 }
394
395 Ok(error.sqrt())
396 }
397
398 fn compute_approximation_error(
399 &self,
400 x_train: &Array2<f64>,
401 x_val: &Array2<f64>,
402 x_train_transformed: &Array2<f64>,
403 x_val_transformed: &Array2<f64>,
404 ) -> Result<f64> {
405 let val_norm = x_val.norm_l2();
410 let transformed_norm = x_val_transformed.norm_l2();
411
412 let error = if val_norm > 1e-12 {
414 (val_norm - transformed_norm).abs() / val_norm
415 } else {
416 (val_norm - transformed_norm).abs()
417 };
418
419 Ok(error)
420 }
421
422 fn select_optimal_dimension(
423 &self,
424 dimensions: &[usize],
425 quality_scores: &HashMap<usize, f64>,
426 approximation_errors: &HashMap<usize, f64>,
427 ) -> Result<usize> {
428 match &self.config.selection_strategy {
429 SelectionStrategy::ErrorTolerance { tolerance } => {
430 for &dim in dimensions {
432 if let Some(&error) = approximation_errors.get(&dim) {
433 if error <= *tolerance {
434 return Ok(dim);
435 }
436 }
437 }
438 self.select_best_quality_dimension(dimensions, quality_scores)
440 }
441 SelectionStrategy::QualityEfficiency {
442 efficiency_threshold,
443 } => {
444 let mut best_efficiency = 0.0;
446 let mut best_dim = dimensions[0];
447
448 for &dim in dimensions {
449 if let Some(&quality) = quality_scores.get(&dim) {
450 let efficiency = quality / dim as f64;
451 if efficiency >= *efficiency_threshold && efficiency > best_efficiency {
452 best_efficiency = efficiency;
453 best_dim = dim;
454 }
455 }
456 }
457 Ok(best_dim)
458 }
459 SelectionStrategy::ElbowMethod { sensitivity } => {
460 self.select_elbow_dimension(dimensions, quality_scores, *sensitivity)
461 }
462 _ => {
463 self.select_best_quality_dimension(dimensions, quality_scores)
465 }
466 }
467 }
468
469 fn select_best_quality_dimension(
470 &self,
471 dimensions: &[usize],
472 quality_scores: &HashMap<usize, f64>,
473 ) -> Result<usize> {
474 let mut best_score = f64::NEG_INFINITY;
475 let mut best_dim = dimensions[0];
476
477 for &dim in dimensions {
478 if let Some(&score) = quality_scores.get(&dim) {
479 if score > best_score {
480 best_score = score;
481 best_dim = dim;
482 }
483 }
484 }
485
486 Ok(best_dim)
487 }
488
489 fn select_elbow_dimension(
490 &self,
491 dimensions: &[usize],
492 quality_scores: &HashMap<usize, f64>,
493 sensitivity: f64,
494 ) -> Result<usize> {
495 if dimensions.len() < 3 {
496 return Ok(dimensions[0]);
497 }
498
499 let mut dim_score_pairs: Vec<_> = dimensions
501 .iter()
502 .filter_map(|&dim| quality_scores.get(&dim).map(|&score| (dim, score)))
503 .collect();
504 dim_score_pairs.sort_by(|a, b| a.0.cmp(&b.0));
505
506 let mut best_elbow_idx = 1;
508 let mut max_curvature = 0.0;
509
510 for i in 1..(dim_score_pairs.len() - 1) {
511 let (d1, s1) = dim_score_pairs[i - 1];
512 let (d2, s2) = dim_score_pairs[i];
513 let (d3, s3) = dim_score_pairs[i + 1];
514
515 let first_deriv1 = (s2 - s1) / (d2 - d1) as f64;
517 let first_deriv2 = (s3 - s2) / (d3 - d2) as f64;
518 let second_deriv = (first_deriv2 - first_deriv1) / ((d3 - d1) as f64 / 2.0);
519
520 let curvature = second_deriv.abs();
521 if curvature > max_curvature && curvature > sensitivity {
522 max_curvature = curvature;
523 best_elbow_idx = i;
524 }
525 }
526
527 Ok(dim_score_pairs[best_elbow_idx].0)
528 }
529}
530
/// An `AdaptiveRBFSampler` after fitting: wraps the final fitted
/// `RBFSampler` plus the record of the dimension-selection sweep.
pub struct FittedAdaptiveRBFSampler {
    // Final sampler fitted on the full data at the selected dimension.
    fitted_rbf: crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
    // Measurements gathered while sweeping candidate dimensions.
    selection_result: DimensionSelectionResult,
}
536
537impl Fit<Array2<f64>, ()> for AdaptiveRBFSampler {
538 type Fitted = FittedAdaptiveRBFSampler;
539
540 fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
541 let selection_result = self.select_dimension(x)?;
543
544 let rbf_sampler = RBFSampler::new(selection_result.optimal_dimension).gamma(self.gamma);
546
547 let fitted_rbf = rbf_sampler.fit(x, &())?;
548
549 Ok(FittedAdaptiveRBFSampler {
550 fitted_rbf,
551 selection_result,
552 })
553 }
554}
555
/// Transformation delegates to the fitted inner `RBFSampler`.
impl Transform<Array2<f64>, Array2<f64>> for FittedAdaptiveRBFSampler {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        self.fitted_rbf.transform(x)
    }
}
561
impl FittedAdaptiveRBFSampler {
    /// Full record of the dimension-selection sweep.
    pub fn selection_result(&self) -> &DimensionSelectionResult {
        &self.selection_result
    }

    /// The dimension chosen by the selection strategy.
    pub fn optimal_dimension(&self) -> usize {
        self.selection_result.optimal_dimension
    }
}
573
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// End-to-end: fit on a small matrix and check that the transformed shape
    /// and the selected dimension stay within the configured range.
    #[test]
    fn test_adaptive_rbf_sampler_basic() {
        let x =
            Array2::from_shape_vec((50, 5), (0..250).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 50,
            step_size: 10,
            n_trials: 2,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let fitted = sampler.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.nrows(), 50);
        assert!(transformed.ncols() >= 10);
        assert!(transformed.ncols() <= 50);

        let optimal_dim = fitted.optimal_dimension();
        assert!(optimal_dim >= 10);
        assert!(optimal_dim <= 50);
    }

    /// ErrorTolerance strategy: selection stays in range and the result maps
    /// are populated.
    #[test]
    fn test_dimension_selection_error_tolerance() {
        let x =
            Array2::from_shape_vec((40, 4), (0..160).map(|i| i as f64 * 0.05).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 25,
            step_size: 5,
            selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.2 },
            n_trials: 1,
            validation_fraction: 0.3,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(1.0).config(config);

        let result = sampler.select_dimension(&x).unwrap();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 25);
        assert!(!result.quality_scores.is_empty());
        assert!(!result.approximation_errors.is_empty());
    }

    /// QualityEfficiency strategy with the EffectiveRank metric: every
    /// candidate dimension should receive a quality score.
    #[test]
    fn test_dimension_selection_quality_efficiency() {
        let x = Array2::from_shape_vec((30, 3), (0..90).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 20,
            step_size: 5,
            selection_strategy: SelectionStrategy::QualityEfficiency {
                efficiency_threshold: 0.01,
            },
            quality_metric: QualityMetric::EffectiveRank,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.8).config(config);

        let result = sampler.select_dimension(&x).unwrap();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 20);

        // All candidates (5, 10, 15, 20) must have been scored.
        for &dim in &[5, 10, 15, 20] {
            assert!(result.quality_scores.contains_key(&dim));
        }
    }

    /// ElbowMethod strategy: selection stays in range and timing/memory maps
    /// are populated.
    #[test]
    fn test_dimension_selection_elbow_method() {
        let x =
            Array2::from_shape_vec((60, 6), (0..360).map(|i| i as f64 * 0.02).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 40,
            step_size: 10,
            selection_strategy: SelectionStrategy::ElbowMethod { sensitivity: 0.01 },
            quality_metric: QualityMetric::KernelAlignment,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.3).config(config);

        let result = sampler.select_dimension(&x).unwrap();

        assert!(result.optimal_dimension >= 10);
        assert!(result.optimal_dimension <= 40);

        assert!(!result.computation_times.is_empty());
        assert!(!result.memory_usage.is_empty());
    }

    /// Sanity-check the individual metric helpers on a fixed fitted sampler.
    #[test]
    fn test_quality_metrics() {
        let x =
            Array2::from_shape_vec((25, 4), (0..100).map(|i| i as f64 * 0.1).collect()).unwrap();

        let sampler = AdaptiveRBFSampler::new().gamma(1.0);

        let rbf = RBFSampler::new(15).gamma(1.0);
        let fitted_rbf = rbf.fit(&x, &()).unwrap();
        let x_transformed = fitted_rbf.transform(&x).unwrap();

        // Kernel alignment is a normalized inner product, so it lies in [0, 1].
        let alignment = sampler
            .compute_kernel_alignment(&x, &x_transformed)
            .unwrap();
        assert!(alignment >= 0.0);
        assert!(alignment <= 1.0);

        // Effective rank is positive and bounded by the feature count.
        let eff_rank = sampler.compute_effective_rank(&x_transformed).unwrap();
        assert!(eff_rank > 0.0);
        assert!(eff_rank <= x_transformed.ncols() as f64);

        // Reconstruction error is a norm, hence non-negative.
        let recon_error = sampler
            .compute_reconstruction_error(&x, &x_transformed)
            .unwrap();
        assert!(recon_error >= 0.0);
    }

    /// With a fixed seed, two independent selection runs must agree exactly.
    #[test]
    fn test_adaptive_sampler_reproducibility() {
        let x =
            Array2::from_shape_vec((40, 5), (0..200).map(|i| i as f64 * 0.08).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 30,
            step_size: 10,
            n_trials: 2,
            random_seed: Some(42),
            ..Default::default()
        };

        let sampler1 = AdaptiveRBFSampler::new().gamma(0.5).config(config.clone());

        let sampler2 = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let result1 = sampler1.select_dimension(&x).unwrap();
        let result2 = sampler2.select_dimension(&x).unwrap();

        assert_eq!(result1.optimal_dimension, result2.optimal_dimension);

        // Per-dimension quality scores should match to floating-point precision.
        for (&dim, &score1) in &result1.quality_scores {
            if let Some(&score2) = result2.quality_scores.get(&dim) {
                assert_abs_diff_eq!(score1, score2, epsilon = 1e-10);
            }
        }
    }

    /// The selection result exposed after fitting has one entry per candidate
    /// dimension and a consistent memory estimate.
    #[test]
    fn test_dimension_selection_result() {
        let x =
            Array2::from_shape_vec((35, 3), (0..105).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 15,
            step_size: 5,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.7).config(config);

        let fitted = sampler.fit(&x, &()).unwrap();
        let result = fitted.selection_result();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 15);
        // Candidates are 5, 10, 15 — three entries in every map.
        assert_eq!(result.quality_scores.len(), 3);
        assert_eq!(result.approximation_errors.len(), 3);
        assert_eq!(result.computation_times.len(), 3);
        assert_eq!(result.memory_usage.len(), 3);

        // Memory estimate is dim * n_features (3) * 8 bytes per f64.
        for (&dim, &memory) in &result.memory_usage {
            assert_eq!(memory, dim * 3 * 8);
        }
    }
}