1use crate::RBFSampler;
7use scirs2_core::ndarray::{concatenate, s, Array2, Axis};
8use scirs2_linalg::compat::{Norm, SVD};
9use sklears_core::traits::Fit;
10use sklears_core::{
11 error::{Result, SklearsError},
12 traits::Transform,
13};
14use std::collections::HashMap;
15
/// Metric used to score how well a random-feature map approximates the
/// exact RBF kernel during adaptive dimension selection.
///
/// NOTE(review): `SpectralNorm`, `NuclearNorm`, and `Trace` currently fall
/// back to kernel alignment in `compute_quality_score` — confirm intended.
#[derive(Debug, Clone)]
pub enum QualityMetric {
    /// Quality derived from a Frobenius-norm reconstruction heuristic.
    FrobeniusNorm,
    /// Spectral-norm criterion (currently falls back to kernel alignment).
    SpectralNorm,
    /// Nuclear-norm criterion (currently falls back to kernel alignment).
    NuclearNorm,
    /// Reconstruction error normalized by the data's Frobenius norm.
    RelativeFrobeniusNorm,
    /// Alignment between the exact and approximate Gram matrices.
    KernelAlignment,
    /// Entropy-based effective rank of the transformed features.
    EffectiveRank,
    /// Fold-wise feature-mean similarity as a cross-validation proxy.
    CrossValidation,
    /// Trace criterion (currently falls back to kernel alignment).
    Trace,
}
37
/// Rule for choosing the final feature dimension from the per-dimension
/// statistics gathered by `select_dimension`.
///
/// NOTE(review): `CrossValidation`, `InformationCriteria`, and
/// `EarlyStopping` currently fall back to best-quality selection in
/// `select_optimal_dimension`; their fields are unused — confirm intended.
#[derive(Debug, Clone)]
pub enum SelectionStrategy {
    /// Smallest dimension whose approximation error is `<= tolerance`.
    ErrorTolerance { tolerance: f64 },
    /// Dimension maximizing quality-per-feature, subject to a minimum
    /// `efficiency_threshold`.
    QualityEfficiency { efficiency_threshold: f64 },
    /// Elbow (maximum-curvature) point of the quality curve; `sensitivity`
    /// is the minimum curvature considered significant.
    ElbowMethod { sensitivity: f64 },
    /// K-fold cross-validation selection (currently falls back).
    CrossValidation { n_folds: usize },
    /// Information-criterion selection (currently falls back).
    InformationCriteria { criterion: String },
    /// Stop scanning once improvement stalls (currently falls back).
    EarlyStopping {
        patience: usize,
        min_improvement: f64,
    },
}
58
/// Configuration for adaptive selection of the number of random features.
#[derive(Debug, Clone)]
pub struct AdaptiveDimensionConfig {
    /// Smallest candidate dimension (inclusive).
    pub min_features: usize,
    /// Largest candidate dimension (inclusive).
    pub max_features: usize,
    /// Increment between successive candidate dimensions.
    pub step_size: usize,
    /// Metric used to score each candidate dimension.
    pub quality_metric: QualityMetric,
    /// Rule for picking the final dimension from the collected scores.
    pub selection_strategy: SelectionStrategy,
    /// Number of repeated fits per dimension; statistics are averaged.
    pub n_trials: usize,
    /// Base seed for reproducible trials (the trial index is added per trial).
    pub random_seed: Option<u64>,
    /// Fraction of samples held out (from the end of `x`) for validation.
    pub validation_fraction: f64,
}
80
impl Default for AdaptiveDimensionConfig {
    /// Defaults: scan 10..=1000 features in steps of 10, score by kernel
    /// alignment, accept the first dimension within 10% error, and average
    /// 3 unseeded trials with a 20% validation hold-out.
    fn default() -> Self {
        Self {
            min_features: 10,
            max_features: 1000,
            step_size: 10,
            quality_metric: QualityMetric::KernelAlignment,
            selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.1 },
            n_trials: 3,
            random_seed: None,
            validation_fraction: 0.2,
        }
    }
}
95
/// Outcome of `AdaptiveRBFSampler::select_dimension`: the chosen dimension
/// plus per-candidate diagnostics, keyed by candidate dimension.
#[derive(Debug, Clone)]
pub struct DimensionSelectionResult {
    /// Dimension chosen by the configured selection strategy.
    pub optimal_dimension: usize,
    /// Average quality score per candidate dimension.
    pub quality_scores: HashMap<usize, f64>,
    /// Average approximation error per candidate dimension.
    pub approximation_errors: HashMap<usize, f64>,
    /// Average fit+transform wall-clock time (seconds) per dimension.
    pub computation_times: HashMap<usize, f64>,
    /// Estimated projection-matrix memory (bytes) per dimension.
    pub memory_usage: HashMap<usize, usize>,
}
111
/// RBF feature sampler that automatically chooses its output dimension by
/// scanning candidates and scoring each one (see `select_dimension`).
#[derive(Debug, Clone)]
pub struct AdaptiveRBFSampler {
    // RBF kernel bandwidth parameter.
    gamma: f64,
    // Controls the candidate scan and the selection strategy.
    config: AdaptiveDimensionConfig,
}
119
impl Default for AdaptiveRBFSampler {
    /// Equivalent to `AdaptiveRBFSampler::new()`.
    fn default() -> Self {
        Self::new()
    }
}
125
126impl AdaptiveRBFSampler {
127 pub fn new() -> Self {
129 Self {
130 gamma: 1.0,
131 config: AdaptiveDimensionConfig::default(),
132 }
133 }
134
135 pub fn gamma(mut self, gamma: f64) -> Self {
137 self.gamma = gamma;
138 self
139 }
140
141 pub fn config(mut self, config: AdaptiveDimensionConfig) -> Self {
143 self.config = config;
144 self
145 }
146
147 pub fn select_dimension(&self, x: &Array2<f64>) -> Result<DimensionSelectionResult> {
149 let n_samples = x.nrows();
150 let n_features = x.ncols();
151
152 let split_idx = (n_samples as f64 * (1.0 - self.config.validation_fraction)) as usize;
154 let x_train = x.slice(s![..split_idx, ..]).to_owned();
155 let x_val = x.slice(s![split_idx.., ..]).to_owned();
156
157 let mut quality_scores = HashMap::new();
158 let mut approximation_errors = HashMap::new();
159 let mut computation_times = HashMap::new();
160 let mut memory_usage = HashMap::new();
161
162 let dimensions: Vec<usize> = (self.config.min_features..=self.config.max_features)
164 .step_by(self.config.step_size)
165 .collect();
166
167 for &n_components in &dimensions {
168 let mut trial_scores = Vec::new();
169 let mut trial_errors = Vec::new();
170 let mut trial_times = Vec::new();
171
172 for trial in 0..self.config.n_trials {
174 let start_time = std::time::Instant::now();
175
176 let seed = self.config.random_seed.map(|s| s + trial as u64);
178 let sampler = if let Some(s) = seed {
179 RBFSampler::new(n_components)
180 .gamma(self.gamma)
181 .random_state(s)
182 } else {
183 RBFSampler::new(n_components).gamma(self.gamma)
184 };
185
186 let fitted = sampler.fit(&x_train, &())?;
188 let x_train_transformed = fitted.transform(&x_train)?;
189 let x_val_transformed = fitted.transform(&x_val)?;
190
191 let elapsed = start_time.elapsed().as_secs_f64();
192 trial_times.push(elapsed);
193
194 let quality_score = self.compute_quality_score(
196 &x_train,
197 &x_val,
198 &x_train_transformed,
199 &x_val_transformed,
200 &fitted,
201 )?;
202
203 let approximation_error = self.compute_approximation_error(
204 &x_train,
205 &x_val,
206 &x_train_transformed,
207 &x_val_transformed,
208 )?;
209
210 trial_scores.push(quality_score);
211 trial_errors.push(approximation_error);
212 }
213
214 let avg_score = trial_scores.iter().sum::<f64>() / trial_scores.len() as f64;
216 let avg_error = trial_errors.iter().sum::<f64>() / trial_errors.len() as f64;
217 let avg_time = trial_times.iter().sum::<f64>() / trial_times.len() as f64;
218
219 quality_scores.insert(n_components, avg_score);
220 approximation_errors.insert(n_components, avg_error);
221 computation_times.insert(n_components, avg_time);
222 memory_usage.insert(n_components, n_components * n_features * 8); }
224
225 let optimal_dimension =
227 self.select_optimal_dimension(&dimensions, &quality_scores, &approximation_errors)?;
228
229 Ok(DimensionSelectionResult {
230 optimal_dimension,
231 quality_scores,
232 approximation_errors,
233 computation_times,
234 memory_usage,
235 })
236 }
237
238 fn compute_quality_score(
239 &self,
240 x_train: &Array2<f64>,
241 _x_val: &Array2<f64>,
242 x_train_transformed: &Array2<f64>,
243 _x_val_transformed: &Array2<f64>,
244 _fitted_sampler: &crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
245 ) -> Result<f64> {
246 match &self.config.quality_metric {
247 QualityMetric::KernelAlignment => {
248 self.compute_kernel_alignment(x_train, x_train_transformed)
249 }
250 QualityMetric::EffectiveRank => self.compute_effective_rank(x_train_transformed),
251 QualityMetric::FrobeniusNorm => {
252 self.compute_frobenius_approximation_quality(x_train, x_train_transformed)
253 }
254 QualityMetric::RelativeFrobeniusNorm => {
255 self.compute_relative_frobenius_quality(x_train, x_train_transformed)
256 }
257 QualityMetric::CrossValidation => {
258 self.compute_cross_validation_score(x_train, x_train_transformed)
259 }
260 _ => {
261 self.compute_kernel_alignment(x_train, x_train_transformed)
263 }
264 }
265 }
266
267 fn compute_kernel_alignment(
268 &self,
269 x: &Array2<f64>,
270 x_transformed: &Array2<f64>,
271 ) -> Result<f64> {
272 let n_samples = x.nrows().min(100); let x_subset = x.slice(s![..n_samples, ..]);
275
276 let mut k_exact = Array2::zeros((n_samples, n_samples));
277 for i in 0..n_samples {
278 for j in 0..n_samples {
279 let diff = &x_subset.row(i) - &x_subset.row(j);
280 let squared_norm = diff.dot(&diff);
281 k_exact[[i, j]] = (-self.gamma * squared_norm).exp();
282 }
283 }
284
285 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
287 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
288
289 let k_exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
291 let k_approx_frobenius = k_approx.iter().map(|&x| x * x).sum::<f64>().sqrt();
292 let k_product = (&k_exact * &k_approx).sum();
293
294 let alignment = k_product / (k_exact_frobenius * k_approx_frobenius);
295 Ok(alignment)
296 }
297
298 fn compute_effective_rank(&self, x_transformed: &Array2<f64>) -> Result<f64> {
299 let (_, s, _) = x_transformed
301 .svd(true)
302 .map_err(|_| SklearsError::InvalidInput("SVD computation failed".to_string()))?;
303
304 let s_sum = s.sum();
306 if s_sum == 0.0 {
307 return Ok(0.0);
308 }
309
310 let s_normalized = &s / s_sum;
311 let entropy = -s_normalized
312 .iter()
313 .filter(|&&x| x > 1e-12)
314 .map(|&x| x * x.ln())
315 .sum::<f64>();
316
317 let effective_rank = entropy.exp();
318 Ok(effective_rank)
319 }
320
321 fn compute_frobenius_approximation_quality(
322 &self,
323 x: &Array2<f64>,
324 x_transformed: &Array2<f64>,
325 ) -> Result<f64> {
326 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
329 Ok(1.0 / (1.0 + reconstruction_error)) }
331
332 fn compute_relative_frobenius_quality(
333 &self,
334 x: &Array2<f64>,
335 x_transformed: &Array2<f64>,
336 ) -> Result<f64> {
337 let reconstruction_error = self.compute_reconstruction_error(x, x_transformed)?;
338 let original_norm = x.iter().map(|&x| x * x).sum::<f64>().sqrt();
339 let relative_error = reconstruction_error / original_norm;
340 Ok(1.0 / (1.0 + relative_error))
341 }
342
343 fn compute_cross_validation_score(
344 &self,
345 x: &Array2<f64>,
346 x_transformed: &Array2<f64>,
347 ) -> Result<f64> {
348 let n_samples = x.nrows();
350 let fold_size = n_samples / 5; let mut cv_scores = Vec::new();
352
353 for fold in 0..5 {
354 let start = fold * fold_size;
355 let end = if fold == 4 {
356 n_samples
357 } else {
358 (fold + 1) * fold_size
359 };
360
361 let val_features = x_transformed.slice(s![start..end, ..]);
362 let train_features = if start == 0 {
363 x_transformed.slice(s![end.., ..]).to_owned()
364 } else if end == n_samples {
365 x_transformed.slice(s![..start, ..]).to_owned()
366 } else {
367 let part1 = x_transformed.slice(s![..start, ..]);
368 let part2 = x_transformed.slice(s![end.., ..]);
369 concatenate![Axis(0), part1, part2]
370 };
371
372 let train_mean = train_features.mean_axis(Axis(0)).unwrap();
374 let val_mean = val_features.mean_axis(Axis(0)).unwrap();
375 let diff = &train_mean - &val_mean;
376 let similarity = 1.0 / (1.0 + diff.dot(&diff).sqrt());
377
378 cv_scores.push(similarity);
379 }
380
381 Ok(cv_scores.iter().sum::<f64>() / cv_scores.len() as f64)
382 }
383
384 fn compute_reconstruction_error(
385 &self,
386 x: &Array2<f64>,
387 x_transformed: &Array2<f64>,
388 ) -> Result<f64> {
389 let x_mean = x.mean_axis(Axis(0)).unwrap();
392 let x_transformed_projected =
393 x_transformed.sum_axis(Axis(1)) / x_transformed.ncols() as f64;
394
395 let mut error = 0.0;
396 for (i, &projected_val) in x_transformed_projected.iter().enumerate() {
397 let original_norm = x.row(i).dot(&x_mean);
398 error += (projected_val - original_norm).powi(2);
399 }
400
401 Ok(error.sqrt())
402 }
403
404 fn compute_approximation_error(
405 &self,
406 _x_train: &Array2<f64>,
407 x_val: &Array2<f64>,
408 _x_train_transformed: &Array2<f64>,
409 x_val_transformed: &Array2<f64>,
410 ) -> Result<f64> {
411 let val_norm = x_val.norm_l2();
416 let transformed_norm = x_val_transformed.norm_l2();
417
418 let error = if val_norm > 1e-12 {
420 (val_norm - transformed_norm).abs() / val_norm
421 } else {
422 (val_norm - transformed_norm).abs()
423 };
424
425 Ok(error)
426 }
427
428 fn select_optimal_dimension(
429 &self,
430 dimensions: &[usize],
431 quality_scores: &HashMap<usize, f64>,
432 approximation_errors: &HashMap<usize, f64>,
433 ) -> Result<usize> {
434 match &self.config.selection_strategy {
435 SelectionStrategy::ErrorTolerance { tolerance } => {
436 for &dim in dimensions {
438 if let Some(&error) = approximation_errors.get(&dim) {
439 if error <= *tolerance {
440 return Ok(dim);
441 }
442 }
443 }
444 self.select_best_quality_dimension(dimensions, quality_scores)
446 }
447 SelectionStrategy::QualityEfficiency {
448 efficiency_threshold,
449 } => {
450 let mut best_efficiency = 0.0;
452 let mut best_dim = dimensions[0];
453
454 for &dim in dimensions {
455 if let Some(&quality) = quality_scores.get(&dim) {
456 let efficiency = quality / dim as f64;
457 if efficiency >= *efficiency_threshold && efficiency > best_efficiency {
458 best_efficiency = efficiency;
459 best_dim = dim;
460 }
461 }
462 }
463 Ok(best_dim)
464 }
465 SelectionStrategy::ElbowMethod { sensitivity } => {
466 self.select_elbow_dimension(dimensions, quality_scores, *sensitivity)
467 }
468 _ => {
469 self.select_best_quality_dimension(dimensions, quality_scores)
471 }
472 }
473 }
474
475 fn select_best_quality_dimension(
476 &self,
477 dimensions: &[usize],
478 quality_scores: &HashMap<usize, f64>,
479 ) -> Result<usize> {
480 let mut best_score = f64::NEG_INFINITY;
481 let mut best_dim = dimensions[0];
482
483 for &dim in dimensions {
484 if let Some(&score) = quality_scores.get(&dim) {
485 if score > best_score {
486 best_score = score;
487 best_dim = dim;
488 }
489 }
490 }
491
492 Ok(best_dim)
493 }
494
495 fn select_elbow_dimension(
496 &self,
497 dimensions: &[usize],
498 quality_scores: &HashMap<usize, f64>,
499 sensitivity: f64,
500 ) -> Result<usize> {
501 if dimensions.len() < 3 {
502 return Ok(dimensions[0]);
503 }
504
505 let mut dim_score_pairs: Vec<_> = dimensions
507 .iter()
508 .filter_map(|&dim| quality_scores.get(&dim).map(|&score| (dim, score)))
509 .collect();
510 dim_score_pairs.sort_by(|a, b| a.0.cmp(&b.0));
511
512 let mut best_elbow_idx = 1;
514 let mut max_curvature = 0.0;
515
516 for i in 1..(dim_score_pairs.len() - 1) {
517 let (d1, s1) = dim_score_pairs[i - 1];
518 let (d2, s2) = dim_score_pairs[i];
519 let (d3, s3) = dim_score_pairs[i + 1];
520
521 let first_deriv1 = (s2 - s1) / (d2 - d1) as f64;
523 let first_deriv2 = (s3 - s2) / (d3 - d2) as f64;
524 let second_deriv = (first_deriv2 - first_deriv1) / ((d3 - d1) as f64 / 2.0);
525
526 let curvature = second_deriv.abs();
527 if curvature > max_curvature && curvature > sensitivity {
528 max_curvature = curvature;
529 best_elbow_idx = i;
530 }
531 }
532
533 Ok(dim_score_pairs[best_elbow_idx].0)
534 }
535}
536
/// A fitted adaptive sampler: the final trained `RBFSampler` plus the
/// diagnostics gathered during dimension selection.
pub struct FittedAdaptiveRBFSampler {
    // Final RBF sampler trained at the selected dimension.
    fitted_rbf: crate::rbf_sampler::RBFSampler<sklears_core::traits::Trained>,
    // Per-dimension statistics and the chosen dimension.
    selection_result: DimensionSelectionResult,
}
542
543impl Fit<Array2<f64>, ()> for AdaptiveRBFSampler {
544 type Fitted = FittedAdaptiveRBFSampler;
545
546 fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
547 let selection_result = self.select_dimension(x)?;
549
550 let rbf_sampler = RBFSampler::new(selection_result.optimal_dimension).gamma(self.gamma);
552
553 let fitted_rbf = rbf_sampler.fit(x, &())?;
554
555 Ok(FittedAdaptiveRBFSampler {
556 fitted_rbf,
557 selection_result,
558 })
559 }
560}
561
impl Transform<Array2<f64>, Array2<f64>> for FittedAdaptiveRBFSampler {
    /// Map `x` into the selected random-feature space by delegating to the
    /// underlying fitted `RBFSampler`.
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        self.fitted_rbf.transform(x)
    }
}
567
impl FittedAdaptiveRBFSampler {
    /// Diagnostics gathered while scanning candidate dimensions.
    pub fn selection_result(&self) -> &DimensionSelectionResult {
        &self.selection_result
    }

    /// The number of random features chosen during fitting.
    pub fn optimal_dimension(&self) -> usize {
        self.selection_result.optimal_dimension
    }
}
579
// Unit tests for adaptive dimension selection.
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    // End-to-end: fitting picks a dimension within the configured range and
    // the transform produces that many columns.
    #[test]
    fn test_adaptive_rbf_sampler_basic() {
        let x =
            Array2::from_shape_vec((50, 5), (0..250).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 50,
            step_size: 10,
            n_trials: 2,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let fitted = sampler.fit(&x, &()).unwrap();
        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.nrows(), 50);
        assert!(transformed.ncols() >= 10);
        assert!(transformed.ncols() <= 50);

        let optimal_dim = fitted.optimal_dimension();
        assert!(optimal_dim >= 10);
        assert!(optimal_dim <= 50);
    }

    // ErrorTolerance strategy: the selected dimension stays in range and
    // diagnostics are populated for every candidate.
    #[test]
    fn test_dimension_selection_error_tolerance() {
        let x =
            Array2::from_shape_vec((40, 4), (0..160).map(|i| i as f64 * 0.05).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 25,
            step_size: 5,
            selection_strategy: SelectionStrategy::ErrorTolerance { tolerance: 0.2 },
            n_trials: 1,
            validation_fraction: 0.3,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(1.0).config(config);

        let result = sampler.select_dimension(&x).unwrap();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 25);
        assert!(!result.quality_scores.is_empty());
        assert!(!result.approximation_errors.is_empty());
    }

    // QualityEfficiency strategy with the effective-rank metric: every
    // candidate dimension must receive a quality score.
    #[test]
    fn test_dimension_selection_quality_efficiency() {
        let x = Array2::from_shape_vec((30, 3), (0..90).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 20,
            step_size: 5,
            selection_strategy: SelectionStrategy::QualityEfficiency {
                efficiency_threshold: 0.01,
            },
            quality_metric: QualityMetric::EffectiveRank,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.8).config(config);

        let result = sampler.select_dimension(&x).unwrap();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 20);

        // Candidates are 5, 10, 15, 20 (min..=max stepped by 5).
        for &dim in &[5, 10, 15, 20] {
            assert!(result.quality_scores.contains_key(&dim));
        }
    }

    // ElbowMethod strategy: selection succeeds and timing/memory
    // diagnostics are recorded.
    #[test]
    fn test_dimension_selection_elbow_method() {
        let x =
            Array2::from_shape_vec((60, 6), (0..360).map(|i| i as f64 * 0.02).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 40,
            step_size: 10,
            selection_strategy: SelectionStrategy::ElbowMethod { sensitivity: 0.01 },
            quality_metric: QualityMetric::KernelAlignment,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.3).config(config);

        let result = sampler.select_dimension(&x).unwrap();

        assert!(result.optimal_dimension >= 10);
        assert!(result.optimal_dimension <= 40);

        assert!(!result.computation_times.is_empty());
        assert!(!result.memory_usage.is_empty());
    }

    // Direct checks of the individual quality metrics on a fitted sampler.
    #[test]
    fn test_quality_metrics() {
        let x =
            Array2::from_shape_vec((25, 4), (0..100).map(|i| i as f64 * 0.1).collect()).unwrap();

        let sampler = AdaptiveRBFSampler::new().gamma(1.0);

        let rbf = RBFSampler::new(15).gamma(1.0);
        let fitted_rbf = rbf.fit(&x, &()).unwrap();
        let x_transformed = fitted_rbf.transform(&x).unwrap();

        // Kernel alignment is a normalized inner product: bounded by 1.
        let alignment = sampler
            .compute_kernel_alignment(&x, &x_transformed)
            .unwrap();
        assert!(alignment >= 0.0);
        assert!(alignment <= 1.0);

        // Effective rank is positive and cannot exceed the column count.
        let eff_rank = sampler.compute_effective_rank(&x_transformed).unwrap();
        assert!(eff_rank > 0.0);
        assert!(eff_rank <= x_transformed.ncols() as f64);

        // Reconstruction error is a root of a sum of squares: non-negative.
        let recon_error = sampler
            .compute_reconstruction_error(&x, &x_transformed)
            .unwrap();
        assert!(recon_error >= 0.0);
    }

    // With a fixed random_seed, two independent runs must agree on the
    // selected dimension and (to numerical precision) the quality scores.
    #[test]
    fn test_adaptive_sampler_reproducibility() {
        let x =
            Array2::from_shape_vec((40, 5), (0..200).map(|i| i as f64 * 0.08).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 10,
            max_features: 30,
            step_size: 10,
            n_trials: 2,
            random_seed: Some(42),
            ..Default::default()
        };

        let sampler1 = AdaptiveRBFSampler::new().gamma(0.5).config(config.clone());

        let sampler2 = AdaptiveRBFSampler::new().gamma(0.5).config(config);

        let result1 = sampler1.select_dimension(&x).unwrap();
        let result2 = sampler2.select_dimension(&x).unwrap();

        assert_eq!(result1.optimal_dimension, result2.optimal_dimension);

        for (&dim, &score1) in &result1.quality_scores {
            if let Some(&score2) = result2.quality_scores.get(&dim) {
                assert_abs_diff_eq!(score1, score2, epsilon = 1e-10);
            }
        }
    }

    // The DimensionSelectionResult exposes one entry per candidate and the
    // memory estimate matches components * features * 8 bytes.
    #[test]
    fn test_dimension_selection_result() {
        let x =
            Array2::from_shape_vec((35, 3), (0..105).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = AdaptiveDimensionConfig {
            min_features: 5,
            max_features: 15,
            step_size: 5,
            n_trials: 1,
            ..Default::default()
        };

        let sampler = AdaptiveRBFSampler::new().gamma(0.7).config(config);

        let fitted = sampler.fit(&x, &()).unwrap();
        let result = fitted.selection_result();

        assert!(result.optimal_dimension >= 5);
        assert!(result.optimal_dimension <= 15);
        // Candidates: 5, 10, 15 -> three entries in each map.
        assert_eq!(result.quality_scores.len(), 3); assert_eq!(result.approximation_errors.len(), 3);
        assert_eq!(result.computation_times.len(), 3);
        assert_eq!(result.memory_usage.len(), 3);

        for (&dim, &memory) in &result.memory_usage {
            assert_eq!(memory, dim * 3 * 8); }
    }
}