1use crate::{Nystroem, RBFSampler};
7use rayon::prelude::*;
8use scirs2_core::ndarray::ndarray_linalg::SVD;
9use scirs2_core::ndarray::{s, Array1, Array2};
10use scirs2_core::random::rngs::StdRng;
11use scirs2_core::random::{thread_rng, Rng, SeedableRng};
12use sklears_core::{
13 error::{Result, SklearsError},
14 traits::{Fit, Transform},
15};
16
/// Strategy used to explore the kernel-approximation hyperparameter space.
#[derive(Debug, Clone)]
pub enum SearchStrategy {
    /// Exhaustive evaluation over a fixed grid of parameter values.
    GridSearch {
        /// Number of grid points per parameter dimension.
        n_points: usize,
    },
    /// Evaluation of randomly sampled parameter configurations.
    RandomSearch {
        /// Number of random configurations to draw and score.
        n_samples: usize,
    },
    /// Sequential model-based search: random warm-up followed by
    /// acquisition-guided candidate selection.
    BayesianOptimization {
        /// Number of random configurations evaluated before the guided phase.
        n_initial: usize,
        /// Number of acquisition-guided iterations after the warm-up.
        n_iterations: usize,
        /// Weight of the exploration bonus in the acquisition function.
        exploration_factor: f64,
    },
    /// Cyclic one-parameter-at-a-time optimization.
    CoordinateDescent {
        /// Maximum number of full coordinate sweeps.
        max_iterations: usize,
        /// Score-improvement threshold below which the search stops.
        tolerance: f64,
    },
}
48
/// Inclusive search ranges for each tunable kernel-approximation parameter.
#[derive(Debug, Clone)]
pub struct ParameterBounds {
    /// (min, max) range for the RBF/polynomial `gamma` parameter.
    pub gamma_bounds: (f64, f64),
    /// (min, max) range for the number of approximation components.
    pub n_components_bounds: (usize, usize),
    /// Optional (min, max) range for the polynomial degree.
    pub degree_bounds: Option<(i32, i32)>,
    /// Optional (min, max) range for the polynomial `coef0` offset.
    pub coef0_bounds: Option<(f64, f64)>,
}
62
63impl Default for ParameterBounds {
64 fn default() -> Self {
65 Self {
66 gamma_bounds: (1e-6, 1e2),
67 n_components_bounds: (10, 1000),
68 degree_bounds: Some((2, 5)),
69 coef0_bounds: Some((-1.0, 1.0)),
70 }
71 }
72}
73
/// Objective maximized when scoring a candidate parameter set.
#[derive(Debug, Clone)]
pub enum ObjectiveFunction {
    /// Alignment between the exact kernel and the approximate kernel.
    KernelAlignment,
    /// Correlation-based proxy for cross-validated predictive quality
    /// (requires targets; falls back to kernel alignment without them).
    CrossValidationError { n_folds: usize },
    /// 1 / (1 + relative Frobenius error) of the kernel approximation.
    ApproximationQuality,
    /// Entropy-based effective rank of the transformed feature matrix.
    EffectiveRank,
    /// User-defined objective (currently a stub that scores 0.0).
    Custom,
}
89
/// Full configuration for a [`ParameterLearner`] run.
#[derive(Debug, Clone)]
pub struct ParameterLearningConfig {
    /// How the parameter space is explored.
    pub search_strategy: SearchStrategy,
    /// Ranges that candidate parameters are drawn from.
    pub parameter_bounds: ParameterBounds,
    /// Objective maximized when scoring candidates.
    pub objective_function: ObjectiveFunction,
    /// Fraction of data reserved for validation.
    /// NOTE(review): not read by any method in this file — confirm whether
    /// it is consumed elsewhere or is dead configuration.
    pub validation_fraction: f64,
    /// Seed for reproducible random sampling; `None` uses entropy.
    pub random_seed: Option<u64>,
    /// Degree of parallelism requested.
    /// NOTE(review): grid search uses rayon's global pool; this value is not
    /// applied — confirm intent.
    pub n_jobs: usize,
    /// When true, progress is printed to stdout.
    pub verbose: bool,
}
109
110impl Default for ParameterLearningConfig {
111 fn default() -> Self {
112 Self {
113 search_strategy: SearchStrategy::GridSearch { n_points: 10 },
114 parameter_bounds: ParameterBounds::default(),
115 objective_function: ObjectiveFunction::KernelAlignment,
116 validation_fraction: 0.2,
117 random_seed: None,
118 n_jobs: num_cpus::get(),
119 verbose: false,
120 }
121 }
122}
123
/// One candidate hyperparameter configuration.
#[derive(Debug, Clone)]
pub struct ParameterSet {
    /// Kernel bandwidth / scale parameter.
    pub gamma: f64,
    /// Number of approximation components.
    pub n_components: usize,
    /// Polynomial degree, when applicable.
    pub degree: Option<i32>,
    /// Polynomial offset term, when applicable.
    pub coef0: Option<f64>,
}

impl PartialEq for ParameterSet {
    /// Bitwise comparison of the float fields.
    ///
    /// The previous epsilon-based comparison was not transitive (violating
    /// the `Eq` contract) and could report two sets equal while `Hash` —
    /// which uses `to_bits` — hashed them differently, breaking the
    /// `k1 == k2 => hash(k1) == hash(k2)` invariant required by hash maps.
    /// Bitwise equality restores both invariants (note: 0.0 != -0.0 and
    /// NaN == NaN under this definition, which is intentional for use as a
    /// map key).
    fn eq(&self, other: &Self) -> bool {
        self.gamma.to_bits() == other.gamma.to_bits()
            && self.n_components == other.n_components
            && self.degree == other.degree
            && self.coef0.map(f64::to_bits) == other.coef0.map(f64::to_bits)
    }
}

impl Eq for ParameterSet {}

impl std::hash::Hash for ParameterSet {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        use std::hash::Hash;
        self.gamma.to_bits().hash(state);
        self.n_components.hash(state);
        self.degree.hash(state);
        // Hash the Option wrapper (not just the payload) so `None` and a
        // following field cannot prefix-collide with `Some(x)`.
        self.coef0.map(f64::to_bits).hash(state);
    }
}
164
/// Outcome of a parameter-search run.
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Highest-scoring parameter set found.
    pub best_parameters: ParameterSet,
    /// Objective value of `best_parameters`.
    pub best_score: f64,
    /// Every evaluated (parameters, score) pair, in evaluation order.
    pub parameter_history: Vec<(ParameterSet, f64)>,
    /// Whether the search met its convergence criterion (grid/random
    /// searches always report `true`).
    pub converged: bool,
    /// Total number of objective evaluations performed.
    pub n_evaluations: usize,
}
180
/// Hyperparameter optimizer for kernel-approximation transformers
/// ([`RBFSampler`] and [`Nystroem`]).
pub struct ParameterLearner {
    // Search strategy, bounds, objective, and runtime options.
    config: ParameterLearningConfig,
}
185
186impl ParameterLearner {
187 pub fn new(config: ParameterLearningConfig) -> Self {
189 Self { config }
190 }
191
    /// Optimize [`RBFSampler`] hyperparameters for `x` (optionally
    /// supervised by targets `y`) by dispatching to the configured search
    /// strategy.
    ///
    /// # Errors
    /// Propagates any error raised while fitting/evaluating candidates.
    pub fn optimize_rbf_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<OptimizationResult> {
        match &self.config.search_strategy {
            SearchStrategy::GridSearch { n_points } => self.grid_search_rbf(x, y, *n_points),
            SearchStrategy::RandomSearch { n_samples } => self.random_search_rbf(x, y, *n_samples),
            SearchStrategy::BayesianOptimization {
                n_initial,
                n_iterations,
                exploration_factor,
            } => {
                self.bayesian_optimization_rbf(x, y, *n_initial, *n_iterations, *exploration_factor)
            }
            SearchStrategy::CoordinateDescent {
                max_iterations,
                tolerance,
            } => self.coordinate_descent_rbf(x, y, *max_iterations, *tolerance),
        }
    }
214
    /// Optimize [`Nystroem`] hyperparameters for `x` (optionally supervised
    /// by targets `y`).
    ///
    /// Only grid and random search are implemented for Nystroem; any other
    /// configured strategy silently falls back to a 10-point grid search.
    ///
    /// # Errors
    /// Propagates any error raised while fitting/evaluating candidates.
    pub fn optimize_nystroem_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<OptimizationResult> {
        match &self.config.search_strategy {
            SearchStrategy::GridSearch { n_points } => self.grid_search_nystroem(x, y, *n_points),
            SearchStrategy::RandomSearch { n_samples } => {
                self.random_search_nystroem(x, y, *n_samples)
            }
            _ => {
                // NOTE(review): Bayesian/coordinate-descent strategies are
                // not implemented for Nystroem; this fallback is silent —
                // consider logging or returning an error instead.
                self.grid_search_nystroem(x, y, 10)
            }
        }
    }
233
    /// Exhaustive grid search over (gamma, n_components) for [`RBFSampler`].
    ///
    /// Builds a log-spaced gamma grid and a linearly spaced n_components
    /// grid of `n_points` values each, scores every combination in parallel
    /// with rayon, and returns the best configuration plus the full history.
    ///
    /// # Errors
    /// Propagates the first error from `evaluate_rbf_parameters`.
    fn grid_search_rbf(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
        n_points: usize,
    ) -> Result<OptimizationResult> {
        // Gamma is searched on a log scale (last argument = true).
        let gamma_values = self.create_parameter_grid(
            self.config.parameter_bounds.gamma_bounds.0,
            self.config.parameter_bounds.gamma_bounds.1,
            n_points,
            true,
        );

        // n_components is searched linearly and truncated back to usize.
        let n_components_values = self
            .create_parameter_grid(
                self.config.parameter_bounds.n_components_bounds.0 as f64,
                self.config.parameter_bounds.n_components_bounds.1 as f64,
                n_points,
                false,
            )
            .into_iter()
            .map(|x| x as usize)
            .collect::<Vec<_>>();

        let mut parameter_history = Vec::new();
        let mut best_score = f64::NEG_INFINITY;
        // NOTE(review): indexing [0] panics if n_points == 0 — confirm
        // callers always pass n_points >= 1.
        let mut best_parameters = ParameterSet {
            gamma: gamma_values[0],
            n_components: n_components_values[0],
            degree: None,
            coef0: None,
        };

        // Cartesian product of the two grids.
        let parameter_combinations: Vec<_> = gamma_values
            .iter()
            .flat_map(|&gamma| {
                n_components_values
                    .iter()
                    .map(move |&n_components| ParameterSet {
                        gamma,
                        n_components,
                        degree: None,
                        coef0: None,
                    })
            })
            .collect();

        if self.config.verbose {
            println!(
                "Evaluating {} parameter combinations",
                parameter_combinations.len()
            );
        }

        // Score all combinations in parallel; collecting into
        // Result<Vec<_>> short-circuits on the first error.
        let scores: Result<Vec<_>> = parameter_combinations
            .par_iter()
            .map(|params| {
                let score = self.evaluate_rbf_parameters(x, y, params)?;
                Ok((params.clone(), score))
            })
            .collect();

        let scores = scores?;

        // Sequential reduction keeps the history in deterministic order.
        for (params, score) in scores {
            parameter_history.push((params.clone(), score));

            if score > best_score {
                best_score = score;
                best_parameters = params;
            }
        }

        let n_evaluations = parameter_history.len();
        Ok(OptimizationResult {
            best_parameters,
            best_score,
            parameter_history,
            converged: true,
            n_evaluations,
        })
    }
318
319 fn random_search_rbf(
320 &self,
321 x: &Array2<f64>,
322 y: Option<&Array1<f64>>,
323 n_samples: usize,
324 ) -> Result<OptimizationResult> {
325 let mut rng = if let Some(seed) = self.config.random_seed {
326 StdRng::seed_from_u64(seed)
327 } else {
328 StdRng::from_seed(thread_rng().gen())
329 };
330
331 let mut parameter_history = Vec::new();
332 let mut best_score = f64::NEG_INFINITY;
333 let mut best_parameters = ParameterSet {
334 gamma: 1.0,
335 n_components: 100,
336 degree: None,
337 coef0: None,
338 };
339
340 for _ in 0..n_samples {
341 let gamma = self.sample_log_uniform(
343 &mut rng,
344 self.config.parameter_bounds.gamma_bounds.0,
345 self.config.parameter_bounds.gamma_bounds.1,
346 );
347
348 let n_components = rng.gen_range(
349 self.config.parameter_bounds.n_components_bounds.0
350 ..=self.config.parameter_bounds.n_components_bounds.1,
351 );
352
353 let params = ParameterSet {
354 gamma,
355 n_components,
356 degree: None,
357 coef0: None,
358 };
359
360 let score = self.evaluate_rbf_parameters(x, y, ¶ms)?;
361 parameter_history.push((params.clone(), score));
362
363 if score > best_score {
364 best_score = score;
365 best_parameters = params;
366 }
367 }
368
369 Ok(OptimizationResult {
370 best_parameters,
371 best_score,
372 parameter_history,
373 converged: true,
374 n_evaluations: n_samples,
375 })
376 }
377
378 fn bayesian_optimization_rbf(
379 &self,
380 x: &Array2<f64>,
381 y: Option<&Array1<f64>>,
382 n_initial: usize,
383 n_iterations: usize,
384 exploration_factor: f64,
385 ) -> Result<OptimizationResult> {
386 let mut rng = if let Some(seed) = self.config.random_seed {
388 StdRng::seed_from_u64(seed)
389 } else {
390 StdRng::from_seed(thread_rng().gen())
391 };
392
393 let mut parameter_history = Vec::new();
394 let mut best_score = f64::NEG_INFINITY;
395 let mut best_parameters = ParameterSet {
396 gamma: 1.0,
397 n_components: 100,
398 degree: None,
399 coef0: None,
400 };
401
402 for _ in 0..n_initial {
404 let gamma = self.sample_log_uniform(
405 &mut rng,
406 self.config.parameter_bounds.gamma_bounds.0,
407 self.config.parameter_bounds.gamma_bounds.1,
408 );
409
410 let n_components = rng.gen_range(
411 self.config.parameter_bounds.n_components_bounds.0
412 ..=self.config.parameter_bounds.n_components_bounds.1,
413 );
414
415 let params = ParameterSet {
416 gamma,
417 n_components,
418 degree: None,
419 coef0: None,
420 };
421
422 let score = self.evaluate_rbf_parameters(x, y, ¶ms)?;
423 parameter_history.push((params.clone(), score));
424
425 if score > best_score {
426 best_score = score;
427 best_parameters = params;
428 }
429 }
430
431 for iteration in 0..n_iterations {
433 let next_params =
435 self.acquisition_function_rbf(¶meter_history, exploration_factor, &mut rng);
436
437 let score = self.evaluate_rbf_parameters(x, y, &next_params)?;
438 parameter_history.push((next_params.clone(), score));
439
440 if score > best_score {
441 best_score = score;
442 best_parameters = next_params;
443 }
444
445 if self.config.verbose {
446 println!(
447 "Iteration {}: Best score = {:.6}",
448 iteration + 1,
449 best_score
450 );
451 }
452 }
453
454 Ok(OptimizationResult {
455 best_parameters,
456 best_score,
457 parameter_history,
458 converged: true,
459 n_evaluations: n_initial + n_iterations,
460 })
461 }
462
463 fn coordinate_descent_rbf(
464 &self,
465 x: &Array2<f64>,
466 y: Option<&Array1<f64>>,
467 max_iterations: usize,
468 tolerance: f64,
469 ) -> Result<OptimizationResult> {
470 let mut current_params = ParameterSet {
471 gamma: (self.config.parameter_bounds.gamma_bounds.0
472 * self.config.parameter_bounds.gamma_bounds.1)
473 .sqrt(),
474 n_components: (self.config.parameter_bounds.n_components_bounds.0
475 + self.config.parameter_bounds.n_components_bounds.1)
476 / 2,
477 degree: None,
478 coef0: None,
479 };
480
481 let mut current_score = self.evaluate_rbf_parameters(x, y, ¤t_params)?;
482 let mut parameter_history = vec![(current_params.clone(), current_score)];
483 let mut converged = false;
484
485 for iteration in 0..max_iterations {
486 let prev_score = current_score;
487
488 current_params = self.optimize_gamma_coordinate(x, y, ¤t_params)?;
490 current_score = self.evaluate_rbf_parameters(x, y, ¤t_params)?;
491 parameter_history.push((current_params.clone(), current_score));
492
493 current_params = self.optimize_n_components_coordinate(x, y, ¤t_params)?;
495 current_score = self.evaluate_rbf_parameters(x, y, ¤t_params)?;
496 parameter_history.push((current_params.clone(), current_score));
497
498 if (current_score - prev_score).abs() < tolerance {
500 converged = true;
501 if self.config.verbose {
502 println!(
503 "Converged at iteration {} with score {:.6}",
504 iteration + 1,
505 current_score
506 );
507 }
508 break;
509 }
510
511 if self.config.verbose {
512 println!("Iteration {}: Score = {:.6}", iteration + 1, current_score);
513 }
514 }
515
516 let n_evaluations = parameter_history.len();
517 Ok(OptimizationResult {
518 best_parameters: current_params,
519 best_score: current_score,
520 parameter_history,
521 converged,
522 n_evaluations,
523 })
524 }
525
526 fn grid_search_nystroem(
527 &self,
528 x: &Array2<f64>,
529 y: Option<&Array1<f64>>,
530 n_points: usize,
531 ) -> Result<OptimizationResult> {
532 let gamma_values = self.create_parameter_grid(
534 self.config.parameter_bounds.gamma_bounds.0,
535 self.config.parameter_bounds.gamma_bounds.1,
536 n_points,
537 true,
538 );
539
540 let n_components_values = self
541 .create_parameter_grid(
542 self.config.parameter_bounds.n_components_bounds.0 as f64,
543 self.config.parameter_bounds.n_components_bounds.1 as f64,
544 n_points,
545 false,
546 )
547 .into_iter()
548 .map(|x| x as usize)
549 .collect::<Vec<_>>();
550
551 let mut parameter_history = Vec::new();
552 let mut best_score = f64::NEG_INFINITY;
553 let mut best_parameters = ParameterSet {
554 gamma: gamma_values[0],
555 n_components: n_components_values[0],
556 degree: None,
557 coef0: None,
558 };
559
560 for &gamma in &gamma_values {
561 for &n_components in &n_components_values {
562 let params = ParameterSet {
563 gamma,
564 n_components,
565 degree: None,
566 coef0: None,
567 };
568
569 let score = self.evaluate_nystroem_parameters(x, y, ¶ms)?;
570 parameter_history.push((params.clone(), score));
571
572 if score > best_score {
573 best_score = score;
574 best_parameters = params;
575 }
576 }
577 }
578
579 let n_evaluations = parameter_history.len();
580 Ok(OptimizationResult {
581 best_parameters,
582 best_score,
583 parameter_history,
584 converged: true,
585 n_evaluations,
586 })
587 }
588
589 fn random_search_nystroem(
590 &self,
591 x: &Array2<f64>,
592 y: Option<&Array1<f64>>,
593 n_samples: usize,
594 ) -> Result<OptimizationResult> {
595 let mut rng = if let Some(seed) = self.config.random_seed {
596 StdRng::seed_from_u64(seed)
597 } else {
598 StdRng::from_seed(thread_rng().gen())
599 };
600
601 let mut parameter_history = Vec::new();
602 let mut best_score = f64::NEG_INFINITY;
603 let mut best_parameters = ParameterSet {
604 gamma: 1.0,
605 n_components: 100,
606 degree: None,
607 coef0: None,
608 };
609
610 for _ in 0..n_samples {
611 let gamma = self.sample_log_uniform(
612 &mut rng,
613 self.config.parameter_bounds.gamma_bounds.0,
614 self.config.parameter_bounds.gamma_bounds.1,
615 );
616
617 let n_components = rng.gen_range(
618 self.config.parameter_bounds.n_components_bounds.0
619 ..=self.config.parameter_bounds.n_components_bounds.1,
620 );
621
622 let params = ParameterSet {
623 gamma,
624 n_components,
625 degree: None,
626 coef0: None,
627 };
628
629 let score = self.evaluate_nystroem_parameters(x, y, ¶ms)?;
630 parameter_history.push((params.clone(), score));
631
632 if score > best_score {
633 best_score = score;
634 best_parameters = params;
635 }
636 }
637
638 Ok(OptimizationResult {
639 best_parameters,
640 best_score,
641 parameter_history,
642 converged: true,
643 n_evaluations: n_samples,
644 })
645 }
646
    /// Score one RBF parameter set: fits an [`RBFSampler`], transforms `x`,
    /// and evaluates the configured objective on the transformed features.
    /// All objectives are formulated so that higher is better.
    fn evaluate_rbf_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
        params: &ParameterSet,
    ) -> Result<f64> {
        let sampler = RBFSampler::new(params.n_components).gamma(params.gamma);
        let fitted = sampler.fit(x, &())?;
        let x_transformed = fitted.transform(x)?;

        match &self.config.objective_function {
            ObjectiveFunction::KernelAlignment => {
                self.compute_kernel_alignment(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::CrossValidationError { n_folds } => {
                if let Some(y_data) = y {
                    self.compute_cross_validation_score(&x_transformed, y_data, *n_folds)
                } else {
                    // No targets supplied: fall back to the unsupervised
                    // kernel-alignment objective instead of failing.
                    self.compute_kernel_alignment(x, &x_transformed, params.gamma)
                }
            }
            ObjectiveFunction::ApproximationQuality => {
                self.compute_approximation_quality(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::EffectiveRank => self.compute_effective_rank(&x_transformed),
            ObjectiveFunction::Custom => {
                // NOTE(review): Custom objective is a stub that always
                // scores 0.0 — confirm whether a user hook is planned.
                Ok(0.0)
            }
        }
    }
679
    /// Score one Nystroem parameter set: fits a [`Nystroem`] transformer
    /// with an RBF kernel, transforms `x`, and evaluates the configured
    /// objective. Mirrors `evaluate_rbf_parameters`, including the
    /// unsupervised fallback when no targets are available.
    fn evaluate_nystroem_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
        params: &ParameterSet,
    ) -> Result<f64> {
        use crate::nystroem::Kernel;

        let kernel = Kernel::Rbf {
            gamma: params.gamma,
        };
        let nystroem = Nystroem::new(kernel, params.n_components);
        let fitted = nystroem.fit(x, &())?;
        let x_transformed = fitted.transform(x)?;

        match &self.config.objective_function {
            ObjectiveFunction::KernelAlignment => {
                self.compute_kernel_alignment(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::CrossValidationError { n_folds } => {
                if let Some(y_data) = y {
                    self.compute_cross_validation_score(&x_transformed, y_data, *n_folds)
                } else {
                    // No targets: fall back to the unsupervised objective.
                    self.compute_kernel_alignment(x, &x_transformed, params.gamma)
                }
            }
            ObjectiveFunction::ApproximationQuality => {
                self.compute_approximation_quality(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::EffectiveRank => self.compute_effective_rank(&x_transformed),
            ObjectiveFunction::Custom => Ok(0.0),
        }
    }
713
714 fn compute_kernel_alignment(
715 &self,
716 x: &Array2<f64>,
717 x_transformed: &Array2<f64>,
718 gamma: f64,
719 ) -> Result<f64> {
720 let n_samples = x.nrows().min(100); let x_subset = x.slice(s![..n_samples, ..]);
722
723 let mut k_exact = Array2::zeros((n_samples, n_samples));
725 for i in 0..n_samples {
726 for j in 0..n_samples {
727 let diff = &x_subset.row(i) - &x_subset.row(j);
728 let squared_norm = diff.dot(&diff);
729 k_exact[[i, j]] = (-gamma * squared_norm).exp();
730 }
731 }
732
733 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
735 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
736
737 let k_exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
739 let k_approx_frobenius = k_approx.iter().map(|&x| x * x).sum::<f64>().sqrt();
740 let k_product = (&k_exact * &k_approx).sum();
741
742 let alignment = k_product / (k_exact_frobenius * k_approx_frobenius);
743 Ok(alignment)
744 }
745
746 fn compute_cross_validation_score(
747 &self,
748 x_transformed: &Array2<f64>,
749 y: &Array1<f64>,
750 n_folds: usize,
751 ) -> Result<f64> {
752 let n_samples = x_transformed.nrows();
753 let fold_size = n_samples / n_folds;
754 let mut cv_scores = Vec::new();
755
756 for fold in 0..n_folds {
757 let start = fold * fold_size;
758 let end = if fold == n_folds - 1 {
759 n_samples
760 } else {
761 (fold + 1) * fold_size
762 };
763
764 let val_features = x_transformed.slice(s![start..end, ..]);
766 let val_targets = y.slice(s![start..end]);
767
768 let mut correlations = Vec::new();
770 for j in 0..val_features.ncols() {
771 let feature_col = val_features.column(j);
772 let correlation =
773 self.compute_correlation(feature_col.into_owned().view(), val_targets);
774 correlations.push(correlation.abs());
775 }
776
777 let mean_correlation = correlations.iter().sum::<f64>() / correlations.len() as f64;
778 cv_scores.push(mean_correlation);
779 }
780
781 Ok(cv_scores.iter().sum::<f64>() / cv_scores.len() as f64)
782 }
783
784 fn compute_approximation_quality(
785 &self,
786 x: &Array2<f64>,
787 x_transformed: &Array2<f64>,
788 gamma: f64,
789 ) -> Result<f64> {
790 let n_samples = x.nrows().min(50); let x_subset = x.slice(s![..n_samples, ..]);
793 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
794
795 let mut k_exact = Array2::zeros((n_samples, n_samples));
797 for i in 0..n_samples {
798 for j in 0..n_samples {
799 let diff = &x_subset.row(i) - &x_subset.row(j);
800 let squared_norm = diff.dot(&diff);
801 k_exact[[i, j]] = (-gamma * squared_norm).exp();
802 }
803 }
804
805 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
807
808 let error_matrix = &k_exact - &k_approx;
810 let frobenius_error = error_matrix.iter().map(|&x| x * x).sum::<f64>().sqrt();
811 let exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
812
813 let relative_error = frobenius_error / exact_frobenius;
814 Ok(1.0 / (1.0 + relative_error)) }
816
817 fn compute_effective_rank(&self, x_transformed: &Array2<f64>) -> Result<f64> {
818 let (_, s, _) = x_transformed
820 .svd(true, true)
821 .map_err(|_| SklearsError::InvalidInput("SVD computation failed".to_string()))?;
822
823 let s_sum = s.sum();
825 if s_sum == 0.0 {
826 return Ok(0.0);
827 }
828
829 let s_normalized = &s / s_sum;
830 let entropy = -s_normalized
831 .iter()
832 .filter(|&&x| x > 1e-12)
833 .map(|&x| x * x.ln())
834 .sum::<f64>();
835
836 Ok(entropy.exp())
837 }
838
839 fn compute_correlation(
840 &self,
841 x: scirs2_core::ndarray::ArrayView1<f64>,
842 y: scirs2_core::ndarray::ArrayView1<f64>,
843 ) -> f64 {
844 let x_mean = x.mean().unwrap_or(0.0);
845 let y_mean = y.mean().unwrap_or(0.0);
846
847 let numerator: f64 = x
848 .iter()
849 .zip(y.iter())
850 .map(|(&xi, &yi)| (xi - x_mean) * (yi - y_mean))
851 .sum();
852
853 let x_var: f64 = x.iter().map(|&xi| (xi - x_mean).powi(2)).sum();
854 let y_var: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
855
856 let denominator = (x_var * y_var).sqrt();
857
858 if denominator == 0.0 {
859 0.0
860 } else {
861 numerator / denominator
862 }
863 }
864
865 fn create_parameter_grid(
867 &self,
868 min_val: f64,
869 max_val: f64,
870 n_points: usize,
871 log_scale: bool,
872 ) -> Vec<f64> {
873 if log_scale {
874 let log_min = min_val.ln();
875 let log_max = max_val.ln();
876 (0..n_points)
877 .map(|i| {
878 let t = i as f64 / (n_points - 1) as f64;
879 (log_min + t * (log_max - log_min)).exp()
880 })
881 .collect()
882 } else {
883 (0..n_points)
884 .map(|i| {
885 let t = i as f64 / (n_points - 1) as f64;
886 min_val + t * (max_val - min_val)
887 })
888 .collect()
889 }
890 }
891
892 fn sample_log_uniform(&self, rng: &mut StdRng, min_val: f64, max_val: f64) -> f64 {
893 let log_min = min_val.ln();
894 let log_max = max_val.ln();
895 let log_val = rng.gen_range(log_min..=log_max);
896 log_val.exp()
897 }
898
899 fn acquisition_function_rbf(
900 &self,
901 parameter_history: &[(ParameterSet, f64)],
902 exploration_factor: f64,
903 rng: &mut StdRng,
904 ) -> ParameterSet {
905 let best_score = parameter_history
909 .iter()
910 .map(|(_, score)| *score)
911 .fold(f64::NEG_INFINITY, f64::max);
912
913 let mut best_acquisition = f64::NEG_INFINITY;
915 let mut best_candidate = parameter_history[0].0.clone();
916
917 for _ in 0..100 {
918 let gamma = self.sample_log_uniform(
920 rng,
921 self.config.parameter_bounds.gamma_bounds.0,
922 self.config.parameter_bounds.gamma_bounds.1,
923 );
924
925 let n_components = rng.gen_range(
926 self.config.parameter_bounds.n_components_bounds.0
927 ..=self.config.parameter_bounds.n_components_bounds.1,
928 );
929
930 let candidate = ParameterSet {
931 gamma,
932 n_components,
933 degree: None,
934 coef0: None,
935 };
936
937 let mean_score = self.predict_score_from_history(parameter_history, &candidate);
939 let uncertainty =
940 exploration_factor * self.estimate_uncertainty(parameter_history, &candidate);
941 let acquisition = mean_score + uncertainty;
942
943 if acquisition > best_acquisition {
944 best_acquisition = acquisition;
945 best_candidate = candidate;
946 }
947 }
948
949 best_candidate
950 }
951
952 fn predict_score_from_history(
953 &self,
954 parameter_history: &[(ParameterSet, f64)],
955 candidate: &ParameterSet,
956 ) -> f64 {
957 let mut weighted_sum = 0.0;
959 let mut weight_sum = 0.0;
960
961 for (params, score) in parameter_history {
962 let distance = self.parameter_distance(candidate, params);
963 let weight = (-distance).exp();
964 weighted_sum += weight * score;
965 weight_sum += weight;
966 }
967
968 if weight_sum > 0.0 {
969 weighted_sum / weight_sum
970 } else {
971 0.0
972 }
973 }
974
975 fn estimate_uncertainty(
976 &self,
977 parameter_history: &[(ParameterSet, f64)],
978 candidate: &ParameterSet,
979 ) -> f64 {
980 let min_distance = parameter_history
982 .iter()
983 .map(|(params, _)| self.parameter_distance(candidate, params))
984 .fold(f64::INFINITY, f64::min);
985
986 min_distance
987 }
988
989 fn parameter_distance(&self, p1: &ParameterSet, p2: &ParameterSet) -> f64 {
990 let gamma_diff = (p1.gamma.ln() - p2.gamma.ln()).powi(2);
991 let n_components_diff =
992 ((p1.n_components as f64).ln() - (p2.n_components as f64).ln()).powi(2);
993 (gamma_diff + n_components_diff).sqrt()
994 }
995
996 fn optimize_gamma_coordinate(
997 &self,
998 x: &Array2<f64>,
999 y: Option<&Array1<f64>>,
1000 current_params: &ParameterSet,
1001 ) -> Result<ParameterSet> {
1002 let gamma_values = self.create_parameter_grid(
1003 self.config.parameter_bounds.gamma_bounds.0,
1004 self.config.parameter_bounds.gamma_bounds.1,
1005 10,
1006 true,
1007 );
1008
1009 let mut best_gamma = current_params.gamma;
1010 let mut best_score = f64::NEG_INFINITY;
1011
1012 for &gamma in &gamma_values {
1013 let test_params = ParameterSet {
1014 gamma,
1015 n_components: current_params.n_components,
1016 degree: current_params.degree,
1017 coef0: current_params.coef0,
1018 };
1019
1020 let score = self.evaluate_rbf_parameters(x, y, &test_params)?;
1021 if score > best_score {
1022 best_score = score;
1023 best_gamma = gamma;
1024 }
1025 }
1026
1027 Ok(ParameterSet {
1028 gamma: best_gamma,
1029 n_components: current_params.n_components,
1030 degree: current_params.degree,
1031 coef0: current_params.coef0,
1032 })
1033 }
1034
1035 fn optimize_n_components_coordinate(
1036 &self,
1037 x: &Array2<f64>,
1038 y: Option<&Array1<f64>>,
1039 current_params: &ParameterSet,
1040 ) -> Result<ParameterSet> {
1041 let n_components_values = self
1042 .create_parameter_grid(
1043 self.config.parameter_bounds.n_components_bounds.0 as f64,
1044 self.config.parameter_bounds.n_components_bounds.1 as f64,
1045 10,
1046 false,
1047 )
1048 .into_iter()
1049 .map(|x| x as usize)
1050 .collect::<Vec<_>>();
1051
1052 let mut best_n_components = current_params.n_components;
1053 let mut best_score = f64::NEG_INFINITY;
1054
1055 for &n_components in &n_components_values {
1056 let test_params = ParameterSet {
1057 gamma: current_params.gamma,
1058 n_components,
1059 degree: current_params.degree,
1060 coef0: current_params.coef0,
1061 };
1062
1063 let score = self.evaluate_rbf_parameters(x, y, &test_params)?;
1064 if score > best_score {
1065 best_score = score;
1066 best_n_components = n_components;
1067 }
1068 }
1069
1070 Ok(ParameterSet {
1071 gamma: current_params.gamma,
1072 n_components: best_n_components,
1073 degree: current_params.degree,
1074 coef0: current_params.coef0,
1075 })
1076 }
1077}
1078
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    // Grid search should evaluate the full Cartesian grid and keep the
    // best parameters within bounds.
    #[test]
    fn test_parameter_learner_grid_search() {
        let x =
            Array2::from_shape_vec((50, 5), (0..250).map(|i| i as f64 * 0.01).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 3 },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.1, 10.0),
                n_components_bounds: (10, 50),
                ..Default::default()
            },
            objective_function: ObjectiveFunction::KernelAlignment,
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert!(result.best_parameters.gamma >= 0.1);
        assert!(result.best_parameters.gamma <= 10.0);
        assert!(result.best_parameters.n_components >= 10);
        assert!(result.best_parameters.n_components <= 50);
        // 3 gamma values x 3 n_components values = 9 combinations.
        assert_eq!(result.parameter_history.len(), 9);
        assert!(result.converged);
    }

    // Random search should record exactly n_samples evaluations.
    #[test]
    fn test_parameter_learner_random_search() {
        let x =
            Array2::from_shape_vec((30, 4), (0..120).map(|i| i as f64 * 0.05).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::RandomSearch { n_samples: 5 },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.01, 1.0),
                n_components_bounds: (5, 25),
                ..Default::default()
            },
            random_seed: Some(42),
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert_eq!(result.parameter_history.len(), 5);
        assert_eq!(result.n_evaluations, 5);
    }

    // Bayesian optimization evaluates n_initial + n_iterations candidates.
    #[test]
    fn test_parameter_learner_bayesian_optimization() {
        let x = Array2::from_shape_vec((25, 3), (0..75).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::BayesianOptimization {
                n_initial: 3,
                n_iterations: 2,
                exploration_factor: 1.0,
            },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.1, 5.0),
                n_components_bounds: (10, 30),
                ..Default::default()
            },
            random_seed: Some(123),
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        // 3 initial samples + 2 guided iterations = 5 evaluations.
        assert_eq!(result.parameter_history.len(), 5);
        assert_eq!(result.n_evaluations, 5);
    }

    // Coordinate descent records at least the initial evaluation.
    #[test]
    fn test_parameter_learner_coordinate_descent() {
        let x =
            Array2::from_shape_vec((40, 6), (0..240).map(|i| i as f64 * 0.02).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::CoordinateDescent {
                max_iterations: 3,
                tolerance: 1e-6,
            },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.1, 2.0),
                n_components_bounds: (15, 35),
                ..Default::default()
            },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert!(result.parameter_history.len() >= 1);
    }

    // Nystroem grid search: 2 x 2 grid yields 4 evaluations.
    #[test]
    fn test_parameter_learner_nystroem() {
        let x =
            Array2::from_shape_vec((35, 4), (0..140).map(|i| i as f64 * 0.03).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 2 },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.5, 2.0),
                n_components_bounds: (10, 20),
                ..Default::default()
            },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_nystroem_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        // 2 gamma values x 2 n_components values = 4 combinations.
        assert_eq!(result.parameter_history.len(), 4);
    }

    // The cross-validation objective requires targets to be supplied.
    #[test]
    fn test_cross_validation_objective() {
        let x =
            Array2::from_shape_vec((50, 3), (0..150).map(|i| i as f64 * 0.01).collect()).unwrap();
        let y = Array1::from_shape_fn(50, |i| (i as f64 * 0.1).sin());

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 2 },
            objective_function: ObjectiveFunction::CrossValidationError { n_folds: 3 },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, Some(&y)).unwrap();

        assert!(result.best_score >= 0.0);
        assert!(result.parameter_history.len() > 0);
    }

    // Effective-rank objective should produce strictly positive scores.
    #[test]
    fn test_effective_rank_objective() {
        let x =
            Array2::from_shape_vec((30, 5), (0..150).map(|i| i as f64 * 0.02).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::RandomSearch { n_samples: 3 },
            objective_function: ObjectiveFunction::EffectiveRank,
            random_seed: Some(456),
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert_eq!(result.parameter_history.len(), 3);
    }

    // Linear and log grids must include both endpoints and be monotone.
    #[test]
    fn test_parameter_grid_creation() {
        let learner = ParameterLearner::new(ParameterLearningConfig::default());

        let linear_grid = learner.create_parameter_grid(0.0, 10.0, 5, false);
        assert_eq!(linear_grid.len(), 5);
        assert_abs_diff_eq!(linear_grid[0], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(linear_grid[4], 10.0, epsilon = 1e-10);

        let log_grid = learner.create_parameter_grid(0.1, 10.0, 3, true);
        assert_eq!(log_grid.len(), 3);
        assert_abs_diff_eq!(log_grid[0], 0.1, epsilon = 1e-10);
        assert_abs_diff_eq!(log_grid[2], 10.0, epsilon = 1e-10);
        assert!(log_grid[1] > log_grid[0]);
        assert!(log_grid[1] < log_grid[2]);
    }

    // The reported best score must match the maximum over the history.
    #[test]
    fn test_optimization_result() {
        let x = Array2::from_shape_vec((20, 3), (0..60).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 2 },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert!(result.best_parameters.gamma > 0.0);
        assert!(result.best_parameters.n_components > 0);
        assert!(result.converged);
        assert_eq!(result.n_evaluations, result.parameter_history.len());

        let max_score = result
            .parameter_history
            .iter()
            .map(|(_, score)| *score)
            .fold(f64::NEG_INFINITY, f64::max);
        assert_abs_diff_eq!(result.best_score, max_score, epsilon = 1e-10);
    }
}