1use crate::{Nystroem, RBFSampler};
7use rayon::prelude::*;
8use scirs2_core::ndarray::ndarray_linalg::SVD;
9use scirs2_core::ndarray::{s, Array1, Array2};
10use scirs2_core::random::rngs::StdRng;
11use scirs2_core::random::Rng;
12use scirs2_core::random::{thread_rng, SeedableRng};
13use sklears_core::{
14 error::{Result, SklearsError},
15 traits::{Fit, Transform},
16};
17
/// Strategy used to search the kernel-approximation hyperparameter space.
#[derive(Debug, Clone)]
pub enum SearchStrategy {
    /// Exhaustive search over a fixed grid of parameter values.
    GridSearch {
        /// Number of grid points per parameter dimension.
        n_points: usize,
    },
    /// Random sampling of parameter combinations.
    RandomSearch {
        /// Number of random parameter sets to evaluate.
        n_samples: usize,
    },
    /// Sequential model-based optimization with an exploration bonus.
    BayesianOptimization {
        /// Number of random evaluations used to seed the history.
        n_initial: usize,
        /// Number of acquisition-guided iterations after the initial phase.
        n_iterations: usize,
        /// Weight of the uncertainty term in the acquisition function.
        exploration_factor: f64,
    },
    /// Alternating one-dimensional optimization of each parameter.
    CoordinateDescent {
        /// Maximum number of full coordinate sweeps.
        max_iterations: usize,
        /// Score-improvement threshold below which the search stops.
        tolerance: f64,
    },
}
49
/// Search bounds for each tunable kernel parameter.
#[derive(Debug, Clone)]
pub struct ParameterBounds {
    /// (min, max) range for the RBF bandwidth parameter `gamma`.
    pub gamma_bounds: (f64, f64),
    /// (min, max) range for the number of approximation components.
    pub n_components_bounds: (usize, usize),
    /// Optional (min, max) range for a polynomial-kernel degree.
    pub degree_bounds: Option<(i32, i32)>,
    /// Optional (min, max) range for a polynomial-kernel offset `coef0`.
    pub coef0_bounds: Option<(f64, f64)>,
}
63
impl Default for ParameterBounds {
    /// Broad defaults: gamma spans eight orders of magnitude, components
    /// from 10 to 1000, degree 2-5, and coef0 in [-1, 1].
    fn default() -> Self {
        Self {
            gamma_bounds: (1e-6, 1e2),
            n_components_bounds: (10, 1000),
            degree_bounds: Some((2, 5)),
            coef0_bounds: Some((-1.0, 1.0)),
        }
    }
}
74
/// Objective used to score a candidate parameter set (higher is better).
#[derive(Debug, Clone)]
pub enum ObjectiveFunction {
    /// Alignment between the exact and the approximated kernel matrices.
    KernelAlignment,
    /// Correlation-based cross-validation score (requires targets `y`;
    /// falls back to kernel alignment when `y` is absent).
    CrossValidationError { n_folds: usize },
    /// Inverse relative Frobenius error of the kernel approximation.
    ApproximationQuality,
    /// Effective rank (entropy of singular values) of the feature matrix.
    EffectiveRank,
    /// Placeholder for a user-supplied objective; currently scores 0.0.
    Custom,
}
90
/// Configuration for hyperparameter learning of kernel approximators.
#[derive(Debug, Clone)]
pub struct ParameterLearningConfig {
    /// Search algorithm used to explore the parameter space.
    pub search_strategy: SearchStrategy,
    /// Bounds restricting each searched parameter.
    pub parameter_bounds: ParameterBounds,
    /// Objective used to score candidate parameter sets.
    pub objective_function: ObjectiveFunction,
    /// Fraction of data reserved for validation.
    /// NOTE(review): not referenced by the search routines in this module.
    pub validation_fraction: f64,
    /// Optional RNG seed for reproducible random/Bayesian searches.
    pub random_seed: Option<u64>,
    /// Number of worker threads for parallel evaluation.
    /// NOTE(review): rayon's default pool is used; this field is not read here.
    pub n_jobs: usize,
    /// Whether to print progress information during the search.
    pub verbose: bool,
}
110
impl Default for ParameterLearningConfig {
    /// Defaults: 10-point grid search, kernel-alignment objective, 20%
    /// validation fraction, unseeded RNG, one job per logical CPU.
    fn default() -> Self {
        Self {
            search_strategy: SearchStrategy::GridSearch { n_points: 10 },
            parameter_bounds: ParameterBounds::default(),
            objective_function: ObjectiveFunction::KernelAlignment,
            validation_fraction: 0.2,
            random_seed: None,
            n_jobs: num_cpus::get(),
            verbose: false,
        }
    }
}
124
/// A concrete assignment of kernel-approximation hyperparameters.
#[derive(Debug, Clone)]
pub struct ParameterSet {
    /// RBF bandwidth parameter.
    pub gamma: f64,
    /// Number of approximation components (random features / landmarks).
    pub n_components: usize,
    /// Polynomial degree, if the kernel uses one.
    pub degree: Option<i32>,
    /// Polynomial offset `coef0`, if the kernel uses one.
    pub coef0: Option<f64>,
}
138
139impl PartialEq for ParameterSet {
140 fn eq(&self, other: &Self) -> bool {
141 (self.gamma - other.gamma).abs() < f64::EPSILON
142 && self.n_components == other.n_components
143 && self.degree == other.degree
144 && match (self.coef0, other.coef0) {
145 (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON,
146 (None, None) => true,
147 _ => false,
148 }
149 }
150}
151
152impl Eq for ParameterSet {}
153
154impl std::hash::Hash for ParameterSet {
155 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
156 self.gamma.to_bits().hash(state);
158 self.n_components.hash(state);
159 self.degree.hash(state);
160 if let Some(coef0) = self.coef0 {
161 coef0.to_bits().hash(state);
162 }
163 }
164}
165
/// Outcome of a parameter search.
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Highest-scoring parameter set found.
    pub best_parameters: ParameterSet,
    /// Objective value achieved by `best_parameters`.
    pub best_score: f64,
    /// Every evaluated (parameters, score) pair, in evaluation order.
    pub parameter_history: Vec<(ParameterSet, f64)>,
    /// Whether the search met its convergence criterion (grid and random
    /// search always report `true` after exhausting their budget).
    pub converged: bool,
    /// Total number of objective evaluations performed.
    pub n_evaluations: usize,
}
181
/// Hyperparameter optimizer for kernel-approximation transformers.
pub struct ParameterLearner {
    // Search configuration (strategy, bounds, objective, seed, verbosity).
    config: ParameterLearningConfig,
}
186
187impl ParameterLearner {
    /// Creates a learner that optimizes parameters according to `config`.
    pub fn new(config: ParameterLearningConfig) -> Self {
        Self { config }
    }
192
    /// Optimizes RBF-sampler parameters (`gamma`, `n_components`) for `x`.
    ///
    /// Dispatches to the search algorithm selected in the configuration.
    /// `y` is only used by objectives that need targets (cross-validation).
    pub fn optimize_rbf_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<OptimizationResult> {
        match &self.config.search_strategy {
            SearchStrategy::GridSearch { n_points } => self.grid_search_rbf(x, y, *n_points),
            SearchStrategy::RandomSearch { n_samples } => self.random_search_rbf(x, y, *n_samples),
            SearchStrategy::BayesianOptimization {
                n_initial,
                n_iterations,
                exploration_factor,
            } => {
                self.bayesian_optimization_rbf(x, y, *n_initial, *n_iterations, *exploration_factor)
            }
            SearchStrategy::CoordinateDescent {
                max_iterations,
                tolerance,
            } => self.coordinate_descent_rbf(x, y, *max_iterations, *tolerance),
        }
    }
215
    /// Optimizes Nystroem parameters (`gamma`, `n_components`) for `x`.
    ///
    /// Only grid and random search are implemented for Nystroem; any other
    /// configured strategy silently falls back to a 10-point grid search.
    pub fn optimize_nystroem_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<OptimizationResult> {
        match &self.config.search_strategy {
            SearchStrategy::GridSearch { n_points } => self.grid_search_nystroem(x, y, *n_points),
            SearchStrategy::RandomSearch { n_samples } => {
                self.random_search_nystroem(x, y, *n_samples)
            }
            _ => {
                // Fallback: unsupported strategies use a default grid search.
                self.grid_search_nystroem(x, y, 10)
            }
        }
    }
234
    /// Exhaustive grid search over (gamma, n_components) for the RBF sampler.
    ///
    /// Builds a log-spaced gamma grid and a linear n_components grid, then
    /// evaluates every combination in parallel with rayon. Assumes
    /// `n_points >= 2` so both grids are non-empty.
    fn grid_search_rbf(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
        n_points: usize,
    ) -> Result<OptimizationResult> {
        // Gamma is gridded on a log scale (it is a scale parameter).
        let gamma_values = self.create_parameter_grid(
            self.config.parameter_bounds.gamma_bounds.0,
            self.config.parameter_bounds.gamma_bounds.1,
            n_points,
            true,
        );

        // Component counts are gridded linearly and truncated to integers.
        let n_components_values = self
            .create_parameter_grid(
                self.config.parameter_bounds.n_components_bounds.0 as f64,
                self.config.parameter_bounds.n_components_bounds.1 as f64,
                n_points,
                false,
            )
            .into_iter()
            .map(|x| x as usize)
            .collect::<Vec<_>>();

        let mut parameter_history = Vec::new();
        let mut best_score = f64::NEG_INFINITY;
        // Seed "best" with the first grid point; overwritten on improvement.
        let mut best_parameters = ParameterSet {
            gamma: gamma_values[0],
            n_components: n_components_values[0],
            degree: None,
            coef0: None,
        };

        // Cartesian product of the two grids.
        let parameter_combinations: Vec<_> = gamma_values
            .iter()
            .flat_map(|&gamma| {
                n_components_values
                    .iter()
                    .map(move |&n_components| ParameterSet {
                        gamma,
                        n_components,
                        degree: None,
                        coef0: None,
                    })
            })
            .collect();

        if self.config.verbose {
            println!(
                "Evaluating {} parameter combinations",
                parameter_combinations.len()
            );
        }

        // Evaluate all combinations in parallel; collecting into Result
        // short-circuits on the first error and preserves input order.
        let scores: Result<Vec<_>> = parameter_combinations
            .par_iter()
            .map(|params| {
                let score = self.evaluate_rbf_parameters(x, y, params)?;
                Ok((params.clone(), score))
            })
            .collect();

        let scores = scores?;

        // Sequential reduction keeps the first maximum on ties (strict >).
        for (params, score) in scores {
            parameter_history.push((params.clone(), score));

            if score > best_score {
                best_score = score;
                best_parameters = params;
            }
        }

        let n_evaluations = parameter_history.len();
        Ok(OptimizationResult {
            best_parameters,
            best_score,
            parameter_history,
            converged: true,
            n_evaluations,
        })
    }
319
320 fn random_search_rbf(
321 &self,
322 x: &Array2<f64>,
323 y: Option<&Array1<f64>>,
324 n_samples: usize,
325 ) -> Result<OptimizationResult> {
326 let mut rng = if let Some(seed) = self.config.random_seed {
327 StdRng::seed_from_u64(seed)
328 } else {
329 StdRng::from_seed(thread_rng().gen())
330 };
331
332 let mut parameter_history = Vec::new();
333 let mut best_score = f64::NEG_INFINITY;
334 let mut best_parameters = ParameterSet {
335 gamma: 1.0,
336 n_components: 100,
337 degree: None,
338 coef0: None,
339 };
340
341 for _ in 0..n_samples {
342 let gamma = self.sample_log_uniform(
344 &mut rng,
345 self.config.parameter_bounds.gamma_bounds.0,
346 self.config.parameter_bounds.gamma_bounds.1,
347 );
348
349 let n_components = rng.gen_range(
350 self.config.parameter_bounds.n_components_bounds.0
351 ..=self.config.parameter_bounds.n_components_bounds.1,
352 );
353
354 let params = ParameterSet {
355 gamma,
356 n_components,
357 degree: None,
358 coef0: None,
359 };
360
361 let score = self.evaluate_rbf_parameters(x, y, ¶ms)?;
362 parameter_history.push((params.clone(), score));
363
364 if score > best_score {
365 best_score = score;
366 best_parameters = params;
367 }
368 }
369
370 Ok(OptimizationResult {
371 best_parameters,
372 best_score,
373 parameter_history,
374 converged: true,
375 n_evaluations: n_samples,
376 })
377 }
378
379 fn bayesian_optimization_rbf(
380 &self,
381 x: &Array2<f64>,
382 y: Option<&Array1<f64>>,
383 n_initial: usize,
384 n_iterations: usize,
385 exploration_factor: f64,
386 ) -> Result<OptimizationResult> {
387 let mut rng = if let Some(seed) = self.config.random_seed {
389 StdRng::seed_from_u64(seed)
390 } else {
391 StdRng::from_seed(thread_rng().gen())
392 };
393
394 let mut parameter_history = Vec::new();
395 let mut best_score = f64::NEG_INFINITY;
396 let mut best_parameters = ParameterSet {
397 gamma: 1.0,
398 n_components: 100,
399 degree: None,
400 coef0: None,
401 };
402
403 for _ in 0..n_initial {
405 let gamma = self.sample_log_uniform(
406 &mut rng,
407 self.config.parameter_bounds.gamma_bounds.0,
408 self.config.parameter_bounds.gamma_bounds.1,
409 );
410
411 let n_components = rng.gen_range(
412 self.config.parameter_bounds.n_components_bounds.0
413 ..=self.config.parameter_bounds.n_components_bounds.1,
414 );
415
416 let params = ParameterSet {
417 gamma,
418 n_components,
419 degree: None,
420 coef0: None,
421 };
422
423 let score = self.evaluate_rbf_parameters(x, y, ¶ms)?;
424 parameter_history.push((params.clone(), score));
425
426 if score > best_score {
427 best_score = score;
428 best_parameters = params;
429 }
430 }
431
432 for iteration in 0..n_iterations {
434 let next_params =
436 self.acquisition_function_rbf(¶meter_history, exploration_factor, &mut rng);
437
438 let score = self.evaluate_rbf_parameters(x, y, &next_params)?;
439 parameter_history.push((next_params.clone(), score));
440
441 if score > best_score {
442 best_score = score;
443 best_parameters = next_params;
444 }
445
446 if self.config.verbose {
447 println!(
448 "Iteration {}: Best score = {:.6}",
449 iteration + 1,
450 best_score
451 );
452 }
453 }
454
455 Ok(OptimizationResult {
456 best_parameters,
457 best_score,
458 parameter_history,
459 converged: true,
460 n_evaluations: n_initial + n_iterations,
461 })
462 }
463
464 fn coordinate_descent_rbf(
465 &self,
466 x: &Array2<f64>,
467 y: Option<&Array1<f64>>,
468 max_iterations: usize,
469 tolerance: f64,
470 ) -> Result<OptimizationResult> {
471 let mut current_params = ParameterSet {
472 gamma: (self.config.parameter_bounds.gamma_bounds.0
473 * self.config.parameter_bounds.gamma_bounds.1)
474 .sqrt(),
475 n_components: (self.config.parameter_bounds.n_components_bounds.0
476 + self.config.parameter_bounds.n_components_bounds.1)
477 / 2,
478 degree: None,
479 coef0: None,
480 };
481
482 let mut current_score = self.evaluate_rbf_parameters(x, y, ¤t_params)?;
483 let mut parameter_history = vec![(current_params.clone(), current_score)];
484 let mut converged = false;
485
486 for iteration in 0..max_iterations {
487 let prev_score = current_score;
488
489 current_params = self.optimize_gamma_coordinate(x, y, ¤t_params)?;
491 current_score = self.evaluate_rbf_parameters(x, y, ¤t_params)?;
492 parameter_history.push((current_params.clone(), current_score));
493
494 current_params = self.optimize_n_components_coordinate(x, y, ¤t_params)?;
496 current_score = self.evaluate_rbf_parameters(x, y, ¤t_params)?;
497 parameter_history.push((current_params.clone(), current_score));
498
499 if (current_score - prev_score).abs() < tolerance {
501 converged = true;
502 if self.config.verbose {
503 println!(
504 "Converged at iteration {} with score {:.6}",
505 iteration + 1,
506 current_score
507 );
508 }
509 break;
510 }
511
512 if self.config.verbose {
513 println!("Iteration {}: Score = {:.6}", iteration + 1, current_score);
514 }
515 }
516
517 let n_evaluations = parameter_history.len();
518 Ok(OptimizationResult {
519 best_parameters: current_params,
520 best_score: current_score,
521 parameter_history,
522 converged,
523 n_evaluations,
524 })
525 }
526
527 fn grid_search_nystroem(
528 &self,
529 x: &Array2<f64>,
530 y: Option<&Array1<f64>>,
531 n_points: usize,
532 ) -> Result<OptimizationResult> {
533 let gamma_values = self.create_parameter_grid(
535 self.config.parameter_bounds.gamma_bounds.0,
536 self.config.parameter_bounds.gamma_bounds.1,
537 n_points,
538 true,
539 );
540
541 let n_components_values = self
542 .create_parameter_grid(
543 self.config.parameter_bounds.n_components_bounds.0 as f64,
544 self.config.parameter_bounds.n_components_bounds.1 as f64,
545 n_points,
546 false,
547 )
548 .into_iter()
549 .map(|x| x as usize)
550 .collect::<Vec<_>>();
551
552 let mut parameter_history = Vec::new();
553 let mut best_score = f64::NEG_INFINITY;
554 let mut best_parameters = ParameterSet {
555 gamma: gamma_values[0],
556 n_components: n_components_values[0],
557 degree: None,
558 coef0: None,
559 };
560
561 for &gamma in &gamma_values {
562 for &n_components in &n_components_values {
563 let params = ParameterSet {
564 gamma,
565 n_components,
566 degree: None,
567 coef0: None,
568 };
569
570 let score = self.evaluate_nystroem_parameters(x, y, ¶ms)?;
571 parameter_history.push((params.clone(), score));
572
573 if score > best_score {
574 best_score = score;
575 best_parameters = params;
576 }
577 }
578 }
579
580 let n_evaluations = parameter_history.len();
581 Ok(OptimizationResult {
582 best_parameters,
583 best_score,
584 parameter_history,
585 converged: true,
586 n_evaluations,
587 })
588 }
589
590 fn random_search_nystroem(
591 &self,
592 x: &Array2<f64>,
593 y: Option<&Array1<f64>>,
594 n_samples: usize,
595 ) -> Result<OptimizationResult> {
596 let mut rng = if let Some(seed) = self.config.random_seed {
597 StdRng::seed_from_u64(seed)
598 } else {
599 StdRng::from_seed(thread_rng().gen())
600 };
601
602 let mut parameter_history = Vec::new();
603 let mut best_score = f64::NEG_INFINITY;
604 let mut best_parameters = ParameterSet {
605 gamma: 1.0,
606 n_components: 100,
607 degree: None,
608 coef0: None,
609 };
610
611 for _ in 0..n_samples {
612 let gamma = self.sample_log_uniform(
613 &mut rng,
614 self.config.parameter_bounds.gamma_bounds.0,
615 self.config.parameter_bounds.gamma_bounds.1,
616 );
617
618 let n_components = rng.gen_range(
619 self.config.parameter_bounds.n_components_bounds.0
620 ..=self.config.parameter_bounds.n_components_bounds.1,
621 );
622
623 let params = ParameterSet {
624 gamma,
625 n_components,
626 degree: None,
627 coef0: None,
628 };
629
630 let score = self.evaluate_nystroem_parameters(x, y, ¶ms)?;
631 parameter_history.push((params.clone(), score));
632
633 if score > best_score {
634 best_score = score;
635 best_parameters = params;
636 }
637 }
638
639 Ok(OptimizationResult {
640 best_parameters,
641 best_score,
642 parameter_history,
643 converged: true,
644 n_evaluations: n_samples,
645 })
646 }
647
    /// Fits an `RBFSampler` with `params`, transforms `x`, and scores the
    /// transformed features using the configured objective function.
    ///
    /// Falls back to kernel alignment when the cross-validation objective
    /// is selected but no targets were provided.
    fn evaluate_rbf_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
        params: &ParameterSet,
    ) -> Result<f64> {
        let sampler = RBFSampler::new(params.n_components).gamma(params.gamma);
        let fitted = sampler.fit(x, &())?;
        let x_transformed = fitted.transform(x)?;

        match &self.config.objective_function {
            ObjectiveFunction::KernelAlignment => {
                self.compute_kernel_alignment(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::CrossValidationError { n_folds } => {
                if let Some(y_data) = y {
                    self.compute_cross_validation_score(&x_transformed, y_data, *n_folds)
                } else {
                    // No targets available: fall back to kernel alignment.
                    self.compute_kernel_alignment(x, &x_transformed, params.gamma)
                }
            }
            ObjectiveFunction::ApproximationQuality => {
                self.compute_approximation_quality(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::EffectiveRank => self.compute_effective_rank(&x_transformed),
            ObjectiveFunction::Custom => {
                // Custom objectives are not wired up yet; neutral score.
                Ok(0.0)
            }
        }
    }
680
    /// Fits a `Nystroem` transformer (RBF kernel) with `params`, transforms
    /// `x`, and scores the result using the configured objective function.
    ///
    /// Falls back to kernel alignment when the cross-validation objective
    /// is selected but no targets were provided.
    fn evaluate_nystroem_parameters(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
        params: &ParameterSet,
    ) -> Result<f64> {
        use crate::nystroem::Kernel;

        let kernel = Kernel::Rbf {
            gamma: params.gamma,
        };
        let nystroem = Nystroem::new(kernel, params.n_components);
        let fitted = nystroem.fit(x, &())?;
        let x_transformed = fitted.transform(x)?;

        match &self.config.objective_function {
            ObjectiveFunction::KernelAlignment => {
                self.compute_kernel_alignment(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::CrossValidationError { n_folds } => {
                if let Some(y_data) = y {
                    self.compute_cross_validation_score(&x_transformed, y_data, *n_folds)
                } else {
                    // No targets available: fall back to kernel alignment.
                    self.compute_kernel_alignment(x, &x_transformed, params.gamma)
                }
            }
            ObjectiveFunction::ApproximationQuality => {
                self.compute_approximation_quality(x, &x_transformed, params.gamma)
            }
            ObjectiveFunction::EffectiveRank => self.compute_effective_rank(&x_transformed),
            // Custom objectives are not wired up yet; neutral score.
            ObjectiveFunction::Custom => Ok(0.0),
        }
    }
714
715 fn compute_kernel_alignment(
716 &self,
717 x: &Array2<f64>,
718 x_transformed: &Array2<f64>,
719 gamma: f64,
720 ) -> Result<f64> {
721 let n_samples = x.nrows().min(100); let x_subset = x.slice(s![..n_samples, ..]);
723
724 let mut k_exact = Array2::zeros((n_samples, n_samples));
726 for i in 0..n_samples {
727 for j in 0..n_samples {
728 let diff = &x_subset.row(i) - &x_subset.row(j);
729 let squared_norm = diff.dot(&diff);
730 k_exact[[i, j]] = (-gamma * squared_norm).exp();
731 }
732 }
733
734 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
736 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
737
738 let k_exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
740 let k_approx_frobenius = k_approx.iter().map(|&x| x * x).sum::<f64>().sqrt();
741 let k_product = (&k_exact * &k_approx).sum();
742
743 let alignment = k_product / (k_exact_frobenius * k_approx_frobenius);
744 Ok(alignment)
745 }
746
747 fn compute_cross_validation_score(
748 &self,
749 x_transformed: &Array2<f64>,
750 y: &Array1<f64>,
751 n_folds: usize,
752 ) -> Result<f64> {
753 let n_samples = x_transformed.nrows();
754 let fold_size = n_samples / n_folds;
755 let mut cv_scores = Vec::new();
756
757 for fold in 0..n_folds {
758 let start = fold * fold_size;
759 let end = if fold == n_folds - 1 {
760 n_samples
761 } else {
762 (fold + 1) * fold_size
763 };
764
765 let val_features = x_transformed.slice(s![start..end, ..]);
767 let val_targets = y.slice(s![start..end]);
768
769 let mut correlations = Vec::new();
771 for j in 0..val_features.ncols() {
772 let feature_col = val_features.column(j);
773 let correlation =
774 self.compute_correlation(feature_col.into_owned().view(), val_targets);
775 correlations.push(correlation.abs());
776 }
777
778 let mean_correlation = correlations.iter().sum::<f64>() / correlations.len() as f64;
779 cv_scores.push(mean_correlation);
780 }
781
782 Ok(cv_scores.iter().sum::<f64>() / cv_scores.len() as f64)
783 }
784
785 fn compute_approximation_quality(
786 &self,
787 x: &Array2<f64>,
788 x_transformed: &Array2<f64>,
789 gamma: f64,
790 ) -> Result<f64> {
791 let n_samples = x.nrows().min(50); let x_subset = x.slice(s![..n_samples, ..]);
794 let x_transformed_subset = x_transformed.slice(s![..n_samples, ..]);
795
796 let mut k_exact = Array2::zeros((n_samples, n_samples));
798 for i in 0..n_samples {
799 for j in 0..n_samples {
800 let diff = &x_subset.row(i) - &x_subset.row(j);
801 let squared_norm = diff.dot(&diff);
802 k_exact[[i, j]] = (-gamma * squared_norm).exp();
803 }
804 }
805
806 let k_approx = x_transformed_subset.dot(&x_transformed_subset.t());
808
809 let error_matrix = &k_exact - &k_approx;
811 let frobenius_error = error_matrix.iter().map(|&x| x * x).sum::<f64>().sqrt();
812 let exact_frobenius = k_exact.iter().map(|&x| x * x).sum::<f64>().sqrt();
813
814 let relative_error = frobenius_error / exact_frobenius;
815 Ok(1.0 / (1.0 + relative_error)) }
817
818 fn compute_effective_rank(&self, x_transformed: &Array2<f64>) -> Result<f64> {
819 let (_, s, _) = x_transformed
821 .svd(true, true)
822 .map_err(|_| SklearsError::InvalidInput("SVD computation failed".to_string()))?;
823
824 let s_sum = s.sum();
826 if s_sum == 0.0 {
827 return Ok(0.0);
828 }
829
830 let s_normalized = &s / s_sum;
831 let entropy = -s_normalized
832 .iter()
833 .filter(|&&x| x > 1e-12)
834 .map(|&x| x * x.ln())
835 .sum::<f64>();
836
837 Ok(entropy.exp())
838 }
839
840 fn compute_correlation(
841 &self,
842 x: scirs2_core::ndarray::ArrayView1<f64>,
843 y: scirs2_core::ndarray::ArrayView1<f64>,
844 ) -> f64 {
845 let x_mean = x.mean().unwrap_or(0.0);
846 let y_mean = y.mean().unwrap_or(0.0);
847
848 let numerator: f64 = x
849 .iter()
850 .zip(y.iter())
851 .map(|(&xi, &yi)| (xi - x_mean) * (yi - y_mean))
852 .sum();
853
854 let x_var: f64 = x.iter().map(|&xi| (xi - x_mean).powi(2)).sum();
855 let y_var: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
856
857 let denominator = (x_var * y_var).sqrt();
858
859 if denominator == 0.0 {
860 0.0
861 } else {
862 numerator / denominator
863 }
864 }
865
866 fn create_parameter_grid(
868 &self,
869 min_val: f64,
870 max_val: f64,
871 n_points: usize,
872 log_scale: bool,
873 ) -> Vec<f64> {
874 if log_scale {
875 let log_min = min_val.ln();
876 let log_max = max_val.ln();
877 (0..n_points)
878 .map(|i| {
879 let t = i as f64 / (n_points - 1) as f64;
880 (log_min + t * (log_max - log_min)).exp()
881 })
882 .collect()
883 } else {
884 (0..n_points)
885 .map(|i| {
886 let t = i as f64 / (n_points - 1) as f64;
887 min_val + t * (max_val - min_val)
888 })
889 .collect()
890 }
891 }
892
893 fn sample_log_uniform(&self, rng: &mut StdRng, min_val: f64, max_val: f64) -> f64 {
894 let log_min = min_val.ln();
895 let log_max = max_val.ln();
896 let log_val = rng.gen_range(log_min..log_max);
897 log_val.exp()
898 }
899
900 fn acquisition_function_rbf(
901 &self,
902 parameter_history: &[(ParameterSet, f64)],
903 exploration_factor: f64,
904 rng: &mut StdRng,
905 ) -> ParameterSet {
906 let _best_score = parameter_history
910 .iter()
911 .map(|(_, score)| *score)
912 .fold(f64::NEG_INFINITY, f64::max);
913
914 let mut best_acquisition = f64::NEG_INFINITY;
916 let mut best_candidate = parameter_history[0].0.clone();
917
918 for _ in 0..100 {
919 let gamma = self.sample_log_uniform(
921 rng,
922 self.config.parameter_bounds.gamma_bounds.0,
923 self.config.parameter_bounds.gamma_bounds.1,
924 );
925
926 let n_components = rng.gen_range(
927 self.config.parameter_bounds.n_components_bounds.0
928 ..=self.config.parameter_bounds.n_components_bounds.1,
929 );
930
931 let candidate = ParameterSet {
932 gamma,
933 n_components,
934 degree: None,
935 coef0: None,
936 };
937
938 let mean_score = self.predict_score_from_history(parameter_history, &candidate);
940 let uncertainty =
941 exploration_factor * self.estimate_uncertainty(parameter_history, &candidate);
942 let acquisition = mean_score + uncertainty;
943
944 if acquisition > best_acquisition {
945 best_acquisition = acquisition;
946 best_candidate = candidate;
947 }
948 }
949
950 best_candidate
951 }
952
953 fn predict_score_from_history(
954 &self,
955 parameter_history: &[(ParameterSet, f64)],
956 candidate: &ParameterSet,
957 ) -> f64 {
958 let mut weighted_sum = 0.0;
960 let mut weight_sum = 0.0;
961
962 for (params, score) in parameter_history {
963 let distance = self.parameter_distance(candidate, params);
964 let weight = (-distance).exp();
965 weighted_sum += weight * score;
966 weight_sum += weight;
967 }
968
969 if weight_sum > 0.0 {
970 weighted_sum / weight_sum
971 } else {
972 0.0
973 }
974 }
975
976 fn estimate_uncertainty(
977 &self,
978 parameter_history: &[(ParameterSet, f64)],
979 candidate: &ParameterSet,
980 ) -> f64 {
981 let min_distance = parameter_history
983 .iter()
984 .map(|(params, _)| self.parameter_distance(candidate, params))
985 .fold(f64::INFINITY, f64::min);
986
987 min_distance
988 }
989
990 fn parameter_distance(&self, p1: &ParameterSet, p2: &ParameterSet) -> f64 {
991 let gamma_diff = (p1.gamma.ln() - p2.gamma.ln()).powi(2);
992 let n_components_diff =
993 ((p1.n_components as f64).ln() - (p2.n_components as f64).ln()).powi(2);
994 (gamma_diff + n_components_diff).sqrt()
995 }
996
997 fn optimize_gamma_coordinate(
998 &self,
999 x: &Array2<f64>,
1000 y: Option<&Array1<f64>>,
1001 current_params: &ParameterSet,
1002 ) -> Result<ParameterSet> {
1003 let gamma_values = self.create_parameter_grid(
1004 self.config.parameter_bounds.gamma_bounds.0,
1005 self.config.parameter_bounds.gamma_bounds.1,
1006 10,
1007 true,
1008 );
1009
1010 let mut best_gamma = current_params.gamma;
1011 let mut best_score = f64::NEG_INFINITY;
1012
1013 for &gamma in &gamma_values {
1014 let test_params = ParameterSet {
1015 gamma,
1016 n_components: current_params.n_components,
1017 degree: current_params.degree,
1018 coef0: current_params.coef0,
1019 };
1020
1021 let score = self.evaluate_rbf_parameters(x, y, &test_params)?;
1022 if score > best_score {
1023 best_score = score;
1024 best_gamma = gamma;
1025 }
1026 }
1027
1028 Ok(ParameterSet {
1029 gamma: best_gamma,
1030 n_components: current_params.n_components,
1031 degree: current_params.degree,
1032 coef0: current_params.coef0,
1033 })
1034 }
1035
1036 fn optimize_n_components_coordinate(
1037 &self,
1038 x: &Array2<f64>,
1039 y: Option<&Array1<f64>>,
1040 current_params: &ParameterSet,
1041 ) -> Result<ParameterSet> {
1042 let n_components_values = self
1043 .create_parameter_grid(
1044 self.config.parameter_bounds.n_components_bounds.0 as f64,
1045 self.config.parameter_bounds.n_components_bounds.1 as f64,
1046 10,
1047 false,
1048 )
1049 .into_iter()
1050 .map(|x| x as usize)
1051 .collect::<Vec<_>>();
1052
1053 let mut best_n_components = current_params.n_components;
1054 let mut best_score = f64::NEG_INFINITY;
1055
1056 for &n_components in &n_components_values {
1057 let test_params = ParameterSet {
1058 gamma: current_params.gamma,
1059 n_components,
1060 degree: current_params.degree,
1061 coef0: current_params.coef0,
1062 };
1063
1064 let score = self.evaluate_rbf_parameters(x, y, &test_params)?;
1065 if score > best_score {
1066 best_score = score;
1067 best_n_components = n_components;
1068 }
1069 }
1070
1071 Ok(ParameterSet {
1072 gamma: current_params.gamma,
1073 n_components: best_n_components,
1074 degree: current_params.degree,
1075 coef0: current_params.coef0,
1076 })
1077 }
1078}
1079
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    // Grid search must evaluate the full cartesian product of both grids.
    #[test]
    fn test_parameter_learner_grid_search() {
        let x =
            Array2::from_shape_vec((50, 5), (0..250).map(|i| i as f64 * 0.01).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 3 },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.1, 10.0),
                n_components_bounds: (10, 50),
                ..Default::default()
            },
            objective_function: ObjectiveFunction::KernelAlignment,
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        // Best parameters must respect the configured bounds.
        assert!(result.best_score > 0.0);
        assert!(result.best_parameters.gamma >= 0.1);
        assert!(result.best_parameters.gamma <= 10.0);
        assert!(result.best_parameters.n_components >= 10);
        assert!(result.best_parameters.n_components <= 50);
        // 3 gamma values x 3 component counts = 9 evaluations.
        assert_eq!(result.parameter_history.len(), 9);
        assert!(result.converged);
    }

    // Random search must evaluate exactly n_samples parameter sets.
    #[test]
    fn test_parameter_learner_random_search() {
        let x =
            Array2::from_shape_vec((30, 4), (0..120).map(|i| i as f64 * 0.05).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::RandomSearch { n_samples: 5 },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.01, 1.0),
                n_components_bounds: (5, 25),
                ..Default::default()
            },
            // Fixed seed keeps the draw sequence reproducible.
            random_seed: Some(42),
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert_eq!(result.parameter_history.len(), 5);
        assert_eq!(result.n_evaluations, 5);
    }

    // Bayesian optimization = n_initial random + n_iterations guided evals.
    #[test]
    fn test_parameter_learner_bayesian_optimization() {
        let x = Array2::from_shape_vec((25, 3), (0..75).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::BayesianOptimization {
                n_initial: 3,
                n_iterations: 2,
                exploration_factor: 1.0,
            },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.1, 5.0),
                n_components_bounds: (10, 30),
                ..Default::default()
            },
            random_seed: Some(123),
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        // 3 initial samples + 2 guided iterations = 5 evaluations.
        assert_eq!(result.parameter_history.len(), 5);
        assert_eq!(result.n_evaluations, 5);
    }

    // Coordinate descent records at least the initial evaluation.
    #[test]
    fn test_parameter_learner_coordinate_descent() {
        let x =
            Array2::from_shape_vec((40, 6), (0..240).map(|i| i as f64 * 0.02).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::CoordinateDescent {
                max_iterations: 3,
                tolerance: 1e-6,
            },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.1, 2.0),
                n_components_bounds: (15, 35),
                ..Default::default()
            },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert!(result.parameter_history.len() >= 1);
    }

    // Nystroem grid search: 2 x 2 grid => 4 evaluations.
    #[test]
    fn test_parameter_learner_nystroem() {
        let x =
            Array2::from_shape_vec((35, 4), (0..140).map(|i| i as f64 * 0.03).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 2 },
            parameter_bounds: ParameterBounds {
                gamma_bounds: (0.5, 2.0),
                n_components_bounds: (10, 20),
                ..Default::default()
            },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_nystroem_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert_eq!(result.parameter_history.len(), 4);
    }

    // Cross-validation objective requires targets; score is a mean |corr|.
    #[test]
    fn test_cross_validation_objective() {
        let x =
            Array2::from_shape_vec((50, 3), (0..150).map(|i| i as f64 * 0.01).collect()).unwrap();
        let y = Array1::from_shape_fn(50, |i| (i as f64 * 0.1).sin());

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 2 },
            objective_function: ObjectiveFunction::CrossValidationError { n_folds: 3 },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, Some(&y)).unwrap();

        assert!(result.best_score >= 0.0);
        assert!(result.parameter_history.len() > 0);
    }

    // Effective-rank objective should yield a strictly positive score.
    #[test]
    fn test_effective_rank_objective() {
        let x =
            Array2::from_shape_vec((30, 5), (0..150).map(|i| i as f64 * 0.02).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::RandomSearch { n_samples: 3 },
            objective_function: ObjectiveFunction::EffectiveRank,
            random_seed: Some(456),
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert_eq!(result.parameter_history.len(), 3);
    }

    // Grids include both endpoints; log grids are monotonically increasing.
    #[test]
    fn test_parameter_grid_creation() {
        let learner = ParameterLearner::new(ParameterLearningConfig::default());

        let linear_grid = learner.create_parameter_grid(0.0, 10.0, 5, false);
        assert_eq!(linear_grid.len(), 5);
        assert_abs_diff_eq!(linear_grid[0], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(linear_grid[4], 10.0, epsilon = 1e-10);

        let log_grid = learner.create_parameter_grid(0.1, 10.0, 3, true);
        assert_eq!(log_grid.len(), 3);
        assert_abs_diff_eq!(log_grid[0], 0.1, epsilon = 1e-10);
        assert_abs_diff_eq!(log_grid[2], 10.0, epsilon = 1e-10);
        assert!(log_grid[1] > log_grid[0]);
        assert!(log_grid[1] < log_grid[2]);
    }

    // Invariants of the returned OptimizationResult.
    #[test]
    fn test_optimization_result() {
        let x = Array2::from_shape_vec((20, 3), (0..60).map(|i| i as f64 * 0.1).collect()).unwrap();

        let config = ParameterLearningConfig {
            search_strategy: SearchStrategy::GridSearch { n_points: 2 },
            ..Default::default()
        };

        let learner = ParameterLearner::new(config);
        let result = learner.optimize_rbf_parameters(&x, None).unwrap();

        assert!(result.best_score > 0.0);
        assert!(result.best_parameters.gamma > 0.0);
        assert!(result.best_parameters.n_components > 0);
        assert!(result.converged);
        assert_eq!(result.n_evaluations, result.parameter_history.len());

        // The reported best score must equal the maximum in the history.
        let max_score = result
            .parameter_history
            .iter()
            .map(|(_, score)| *score)
            .fold(f64::NEG_INFINITY, f64::max);
        assert_abs_diff_eq!(result.best_score, max_score, epsilon = 1e-10);
    }
}