use rayon::prelude::*;
use scirs2_core::ndarray::{s, Array1, Array2, ArrayView2};
use sklears_core::error::Result;
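/// Configuration options for gradient-based kernel parameter optimization
/// (step sizes, convergence criteria, regularization, and learning-rate scheduling).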
#[derive(Clone, Debug)]
pub struct GradientConfig {
    /// Step size used for parameter updates.
    pub learning_rate: f64,
    /// Maximum number of optimization iterations.
    pub max_iterations: usize,
    /// Convergence tolerance on the L1 norm of the gradient.
    pub tolerance: f64,
    /// Momentum coefficient (used by the momentum optimizer).
    pub momentum: f64,
    /// L2 regularization strength.
    pub l2_regularization: f64,
    /// Whether to decay the learning rate when the objective worsens.
    pub adaptive_learning_rate: bool,
    /// Multiplicative decay factor applied to the learning rate.
    pub learning_rate_decay: f64,
    /// Lower bound on the decayed learning rate.
    pub min_learning_rate: f64,
    /// Mini-batch size for stochastic updates.
    pub batch_size: usize,
}
impl Default for GradientConfig {
    fn default() -> Self {
        Self {
            learning_rate: 0.01,
            max_iterations: 1000,
            tolerance: 1e-6,
            momentum: 0.9,
            l2_regularization: 1e-4,
            adaptive_learning_rate: true,
            learning_rate_decay: 0.99,
            min_learning_rate: 1e-6,
            batch_size: 256,
        }
    }
}
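/// Gradient-based optimizers available for updating kernel parameters.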
#[derive(Clone, Debug, PartialEq)]
pub enum GradientOptimizer {
    /// Plain stochastic gradient descent.
    SGD,
    /// Gradient descent with momentum.
    Momentum,
    /// Adam (adaptive moment estimation).
    Adam,
    /// AdaGrad (per-parameter accumulated squared gradients).
    AdaGrad,
    /// RMSprop (exponentially decayed squared gradients).
    RMSprop,
    /// L-BFGS (currently approximated by a plain gradient step).
    LBFGS,
}
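/// Objective functions that can be optimized with respect to the kernel parameters.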
#[derive(Clone, Debug, PartialEq)]
pub enum KernelObjective {
    /// Kernel alignment (unsupervised).
    KernelAlignment,
    /// Cross-validation error (requires targets).
    CrossValidationError,
    /// Negative log marginal likelihood (requires targets).
    MarginalLikelihood,
    /// Kernel ridge regression loss (requires targets).
    KernelRidgeLoss,
    /// Maximum mean discrepancy between two halves of the data.
    MaximumMeanDiscrepancy,
    /// Kernel-target alignment (requires targets).
    KernelTargetAlignment,
}
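/// Result of a single gradient evaluation.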
#[derive(Clone, Debug)]
pub struct GradientResult {
    /// Gradient of the objective with respect to each kernel parameter.
    pub gradient: Array1<f64>,
    /// Value of the objective at the current parameters.
    pub objective_value: f64,
    /// Optional Hessian (not currently populated).
    pub hessian: Option<Array2<f64>>,
}
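/// Learns kernel parameters by gradient-based optimization of a chosen objective.
///
/// Illustrative usage (a minimal sketch mirroring the unit tests below; marked
/// `ignore` because it assumes the caller supplies `x: Array2<f64>`):
///
/// ```ignore
/// let mut learner = GradientKernelLearner::new(2)
///     .with_optimizer(GradientOptimizer::Adam)
///     .with_objective(KernelObjective::KernelAlignment);
/// learner.initialize_parameters(Array1::from_vec(vec![1.0, 0.5]));
/// let optimized = learner.optimize(&x, None)?;
/// ```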
pub struct GradientKernelLearner {
    config: GradientConfig,
    optimizer: GradientOptimizer,
    objective: KernelObjective,
    /// Current kernel parameters.
    parameters: Array1<f64>,
    /// Optional per-parameter bounds; row `i` holds `[min_i, max_i]`.
    parameter_bounds: Option<Array2<f64>>,
    /// `(objective_value, parameters)` recorded after each update.
    optimization_history: Vec<(f64, Array1<f64>)>,
    /// Velocity buffer for the momentum optimizer.
    velocity: Option<Array1<f64>>,
    /// First-moment estimate for Adam.
    adam_m: Option<Array1<f64>>,
    /// Second-moment estimate for Adam (also reused as the accumulator for
    /// AdaGrad and RMSprop).
    adam_v: Option<Array1<f64>>,
    iteration: usize,
}
impl GradientKernelLearner {
    pub fn new(n_parameters: usize) -> Self {
        Self {
            config: GradientConfig::default(),
            optimizer: GradientOptimizer::Adam,
            objective: KernelObjective::KernelAlignment,
            parameters: Array1::ones(n_parameters),
            parameter_bounds: None,
            optimization_history: Vec::new(),
            velocity: None,
            adam_m: None,
            adam_v: None,
            iteration: 0,
        }
    }

    pub fn with_config(mut self, config: GradientConfig) -> Self {
        self.config = config;
        self
    }

    pub fn with_optimizer(mut self, optimizer: GradientOptimizer) -> Self {
        self.optimizer = optimizer;
        self
    }

    pub fn with_objective(mut self, objective: KernelObjective) -> Self {
        self.objective = objective;
        self
    }

    pub fn with_bounds(mut self, bounds: Array2<f64>) -> Self {
        self.parameter_bounds = Some(bounds);
        self
    }

    pub fn initialize_parameters(&mut self, initial_params: Array1<f64>) {
        self.parameters = initial_params;
        self.velocity = Some(Array1::zeros(self.parameters.len()));
        self.adam_m = Some(Array1::zeros(self.parameters.len()));
        self.adam_v = Some(Array1::zeros(self.parameters.len()));
        self.iteration = 0;
        self.optimization_history.clear();
        self.apply_bounds();
    }
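    /// Runs the configured optimizer for up to `max_iterations` iterations,
    /// stopping early once the L1 norm of the gradient falls below `tolerance`.
    /// Returns the optimized parameters.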
    pub fn optimize(&mut self, x: &Array2<f64>, y: Option<&Array1<f64>>) -> Result<Array1<f64>> {
        for iteration in 0..self.config.max_iterations {
            self.iteration = iteration;

            let gradient_result = self.compute_gradient(x, y)?;

            if gradient_result
                .gradient
                .iter()
                .map(|&g| g.abs())
                .sum::<f64>()
                < self.config.tolerance
            {
                break;
            }

            self.update_parameters(&gradient_result.gradient)?;

            self.optimization_history
                .push((gradient_result.objective_value, self.parameters.clone()));

            if self.config.adaptive_learning_rate && iteration > 0 {
                self.update_learning_rate(iteration);
            }
        }

        Ok(self.parameters.clone())
    }
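    /// Dispatches to the gradient routine for the configured objective.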
    fn compute_gradient(&self, x: &Array2<f64>, y: Option<&Array1<f64>>) -> Result<GradientResult> {
        match self.objective {
            KernelObjective::KernelAlignment => self.compute_kernel_alignment_gradient(x),
            KernelObjective::CrossValidationError => self.compute_cv_error_gradient(x, y),
            KernelObjective::MarginalLikelihood => self.compute_marginal_likelihood_gradient(x, y),
            KernelObjective::KernelRidgeLoss => self.compute_kernel_ridge_gradient(x, y),
            KernelObjective::MaximumMeanDiscrepancy => self.compute_mmd_gradient(x),
            KernelObjective::KernelTargetAlignment => self.compute_kta_gradient(x, y),
        }
    }
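    /// Gradient of the (simplified) kernel alignment objective
    /// `trace(K) / ||K||_F` with respect to the kernel parameters.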
    fn compute_kernel_alignment_gradient(&self, x: &Array2<f64>) -> Result<GradientResult> {
        let mut gradient = Array1::zeros(self.parameters.len());

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        let alignment = self.compute_kernel_alignment(&kernel_matrix);

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let alignment_derivative =
                self.compute_alignment_derivative(&kernel_matrix, kernel_derivative);
            gradient[i] = alignment_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: alignment,
            hessian: None,
        })
    }
    fn compute_cv_error_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or_else(|| "Target values required for CV error gradient")?;
        let n_samples = x.nrows();
        let n_folds = 5;
        let fold_size = n_samples / n_folds;

        let mut gradient = Array1::zeros(self.parameters.len());
        let mut total_error = 0.0;

        for fold in 0..n_folds {
            let start_idx = fold * fold_size;
            let end_idx = std::cmp::min(start_idx + fold_size, n_samples);

            let (x_train, y_train, x_val, y_val) = self.split_data(x, y, start_idx, end_idx);

            let fold_gradient = self.compute_fold_gradient(&x_train, &y_train, &x_val, &y_val)?;

            gradient = gradient + fold_gradient.gradient;
            total_error += fold_gradient.objective_value;
        }

        gradient = gradient / n_folds as f64;
        total_error /= n_folds as f64;

        Ok(GradientResult {
            gradient,
            objective_value: total_error,
            hessian: None,
        })
    }
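    /// Gradient of the negative log marginal likelihood (Gaussian-process
    /// objective); a small fixed noise term is added to the kernel diagonal
    /// before the likelihood terms are computed.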
    fn compute_marginal_likelihood_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or_else(|| "Target values required for marginal likelihood gradient")?;
        let n_samples = x.nrows();

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let noise_variance = 1e-6;
        let mut k_with_noise = kernel_matrix.clone();
        for i in 0..n_samples {
            k_with_noise[[i, i]] += noise_variance;
        }

        let log_marginal_likelihood = self.compute_log_marginal_likelihood(&k_with_noise, y)?;

        let mut gradient = Array1::zeros(self.parameters.len());
        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let ml_derivative =
                self.compute_marginal_likelihood_derivative(&k_with_noise, y, kernel_derivative)?;
            gradient[i] = ml_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: -log_marginal_likelihood,
            hessian: None,
        })
    }

    fn compute_kernel_ridge_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or_else(|| "Target values required for kernel ridge gradient")?;
        let n_samples = x.nrows();
        let alpha = 1e-3;
        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let mut k_reg = kernel_matrix.clone();
        for i in 0..n_samples {
            k_reg[[i, i]] += alpha;
        }

        let kr_loss = self.compute_kernel_ridge_loss(&k_reg, y)?;

        let mut gradient = Array1::zeros(self.parameters.len());
        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let kr_derivative =
                self.compute_kernel_ridge_derivative(&k_reg, y, kernel_derivative)?;
            gradient[i] = kr_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: kr_loss,
            hessian: None,
        })
    }

    fn compute_mmd_gradient(&self, x: &Array2<f64>) -> Result<GradientResult> {
        let n_samples = x.nrows();
        let split_point = n_samples / 2;

        let x1 = x.slice(s![..split_point, ..]);
        let x2 = x.slice(s![split_point.., ..]);

        let mmd = self.compute_mmd(&x1, &x2)?;

        let mut gradient = Array1::zeros(self.parameters.len());
        let mmd_derivatives = self.compute_mmd_derivatives(&x1, &x2)?;

        for i in 0..self.parameters.len() {
            gradient[i] = mmd_derivatives[i];
        }

        Ok(GradientResult {
            gradient,
            objective_value: mmd,
            hessian: None,
        })
    }

    fn compute_kta_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or_else(|| "Target values required for KTA gradient")?;

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let target_kernel = self.compute_target_kernel(y);

        let kta = self.compute_kta(&kernel_matrix, &target_kernel);

        let mut gradient = Array1::zeros(self.parameters.len());
        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let kta_derivative =
                self.compute_kta_derivative(&kernel_matrix, &target_kernel, kernel_derivative);
            gradient[i] = kta_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: -kta,
            hessian: None,
        })
    }

    fn update_parameters(&mut self, gradient: &Array1<f64>) -> Result<()> {
        match self.optimizer {
            GradientOptimizer::SGD => self.update_sgd(gradient),
            GradientOptimizer::Momentum => self.update_momentum(gradient),
            GradientOptimizer::Adam => self.update_adam(gradient),
            GradientOptimizer::AdaGrad => self.update_adagrad(gradient),
            GradientOptimizer::RMSprop => self.update_rmsprop(gradient),
            GradientOptimizer::LBFGS => self.update_lbfgs(gradient),
        }
    }

    fn update_sgd(&mut self, gradient: &Array1<f64>) -> Result<()> {
        for i in 0..self.parameters.len() {
            self.parameters[i] -= self.config.learning_rate * gradient[i];
        }
        self.apply_bounds();
        Ok(())
    }

    fn update_momentum(&mut self, gradient: &Array1<f64>) -> Result<()> {
        let velocity = self.velocity.as_mut().unwrap();

        for i in 0..self.parameters.len() {
            velocity[i] =
                self.config.momentum * velocity[i] - self.config.learning_rate * gradient[i];
            self.parameters[i] += velocity[i];
        }

        self.apply_bounds();
        Ok(())
    }
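    /// Adam update: `m ← β1·m + (1−β1)·g`, `v ← β2·v + (1−β2)·g²`, followed by
    /// bias correction and the step `θ ← θ − η·m̂ / (√v̂ + ε)`, with
    /// β1 = 0.9, β2 = 0.999, ε = 1e-8.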
    fn update_adam(&mut self, gradient: &Array1<f64>) -> Result<()> {
        if self.adam_m.is_none() {
            self.adam_m = Some(Array1::zeros(self.parameters.len()));
            self.adam_v = Some(Array1::zeros(self.parameters.len()));
        }

        let adam_m = self.adam_m.as_mut().unwrap();
        let adam_v = self.adam_v.as_mut().unwrap();

        let beta1 = 0.9;
        let beta2 = 0.999;
        let epsilon = 1e-8;

        for i in 0..self.parameters.len() {
            adam_m[i] = beta1 * adam_m[i] + (1.0 - beta1) * gradient[i];

            adam_v[i] = beta2 * adam_v[i] + (1.0 - beta2) * gradient[i] * gradient[i];

            let m_hat = adam_m[i] / (1.0 - beta1.powi(self.iteration as i32 + 1));

            let v_hat = adam_v[i] / (1.0 - beta2.powi(self.iteration as i32 + 1));

            self.parameters[i] -= self.config.learning_rate * m_hat / (v_hat.sqrt() + epsilon);
        }

        self.apply_bounds();
        Ok(())
    }

    fn update_adagrad(&mut self, gradient: &Array1<f64>) -> Result<()> {
        if self.adam_v.is_none() {
            self.adam_v = Some(Array1::zeros(self.parameters.len()));
        }

        let accumulated_grad = self.adam_v.as_mut().unwrap();
        let epsilon = 1e-8;

        for i in 0..self.parameters.len() {
            accumulated_grad[i] += gradient[i] * gradient[i];
            self.parameters[i] -=
                self.config.learning_rate * gradient[i] / (accumulated_grad[i].sqrt() + epsilon);
        }

        self.apply_bounds();
        Ok(())
    }

    fn update_rmsprop(&mut self, gradient: &Array1<f64>) -> Result<()> {
        if self.adam_v.is_none() {
            self.adam_v = Some(Array1::zeros(self.parameters.len()));
        }

        let accumulated_grad = self.adam_v.as_mut().unwrap();
        let decay_rate = 0.9;
        let epsilon = 1e-8;

        for i in 0..self.parameters.len() {
            accumulated_grad[i] =
                decay_rate * accumulated_grad[i] + (1.0 - decay_rate) * gradient[i] * gradient[i];
            self.parameters[i] -=
                self.config.learning_rate * gradient[i] / (accumulated_grad[i].sqrt() + epsilon);
        }

        self.apply_bounds();
        Ok(())
    }
    fn update_lbfgs(&mut self, gradient: &Array1<f64>) -> Result<()> {
        // Simplified placeholder: no curvature history is kept yet, so this
        // currently reduces to a plain gradient-descent step.
        for i in 0..self.parameters.len() {
            self.parameters[i] -= self.config.learning_rate * gradient[i];
        }
        self.apply_bounds();
        Ok(())
    }
    fn apply_bounds(&mut self) {
        if let Some(bounds) = &self.parameter_bounds {
            for i in 0..self.parameters.len() {
                self.parameters[i] = self.parameters[i].max(bounds[[i, 0]]).min(bounds[[i, 1]]);
            }
        }
    }
    fn update_learning_rate(&mut self, iteration: usize) {
        // Guard on the history length so the indexing below cannot underflow.
        if iteration > 0 && self.optimization_history.len() >= 2 {
            let current_loss = self.optimization_history.last().unwrap().0;
            let previous_loss = self.optimization_history[self.optimization_history.len() - 2].0;

            if current_loss > previous_loss {
                self.config.learning_rate *= self.config.learning_rate_decay;
                self.config.learning_rate =
                    self.config.learning_rate.max(self.config.min_learning_rate);
            }
        }
    }
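    /// Builds the RBF (Gaussian) kernel matrix
    /// `K[i, j] = exp(-γ · ||x_i − x_j||²)`, where `γ` is `parameters[0]`.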
    fn compute_kernel_matrix(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let n_samples = x.nrows();
        let mut kernel_matrix = Array2::zeros((n_samples, n_samples));

        let gamma = self.parameters[0];

        for i in 0..n_samples {
            for j in i..n_samples {
                let dist_sq = x
                    .row(i)
                    .iter()
                    .zip(x.row(j).iter())
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum::<f64>();

                let kernel_value = (-gamma * dist_sq).exp();
                kernel_matrix[[i, j]] = kernel_value;
                kernel_matrix[[j, i]] = kernel_value;
            }
        }

        Ok(kernel_matrix)
    }
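    /// Derivatives of the kernel matrix with respect to each parameter. For the
    /// RBF kernel, `∂K[i, j]/∂γ = −||x_i − x_j||² · K[i, j]`; derivatives for any
    /// additional parameters are currently zero.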
    fn compute_kernel_derivatives(&self, x: &Array2<f64>) -> Result<Vec<Array2<f64>>> {
        let n_samples = x.nrows();
        let mut derivatives = Vec::new();

        let gamma = self.parameters[0];
        let mut gamma_derivative = Array2::zeros((n_samples, n_samples));

        for i in 0..n_samples {
            for j in i..n_samples {
                let dist_sq = x
                    .row(i)
                    .iter()
                    .zip(x.row(j).iter())
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum::<f64>();

                let kernel_value = (-gamma * dist_sq).exp();
                let derivative_value = -dist_sq * kernel_value;

                gamma_derivative[[i, j]] = derivative_value;
                gamma_derivative[[j, i]] = derivative_value;
            }
        }

        derivatives.push(gamma_derivative);

        // Parameters beyond gamma do not affect the RBF kernel, so their
        // derivatives are zero matrices.
        for _ in 1..self.parameters.len() {
            let derivative = Array2::zeros((n_samples, n_samples));
            derivatives.push(derivative);
        }

        Ok(derivatives)
    }
    fn compute_kernel_alignment(&self, kernel_matrix: &Array2<f64>) -> f64 {
        let n_samples = kernel_matrix.nrows();
        let trace = (0..n_samples).map(|i| kernel_matrix[[i, i]]).sum::<f64>();
        let frobenius_norm = kernel_matrix.iter().map(|&x| x * x).sum::<f64>().sqrt();

        trace / frobenius_norm
    }

    fn compute_alignment_derivative(
        &self,
        kernel_matrix: &Array2<f64>,
        kernel_derivative: &Array2<f64>,
    ) -> f64 {
        let n_samples = kernel_matrix.nrows();
        let trace = (0..n_samples).map(|i| kernel_matrix[[i, i]]).sum::<f64>();
        let trace_derivative = (0..n_samples)
            .map(|i| kernel_derivative[[i, i]])
            .sum::<f64>();

        let frobenius_norm = kernel_matrix.iter().map(|&x| x * x).sum::<f64>().sqrt();
        let frobenius_derivative = kernel_matrix
            .iter()
            .zip(kernel_derivative.iter())
            .map(|(&k, &dk)| k * dk)
            .sum::<f64>()
            / frobenius_norm;

        (trace_derivative * frobenius_norm - trace * frobenius_derivative)
            / (frobenius_norm * frobenius_norm)
    }

    fn split_data(
        &self,
        x: &Array2<f64>,
        y: &Array1<f64>,
        start_idx: usize,
        end_idx: usize,
    ) -> (Array2<f64>, Array1<f64>, Array2<f64>, Array1<f64>) {
        let n_samples = x.nrows();
        let n_features = x.ncols();

        let mut x_train = Array2::zeros((n_samples - (end_idx - start_idx), n_features));
        let mut y_train = Array1::zeros(n_samples - (end_idx - start_idx));
        let mut x_val = Array2::zeros((end_idx - start_idx, n_features));
        let mut y_val = Array1::zeros(end_idx - start_idx);

        let mut train_idx = 0;
        let mut val_idx = 0;

        for i in 0..n_samples {
            if i >= start_idx && i < end_idx {
                x_val.row_mut(val_idx).assign(&x.row(i));
                y_val[val_idx] = y[i];
                val_idx += 1;
            } else {
                x_train.row_mut(train_idx).assign(&x.row(i));
                y_train[train_idx] = y[i];
                train_idx += 1;
            }
        }

        (x_train, y_train, x_val, y_val)
    }
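    // The helper routines below are placeholder implementations: they define
    // the intended interfaces for the per-objective computations but currently
    // return neutral values (zero gradients and zero objective values).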
    fn compute_fold_gradient(
        &self,
        _x_train: &Array2<f64>,
        _y_train: &Array1<f64>,
        _x_val: &Array2<f64>,
        _y_val: &Array1<f64>,
    ) -> Result<GradientResult> {
        // Placeholder: a full implementation would fit on the training fold and
        // evaluate the validation error and its gradient.
        let gradient = Array1::zeros(self.parameters.len());
        let objective_value = 0.0;

        Ok(GradientResult {
            gradient,
            objective_value,
            hessian: None,
        })
    }

    fn compute_log_marginal_likelihood(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
    ) -> Result<f64> {
        // Placeholder value.
        Ok(0.0)
    }

    fn compute_marginal_likelihood_derivative(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
        _kernel_derivative: &Array2<f64>,
    ) -> Result<f64> {
        // Placeholder value.
        Ok(0.0)
    }

    fn compute_kernel_ridge_loss(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
    ) -> Result<f64> {
        // Placeholder value.
        Ok(0.0)
    }

    fn compute_kernel_ridge_derivative(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
        _kernel_derivative: &Array2<f64>,
    ) -> Result<f64> {
        // Placeholder value.
        Ok(0.0)
    }

    fn compute_mmd(&self, _x1: &ArrayView2<f64>, _x2: &ArrayView2<f64>) -> Result<f64> {
        // Placeholder value.
        Ok(0.0)
    }

    fn compute_mmd_derivatives(
        &self,
        _x1: &ArrayView2<f64>,
        _x2: &ArrayView2<f64>,
    ) -> Result<Array1<f64>> {
        // Placeholder: zero gradient for every parameter.
        Ok(Array1::zeros(self.parameters.len()))
    }
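    /// Ideal target kernel `T[i, j] = y_i · y_j` used for kernel-target alignment.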
    fn compute_target_kernel(&self, y: &Array1<f64>) -> Array2<f64> {
        let n_samples = y.len();
        let mut target_kernel = Array2::zeros((n_samples, n_samples));

        for i in 0..n_samples {
            for j in 0..n_samples {
                target_kernel[[i, j]] = y[i] * y[j];
            }
        }

        target_kernel
    }
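    /// Kernel-target alignment `⟨K, T⟩_F / (||K||_F · ||T||_F)`.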
    fn compute_kta(&self, kernel_matrix: &Array2<f64>, target_kernel: &Array2<f64>) -> f64 {
        let numerator = kernel_matrix
            .iter()
            .zip(target_kernel.iter())
            .map(|(&k, &t)| k * t)
            .sum::<f64>();

        let k_norm = kernel_matrix.iter().map(|&k| k * k).sum::<f64>().sqrt();
        let t_norm = target_kernel.iter().map(|&t| t * t).sum::<f64>().sqrt();

        numerator / (k_norm * t_norm)
    }
    fn compute_kta_derivative(
        &self,
        _kernel_matrix: &Array2<f64>,
        _target_kernel: &Array2<f64>,
        _kernel_derivative: &Array2<f64>,
    ) -> f64 {
        // Placeholder: the analytic KTA derivative is not implemented yet.
        0.0
    }
    pub fn get_parameters(&self) -> &Array1<f64> {
        &self.parameters
    }

    pub fn get_optimization_history(&self) -> &Vec<(f64, Array1<f64>)> {
        &self.optimization_history
    }
}
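/// Learns several kernels jointly: each base learner optimizes its own kernel
/// parameters, and a combination weight vector is maintained over the kernels
/// (currently uniform).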
pub struct GradientMultiKernelLearner {
    base_learners: Vec<GradientKernelLearner>,
    combination_weights: Array1<f64>,
    config: GradientConfig,
}

impl GradientMultiKernelLearner {
    pub fn new(n_kernels: usize, n_parameters_per_kernel: usize) -> Self {
        let mut base_learners = Vec::new();
        for _ in 0..n_kernels {
            base_learners.push(GradientKernelLearner::new(n_parameters_per_kernel));
        }

        Self {
            base_learners,
            combination_weights: Array1::from_elem(n_kernels, 1.0 / n_kernels as f64),
            config: GradientConfig::default(),
        }
    }
    pub fn optimize(&mut self, x: &Array2<f64>, y: Option<&Array1<f64>>) -> Result<()> {
        for learner in &mut self.base_learners {
            learner.optimize(x, y)?;
        }

        self.optimize_combination_weights(x, y)?;

        Ok(())
    }

    fn optimize_combination_weights(
        &mut self,
        _x: &Array2<f64>,
        _y: Option<&Array1<f64>>,
    ) -> Result<()> {
        // Placeholder: currently assigns uniform weights to all kernels.
        let n_kernels = self.base_learners.len();
        self.combination_weights = Array1::from_elem(n_kernels, 1.0 / n_kernels as f64);
        Ok(())
    }
    pub fn get_all_parameters(&self) -> Vec<&Array1<f64>> {
        self.base_learners
            .iter()
            .map(|learner| learner.get_parameters())
            .collect()
    }

    pub fn get_combination_weights(&self) -> &Array1<f64> {
        &self.combination_weights
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_gradient_config() {
        let config = GradientConfig::default();
        assert_eq!(config.learning_rate, 0.01);
        assert_eq!(config.max_iterations, 1000);
        assert!(config.tolerance > 0.0);
    }

    #[test]
    fn test_gradient_kernel_learner() {
        let mut learner = GradientKernelLearner::new(2)
            .with_optimizer(GradientOptimizer::Adam)
            .with_objective(KernelObjective::KernelAlignment);

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        learner.initialize_parameters(Array1::from_vec(vec![1.0, 0.5]));
        let optimized_params = learner.optimize(&x, None).unwrap();

        assert_eq!(optimized_params.len(), 2);
    }

    #[test]
    fn test_gradient_optimizers() {
        let optimizers = vec![
            GradientOptimizer::SGD,
            GradientOptimizer::Momentum,
            GradientOptimizer::Adam,
            GradientOptimizer::AdaGrad,
            GradientOptimizer::RMSprop,
        ];

        for optimizer in optimizers {
            let mut learner = GradientKernelLearner::new(1).with_optimizer(optimizer);

            let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0]).unwrap();

            learner.initialize_parameters(Array1::from_vec(vec![1.0]));
            let result = learner.optimize(&x, None);
            assert!(result.is_ok());
        }
    }
    #[test]
    fn test_parameter_bounds() {
        let mut learner = GradientKernelLearner::new(2).with_bounds(
            Array2::from_shape_vec(
                (2, 2),
                vec![
                    0.1, 10.0, // bounds for parameter 0
                    0.0, 5.0, // bounds for parameter 1
                ],
            )
            .unwrap(),
        );

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        learner.initialize_parameters(Array1::from_vec(vec![100.0, -1.0]));
        let optimized_params = learner.optimize(&x, None).unwrap();

        assert!(optimized_params[0] >= 0.1 && optimized_params[0] <= 10.0);
        assert!(optimized_params[1] >= 0.0 && optimized_params[1] <= 5.0);
    }
    #[test]
    fn test_multi_kernel_learner() {
        let mut multi_learner = GradientMultiKernelLearner::new(3, 2);

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        multi_learner.optimize(&x, None).unwrap();

        let all_params = multi_learner.get_all_parameters();
        assert_eq!(all_params.len(), 3);

        let weights = multi_learner.get_combination_weights();
        assert_eq!(weights.len(), 3);
    }

    #[test]
    fn test_objective_functions() {
        let objectives = vec![
            KernelObjective::KernelAlignment,
            KernelObjective::CrossValidationError,
            KernelObjective::MarginalLikelihood,
            KernelObjective::KernelRidgeLoss,
            KernelObjective::MaximumMeanDiscrepancy,
            KernelObjective::KernelTargetAlignment,
        ];

        for objective in objectives {
            let mut learner = GradientKernelLearner::new(1).with_objective(objective.clone());

            let x = Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0])
                .unwrap();

            let y = Array1::from_vec(vec![1.0, 0.0, 1.0, 0.0]);

            learner.initialize_parameters(Array1::from_vec(vec![1.0]));

            let result = if objective == KernelObjective::KernelAlignment
                || objective == KernelObjective::MaximumMeanDiscrepancy
            {
                learner.optimize(&x, None)
            } else {
                learner.optimize(&x, Some(&y))
            };

            assert!(result.is_ok());
        }
    }

    #[test]
    fn test_adaptive_learning_rate() {
        let config = GradientConfig {
            adaptive_learning_rate: true,
            learning_rate_decay: 0.5,
            min_learning_rate: 1e-6,
            ..Default::default()
        };

        let mut learner = GradientKernelLearner::new(1).with_config(config);

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        learner.initialize_parameters(Array1::from_vec(vec![1.0]));
        let result = learner.optimize(&x, None);
        assert!(result.is_ok());
    }
}