use scirs2_core::ndarray::{s, Array1, Array2, ArrayView2};
use sklears_core::error::Result;
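
/// Configuration for gradient-based kernel parameter learning: step size, stopping
/// criteria, momentum, regularization, and learning-rate scheduling.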
#[derive(Clone, Debug)]
pub struct GradientConfig {
    pub learning_rate: f64,
    pub max_iterations: usize,
    pub tolerance: f64,
    pub momentum: f64,
    pub l2_regularization: f64,
    pub adaptive_learning_rate: bool,
    pub learning_rate_decay: f64,
    pub min_learning_rate: f64,
    pub batch_size: usize,
}

impl Default for GradientConfig {
    fn default() -> Self {
        Self {
            learning_rate: 0.01,
            max_iterations: 1000,
            tolerance: 1e-6,
            momentum: 0.9,
            l2_regularization: 1e-4,
            adaptive_learning_rate: true,
            learning_rate_decay: 0.99,
            min_learning_rate: 1e-6,
            batch_size: 256,
        }
    }
}
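
/// First-order update rules available for kernel parameter optimization. `LBFGS` is
/// currently a placeholder that falls back to a plain gradient step (see `update_lbfgs`).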
#[derive(Clone, Debug, PartialEq)]
pub enum GradientOptimizer {
    SGD,
    Momentum,
    Adam,
    AdaGrad,
    RMSprop,
    LBFGS,
}
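
/// Objective functions whose gradients can be taken with respect to the kernel parameters.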
#[derive(Clone, Debug, PartialEq)]
pub enum KernelObjective {
    KernelAlignment,
    CrossValidationError,
    MarginalLikelihood,
    KernelRidgeLoss,
    MaximumMeanDiscrepancy,
    KernelTargetAlignment,
}
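
/// Gradient of the selected objective at the current parameters, together with the
/// objective value and an optional Hessian.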
#[derive(Clone, Debug)]
pub struct GradientResult {
    pub gradient: Array1<f64>,
    pub objective_value: f64,
    pub hessian: Option<Array2<f64>>,
}
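
/// Learns kernel parameters (currently an RBF bandwidth stored in `parameters[0]`) by
/// gradient-based optimization of a configurable objective.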
pub struct GradientKernelLearner {
    config: GradientConfig,
    optimizer: GradientOptimizer,
    objective: KernelObjective,
    parameters: Array1<f64>,
    parameter_bounds: Option<Array2<f64>>,
    optimization_history: Vec<(f64, Array1<f64>)>,
    velocity: Option<Array1<f64>>,
    adam_m: Option<Array1<f64>>,
    adam_v: Option<Array1<f64>>,
    iteration: usize,
}

impl GradientKernelLearner {
    pub fn new(n_parameters: usize) -> Self {
        Self {
            config: GradientConfig::default(),
            optimizer: GradientOptimizer::Adam,
            objective: KernelObjective::KernelAlignment,
            parameters: Array1::ones(n_parameters),
            parameter_bounds: None,
            optimization_history: Vec::new(),
            velocity: None,
            adam_m: None,
            adam_v: None,
            iteration: 0,
        }
    }

    pub fn with_config(mut self, config: GradientConfig) -> Self {
        self.config = config;
        self
    }

    pub fn with_optimizer(mut self, optimizer: GradientOptimizer) -> Self {
        self.optimizer = optimizer;
        self
    }

    pub fn with_objective(mut self, objective: KernelObjective) -> Self {
        self.objective = objective;
        self
    }

    pub fn with_bounds(mut self, bounds: Array2<f64>) -> Self {
        self.parameter_bounds = Some(bounds);
        self
    }

    pub fn initialize_parameters(&mut self, initial_params: Array1<f64>) {
        self.parameters = initial_params;
        self.velocity = Some(Array1::zeros(self.parameters.len()));
        self.adam_m = Some(Array1::zeros(self.parameters.len()));
        self.adam_v = Some(Array1::zeros(self.parameters.len()));
        self.iteration = 0;
        self.optimization_history.clear();
        self.apply_bounds();
    }
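
    /// Runs the optimization loop: computes the objective gradient, stops once the L1
    /// norm of the gradient falls below `tolerance`, applies the configured update rule,
    /// and optionally decays the learning rate when the objective worsens.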
    pub fn optimize(&mut self, x: &Array2<f64>, y: Option<&Array1<f64>>) -> Result<Array1<f64>> {
        for iteration in 0..self.config.max_iterations {
            self.iteration = iteration;

            let gradient_result = self.compute_gradient(x, y)?;

            if gradient_result
                .gradient
                .iter()
                .map(|&g| g.abs())
                .sum::<f64>()
                < self.config.tolerance
            {
                break;
            }

            self.update_parameters(&gradient_result.gradient)?;

            self.optimization_history
                .push((gradient_result.objective_value, self.parameters.clone()));

            if self.config.adaptive_learning_rate && iteration > 0 {
                self.update_learning_rate(iteration);
            }
        }

        Ok(self.parameters.clone())
    }
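
    /// Dispatches to the gradient routine that matches the configured objective.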
    fn compute_gradient(&self, x: &Array2<f64>, y: Option<&Array1<f64>>) -> Result<GradientResult> {
        match self.objective {
            KernelObjective::KernelAlignment => self.compute_kernel_alignment_gradient(x),
            KernelObjective::CrossValidationError => self.compute_cv_error_gradient(x, y),
            KernelObjective::MarginalLikelihood => self.compute_marginal_likelihood_gradient(x, y),
            KernelObjective::KernelRidgeLoss => self.compute_kernel_ridge_gradient(x, y),
            KernelObjective::MaximumMeanDiscrepancy => self.compute_mmd_gradient(x),
            KernelObjective::KernelTargetAlignment => self.compute_kta_gradient(x, y),
        }
    }

    fn compute_kernel_alignment_gradient(&self, x: &Array2<f64>) -> Result<GradientResult> {
        let _n_samples = x.nrows();
        let mut gradient = Array1::zeros(self.parameters.len());

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        let alignment = self.compute_kernel_alignment(&kernel_matrix);

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let alignment_derivative =
                self.compute_alignment_derivative(&kernel_matrix, kernel_derivative);
            gradient[i] = alignment_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: alignment,
            hessian: None,
        })
    }

    fn compute_cv_error_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or("Target values required for CV error gradient")?;
        let n_samples = x.nrows();
        let n_folds = 5;
        let fold_size = n_samples / n_folds;

        let mut gradient = Array1::zeros(self.parameters.len());
        let mut total_error = 0.0;

        for fold in 0..n_folds {
            let start_idx = fold * fold_size;
            let end_idx = std::cmp::min(start_idx + fold_size, n_samples);

            let (x_train, y_train, x_val, y_val) = self.split_data(x, y, start_idx, end_idx);

            let fold_gradient = self.compute_fold_gradient(&x_train, &y_train, &x_val, &y_val)?;

            gradient = gradient + fold_gradient.gradient;
            total_error += fold_gradient.objective_value;
        }

        gradient /= n_folds as f64;
        total_error /= n_folds as f64;

        Ok(GradientResult {
            gradient,
            objective_value: total_error,
            hessian: None,
        })
    }

    fn compute_marginal_likelihood_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or("Target values required for marginal likelihood gradient")?;
        let n_samples = x.nrows();

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let noise_variance = 1e-6;
        let mut k_with_noise = kernel_matrix.clone();
        for i in 0..n_samples {
            k_with_noise[[i, i]] += noise_variance;
        }

        let log_marginal_likelihood = self.compute_log_marginal_likelihood(&k_with_noise, y)?;

        let mut gradient = Array1::zeros(self.parameters.len());
        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let ml_derivative =
                self.compute_marginal_likelihood_derivative(&k_with_noise, y, kernel_derivative)?;
            gradient[i] = ml_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: -log_marginal_likelihood,
            hessian: None,
        })
    }

    fn compute_kernel_ridge_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or("Target values required for kernel ridge gradient")?;
        let n_samples = x.nrows();
        let alpha = 1e-3;

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let mut k_reg = kernel_matrix.clone();
        for i in 0..n_samples {
            k_reg[[i, i]] += alpha;
        }

        let kr_loss = self.compute_kernel_ridge_loss(&k_reg, y)?;

        let mut gradient = Array1::zeros(self.parameters.len());
        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let kr_derivative =
                self.compute_kernel_ridge_derivative(&k_reg, y, kernel_derivative)?;
            gradient[i] = kr_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: kr_loss,
            hessian: None,
        })
    }

    fn compute_mmd_gradient(&self, x: &Array2<f64>) -> Result<GradientResult> {
        let n_samples = x.nrows();
        let split_point = n_samples / 2;

        let x1 = x.slice(s![..split_point, ..]);
        let x2 = x.slice(s![split_point.., ..]);

        let mmd = self.compute_mmd(&x1, &x2)?;

        let mut gradient = Array1::zeros(self.parameters.len());
        let mmd_derivatives = self.compute_mmd_derivatives(&x1, &x2)?;

        for i in 0..self.parameters.len() {
            gradient[i] = mmd_derivatives[i];
        }

        Ok(GradientResult {
            gradient,
            objective_value: mmd,
            hessian: None,
        })
    }

    fn compute_kta_gradient(
        &self,
        x: &Array2<f64>,
        y: Option<&Array1<f64>>,
    ) -> Result<GradientResult> {
        let y = y.ok_or("Target values required for KTA gradient")?;

        let kernel_matrix = self.compute_kernel_matrix(x)?;

        let target_kernel = self.compute_target_kernel(y);

        let kta = self.compute_kta(&kernel_matrix, &target_kernel);

        let mut gradient = Array1::zeros(self.parameters.len());
        let kernel_derivatives = self.compute_kernel_derivatives(x)?;

        for i in 0..self.parameters.len() {
            let kernel_derivative = &kernel_derivatives[i];
            let kta_derivative =
                self.compute_kta_derivative(&kernel_matrix, &target_kernel, kernel_derivative);
            gradient[i] = kta_derivative;
        }

        Ok(GradientResult {
            gradient,
            objective_value: -kta,
            hessian: None,
        })
    }
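
    /// Applies a single parameter update using the configured optimizer.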
    fn update_parameters(&mut self, gradient: &Array1<f64>) -> Result<()> {
        match self.optimizer {
            GradientOptimizer::SGD => self.update_sgd(gradient),
            GradientOptimizer::Momentum => self.update_momentum(gradient),
            GradientOptimizer::Adam => self.update_adam(gradient),
            GradientOptimizer::AdaGrad => self.update_adagrad(gradient),
            GradientOptimizer::RMSprop => self.update_rmsprop(gradient),
            GradientOptimizer::LBFGS => self.update_lbfgs(gradient),
        }
    }

    fn update_sgd(&mut self, gradient: &Array1<f64>) -> Result<()> {
        for i in 0..self.parameters.len() {
            self.parameters[i] -= self.config.learning_rate * gradient[i];
        }
        self.apply_bounds();
        Ok(())
    }

    fn update_momentum(&mut self, gradient: &Array1<f64>) -> Result<()> {
        // Lazily create the velocity buffer so this update cannot panic if
        // `initialize_parameters` was not called first.
        if self.velocity.is_none() {
            self.velocity = Some(Array1::zeros(self.parameters.len()));
        }
        let velocity = self.velocity.as_mut().unwrap();

        for i in 0..self.parameters.len() {
            velocity[i] =
                self.config.momentum * velocity[i] - self.config.learning_rate * gradient[i];
            self.parameters[i] += velocity[i];
        }

        self.apply_bounds();
        Ok(())
    }

    fn update_adam(&mut self, gradient: &Array1<f64>) -> Result<()> {
        if self.adam_m.is_none() {
            self.adam_m = Some(Array1::zeros(self.parameters.len()));
            self.adam_v = Some(Array1::zeros(self.parameters.len()));
        }

        let adam_m = self.adam_m.as_mut().unwrap();
        let adam_v = self.adam_v.as_mut().unwrap();

        let beta1 = 0.9;
        let beta2 = 0.999;
        let epsilon = 1e-8;

        for i in 0..self.parameters.len() {
            adam_m[i] = beta1 * adam_m[i] + (1.0 - beta1) * gradient[i];

            adam_v[i] = beta2 * adam_v[i] + (1.0 - beta2) * gradient[i] * gradient[i];

            let m_hat = adam_m[i] / (1.0 - beta1.powi(self.iteration as i32 + 1));

            let v_hat = adam_v[i] / (1.0 - beta2.powi(self.iteration as i32 + 1));

            self.parameters[i] -= self.config.learning_rate * m_hat / (v_hat.sqrt() + epsilon);
        }

        self.apply_bounds();
        Ok(())
    }

    fn update_adagrad(&mut self, gradient: &Array1<f64>) -> Result<()> {
        if self.adam_v.is_none() {
            self.adam_v = Some(Array1::zeros(self.parameters.len()));
        }

        let accumulated_grad = self.adam_v.as_mut().unwrap();
        let epsilon = 1e-8;

        for i in 0..self.parameters.len() {
            accumulated_grad[i] += gradient[i] * gradient[i];
            self.parameters[i] -=
                self.config.learning_rate * gradient[i] / (accumulated_grad[i].sqrt() + epsilon);
        }

        self.apply_bounds();
        Ok(())
    }

    fn update_rmsprop(&mut self, gradient: &Array1<f64>) -> Result<()> {
        if self.adam_v.is_none() {
            self.adam_v = Some(Array1::zeros(self.parameters.len()));
        }

        let accumulated_grad = self.adam_v.as_mut().unwrap();
        let decay_rate = 0.9;
        let epsilon = 1e-8;

        for i in 0..self.parameters.len() {
            accumulated_grad[i] =
                decay_rate * accumulated_grad[i] + (1.0 - decay_rate) * gradient[i] * gradient[i];
            self.parameters[i] -=
                self.config.learning_rate * gradient[i] / (accumulated_grad[i].sqrt() + epsilon);
        }

        self.apply_bounds();
        Ok(())
    }
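
    /// Placeholder L-BFGS update; currently performs a plain gradient-descent step.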
    fn update_lbfgs(&mut self, gradient: &Array1<f64>) -> Result<()> {
        for i in 0..self.parameters.len() {
            self.parameters[i] -= self.config.learning_rate * gradient[i];
        }
        self.apply_bounds();
        Ok(())
    }

    fn apply_bounds(&mut self) {
        if let Some(bounds) = &self.parameter_bounds {
            for i in 0..self.parameters.len() {
                self.parameters[i] = self.parameters[i].max(bounds[[i, 0]]).min(bounds[[i, 1]]);
            }
        }
    }
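
    /// Decays the learning rate (down to `min_learning_rate`) whenever the objective
    /// increased between the last two iterations.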
    fn update_learning_rate(&mut self, iteration: usize) {
        if iteration > 0 {
            let current_loss = self.optimization_history.last().unwrap().0;
            let previous_loss = self.optimization_history[self.optimization_history.len() - 2].0;

            if current_loss > previous_loss {
                self.config.learning_rate *= self.config.learning_rate_decay;
                self.config.learning_rate =
                    self.config.learning_rate.max(self.config.min_learning_rate);
            }
        }
    }
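
    /// Computes an RBF (Gaussian) kernel matrix, `K[i, j] = exp(-gamma * ||x_i - x_j||^2)`,
    /// using `gamma = parameters[0]`.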
    fn compute_kernel_matrix(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let n_samples = x.nrows();
        let mut kernel_matrix = Array2::zeros((n_samples, n_samples));

        let gamma = self.parameters[0];

        for i in 0..n_samples {
            for j in i..n_samples {
                let dist_sq = x
                    .row(i)
                    .iter()
                    .zip(x.row(j).iter())
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum::<f64>();

                let kernel_value = (-gamma * dist_sq).exp();
                kernel_matrix[[i, j]] = kernel_value;
                kernel_matrix[[j, i]] = kernel_value;
            }
        }

        Ok(kernel_matrix)
    }
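
    /// Computes the derivative of the kernel matrix with respect to each parameter. Only
    /// the `gamma` derivative is non-zero: `dK/dgamma = -||x_i - x_j||^2 * K[i, j]`; the
    /// derivatives for any remaining parameters are returned as zero matrices.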
    fn compute_kernel_derivatives(&self, x: &Array2<f64>) -> Result<Vec<Array2<f64>>> {
        let n_samples = x.nrows();
        let mut derivatives = Vec::new();

        let gamma = self.parameters[0];
        let mut gamma_derivative = Array2::zeros((n_samples, n_samples));

        for i in 0..n_samples {
            for j in i..n_samples {
                let dist_sq = x
                    .row(i)
                    .iter()
                    .zip(x.row(j).iter())
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum::<f64>();

                let kernel_value = (-gamma * dist_sq).exp();
                let derivative_value = -dist_sq * kernel_value;

                gamma_derivative[[i, j]] = derivative_value;
                gamma_derivative[[j, i]] = derivative_value;
            }
        }

        derivatives.push(gamma_derivative);

        for _param_idx in 1..self.parameters.len() {
            let derivative = Array2::zeros((n_samples, n_samples));
            derivatives.push(derivative);
        }

        Ok(derivatives)
    }

    fn compute_kernel_alignment(&self, kernel_matrix: &Array2<f64>) -> f64 {
        let n_samples = kernel_matrix.nrows();
        let trace = (0..n_samples).map(|i| kernel_matrix[[i, i]]).sum::<f64>();
        let frobenius_norm = kernel_matrix.iter().map(|&x| x * x).sum::<f64>().sqrt();

        trace / frobenius_norm
    }

    fn compute_alignment_derivative(
        &self,
        kernel_matrix: &Array2<f64>,
        kernel_derivative: &Array2<f64>,
    ) -> f64 {
        let n_samples = kernel_matrix.nrows();
        let trace = (0..n_samples).map(|i| kernel_matrix[[i, i]]).sum::<f64>();
        let trace_derivative = (0..n_samples)
            .map(|i| kernel_derivative[[i, i]])
            .sum::<f64>();

        let frobenius_norm = kernel_matrix.iter().map(|&x| x * x).sum::<f64>().sqrt();
        let frobenius_derivative = kernel_matrix
            .iter()
            .zip(kernel_derivative.iter())
            .map(|(&k, &dk)| k * dk)
            .sum::<f64>()
            / frobenius_norm;

        (trace_derivative * frobenius_norm - trace * frobenius_derivative)
            / (frobenius_norm * frobenius_norm)
    }

    fn split_data(
        &self,
        x: &Array2<f64>,
        y: &Array1<f64>,
        start_idx: usize,
        end_idx: usize,
    ) -> (Array2<f64>, Array1<f64>, Array2<f64>, Array1<f64>) {
        let n_samples = x.nrows();
        let n_features = x.ncols();

        let mut x_train = Array2::zeros((n_samples - (end_idx - start_idx), n_features));
        let mut y_train = Array1::zeros(n_samples - (end_idx - start_idx));
        let mut x_val = Array2::zeros((end_idx - start_idx, n_features));
        let mut y_val = Array1::zeros(end_idx - start_idx);

        let mut train_idx = 0;
        let mut val_idx = 0;

        for i in 0..n_samples {
            if i >= start_idx && i < end_idx {
                x_val.row_mut(val_idx).assign(&x.row(i));
                y_val[val_idx] = y[i];
                val_idx += 1;
            } else {
                x_train.row_mut(train_idx).assign(&x.row(i));
                y_train[train_idx] = y[i];
                train_idx += 1;
            }
        }

        (x_train, y_train, x_val, y_val)
    }
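
    // Placeholder objective helpers: the functions from `compute_fold_gradient` through
    // `compute_mmd_derivatives` (and `compute_kta_derivative` further down) currently
    // return zeros until the corresponding objectives are fully implemented.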
    fn compute_fold_gradient(
        &self,
        _x_train: &Array2<f64>,
        _y_train: &Array1<f64>,
        _x_val: &Array2<f64>,
        _y_val: &Array1<f64>,
    ) -> Result<GradientResult> {
        let gradient = Array1::zeros(self.parameters.len());
        let objective_value = 0.0;

        Ok(GradientResult {
            gradient,
            objective_value,
            hessian: None,
        })
    }

    fn compute_log_marginal_likelihood(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
    ) -> Result<f64> {
        Ok(0.0)
    }

    fn compute_marginal_likelihood_derivative(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
        _kernel_derivative: &Array2<f64>,
    ) -> Result<f64> {
        Ok(0.0)
    }

    fn compute_kernel_ridge_loss(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
    ) -> Result<f64> {
        Ok(0.0)
    }

    fn compute_kernel_ridge_derivative(
        &self,
        _kernel_matrix: &Array2<f64>,
        _y: &Array1<f64>,
        _kernel_derivative: &Array2<f64>,
    ) -> Result<f64> {
        Ok(0.0)
    }

    fn compute_mmd(&self, _x1: &ArrayView2<f64>, _x2: &ArrayView2<f64>) -> Result<f64> {
        Ok(0.0)
    }

    fn compute_mmd_derivatives(
        &self,
        _x1: &ArrayView2<f64>,
        _x2: &ArrayView2<f64>,
    ) -> Result<Array1<f64>> {
        Ok(Array1::zeros(self.parameters.len()))
    }

    fn compute_target_kernel(&self, y: &Array1<f64>) -> Array2<f64> {
        let n_samples = y.len();
        let mut target_kernel = Array2::zeros((n_samples, n_samples));

        for i in 0..n_samples {
            for j in 0..n_samples {
                target_kernel[[i, j]] = y[i] * y[j];
            }
        }

        target_kernel
    }

    fn compute_kta(&self, kernel_matrix: &Array2<f64>, target_kernel: &Array2<f64>) -> f64 {
        let numerator = kernel_matrix
            .iter()
            .zip(target_kernel.iter())
            .map(|(&k, &t)| k * t)
            .sum::<f64>();

        let k_norm = kernel_matrix.iter().map(|&k| k * k).sum::<f64>().sqrt();
        let t_norm = target_kernel.iter().map(|&t| t * t).sum::<f64>().sqrt();

        numerator / (k_norm * t_norm)
    }

    fn compute_kta_derivative(
        &self,
        _kernel_matrix: &Array2<f64>,
        _target_kernel: &Array2<f64>,
        _kernel_derivative: &Array2<f64>,
    ) -> f64 {
        0.0
    }

    pub fn get_parameters(&self) -> &Array1<f64> {
        &self.parameters
    }

    pub fn get_optimization_history(&self) -> &Vec<(f64, Array1<f64>)> {
        &self.optimization_history
    }
}
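
/// Learns several kernels independently and combines them with (currently uniform)
/// combination weights.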
pub struct GradientMultiKernelLearner {
    base_learners: Vec<GradientKernelLearner>,
    combination_weights: Array1<f64>,
    config: GradientConfig,
}

impl GradientMultiKernelLearner {
    pub fn new(n_kernels: usize, n_parameters_per_kernel: usize) -> Self {
        let mut base_learners = Vec::new();
        for _ in 0..n_kernels {
            base_learners.push(GradientKernelLearner::new(n_parameters_per_kernel));
        }

        Self {
            base_learners,
            combination_weights: Array1::from_elem(n_kernels, 1.0 / n_kernels as f64),
            config: GradientConfig::default(),
        }
    }

    pub fn optimize(&mut self, x: &Array2<f64>, y: Option<&Array1<f64>>) -> Result<()> {
        for learner in &mut self.base_learners {
            learner.optimize(x, y)?;
        }

        self.optimize_combination_weights(x, y)?;

        Ok(())
    }

    fn optimize_combination_weights(
        &mut self,
        _x: &Array2<f64>,
        _y: Option<&Array1<f64>>,
    ) -> Result<()> {
        let n_kernels = self.base_learners.len();
        self.combination_weights = Array1::from_elem(n_kernels, 1.0 / n_kernels as f64);
        Ok(())
    }

    pub fn get_all_parameters(&self) -> Vec<&Array1<f64>> {
        self.base_learners
            .iter()
            .map(|learner| learner.get_parameters())
            .collect()
    }

    pub fn get_combination_weights(&self) -> &Array1<f64> {
        &self.combination_weights
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_gradient_config() {
        let config = GradientConfig::default();
        assert_eq!(config.learning_rate, 0.01);
        assert_eq!(config.max_iterations, 1000);
        assert!(config.tolerance > 0.0);
    }

    #[test]
    fn test_gradient_kernel_learner() {
        let mut learner = GradientKernelLearner::new(2)
            .with_optimizer(GradientOptimizer::Adam)
            .with_objective(KernelObjective::KernelAlignment);

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        learner.initialize_parameters(Array1::from_vec(vec![1.0, 0.5]));
        let optimized_params = learner.optimize(&x, None).unwrap();

        assert_eq!(optimized_params.len(), 2);
    }

    #[test]
    fn test_gradient_optimizers() {
        let optimizers = vec![
            GradientOptimizer::SGD,
            GradientOptimizer::Momentum,
            GradientOptimizer::Adam,
            GradientOptimizer::AdaGrad,
            GradientOptimizer::RMSprop,
        ];

        for optimizer in optimizers {
            let mut learner = GradientKernelLearner::new(1).with_optimizer(optimizer);

            let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0]).unwrap();

            learner.initialize_parameters(Array1::from_vec(vec![1.0]));
            let result = learner.optimize(&x, None);
            assert!(result.is_ok());
        }
    }

    #[test]
    fn test_parameter_bounds() {
        let mut learner = GradientKernelLearner::new(2).with_bounds(
            Array2::from_shape_vec((2, 2), vec![0.1, 10.0, 0.0, 5.0]).unwrap(),
        );

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        learner.initialize_parameters(Array1::from_vec(vec![100.0, -1.0]));
        let optimized_params = learner.optimize(&x, None).unwrap();

        assert!(optimized_params[0] >= 0.1 && optimized_params[0] <= 10.0);
        assert!(optimized_params[1] >= 0.0 && optimized_params[1] <= 5.0);
    }

    #[test]
    fn test_multi_kernel_learner() {
        let mut multi_learner = GradientMultiKernelLearner::new(3, 2);

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        multi_learner.optimize(&x, None).unwrap();

        let all_params = multi_learner.get_all_parameters();
        assert_eq!(all_params.len(), 3);

        let weights = multi_learner.get_combination_weights();
        assert_eq!(weights.len(), 3);
    }

    #[test]
    fn test_objective_functions() {
        let objectives = vec![
            KernelObjective::KernelAlignment,
            KernelObjective::CrossValidationError,
            KernelObjective::MarginalLikelihood,
            KernelObjective::KernelRidgeLoss,
            KernelObjective::MaximumMeanDiscrepancy,
            KernelObjective::KernelTargetAlignment,
        ];

        for objective in objectives {
            let mut learner = GradientKernelLearner::new(1).with_objective(objective.clone());

            let x = Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0])
                .unwrap();

            let y = Array1::from_vec(vec![1.0, 0.0, 1.0, 0.0]);

            learner.initialize_parameters(Array1::from_vec(vec![1.0]));

            let result = if objective == KernelObjective::KernelAlignment
                || objective == KernelObjective::MaximumMeanDiscrepancy
            {
                learner.optimize(&x, None)
            } else {
                learner.optimize(&x, Some(&y))
            };

            assert!(result.is_ok());
        }
    }

    #[test]
    fn test_adaptive_learning_rate() {
        let config = GradientConfig {
            adaptive_learning_rate: true,
            learning_rate_decay: 0.5,
            min_learning_rate: 1e-6,
            ..Default::default()
        };

        let mut learner = GradientKernelLearner::new(1).with_config(config);

        let x =
            Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0]).unwrap();

        learner.initialize_parameters(Array1::from_vec(vec![1.0]));
        let result = learner.optimize(&x, None);
        assert!(result.is_ok());
    }
}