// sklears_gaussian_process/kernel_optimization.rs

use crate::kernels::Kernel;
use crate::marginal_likelihood::log_marginal_likelihood;
use scirs2_core::ndarray::{ArrayView1, ArrayView2};
use sklears_core::error::{Result as SklResult, SklearsError};

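/// Gradient-based optimizer for Gaussian process kernel hyperparameters.
///
/// Minimizes the negative log marginal likelihood with Adam, optionally
/// combined with box bounds, a periodic backtracking line search, and random
/// restarts. Configuration uses a builder pattern; a minimal sketch (the
/// values shown are illustrative, not recommendations):
///
/// ```ignore
/// let optimizer = KernelOptimizer::new()
///     .max_iter(50)
///     .learning_rate(0.05)
///     .bounds(vec![(1e-3, 1e3)]);
/// ```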
#[derive(Debug, Clone)]
pub struct KernelOptimizer {
    /// Maximum number of gradient iterations per restart.
    pub max_iter: usize,
    /// Base step size for the Adam update.
    pub learning_rate: f64,
    /// Convergence tolerance on the gradient norm.
    pub tol: f64,
    /// Whether to periodically refine steps with a backtracking line search.
    pub use_line_search: bool,
    /// Number of restarts; the first uses the kernel's current parameters.
    pub n_restarts: usize,
    /// Optional per-parameter `(lower, upper)` bounds.
    pub bounds: Option<Vec<(f64, f64)>>,
    /// Seed for reproducible random restarts.
    pub random_state: Option<u64>,
}

impl Default for KernelOptimizer {
    fn default() -> Self {
        Self {
            max_iter: 100,
            learning_rate: 0.01,
            tol: 1e-6,
            use_line_search: true,
            n_restarts: 5,
            bounds: None,
            random_state: Some(42),
        }
    }
}

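/// Outcome of a kernel hyperparameter optimization run.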
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Parameter values at the end of the run.
    pub optimized_params: Vec<f64>,
    /// Objective value (negative log marginal likelihood) at the last iteration.
    pub final_objective: f64,
    /// Number of iterations performed (length of `history`).
    pub n_iterations: usize,
    /// Whether the gradient norm dropped below `tol`.
    pub converged: bool,
    /// Objective value recorded at each iteration.
    pub history: Vec<f64>,
    /// Gradient norm at the returned parameters.
    pub final_gradient_norm: f64,
}

impl KernelOptimizer {
    /// Creates an optimizer with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the maximum number of iterations per restart.
    pub fn max_iter(mut self, max_iter: usize) -> Self {
        self.max_iter = max_iter;
        self
    }

    /// Sets the Adam learning rate.
    pub fn learning_rate(mut self, learning_rate: f64) -> Self {
        self.learning_rate = learning_rate;
        self
    }

    /// Sets the convergence tolerance on the gradient norm.
    pub fn tol(mut self, tol: f64) -> Self {
        self.tol = tol;
        self
    }

    /// Enables or disables the periodic line search.
    pub fn use_line_search(mut self, use_line_search: bool) -> Self {
        self.use_line_search = use_line_search;
        self
    }

    /// Sets the number of random restarts.
    pub fn n_restarts(mut self, n_restarts: usize) -> Self {
        self.n_restarts = n_restarts;
        self
    }

    /// Sets per-parameter `(lower, upper)` bounds.
    pub fn bounds(mut self, bounds: Vec<(f64, f64)>) -> Self {
        self.bounds = Some(bounds);
        self
    }

    /// Sets the seed used for random restarts.
    pub fn random_state(mut self, random_state: Option<u64>) -> Self {
        self.random_state = random_state;
        self
    }

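    /// Optimizes the kernel's hyperparameters in place by minimizing the
    /// negative log marginal likelihood, keeping the best result across
    /// `n_restarts` runs.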
    #[allow(non_snake_case)]
    pub fn optimize_kernel(
        &self,
        kernel: &mut Box<dyn Kernel>,
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
    ) -> SklResult<OptimizationResult> {
        let mut best_result = None;
        let mut best_objective = f64::INFINITY;

        for restart in 0..self.n_restarts {
            let seed = self.random_state.map(|s| s + restart as u64);

            // The first restart starts from the kernel's current parameters;
            // later restarts perturb them randomly.
            let initial_params = if restart == 0 {
                kernel.get_params()
            } else {
                self.random_initialize_params(kernel, seed)?
            };

            let result = self.optimize_from_initial(kernel, &X, &y, initial_params)?;

            if result.final_objective < best_objective {
                best_objective = result.final_objective;
                best_result = Some(result);
            }
        }

        let final_result = best_result.ok_or_else(|| {
            SklearsError::InvalidOperation("No optimization result found".to_string())
        })?;

        kernel.set_params(&final_result.optimized_params)?;

        Ok(final_result)
    }

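    /// Runs a single Adam optimization from `initial_params`, recording the
    /// objective at each iteration.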
    #[allow(non_snake_case)]
    fn optimize_from_initial(
        &self,
        kernel: &mut Box<dyn Kernel>,
        X: &ArrayView2<f64>,
        y: &ArrayView1<f64>,
        initial_params: Vec<f64>,
    ) -> SklResult<OptimizationResult> {
        let mut params = initial_params;
        let mut history = Vec::new();
        let mut converged = false;

        // Adam state: first (m) and second (v) moment estimates per parameter.
        let mut m = vec![0.0; params.len()];
        let mut v = vec![0.0; params.len()];
        let beta1 = 0.9;
        let beta2 = 0.999;
        let epsilon = 1e-8;

        for iteration in 0..self.max_iter {
            kernel.set_params(&params)?;

            let (objective, gradient) = self.compute_objective_and_gradient(kernel, X, y)?;
            history.push(objective);

            let gradient_norm = gradient.iter().map(|x| x * x).sum::<f64>().sqrt();
            if gradient_norm < self.tol {
                converged = true;
                break;
            }

            // Zero out components that would push a parameter past its bound.
            let clipped_gradient = self.apply_bounds_to_gradient(&params, &gradient);

            // Adam update with bias-corrected moment estimates.
            for i in 0..params.len() {
                m[i] = beta1 * m[i] + (1.0 - beta1) * clipped_gradient[i];
                v[i] = beta2 * v[i] + (1.0 - beta2) * clipped_gradient[i] * clipped_gradient[i];

                let m_hat = m[i] / (1.0 - beta1.powi(iteration as i32 + 1));
                let v_hat = v[i] / (1.0 - beta2.powi(iteration as i32 + 1));

                let update = self.learning_rate * m_hat / (v_hat.sqrt() + epsilon);
                params[i] -= update;
            }

            self.apply_bounds_to_params(&mut params);

            if self.use_line_search && iteration % 10 == 0 {
                params = self.line_search(kernel, X, y, &params, &clipped_gradient)?;
            }
        }

        // Evaluate the final gradient at the returned parameters; the kernel
        // may otherwise still hold the parameters from the start of the last
        // iteration.
        kernel.set_params(&params)?;
        let final_gradient = self.compute_gradient(kernel, X, y)?;
        let final_gradient_norm = final_gradient.iter().map(|x| x * x).sum::<f64>().sqrt();

        Ok(OptimizationResult {
            optimized_params: params,
            final_objective: history.last().copied().unwrap_or(f64::INFINITY),
            n_iterations: history.len(),
            converged,
            history,
            final_gradient_norm,
        })
    }

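    /// Evaluates the negative log marginal likelihood (with a fixed jitter of
    /// `1e-6`) together with its finite-difference gradient.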
    #[allow(non_snake_case)]
    fn compute_objective_and_gradient(
        &self,
        kernel: &Box<dyn Kernel>,
        X: &ArrayView2<f64>,
        y: &ArrayView1<f64>,
    ) -> SklResult<(f64, Vec<f64>)> {
        let X_owned = X.to_owned();
        let y_owned = y.to_owned();
        let neg_log_ml = -log_marginal_likelihood(&X_owned.view(), &y_owned.view(), kernel, 1e-6)?;

        let gradient = self.compute_gradient(kernel, X, y)?;

        Ok((neg_log_ml, gradient))
    }

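    /// Approximates the gradient of the negative log marginal likelihood with
    /// central finite differences: `(f(p + h) - f(p - h)) / (2h)` per parameter.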
    #[allow(non_snake_case)]
    fn compute_gradient(
        &self,
        kernel: &Box<dyn Kernel>,
        X: &ArrayView2<f64>,
        y: &ArrayView1<f64>,
    ) -> SklResult<Vec<f64>> {
        let params = kernel.get_params();
        let mut gradient = vec![0.0; params.len()];
        let h = 1e-6;
        let X_owned = X.to_owned();
        let y_owned = y.to_owned();

        for i in 0..params.len() {
            let mut params_plus = params.clone();
            let mut params_minus = params.clone();

            params_plus[i] += h;
            params_minus[i] -= h;

            let mut kernel_plus = kernel.clone_box();
            let mut kernel_minus = kernel.clone_box();

            kernel_plus.set_params(&params_plus)?;
            kernel_minus.set_params(&params_minus)?;

            let f_plus =
                -log_marginal_likelihood(&X_owned.view(), &y_owned.view(), &kernel_plus, 1e-6)?;
            let f_minus =
                -log_marginal_likelihood(&X_owned.view(), &y_owned.view(), &kernel_minus, 1e-6)?;

            gradient[i] = (f_plus - f_minus) / (2.0 * h);
        }

        Ok(gradient)
    }

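    /// Projects the gradient for the bound constraints: components that would
    /// push a parameter further past an active bound are zeroed.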
    fn apply_bounds_to_gradient(&self, params: &Vec<f64>, gradient: &Vec<f64>) -> Vec<f64> {
        if let Some(bounds) = &self.bounds {
            let mut clipped = gradient.clone();
            for i in 0..params.len() {
                if i < bounds.len() {
                    let (lower, upper) = bounds[i];
                    // The update steps in the negative gradient direction, so a
                    // positive component at the lower bound (or a negative one
                    // at the upper bound) would leave the feasible region.
                    if params[i] <= lower && gradient[i] > 0.0 {
                        clipped[i] = 0.0;
                    }
                    if params[i] >= upper && gradient[i] < 0.0 {
                        clipped[i] = 0.0;
                    }
                }
            }
            clipped
        } else {
            gradient.clone()
        }
    }

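    /// Clamps each parameter into its `(lower, upper)` bounds, if any.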
    fn apply_bounds_to_params(&self, params: &mut Vec<f64>) {
        if let Some(bounds) = &self.bounds {
            for i in 0..params.len() {
                if i < bounds.len() {
                    let (lower, upper) = bounds[i];
                    params[i] = params[i].max(lower).min(upper);
                }
            }
        }
    }

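    /// Backtracking line search along the negative `direction`, accepting the
    /// first step length that satisfies the Armijo sufficient-decrease
    /// condition `f(p - alpha * d) <= f(p) - c1 * alpha * (grad . d)`.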
    #[allow(non_snake_case)]
    fn line_search(
        &self,
        kernel: &mut Box<dyn Kernel>,
        X: &ArrayView2<f64>,
        y: &ArrayView1<f64>,
        params: &Vec<f64>,
        direction: &Vec<f64>,
    ) -> SklResult<Vec<f64>> {
        let mut alpha = 1.0;
        let rho = 0.5;
        let c1 = 1e-4;

        kernel.set_params(params)?;
        let X_owned = X.to_owned();
        let y_owned = y.to_owned();
        let f0 = -log_marginal_likelihood(&X_owned.view(), &y_owned.view(), kernel, 1e-6)?;

        // Directional derivative of the objective along `direction`.
        let grad_f = self.compute_gradient(kernel, X, y)?;
        let dir_deriv: f64 = grad_f
            .iter()
            .zip(direction.iter())
            .map(|(g, d)| g * d)
            .sum();

        for _ in 0..20 {
            let new_params: Vec<f64> = params
                .iter()
                .zip(direction.iter())
                .map(|(p, d)| p - alpha * d)
                .collect();

            let mut bounded_params = new_params;
            self.apply_bounds_to_params(&mut bounded_params);

            kernel.set_params(&bounded_params)?;
            let f_new = -log_marginal_likelihood(&X_owned.view(), &y_owned.view(), kernel, 1e-6)?;

            // Armijo condition: the step moves along `-direction`, so a
            // sufficient decrease requires subtracting the directional term.
            if f_new <= f0 - c1 * alpha * dir_deriv {
                return Ok(bounded_params);
            }

            alpha *= rho;
        }

        // No acceptable step found; keep the incoming parameters.
        Ok(params.clone())
    }

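    /// Produces a randomly perturbed copy of the kernel's current parameters
    /// for a restart, keeping every parameter strictly positive.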
    fn random_initialize_params(
        &self,
        kernel: &Box<dyn Kernel>,
        seed: Option<u64>,
    ) -> SklResult<Vec<f64>> {
        let mut params = kernel.get_params();

        let scale = 0.5;
        for i in 0..params.len() {
            let noise = if let Some(s) = seed {
                // Simple linear congruential generator, mapped to [-1, 1).
                let a = 1664525u64;
                let c = 1013904223u64;
                let m = 2u64.pow(32);
                let x = (a.wrapping_mul(s.wrapping_add(i as u64)).wrapping_add(c)) % m;
                (x as f64 / m as f64 - 0.5) * 2.0
            } else {
                0.0
            };

            // Multiplicative perturbation, kept strictly positive.
            params[i] *= 1.0 + scale * noise;
            params[i] = params[i].abs().max(1e-6);
        }

        self.apply_bounds_to_params(&mut params);

        Ok(params)
    }
}

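/// Convenience wrapper that optimizes `kernel` in place, using
/// `optimizer_config` or the default settings when `None` is given.
///
/// A minimal sketch of the intended call pattern (data and the `RBF` kernel
/// mirror the tests below):
///
/// ```ignore
/// let X = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
/// let y = Array1::from_vec(vec![1.0, 4.0, 9.0, 16.0, 25.0]);
/// let mut kernel: Box<dyn Kernel> = Box::new(RBF::new(1.0));
/// let result = optimize_kernel_parameters(&mut kernel, X.view(), y.view(), None).unwrap();
/// assert!(result.final_objective.is_finite());
/// ```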
#[allow(non_snake_case)]
pub fn optimize_kernel_parameters(
    kernel: &mut Box<dyn Kernel>,
    X: ArrayView2<f64>,
    y: ArrayView1<f64>,
    optimizer_config: Option<KernelOptimizer>,
) -> SklResult<OptimizationResult> {
    let optimizer = optimizer_config.unwrap_or_default();
    optimizer.optimize_kernel(kernel, X, y)
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kernels::{Kernel, RBF};
    use scirs2_core::ndarray::{Array1, Array2};

    #[test]
    fn test_kernel_optimizer_creation() {
        let optimizer = KernelOptimizer::new();
        assert_eq!(optimizer.max_iter, 100);
        assert_eq!(optimizer.n_restarts, 5);
        assert!(optimizer.use_line_search);
    }

    #[test]
    #[allow(non_snake_case)]
    fn test_kernel_optimization() {
        let optimizer = KernelOptimizer::new()
            .max_iter(10)
            .n_restarts(1)
            .learning_rate(0.1);

        let X = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
        let y = Array1::from_vec(vec![1.0, 4.0, 9.0, 16.0, 25.0]);

        let mut kernel: Box<dyn Kernel> = Box::new(RBF::new(1.0));

        let result = optimizer
            .optimize_kernel(&mut kernel, X.view(), y.view())
            .unwrap();

        assert!(result.final_objective.is_finite());
        assert_eq!(result.n_iterations, 10);
        assert!(!result.history.is_empty());
    }

    #[test]
    fn test_bounds_application() {
        let bounds = vec![(0.1, 10.0)];
        let optimizer = KernelOptimizer::new().bounds(bounds);

        // Below the lower bound: clamped up to 0.1.
        let mut params = vec![-1.0];
        optimizer.apply_bounds_to_params(&mut params);
        assert_eq!(params[0], 0.1);

        // Above the upper bound: clamped down to 10.0.
        params[0] = 15.0;
        optimizer.apply_bounds_to_params(&mut params);
        assert_eq!(params[0], 10.0);
    }

    #[test]
    #[allow(non_snake_case)]
    fn test_gradient_computation() {
        let optimizer = KernelOptimizer::new();
        let kernel: Box<dyn Kernel> = Box::new(RBF::new(1.0));

        let X = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        let gradient = optimizer
            .compute_gradient(&kernel, &X.view(), &y.view())
            .unwrap();

        assert_eq!(gradient.len(), 1);
        assert!(gradient[0].is_finite());
    }

    #[test]
    fn test_random_initialization() {
        let optimizer = KernelOptimizer::new();
        let kernel: Box<dyn Kernel> = Box::new(RBF::new(1.0));

        let params1 = optimizer
            .random_initialize_params(&kernel, Some(42))
            .unwrap();
        let params2 = optimizer
            .random_initialize_params(&kernel, Some(42))
            .unwrap();
        let params3 = optimizer
            .random_initialize_params(&kernel, Some(43))
            .unwrap();

        // Same seed reproduces the same perturbation; a different seed does not.
        assert_eq!(params1[0], params2[0]);
        assert_ne!(params1[0], params3[0]);
        assert!(params1[0] > 0.0);
        assert!(params3[0] > 0.0);
    }
}