1pub mod classifier;
18pub use classifier::{
19 FittedGaussianProcessClassifier, FittedMulticlassGaussianProcessClassifier,
20 GaussianProcessClassifier, MulticlassGaussianProcessClassifier,
21};
22
23use anofox_ml_core::{Fit, Predict, Result, RustMlError};
24use faer::linalg::solvers::Solve;
25use faer::{Mat, Side};
26use ndarray::{Array1, Array2};
27
28pub enum GpKernel {
30 Rbf {
31 length_scale: f64,
32 signal_var: f64,
33 },
34 Matern {
37 length_scale: f64,
38 signal_var: f64,
39 nu: f64,
40 },
41 RationalQuadratic {
42 length_scale: f64,
43 signal_var: f64,
44 alpha: f64,
45 },
46 White {
48 noise_level: f64,
49 },
50 Constant {
51 value: f64,
52 },
53 Sum(Box<GpKernel>, Box<GpKernel>),
54 Product(Box<GpKernel>, Box<GpKernel>),
55}
56
57impl GpKernel {
58 fn compute(&self, a: &[f64], b: &[f64]) -> f64 {
63 match self {
64 GpKernel::Rbf {
65 length_scale,
66 signal_var,
67 } => {
68 let sd: f64 = a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum();
69 signal_var * (-0.5 * sd / (length_scale * length_scale)).exp()
70 }
71 GpKernel::Matern {
72 length_scale,
73 signal_var,
74 nu,
75 } => {
76 let d: f64 = a
77 .iter()
78 .zip(b.iter())
79 .map(|(x, y)| (x - y).powi(2))
80 .sum::<f64>()
81 .sqrt();
82 let r = d / length_scale;
83 let v = if (nu - 0.5).abs() < 1e-9 {
84 (-r).exp()
85 } else if (nu - 1.5).abs() < 1e-9 {
86 let sqrt3_r = 3.0_f64.sqrt() * r;
87 (1.0 + sqrt3_r) * (-sqrt3_r).exp()
88 } else if (nu - 2.5).abs() < 1e-9 {
89 let sqrt5_r = 5.0_f64.sqrt() * r;
90 (1.0 + sqrt5_r + 5.0 / 3.0 * r * r) * (-sqrt5_r).exp()
91 } else {
92 (-0.5 * (d * d) / (length_scale * length_scale)).exp()
94 };
95 signal_var * v
96 }
97 GpKernel::RationalQuadratic {
98 length_scale,
99 signal_var,
100 alpha,
101 } => {
102 let sd: f64 = a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum();
103 let base = 1.0 + sd / (2.0 * alpha * length_scale * length_scale);
104 signal_var * base.powf(-alpha)
105 }
106 GpKernel::White { noise_level } => {
107 if a.iter().zip(b.iter()).all(|(x, y)| x == y) {
109 *noise_level
110 } else {
111 0.0
112 }
113 }
114 GpKernel::Constant { value } => *value,
115 GpKernel::Sum(k1, k2) => k1.compute(a, b) + k2.compute(a, b),
116 GpKernel::Product(k1, k2) => k1.compute(a, b) * k2.compute(a, b),
117 }
118 }
119
120 pub fn product(self, other: GpKernel) -> GpKernel {
122 GpKernel::Product(Box::new(self), Box::new(other))
123 }
124 pub fn add(self, other: GpKernel) -> GpKernel {
125 GpKernel::Sum(Box::new(self), Box::new(other))
126 }
127}
128
129pub struct GaussianProcessRegressor {
130 pub kernel: GpKernel,
131 pub alpha: f64,
132 pub normalize_y: bool,
133}
134
135impl GaussianProcessRegressor {
136 pub fn new(kernel: GpKernel) -> Self {
137 Self {
138 kernel,
139 alpha: 1e-10,
140 normalize_y: false,
141 }
142 }
143 pub fn with_alpha(mut self, a: f64) -> Self {
144 self.alpha = a;
145 self
146 }
147 pub fn with_normalize_y(mut self, v: bool) -> Self {
148 self.normalize_y = v;
149 self
150 }
151}
152
153pub struct FittedGaussianProcessRegressor {
154 pub x_train: Array2<f64>,
155 pub y_train: Array1<f64>,
156 pub l_lower: Mat<f64>,
158 pub dual: Array1<f64>,
160 pub kernel: GpKernel,
161 pub y_mean: f64,
162 pub y_std: f64,
163}
164
165pub(crate) fn build_gram(x_a: &Array2<f64>, x_b: &Array2<f64>, kernel: &GpKernel) -> Array2<f64> {
166 let na = x_a.nrows();
167 let nb = x_b.nrows();
168 let mut out = Array2::<f64>::zeros((na, nb));
169 for i in 0..na {
170 let ai = x_a.row(i).to_owned();
171 for j in 0..nb {
172 let bj = x_b.row(j).to_owned();
173 out[[i, j]] = kernel.compute(ai.as_slice().unwrap(), bj.as_slice().unwrap());
174 }
175 }
176 out
177}
178
179fn clone_kernel(k: &GpKernel) -> GpKernel {
180 match k {
181 GpKernel::Rbf {
182 length_scale,
183 signal_var,
184 } => GpKernel::Rbf {
185 length_scale: *length_scale,
186 signal_var: *signal_var,
187 },
188 GpKernel::Matern {
189 length_scale,
190 signal_var,
191 nu,
192 } => GpKernel::Matern {
193 length_scale: *length_scale,
194 signal_var: *signal_var,
195 nu: *nu,
196 },
197 GpKernel::RationalQuadratic {
198 length_scale,
199 signal_var,
200 alpha,
201 } => GpKernel::RationalQuadratic {
202 length_scale: *length_scale,
203 signal_var: *signal_var,
204 alpha: *alpha,
205 },
206 GpKernel::White { noise_level } => GpKernel::White {
207 noise_level: *noise_level,
208 },
209 GpKernel::Constant { value } => GpKernel::Constant { value: *value },
210 GpKernel::Sum(a, b) => GpKernel::Sum(Box::new(clone_kernel(a)), Box::new(clone_kernel(b))),
211 GpKernel::Product(a, b) => {
212 GpKernel::Product(Box::new(clone_kernel(a)), Box::new(clone_kernel(b)))
213 }
214 }
215}
216
217impl Fit<f64> for GaussianProcessRegressor {
218 type Fitted = FittedGaussianProcessRegressor;
219
220 fn fit(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
221 let n = x.nrows();
222 if x.nrows() != y.len() {
223 return Err(RustMlError::ShapeMismatch(format!(
224 "X has {} rows but y has {}",
225 x.nrows(),
226 y.len()
227 )));
228 }
229 let (y_mean, y_std, y_norm) = if self.normalize_y {
230 let m = y.sum() / n as f64;
231 let v: f64 = y.iter().map(|v| (v - m).powi(2)).sum::<f64>() / n as f64;
232 let s = v.sqrt().max(1e-12);
233 let yn = y.mapv(|v| (v - m) / s);
234 (m, s, yn)
235 } else {
236 (0.0, 1.0, y.clone())
237 };
238
239 let mut k = build_gram(x, x, &self.kernel);
240 for i in 0..n {
241 k[[i, i]] += self.alpha;
242 }
243 let km = Mat::from_fn(n, n, |i, j| k[[i, j]]);
244 let llt = faer::linalg::solvers::Llt::new(km.as_ref(), Side::Lower)
245 .map_err(|e| RustMlError::InvalidParameter(format!("Cholesky failed: {e:?}")))?;
246 let ym = Mat::from_fn(n, 1, |i, _| y_norm[i]);
247 let sol = llt.solve(&ym);
248 let dual: Array1<f64> = Array1::from_vec((0..n).map(|i| sol[(i, 0)]).collect());
249 let lower = llt.L();
251 let l = Mat::from_fn(n, n, |i, j| lower[(i, j)]);
252
253 Ok(FittedGaussianProcessRegressor {
254 x_train: x.clone(),
255 y_train: y_norm,
256 l_lower: l,
257 dual,
258 kernel: clone_kernel(&self.kernel),
259 y_mean,
260 y_std,
261 })
262 }
263}
264
265impl Predict<f64> for FittedGaussianProcessRegressor {
266 fn predict(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
267 if x.ncols() != self.x_train.ncols() {
268 return Err(RustMlError::ShapeMismatch(format!(
269 "expected {} features, got {}",
270 self.x_train.ncols(),
271 x.ncols()
272 )));
273 }
274 let k_star = build_gram(x, &self.x_train, &self.kernel);
275 let mean_norm = k_star.dot(&self.dual);
276 Ok(mean_norm.mapv(|v| v * self.y_std + self.y_mean))
277 }
278}
279
280pub fn log_marginal_likelihood(
287 x: &Array2<f64>,
288 y: &Array1<f64>,
289 kernel: &GpKernel,
290 alpha: f64,
291) -> Result<f64> {
292 let n = x.nrows();
293 if y.len() != n {
294 return Err(RustMlError::ShapeMismatch(format!(
295 "X has {} rows but y has {}",
296 n,
297 y.len()
298 )));
299 }
300 let mut k = build_gram(x, x, kernel);
301 for i in 0..n {
302 k[[i, i]] += alpha;
303 }
304 let km = Mat::from_fn(n, n, |i, j| k[[i, j]]);
305 let llt = match faer::linalg::solvers::Llt::new(km.as_ref(), Side::Lower) {
306 Ok(llt) => llt,
307 Err(_) => return Ok(f64::NEG_INFINITY),
309 };
310 let ym = Mat::from_fn(n, 1, |i, _| y[i]);
311 let sol = llt.solve(&ym);
312 let mut yt_k_inv_y = 0.0;
313 for i in 0..n {
314 yt_k_inv_y += y[i] * sol[(i, 0)];
315 }
316 let lower = llt.L();
317 let mut log_det = 0.0;
318 for i in 0..n {
319 log_det += lower[(i, i)].abs().ln();
320 }
321 let log_det = 2.0 * log_det;
322 let two_pi = 2.0 * std::f64::consts::PI;
323 Ok(-0.5 * yt_k_inv_y - 0.5 * log_det - 0.5 * n as f64 * two_pi.ln())
324}
325
326#[derive(Debug, Clone)]
328pub struct KernelOptimResult {
329 pub log_params: Vec<f64>,
330 pub log_marginal_likelihood: f64,
331 pub n_iter: usize,
332 pub converged: bool,
333}
334
335pub fn optimize_kernel_lbfgs(
352 x: &Array2<f64>,
353 y: &Array1<f64>,
354 alpha: f64,
355 log_params_init: &[f64],
356 build: impl Fn(&[f64]) -> GpKernel,
357 n_iter: usize,
358 fd_step: f64,
359 grad_tol: f64,
360) -> Result<KernelOptimResult> {
361 let n_params = log_params_init.len();
362 let neg_lml =
363 |p: &[f64]| -> Result<f64> { log_marginal_likelihood(x, y, &build(p), alpha).map(|v| -v) };
364 let grad_fd = |p: &[f64], f0: f64| -> Result<Vec<f64>> {
365 let mut g = vec![0.0_f64; n_params];
366 let mut p_mut = p.to_vec();
367 for i in 0..n_params {
368 let orig = p_mut[i];
369 p_mut[i] = orig + fd_step;
370 let fp = neg_lml(&p_mut)?;
371 p_mut[i] = orig;
372 g[i] = (fp - f0) / fd_step;
375 }
376 Ok(g)
377 };
378
379 let mut p = log_params_init.to_vec();
380 let mut f = neg_lml(&p)?;
381 let mut g = grad_fd(&p, f)?;
382 let mut h = vec![vec![0.0_f64; n_params]; n_params];
384 for i in 0..n_params {
385 h[i][i] = 1.0;
386 }
387
388 let mut converged = false;
389 let mut iters = 0;
390 for it in 0..n_iter {
391 iters = it + 1;
392 let mut d = vec![0.0_f64; n_params];
394 for i in 0..n_params {
395 let mut s = 0.0;
396 for j in 0..n_params {
397 s -= h[i][j] * g[j];
398 }
399 d[i] = s;
400 }
401 let g_dot_d: f64 = g.iter().zip(d.iter()).map(|(a, b)| a * b).sum();
403 if g_dot_d >= 0.0 {
404 for i in 0..n_params {
406 for j in 0..n_params {
407 h[i][j] = 0.0;
408 }
409 h[i][i] = 1.0;
410 }
411 for i in 0..n_params {
412 d[i] = -g[i];
413 }
414 }
415 let mut step = 1.0_f64;
416 let c1 = 1e-4;
417 let mut p_new;
418 let mut f_new;
419 let mut ls_iter = 0;
420 loop {
421 ls_iter += 1;
422 p_new = p
423 .iter()
424 .zip(d.iter())
425 .map(|(a, b)| a + step * b)
426 .collect::<Vec<_>>();
427 f_new = neg_lml(&p_new)?;
428 if f_new.is_finite() && f_new <= f + c1 * step * g_dot_d {
429 break;
430 }
431 step *= 0.5;
432 if step < 1e-12 || ls_iter > 50 {
433 break;
435 }
436 }
437
438 let s_vec: Vec<f64> = p_new.iter().zip(p.iter()).map(|(a, b)| a - b).collect();
439 let g_new = grad_fd(&p_new, f_new)?;
440 let y_vec: Vec<f64> = g_new.iter().zip(g.iter()).map(|(a, b)| a - b).collect();
441 let sy: f64 = s_vec.iter().zip(y_vec.iter()).map(|(a, b)| a * b).sum();
442
443 if sy > 1e-12 {
444 let rho = 1.0 / sy;
448 let mut hy = vec![0.0_f64; n_params];
449 for i in 0..n_params {
450 for j in 0..n_params {
451 hy[i] += h[i][j] * y_vec[j];
452 }
453 }
454 let yhy: f64 = y_vec.iter().zip(hy.iter()).map(|(a, b)| a * b).sum();
455 for i in 0..n_params {
456 for j in 0..n_params {
457 h[i][j] = h[i][j] - rho * (s_vec[i] * hy[j] + hy[i] * s_vec[j])
458 + rho * (rho * yhy + 1.0) * s_vec[i] * s_vec[j];
459 }
460 }
461 }
462
463 p = p_new;
464 f = f_new;
465 g = g_new;
466
467 let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
468 if gnorm < grad_tol {
469 converged = true;
470 break;
471 }
472 }
473
474 Ok(KernelOptimResult {
475 log_params: p,
476 log_marginal_likelihood: -f,
477 n_iter: iters,
478 converged,
479 })
480}
481
482pub fn optimize_rbf_length_scale(
491 x: &Array2<f64>,
492 y: &Array1<f64>,
493 signal_var: f64,
494 alpha: f64,
495 log_lo: f64,
496 log_hi: f64,
497 n_iter: usize,
498) -> Result<f64> {
499 let phi = (1.0 + 5.0_f64.sqrt()) / 2.0;
500 let resphi = 2.0 - phi; let mut a = log_lo;
502 let mut b = log_hi;
503 let mut c = b - resphi * (b - a);
504 let mut d = a + resphi * (b - a);
505 let f = |log_ls: f64| -> Result<f64> {
506 let k = GpKernel::Rbf {
508 length_scale: log_ls.exp(),
509 signal_var,
510 };
511 log_marginal_likelihood(x, y, &k, alpha).map(|v| -v)
512 };
513 let mut fc = f(c)?;
514 let mut fd = f(d)?;
515 for _ in 0..n_iter {
516 if fc < fd {
517 b = d;
518 d = c;
519 fd = fc;
520 c = b - resphi * (b - a);
521 fc = f(c)?;
522 } else {
523 a = c;
524 c = d;
525 fc = fd;
526 d = a + resphi * (b - a);
527 fd = f(d)?;
528 }
529 }
530 Ok(0.5 * (a + b)).map(|log_ls| log_ls.exp())
531}
532
533impl FittedGaussianProcessRegressor {
534 pub fn predict_std(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
536 let n_train = self.x_train.nrows();
537 if x.ncols() != self.x_train.ncols() {
538 return Err(RustMlError::ShapeMismatch(format!(
539 "expected {} features, got {}",
540 self.x_train.ncols(),
541 x.ncols()
542 )));
543 }
544 let n_new = x.nrows();
545 let k_star = build_gram(x, &self.x_train, &self.kernel);
546 let mut std_out = Array1::<f64>::zeros(n_new);
547 for i in 0..n_new {
548 let rhs = Mat::from_fn(n_train, 1, |j, _| k_star[[i, j]]);
549 let n = n_train;
550 let mut v = vec![0.0_f64; n];
551 for r in 0..n {
552 let mut s = rhs[(r, 0)];
553 for c in 0..r {
554 s -= self.l_lower[(r, c)] * v[c];
555 }
556 v[r] = s / self.l_lower[(r, r)].max(1e-12);
557 }
558 let v_sq: f64 = v.iter().map(|x| x * x).sum();
559 let xi = x.row(i).to_owned();
560 let k_xx = self
561 .kernel
562 .compute(xi.as_slice().unwrap(), xi.as_slice().unwrap());
563 let var = (k_xx - v_sq).max(0.0);
564 std_out[i] = var.sqrt() * self.y_std;
565 }
566 Ok(std_out)
567 }
568}
569
570#[cfg(test)]
571mod tests {
572 use super::*;
573 use ndarray::array;
574
575 #[test]
576 fn test_gp_rbf_interpolates_with_low_noise() {
577 let x = Array2::from_shape_vec((6, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
578 let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
579 let kernel = GpKernel::Rbf {
580 length_scale: 1.0,
581 signal_var: 1.0,
582 };
583 let fitted = GaussianProcessRegressor::new(kernel)
584 .with_alpha(1e-8)
585 .fit(&x, &y)
586 .unwrap();
587 let p = fitted.predict(&x).unwrap();
588 for i in 0..6 {
589 assert!((p[i] - y[i]).abs() < 1e-4, "[{i}]: {} vs {}", p[i], y[i]);
590 }
591 let _ = array![1.0_f64];
592 }
593
594 #[test]
595 fn test_gp_matern_nu_2p5_interpolates() {
596 let x = Array2::from_shape_vec((5, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0]).unwrap();
597 let y: Array1<f64> = x.column(0).mapv(|v: f64| v.cos());
598 let kernel = GpKernel::Matern {
599 length_scale: 1.0,
600 signal_var: 1.0,
601 nu: 2.5,
602 };
603 let fitted = GaussianProcessRegressor::new(kernel)
604 .with_alpha(1e-8)
605 .fit(&x, &y)
606 .unwrap();
607 let p = fitted.predict(&x).unwrap();
608 for i in 0..5 {
609 assert!((p[i] - y[i]).abs() < 1e-3, "[{i}]: {} vs {}", p[i], y[i]);
610 }
611 }
612
613 #[test]
614 fn test_gp_rational_quadratic_runs() {
615 let x = Array2::from_shape_vec((5, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0]).unwrap();
616 let y = array![0.0, 1.0, 0.5, -0.5, 0.0];
617 let kernel = GpKernel::RationalQuadratic {
618 length_scale: 1.0,
619 signal_var: 1.0,
620 alpha: 0.5,
621 };
622 let fitted = GaussianProcessRegressor::new(kernel)
623 .with_alpha(1e-6)
624 .fit(&x, &y)
625 .unwrap();
626 let p = fitted.predict(&x).unwrap();
627 for v in p.iter() {
628 assert!(v.is_finite());
629 }
630 }
631
632 #[test]
633 fn test_optimize_rbf_length_scale_picks_sensible_value() {
634 let x =
637 Array2::from_shape_vec((20, 1), (0..20).map(|i| (i as f64) * 0.3).collect()).unwrap();
638 let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
639 let best = optimize_rbf_length_scale(&x, &y, 1.0, 1e-6, -2.0, 2.0, 30).unwrap();
640 assert!(best > 0.3 && best < 4.0, "best length_scale = {best}");
641 }
642
643 #[test]
644 fn test_optimize_kernel_lbfgs_rbf_two_params() {
645 let x =
647 Array2::from_shape_vec((20, 1), (0..20).map(|i| (i as f64) * 0.3).collect()).unwrap();
648 let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
649 let init = vec![0.0_f64, 0.0]; let res = optimize_kernel_lbfgs(
651 &x,
652 &y,
653 1e-6,
654 &init,
655 |p| GpKernel::Rbf {
656 length_scale: p[0].exp(),
657 signal_var: p[1].exp(),
658 },
659 50,
660 1e-4,
661 1e-3,
662 )
663 .unwrap();
664 let lml_init = log_marginal_likelihood(
666 &x,
667 &y,
668 &GpKernel::Rbf {
669 length_scale: 1.0,
670 signal_var: 1.0,
671 },
672 1e-6,
673 )
674 .unwrap();
675 assert!(
676 res.log_marginal_likelihood >= lml_init - 1e-9,
677 "optimiser regressed: init {} → final {}",
678 lml_init,
679 res.log_marginal_likelihood
680 );
681 let ls_opt = res.log_params[0].exp();
683 assert!(ls_opt > 0.1 && ls_opt < 20.0, "length_scale {ls_opt}");
684 }
685
686 #[test]
687 fn test_log_marginal_likelihood_monotonic_in_data() {
688 let x = Array2::from_shape_vec((10, 1), (0..10).map(|i| i as f64 * 0.3).collect()).unwrap();
690 let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
691 let good = GpKernel::Rbf {
692 length_scale: 1.0,
693 signal_var: 1.0,
694 };
695 let bad = GpKernel::Rbf {
696 length_scale: 100.0,
697 signal_var: 1.0,
698 };
699 let lml_good = log_marginal_likelihood(&x, &y, &good, 1e-6).unwrap();
700 let lml_bad = log_marginal_likelihood(&x, &y, &bad, 1e-6).unwrap();
701 assert!(lml_good > lml_bad, "good={lml_good}, bad={lml_bad}");
702 }
703
704 #[test]
705 fn test_gp_sum_of_kernels() {
706 let x = Array2::from_shape_vec((10, 1), (0..10).map(|i| i as f64).collect()).unwrap();
708 let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin() + 0.05);
709 let kernel = GpKernel::Rbf {
710 length_scale: 2.0,
711 signal_var: 1.0,
712 }
713 .add(GpKernel::White { noise_level: 0.01 });
714 let fitted = GaussianProcessRegressor::new(kernel)
715 .with_alpha(1e-8)
716 .fit(&x, &y)
717 .unwrap();
718 let p = fitted.predict(&x).unwrap();
719 for (a, b) in p.iter().zip(y.iter()) {
720 assert!((a - b).abs() < 0.1, "predict {} vs y {}", a, b);
721 }
722 }
723}
724
725impl anofox_ml_core::RegressorScore<f64> for FittedGaussianProcessRegressor {}