Skip to main content

anofox_ml_gaussian_process/
lib.rs

1//! Gaussian Process Regression.
2//!
3//! Mirrors `sklearn.gaussian_process.GaussianProcessRegressor` with a fixed
4//! kernel composed from the kernel zoo below. Hyperparameter learning is not
5//! yet implemented — provide explicit kernel parameters.
6//!
7//! ## Kernels supported
8//!
9//! - `Rbf` — squared-exponential `σ² exp(-||x-x'||² / (2ℓ²))`
10//! - `Matern` — Matern kernel for `ν ∈ {0.5, 1.5, 2.5}` (closed-form
11//!   parameterisations)
12//! - `RationalQuadratic` — `(1 + ||x-x'||² / (2αℓ²))^(-α)`
13//! - `White` — `σ²` if `x == x'` else `0` (diagonal noise)
14//! - `Constant` — `σ²` everywhere
15//! - `Sum` / `Product` — composite kernels
16
17pub mod classifier;
18pub use classifier::{
19    FittedGaussianProcessClassifier, FittedMulticlassGaussianProcessClassifier,
20    GaussianProcessClassifier, MulticlassGaussianProcessClassifier,
21};
22
23use anofox_ml_core::{Fit, Predict, Result, RustMlError};
24use faer::linalg::solvers::Solve;
25use faer::{Mat, Side};
26use ndarray::{Array1, Array2};
27
28/// Composable kernel.
29pub enum GpKernel {
30    Rbf {
31        length_scale: f64,
32        signal_var: f64,
33    },
34    /// Matern with `nu in {0.5, 1.5, 2.5}`. Other values would require Bessel
35    /// functions — currently restricted.
36    Matern {
37        length_scale: f64,
38        signal_var: f64,
39        nu: f64,
40    },
41    RationalQuadratic {
42        length_scale: f64,
43        signal_var: f64,
44        alpha: f64,
45    },
46    /// Adds `noise_level` to the diagonal of K(X, X). Zero off-diagonal.
47    White {
48        noise_level: f64,
49    },
50    Constant {
51        value: f64,
52    },
53    Sum(Box<GpKernel>, Box<GpKernel>),
54    Product(Box<GpKernel>, Box<GpKernel>),
55}
56
57impl GpKernel {
58    /// Compute `k(a, b)` for two single feature vectors. Note: the `White`
59    /// kernel returns `0` for `a != b` and `noise_level` for `a == b` (exact
60    /// equality). For predictive variance at new query points, only the
61    /// diagonal entries matter.
62    fn compute(&self, a: &[f64], b: &[f64]) -> f64 {
63        match self {
64            GpKernel::Rbf {
65                length_scale,
66                signal_var,
67            } => {
68                let sd: f64 = a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum();
69                signal_var * (-0.5 * sd / (length_scale * length_scale)).exp()
70            }
71            GpKernel::Matern {
72                length_scale,
73                signal_var,
74                nu,
75            } => {
76                let d: f64 = a
77                    .iter()
78                    .zip(b.iter())
79                    .map(|(x, y)| (x - y).powi(2))
80                    .sum::<f64>()
81                    .sqrt();
82                let r = d / length_scale;
83                let v = if (nu - 0.5).abs() < 1e-9 {
84                    (-r).exp()
85                } else if (nu - 1.5).abs() < 1e-9 {
86                    let sqrt3_r = 3.0_f64.sqrt() * r;
87                    (1.0 + sqrt3_r) * (-sqrt3_r).exp()
88                } else if (nu - 2.5).abs() < 1e-9 {
89                    let sqrt5_r = 5.0_f64.sqrt() * r;
90                    (1.0 + sqrt5_r + 5.0 / 3.0 * r * r) * (-sqrt5_r).exp()
91                } else {
92                    // Fallback to RBF if unsupported nu.
93                    (-0.5 * (d * d) / (length_scale * length_scale)).exp()
94                };
95                signal_var * v
96            }
97            GpKernel::RationalQuadratic {
98                length_scale,
99                signal_var,
100                alpha,
101            } => {
102                let sd: f64 = a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum();
103                let base = 1.0 + sd / (2.0 * alpha * length_scale * length_scale);
104                signal_var * base.powf(-alpha)
105            }
106            GpKernel::White { noise_level } => {
107                // Compare for floating-point equality across all dims.
108                if a.iter().zip(b.iter()).all(|(x, y)| x == y) {
109                    *noise_level
110                } else {
111                    0.0
112                }
113            }
114            GpKernel::Constant { value } => *value,
115            GpKernel::Sum(k1, k2) => k1.compute(a, b) + k2.compute(a, b),
116            GpKernel::Product(k1, k2) => k1.compute(a, b) * k2.compute(a, b),
117        }
118    }
119
120    /// Helper to build `Rbf * Constant + White`-style composites.
121    pub fn product(self, other: GpKernel) -> GpKernel {
122        GpKernel::Product(Box::new(self), Box::new(other))
123    }
124    pub fn add(self, other: GpKernel) -> GpKernel {
125        GpKernel::Sum(Box::new(self), Box::new(other))
126    }
127}
128
129pub struct GaussianProcessRegressor {
130    pub kernel: GpKernel,
131    pub alpha: f64,
132    pub normalize_y: bool,
133}
134
135impl GaussianProcessRegressor {
136    pub fn new(kernel: GpKernel) -> Self {
137        Self {
138            kernel,
139            alpha: 1e-10,
140            normalize_y: false,
141        }
142    }
143    pub fn with_alpha(mut self, a: f64) -> Self {
144        self.alpha = a;
145        self
146    }
147    pub fn with_normalize_y(mut self, v: bool) -> Self {
148        self.normalize_y = v;
149        self
150    }
151}
152
153pub struct FittedGaussianProcessRegressor {
154    pub x_train: Array2<f64>,
155    pub y_train: Array1<f64>,
156    /// Lower Cholesky factor of `K + αI`.
157    pub l_lower: Mat<f64>,
158    /// `α = L⁻ᵀ L⁻¹ y` — used in mean prediction.
159    pub dual: Array1<f64>,
160    pub kernel: GpKernel,
161    pub y_mean: f64,
162    pub y_std: f64,
163}
164
165pub(crate) fn build_gram(x_a: &Array2<f64>, x_b: &Array2<f64>, kernel: &GpKernel) -> Array2<f64> {
166    let na = x_a.nrows();
167    let nb = x_b.nrows();
168    let mut out = Array2::<f64>::zeros((na, nb));
169    for i in 0..na {
170        let ai = x_a.row(i).to_owned();
171        for j in 0..nb {
172            let bj = x_b.row(j).to_owned();
173            out[[i, j]] = kernel.compute(ai.as_slice().unwrap(), bj.as_slice().unwrap());
174        }
175    }
176    out
177}
178
179fn clone_kernel(k: &GpKernel) -> GpKernel {
180    match k {
181        GpKernel::Rbf {
182            length_scale,
183            signal_var,
184        } => GpKernel::Rbf {
185            length_scale: *length_scale,
186            signal_var: *signal_var,
187        },
188        GpKernel::Matern {
189            length_scale,
190            signal_var,
191            nu,
192        } => GpKernel::Matern {
193            length_scale: *length_scale,
194            signal_var: *signal_var,
195            nu: *nu,
196        },
197        GpKernel::RationalQuadratic {
198            length_scale,
199            signal_var,
200            alpha,
201        } => GpKernel::RationalQuadratic {
202            length_scale: *length_scale,
203            signal_var: *signal_var,
204            alpha: *alpha,
205        },
206        GpKernel::White { noise_level } => GpKernel::White {
207            noise_level: *noise_level,
208        },
209        GpKernel::Constant { value } => GpKernel::Constant { value: *value },
210        GpKernel::Sum(a, b) => GpKernel::Sum(Box::new(clone_kernel(a)), Box::new(clone_kernel(b))),
211        GpKernel::Product(a, b) => {
212            GpKernel::Product(Box::new(clone_kernel(a)), Box::new(clone_kernel(b)))
213        }
214    }
215}
216
217impl Fit<f64> for GaussianProcessRegressor {
218    type Fitted = FittedGaussianProcessRegressor;
219
220    fn fit(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
221        let n = x.nrows();
222        if x.nrows() != y.len() {
223            return Err(RustMlError::ShapeMismatch(format!(
224                "X has {} rows but y has {}",
225                x.nrows(),
226                y.len()
227            )));
228        }
229        let (y_mean, y_std, y_norm) = if self.normalize_y {
230            let m = y.sum() / n as f64;
231            let v: f64 = y.iter().map(|v| (v - m).powi(2)).sum::<f64>() / n as f64;
232            let s = v.sqrt().max(1e-12);
233            let yn = y.mapv(|v| (v - m) / s);
234            (m, s, yn)
235        } else {
236            (0.0, 1.0, y.clone())
237        };
238
239        let mut k = build_gram(x, x, &self.kernel);
240        for i in 0..n {
241            k[[i, i]] += self.alpha;
242        }
243        let km = Mat::from_fn(n, n, |i, j| k[[i, j]]);
244        let llt = faer::linalg::solvers::Llt::new(km.as_ref(), Side::Lower)
245            .map_err(|e| RustMlError::InvalidParameter(format!("Cholesky failed: {e:?}")))?;
246        let ym = Mat::from_fn(n, 1, |i, _| y_norm[i]);
247        let sol = llt.solve(&ym);
248        let dual: Array1<f64> = Array1::from_vec((0..n).map(|i| sol[(i, 0)]).collect());
249        // Save the lower factor for variance prediction.
250        let lower = llt.L();
251        let l = Mat::from_fn(n, n, |i, j| lower[(i, j)]);
252
253        Ok(FittedGaussianProcessRegressor {
254            x_train: x.clone(),
255            y_train: y_norm,
256            l_lower: l,
257            dual,
258            kernel: clone_kernel(&self.kernel),
259            y_mean,
260            y_std,
261        })
262    }
263}
264
265impl Predict<f64> for FittedGaussianProcessRegressor {
266    fn predict(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
267        if x.ncols() != self.x_train.ncols() {
268            return Err(RustMlError::ShapeMismatch(format!(
269                "expected {} features, got {}",
270                self.x_train.ncols(),
271                x.ncols()
272            )));
273        }
274        let k_star = build_gram(x, &self.x_train, &self.kernel);
275        let mean_norm = k_star.dot(&self.dual);
276        Ok(mean_norm.mapv(|v| v * self.y_std + self.y_mean))
277    }
278}
279
280/// Compute the log-marginal-likelihood `log p(y | X, kernel, alpha)` for a
281/// given kernel and noise level on `(X, y)`.
282///
283/// `log p(y|X,θ) = -0.5 yᵀ (K + αI)⁻¹ y - 0.5 log|K + αI| - n/2 log(2π)`
284///
285/// Used by hyperparameter-search loops below.
286pub fn log_marginal_likelihood(
287    x: &Array2<f64>,
288    y: &Array1<f64>,
289    kernel: &GpKernel,
290    alpha: f64,
291) -> Result<f64> {
292    let n = x.nrows();
293    if y.len() != n {
294        return Err(RustMlError::ShapeMismatch(format!(
295            "X has {} rows but y has {}",
296            n,
297            y.len()
298        )));
299    }
300    let mut k = build_gram(x, x, kernel);
301    for i in 0..n {
302        k[[i, i]] += alpha;
303    }
304    let km = Mat::from_fn(n, n, |i, j| k[[i, j]]);
305    let llt = match faer::linalg::solvers::Llt::new(km.as_ref(), Side::Lower) {
306        Ok(llt) => llt,
307        // Non-PD kernel matrices yield −∞ likelihood — caller can compare safely.
308        Err(_) => return Ok(f64::NEG_INFINITY),
309    };
310    let ym = Mat::from_fn(n, 1, |i, _| y[i]);
311    let sol = llt.solve(&ym);
312    let mut yt_k_inv_y = 0.0;
313    for i in 0..n {
314        yt_k_inv_y += y[i] * sol[(i, 0)];
315    }
316    let lower = llt.L();
317    let mut log_det = 0.0;
318    for i in 0..n {
319        log_det += lower[(i, i)].abs().ln();
320    }
321    let log_det = 2.0 * log_det;
322    let two_pi = 2.0 * std::f64::consts::PI;
323    Ok(-0.5 * yt_k_inv_y - 0.5 * log_det - 0.5 * n as f64 * two_pi.ln())
324}
325
326/// Result of multi-parameter hyperparameter optimisation.
327#[derive(Debug, Clone)]
328pub struct KernelOptimResult {
329    pub log_params: Vec<f64>,
330    pub log_marginal_likelihood: f64,
331    pub n_iter: usize,
332    pub converged: bool,
333}
334
335/// Multivariate quasi-Newton (BFGS) optimisation of arbitrary kernel
336/// hyperparameters on the log-scale, maximising log-marginal likelihood.
337///
338/// `build` turns a parameter vector (in log-space) into a `GpKernel`. The
339/// optimiser starts from `log_params_init`, computes finite-difference
340/// gradients with step `fd_step`, and updates a dense inverse-Hessian
341/// approximation via the BFGS formula. Backtracking line search guarantees
342/// monotone increase in log-likelihood.
343///
344/// For low-dim problems (typical kernel zoo has ≤ 5 params) BFGS converges
345/// in a handful of iterations and is more practical than L-BFGS, which adds
346/// history-buffer bookkeeping for negligible benefit at this scale.
347///
348/// Returns the optimised log-parameters, the achieved log-marginal
349/// likelihood, iteration count, and whether the gradient-norm stop
350/// criterion fired.
351pub fn optimize_kernel_lbfgs(
352    x: &Array2<f64>,
353    y: &Array1<f64>,
354    alpha: f64,
355    log_params_init: &[f64],
356    build: impl Fn(&[f64]) -> GpKernel,
357    n_iter: usize,
358    fd_step: f64,
359    grad_tol: f64,
360) -> Result<KernelOptimResult> {
361    let n_params = log_params_init.len();
362    let neg_lml =
363        |p: &[f64]| -> Result<f64> { log_marginal_likelihood(x, y, &build(p), alpha).map(|v| -v) };
364    let grad_fd = |p: &[f64], f0: f64| -> Result<Vec<f64>> {
365        let mut g = vec![0.0_f64; n_params];
366        let mut p_mut = p.to_vec();
367        for i in 0..n_params {
368            let orig = p_mut[i];
369            p_mut[i] = orig + fd_step;
370            let fp = neg_lml(&p_mut)?;
371            p_mut[i] = orig;
372            // Forward-difference. Cheaper than central, accurate enough for
373            // log-space hyperparameters where fd_step ~ 1e-4.
374            g[i] = (fp - f0) / fd_step;
375        }
376        Ok(g)
377    };
378
379    let mut p = log_params_init.to_vec();
380    let mut f = neg_lml(&p)?;
381    let mut g = grad_fd(&p, f)?;
382    // Inverse-Hessian approximation, initially identity.
383    let mut h = vec![vec![0.0_f64; n_params]; n_params];
384    for i in 0..n_params {
385        h[i][i] = 1.0;
386    }
387
388    let mut converged = false;
389    let mut iters = 0;
390    for it in 0..n_iter {
391        iters = it + 1;
392        // Search direction d = -H g
393        let mut d = vec![0.0_f64; n_params];
394        for i in 0..n_params {
395            let mut s = 0.0;
396            for j in 0..n_params {
397                s -= h[i][j] * g[j];
398            }
399            d[i] = s;
400        }
401        // Backtracking line search (Armijo with c1 = 1e-4).
402        let g_dot_d: f64 = g.iter().zip(d.iter()).map(|(a, b)| a * b).sum();
403        if g_dot_d >= 0.0 {
404            // Not a descent direction (numerical issue) — reset H.
405            for i in 0..n_params {
406                for j in 0..n_params {
407                    h[i][j] = 0.0;
408                }
409                h[i][i] = 1.0;
410            }
411            for i in 0..n_params {
412                d[i] = -g[i];
413            }
414        }
415        let mut step = 1.0_f64;
416        let c1 = 1e-4;
417        let mut p_new;
418        let mut f_new;
419        let mut ls_iter = 0;
420        loop {
421            ls_iter += 1;
422            p_new = p
423                .iter()
424                .zip(d.iter())
425                .map(|(a, b)| a + step * b)
426                .collect::<Vec<_>>();
427            f_new = neg_lml(&p_new)?;
428            if f_new.is_finite() && f_new <= f + c1 * step * g_dot_d {
429                break;
430            }
431            step *= 0.5;
432            if step < 1e-12 || ls_iter > 50 {
433                // Line search failed — accept whatever we have and stop.
434                break;
435            }
436        }
437
438        let s_vec: Vec<f64> = p_new.iter().zip(p.iter()).map(|(a, b)| a - b).collect();
439        let g_new = grad_fd(&p_new, f_new)?;
440        let y_vec: Vec<f64> = g_new.iter().zip(g.iter()).map(|(a, b)| a - b).collect();
441        let sy: f64 = s_vec.iter().zip(y_vec.iter()).map(|(a, b)| a * b).sum();
442
443        if sy > 1e-12 {
444            // BFGS inverse-Hessian update:
445            //   H_new = (I - ρ s yᵀ) H (I - ρ y sᵀ) + ρ s sᵀ
446            // Compute H y, then update.
447            let rho = 1.0 / sy;
448            let mut hy = vec![0.0_f64; n_params];
449            for i in 0..n_params {
450                for j in 0..n_params {
451                    hy[i] += h[i][j] * y_vec[j];
452                }
453            }
454            let yhy: f64 = y_vec.iter().zip(hy.iter()).map(|(a, b)| a * b).sum();
455            for i in 0..n_params {
456                for j in 0..n_params {
457                    h[i][j] = h[i][j] - rho * (s_vec[i] * hy[j] + hy[i] * s_vec[j])
458                        + rho * (rho * yhy + 1.0) * s_vec[i] * s_vec[j];
459                }
460            }
461        }
462
463        p = p_new;
464        f = f_new;
465        g = g_new;
466
467        let gnorm: f64 = g.iter().map(|v| v * v).sum::<f64>().sqrt();
468        if gnorm < grad_tol {
469            converged = true;
470            break;
471        }
472    }
473
474    Ok(KernelOptimResult {
475        log_params: p,
476        log_marginal_likelihood: -f,
477        n_iter: iters,
478        converged,
479    })
480}
481
482/// Find the length_scale (RBF kernel) that maximises log-marginal-likelihood
483/// via golden-section search over `log(length_scale)`. Other kernel
484/// parameters are kept fixed at the provided values.
485///
486/// Mirrors what `sklearn.gaussian_process.GaussianProcessRegressor` does when
487/// `optimizer='fmin_l_bfgs_b'` is engaged for a single RBF hyperparameter —
488/// in practice they use L-BFGS, but a golden-section sweep on log-scale is a
489/// strong baseline.
490pub fn optimize_rbf_length_scale(
491    x: &Array2<f64>,
492    y: &Array1<f64>,
493    signal_var: f64,
494    alpha: f64,
495    log_lo: f64,
496    log_hi: f64,
497    n_iter: usize,
498) -> Result<f64> {
499    let phi = (1.0 + 5.0_f64.sqrt()) / 2.0;
500    let resphi = 2.0 - phi; // ≈ 0.382
501    let mut a = log_lo;
502    let mut b = log_hi;
503    let mut c = b - resphi * (b - a);
504    let mut d = a + resphi * (b - a);
505    let f = |log_ls: f64| -> Result<f64> {
506        // Maximise → return value (higher better); we minimise the negation.
507        let k = GpKernel::Rbf {
508            length_scale: log_ls.exp(),
509            signal_var,
510        };
511        log_marginal_likelihood(x, y, &k, alpha).map(|v| -v)
512    };
513    let mut fc = f(c)?;
514    let mut fd = f(d)?;
515    for _ in 0..n_iter {
516        if fc < fd {
517            b = d;
518            d = c;
519            fd = fc;
520            c = b - resphi * (b - a);
521            fc = f(c)?;
522        } else {
523            a = c;
524            c = d;
525            fc = fd;
526            d = a + resphi * (b - a);
527            fd = f(d)?;
528        }
529    }
530    Ok(0.5 * (a + b)).map(|log_ls| log_ls.exp())
531}
532
533impl FittedGaussianProcessRegressor {
534    /// Posterior standard deviation per query point (in target scale).
535    pub fn predict_std(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
536        let n_train = self.x_train.nrows();
537        if x.ncols() != self.x_train.ncols() {
538            return Err(RustMlError::ShapeMismatch(format!(
539                "expected {} features, got {}",
540                self.x_train.ncols(),
541                x.ncols()
542            )));
543        }
544        let n_new = x.nrows();
545        let k_star = build_gram(x, &self.x_train, &self.kernel);
546        let mut std_out = Array1::<f64>::zeros(n_new);
547        for i in 0..n_new {
548            let rhs = Mat::from_fn(n_train, 1, |j, _| k_star[[i, j]]);
549            let n = n_train;
550            let mut v = vec![0.0_f64; n];
551            for r in 0..n {
552                let mut s = rhs[(r, 0)];
553                for c in 0..r {
554                    s -= self.l_lower[(r, c)] * v[c];
555                }
556                v[r] = s / self.l_lower[(r, r)].max(1e-12);
557            }
558            let v_sq: f64 = v.iter().map(|x| x * x).sum();
559            let xi = x.row(i).to_owned();
560            let k_xx = self
561                .kernel
562                .compute(xi.as_slice().unwrap(), xi.as_slice().unwrap());
563            let var = (k_xx - v_sq).max(0.0);
564            std_out[i] = var.sqrt() * self.y_std;
565        }
566        Ok(std_out)
567    }
568}
569
570#[cfg(test)]
571mod tests {
572    use super::*;
573    use ndarray::array;
574
575    #[test]
576    fn test_gp_rbf_interpolates_with_low_noise() {
577        let x = Array2::from_shape_vec((6, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
578        let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
579        let kernel = GpKernel::Rbf {
580            length_scale: 1.0,
581            signal_var: 1.0,
582        };
583        let fitted = GaussianProcessRegressor::new(kernel)
584            .with_alpha(1e-8)
585            .fit(&x, &y)
586            .unwrap();
587        let p = fitted.predict(&x).unwrap();
588        for i in 0..6 {
589            assert!((p[i] - y[i]).abs() < 1e-4, "[{i}]: {} vs {}", p[i], y[i]);
590        }
591        let _ = array![1.0_f64];
592    }
593
594    #[test]
595    fn test_gp_matern_nu_2p5_interpolates() {
596        let x = Array2::from_shape_vec((5, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0]).unwrap();
597        let y: Array1<f64> = x.column(0).mapv(|v: f64| v.cos());
598        let kernel = GpKernel::Matern {
599            length_scale: 1.0,
600            signal_var: 1.0,
601            nu: 2.5,
602        };
603        let fitted = GaussianProcessRegressor::new(kernel)
604            .with_alpha(1e-8)
605            .fit(&x, &y)
606            .unwrap();
607        let p = fitted.predict(&x).unwrap();
608        for i in 0..5 {
609            assert!((p[i] - y[i]).abs() < 1e-3, "[{i}]: {} vs {}", p[i], y[i]);
610        }
611    }
612
613    #[test]
614    fn test_gp_rational_quadratic_runs() {
615        let x = Array2::from_shape_vec((5, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0]).unwrap();
616        let y = array![0.0, 1.0, 0.5, -0.5, 0.0];
617        let kernel = GpKernel::RationalQuadratic {
618            length_scale: 1.0,
619            signal_var: 1.0,
620            alpha: 0.5,
621        };
622        let fitted = GaussianProcessRegressor::new(kernel)
623            .with_alpha(1e-6)
624            .fit(&x, &y)
625            .unwrap();
626        let p = fitted.predict(&x).unwrap();
627        for v in p.iter() {
628            assert!(v.is_finite());
629        }
630    }
631
632    #[test]
633    fn test_optimize_rbf_length_scale_picks_sensible_value() {
634        // Generate y = sin(x); the "right" length scale should be around 1 (the
635        // period of the function in x ∈ [0,5]).
636        let x =
637            Array2::from_shape_vec((20, 1), (0..20).map(|i| (i as f64) * 0.3).collect()).unwrap();
638        let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
639        let best = optimize_rbf_length_scale(&x, &y, 1.0, 1e-6, -2.0, 2.0, 30).unwrap();
640        assert!(best > 0.3 && best < 4.0, "best length_scale = {best}");
641    }
642
643    #[test]
644    fn test_optimize_kernel_lbfgs_rbf_two_params() {
645        // Jointly optimise log(length_scale) and log(signal_var) on a sine.
646        let x =
647            Array2::from_shape_vec((20, 1), (0..20).map(|i| (i as f64) * 0.3).collect()).unwrap();
648        let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
649        let init = vec![0.0_f64, 0.0]; // log ls, log var (both = 1.0 in linear)
650        let res = optimize_kernel_lbfgs(
651            &x,
652            &y,
653            1e-6,
654            &init,
655            |p| GpKernel::Rbf {
656                length_scale: p[0].exp(),
657                signal_var: p[1].exp(),
658            },
659            50,
660            1e-4,
661            1e-3,
662        )
663        .unwrap();
664        // Should beat the initial point.
665        let lml_init = log_marginal_likelihood(
666            &x,
667            &y,
668            &GpKernel::Rbf {
669                length_scale: 1.0,
670                signal_var: 1.0,
671            },
672            1e-6,
673        )
674        .unwrap();
675        assert!(
676            res.log_marginal_likelihood >= lml_init - 1e-9,
677            "optimiser regressed: init {} → final {}",
678            lml_init,
679            res.log_marginal_likelihood
680        );
681        // Optimised length_scale should be in a plausible range.
682        let ls_opt = res.log_params[0].exp();
683        assert!(ls_opt > 0.1 && ls_opt < 20.0, "length_scale {ls_opt}");
684    }
685
686    #[test]
687    fn test_log_marginal_likelihood_monotonic_in_data() {
688        // Likelihood should be higher for the kernel that better matches data.
689        let x = Array2::from_shape_vec((10, 1), (0..10).map(|i| i as f64 * 0.3).collect()).unwrap();
690        let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin());
691        let good = GpKernel::Rbf {
692            length_scale: 1.0,
693            signal_var: 1.0,
694        };
695        let bad = GpKernel::Rbf {
696            length_scale: 100.0,
697            signal_var: 1.0,
698        };
699        let lml_good = log_marginal_likelihood(&x, &y, &good, 1e-6).unwrap();
700        let lml_bad = log_marginal_likelihood(&x, &y, &bad, 1e-6).unwrap();
701        assert!(lml_good > lml_bad, "good={lml_good}, bad={lml_bad}");
702    }
703
704    #[test]
705    fn test_gp_sum_of_kernels() {
706        // RBF + White: should interpolate noisy data without exploding.
707        let x = Array2::from_shape_vec((10, 1), (0..10).map(|i| i as f64).collect()).unwrap();
708        let y: Array1<f64> = x.column(0).mapv(|v: f64| v.sin() + 0.05);
709        let kernel = GpKernel::Rbf {
710            length_scale: 2.0,
711            signal_var: 1.0,
712        }
713        .add(GpKernel::White { noise_level: 0.01 });
714        let fitted = GaussianProcessRegressor::new(kernel)
715            .with_alpha(1e-8)
716            .fit(&x, &y)
717            .unwrap();
718        let p = fitted.predict(&x).unwrap();
719        for (a, b) in p.iter().zip(y.iter()) {
720            assert!((a - b).abs() < 0.1, "predict {} vs y {}", a, b);
721        }
722    }
723}
724
725impl anofox_ml_core::RegressorScore<f64> for FittedGaussianProcessRegressor {}