tensorlogic_train/hyperparameter/gp.rs

//! Gaussian Process regressor for Bayesian Optimization.

use crate::{TrainError, TrainResult};
use scirs2_core::ndarray::{s, Array1, Array2};

use super::kernel::GpKernel;

/// Gaussian Process regressor for Bayesian Optimization.
///
/// Provides probabilistic predictions with uncertainty estimates.
#[derive(Debug)]
pub struct GaussianProcess {
    /// Kernel function.
    kernel: GpKernel,
    /// Noise variance (observation noise).
    noise_variance: f64,
    /// Training inputs (normalized to [0, 1]).
    x_train: Option<Array2<f64>>,
    /// Training outputs (standardized).
    y_train: Option<Array1<f64>>,
    /// Mean of training outputs (for standardization).
    y_mean: f64,
    /// Std of training outputs (for standardization).
    y_std: f64,
    /// Cholesky decomposition of K + sigma^2 * I (cached for efficiency).
    l_matrix: Option<Array2<f64>>,
    /// Alpha = L^T \ (L \ y) (cached).
    alpha: Option<Array1<f64>>,
}

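// Posterior equations implemented by `fit` and `predict` below (standard GP
// regression, written here for reference; `L` and `alpha` are the cached
// fields above):
//
//     mean(x) = k*^T (K + sigma^2 I)^{-1} y = k*^T alpha
//     var(x)  = k(x, x) - v^T v,    where v = L \ k*
//
// k* is the vector of kernel values between the training inputs and x.
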
impl GaussianProcess {
    /// Create a new Gaussian Process.
    pub fn new(kernel: GpKernel, noise_variance: f64) -> Self {
        Self {
            kernel,
            noise_variance,
            x_train: None,
            y_train: None,
            y_mean: 0.0,
            y_std: 1.0,
            l_matrix: None,
            alpha: None,
        }
    }

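    // Typical call sequence for the methods below, as a sketch. The `kernel`
    // value is assumed to come from super::kernel; its constructor is not
    // part of this file, so this is illustrative rather than copy-paste ready:
    //
    //     let mut gp = GaussianProcess::new(kernel, 1e-6);
    //     gp.fit(x, y)?;                            // x: (n, d), y: (n,)
    //     let (means, stds) = gp.predict(&x_test)?;
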
    /// Fit the GP to training data.
    pub fn fit(&mut self, x: Array2<f64>, y: Array1<f64>) -> TrainResult<()> {
        if x.nrows() != y.len() {
            return Err(TrainError::InvalidParameter(
                "X and y must have same number of samples".to_string(),
            ));
        }
        // Standardize targets so the kernel's output scale stays well-conditioned.
        let y_mean = y.mean().unwrap_or(0.0);
        let y_std = y.std(0.0).max(1e-8);
        let y_standardized = (&y - y_mean) / y_std;
        // K + sigma^2 * I: add observation noise to the kernel diagonal.
        let mut k = self.kernel.compute_kernel(&x, &x);
        for i in 0..k.nrows() {
            k[[i, i]] += self.noise_variance;
        }
        // alpha = (K + sigma^2 I)^{-1} y~, computed via L L^T = K + sigma^2 I
        // and two triangular solves (Rasmussen & Williams, Algorithm 2.1).
        let l = self.cholesky(&k)?;
        let z = self.forward_substitution(&l, &y_standardized)?;
        let alpha = self.backward_substitution(&l, &z)?;
        self.x_train = Some(x);
        self.y_train = Some(y_standardized);
        self.y_mean = y_mean;
        self.y_std = y_std;
        self.l_matrix = Some(l);
        self.alpha = Some(alpha);
        Ok(())
    }

    /// Predict mean and standard deviation at test points.
    pub fn predict(&self, x_test: &Array2<f64>) -> TrainResult<(Array1<f64>, Array1<f64>)> {
        let x_train = self
            .x_train
            .as_ref()
            .ok_or_else(|| TrainError::InvalidParameter("GP not fitted".to_string()))?;
        let l_matrix = self
            .l_matrix
            .as_ref()
            .expect("l_matrix must be set after fitting");
        let alpha = self
            .alpha
            .as_ref()
            .expect("alpha must be set after fitting");
        let n_test = x_test.nrows();
        let mut means = Array1::zeros(n_test);
        let mut stds = Array1::zeros(n_test);
        for i in 0..n_test {
            let x = x_test.row(i).to_owned();
            // k* = k(X_train, x): covariances between training inputs and x.
            let k_star = self.kernel.compute_kernel_vector(x_train, &x);
            // Posterior mean in standardized space: mu~ = k*^T alpha.
            let mean_standardized = k_star.dot(alpha);
            means[i] = mean_standardized * self.y_std + self.y_mean;
            // k(x, x): prior variance at the test point.
            let k_star_star = self
                .kernel
                .compute_kernel_vector(&x_test.slice(s![i..i + 1, ..]).to_owned(), &x)[0];
            // v = L \ k*, so the posterior variance is k(x, x) - v^T v.
            let v = self.forward_substitution(l_matrix, &k_star)?;
            let variance_standardized = k_star_star - v.dot(&v);
            // Clamp tiny negatives from round-off, then undo standardization.
            stds[i] = (variance_standardized.max(1e-10) * self.y_std.powi(2)).sqrt();
        }
        Ok((means, stds))
    }

    /// Cholesky decomposition: K = L * L^T.
    fn cholesky(&self, k: &Array2<f64>) -> TrainResult<Array2<f64>> {
        let n = k.nrows();
        let mut l = Array2::zeros((n, n));
        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0;
                for k_idx in 0..j {
                    sum += l[[i, k_idx]] * l[[j, k_idx]];
                }
                if i == j {
                    // Clamp the pivot to a small positive jitter so a
                    // non-positive-definite K cannot yield the square root
                    // of a negative number (NaN).
                    l[[i, j]] = (k[[i, i]] - sum).max(1e-6).sqrt();
                } else {
                    l[[i, j]] = (k[[i, j]] - sum) / l[[j, j]];
                }
            }
        }
        Ok(l)
    }

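    // For reference, the loop above computes the textbook Cholesky-Banachiewicz
    // recurrence, row by row:
    //
    //     L[j][j] = sqrt(K[j][j] - sum_{m<j} L[j][m]^2)
    //     L[i][j] = (K[i][j] - sum_{m<j} L[i][m] * L[j][m]) / L[j][j]   for i > j
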
    /// Forward substitution: solve L * x = b.
    fn forward_substitution(&self, l: &Array2<f64>, b: &Array1<f64>) -> TrainResult<Array1<f64>> {
        let n = l.nrows();
        let mut x = Array1::zeros(n);
        for i in 0..n {
            let mut sum = 0.0;
            for j in 0..i {
                sum += l[[i, j]] * x[j];
            }
            x[i] = (b[i] - sum) / l[[i, i]];
        }
        Ok(x)
    }

    /// Backward substitution: solve L^T * x = b.
    fn backward_substitution(&self, l: &Array2<f64>, b: &Array1<f64>) -> TrainResult<Array1<f64>> {
        let n = l.nrows();
        let mut x = Array1::zeros(n);
        for i in (0..n).rev() {
            let mut sum = 0.0;
            for j in (i + 1)..n {
                sum += l[[j, i]] * x[j];
            }
            x[i] = (b[i] - sum) / l[[i, i]];
        }
        Ok(x)
    }
}
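
// A minimal round-trip check for the linear-algebra helpers above. This is a
// sketch: `GpKernel::default()` and the `array!` re-export are assumptions made
// so the test stays self-contained; swap in the real kernel constructor from
// super::kernel if `GpKernel` does not implement `Default`.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn cholesky_triangular_solves_roundtrip() {
        let gp = GaussianProcess::new(GpKernel::default(), 1e-6);
        // A small symmetric positive definite matrix and a right-hand side.
        let k = array![[4.0, 2.0], [2.0, 3.0]];
        let b = array![1.0, 2.0];
        // Factor K = L L^T, then solve L z = b and L^T x = z, exactly as
        // `fit` does when computing alpha.
        let l = gp.cholesky(&k).unwrap();
        let z = gp.forward_substitution(&l, &b).unwrap();
        let x = gp.backward_substitution(&l, &z).unwrap();
        // K x should reproduce b up to round-off.
        let kx = k.dot(&x);
        for i in 0..2 {
            assert!((kx[i] - b[i]).abs() < 1e-8);
        }
    }
}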