sklears_gaussian_process/sparse_gpr.rs

//! Sparse Gaussian Process Regression implementations

// SciRS2 Policy - Use scirs2-autograd for ndarray types and operations
use scirs2_core::ndarray::{Array1, Array2, Axis};
use sklears_core::{
    error::{Result as SklResult, SklearsError},
    traits::{Estimator, Fit, Predict, Untrained},
    types::Float,
};

use crate::kernels::Kernel;
use crate::utils::{robust_cholesky, triangular_solve};
/// Configuration for Sparse Gaussian Process Regressor
#[derive(Debug, Clone)]
pub struct SparseGaussianProcessRegressorConfig {
    /// Number of inducing points
    pub n_inducing: usize,
    /// Strategy used to initialize the inducing points
    pub inducing_init: InducingPointInit,
    /// Noise variance added to the diagonal of the kernel matrix
    pub alpha: f64,
    /// Optimizer used for hyperparameter tuning, if any
    pub optimizer: Option<String>,
    /// Number of optimizer restarts from fresh initial hyperparameters
    pub n_restarts_optimizer: usize,
    /// Whether to keep a copy of the training inputs on the fitted model
    pub copy_x_train: bool,
    /// Seed for reproducible inducing-point initialization
    pub random_state: Option<u64>,
}
32
33impl Default for SparseGaussianProcessRegressorConfig {
34    fn default() -> Self {
35        Self {
36            n_inducing: 10,
37            inducing_init: InducingPointInit::Kmeans,
38            alpha: 1e-10,
39            optimizer: Some("fmin_l_bfgs_b".to_string()),
40            n_restarts_optimizer: 0,
41            copy_x_train: true,
42            random_state: None,
43        }
44    }
45}

/// Sparse Gaussian Process Regressor.
///
/// Approximates a full Gaussian process with a small set of inducing points,
/// reducing training cost from O(n^3) to O(n m^2) for m inducing points.
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use sklears_core::traits::{Fit, Predict};
/// use sklears_gaussian_process::kernels::RBF;
/// use sklears_gaussian_process::sparse_gpr::SparseGaussianProcessRegressor;
///
/// let X = array![[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]];
/// let y = array![1.0, 4.0, 9.0, 16.0, 25.0, 36.0];
///
/// let kernel = RBF::new(1.0);
/// let sgpr = SparseGaussianProcessRegressor::new()
///     .kernel(Box::new(kernel))
///     .n_inducing(4);
/// let fitted = sgpr.fit(&X, &y).unwrap();
/// let predictions = fitted.predict(&X).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct SparseGaussianProcessRegressor<S = Untrained> {
    state: S,
    kernel: Option<Box<dyn Kernel>>,
    config: SparseGaussianProcessRegressorConfig,
}

/// Methods for initializing inducing points
#[derive(Debug, Clone)]
pub enum InducingPointInit {
    /// Sample inducing points at random from the training inputs
    Random,
    /// Spread inducing points uniformly across the training inputs
    Uniform,
    /// Use k-means cluster centers of the training inputs
    Kmeans,
}

/// Trained state for Sparse Gaussian Process Regressor
#[derive(Debug, Clone)]
#[allow(non_snake_case)]
pub struct SgprTrained {
    /// Training inputs (kept only when `copy_x_train` is enabled)
    pub X_train: Option<Array2<f64>>,
    /// Training targets
    pub y_train: Array1<f64>,
    /// Inducing points (m x d)
    pub Z: Array2<f64>,
    /// Solved coefficients for the predictive mean
    pub alpha: Array1<f64>,
    /// Regularized kernel matrix between inducing points (m x m)
    pub Kmm: Array2<f64>,
    /// Kernel matrix between training and inducing points (n x m)
    pub Knm: Array2<f64>,
    /// Cholesky factor of A = I + Q^T Lambda^{-1} Q
    pub L: Array2<f64>,
    /// Kernel function
    pub kernel: Box<dyn Kernel>,
    /// Noise standard deviation
    pub sigma_n: f64,
    /// Log marginal likelihood of the training data
    pub log_marginal_likelihood_value: f64,
}

impl SparseGaussianProcessRegressor<Untrained> {
    /// Create a new SparseGaussianProcessRegressor instance
    pub fn new() -> Self {
        Self {
            state: Untrained,
            kernel: None,
            config: SparseGaussianProcessRegressorConfig::default(),
        }
    }

    /// Set the kernel function
    pub fn kernel(mut self, kernel: Box<dyn Kernel>) -> Self {
        self.kernel = Some(kernel);
        self
    }

    /// Set the number of inducing points
    pub fn n_inducing(mut self, n_inducing: usize) -> Self {
        self.config.n_inducing = n_inducing;
        self
    }

    /// Set the inducing point initialization method
    pub fn inducing_init(mut self, inducing_init: InducingPointInit) -> Self {
        self.config.inducing_init = inducing_init;
        self
    }

    /// Set the noise level
    pub fn alpha(mut self, alpha: f64) -> Self {
        self.config.alpha = alpha;
        self
    }

    /// Set the optimizer
    pub fn optimizer(mut self, optimizer: Option<String>) -> Self {
        self.config.optimizer = optimizer;
        self
    }

    /// Set the number of optimizer restarts
    pub fn n_restarts_optimizer(mut self, n_restarts: usize) -> Self {
        self.config.n_restarts_optimizer = n_restarts;
        self
    }

    /// Set whether to copy X during training
    pub fn copy_x_train(mut self, copy_x_train: bool) -> Self {
        self.config.copy_x_train = copy_x_train;
        self
    }

    /// Set the random state
    pub fn random_state(mut self, random_state: Option<u64>) -> Self {
        self.config.random_state = random_state;
        self
    }
}

impl Estimator for SparseGaussianProcessRegressor<Untrained> {
    type Config = SparseGaussianProcessRegressorConfig;
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}

impl Estimator for SparseGaussianProcessRegressor<SgprTrained> {
    type Config = SparseGaussianProcessRegressorConfig;
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}
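
// Fitting follows the standard sparse (FITC-style) construction: the exact GP
// prior covariance Knn + sigma_n^2 I is replaced by the low-rank-plus-diagonal
// approximation Q Q^T + Lambda, where Q = Knm Lmm^{-T} (with Kmm = Lmm Lmm^T)
// and Lambda = diag(Knn - Q Q^T) + sigma_n^2 I. Every solve then involves only
// the m x m matrix A = I + Q^T Lambda^{-1} Q instead of the n x n kernel.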
impl Fit<Array2<f64>, Array1<f64>> for SparseGaussianProcessRegressor<Untrained> {
    type Fitted = SparseGaussianProcessRegressor<SgprTrained>;

    #[allow(non_snake_case)]
    fn fit(self, X: &Array2<f64>, y: &Array1<f64>) -> SklResult<Self::Fitted> {
        if X.nrows() != y.len() {
            return Err(SklearsError::InvalidInput(
                "X and y must have the same number of samples".to_string(),
            ));
        }

        let kernel = self
            .kernel
            .ok_or_else(|| SklearsError::InvalidInput("Kernel must be specified".to_string()))?;

        // Initialize inducing points
        let Z = match self.config.inducing_init {
            InducingPointInit::Random => crate::utils::random_inducing_points(
                &X.view(),
                self.config.n_inducing,
                self.config.random_state,
            )?,
            InducingPointInit::Uniform => crate::utils::uniform_inducing_points(
                &X.view(),
                self.config.n_inducing,
                self.config.random_state,
            )?,
            InducingPointInit::Kmeans => crate::utils::kmeans_inducing_points(
                &X.view(),
                self.config.n_inducing,
                self.config.random_state,
            )?,
        };

        // Compute kernel matrices
        let Kmm = kernel.compute_kernel_matrix(&Z, None)?;
        let Knm = kernel.compute_kernel_matrix(X, Some(&Z))?;

        // Add regularization to Kmm
        let mut Kmm_reg = Kmm.clone();
        for i in 0..Kmm_reg.nrows() {
            Kmm_reg[[i, i]] += 1e-8; // Small jitter for numerical stability
        }

        // Cholesky decomposition of Kmm
        let L_mm = robust_cholesky(&Kmm_reg)?;

        // Whiten against the inducing kernel: solve L_mm * q_i = k_i for each
        // row of Knm, giving Q = Knm * L_mm^{-T}
        let mut Q = Array2::<f64>::zeros(Knm.raw_dim());
        for i in 0..Knm.nrows() {
            let knm_i = Knm.row(i).to_owned();
            let q_i = triangular_solve(&L_mm, &knm_i)?;
            Q.row_mut(i).assign(&q_i);
        }
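
        // At this point Q Q^T = Knm Kmm^{-1} Kmn, the Nystroem approximation
        // of the full n x n training kernel matrix Knn.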

        // Compute diagonal of Knn - Q * Q^T
        let knn_diag = X
            .axis_iter(Axis(0))
            .enumerate()
            .map(|(i, x)| {
                let knn_ii = kernel.kernel(&x, &x);
                let q_i = Q.row(i);
                knn_ii - q_i.dot(&q_i)
            })
            .collect::<Array1<f64>>();

        // Add noise
        let sigma_n_sq = self.config.alpha;
        let Lambda_diag = knn_diag.mapv(|x| x + sigma_n_sq);
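
        // Lambda_diag is the FITC effective noise: Lambda = diag(Knn - Q Q^T)
        // + sigma_n^2 I, i.e. the per-point variance the low-rank term cannot
        // explain, plus the observation noise.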

        // Construct A = I + Q^T * Lambda^{-1} * Q
        let mut A = Array2::<f64>::eye(self.config.n_inducing);
        for i in 0..X.nrows() {
            let q_i = Q.row(i);
            let lambda_inv = 1.0 / Lambda_diag[i];
            for j in 0..self.config.n_inducing {
                for k in 0..self.config.n_inducing {
                    A[[j, k]] += q_i[j] * q_i[k] * lambda_inv;
                }
            }
        }
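
        // An equivalent vectorized form (a sketch, assuming ndarray's rhs
        // broadcasting against an (n, 1) column) does the same O(n m^2) work
        // through one BLAS-backed matrix product:
        //     let q_scaled = &Q / &Lambda_diag.clone().insert_axis(Axis(1));
        //     let A = Array2::<f64>::eye(self.config.n_inducing)
        //         + Q.t().dot(&q_scaled);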

        // Cholesky decomposition of A
        let L_A = robust_cholesky(&A)?;

        // Solve A * alpha = b with the Cholesky factor: forward-solve
        // L_A * w = b, then back-substitute L_A^T * alpha = w
        let b = Q.t().dot(&(y / &Lambda_diag));
        let w = triangular_solve(&L_A, &b)?;
        let m = w.len();
        let mut alpha = Array1::<f64>::zeros(m);
        for i in (0..m).rev() {
            let mut s = w[i];
            for j in (i + 1)..m {
                s -= L_A[[j, i]] * alpha[j];
            }
            alpha[i] = s / L_A[[i, i]];
        }

        // Compute log marginal likelihood
        let log_marginal_likelihood_value = {
            let log_det_A = 2.0 * L_A.diag().mapv(|x| x.ln()).sum();
            let log_det_Lambda = Lambda_diag.mapv(|x| x.ln()).sum();
            // With alpha = A^{-1} b, alpha.dot(&b) equals b^T A^{-1} b
            let quadratic = y.dot(&(y / &Lambda_diag)) - alpha.dot(&b);
            -0.5 * (quadratic
                + log_det_A
                + log_det_Lambda
                + y.len() as f64 * (2.0 * std::f64::consts::PI).ln())
        };
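
        // Two identities for the low-rank-plus-diagonal covariance
        // Q Q^T + Lambda justify the expression above:
        //   determinant lemma: log|Q Q^T + Lambda| = log|A| + log|Lambda|
        //   Woodbury:          y^T (Q Q^T + Lambda)^{-1} y
        //                        = y^T Lambda^{-1} y - b^T A^{-1} b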

        let X_train = if self.config.copy_x_train {
            Some(X.to_owned())
        } else {
            None
        };

        Ok(SparseGaussianProcessRegressor {
            state: SgprTrained {
                X_train,
                y_train: y.to_owned(),
                Z,
                alpha,
                Kmm: Kmm_reg,
                Knm,
                L: L_A,
                kernel,
                sigma_n: self.config.alpha.sqrt(),
                log_marginal_likelihood_value,
            },
            kernel: None,
            config: self.config,
        })
    }
}

impl Predict<Array2<f64>, Array1<f64>> for SparseGaussianProcessRegressor<SgprTrained> {
    #[allow(non_snake_case)]
    fn predict(&self, X: &Array2<f64>) -> SklResult<Array1<f64>> {
        let (mean, _) = self.predict_with_std(X)?;
        Ok(mean)
    }
}

impl SparseGaussianProcessRegressor<SgprTrained> {
    /// Predict with uncertainty estimates
    #[allow(non_snake_case)]
    pub fn predict_with_std(&self, X: &Array2<f64>) -> SklResult<(Array1<f64>, Array1<f64>)> {
        // Compute kernel between test points and inducing points
        let K_star_m = self
            .state
            .kernel
            .compute_kernel_matrix(X, Some(&self.state.Z.to_owned()))?;

        // Whiten the test cross-kernel: solve L_mm * q_i = k_i per row,
        // giving Q_star = K_star_m * L_mm^{-T}
        let L_mm = robust_cholesky(&self.state.Kmm)?;
        let mut Q_star = Array2::<f64>::zeros(K_star_m.raw_dim());
        for i in 0..K_star_m.nrows() {
            let k_star_i = K_star_m.row(i).to_owned();
            let q_star_i = triangular_solve(&L_mm, &k_star_i)?;
            Q_star.row_mut(i).assign(&q_star_i);
        }

        // Predict mean
        let mean = Q_star.dot(&self.state.alpha);

        // Predict variance (simplified)
        let k_star_star_diag = X
            .axis_iter(Axis(0))
            .map(|x| self.state.kernel.kernel(&x, &x))
            .collect::<Array1<f64>>();
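
        // The simplified variance below keeps only the prior diagonal plus
        // observation noise. The full sparse-GP predictive variance would be
        //   var_i = k**_i - ||q*_i||^2 + q*_i^T A^{-1} q*_i + sigma_n^2,
        // subtracting the variance already explained by the inducing points.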
        let var = k_star_star_diag; // Simplified: prior variance only
        // Clamp before the square root so rounding error cannot produce NaN
        let std = var.mapv(|x| (x + self.state.sigma_n.powi(2)).max(0.0).sqrt());

        Ok((mean, std))
    }

    /// Get the log marginal likelihood
    pub fn log_marginal_likelihood(&self) -> f64 {
        self.state.log_marginal_likelihood_value
    }

    /// Get the inducing points
    pub fn inducing_points(&self) -> &Array2<f64> {
        &self.state.Z
    }
}

impl Default for SparseGaussianProcessRegressor<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}
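
// A minimal smoke test, assuming the `RBF` kernel from `crate::kernels` and
// the `array!` macro re-export from `scirs2_core::ndarray`, both as used in
// the doc example above; a sketch rather than an exhaustive test suite.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kernels::RBF;
    use scirs2_core::ndarray::array;

    #[test]
    fn fit_predict_smoke() {
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]];
        let y = array![1.0, 4.0, 9.0, 16.0, 25.0, 36.0];

        let fitted = SparseGaussianProcessRegressor::new()
            .kernel(Box::new(RBF::new(1.0)))
            .n_inducing(4)
            .fit(&x, &y)
            .expect("fit should succeed on well-formed inputs");

        // One predictive mean and standard deviation per query point.
        let (mean, std) = fitted
            .predict_with_std(&x)
            .expect("prediction should succeed");
        assert_eq!(mean.len(), x.nrows());
        assert_eq!(std.len(), x.nrows());
        assert!(std.iter().all(|s| s.is_finite() && *s >= 0.0));
    }
}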