// sklears_kernel_approximation/sparse_gp/mod.rs
pub mod approximations;
7pub mod core;
8pub mod inference;
9pub mod kernels;
10pub mod simd_operations;
11pub mod ski;
12pub mod variational;
13
14pub use approximations::{InducingPointSelector, SparseApproximationMethods};
16pub use core::*;
17pub use inference::{LanczosMethod, PreconditionedCG, ScalableInference};
18pub use kernels::{KernelOps, MaternKernel, RBFKernel, SparseKernel};
19pub use simd_operations::simd_sparse_gp;
20pub use ski::{FittedTensorSKI, TensorSKI};
21pub use variational::{StochasticVariationalInference, VariationalFreeEnergy};
22
23use scirs2_core::ndarray::{Array1, Array2};
24use scirs2_core::random::essentials::Normal as RandNormal;
25use scirs2_core::random::thread_rng;
26use sklears_core::error::{Result, SklearsError};
27use sklears_core::traits::{Fit, Predict};
28
29impl<K: SparseKernel> SparseGaussianProcess<K> {
31 pub fn new(num_inducing: usize, kernel: K) -> Self {
33 Self {
34 num_inducing,
35 kernel,
36 approximation: SparseApproximation::FullyIndependentConditional,
37 inducing_strategy: InducingPointStrategy::KMeans,
38 noise_variance: 1e-6,
39 max_iter: 100,
40 tol: 1e-6,
41 }
42 }
43
44 pub fn select_inducing_points(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
46 InducingPointSelector::select_points(
47 &self.inducing_strategy,
48 x,
49 self.num_inducing,
50 &self.kernel,
51 )
52 }
53
54 fn fit_approximation(
56 &self,
57 x: &Array2<f64>,
58 y: &Array1<f64>,
59 inducing_points: &Array2<f64>,
60 ) -> Result<(Array1<f64>, Array2<f64>, Option<VariationalParams>)> {
61 match &self.approximation {
62 SparseApproximation::SubsetOfRegressors => {
63 let (alpha, k_mm_inv) = SparseApproximationMethods::fit_sor(
64 x,
65 y,
66 inducing_points,
67 &self.kernel,
68 self.noise_variance,
69 )?;
70 Ok((alpha, k_mm_inv, None))
71 }
72
73 SparseApproximation::FullyIndependentConditional => {
74 let (alpha, k_mm_inv) = SparseApproximationMethods::fit_fic(
75 x,
76 y,
77 inducing_points,
78 &self.kernel,
79 self.noise_variance,
80 )?;
81 Ok((alpha, k_mm_inv, None))
82 }
83
84 SparseApproximation::PartiallyIndependentConditional { block_size } => {
85 let (alpha, k_mm_inv) = SparseApproximationMethods::fit_pic(
86 x,
87 y,
88 inducing_points,
89 &self.kernel,
90 self.noise_variance,
91 *block_size,
92 )?;
93 Ok((alpha, k_mm_inv, None))
94 }
95
96 SparseApproximation::VariationalFreeEnergy {
97 whitened,
98 natural_gradients,
99 } => {
100 let (alpha, k_mm_inv, vfe_params) = VariationalFreeEnergy::fit(
101 x,
102 y,
103 inducing_points,
104 &self.kernel,
105 self.noise_variance,
106 *whitened,
107 *natural_gradients,
108 self.max_iter,
109 self.tol,
110 )?;
111 Ok((alpha, k_mm_inv, Some(vfe_params)))
112 }
113 }
114 }
115}
116
117impl<K: SparseKernel> Fit<Array2<f64>, Array1<f64>> for SparseGaussianProcess<K> {
119 type Fitted = FittedSparseGP<K>;
120
121 fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
122 if x.nrows() != y.len() {
124 return Err(SklearsError::InvalidInput(
125 "Number of samples must match between X and y".to_string(),
126 ));
127 }
128
129 if self.num_inducing > x.nrows() {
130 return Err(SklearsError::InvalidInput(
131 "Number of inducing points cannot exceed number of data points".to_string(),
132 ));
133 }
134
135 let inducing_points = self.select_inducing_points(x)?;
137
138 utils::validate_inducing_points(self.num_inducing, x.ncols(), &self.inducing_strategy)?;
140
141 let (alpha, k_mm_inv, variational_params) =
143 self.fit_approximation(x, y, &inducing_points)?;
144
145 Ok(FittedSparseGP {
146 inducing_points,
147 kernel: self.kernel,
148 approximation: self.approximation,
149 alpha,
150 k_mm_inv,
151 noise_variance: self.noise_variance,
152 variational_params,
153 })
154 }
155}
156
157impl<K: SparseKernel> Predict<Array2<f64>, Array1<f64>> for FittedSparseGP<K> {
159 fn predict(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
160 let k_star_m = self.kernel.kernel_matrix(x, &self.inducing_points);
161
162 let predictions = simd_sparse_gp::simd_posterior_mean(&k_star_m, &self.alpha);
164
165 Ok(predictions)
166 }
167}
168
169impl<K: SparseKernel> FittedSparseGP<K> {
170 pub fn predict_with_variance(&self, x: &Array2<f64>) -> Result<(Array1<f64>, Array1<f64>)> {
172 let k_star_m = self.kernel.kernel_matrix(x, &self.inducing_points);
173
174 let mean = simd_sparse_gp::simd_posterior_mean(&k_star_m, &self.alpha);
176
177 let v = self.k_mm_inv.dot(&k_star_m.t());
179 let k_star_star = self.kernel.kernel_diagonal(x);
180 let variance = simd_sparse_gp::simd_posterior_variance(&k_star_star, &v);
181
182 Ok((mean, variance))
183 }
184
185 pub fn predict_scalable(
187 &self,
188 x: &Array2<f64>,
189 method: ScalableInferenceMethod,
190 ) -> Result<Array1<f64>> {
191 let k_star_m = self.kernel.kernel_matrix(x, &self.inducing_points);
192
193 ScalableInference::predict(
194 &method,
195 &k_star_m,
196 &self.inducing_points,
197 &self.alpha,
198 &self.kernel,
199 self.noise_variance,
200 )
201 }
202
203 pub fn log_marginal_likelihood(&self) -> Result<f64> {
205 if let Some(vfe_params) = &self.variational_params {
206 Ok(vfe_params.elbo)
208 } else {
209 let m = self.inducing_points.nrows();
211
212 let log_det = self
214 .k_mm_inv
215 .diag()
216 .mapv(|x| if x > 1e-10 { -x.ln() } else { 23.0 })
217 .sum();
218
219 let data_fit = self.alpha.dot(&self.alpha);
221
222 let log_ml = -0.5 * (data_fit + log_det + m as f64 * (2.0 * std::f64::consts::PI).ln());
224
225 Ok(log_ml)
226 }
227 }
228
229 pub fn sample_posterior(&self, x: &Array2<f64>, num_samples: usize) -> Result<Array2<f64>> {
231 let (mean, var) = self.predict_with_variance(x)?;
232 let n_test = x.nrows();
233
234 let mut samples = Array2::zeros((num_samples, n_test));
235 let mut rng = thread_rng();
236
237 for i in 0..num_samples {
238 for j in 0..n_test {
239 let std_dev = var[j].sqrt();
240 let normal = RandNormal::new(mean[j], std_dev).unwrap();
241 samples[(i, j)] = rng.sample(normal);
242 }
243 }
244
245 Ok(samples)
246 }
247
248 pub fn acquisition_function(
250 &self,
251 x: &Array2<f64>,
252 acquisition_type: &str,
253 ) -> Result<Array1<f64>> {
254 let (mean, var) = self.predict_with_variance(x)?;
255
256 match acquisition_type {
257 "variance" => Ok(var),
258 "entropy" => {
259 let entropy =
261 var.mapv(|v| 0.5 * (2.0 * std::f64::consts::PI * std::f64::consts::E * v).ln());
262 Ok(entropy)
263 }
264 "upper_confidence_bound" => {
265 let ucb = &mean + 2.0 * &var.mapv(|v| v.sqrt());
267 Ok(ucb)
268 }
269 _ => Err(SklearsError::InvalidInput(format!(
270 "Unknown acquisition function: {}",
271 acquisition_type
272 ))),
273 }
274 }
275}
276
277pub mod utils {
279 pub use super::core::utils::*;
280 use super::*;
281
282 pub fn effective_degrees_of_freedom<K: SparseKernel>(
284 fitted_gp: &FittedSparseGP<K>,
285 x: &Array2<f64>,
286 ) -> Result<f64> {
287 let k_nm = fitted_gp
288 .kernel
289 .kernel_matrix(x, &fitted_gp.inducing_points);
290 let k_mm_inv_k_mn = fitted_gp.k_mm_inv.dot(&k_nm.t());
291
292 let mut trace = 0.0;
294 for i in 0..k_nm.nrows() {
295 trace += k_nm.row(i).dot(&k_mm_inv_k_mn.column(i));
296 }
297
298 Ok(trace)
299 }
300
301 pub fn model_complexity_penalty<K: SparseKernel>(fitted_gp: &FittedSparseGP<K>) -> f64 {
303 fitted_gp.inducing_points.nrows() as f64 * (fitted_gp.inducing_points.nrows() as f64).ln()
305 }
306
307 pub fn optimize_hyperparameters<K: SparseKernel>(
309 sparse_gp: &mut SparseGaussianProcess<K>,
310 x: &Array2<f64>,
311 y: &Array1<f64>,
312 max_iter: usize,
313 learning_rate: f64,
314 ) -> Result<f64> {
315 let mut best_likelihood = f64::NEG_INFINITY;
316
317 for _iter in 0..max_iter {
318 let fitted = sparse_gp.clone().fit(x, y)?;
320
321 let log_ml = fitted.log_marginal_likelihood()?;
323
324 if log_ml > best_likelihood {
325 best_likelihood = log_ml;
326 }
327
328 let gradients = sparse_gp
330 .kernel
331 .parameter_gradients(&fitted.inducing_points, &fitted.inducing_points);
332
333 let mut params = sparse_gp.kernel.parameters();
335 for (i, grad_matrix) in gradients.iter().enumerate() {
336 let grad_scalar =
337 grad_matrix.sum() / (grad_matrix.nrows() * grad_matrix.ncols()) as f64;
338 params[i] += learning_rate * grad_scalar;
339 params[i] = params[i].max(1e-6); }
341 sparse_gp.kernel.set_parameters(¶ms);
342 }
343
344 Ok(best_likelihood)
345 }
346}