sklears_kernel_approximation/sparse_gp/
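//! Sparse Gaussian process regression built on inducing-point
//! approximations.
//!
//! This module wires together inducing-point selection
//! ([`approximations`]), scalable inference ([`inference`]), kernel
//! implementations ([`kernels`]), SIMD-accelerated posterior math
//! ([`simd_operations`]), structured kernel interpolation ([`ski`]), and
//! variational fitting ([`variational`]).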
pub mod approximations;
pub mod core;
pub mod inference;
pub mod kernels;
pub mod simd_operations;
pub mod ski;
pub mod variational;

pub use approximations::{InducingPointSelector, SparseApproximationMethods};
pub use core::*;
pub use inference::{LanczosMethod, PreconditionedCG, ScalableInference};
pub use kernels::{KernelOps, MaternKernel, RBFKernel, SparseKernel};
pub use simd_operations::simd_sparse_gp;
pub use ski::{FittedTensorSKI, TensorSKI};
pub use variational::{StochasticVariationalInference, VariationalFreeEnergy};

use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::essentials::Normal as RandNormal;
use scirs2_core::random::Distribution;
use scirs2_core::random::{thread_rng, Rng};
use sklears_core::error::{Result, SklearsError};
use sklears_core::traits::{Fit, Predict};

impl<K: SparseKernel> SparseGaussianProcess<K> {
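    /// Creates a sparse GP with `num_inducing` inducing points and the
    /// given kernel. Defaults: FIC approximation, k-means inducing-point
    /// selection, noise variance `1e-6`, at most 100 optimizer iterations,
    /// and tolerance `1e-6`.
    ///
    /// A minimal sketch of the intended workflow; the `RBFKernel::new`
    /// call below is an assumed constructor, not a confirmed API:
    ///
    /// ```ignore
    /// // Hypothetical kernel constructor -- adapt to the real RBFKernel API.
    /// let kernel = RBFKernel::new(1.0);
    /// let gp = SparseGaussianProcess::new(50, kernel);
    /// let fitted = gp.fit(&x_train, &y_train)?;
    /// let mean = fitted.predict(&x_test)?;
    /// ```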
    pub fn new(num_inducing: usize, kernel: K) -> Self {
        Self {
            num_inducing,
            kernel,
            approximation: SparseApproximation::FullyIndependentConditional,
            inducing_strategy: InducingPointStrategy::KMeans,
            noise_variance: 1e-6,
            max_iter: 100,
            tol: 1e-6,
        }
    }

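    /// Selects `num_inducing` inducing points from `x` by delegating to
    /// [`InducingPointSelector`] with the configured strategy.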
    pub fn select_inducing_points(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        InducingPointSelector::select_points(
            &self.inducing_strategy,
            x,
            self.num_inducing,
            &self.kernel,
        )
    }

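    /// Dispatches to the configured sparse approximation: subset of
    /// regressors (SoR), fully independent conditional (FIC), partially
    /// independent conditional (PIC), or the variational free-energy (VFE)
    /// bound. Returns the weight vector `alpha`, the inverse inducing-point
    /// Gram matrix `K_mm^{-1}`, and, for VFE, the fitted variational
    /// parameters.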
    fn fit_approximation(
        &self,
        x: &Array2<f64>,
        y: &Array1<f64>,
        inducing_points: &Array2<f64>,
    ) -> Result<(Array1<f64>, Array2<f64>, Option<VariationalParams>)> {
        match &self.approximation {
            SparseApproximation::SubsetOfRegressors => {
                let (alpha, k_mm_inv) = SparseApproximationMethods::fit_sor(
                    x,
                    y,
                    inducing_points,
                    &self.kernel,
                    self.noise_variance,
                )?;
                Ok((alpha, k_mm_inv, None))
            }
            SparseApproximation::FullyIndependentConditional => {
                let (alpha, k_mm_inv) = SparseApproximationMethods::fit_fic(
                    x,
                    y,
                    inducing_points,
                    &self.kernel,
                    self.noise_variance,
                )?;
                Ok((alpha, k_mm_inv, None))
            }
            SparseApproximation::PartiallyIndependentConditional { block_size } => {
                let (alpha, k_mm_inv) = SparseApproximationMethods::fit_pic(
                    x,
                    y,
                    inducing_points,
                    &self.kernel,
                    self.noise_variance,
                    *block_size,
                )?;
                Ok((alpha, k_mm_inv, None))
            }
            SparseApproximation::VariationalFreeEnergy {
                whitened,
                natural_gradients,
            } => {
                let (alpha, k_mm_inv, vfe_params) = VariationalFreeEnergy::fit(
                    x,
                    y,
                    inducing_points,
                    &self.kernel,
                    self.noise_variance,
                    *whitened,
                    *natural_gradients,
                    self.max_iter,
                    self.tol,
                )?;
                Ok((alpha, k_mm_inv, Some(vfe_params)))
            }
        }
    }
}

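/// Fitting validates the shapes (`x.nrows() == y.len()`, at most one
/// inducing point per data point), selects inducing points, and runs the
/// configured approximation.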
impl<K: SparseKernel> Fit<Array2<f64>, Array1<f64>> for SparseGaussianProcess<K> {
    type Fitted = FittedSparseGP<K>;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        if x.nrows() != y.len() {
            return Err(SklearsError::InvalidInput(
                "Number of samples must match between X and y".to_string(),
            ));
        }

        if self.num_inducing > x.nrows() {
            return Err(SklearsError::InvalidInput(
                "Number of inducing points cannot exceed number of data points".to_string(),
            ));
        }

        // Validate the configuration before the (potentially expensive)
        // inducing-point selection, so bad settings fail fast.
        utils::validate_inducing_points(self.num_inducing, x.ncols(), &self.inducing_strategy)?;

        let inducing_points = self.select_inducing_points(x)?;

        let (alpha, k_mm_inv, variational_params) =
            self.fit_approximation(x, y, &inducing_points)?;

        Ok(FittedSparseGP {
            inducing_points,
            kernel: self.kernel,
            approximation: self.approximation,
            alpha,
            k_mm_inv,
            noise_variance: self.noise_variance,
            variational_params,
        })
    }
}

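/// Prediction computes the posterior mean `K_*m * alpha`, where `K_*m` is
/// the cross-covariance between test points and inducing points.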
impl<K: SparseKernel> Predict<Array2<f64>, Array1<f64>> for FittedSparseGP<K> {
    fn predict(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
        // Cross-covariance between test points and inducing points: K_*m.
        let k_star_m = self.kernel.kernel_matrix(x, &self.inducing_points);

        // Posterior mean K_*m * alpha, computed with SIMD acceleration.
        let predictions = simd_sparse_gp::simd_posterior_mean(&k_star_m, &self.alpha);

        Ok(predictions)
    }
}

impl<K: SparseKernel> FittedSparseGP<K> {
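    /// Returns the posterior mean and per-point variance:
    /// `mu_* = K_*m alpha` and
    /// `sigma^2_* = k_** - diag(K_*m K_mm^{-1} K_m*)`.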
    pub fn predict_with_variance(&self, x: &Array2<f64>) -> Result<(Array1<f64>, Array1<f64>)> {
        let k_star_m = self.kernel.kernel_matrix(x, &self.inducing_points);

        let mean = simd_sparse_gp::simd_posterior_mean(&k_star_m, &self.alpha);

        // v = K_mm^{-1} K_m*; the variance subtracts the explained part
        // from the prior diagonal k_**.
        let v = self.k_mm_inv.dot(&k_star_m.t());
        let k_star_star = self.kernel.kernel_diagonal(x);
        let variance = simd_sparse_gp::simd_posterior_variance(&k_star_star, &v);

        Ok((mean, variance))
    }

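    /// Predicts with one of the scalable inference backends (e.g. Lanczos
    /// or preconditioned CG) instead of the direct SIMD path.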
    pub fn predict_scalable(
        &self,
        x: &Array2<f64>,
        method: ScalableInferenceMethod,
    ) -> Result<Array1<f64>> {
        let k_star_m = self.kernel.kernel_matrix(x, &self.inducing_points);

        ScalableInference::predict(
            &method,
            &k_star_m,
            &self.inducing_points,
            &self.alpha,
            &self.kernel,
            self.noise_variance,
        )
    }

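    /// Returns the ELBO when the model was fitted variationally; otherwise
    /// a cheap approximation of the log marginal likelihood that estimates
    /// `log|K_mm|` from the diagonal of `K_mm^{-1}`.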
    pub fn log_marginal_likelihood(&self) -> Result<f64> {
        if let Some(vfe_params) = &self.variational_params {
            // For variational fits the ELBO is the natural lower bound.
            Ok(vfe_params.elbo)
        } else {
            let m = self.inducing_points.nrows();

            // Approximate log|K_mm| from the diagonal of its inverse,
            // clamping tiny entries at 1e-10 to keep the log finite.
            let log_det = self
                .k_mm_inv
                .diag()
                .mapv(|x| if x > 1e-10 { -x.ln() } else { -(1e-10f64).ln() })
                .sum();

            let data_fit = self.alpha.dot(&self.alpha);

            let log_ml = -0.5 * (data_fit + log_det + m as f64 * (2.0 * std::f64::consts::PI).ln());

            Ok(log_ml)
        }
    }

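    /// Draws `num_samples` samples from the marginal posterior at each test
    /// point. Points are sampled independently (diagonal covariance only),
    /// so joint correlations between test points are not reproduced.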
    pub fn sample_posterior(&self, x: &Array2<f64>, num_samples: usize) -> Result<Array2<f64>> {
        let (mean, var) = self.predict_with_variance(x)?;
        let n_test = x.nrows();

        let mut samples = Array2::zeros((num_samples, n_test));
        let mut rng = thread_rng();

        for i in 0..num_samples {
            for j in 0..n_test {
                // Clamp at zero: numerical error can make the variance
                // slightly negative, which would break Normal::new.
                let std_dev = var[j].max(0.0).sqrt();
                let normal = RandNormal::new(mean[j], std_dev).unwrap();
                samples[(i, j)] = rng.sample(normal);
            }
        }

        Ok(samples)
    }

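    /// Evaluates an acquisition function for active learning or Bayesian
    /// optimization. Supported names: `"variance"`, `"entropy"`, and
    /// `"upper_confidence_bound"` (mean plus two standard deviations).
    ///
    /// A usage sketch against an already fitted model:
    ///
    /// ```ignore
    /// let ucb = fitted.acquisition_function(&x_cand, "upper_confidence_bound")?;
    /// ```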
    pub fn acquisition_function(
        &self,
        x: &Array2<f64>,
        acquisition_type: &str,
    ) -> Result<Array1<f64>> {
        let (mean, var) = self.predict_with_variance(x)?;

        match acquisition_type {
            "variance" => Ok(var),
            "entropy" => {
                // Differential entropy of a Gaussian: 0.5 * ln(2*pi*e*sigma^2).
                let entropy =
                    var.mapv(|v| 0.5 * (2.0 * std::f64::consts::PI * std::f64::consts::E * v).ln());
                Ok(entropy)
            }
            "upper_confidence_bound" => {
                // UCB with a fixed exploration weight of two standard deviations.
                let ucb = &mean + 2.0 * &var.mapv(|v| v.sqrt());
                Ok(ucb)
            }
            _ => Err(SklearsError::InvalidInput(format!(
                "Unknown acquisition function: {}",
                acquisition_type
            ))),
        }
    }
}

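/// Model-selection helpers: effective degrees of freedom, a complexity
/// penalty, and a simple hyperparameter optimizer.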
pub mod utils {
    pub use super::core::utils::*;
    use super::*;

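    /// Effective degrees of freedom of the fitted sparse GP, computed as
    /// `trace(K_nm K_mm^{-1} K_mn)`.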
    pub fn effective_degrees_of_freedom<K: SparseKernel>(
        fitted_gp: &FittedSparseGP<K>,
        x: &Array2<f64>,
    ) -> Result<f64> {
        let k_nm = fitted_gp
            .kernel
            .kernel_matrix(x, &fitted_gp.inducing_points);
        let k_mm_inv_k_mn = fitted_gp.k_mm_inv.dot(&k_nm.t());

        // trace(K_nm K_mm^{-1} K_mn) accumulated row by row.
        let mut trace = 0.0;
        for i in 0..k_nm.nrows() {
            trace += k_nm.row(i).dot(&k_mm_inv_k_mn.column(i));
        }

        Ok(trace)
    }

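    /// An `m * ln(m)` complexity penalty in the number of inducing points
    /// `m`, usable as a crude model-selection regularizer.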
    pub fn model_complexity_penalty<K: SparseKernel>(fitted_gp: &FittedSparseGP<K>) -> f64 {
        fitted_gp.inducing_points.nrows() as f64 * (fitted_gp.inducing_points.nrows() as f64).ln()
    }

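    /// Naive gradient ascent over kernel hyperparameters: refits the model
    /// each iteration, scores it by (approximate) log marginal likelihood,
    /// and nudges each parameter by the mean kernel gradient over the
    /// inducing points. Returns the best likelihood seen.
    ///
    /// A minimal sketch, assuming a kernel value `kernel` is already
    /// constructed:
    ///
    /// ```ignore
    /// let mut gp = SparseGaussianProcess::new(50, kernel);
    /// let best = utils::optimize_hyperparameters(&mut gp, &x, &y, 20, 1e-2)?;
    /// ```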
    pub fn optimize_hyperparameters<K: SparseKernel>(
        sparse_gp: &mut SparseGaussianProcess<K>,
        x: &Array2<f64>,
        y: &Array1<f64>,
        max_iter: usize,
        learning_rate: f64,
    ) -> Result<f64> {
        let mut best_likelihood = f64::NEG_INFINITY;

        for _iter in 0..max_iter {
            let fitted = sparse_gp.clone().fit(x, y)?;

            let log_ml = fitted.log_marginal_likelihood()?;

            if log_ml > best_likelihood {
                best_likelihood = log_ml;
            }

            // Kernel gradients evaluated on the inducing-point Gram matrix.
            let gradients = sparse_gp
                .kernel
                .parameter_gradients(&fitted.inducing_points, &fitted.inducing_points);

            // Collapse each gradient matrix to its mean entry and take a
            // gradient-ascent step, keeping parameters strictly positive.
            let mut params = sparse_gp.kernel.parameters();
            for (i, grad_matrix) in gradients.iter().enumerate() {
                let grad_scalar =
                    grad_matrix.sum() / (grad_matrix.nrows() * grad_matrix.ncols()) as f64;
                params[i] += learning_rate * grad_scalar;
                params[i] = params[i].max(1e-6);
            }
            sparse_gp.kernel.set_parameters(&params);
        }

        Ok(best_likelihood)
    }
}