sklears_gaussian_process/gpr.rs

use scirs2_core::ndarray::{Array1, Array2, Axis};
use sklears_core::{
    error::{Result as SklResult, SklearsError},
    traits::{Estimator, Fit, Predict, Untrained},
    types::Float,
};

use crate::kernels::Kernel;

/// Configuration for [`GaussianProcessRegressor`].
#[derive(Debug, Clone)]
pub struct GaussianProcessRegressorConfig {
    /// Value added to the diagonal of the kernel matrix during fitting.
    pub alpha: f64,
    /// Name of the optimizer used for kernel hyperparameter tuning, if any.
    pub optimizer: Option<String>,
    /// Number of optimizer restarts from new random hyperparameters.
    pub n_restarts_optimizer: usize,
    /// Whether to normalize the targets to zero mean and unit variance.
    pub normalize_y: bool,
    /// Whether to store a copy of the training inputs (required for prediction).
    pub copy_x_train: bool,
    /// Seed for the random number generator.
    pub random_state: Option<u64>,
}

impl Default for GaussianProcessRegressorConfig {
    fn default() -> Self {
        Self {
            alpha: 1e-10,
            optimizer: Some("fmin_l_bfgs_b".to_string()),
            n_restarts_optimizer: 0,
            normalize_y: false,
            copy_x_train: true,
            random_state: None,
        }
    }
}

/// Gaussian process regressor.
///
/// Type-state estimator: an untrained regressor is configured via the builder
/// methods below and turned into a trained one by [`Fit::fit`].
#[derive(Debug, Clone)]
pub struct GaussianProcessRegressor<S = Untrained> {
    state: S,
    kernel: Option<Box<dyn Kernel>>,
    config: GaussianProcessRegressorConfig,
}

/// State held by a fitted [`GaussianProcessRegressor`].
#[derive(Debug, Clone)]
#[allow(non_snake_case)]
pub struct GprTrained {
    /// Training inputs (only kept when `copy_x_train` is true).
    pub X_train: Option<Array2<f64>>,
    /// (Possibly normalized) training targets.
    pub y_train: Array1<f64>,
    /// Mean of the original targets (0.0 when `normalize_y` is false).
    pub y_mean: f64,
    /// Standard deviation of the original targets (1.0 when `normalize_y` is false).
    pub y_std: f64,
    /// Dual coefficients used for the predictive mean.
    pub alpha: Array1<f64>,
    /// Lower-triangular Cholesky factor of the regularized kernel matrix.
    pub L: Array2<f64>,
    /// Regularized training kernel matrix `k(X, X) + alpha * I`.
    pub K: Array2<f64>,
    /// Kernel the model was fitted with.
    pub kernel: Box<dyn Kernel>,
    /// Log marginal likelihood of the training data.
    pub log_marginal_likelihood_value: f64,
}

impl GaussianProcessRegressor<Untrained> {
    /// Create a new regressor with the default configuration and no kernel.
    pub fn new() -> Self {
        Self {
            state: Untrained,
            kernel: None,
            config: GaussianProcessRegressorConfig::default(),
        }
    }

    /// Set the kernel (covariance function). Required before fitting.
    pub fn kernel(mut self, kernel: Box<dyn Kernel>) -> Self {
        self.kernel = Some(kernel);
        self
    }

    /// Set the value added to the kernel matrix diagonal during fitting.
    pub fn alpha(mut self, alpha: f64) -> Self {
        self.config.alpha = alpha;
        self
    }

    /// Set the hyperparameter optimizer (`None` disables optimization).
    pub fn optimizer(mut self, optimizer: Option<String>) -> Self {
        self.config.optimizer = optimizer;
        self
    }

    /// Set the number of optimizer restarts.
    pub fn n_restarts_optimizer(mut self, n_restarts: usize) -> Self {
        self.config.n_restarts_optimizer = n_restarts;
        self
    }

    /// Enable or disable target normalization.
    pub fn normalize_y(mut self, normalize_y: bool) -> Self {
        self.config.normalize_y = normalize_y;
        self
    }

    /// Choose whether to store a copy of the training inputs.
    pub fn copy_x_train(mut self, copy_x_train: bool) -> Self {
        self.config.copy_x_train = copy_x_train;
        self
    }

    /// Set the random seed.
    pub fn random_state(mut self, random_state: Option<u64>) -> Self {
        self.config.random_state = random_state;
        self
    }
}

impl Estimator for GaussianProcessRegressor<Untrained> {
    type Config = GaussianProcessRegressorConfig;
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}

impl Estimator for GaussianProcessRegressor<GprTrained> {
    type Config = GaussianProcessRegressorConfig;
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}

impl Fit<Array2<f64>, Array1<f64>> for GaussianProcessRegressor<Untrained> {
    type Fitted = GaussianProcessRegressor<GprTrained>;

    #[allow(non_snake_case)]
    fn fit(self, X: &Array2<f64>, y: &Array1<f64>) -> SklResult<Self::Fitted> {
        if X.nrows() != y.len() {
            return Err(SklearsError::InvalidInput(
                "X and y must have the same number of samples".to_string(),
            ));
        }

        let kernel = self
            .kernel
            .ok_or_else(|| SklearsError::InvalidInput("Kernel must be specified".to_string()))?;

        // Optionally normalize the targets to zero mean and unit variance.
        let (y_normalized, y_mean, y_std) = if self.config.normalize_y {
            let mean = y.mean().unwrap_or(0.0);
            let std = y.std(0.0);
            // Guard against constant targets, whose standard deviation is zero.
            let std = if std == 0.0 { 1.0 } else { std };
            let y_norm = (y - mean) / std;
            (y_norm, mean, std)
        } else {
            (y.to_owned(), 0.0, 1.0)
        };

        // Training kernel matrix K = k(X, X).
        let K = kernel.compute_kernel_matrix(X, None)?;

        // Regularize the diagonal: K += alpha * I (noise term / numerical jitter).
        let mut K_reg = K.clone();
        for i in 0..K_reg.nrows() {
            K_reg[[i, i]] += self.config.alpha;
        }

        // Lower-triangular Cholesky factor L with K_reg = L * L^T.
        let L = crate::utils::robust_cholesky(&K_reg)?;

        // Dual coefficients for the predictive mean, mean = k_star^T * alpha.
        let alpha = crate::utils::triangular_solve(&L, &y_normalized)?;

        let log_marginal_likelihood_value =
            crate::utils::log_marginal_likelihood(&L, &alpha, &y_normalized);

        // Training inputs are needed at prediction time; store them unless the
        // caller opted out via `copy_x_train = false`.
        let X_train = if self.config.copy_x_train {
            Some(X.to_owned())
        } else {
            None
        };

        Ok(GaussianProcessRegressor {
            state: GprTrained {
                X_train,
                y_train: y_normalized,
                y_mean,
                y_std,
                alpha,
                L,
                K: K_reg,
                kernel,
                log_marginal_likelihood_value,
            },
            kernel: None,
            config: self.config,
        })
    }
}

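// Quick reference for the linear algebra in `fit` above and `predict_with_std`
// below (standard GP regression; see Rasmussen & Williams, "Gaussian Processes
// for Machine Learning", ch. 2). This sketch assumes `triangular_solve(&L, b)`
// solves the full system K x = b given the Cholesky factor L. With
// K = k(X, X) + config.alpha * I and L its lower Cholesky factor:
//
//   state.alpha = K^{-1} y                                 (dual coefficients)
//   mean(x*)    = k(X, x*)^T state.alpha                   (posterior mean)
//   var(x*)     = k(x*, x*) - k(X, x*)^T K^{-1} k(X, x*)   (posterior variance)
//   log p(y)    = -(1/2) y^T state.alpha - sum_i log L_ii - (n/2) log 2*pi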
impl Predict<Array2<f64>, Array1<f64>> for GaussianProcessRegressor<GprTrained> {
    #[allow(non_snake_case)]
    fn predict(&self, X: &Array2<f64>) -> SklResult<Array1<f64>> {
        let (mean, _) = self.predict_with_std(X)?;
        Ok(mean)
    }
}

impl GaussianProcessRegressor<GprTrained> {
    /// Predict the posterior mean and standard deviation at the query points.
    #[allow(non_snake_case)]
    pub fn predict_with_std(&self, X: &Array2<f64>) -> SklResult<(Array1<f64>, Array1<f64>)> {
        // Prediction needs the stored training inputs; this fails when the
        // model was fitted with `copy_x_train = false`.
        let X_train = self
            .state
            .X_train
            .as_ref()
            .ok_or_else(|| SklearsError::NotFitted {
                operation: "predict_with_std".to_string(),
            })?;

        // Cross-kernel K_star[i, j] = k(x_i, x*_j) between training and query points.
        let K_star = self.state.kernel.compute_kernel_matrix(X_train, Some(X))?;

        // Posterior mean in normalized target space: mean = K_star^T * alpha.
        let mean_normalized = K_star.t().dot(&self.state.alpha);

        // Undo target normalization if it was applied during fitting.
        let mean = if self.config.normalize_y {
            mean_normalized * self.state.y_std + self.state.y_mean
        } else {
            mean_normalized
        };

        // Quadratic form k_star^T K^{-1} k_star of the posterior variance.
        // NOTE: this assumes `triangular_solve(&L, b)` solves the full system
        // K x = b given the Cholesky factor L, the only reading consistent
        // with how `alpha` is used for the mean above and for the log marginal
        // likelihood; under that reading the quadratic form is k_star . v with
        // v = K^{-1} k_star, not v . v.
        let mut quad = Array1::<f64>::zeros(X.nrows());
        for i in 0..X.nrows() {
            let k_star_i = K_star.column(i).to_owned();
            let v_i = crate::utils::triangular_solve(&self.state.L, &k_star_i)?;
            quad[i] = k_star_i.dot(&v_i);
        }

        // Prior variance k(x*, x*) at each query point.
        let K_star_star_diag = X
            .axis_iter(Axis(0))
            .map(|x| self.state.kernel.kernel(&x, &x))
            .collect::<Array1<f64>>();

        let var_normalized = K_star_star_diag - quad;

        // Clamp small negative values caused by floating-point round-off.
        let var_normalized = var_normalized.mapv(|x| x.max(0.0));

        // Rescale the standard deviation back to the original target scale.
        let std = if self.config.normalize_y {
            var_normalized.mapv(|x| (x * self.state.y_std.powi(2)).sqrt())
        } else {
            var_normalized.mapv(|x| x.sqrt())
        };

        Ok((mean, std))
    }

    /// Log marginal likelihood of the training data under the fitted model.
    pub fn log_marginal_likelihood(&self) -> f64 {
        self.state.log_marginal_likelihood_value
    }
}

impl Default for GaussianProcessRegressor<Untrained> {
    fn default() -> Self {
        Self::new()
    }
}
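
// A minimal usage sketch. It assumes `crate::kernels` exposes an RBF kernel
// constructed as `RBF::new(length_scale)`; that constructor is hypothetical,
// so the test is `#[ignore]`d and should be adapted to the crate's actual
// kernel API.
#[cfg(test)]
mod usage_sketch {
    use super::*;
    use crate::kernels::Kernel;
    use scirs2_core::ndarray::{Array1, Array2};
    use sklears_core::traits::Fit;

    #[test]
    #[ignore = "illustrative only; depends on the concrete kernel API"]
    fn fit_then_predict_with_std() {
        // Four 1-D training points with a smooth target function.
        let x = Array2::from_shape_vec((4, 1), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
        let y = Array1::from_vec(vec![0.0, 0.8, 0.9, 0.1]);

        // Hypothetical kernel constructor; substitute a real one from `crate::kernels`.
        let kernel: Box<dyn Kernel> = Box::new(crate::kernels::RBF::new(1.0));

        let gpr = GaussianProcessRegressor::new()
            .kernel(kernel)
            .alpha(1e-10)
            .normalize_y(true)
            .fit(&x, &y)
            .expect("fit should succeed on well-conditioned data");

        // Posterior mean and standard deviation at the training inputs; with
        // near-zero `alpha` the posterior mean should interpolate the targets.
        let (mean, std) = gpr.predict_with_std(&x).expect("stored X_train is required");
        assert_eq!(mean.len(), 4);
        assert_eq!(std.len(), 4);
    }
}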