use ghostflow_core::Tensor;

/// Gaussian process regression with a configurable covariance kernel.
///
/// `fit` factors `K + alpha*I` with a Cholesky decomposition and caches the
/// weight vector `alpha_`; `predict` is a dot product of the cross-kernel with
/// those cached weights.
pub struct GaussianProcessRegressor {
    /// Covariance function used to build the kernel (Gram) matrix.
    pub kernel: GPKernel,
    /// Value added to the kernel diagonal during fit (jitter / observation noise).
    pub alpha: f32,
    /// When true, targets are standardized (zero mean, unit variance) before fitting.
    pub normalize_y: bool,
    /// Declared for API parity; no hyperparameter optimization is implemented here.
    pub n_restarts_optimizer: usize,
    // Training inputs, flattened row-major (n_samples_ x n_features_).
    x_train_: Option<Vec<f32>>,
    // Training targets (standardized if `normalize_y` was set).
    y_train_: Option<Vec<f32>>,
    #[allow(dead_code)]
    // Optional cache slot for the inverse kernel matrix.
    k_inv_: Option<Vec<f32>>,
    // Weights `a = (K + alpha*I)^-1 y`, cached by `fit` for prediction.
    alpha_: Option<Vec<f32>>,
    // Target mean captured during fit (0.0 when not normalizing).
    y_mean_: f32,
    // Target std captured during fit (1.0 when not normalizing).
    y_std_: f32,
    n_samples_: usize,
    n_features_: usize,
}
21
/// Covariance functions for Gaussian process models. Composite kernels are
/// built structurally with `Sum` and `Product`.
#[derive(Clone, Debug)]
pub enum GPKernel {
    RBF { length_scale: f32 },
    Matern { length_scale: f32, nu: f32 },
    RationalQuadratic { length_scale: f32, alpha: f32 },
    DotProduct { sigma_0: f32 },
    WhiteKernel { noise_level: f32 },
    Sum(Box<GPKernel>, Box<GPKernel>),
    Product(Box<GPKernel>, Box<GPKernel>),
}

impl GPKernel {
    /// Convenience constructor for a squared-exponential (RBF) kernel.
    pub fn rbf(length_scale: f32) -> Self {
        GPKernel::RBF { length_scale }
    }

    /// Convenience constructor for a Matern kernel with smoothness `nu`.
    pub fn matern(length_scale: f32, nu: f32) -> Self {
        GPKernel::Matern { length_scale, nu }
    }

    // Squared Euclidean distance between two feature vectors.
    fn squared_distance(x1: &[f32], x2: &[f32]) -> f32 {
        x1.iter()
            .zip(x2.iter())
            .map(|(&a, &b)| (a - b).powi(2))
            .sum()
    }

    /// Evaluate the kernel between two feature vectors of equal length.
    fn compute(&self, x1: &[f32], x2: &[f32]) -> f32 {
        match self {
            GPKernel::RBF { length_scale } => {
                let sq_dist = Self::squared_distance(x1, x2);
                (-sq_dist / (2.0 * length_scale * length_scale)).exp()
            }
            GPKernel::Matern { length_scale, nu } => {
                let scaled = Self::squared_distance(x1, x2).sqrt() / length_scale;
                // Closed forms exist for nu in {0.5, 1.5, 2.5}; any other nu
                // falls back to the exponential (nu = 0.5) form, which is only
                // an approximation of the true Matern kernel.
                match *nu {
                    n if n == 0.5 => (-scaled).exp(),
                    n if n == 1.5 => {
                        let t = 3.0f32.sqrt() * scaled;
                        (1.0 + t) * (-t).exp()
                    }
                    n if n == 2.5 => {
                        let t = 5.0f32.sqrt() * scaled;
                        (1.0 + t + t * t / 3.0) * (-t).exp()
                    }
                    _ => (-scaled).exp(),
                }
            }
            GPKernel::RationalQuadratic { length_scale, alpha } => {
                let sq_dist = Self::squared_distance(x1, x2);
                (1.0 + sq_dist / (2.0 * alpha * length_scale * length_scale)).powf(-*alpha)
            }
            GPKernel::DotProduct { sigma_0 } => {
                let dot: f32 = x1.iter().zip(x2.iter()).map(|(&a, &b)| a * b).sum();
                sigma_0 * sigma_0 + dot
            }
            GPKernel::WhiteKernel { noise_level } => {
                // Contributes noise only when the two points coincide
                // (component-wise, within a tight tolerance).
                let coincide = x1
                    .iter()
                    .zip(x2.iter())
                    .all(|(&a, &b)| (a - b).abs() < 1e-10);
                if coincide {
                    *noise_level
                } else {
                    0.0
                }
            }
            GPKernel::Sum(k1, k2) => k1.compute(x1, x2) + k2.compute(x1, x2),
            GPKernel::Product(k1, k2) => k1.compute(x1, x2) * k2.compute(x1, x2),
        }
    }
}
88
89impl GaussianProcessRegressor {
90 pub fn new(kernel: GPKernel) -> Self {
91 GaussianProcessRegressor {
92 kernel,
93 alpha: 1e-10,
94 normalize_y: false,
95 n_restarts_optimizer: 0,
96 x_train_: None,
97 y_train_: None,
98 k_inv_: None,
99 alpha_: None,
100 y_mean_: 0.0,
101 y_std_: 1.0,
102 n_samples_: 0,
103 n_features_: 0,
104 }
105 }
106
107 pub fn alpha(mut self, alpha: f32) -> Self {
108 self.alpha = alpha;
109 self
110 }
111
112 pub fn normalize_y(mut self, normalize: bool) -> Self {
113 self.normalize_y = normalize;
114 self
115 }
116
117 fn compute_kernel_matrix(&self, x1: &[f32], n1: usize, x2: &[f32], n2: usize, n_features: usize) -> Vec<f32> {
118 let mut k = vec![0.0f32; n1 * n2];
119
120 for i in 0..n1 {
121 for j in 0..n2 {
122 let xi = &x1[i * n_features..(i + 1) * n_features];
123 let xj = &x2[j * n_features..(j + 1) * n_features];
124 k[i * n2 + j] = self.kernel.compute(xi, xj);
125 }
126 }
127
128 k
129 }
130
131 fn cholesky_solve(&self, l: &[f32], b: &[f32], n: usize) -> Vec<f32> {
132 let mut y = vec![0.0f32; n];
134 for i in 0..n {
135 let mut sum = b[i];
136 for j in 0..i {
137 sum -= l[i * n + j] * y[j];
138 }
139 y[i] = sum / l[i * n + i].max(1e-10);
140 }
141
142 let mut x = vec![0.0f32; n];
144 for i in (0..n).rev() {
145 let mut sum = y[i];
146 for j in (i + 1)..n {
147 sum -= l[j * n + i] * x[j];
148 }
149 x[i] = sum / l[i * n + i].max(1e-10);
150 }
151
152 x
153 }
154
155 fn cholesky_decomposition(&self, a: &[f32], n: usize) -> Vec<f32> {
156 let mut l = vec![0.0f32; n * n];
157
158 for i in 0..n {
159 for j in 0..=i {
160 let mut sum = 0.0f32;
161 if i == j {
162 for k in 0..j {
163 sum += l[j * n + k] * l[j * n + k];
164 }
165 l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
166 } else {
167 for k in 0..j {
168 sum += l[i * n + k] * l[j * n + k];
169 }
170 l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
171 }
172 }
173 }
174
175 l
176 }
177
178 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
179 let x_data = x.data_f32();
180 let mut y_data = y.data_f32();
181 let n_samples = x.dims()[0];
182 let n_features = x.dims()[1];
183
184 self.n_samples_ = n_samples;
185 self.n_features_ = n_features;
186
187 if self.normalize_y {
189 self.y_mean_ = y_data.iter().sum::<f32>() / n_samples as f32;
190 let variance: f32 = y_data.iter().map(|&y| (y - self.y_mean_).powi(2)).sum::<f32>() / n_samples as f32;
191 self.y_std_ = variance.sqrt().max(1e-10);
192 y_data = y_data.iter().map(|&y| (y - self.y_mean_) / self.y_std_).collect();
193 } else {
194 self.y_mean_ = 0.0;
195 self.y_std_ = 1.0;
196 }
197
198 let mut k = self.compute_kernel_matrix(&x_data, n_samples, &x_data, n_samples, n_features);
200
201 for i in 0..n_samples {
203 k[i * n_samples + i] += self.alpha;
204 }
205
206 let l = self.cholesky_decomposition(&k, n_samples);
208
209 let alpha = self.cholesky_solve(&l, &y_data, n_samples);
211
212 self.x_train_ = Some(x_data);
213 self.y_train_ = Some(y_data);
214 self.alpha_ = Some(alpha);
215 }
216
217 pub fn predict(&self, x: &Tensor) -> Tensor {
218 let x_data = x.data_f32();
219 let n_samples = x.dims()[0];
220 let n_features = x.dims()[1];
221
222 let x_train = self.x_train_.as_ref().expect("Model not fitted");
223 let alpha = self.alpha_.as_ref().expect("Model not fitted");
224 let n_train = self.n_samples_;
225
226 let k_star = self.compute_kernel_matrix(&x_data, n_samples, x_train, n_train, n_features);
228
229 let mut predictions = vec![0.0f32; n_samples];
231 for i in 0..n_samples {
232 for j in 0..n_train {
233 predictions[i] += k_star[i * n_train + j] * alpha[j];
234 }
235 predictions[i] = predictions[i] * self.y_std_ + self.y_mean_;
237 }
238
239 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
240 }
241
242 pub fn predict_with_std(&self, x: &Tensor) -> (Tensor, Tensor) {
243 let mean = self.predict(x);
244
245 let n_samples = x.dims()[0];
247 let std = vec![self.alpha.sqrt(); n_samples];
248
249 (mean, Tensor::from_slice(&std, &[n_samples]).unwrap())
250 }
251
252 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
253 let predictions = self.predict(x);
254 let pred_data = predictions.data_f32();
255 let y_data = y.data_f32();
256
257 let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
258 let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
259 let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
260
261 1.0 - ss_res / ss_tot.max(1e-10)
262 }
263}
264
265
/// Gaussian process classifier using a logistic link; `fit` runs a fixed
/// number of kernel gradient-ascent steps to approximate the latent posterior
/// mode at the training points.
pub struct GaussianProcessClassifier {
    /// Covariance function over inputs for the latent function.
    pub kernel: GPKernel,
    /// Number of gradient-ascent iterations performed during `fit`.
    pub max_iter_predict: usize,
    // Training inputs, flattened row-major (n_samples_ x n_features_).
    x_train_: Option<Vec<f32>>,
    // Raw training labels exactly as passed to `fit`.
    y_train_: Option<Vec<f32>>,
    // Latent function values at the training points after optimization.
    f_cached_: Option<Vec<f32>>,
    n_samples_: usize,
    n_features_: usize,
    // Inferred as (max label value) + 1 during `fit`.
    n_classes_: usize,
}
277
278impl GaussianProcessClassifier {
279 pub fn new(kernel: GPKernel) -> Self {
280 GaussianProcessClassifier {
281 kernel,
282 max_iter_predict: 100,
283 x_train_: None,
284 y_train_: None,
285 f_cached_: None,
286 n_samples_: 0,
287 n_features_: 0,
288 n_classes_: 0,
289 }
290 }
291
292 fn sigmoid(x: f32) -> f32 {
293 if x >= 0.0 {
294 1.0 / (1.0 + (-x).exp())
295 } else {
296 let exp_x = x.exp();
297 exp_x / (1.0 + exp_x)
298 }
299 }
300
301 fn compute_kernel_matrix(&self, x1: &[f32], n1: usize, x2: &[f32], n2: usize, n_features: usize) -> Vec<f32> {
302 let mut k = vec![0.0f32; n1 * n2];
303
304 for i in 0..n1 {
305 for j in 0..n2 {
306 let xi = &x1[i * n_features..(i + 1) * n_features];
307 let xj = &x2[j * n_features..(j + 1) * n_features];
308 k[i * n2 + j] = self.kernel.compute(xi, xj);
309 }
310 }
311
312 k
313 }
314
315 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
316 let x_data = x.data_f32();
317 let y_data = y.data_f32();
318 let n_samples = x.dims()[0];
319 let n_features = x.dims()[1];
320
321 self.n_samples_ = n_samples;
322 self.n_features_ = n_features;
323 self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;
324
325 let y_binary: Vec<f32> = y_data.iter()
327 .map(|&y| if y > 0.5 { 1.0 } else { -1.0 })
328 .collect();
329
330 let k = self.compute_kernel_matrix(&x_data, n_samples, &x_data, n_samples, n_features);
332
333 let mut f = vec![0.0f32; n_samples];
335
336 for _ in 0..self.max_iter_predict {
337 let pi: Vec<f32> = f.iter().map(|&fi| Self::sigmoid(fi)).collect();
339
340 let _w: Vec<f32> = pi.iter().map(|&p| p * (1.0 - p)).collect();
342
343 let grad: Vec<f32> = y_binary.iter().zip(pi.iter())
346 .map(|(&y, &p)| (y + 1.0) / 2.0 - p)
347 .collect();
348
349 let step_size = 0.1;
351 for i in 0..n_samples {
352 let mut k_grad = 0.0f32;
353 for j in 0..n_samples {
354 k_grad += k[i * n_samples + j] * grad[j];
355 }
356 f[i] += step_size * k_grad;
357 }
358 }
359
360 self.x_train_ = Some(x_data);
361 self.y_train_ = Some(y_data);
362 self.f_cached_ = Some(f);
363 }
364
365 pub fn predict(&self, x: &Tensor) -> Tensor {
366 let proba = self.predict_proba(x);
367 let proba_data = proba.data_f32();
368 let n_samples = x.dims()[0];
369
370 let predictions: Vec<f32> = (0..n_samples)
371 .map(|i| {
372 if self.n_classes_ == 2 {
373 if proba_data[i * 2 + 1] > 0.5 { 1.0 } else { 0.0 }
374 } else {
375 let start = i * self.n_classes_;
376 let probs = &proba_data[start..start + self.n_classes_];
377 probs.iter()
378 .enumerate()
379 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
380 .map(|(c, _)| c as f32)
381 .unwrap_or(0.0)
382 }
383 })
384 .collect();
385
386 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
387 }
388
389 pub fn predict_proba(&self, x: &Tensor) -> Tensor {
390 let x_data = x.data_f32();
391 let n_samples = x.dims()[0];
392 let n_features = x.dims()[1];
393
394 let x_train = self.x_train_.as_ref().expect("Model not fitted");
395 let f_cached = self.f_cached_.as_ref().expect("Model not fitted");
396 let n_train = self.n_samples_;
397
398 let k_star = self.compute_kernel_matrix(&x_data, n_samples, x_train, n_train, n_features);
400
401 let mut f_star = vec![0.0f32; n_samples];
403 for i in 0..n_samples {
404 for j in 0..n_train {
405 f_star[i] += k_star[i * n_train + j] * f_cached[j] / n_train as f32;
406 }
407 }
408
409 let mut probs = Vec::with_capacity(n_samples * self.n_classes_);
411 for i in 0..n_samples {
412 let p1 = Self::sigmoid(f_star[i]);
413 if self.n_classes_ == 2 {
414 probs.push(1.0 - p1);
415 probs.push(p1);
416 } else {
417 for c in 0..self.n_classes_ {
419 probs.push(if c == 1 { p1 } else { (1.0 - p1) / (self.n_classes_ - 1) as f32 });
420 }
421 }
422 }
423
424 Tensor::from_slice(&probs, &[n_samples, self.n_classes_]).unwrap()
425 }
426
427 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
428 let predictions = self.predict(x);
429 let pred_data = predictions.data_f32();
430 let y_data = y.data_f32();
431
432 let correct: usize = pred_data.iter()
433 .zip(y_data.iter())
434 .filter(|(&p, &y)| (p - y).abs() < 0.5)
435 .count();
436
437 correct as f32 / y_data.len() as f32
438 }
439}
440
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: fitting and predicting on a tiny 1-D dataset should return a
    // prediction vector of the expected shape.
    #[test]
    fn test_gp_regressor() {
        let inputs = Tensor::from_slice(&[0.0, 1.0, 2.0, 3.0, 4.0], &[5, 1]).unwrap();
        let targets = Tensor::from_slice(&[0.0, 1.0, 4.0, 9.0, 16.0], &[5]).unwrap();

        let mut model = GaussianProcessRegressor::new(GPKernel::rbf(1.0)).alpha(0.1);
        model.fit(&inputs, &targets);

        let preds = model.predict(&inputs);
        assert_eq!(preds.dims(), &[5]);
    }

    // Smoke test: AND-gate style dataset where only (1, 1) is positive.
    #[test]
    fn test_gp_classifier() {
        let inputs = Tensor::from_slice(
            &[
                0.0, 0.0, //
                0.0, 1.0, //
                1.0, 0.0, //
                1.0, 1.0,
            ],
            &[4, 2],
        )
        .unwrap();
        let labels = Tensor::from_slice(&[0.0, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut model = GaussianProcessClassifier::new(GPKernel::rbf(1.0));
        model.fit(&inputs, &labels);

        let preds = model.predict(&inputs);
        assert_eq!(preds.dims(), &[4]);
    }
}