ghostflow_ml/
gaussian_process.rs

1//! Gaussian Process models for regression and classification
2
3use ghostflow_core::Tensor;
4
/// Gaussian Process Regressor
///
/// Exact GP regression: `fit` factorizes `K + alpha * I` with Cholesky and
/// caches `alpha_ = (K + alpha*I)^{-1} y`; `predict` returns the posterior
/// mean `K_* alpha_` (denormalized if `normalize_y` was set).
pub struct GaussianProcessRegressor {
    /// Covariance function used to build the kernel matrix.
    pub kernel: GPKernel,
    pub alpha: f32,  // Noise level added to the kernel diagonal (jitter / observation noise)
    pub normalize_y: bool,  // If true, targets are standardized (zero mean, unit std) before fitting
    pub n_restarts_optimizer: usize,  // NOTE(review): not read anywhere in this file — hyperparameter optimization is not implemented
    x_train_: Option<Vec<f32>>,  // Training inputs, row-major (n_samples_ x n_features_)
    y_train_: Option<Vec<f32>>,  // Training targets (stored normalized when normalize_y is true)
    #[allow(dead_code)]
    k_inv_: Option<Vec<f32>>,  // Inverse of kernel matrix — never populated by `fit` (dead field)
    alpha_: Option<Vec<f32>>,  // K^-1 * y
    y_mean_: f32,  // Target mean used for (de)normalization; 0.0 when normalize_y is false
    y_std_: f32,   // Target std used for (de)normalization; 1.0 when normalize_y is false
    n_samples_: usize,   // Number of training rows
    n_features_: usize,  // Number of feature columns per row
}
21
/// Covariance (kernel) functions for Gaussian Process models.
///
/// Kernels compose: `Sum` and `Product` combine two kernels pointwise,
/// so arbitrary covariance structures can be built from the primitives.
#[derive(Clone, Debug)]
pub enum GPKernel {
    /// Squared-exponential: exp(-||a-b||^2 / (2 * l^2)).
    RBF { length_scale: f32 },
    /// Matern family; closed forms for nu in {0.5, 1.5, 2.5}.
    Matern { length_scale: f32, nu: f32 },
    /// Rational quadratic: a scale mixture of RBF kernels.
    RationalQuadratic { length_scale: f32, alpha: f32 },
    /// Linear kernel with bias term sigma_0^2.
    DotProduct { sigma_0: f32 },
    /// Contributes `noise_level` only when both inputs coincide.
    WhiteKernel { noise_level: f32 },
    /// Pointwise sum of two kernels.
    Sum(Box<GPKernel>, Box<GPKernel>),
    /// Pointwise product of two kernels.
    Product(Box<GPKernel>, Box<GPKernel>),
}

impl GPKernel {
    /// Shorthand constructor for an RBF kernel.
    pub fn rbf(length_scale: f32) -> Self {
        GPKernel::RBF { length_scale }
    }

    /// Shorthand constructor for a Matern kernel.
    pub fn matern(length_scale: f32, nu: f32) -> Self {
        GPKernel::Matern { length_scale, nu }
    }

    /// Evaluate k(x1, x2) for one pair of feature vectors.
    fn compute(&self, x1: &[f32], x2: &[f32]) -> f32 {
        // Squared Euclidean distance, shared by several variants below.
        fn sq_dist(a: &[f32], b: &[f32]) -> f32 {
            a.iter().zip(b.iter()).map(|(&p, &q)| (p - q).powi(2)).sum()
        }

        match self {
            GPKernel::RBF { length_scale } => {
                (-sq_dist(x1, x2) / (2.0 * length_scale * length_scale)).exp()
            }
            GPKernel::Matern { length_scale, nu } => {
                // Scaled distance d = ||x1 - x2|| / l.
                let d = sq_dist(x1, x2).sqrt() / length_scale;
                match *nu {
                    n if n == 0.5 => (-d).exp(),
                    n if n == 1.5 => {
                        let t = 3.0f32.sqrt() * d;
                        (1.0 + t) * (-t).exp()
                    }
                    n if n == 2.5 => {
                        let t = 5.0f32.sqrt() * d;
                        (1.0 + t + t * t / 3.0) * (-t).exp()
                    }
                    // Other nu: fall back to the exponential (nu = 0.5) form.
                    _ => (-d).exp(),
                }
            }
            GPKernel::RationalQuadratic { length_scale, alpha } => {
                let r2 = sq_dist(x1, x2);
                (1.0 + r2 / (2.0 * alpha * length_scale * length_scale)).powf(-*alpha)
            }
            GPKernel::DotProduct { sigma_0 } => {
                let dot: f32 = x1.iter().zip(x2.iter()).map(|(&a, &b)| a * b).sum();
                sigma_0.powi(2) + dot
            }
            GPKernel::WhiteKernel { noise_level } => {
                // Non-zero only when the two points coincide within tolerance.
                let coincide = x1.iter().zip(x2.iter()).all(|(&a, &b)| (a - b).abs() < 1e-10);
                if coincide { *noise_level } else { 0.0 }
            }
            GPKernel::Sum(k1, k2) => k1.compute(x1, x2) + k2.compute(x1, x2),
            GPKernel::Product(k1, k2) => k1.compute(x1, x2) * k2.compute(x1, x2),
        }
    }
}
88
impl GaussianProcessRegressor {
    /// Create a regressor with the given kernel and defaults:
    /// tiny diagonal jitter (1e-10), no target normalization.
    pub fn new(kernel: GPKernel) -> Self {
        GaussianProcessRegressor {
            kernel,
            alpha: 1e-10,
            normalize_y: false,
            n_restarts_optimizer: 0,
            x_train_: None,
            y_train_: None,
            k_inv_: None,
            alpha_: None,
            y_mean_: 0.0,
            y_std_: 1.0,
            n_samples_: 0,
            n_features_: 0,
        }
    }

    /// Builder: set the noise level added to the kernel diagonal.
    pub fn alpha(mut self, alpha: f32) -> Self {
        self.alpha = alpha;
        self
    }

    /// Builder: enable/disable standardization of targets before fitting.
    pub fn normalize_y(mut self, normalize: bool) -> Self {
        self.normalize_y = normalize;
        self
    }

    /// Dense n1 x n2 Gram matrix with K[i * n2 + j] = kernel(x1[i], x2[j]).
    /// Both inputs are row-major with `n_features` columns per row.
    fn compute_kernel_matrix(&self, x1: &[f32], n1: usize, x2: &[f32], n2: usize, n_features: usize) -> Vec<f32> {
        let mut k = vec![0.0f32; n1 * n2];
        
        for i in 0..n1 {
            for j in 0..n2 {
                let xi = &x1[i * n_features..(i + 1) * n_features];
                let xj = &x2[j * n_features..(j + 1) * n_features];
                k[i * n2 + j] = self.kernel.compute(xi, xj);
            }
        }
        
        k
    }

    /// Solve (L L^T) x = b given the lower-triangular Cholesky factor `l`
    /// (row-major n x n). Diagonal divisors are clamped to 1e-10 so a
    /// near-singular factor degrades gracefully instead of producing inf/NaN.
    fn cholesky_solve(&self, l: &[f32], b: &[f32], n: usize) -> Vec<f32> {
        // Forward substitution: L * y = b
        let mut y = vec![0.0f32; n];
        for i in 0..n {
            let mut sum = b[i];
            for j in 0..i {
                sum -= l[i * n + j] * y[j];
            }
            y[i] = sum / l[i * n + i].max(1e-10);
        }

        // Backward substitution: L^T * x = y
        let mut x = vec![0.0f32; n];
        for i in (0..n).rev() {
            let mut sum = y[i];
            for j in (i + 1)..n {
                // l[j * n + i] is (L^T)[i][j]: read the transpose in place.
                sum -= l[j * n + i] * x[j];
            }
            x[i] = sum / l[i * n + i].max(1e-10);
        }

        x
    }

    /// Cholesky factorization A = L L^T for a symmetric positive-definite
    /// matrix `a` (row-major n x n); returns lower-triangular L. Diagonal
    /// pivots are clamped to 1e-10 before the sqrt/division so a matrix
    /// that is not quite positive-definite does not yield NaN.
    fn cholesky_decomposition(&self, a: &[f32], n: usize) -> Vec<f32> {
        let mut l = vec![0.0f32; n * n];
        
        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;
                if i == j {
                    // Diagonal entry: sqrt of the remaining pivot.
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
                } else {
                    // Sub-diagonal entry, scaled by the pivot.
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }
        
        l
    }

    /// Fit the GP: build K + alpha*I over the training data, factorize it
    /// with Cholesky, and cache alpha_ = (K + alpha*I)^{-1} y.
    ///
    /// Expects `x` with dims [n_samples, n_features] and `y` with n_samples
    /// entries (panics on a 1-D `x` via the dims()[1] access). When
    /// `normalize_y` is set, targets are standardized here and predictions
    /// are mapped back in `predict`. Note: the `k_inv_` field is never
    /// populated — only `alpha_` is needed for mean prediction.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let mut y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_samples_ = n_samples;
        self.n_features_ = n_features;

        // Normalize y if requested (population variance, std clamped to 1e-10)
        if self.normalize_y {
            self.y_mean_ = y_data.iter().sum::<f32>() / n_samples as f32;
            let variance: f32 = y_data.iter().map(|&y| (y - self.y_mean_).powi(2)).sum::<f32>() / n_samples as f32;
            self.y_std_ = variance.sqrt().max(1e-10);
            y_data = y_data.iter().map(|&y| (y - self.y_mean_) / self.y_std_).collect();
        } else {
            self.y_mean_ = 0.0;
            self.y_std_ = 1.0;
        }

        // Compute kernel matrix K
        let mut k = self.compute_kernel_matrix(&x_data, n_samples, &x_data, n_samples, n_features);
        
        // Add noise to diagonal: K + alpha * I
        for i in 0..n_samples {
            k[i * n_samples + i] += self.alpha;
        }

        // Cholesky decomposition
        let l = self.cholesky_decomposition(&k, n_samples);
        
        // Compute alpha = K^-1 * y using Cholesky
        let alpha = self.cholesky_solve(&l, &y_data, n_samples);

        self.x_train_ = Some(x_data);
        self.y_train_ = Some(y_data);
        self.alpha_ = Some(alpha);
    }

    /// Posterior mean at each row of `x`: K(X*, X_train) * alpha_,
    /// denormalized back to the original target scale.
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let x_train = self.x_train_.as_ref().expect("Model not fitted");
        let alpha = self.alpha_.as_ref().expect("Model not fitted");
        let n_train = self.n_samples_;

        // Compute K_star = K(X_test, X_train)
        let k_star = self.compute_kernel_matrix(&x_data, n_samples, x_train, n_train, n_features);

        // Predict: y = K_star * alpha
        let mut predictions = vec![0.0f32; n_samples];
        for i in 0..n_samples {
            for j in 0..n_train {
                predictions[i] += k_star[i * n_train + j] * alpha[j];
            }
            // Denormalize
            predictions[i] = predictions[i] * self.y_std_ + self.y_mean_;
        }

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// Posterior mean plus a per-point standard deviation.
    ///
    /// NOTE(review): the std returned is the constant sqrt(alpha) for every
    /// point — the true posterior variance (k** - k*^T K^{-1} k*) is not
    /// computed. Treat the second tensor as a placeholder.
    pub fn predict_with_std(&self, x: &Tensor) -> (Tensor, Tensor) {
        let mean = self.predict(x);
        
        // For simplicity, return constant std (full implementation would compute posterior variance)
        let n_samples = x.dims()[0];
        let std = vec![self.alpha.sqrt(); n_samples];
        
        (mean, Tensor::from_slice(&std, &[n_samples]).unwrap())
    }

    /// Coefficient of determination R^2 = 1 - SS_res / SS_tot on (x, y);
    /// SS_tot is clamped to 1e-10 to avoid division by zero on constant y.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}
264
265
/// Gaussian Process Classifier using a simplified Laplace-style approximation
///
/// `fit` binarizes labels to {-1, +1} and locates latent function values by
/// fixed-step gradient ascent; `predict_proba` squashes approximate latent
/// means through a sigmoid.
pub struct GaussianProcessClassifier {
    /// Covariance function used to build the kernel matrix.
    pub kernel: GPKernel,
    pub max_iter_predict: usize,  // Iteration cap for the latent-mode search in `fit`
    x_train_: Option<Vec<f32>>,   // Training inputs, row-major (n_samples_ x n_features_)
    y_train_: Option<Vec<f32>>,   // Raw labels exactly as passed to `fit`
    f_cached_: Option<Vec<f32>>,  // Fitted latent function values at the training points
    n_samples_: usize,            // Number of training rows
    n_features_: usize,           // Feature columns per row
    n_classes_: usize,            // Inferred as max(label) + 1 in `fit`
}
277
impl GaussianProcessClassifier {
    /// Create a classifier with the given kernel and a 100-iteration
    /// cap on the latent-mode search.
    pub fn new(kernel: GPKernel) -> Self {
        GaussianProcessClassifier {
            kernel,
            max_iter_predict: 100,
            x_train_: None,
            y_train_: None,
            f_cached_: None,
            n_samples_: 0,
            n_features_: 0,
            n_classes_: 0,
        }
    }

    /// Numerically stable logistic sigmoid: branches on the sign of `x`
    /// so exp() is only ever taken of a non-positive argument.
    fn sigmoid(x: f32) -> f32 {
        if x >= 0.0 {
            1.0 / (1.0 + (-x).exp())
        } else {
            let exp_x = x.exp();
            exp_x / (1.0 + exp_x)
        }
    }

    /// Dense n1 x n2 Gram matrix with K[i * n2 + j] = kernel(x1[i], x2[j]).
    /// Same layout as the regressor's helper: row-major, `n_features` columns.
    fn compute_kernel_matrix(&self, x1: &[f32], n1: usize, x2: &[f32], n2: usize, n_features: usize) -> Vec<f32> {
        let mut k = vec![0.0f32; n1 * n2];
        
        for i in 0..n1 {
            for j in 0..n2 {
                let xi = &x1[i * n_features..(i + 1) * n_features];
                let xj = &x2[j * n_features..(j + 1) * n_features];
                k[i * n2 + j] = self.kernel.compute(xi, xj);
            }
        }
        
        k
    }

    /// Fit by approximately locating the mode of the latent posterior.
    ///
    /// Labels are binarized to {-1, +1} (anything > 0.5 maps to +1), so the
    /// procedure is effectively binary even though `n_classes_` is inferred
    /// as max(label) + 1. The loop performs fixed-step (0.1) gradient ascent
    /// on the latent vector f using K * grad — the Hessian weights W are
    /// computed but unused, so this is a simplification of the Newton
    /// iteration a full Laplace approximation would use.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_samples_ = n_samples;
        self.n_features_ = n_features;
        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        // Convert labels to {-1, 1} for binary classification
        let y_binary: Vec<f32> = y_data.iter()
            .map(|&y| if y > 0.5 { 1.0 } else { -1.0 })
            .collect();

        // Compute kernel matrix
        let k = self.compute_kernel_matrix(&x_data, n_samples, &x_data, n_samples, n_features);

        // Laplace approximation: find mode of posterior using Newton's method
        let mut f = vec![0.0f32; n_samples];
        
        for _ in 0..self.max_iter_predict {
            // Compute pi = sigmoid(f)
            let pi: Vec<f32> = f.iter().map(|&fi| Self::sigmoid(fi)).collect();
            
            // W = diag(pi * (1 - pi)) — computed but not applied below
            let _w: Vec<f32> = pi.iter().map(|&p| p * (1.0 - p)).collect();
            
            // Gradient: grad = y_binary * (1 - pi) - (1 - y_binary) * pi (simplified for {-1,1})
            // For binary: grad = (y + 1)/2 - pi
            let grad: Vec<f32> = y_binary.iter().zip(pi.iter())
                .map(|(&y, &p)| (y + 1.0) / 2.0 - p)
                .collect();
            
            // Newton update (simplified): f += step * K * grad
            let step_size = 0.1;
            for i in 0..n_samples {
                let mut k_grad = 0.0f32;
                for j in 0..n_samples {
                    k_grad += k[i * n_samples + j] * grad[j];
                }
                f[i] += step_size * k_grad;
            }
        }

        self.x_train_ = Some(x_data);
        self.y_train_ = Some(y_data);
        self.f_cached_ = Some(f);
    }

    /// Hard class labels for each row of `x`.
    ///
    /// Binary: thresholds P(class 1) at 0.5. Multi-class: argmax over the
    /// per-class probabilities from `predict_proba`.
    ///
    /// # Panics
    /// Panics (via `predict_proba`) if called before `fit`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let proba = self.predict_proba(x);
        let proba_data = proba.data_f32();
        let n_samples = x.dims()[0];

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                if self.n_classes_ == 2 {
                    if proba_data[i * 2 + 1] > 0.5 { 1.0 } else { 0.0 }
                } else {
                    let start = i * self.n_classes_;
                    let probs = &proba_data[start..start + self.n_classes_];
                    probs.iter()
                        .enumerate()
                        .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                        .map(|(c, _)| c as f32)
                        .unwrap_or(0.0)
                }
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// Class probabilities, shape [n_samples, n_classes].
    ///
    /// NOTE(review): the latent mean at a test point is a kernel-weighted
    /// average of the cached training latents divided by n_train — an ad-hoc
    /// scaling, not the exact GP predictive mean (k*^T K^{-1} f). The
    /// multi-class branch is likewise a placeholder: class 1 gets the
    /// sigmoid output and the remainder is spread uniformly.
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let x_train = self.x_train_.as_ref().expect("Model not fitted");
        let f_cached = self.f_cached_.as_ref().expect("Model not fitted");
        let n_train = self.n_samples_;

        // Compute K_star
        let k_star = self.compute_kernel_matrix(&x_data, n_samples, x_train, n_train, n_features);

        // Predict latent function values (heuristic scaling by n_train)
        let mut f_star = vec![0.0f32; n_samples];
        for i in 0..n_samples {
            for j in 0..n_train {
                f_star[i] += k_star[i * n_train + j] * f_cached[j] / n_train as f32;
            }
        }

        // Convert to probabilities
        let mut probs = Vec::with_capacity(n_samples * self.n_classes_);
        for i in 0..n_samples {
            let p1 = Self::sigmoid(f_star[i]);
            if self.n_classes_ == 2 {
                probs.push(1.0 - p1);
                probs.push(p1);
            } else {
                // Multi-class: use softmax approximation
                for c in 0..self.n_classes_ {
                    probs.push(if c == 1 { p1 } else { (1.0 - p1) / (self.n_classes_ - 1) as f32 });
                }
            }
        }

        Tensor::from_slice(&probs, &[n_samples, self.n_classes_]).unwrap()
    }

    /// Mean accuracy on (x, y): a prediction counts as correct when it is
    /// within 0.5 of the true label.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}
440
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_gp_regressor() {
        // Five points sampled from y = x^2.
        let xs: Vec<f32> = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let ys: Vec<f32> = xs.iter().map(|&v| v * v).collect();
        let x = Tensor::from_slice(&xs, &[5, 1]).unwrap();
        let y = Tensor::from_slice(&ys, &[5]).unwrap();

        let mut gpr = GaussianProcessRegressor::new(GPKernel::rbf(1.0)).alpha(0.1);
        gpr.fit(&x, &y);

        // Predictions come back as one value per input row.
        assert_eq!(gpr.predict(&x).dims(), &[5]);
    }

    #[test]
    fn test_gp_classifier() {
        // AND-gate labels over the four corners of the unit square.
        let corners = [0.0f32, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0];
        let x = Tensor::from_slice(&corners, &[4, 2]).unwrap();
        let y = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut gpc = GaussianProcessClassifier::new(GPKernel::rbf(1.0));
        gpc.fit(&x, &y);

        // One predicted label per sample.
        assert_eq!(gpc.predict(&x).dims(), &[4]);
    }
}
476
477