// ghostflow_ml/kernel.rs

1//! Kernel Methods - Kernel Ridge, Kernel PCA
2
3use ghostflow_core::Tensor;
4
5/// Kernel functions
/// Kernel functions used by the kernel methods in this module.
///
/// Each variant maps a pair of feature vectors to a scalar similarity.
/// `Debug`/`Copy`/`PartialEq` are derived so the type is printable in
/// diagnostics and cheap to pass around (all fields are `Copy`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Kernel {
    /// k(x, y) = <x, y>
    Linear,
    /// k(x, y) = (<x, y> + coef0)^degree
    Polynomial { degree: usize, coef0: f32 },
    /// k(x, y) = exp(-gamma * ||x - y||^2)
    RBF { gamma: f32 },
    /// k(x, y) = tanh(gamma * <x, y> + coef0)
    Sigmoid { gamma: f32, coef0: f32 },
}

impl Kernel {
    /// Evaluate the kernel on two feature vectors.
    ///
    /// The slices are paired element-wise (zip semantics); if their lengths
    /// differ, trailing elements of the longer slice are silently ignored.
    pub fn compute(&self, x1: &[f32], x2: &[f32]) -> f32 {
        // Shared dot-product helper for the inner-product based kernels.
        let dot = || -> f32 { x1.iter().zip(x2).map(|(&a, &b)| a * b).sum() };
        match self {
            Kernel::Linear => dot(),
            Kernel::Polynomial { degree, coef0 } => (dot() + coef0).powi(*degree as i32),
            Kernel::RBF { gamma } => {
                let sq_dist: f32 = x1
                    .iter()
                    .zip(x2)
                    .map(|(&a, &b)| (a - b).powi(2))
                    .sum();
                (-gamma * sq_dist).exp()
            }
            Kernel::Sigmoid { gamma, coef0 } => (gamma * dot() + coef0).tanh(),
        }
    }

    /// Convenience constructor for an RBF kernel with the given `gamma`.
    pub fn rbf(gamma: f32) -> Self {
        Kernel::RBF { gamma }
    }

    /// Convenience constructor for a polynomial kernel with `coef0 = 1.0`.
    pub fn polynomial(degree: usize) -> Self {
        Kernel::Polynomial { degree, coef0: 1.0 }
    }
}
44
45/// Kernel Ridge Regression
46pub struct KernelRidge {
47    pub alpha: f32,
48    pub kernel: Kernel,
49    x_train_: Option<Vec<f32>>,
50    dual_coef_: Option<Vec<f32>>,
51    n_samples_: usize,
52    n_features_: usize,
53}
54
55impl KernelRidge {
56    pub fn new() -> Self {
57        KernelRidge {
58            alpha: 1.0,
59            kernel: Kernel::RBF { gamma: 1.0 },
60            x_train_: None,
61            dual_coef_: None,
62            n_samples_: 0,
63            n_features_: 0,
64        }
65    }
66
67    pub fn alpha(mut self, a: f32) -> Self {
68        self.alpha = a;
69        self
70    }
71
72    pub fn kernel(mut self, k: Kernel) -> Self {
73        self.kernel = k;
74        self
75    }
76
77    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
78        let x_data = x.data_f32();
79        let y_data = y.data_f32();
80        let n_samples = x.dims()[0];
81        let n_features = x.dims()[1];
82
83        // Compute kernel matrix
84        let mut k_matrix = vec![0.0f32; n_samples * n_samples];
85        for i in 0..n_samples {
86            let xi = &x_data[i * n_features..(i + 1) * n_features];
87            for j in 0..n_samples {
88                let xj = &x_data[j * n_features..(j + 1) * n_features];
89                k_matrix[i * n_samples + j] = self.kernel.compute(xi, xj);
90            }
91        }
92
93        // Add regularization: K + alpha * I
94        for i in 0..n_samples {
95            k_matrix[i * n_samples + i] += self.alpha;
96        }
97
98        // Solve (K + alpha*I) * dual_coef = y
99        let dual_coef = solve_linear_system(&k_matrix, &y_data, n_samples);
100
101        self.x_train_ = Some(x_data.clone());
102        self.dual_coef_ = Some(dual_coef);
103        self.n_samples_ = n_samples;
104        self.n_features_ = n_features;
105    }
106
107    pub fn predict(&self, x: &Tensor) -> Tensor {
108        let x_data = x.data_f32();
109        let n_samples = x.dims()[0];
110        let n_features = x.dims()[1];
111
112        let x_train = self.x_train_.as_ref().expect("Model not fitted");
113        let dual_coef = self.dual_coef_.as_ref().unwrap();
114
115        let mut predictions = vec![0.0f32; n_samples];
116
117        for i in 0..n_samples {
118            let xi = &x_data[i * n_features..(i + 1) * n_features];
119            for j in 0..self.n_samples_ {
120                let xj = &x_train[j * self.n_features_..(j + 1) * self.n_features_];
121                predictions[i] += dual_coef[j] * self.kernel.compute(xi, xj);
122            }
123        }
124
125        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
126    }
127}
128
/// `Default` delegates to `new()`: `alpha = 1.0`, RBF kernel with `gamma = 1.0`.
impl Default for KernelRidge {
    fn default() -> Self { Self::new() }
}
132
133/// Kernel PCA
134pub struct KernelPCA {
135    pub n_components: usize,
136    pub kernel: Kernel,
137    x_train_: Option<Vec<f32>>,
138    alphas_: Option<Vec<Vec<f32>>>,
139    lambdas_: Option<Vec<f32>>,
140    n_samples_: usize,
141    n_features_: usize,
142}
143
144impl KernelPCA {
145    pub fn new(n_components: usize) -> Self {
146        KernelPCA {
147            n_components,
148            kernel: Kernel::RBF { gamma: 1.0 },
149            x_train_: None,
150            alphas_: None,
151            lambdas_: None,
152            n_samples_: 0,
153            n_features_: 0,
154        }
155    }
156
157    pub fn kernel(mut self, k: Kernel) -> Self {
158        self.kernel = k;
159        self
160    }
161
162    pub fn fit(&mut self, x: &Tensor) {
163        let x_data = x.data_f32();
164        let n_samples = x.dims()[0];
165        let n_features = x.dims()[1];
166
167        // Compute kernel matrix
168        let mut k_matrix = vec![0.0f32; n_samples * n_samples];
169        for i in 0..n_samples {
170            let xi = &x_data[i * n_features..(i + 1) * n_features];
171            for j in 0..n_samples {
172                let xj = &x_data[j * n_features..(j + 1) * n_features];
173                k_matrix[i * n_samples + j] = self.kernel.compute(xi, xj);
174            }
175        }
176
177        // Center kernel matrix
178        let row_means: Vec<f32> = (0..n_samples)
179            .map(|i| (0..n_samples).map(|j| k_matrix[i * n_samples + j]).sum::<f32>() / n_samples as f32)
180            .collect();
181        let total_mean: f32 = row_means.iter().sum::<f32>() / n_samples as f32;
182
183        for i in 0..n_samples {
184            for j in 0..n_samples {
185                k_matrix[i * n_samples + j] = k_matrix[i * n_samples + j] 
186                    - row_means[i] - row_means[j] + total_mean;
187            }
188        }
189
190        // Eigendecomposition using power iteration
191        let mut eigenvalues = Vec::new();
192        let mut eigenvectors = Vec::new();
193        let mut k_deflated = k_matrix.clone();
194
195        for _ in 0..self.n_components.min(n_samples) {
196            let mut v = vec![1.0f32; n_samples];
197            normalize(&mut v);
198
199            for _ in 0..100 {
200                let mut kv = vec![0.0f32; n_samples];
201                for i in 0..n_samples {
202                    for j in 0..n_samples {
203                        kv[i] += k_deflated[i * n_samples + j] * v[j];
204                    }
205                }
206
207                let norm: f32 = kv.iter().map(|&x| x * x).sum::<f32>().sqrt();
208                if norm < 1e-10 { break; }
209                
210                let prev_v = v.clone();
211                for (vi, kvi) in v.iter_mut().zip(kv.iter()) {
212                    *vi = *kvi / norm;
213                }
214
215                let diff: f32 = v.iter().zip(prev_v.iter())
216                    .map(|(&a, &b)| (a - b).abs()).sum();
217                if diff < 1e-6 { break; }
218            }
219
220            // Compute eigenvalue
221            let mut eigenvalue = 0.0f32;
222            for i in 0..n_samples {
223                let mut kv_i = 0.0f32;
224                for j in 0..n_samples {
225                    kv_i += k_deflated[i * n_samples + j] * v[j];
226                }
227                eigenvalue += v[i] * kv_i;
228            }
229
230            if eigenvalue > 1e-10 {
231                eigenvalues.push(eigenvalue);
232                eigenvectors.push(v.clone());
233
234                // Deflate
235                for i in 0..n_samples {
236                    for j in 0..n_samples {
237                        k_deflated[i * n_samples + j] -= eigenvalue * v[i] * v[j];
238                    }
239                }
240            }
241        }
242
243        // Normalize eigenvectors by sqrt(eigenvalue)
244        let mut alphas = Vec::new();
245        for (i, ev) in eigenvectors.iter().enumerate() {
246            let scale = 1.0 / eigenvalues[i].sqrt();
247            let alpha: Vec<f32> = ev.iter().map(|&v| v * scale).collect();
248            alphas.push(alpha);
249        }
250
251        self.x_train_ = Some(x_data.clone());
252        self.alphas_ = Some(alphas);
253        self.lambdas_ = Some(eigenvalues);
254        self.n_samples_ = n_samples;
255        self.n_features_ = n_features;
256    }
257
258    pub fn transform(&self, x: &Tensor) -> Tensor {
259        let x_data = x.data_f32();
260        let n_samples = x.dims()[0];
261        let n_features = x.dims()[1];
262
263        let x_train = self.x_train_.as_ref().expect("Model not fitted");
264        let alphas = self.alphas_.as_ref().unwrap();
265        let n_train = self.n_samples_;
266
267        // Compute kernel between new points and training points
268        let mut k_new = vec![0.0f32; n_samples * n_train];
269        for i in 0..n_samples {
270            let xi = &x_data[i * n_features..(i + 1) * n_features];
271            for j in 0..n_train {
272                let xj = &x_train[j * self.n_features_..(j + 1) * self.n_features_];
273                k_new[i * n_train + j] = self.kernel.compute(xi, xj);
274            }
275        }
276
277        // Project onto principal components
278        let n_components = alphas.len();
279        let mut result = vec![0.0f32; n_samples * n_components];
280
281        for i in 0..n_samples {
282            for (k, alpha) in alphas.iter().enumerate() {
283                for j in 0..n_train {
284                    result[i * n_components + k] += k_new[i * n_train + j] * alpha[j];
285                }
286            }
287        }
288
289        Tensor::from_slice(&result, &[n_samples, n_components]).unwrap()
290    }
291
292    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
293        self.fit(x);
294        self.transform(x)
295    }
296}
297
298/// Nystrom approximation for large-scale kernel methods
pub struct Nystrom {
    /// Requested number of landmark points (clamped to the sample count
    /// inside `fit`).
    pub n_components: usize,
    /// Kernel used for the landmark and transform computations.
    pub kernel: Kernel,
    // Indices of the sampled landmark rows within the training data.
    component_indices_: Option<Vec<usize>>,
    // Flattened landmark rows (row-major), kept for `transform`.
    components_: Option<Vec<f32>>,
    // The whitening matrix K_mm^(-1/2), stored as rows of columns.
    normalization_: Option<Vec<Vec<f32>>>,
    // Number of features per sample.
    n_features_: usize,
}
307
308impl Nystrom {
309    pub fn new(n_components: usize) -> Self {
310        Nystrom {
311            n_components,
312            kernel: Kernel::RBF { gamma: 1.0 },
313            component_indices_: None,
314            components_: None,
315            normalization_: None,
316            n_features_: 0,
317        }
318    }
319
320    pub fn kernel(mut self, k: Kernel) -> Self {
321        self.kernel = k;
322        self
323    }
324
325    pub fn fit(&mut self, x: &Tensor) {
326        use rand::prelude::*;
327        
328        let x_data = x.data_f32();
329        let n_samples = x.dims()[0];
330        let n_features = x.dims()[1];
331
332        let n_components = self.n_components.min(n_samples);
333
334        // Random sampling of landmark points
335        let mut rng = thread_rng();
336        let indices: Vec<usize> = (0..n_samples)
337            .collect::<Vec<_>>()
338            .choose_multiple(&mut rng, n_components)
339            .cloned()
340            .collect();
341
342        // Extract component data
343        let components: Vec<f32> = indices.iter()
344            .flat_map(|&i| x_data[i * n_features..(i + 1) * n_features].to_vec())
345            .collect();
346
347        // Compute kernel matrix between landmarks
348        let mut k_mm = vec![0.0f32; n_components * n_components];
349        for i in 0..n_components {
350            let xi = &components[i * n_features..(i + 1) * n_features];
351            for j in 0..n_components {
352                let xj = &components[j * n_features..(j + 1) * n_features];
353                k_mm[i * n_components + j] = self.kernel.compute(xi, xj);
354            }
355        }
356
357        // Compute K_mm^(-1/2) via eigendecomposition
358        let (eigenvalues, eigenvectors) = eigen_decomp(&k_mm, n_components);
359        
360        let mut normalization = vec![vec![0.0f32; n_components]; n_components];
361        for i in 0..n_components {
362            for j in 0..n_components {
363                for k in 0..n_components {
364                    if eigenvalues[k] > 1e-10 {
365                        normalization[i][j] += eigenvectors[i][k] * eigenvectors[j][k] 
366                            / eigenvalues[k].sqrt();
367                    }
368                }
369            }
370        }
371
372        self.component_indices_ = Some(indices);
373        self.components_ = Some(components);
374        self.normalization_ = Some(normalization);
375        self.n_features_ = n_features;
376    }
377
378    pub fn transform(&self, x: &Tensor) -> Tensor {
379        let x_data = x.data_f32();
380        let n_samples = x.dims()[0];
381        let n_features = x.dims()[1];
382
383        let components = self.components_.as_ref().expect("Model not fitted");
384        let normalization = self.normalization_.as_ref().unwrap();
385        let n_components = self.n_components;
386
387        // Compute kernel between x and landmarks
388        let mut k_nm = vec![0.0f32; n_samples * n_components];
389        for i in 0..n_samples {
390            let xi = &x_data[i * n_features..(i + 1) * n_features];
391            for j in 0..n_components {
392                let xj = &components[j * self.n_features_..(j + 1) * self.n_features_];
393                k_nm[i * n_components + j] = self.kernel.compute(xi, xj);
394            }
395        }
396
397        // Apply normalization: K_nm @ K_mm^(-1/2)
398        let mut result = vec![0.0f32; n_samples * n_components];
399        for i in 0..n_samples {
400            for j in 0..n_components {
401                for k in 0..n_components {
402                    result[i * n_components + j] += k_nm[i * n_components + k] * normalization[k][j];
403                }
404            }
405        }
406
407        Tensor::from_slice(&result, &[n_samples, n_components]).unwrap()
408    }
409
410    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
411        self.fit(x);
412        self.transform(x)
413    }
414}
415
416// Helper functions
/// Solve the dense linear system `a * x = b` by Gauss-Jordan elimination with
/// partial pivoting.
///
/// `a` is an `n x n` row-major matrix and `b` has length `n`. Columns whose
/// pivot magnitude falls below 1e-10 are skipped rather than divided by, so
/// callers are expected to pass well-conditioned (e.g. regularized) systems;
/// for a skipped column the corresponding unknown is whatever the augmented
/// column holds at the end.
fn solve_linear_system(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    let w = n + 1; // width of the augmented matrix [A | b]
    let mut aug = vec![0.0f32; n * w];
    for i in 0..n {
        aug[i * w..i * w + n].copy_from_slice(&a[i * n..(i + 1) * n]);
        aug[i * w + n] = b[i];
    }

    for i in 0..n {
        // Partial pivoting: find the largest remaining entry in column i.
        let mut max_row = i;
        for k in (i + 1)..n {
            if aug[k * w + i].abs() > aug[max_row * w + i].abs() {
                max_row = k;
            }
        }
        if max_row != i {
            for j in 0..=n {
                aug.swap(i * w + j, max_row * w + j);
            }
        }

        let pivot = aug[i * w + i];
        // Skip (near-)singular pivots rather than dividing by ~0.
        if pivot.abs() < 1e-10 { continue; }

        // Normalize the pivot row.
        for j in i..=n {
            aug[i * w + j] /= pivot;
        }

        // Eliminate column i from every other row.
        for k in 0..n {
            if k != i {
                let factor = aug[k * w + i];
                for j in i..=n {
                    aug[k * w + j] -= factor * aug[i * w + j];
                }
            }
        }
    }

    // After full elimination the solution sits in the augmented column.
    (0..n).map(|i| aug[i * w + n]).collect()
}
459
/// Scale `v` in place to unit Euclidean length.
///
/// Vectors whose norm is at most 1e-10 are left untouched to avoid dividing
/// by (near-)zero.
fn normalize(v: &mut [f32]) {
    let length = v.iter().fold(0.0f32, |acc, &x| acc + x * x).sqrt();
    if length > 1e-10 {
        v.iter_mut().for_each(|x| *x /= length);
    }
}
466
/// Approximate symmetric eigendecomposition via power iteration with
/// Hotelling deflation.
///
/// `a` is an `n x n` row-major matrix. Returns exactly `n` (eigenvalue,
/// eigenvector) pairs ordered largest-first, with the reported eigenvalues
/// clamped at zero.
fn eigen_decomp(a: &[f32], n: usize) -> (Vec<f32>, Vec<Vec<f32>>) {
    let mut values = Vec::with_capacity(n);
    let mut vectors = Vec::with_capacity(n);
    let mut residual = a.to_vec();

    // Matrix-vector product against the current (deflated) matrix.
    let mat_vec = |m: &[f32], x: &[f32]| -> Vec<f32> {
        (0..n)
            .map(|i| (0..n).map(|j| m[i * n + j] * x[j]).sum())
            .collect()
    };

    for _ in 0..n {
        // Start power iteration from a normalized uniform vector.
        let mut vec_est = vec![1.0f32; n];
        let start_norm: f32 = vec_est.iter().map(|&x| x * x).sum::<f32>().sqrt();
        if start_norm > 1e-10 {
            for x in vec_est.iter_mut() {
                *x /= start_norm;
            }
        }

        // Power iteration: v <- A v / ||A v||.
        for _ in 0..100 {
            let image = mat_vec(&residual, &vec_est);
            let len: f32 = image.iter().map(|&x| x * x).sum::<f32>().sqrt();
            if len < 1e-10 {
                break;
            }
            for (dst, &src) in vec_est.iter_mut().zip(image.iter()) {
                *dst = src / len;
            }
        }

        // Rayleigh quotient v^T A v estimates the dominant eigenvalue.
        let image = mat_vec(&residual, &vec_est);
        let lambda: f32 = vec_est.iter().zip(image.iter()).map(|(&vi, &avi)| vi * avi).sum();

        values.push(lambda.max(0.0));
        vectors.push(vec_est.clone());

        // Deflate: subtract the found component from the residual matrix.
        for i in 0..n {
            for j in 0..n {
                residual[i * n + j] -= lambda * vec_est[i] * vec_est[j];
            }
        }
    }

    (values, vectors)
}
513
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: KernelRidge fits a tiny RBF-kernel regression and returns
    // predictions with the expected shape (values are not asserted).
    #[test]
    fn test_kernel_ridge() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let y = Tensor::from_slice(&[1.0, 2.0, 3.0], &[3]).unwrap();

        let mut kr = KernelRidge::new().kernel(Kernel::rbf(0.5));
        kr.fit(&x, &y);
        let pred = kr.predict(&x);
        assert_eq!(pred.dims(), &[3]);
    }

    // Smoke test: KernelPCA projects 4 samples onto the requested 2
    // components (shape only; projection values are not asserted).
    #[test]
    fn test_kernel_pca() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], &[4, 2]).unwrap();

        let mut kpca = KernelPCA::new(2).kernel(Kernel::rbf(1.0));
        let result = kpca.fit_transform(&x);
        assert_eq!(result.dims(), &[4, 2]);
    }
}