// ghostflow_ml/decomposition.rs

1//! Dimensionality reduction - PCA, SVD, NMF
2
3use ghostflow_core::Tensor;
4
5/// Principal Component Analysis using power iteration for eigendecomposition
6pub struct PCA {
7    pub n_components: usize,
8    pub whiten: bool,
9    pub components_: Option<Vec<Vec<f32>>>,
10    pub explained_variance_: Option<Vec<f32>>,
11    pub explained_variance_ratio_: Option<Vec<f32>>,
12    pub mean_: Option<Vec<f32>>,
13    pub n_features_: usize,
14    pub n_samples_: usize,
15}
16
17impl PCA {
18    pub fn new(n_components: usize) -> Self {
19        PCA {
20            n_components,
21            whiten: false,
22            components_: None,
23            explained_variance_: None,
24            explained_variance_ratio_: None,
25            mean_: None,
26            n_features_: 0,
27            n_samples_: 0,
28        }
29    }
30
31    pub fn whiten(mut self, whiten: bool) -> Self {
32        self.whiten = whiten;
33        self
34    }
35
36    fn compute_mean(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
37        let mut mean = vec![0.0f32; n_features];
38        for i in 0..n_samples {
39            for j in 0..n_features {
40                mean[j] += x[i * n_features + j];
41            }
42        }
43        for m in &mut mean {
44            *m /= n_samples as f32;
45        }
46        mean
47    }
48
49    fn center_data(&self, x: &[f32], mean: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
50        let mut centered = vec![0.0f32; n_samples * n_features];
51        for i in 0..n_samples {
52            for j in 0..n_features {
53                centered[i * n_features + j] = x[i * n_features + j] - mean[j];
54            }
55        }
56        centered
57    }
58
59    fn compute_covariance(&self, x_centered: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
60        let mut cov = vec![0.0f32; n_features * n_features];
61        
62        // C = X^T X / (n - 1)
63        for i in 0..n_features {
64            for j in 0..n_features {
65                let mut sum = 0.0f32;
66                for k in 0..n_samples {
67                    sum += x_centered[k * n_features + i] * x_centered[k * n_features + j];
68                }
69                cov[i * n_features + j] = sum / (n_samples - 1).max(1) as f32;
70            }
71        }
72        
73        cov
74    }
75
76    /// Power iteration to find dominant eigenvector
77    fn power_iteration(&self, matrix: &[f32], n: usize, max_iter: usize, tol: f32) -> (Vec<f32>, f32) {
78        let mut v = vec![1.0f32 / (n as f32).sqrt(); n];
79        let mut eigenvalue = 0.0f32;
80
81        for _ in 0..max_iter {
82            // w = A * v
83            let mut w = vec![0.0f32; n];
84            for i in 0..n {
85                for j in 0..n {
86                    w[i] += matrix[i * n + j] * v[j];
87                }
88            }
89
90            // Compute eigenvalue (Rayleigh quotient)
91            let new_eigenvalue: f32 = w.iter().zip(v.iter()).map(|(&wi, &vi)| wi * vi).sum();
92
93            // Normalize
94            let norm: f32 = w.iter().map(|&x| x * x).sum::<f32>().sqrt();
95            if norm < 1e-10 {
96                break;
97            }
98            for wi in &mut w {
99                *wi /= norm;
100            }
101
102            // Check convergence
103            let diff: f32 = v.iter().zip(w.iter()).map(|(&vi, &wi)| (vi - wi).abs()).sum();
104            v = w;
105            eigenvalue = new_eigenvalue;
106
107            if diff < tol {
108                break;
109            }
110        }
111
112        (v, eigenvalue)
113    }
114
115    /// Deflation: remove contribution of found eigenvector
116    fn deflate(&self, matrix: &mut [f32], eigenvector: &[f32], eigenvalue: f32, n: usize) {
117        for i in 0..n {
118            for j in 0..n {
119                matrix[i * n + j] -= eigenvalue * eigenvector[i] * eigenvector[j];
120            }
121        }
122    }
123
124    pub fn fit(&mut self, x: &Tensor) {
125        let x_data = x.data_f32();
126        let n_samples = x.dims()[0];
127        let n_features = x.dims()[1];
128
129        self.n_samples_ = n_samples;
130        self.n_features_ = n_features;
131
132        // Compute mean and center data
133        let mean = self.compute_mean(&x_data, n_samples, n_features);
134        let x_centered = self.center_data(&x_data, &mean, n_samples, n_features);
135
136        // Compute covariance matrix
137        let mut cov = self.compute_covariance(&x_centered, n_samples, n_features);
138
139        // Find top k eigenvectors using power iteration with deflation
140        let k = self.n_components.min(n_features);
141        let mut components = Vec::with_capacity(k);
142        let mut eigenvalues = Vec::with_capacity(k);
143
144        for _ in 0..k {
145            let (eigenvector, eigenvalue) = self.power_iteration(&cov, n_features, 1000, 1e-6);
146            
147            if eigenvalue < 1e-10 {
148                break;
149            }
150
151            components.push(eigenvector.clone());
152            eigenvalues.push(eigenvalue);
153
154            // Deflate
155            self.deflate(&mut cov, &eigenvector, eigenvalue, n_features);
156        }
157
158        // Compute explained variance ratio
159        let total_variance: f32 = eigenvalues.iter().sum();
160        let explained_variance_ratio: Vec<f32> = eigenvalues.iter()
161            .map(|&e| e / total_variance.max(1e-10))
162            .collect();
163
164        self.components_ = Some(components);
165        self.explained_variance_ = Some(eigenvalues);
166        self.explained_variance_ratio_ = Some(explained_variance_ratio);
167        self.mean_ = Some(mean);
168    }
169
170    pub fn transform(&self, x: &Tensor) -> Tensor {
171        let x_data = x.data_f32();
172        let n_samples = x.dims()[0];
173        let n_features = x.dims()[1];
174
175        let mean = self.mean_.as_ref().expect("Model not fitted");
176        let components = self.components_.as_ref().expect("Model not fitted");
177        let k = components.len();
178
179        let mut result = vec![0.0f32; n_samples * k];
180
181        for i in 0..n_samples {
182            for (c, component) in components.iter().enumerate() {
183                let mut sum = 0.0f32;
184                for j in 0..n_features {
185                    sum += (x_data[i * n_features + j] - mean[j]) * component[j];
186                }
187                
188                if self.whiten {
189                    let var = self.explained_variance_.as_ref().unwrap()[c];
190                    sum /= var.sqrt().max(1e-10);
191                }
192                
193                result[i * k + c] = sum;
194            }
195        }
196
197        Tensor::from_slice(&result, &[n_samples, k]).unwrap()
198    }
199
200    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
201        self.fit(x);
202        self.transform(x)
203    }
204
205    pub fn inverse_transform(&self, x: &Tensor) -> Tensor {
206        let x_data = x.data_f32();
207        let n_samples = x.dims()[0];
208        let k = x.dims()[1];
209
210        let mean = self.mean_.as_ref().expect("Model not fitted");
211        let components = self.components_.as_ref().expect("Model not fitted");
212        let n_features = self.n_features_;
213
214        let mut result = vec![0.0f32; n_samples * n_features];
215
216        for i in 0..n_samples {
217            for j in 0..n_features {
218                let mut sum = mean[j];
219                for c in 0..k {
220                    let mut val = x_data[i * k + c];
221                    if self.whiten {
222                        let var = self.explained_variance_.as_ref().unwrap()[c];
223                        val *= var.sqrt();
224                    }
225                    sum += val * components[c][j];
226                }
227                result[i * n_features + j] = sum;
228            }
229        }
230
231        Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
232    }
233}
234
235/// Singular Value Decomposition using power iteration
236pub struct SVD {
237    pub n_components: Option<usize>,
238    pub u_: Option<Vec<Vec<f32>>>,
239    pub s_: Option<Vec<f32>>,
240    pub vt_: Option<Vec<Vec<f32>>>,
241}
242
impl SVD {
    /// Create an unfitted SVD; `None` components means compute min(m, n).
    pub fn new(n_components: Option<usize>) -> Self {
        SVD {
            n_components,
            u_: None,
            s_: None,
            vt_: None,
        }
    }

    /// Fit: extract up to k singular triplets (u, sigma, v) one at a time by
    /// alternating power iteration (u = Av, v = A^T u) on a working copy of X,
    /// deflating the copy (A <- A - sigma * u * v^T) after each triplet.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let m = x.dims()[0];
        let n = x.dims()[1];
        let k = self.n_components.unwrap_or(m.min(n));

        // Working copy of the data; deflated in place as triplets are found.
        let mut a = x_data.clone();
        let mut u_vecs = Vec::with_capacity(k);
        let mut singular_values = Vec::with_capacity(k);
        let mut v_vecs = Vec::with_capacity(k);

        let max_iter = 100;
        let tol = 1e-6;

        for _ in 0..k {
            // Power iteration to find largest singular value
            let mut v = vec![1.0f32 / (n as f32).sqrt(); n];
            let mut u = vec![0.0f32; m];
            let mut sigma = 0.0f32;

            for _ in 0..max_iter {
                // u = A * v
                for i in 0..m {
                    u[i] = 0.0;
                    for j in 0..n {
                        u[i] += a[i * n + j] * v[j];
                    }
                }

                // Normalize u; a tiny norm means the deflated matrix is
                // numerically rank-deficient, so stop iterating.
                let u_norm: f32 = u.iter().map(|&x| x * x).sum::<f32>().sqrt();
                if u_norm < tol {
                    break;
                }
                for ui in &mut u {
                    *ui /= u_norm;
                }

                // v = A^T * u
                let mut new_v = vec![0.0f32; n];
                for j in 0..n {
                    for i in 0..m {
                        new_v[j] += a[i * n + j] * u[i];
                    }
                }

                // sigma = ||A^T u||; for unit u this converges to the largest
                // singular value of the (deflated) matrix. Normalize v.
                let v_norm: f32 = new_v.iter().map(|&x| x * x).sum::<f32>().sqrt();
                if v_norm < tol {
                    break;
                }
                sigma = v_norm;
                for vj in &mut new_v {
                    *vj /= v_norm;
                }

                // Check convergence: L1 distance between successive v iterates.
                let diff: f32 = v.iter().zip(new_v.iter())
                    .map(|(&old, &new)| (old - new).abs())
                    .sum();
                v = new_v;

                if diff < tol {
                    break;
                }
            }

            // A vanishing sigma means no meaningful component remains.
            if sigma < tol {
                break;
            }

            singular_values.push(sigma);
            u_vecs.push(u.clone());
            v_vecs.push(v.clone());

            // Deflate: A = A - sigma * u * v^T
            for i in 0..m {
                for j in 0..n {
                    a[i * n + j] -= sigma * u[i] * v[j];
                }
            }
        }

        self.u_ = Some(u_vecs);
        self.s_ = Some(singular_values);
        self.vt_ = Some(v_vecs);
    }

    /// Project rows of `x` onto the fitted right singular vectors: X * V
    /// (which equals U * S for the training data).
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let vt = self.vt_.as_ref().expect("Model not fitted");
        let k = vt.len();

        let mut result = vec![0.0f32; n_samples * k];

        for i in 0..n_samples {
            for c in 0..k {
                // Dot product of row i with right singular vector c.
                let mut sum = 0.0f32;
                for j in 0..n_features {
                    sum += x_data[i * n_features + j] * vt[c][j];
                }
                result[i * k + c] = sum;
            }
        }

        Tensor::from_slice(&result, &[n_samples, k]).unwrap()
    }

    /// Convenience: `fit` then `transform` on the same data.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }
}
369
370/// Non-negative Matrix Factorization using multiplicative updates
371pub struct NMF {
372    pub n_components: usize,
373    pub max_iter: usize,
374    pub tol: f32,
375    pub alpha: f32,
376    pub l1_ratio: f32,
377    pub components_: Option<Vec<Vec<f32>>>,
378    pub reconstruction_err_: Option<f32>,
379    pub n_iter_: usize,
380}
381
impl NMF {
    /// Create an unfitted NMF model with defaults: 200 iterations, tol 1e-4,
    /// no regularization.
    pub fn new(n_components: usize) -> Self {
        NMF {
            n_components,
            max_iter: 200,
            tol: 1e-4,
            alpha: 0.0,
            l1_ratio: 0.0,
            components_: None,
            reconstruction_err_: None,
            n_iter_: 0,
        }
    }

    /// Builder-style setter for the iteration cap.
    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    /// Builder-style setter for the regularization strength.
    /// NOTE(review): the stored value is currently not used by `fit`.
    pub fn alpha(mut self, alpha: f32) -> Self {
        self.alpha = alpha;
        self
    }

    /// Frobenius norm of (X - WH); both matrices passed as flat row-major
    /// buffers of equal length.
    fn frobenius_norm(&self, x: &[f32], wh: &[f32]) -> f32 {
        x.iter().zip(wh.iter())
            .map(|(&xi, &whi)| (xi - whi).powi(2))
            .sum::<f32>()
            .sqrt()
    }

    /// Fit X ~= W * H by Lee-Seung multiplicative updates. W and H are
    /// initialized to random positive values, so results vary between runs.
    /// Convergence is checked every 10 iterations against the change in
    /// reconstruction error; H is stored in `components_` on completion.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Initialize W and H randomly (non-negative)
        use rand::Rng;
        let mut rng = rand::thread_rng();

        // gen::<f32>() is already in [0, 1); the +0.1 keeps entries strictly
        // positive so multiplicative updates never get stuck at zero.
        let mut w: Vec<Vec<f32>> = (0..n_samples)
            .map(|_| (0..self.n_components).map(|_| rng.gen::<f32>().abs() + 0.1).collect())
            .collect();

        let mut h: Vec<Vec<f32>> = (0..self.n_components)
            .map(|_| (0..n_features).map(|_| rng.gen::<f32>().abs() + 0.1).collect())
            .collect();

        // Small constant in denominators to avoid division by zero.
        let eps = 1e-10f32;
        let mut prev_error = f32::INFINITY;

        for iter in 0..self.max_iter {
            // Update H: H = H .* (W^T X) ./ (W^T W H + eps)
            // Compute W^T X
            let mut wt_x = vec![vec![0.0f32; n_features]; self.n_components];
            for k in 0..self.n_components {
                for j in 0..n_features {
                    for i in 0..n_samples {
                        wt_x[k][j] += w[i][k] * x_data[i * n_features + j];
                    }
                }
            }

            // Compute W^T W
            let mut wt_w = vec![vec![0.0f32; self.n_components]; self.n_components];
            for k1 in 0..self.n_components {
                for k2 in 0..self.n_components {
                    for i in 0..n_samples {
                        wt_w[k1][k2] += w[i][k1] * w[i][k2];
                    }
                }
            }

            // Update H
            for k in 0..self.n_components {
                for j in 0..n_features {
                    let mut denom = eps;
                    for k2 in 0..self.n_components {
                        denom += wt_w[k][k2] * h[k2][j];
                    }
                    h[k][j] *= wt_x[k][j] / denom;
                }
            }

            // Update W: W = W .* (X H^T) ./ (W H H^T + eps)
            // Compute X H^T
            let mut x_ht = vec![vec![0.0f32; self.n_components]; n_samples];
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    for j in 0..n_features {
                        x_ht[i][k] += x_data[i * n_features + j] * h[k][j];
                    }
                }
            }

            // Compute H H^T
            let mut h_ht = vec![vec![0.0f32; self.n_components]; self.n_components];
            for k1 in 0..self.n_components {
                for k2 in 0..self.n_components {
                    for j in 0..n_features {
                        h_ht[k1][k2] += h[k1][j] * h[k2][j];
                    }
                }
            }

            // Update W
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    let mut denom = eps;
                    for k2 in 0..self.n_components {
                        denom += w[i][k2] * h_ht[k2][k];
                    }
                    w[i][k] *= x_ht[i][k] / denom;
                }
            }

            // Check convergence every 10 iterations (the error computation is
            // an O(n_samples * n_features * k) matrix product, so it is not
            // done every step).
            if iter % 10 == 0 {
                // Compute W * H
                let mut wh = vec![0.0f32; n_samples * n_features];
                for i in 0..n_samples {
                    for j in 0..n_features {
                        for k in 0..self.n_components {
                            wh[i * n_features + j] += w[i][k] * h[k][j];
                        }
                    }
                }

                let error = self.frobenius_norm(&x_data, &wh);
                if (prev_error - error).abs() < self.tol {
                    self.n_iter_ = iter + 1;
                    break;
                }
                prev_error = error;
            }

            self.n_iter_ = iter + 1;
        }

        // Compute final reconstruction error
        let mut wh = vec![0.0f32; n_samples * n_features];
        for i in 0..n_samples {
            for j in 0..n_features {
                for k in 0..self.n_components {
                    wh[i * n_features + j] += w[i][k] * h[k][j];
                }
            }
        }
        self.reconstruction_err_ = Some(self.frobenius_norm(&x_data, &wh));
        self.components_ = Some(h);
    }

    /// Transform new data: solve for W given the fitted H by running 50
    /// multiplicative W-updates from a fresh random initialization.
    /// NOTE(review): randomized init makes repeated calls non-deterministic.
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let h = self.components_.as_ref().expect("Model not fitted");

        // Solve for W given H using multiplicative updates
        use rand::Rng;
        let mut rng = rand::thread_rng();

        let mut w: Vec<Vec<f32>> = (0..n_samples)
            .map(|_| (0..self.n_components).map(|_| rng.gen::<f32>().abs() + 0.1).collect())
            .collect();

        let eps = 1e-10f32;

        // Fixed iteration budget; H is frozen so only W is updated.
        for _ in 0..50 {
            // Compute X H^T
            let mut x_ht = vec![vec![0.0f32; self.n_components]; n_samples];
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    for j in 0..n_features {
                        x_ht[i][k] += x_data[i * n_features + j] * h[k][j];
                    }
                }
            }

            // Compute H H^T
            let mut h_ht = vec![vec![0.0f32; self.n_components]; self.n_components];
            for k1 in 0..self.n_components {
                for k2 in 0..self.n_components {
                    for j in 0..n_features {
                        h_ht[k1][k2] += h[k1][j] * h[k2][j];
                    }
                }
            }

            // Update W
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    let mut denom = eps;
                    for k2 in 0..self.n_components {
                        denom += w[i][k2] * h_ht[k2][k];
                    }
                    w[i][k] *= x_ht[i][k] / denom;
                }
            }
        }

        // Flatten W into a row-major (n_samples x n_components) tensor.
        let mut result = vec![0.0f32; n_samples * self.n_components];
        for i in 0..n_samples {
            for k in 0..self.n_components {
                result[i * self.n_components + k] = w[i][k];
            }
        }

        Tensor::from_slice(&result, &[n_samples, self.n_components]).unwrap()
    }

    /// Convenience: `fit` then `transform` on the same data.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }

    /// Reconstruct data from factor weights: X_rec = W * H.
    ///
    /// # Panics
    /// Panics if called before `fit`.
    pub fn inverse_transform(&self, w: &Tensor) -> Tensor {
        let w_data = w.data_f32();
        let n_samples = w.dims()[0];
        let k = w.dims()[1];

        let h = self.components_.as_ref().expect("Model not fitted");
        let n_features = h[0].len();

        let mut result = vec![0.0f32; n_samples * n_features];

        for i in 0..n_samples {
            for j in 0..n_features {
                for c in 0..k {
                    result[i * n_features + j] += w_data[i * k + c] * h[c][j];
                }
            }
        }

        Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
    }
}
620
#[cfg(test)]
mod tests {
    use super::*;

    /// PCA on a 4x3 matrix keeping 2 components yields a 4x2 projection.
    #[test]
    fn test_pca() {
        let data: Vec<f32> = (1..=12).map(|v| v as f32).collect();
        let x = Tensor::from_slice(&data, &[4, 3]).unwrap();

        let mut model = PCA::new(2);
        let projected = model.fit_transform(&x);

        assert_eq!(projected.dims(), &[4, 2]);
    }

    /// Fitting SVD populates all three factor fields.
    #[test]
    fn test_svd() {
        let data: Vec<f32> = (1..=6).map(|v| v as f32).collect();
        let x = Tensor::from_slice(&data, &[3, 2]).unwrap();

        let mut model = SVD::new(Some(2));
        model.fit(&x);

        assert!(model.s_.is_some());
        assert!(model.u_.is_some());
        assert!(model.vt_.is_some());
    }

    /// NMF produces the requested low-rank factor and a non-negative error.
    #[test]
    fn test_nmf() {
        let data = [
            1.0f32, 2.0, 0.0,
            0.0, 1.0, 3.0,
            2.0, 0.0, 1.0,
        ];
        let x = Tensor::from_slice(&data, &[3, 3]).unwrap();

        let mut model = NMF::new(2).max_iter(100);
        let factors = model.fit_transform(&x);

        assert_eq!(factors.dims(), &[3, 2]);
        assert!(model.reconstruction_err_.unwrap() >= 0.0);
    }
}
668
669