1use ghostflow_core::Tensor;
4
/// Principal Component Analysis via power iteration with deflation.
///
/// Fields whose names end in `_` follow the scikit-learn convention of
/// "populated by `fit`"; they are `None`/zero until `fit` has run.
pub struct PCA {
    /// Number of principal components to keep (clamped to the feature count at fit time).
    pub n_components: usize,
    /// When true, `transform` scales each projection to unit component variance.
    pub whiten: bool,
    /// Principal axes: one `Vec<f32>` of length `n_features` per retained component.
    pub components_: Option<Vec<Vec<f32>>>,
    /// Covariance-matrix eigenvalues for the retained components.
    pub explained_variance_: Option<Vec<f32>>,
    /// Fraction of variance explained by each retained component.
    pub explained_variance_ratio_: Option<Vec<f32>>,
    /// Per-feature mean of the training data, subtracted before projection.
    pub mean_: Option<Vec<f32>>,
    /// Number of features seen during `fit`.
    pub n_features_: usize,
    /// Number of samples seen during `fit`.
    pub n_samples_: usize,
}
16
17impl PCA {
18 pub fn new(n_components: usize) -> Self {
19 PCA {
20 n_components,
21 whiten: false,
22 components_: None,
23 explained_variance_: None,
24 explained_variance_ratio_: None,
25 mean_: None,
26 n_features_: 0,
27 n_samples_: 0,
28 }
29 }
30
31 pub fn whiten(mut self, whiten: bool) -> Self {
32 self.whiten = whiten;
33 self
34 }
35
36 fn compute_mean(&self, x: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
37 let mut mean = vec![0.0f32; n_features];
38 for i in 0..n_samples {
39 for j in 0..n_features {
40 mean[j] += x[i * n_features + j];
41 }
42 }
43 for m in &mut mean {
44 *m /= n_samples as f32;
45 }
46 mean
47 }
48
49 fn center_data(&self, x: &[f32], mean: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
50 let mut centered = vec![0.0f32; n_samples * n_features];
51 for i in 0..n_samples {
52 for j in 0..n_features {
53 centered[i * n_features + j] = x[i * n_features + j] - mean[j];
54 }
55 }
56 centered
57 }
58
59 fn compute_covariance(&self, x_centered: &[f32], n_samples: usize, n_features: usize) -> Vec<f32> {
60 let mut cov = vec![0.0f32; n_features * n_features];
61
62 for i in 0..n_features {
64 for j in 0..n_features {
65 let mut sum = 0.0f32;
66 for k in 0..n_samples {
67 sum += x_centered[k * n_features + i] * x_centered[k * n_features + j];
68 }
69 cov[i * n_features + j] = sum / (n_samples - 1).max(1) as f32;
70 }
71 }
72
73 cov
74 }
75
76 fn power_iteration(&self, matrix: &[f32], n: usize, max_iter: usize, tol: f32) -> (Vec<f32>, f32) {
78 let mut v = vec![1.0f32 / (n as f32).sqrt(); n];
79 let mut eigenvalue = 0.0f32;
80
81 for _ in 0..max_iter {
82 let mut w = vec![0.0f32; n];
84 for i in 0..n {
85 for j in 0..n {
86 w[i] += matrix[i * n + j] * v[j];
87 }
88 }
89
90 let new_eigenvalue: f32 = w.iter().zip(v.iter()).map(|(&wi, &vi)| wi * vi).sum();
92
93 let norm: f32 = w.iter().map(|&x| x * x).sum::<f32>().sqrt();
95 if norm < 1e-10 {
96 break;
97 }
98 for wi in &mut w {
99 *wi /= norm;
100 }
101
102 let diff: f32 = v.iter().zip(w.iter()).map(|(&vi, &wi)| (vi - wi).abs()).sum();
104 v = w;
105 eigenvalue = new_eigenvalue;
106
107 if diff < tol {
108 break;
109 }
110 }
111
112 (v, eigenvalue)
113 }
114
115 fn deflate(&self, matrix: &mut [f32], eigenvector: &[f32], eigenvalue: f32, n: usize) {
117 for i in 0..n {
118 for j in 0..n {
119 matrix[i * n + j] -= eigenvalue * eigenvector[i] * eigenvector[j];
120 }
121 }
122 }
123
124 pub fn fit(&mut self, x: &Tensor) {
125 let x_data = x.data_f32();
126 let n_samples = x.dims()[0];
127 let n_features = x.dims()[1];
128
129 self.n_samples_ = n_samples;
130 self.n_features_ = n_features;
131
132 let mean = self.compute_mean(&x_data, n_samples, n_features);
134 let x_centered = self.center_data(&x_data, &mean, n_samples, n_features);
135
136 let mut cov = self.compute_covariance(&x_centered, n_samples, n_features);
138
139 let k = self.n_components.min(n_features);
141 let mut components = Vec::with_capacity(k);
142 let mut eigenvalues = Vec::with_capacity(k);
143
144 for _ in 0..k {
145 let (eigenvector, eigenvalue) = self.power_iteration(&cov, n_features, 1000, 1e-6);
146
147 if eigenvalue < 1e-10 {
148 break;
149 }
150
151 components.push(eigenvector.clone());
152 eigenvalues.push(eigenvalue);
153
154 self.deflate(&mut cov, &eigenvector, eigenvalue, n_features);
156 }
157
158 let total_variance: f32 = eigenvalues.iter().sum();
160 let explained_variance_ratio: Vec<f32> = eigenvalues.iter()
161 .map(|&e| e / total_variance.max(1e-10))
162 .collect();
163
164 self.components_ = Some(components);
165 self.explained_variance_ = Some(eigenvalues);
166 self.explained_variance_ratio_ = Some(explained_variance_ratio);
167 self.mean_ = Some(mean);
168 }
169
170 pub fn transform(&self, x: &Tensor) -> Tensor {
171 let x_data = x.data_f32();
172 let n_samples = x.dims()[0];
173 let n_features = x.dims()[1];
174
175 let mean = self.mean_.as_ref().expect("Model not fitted");
176 let components = self.components_.as_ref().expect("Model not fitted");
177 let k = components.len();
178
179 let mut result = vec![0.0f32; n_samples * k];
180
181 for i in 0..n_samples {
182 for (c, component) in components.iter().enumerate() {
183 let mut sum = 0.0f32;
184 for j in 0..n_features {
185 sum += (x_data[i * n_features + j] - mean[j]) * component[j];
186 }
187
188 if self.whiten {
189 let var = self.explained_variance_.as_ref().unwrap()[c];
190 sum /= var.sqrt().max(1e-10);
191 }
192
193 result[i * k + c] = sum;
194 }
195 }
196
197 Tensor::from_slice(&result, &[n_samples, k]).unwrap()
198 }
199
200 pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
201 self.fit(x);
202 self.transform(x)
203 }
204
205 pub fn inverse_transform(&self, x: &Tensor) -> Tensor {
206 let x_data = x.data_f32();
207 let n_samples = x.dims()[0];
208 let k = x.dims()[1];
209
210 let mean = self.mean_.as_ref().expect("Model not fitted");
211 let components = self.components_.as_ref().expect("Model not fitted");
212 let n_features = self.n_features_;
213
214 let mut result = vec![0.0f32; n_samples * n_features];
215
216 for i in 0..n_samples {
217 for j in 0..n_features {
218 let mut sum = mean[j];
219 for c in 0..k {
220 let mut val = x_data[i * k + c];
221 if self.whiten {
222 let var = self.explained_variance_.as_ref().unwrap()[c];
223 val *= var.sqrt();
224 }
225 sum += val * components[c][j];
226 }
227 result[i * n_features + j] = sum;
228 }
229 }
230
231 Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
232 }
233}
234
/// Truncated singular value decomposition computed by alternating power
/// iteration with deflation. Fields ending in `_` are populated by `fit`.
pub struct SVD {
    /// Number of singular triplets to compute; `None` means `min(rows, cols)`.
    pub n_components: Option<usize>,
    /// Left singular vectors: one length-`m` vector per retained component.
    pub u_: Option<Vec<Vec<f32>>>,
    /// Singular values, in the order the components were extracted.
    pub s_: Option<Vec<f32>>,
    /// Right singular vectors (rows of V^T): one length-`n` vector each.
    pub vt_: Option<Vec<Vec<f32>>>,
}
242
243impl SVD {
244 pub fn new(n_components: Option<usize>) -> Self {
245 SVD {
246 n_components,
247 u_: None,
248 s_: None,
249 vt_: None,
250 }
251 }
252
253 pub fn fit(&mut self, x: &Tensor) {
254 let x_data = x.data_f32();
255 let m = x.dims()[0];
256 let n = x.dims()[1];
257 let k = self.n_components.unwrap_or(m.min(n));
258
259 let mut a = x_data.clone();
260 let mut u_vecs = Vec::with_capacity(k);
261 let mut singular_values = Vec::with_capacity(k);
262 let mut v_vecs = Vec::with_capacity(k);
263
264 let max_iter = 100;
265 let tol = 1e-6;
266
267 for _ in 0..k {
268 let mut v = vec![1.0f32 / (n as f32).sqrt(); n];
270 let mut u = vec![0.0f32; m];
271 let mut sigma = 0.0f32;
272
273 for _ in 0..max_iter {
274 for i in 0..m {
276 u[i] = 0.0;
277 for j in 0..n {
278 u[i] += a[i * n + j] * v[j];
279 }
280 }
281
282 let u_norm: f32 = u.iter().map(|&x| x * x).sum::<f32>().sqrt();
284 if u_norm < tol {
285 break;
286 }
287 for ui in &mut u {
288 *ui /= u_norm;
289 }
290
291 let mut new_v = vec![0.0f32; n];
293 for j in 0..n {
294 for i in 0..m {
295 new_v[j] += a[i * n + j] * u[i];
296 }
297 }
298
299 let v_norm: f32 = new_v.iter().map(|&x| x * x).sum::<f32>().sqrt();
301 if v_norm < tol {
302 break;
303 }
304 sigma = v_norm;
305 for vj in &mut new_v {
306 *vj /= v_norm;
307 }
308
309 let diff: f32 = v.iter().zip(new_v.iter())
311 .map(|(&old, &new)| (old - new).abs())
312 .sum();
313 v = new_v;
314
315 if diff < tol {
316 break;
317 }
318 }
319
320 if sigma < tol {
321 break;
322 }
323
324 singular_values.push(sigma);
325 u_vecs.push(u.clone());
326 v_vecs.push(v.clone());
327
328 for i in 0..m {
330 for j in 0..n {
331 a[i * n + j] -= sigma * u[i] * v[j];
332 }
333 }
334 }
335
336 self.u_ = Some(u_vecs);
337 self.s_ = Some(singular_values);
338 self.vt_ = Some(v_vecs);
339 }
340
341 pub fn transform(&self, x: &Tensor) -> Tensor {
342 let x_data = x.data_f32();
343 let n_samples = x.dims()[0];
344 let n_features = x.dims()[1];
345
346 let vt = self.vt_.as_ref().expect("Model not fitted");
347 let k = vt.len();
348
349 let mut result = vec![0.0f32; n_samples * k];
350
351 for i in 0..n_samples {
352 for c in 0..k {
353 let mut sum = 0.0f32;
354 for j in 0..n_features {
355 sum += x_data[i * n_features + j] * vt[c][j];
356 }
357 result[i * k + c] = sum;
358 }
359 }
360
361 Tensor::from_slice(&result, &[n_samples, k]).unwrap()
362 }
363
364 pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
365 self.fit(x);
366 self.transform(x)
367 }
368}
369
/// Non-negative matrix factorization (X ~ W * H) via multiplicative updates.
/// Fields ending in `_` are populated by `fit`.
pub struct NMF {
    /// Number of latent components (columns of W / rows of H).
    pub n_components: usize,
    /// Maximum number of multiplicative-update iterations.
    pub max_iter: usize,
    /// Convergence threshold on the change in reconstruction error.
    pub tol: f32,
    /// Regularization strength.
    /// NOTE(review): stored but never applied in the update rules — confirm
    /// whether regularization was intended to be a no-op.
    pub alpha: f32,
    /// L1/L2 regularization mix.
    /// NOTE(review): stored but never applied in the update rules — confirm.
    pub l1_ratio: f32,
    /// Learned factor H: `n_components` rows of length `n_features`.
    pub components_: Option<Vec<Vec<f32>>>,
    /// Frobenius-norm reconstruction error after fitting.
    pub reconstruction_err_: Option<f32>,
    /// Number of iterations actually run by the last `fit`.
    pub n_iter_: usize,
}
381
impl NMF {
    /// Creates an NMF model with default hyper-parameters
    /// (200 iterations, tol 1e-4, no regularization).
    pub fn new(n_components: usize) -> Self {
        NMF {
            n_components,
            max_iter: 200,
            tol: 1e-4,
            alpha: 0.0,
            l1_ratio: 0.0,
            components_: None,
            reconstruction_err_: None,
            n_iter_: 0,
        }
    }

    /// Builder-style setter for the iteration cap.
    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    /// Builder-style setter for the regularization strength.
    /// NOTE(review): `alpha` is currently never read by `fit`, so setting it
    /// has no effect — verify before relying on regularization.
    pub fn alpha(mut self, alpha: f32) -> Self {
        self.alpha = alpha;
        self
    }

    /// Frobenius norm of the element-wise difference between `x` and `wh`
    /// (both flattened row-major matrices of equal length).
    fn frobenius_norm(&self, x: &[f32], wh: &[f32]) -> f32 {
        x.iter().zip(wh.iter())
            .map(|(&xi, &whi)| (xi - whi).powi(2))
            .sum::<f32>()
            .sqrt()
    }

    /// Fits W and H with classic multiplicative updates (Lee & Seung):
    /// H *= (W^T X) / (W^T W H), then W *= (X H^T) / (W H H^T).
    ///
    /// W and H are randomly initialized to strictly positive values, so
    /// results differ between runs. Only H is stored (in `components_`);
    /// `transform` re-solves for W.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        use rand::Rng;
        let mut rng = rand::thread_rng();

        // Random positive init; the +0.1 keeps entries away from zero so the
        // multiplicative updates cannot get permanently stuck at 0.
        let mut w: Vec<Vec<f32>> = (0..n_samples)
            .map(|_| (0..self.n_components).map(|_| rng.gen::<f32>().abs() + 0.1).collect())
            .collect();

        let mut h: Vec<Vec<f32>> = (0..self.n_components)
            .map(|_| (0..n_features).map(|_| rng.gen::<f32>().abs() + 0.1).collect())
            .collect();

        // Small constant added to denominators to avoid division by zero.
        let eps = 1e-10f32;
        let mut prev_error = f32::INFINITY;

        for iter in 0..self.max_iter {
            // W^T X  (n_components x n_features)
            let mut wt_x = vec![vec![0.0f32; n_features]; self.n_components];
            for k in 0..self.n_components {
                for j in 0..n_features {
                    for i in 0..n_samples {
                        wt_x[k][j] += w[i][k] * x_data[i * n_features + j];
                    }
                }
            }

            // W^T W  (n_components x n_components)
            let mut wt_w = vec![vec![0.0f32; self.n_components]; self.n_components];
            for k1 in 0..self.n_components {
                for k2 in 0..self.n_components {
                    for i in 0..n_samples {
                        wt_w[k1][k2] += w[i][k1] * w[i][k2];
                    }
                }
            }

            // H <- H * (W^T X) / (W^T W H)
            for k in 0..self.n_components {
                for j in 0..n_features {
                    let mut denom = eps;
                    for k2 in 0..self.n_components {
                        denom += wt_w[k][k2] * h[k2][j];
                    }
                    h[k][j] *= wt_x[k][j] / denom;
                }
            }

            // X H^T  (n_samples x n_components), using the freshly updated H.
            let mut x_ht = vec![vec![0.0f32; self.n_components]; n_samples];
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    for j in 0..n_features {
                        x_ht[i][k] += x_data[i * n_features + j] * h[k][j];
                    }
                }
            }

            // H H^T  (n_components x n_components)
            let mut h_ht = vec![vec![0.0f32; self.n_components]; self.n_components];
            for k1 in 0..self.n_components {
                for k2 in 0..self.n_components {
                    for j in 0..n_features {
                        h_ht[k1][k2] += h[k1][j] * h[k2][j];
                    }
                }
            }

            // W <- W * (X H^T) / (W H H^T)
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    let mut denom = eps;
                    for k2 in 0..self.n_components {
                        denom += w[i][k2] * h_ht[k2][k];
                    }
                    w[i][k] *= x_ht[i][k] / denom;
                }
            }

            // Check convergence only every 10th iteration; the full W*H
            // reconstruction is O(samples * features * components).
            if iter % 10 == 0 {
                let mut wh = vec![0.0f32; n_samples * n_features];
                for i in 0..n_samples {
                    for j in 0..n_features {
                        for k in 0..self.n_components {
                            wh[i * n_features + j] += w[i][k] * h[k][j];
                        }
                    }
                }

                let error = self.frobenius_norm(&x_data, &wh);
                if (prev_error - error).abs() < self.tol {
                    self.n_iter_ = iter + 1;
                    break;
                }
                prev_error = error;
            }

            self.n_iter_ = iter + 1;
        }

        // Record the final reconstruction error on the full W*H product.
        let mut wh = vec![0.0f32; n_samples * n_features];
        for i in 0..n_samples {
            for j in 0..n_features {
                for k in 0..self.n_components {
                    wh[i * n_features + j] += w[i][k] * h[k][j];
                }
            }
        }
        self.reconstruction_err_ = Some(self.frobenius_norm(&x_data, &wh));
        self.components_ = Some(h);
    }

    /// Projects new data onto the fitted H by solving for W with 50
    /// multiplicative-update steps, holding H fixed. W is randomly
    /// initialized, so the result is not deterministic across calls.
    ///
    /// # Panics
    /// Panics if the model has not been fitted.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let h = self.components_.as_ref().expect("Model not fitted");

        use rand::Rng;
        let mut rng = rand::thread_rng();

        // Same positive random init scheme as `fit`.
        let mut w: Vec<Vec<f32>> = (0..n_samples)
            .map(|_| (0..self.n_components).map(|_| rng.gen::<f32>().abs() + 0.1).collect())
            .collect();

        let eps = 1e-10f32;

        for _ in 0..50 {
            // X H^T  (n_samples x n_components)
            let mut x_ht = vec![vec![0.0f32; self.n_components]; n_samples];
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    for j in 0..n_features {
                        x_ht[i][k] += x_data[i * n_features + j] * h[k][j];
                    }
                }
            }

            // H H^T  (n_components x n_components)
            let mut h_ht = vec![vec![0.0f32; self.n_components]; self.n_components];
            for k1 in 0..self.n_components {
                for k2 in 0..self.n_components {
                    for j in 0..n_features {
                        h_ht[k1][k2] += h[k1][j] * h[k2][j];
                    }
                }
            }

            // W <- W * (X H^T) / (W H H^T)
            for i in 0..n_samples {
                for k in 0..self.n_components {
                    let mut denom = eps;
                    for k2 in 0..self.n_components {
                        denom += w[i][k2] * h_ht[k2][k];
                    }
                    w[i][k] *= x_ht[i][k] / denom;
                }
            }
        }

        // Flatten W into a row-major tensor.
        let mut result = vec![0.0f32; n_samples * self.n_components];
        for i in 0..n_samples {
            for k in 0..self.n_components {
                result[i * self.n_components + k] = w[i][k];
            }
        }

        Tensor::from_slice(&result, &[n_samples, self.n_components]).unwrap()
    }

    /// `fit` followed by `transform` on the same data. Note the returned W
    /// comes from a fresh `transform` solve, not the W computed inside `fit`.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }

    /// Reconstructs X ~ W * H from a coefficient matrix W.
    ///
    /// # Panics
    /// Panics if the model has not been fitted.
    pub fn inverse_transform(&self, w: &Tensor) -> Tensor {
        let w_data = w.data_f32();
        let n_samples = w.dims()[0];
        let k = w.dims()[1];

        let h = self.components_.as_ref().expect("Model not fitted");
        let n_features = h[0].len();

        let mut result = vec![0.0f32; n_samples * n_features];

        for i in 0..n_samples {
            for j in 0..n_features {
                for c in 0..k {
                    result[i * n_features + j] += w_data[i * k + c] * h[c][j];
                }
            }
        }

        Tensor::from_slice(&result, &[n_samples, n_features]).unwrap()
    }
}
620
#[cfg(test)]
mod tests {
    use super::*;

    // Shape-level smoke test: 4x3 data reduced to 2 components.
    #[test]
    fn test_pca() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0,
                                     4.0, 5.0, 6.0,
                                     7.0, 8.0, 9.0,
                                     10.0, 11.0, 12.0,
                                     ], &[4, 3]).unwrap();

        let mut pca = PCA::new(2);
        let transformed = pca.fit_transform(&x);

        assert_eq!(transformed.dims(), &[4, 2]);
    }

    // Checks that fitting populates all three SVD factor fields.
    #[test]
    fn test_svd() {
        let x = Tensor::from_slice(&[1.0f32, 2.0,
                                     3.0, 4.0,
                                     5.0, 6.0,
                                     ], &[3, 2]).unwrap();

        let mut svd = SVD::new(Some(2));
        svd.fit(&x);

        assert!(svd.s_.is_some());
        assert!(svd.u_.is_some());
        assert!(svd.vt_.is_some());
    }

    // Shape-level smoke test on a non-negative 3x3 matrix; the Frobenius
    // reconstruction error must be recorded and non-negative.
    #[test]
    fn test_nmf() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 0.0,
                                     0.0, 1.0, 3.0,
                                     2.0, 0.0, 1.0,
                                     ], &[3, 3]).unwrap();

        let mut nmf = NMF::new(2).max_iter(100);
        let transformed = nmf.fit_transform(&x);

        assert_eq!(transformed.dims(), &[3, 2]);
        assert!(nmf.reconstruction_err_.unwrap() >= 0.0);
    }
}
668
669