1use ghostflow_core::Tensor;
4use rand::prelude::*;
5
/// Factor Analysis: linear latent-variable model `x = W z + mu + eps`, where
/// `z` has `n_components` dimensions and `eps` has per-feature (diagonal)
/// noise variance. Fitted by expectation-maximization (see `fit`).
pub struct FactorAnalysis {
    /// Number of latent factors to extract.
    pub n_components: usize,
    /// Maximum number of EM iterations (default 1000).
    pub max_iter: usize,
    /// Convergence threshold on the max absolute change of the loadings.
    pub tol: f32,
    // Learned loadings W, shape [n_components][n_features]; None until fitted.
    components_: Option<Vec<Vec<f32>>>,
    // Learned per-feature noise variances (diagonal Psi); None until fitted.
    noise_variance_: Option<Vec<f32>>,
    // Per-feature mean of the training data; None until fitted.
    mean_: Option<Vec<f32>>,
    // Number of EM iterations actually performed by the last fit.
    n_iter_: usize,
}
16
impl FactorAnalysis {
    /// Creates a model with `n_components` latent factors and default
    /// settings (`max_iter = 1000`, `tol = 1e-2`).
    pub fn new(n_components: usize) -> Self {
        FactorAnalysis {
            n_components,
            max_iter: 1000,
            tol: 1e-2,
            components_: None,
            noise_variance_: None,
            mean_: None,
            n_iter_: 0,
        }
    }

    /// Fits the model to `x` (shape `[n_samples, n_features]`, row-major) by EM.
    ///
    /// E-step: per sample, the posterior factor mean is
    /// `E[z] = M^-1 W Psi^-1 (x - mu)` with `M = I + W Psi^-1 W^T`.
    /// M-step: loadings are re-solved from the sufficient statistics and the
    /// noise variances refreshed from the residual variance. Iterates until
    /// the loadings change by less than `tol` or `max_iter` is reached.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Per-feature mean, used to center the data.
        let mean: Vec<f32> = (0..n_features)
            .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
            .collect();

        let x_centered: Vec<f32> = (0..n_samples)
            .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - mean[j]).collect::<Vec<_>>())
            .collect();

        // Random small-magnitude initialization of the loadings W.
        let mut rng = thread_rng();
        let mut components: Vec<Vec<f32>> = (0..self.n_components)
            .map(|_| (0..n_features).map(|_| rng.gen::<f32>() - 0.5).collect())
            .collect();

        // Initialize noise variances to the per-feature sample variance.
        let mut noise_var: Vec<f32> = (0..n_features)
            .map(|j| {
                (0..n_samples).map(|i| x_centered[i * n_features + j].powi(2)).sum::<f32>()
                    / n_samples as f32
            })
            .collect();

        for iter in 0..self.max_iter {
            let prev_components = components.clone();

            // M = I + W Psi^-1 W^T (n_components x n_components).
            let mut m = vec![vec![0.0f32; self.n_components]; self.n_components];
            for i in 0..self.n_components {
                m[i][i] = 1.0;
                for j in 0..self.n_components {
                    for k in 0..n_features {
                        // .max(1e-10) guards against dividing by a collapsed variance.
                        m[i][j] += components[i][k] * components[j][k] / noise_var[k].max(1e-10);
                    }
                }
            }

            let m_inv = invert_matrix(&m, self.n_components);

            // Sufficient statistics:
            //   s1 = sum_i E[z_i] x_i^T          ([n_components][n_features])
            //   s2 = sum_i E[z_i z_i^T]          ([n_components][n_components])
            let mut s1 = vec![vec![0.0f32; n_features]; self.n_components];
            let mut s2 = vec![vec![0.0f32; self.n_components]; self.n_components];

            for i in 0..n_samples {
                // ez = W Psi^-1 (x_i - mu), before applying M^-1.
                let mut ez = vec![0.0f32; self.n_components];
                for k in 0..self.n_components {
                    for j in 0..n_features {
                        ez[k] += components[k][j] * x_centered[i * n_features + j] / noise_var[j].max(1e-10);
                    }
                }

                // ez_final = M^-1 ez: the posterior mean of the factors.
                let mut ez_final = vec![0.0f32; self.n_components];
                for k in 0..self.n_components {
                    for l in 0..self.n_components {
                        ez_final[k] += m_inv[k][l] * ez[l];
                    }
                }

                for k in 0..self.n_components {
                    for j in 0..n_features {
                        s1[k][j] += ez_final[k] * x_centered[i * n_features + j];
                    }
                    for l in 0..self.n_components {
                        // E[z z^T] = M^-1 (posterior covariance) + E[z] E[z]^T.
                        s2[k][l] += m_inv[k][l] + ez_final[k] * ez_final[l];
                    }
                }
            }

            // M-step for the loadings: W = s2^-1 s1 (s2 is symmetric).
            let s2_inv = invert_matrix(&s2, self.n_components);
            for k in 0..self.n_components {
                for j in 0..n_features {
                    components[k][j] = 0.0;
                    for l in 0..self.n_components {
                        components[k][j] += s1[l][j] * s2_inv[k][l];
                    }
                }
            }

            // M-step for the noise: residual variance after removing the part
            // explained by the loadings, floored at 1e-6 to keep Psi positive.
            for j in 0..n_features {
                let mut var = 0.0f32;
                for i in 0..n_samples {
                    var += x_centered[i * n_features + j].powi(2);
                }
                var /= n_samples as f32;
                for k in 0..self.n_components {
                    var -= components[k][j] * s1[k][j] / n_samples as f32;
                }
                noise_var[j] = var.max(1e-6);
            }

            self.n_iter_ = iter + 1;

            // Convergence check on the loadings.
            let mut max_change = 0.0f32;
            for k in 0..self.n_components {
                for j in 0..n_features {
                    max_change = max_change.max((components[k][j] - prev_components[k][j]).abs());
                }
            }
            if max_change < self.tol {
                break;
            }
        }

        self.components_ = Some(components);
        self.noise_variance_ = Some(noise_var);
        self.mean_ = Some(mean);
    }

    /// Projects `x` onto the latent factors.
    ///
    /// NOTE(review): this computes `W Psi^-1 (x - mu)` only — it omits the
    /// `M^-1` factor that the E-step in `fit` applies, so the output is not
    /// the exact posterior mean of `z`. Confirm this is intended.
    ///
    /// Panics if the model has not been fitted.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let components = self.components_.as_ref().expect("Not fitted");
        let noise_var = self.noise_variance_.as_ref().unwrap();
        let mean = self.mean_.as_ref().unwrap();

        let mut result = vec![0.0f32; n_samples * self.n_components];
        for i in 0..n_samples {
            for k in 0..self.n_components {
                for j in 0..n_features {
                    result[i * self.n_components + k] +=
                        components[k][j] * (x_data[i * n_features + j] - mean[j]) / noise_var[j].max(1e-10);
                }
            }
        }

        Tensor::from_slice(&result, &[n_samples, self.n_components]).unwrap()
    }

    /// Fits the model to `x`, then transforms the same data.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }
}
169
/// FastICA: independent component analysis via fixed-point iteration on
/// whitened, centered data.
pub struct FastICA {
    /// Number of independent components to extract.
    pub n_components: usize,
    /// Maximum number of fixed-point iterations (default 200).
    pub max_iter: usize,
    /// Convergence threshold on the max absolute change of the unmixing rows.
    pub tol: f32,
    /// Contrast function used by the update (default `Logcosh`).
    pub fun: ICAFunction,
    // Unmixing matrix in whitened space, [n_components][n_components]; None until fitted.
    components_: Option<Vec<Vec<f32>>>,
    // NOTE(review): despite the name, this stores the transpose of the
    // feature-space unmixing map ([n_features][n_components]); `transform`
    // multiplies by it directly. Confirm the naming against callers.
    mixing_: Option<Vec<Vec<f32>>>,
    // Per-feature training mean; None until fitted.
    mean_: Option<Vec<f32>>,
}
180
/// Contrast (non-linearity) function used by FastICA's fixed-point updates;
/// see `FastICA::g` for the exact `(g, g')` pairs.
#[derive(Clone, Copy)]
pub enum ICAFunction {
    /// g(x) = tanh(x).
    Logcosh,
    /// g(x) = x * exp(-x^2 / 2).
    Exp,
    /// g(x) = x^3.
    Cube,
}
187
impl FastICA {
    /// Creates a FastICA model with `n_components` components and defaults
    /// (`max_iter = 200`, `tol = 1e-4`, `fun = Logcosh`).
    pub fn new(n_components: usize) -> Self {
        FastICA {
            n_components,
            max_iter: 200,
            tol: 1e-4,
            fun: ICAFunction::Logcosh,
            components_: None,
            mixing_: None,
            mean_: None,
        }
    }

    /// Evaluates the selected contrast function at `x`, returning `(g(x), g'(x))`.
    fn g(&self, x: f32) -> (f32, f32) {
        match self.fun {
            ICAFunction::Logcosh => {
                // g = tanh(x), g' = 1 - tanh^2(x).
                let t = x.tanh();
                (t, 1.0 - t * t)
            }
            ICAFunction::Exp => {
                // g = x e^{-x^2/2}, g' = (1 - x^2) e^{-x^2/2}.
                let e = (-x * x / 2.0).exp();
                (x * e, (1.0 - x * x) * e)
            }
            // g = x^3, g' = 3x^2.
            ICAFunction::Cube => (x.powi(3), 3.0 * x * x),
        }
    }

    /// Fits the unmixing matrix to `x` (`[n_samples, n_features]`, row-major).
    ///
    /// Pipeline: center, whiten down to `n_components` dimensions, then run
    /// the fixed-point update `w <- E[x g(w^T x)] - E[g'(w^T x)] w` on every
    /// row, re-orthonormalizing after each pass, until the rows move by less
    /// than `tol` or `max_iter` passes elapse.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Per-feature mean for centering.
        let mean: Vec<f32> = (0..n_features)
            .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
            .collect();

        let x_centered: Vec<f32> = (0..n_samples)
            .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - mean[j]).collect::<Vec<_>>())
            .collect();

        // x_white: [n_samples, n_components]; whitening: [n_components][n_features].
        let (x_white, whitening) = self.whiten(&x_centered, n_samples, n_features);

        // Random initialization of the unmixing rows, then orthonormalize.
        let mut rng = thread_rng();
        let mut w = vec![vec![0.0f32; self.n_components]; self.n_components];
        for i in 0..self.n_components {
            for j in 0..self.n_components {
                w[i][j] = rng.gen::<f32>() - 0.5;
            }
        }
        orthogonalize(&mut w, self.n_components);

        for _ in 0..self.max_iter {
            let w_old = w.clone();

            for i in 0..self.n_components {
                // Accumulate sum_x x g(w^T x) (g_sum) and sum_x g'(w^T x) (gp_mean).
                let mut g_sum = vec![0.0f32; self.n_components];
                let mut gp_mean = 0.0f32;

                for s in 0..n_samples {
                    let mut wtx = 0.0f32;
                    for j in 0..self.n_components {
                        wtx += w[i][j] * x_white[s * self.n_components + j];
                    }
                    let (g_val, gp_val) = self.g(wtx);
                    gp_mean += gp_val;
                    for j in 0..self.n_components {
                        g_sum[j] += g_val * x_white[s * self.n_components + j];
                    }
                }

                // Fixed-point update: w <- E[x g(w^T x)] - E[g'(w^T x)] w.
                gp_mean /= n_samples as f32;
                for j in 0..self.n_components {
                    w[i][j] = g_sum[j] / n_samples as f32 - gp_mean * w[i][j];
                }
            }

            orthogonalize(&mut w, self.n_components);

            // Converged when no entry of w moved by more than tol.
            let mut max_change = 0.0f32;
            for i in 0..self.n_components {
                for j in 0..self.n_components {
                    max_change = max_change.max((w[i][j] - w_old[i][j]).abs());
                }
            }
            if max_change < self.tol {
                break;
            }
        }

        // mixing[i][j] = sum_k whitening[k][i] * w[j][k]: the feature-space
        // projection applied by `transform`. NOTE(review): this is the
        // transpose of the unmixing map, not a mixing matrix in the usual
        // sense — confirm the field name.
        let mut mixing = vec![vec![0.0f32; self.n_components]; n_features];
        for i in 0..n_features {
            for j in 0..self.n_components {
                for k in 0..self.n_components {
                    mixing[i][j] += whitening[k][i] * w[j][k];
                }
            }
        }

        self.components_ = Some(w);
        self.mixing_ = Some(mixing);
        self.mean_ = Some(mean);
    }

    /// Whitens centered data `x` (`[n_samples, n_features]`): builds the
    /// sample covariance, extracts leading eigenpairs by power iteration with
    /// deflation, and scales each eigenvector by 1/sqrt(eigenvalue).
    ///
    /// Returns the whitened samples (`[n_samples, n_components]`) and the
    /// whitening matrix (`[n_components][n_features]`).
    ///
    /// NOTE(review): only `n_components.min(n_features)` eigenpairs are
    /// extracted, but `n_components` entries are indexed when building the
    /// whitening matrix — this panics if `n_components > n_features`.
    fn whiten(&self, x: &[f32], n_samples: usize, n_features: usize) -> (Vec<f32>, Vec<Vec<f32>>) {
        // Sample covariance with the n-1 denominator (guarded for n_samples == 1).
        let mut cov = vec![0.0f32; n_features * n_features];
        for i in 0..n_features {
            for j in 0..n_features {
                for k in 0..n_samples {
                    cov[i * n_features + j] += x[k * n_features + i] * x[k * n_features + j];
                }
                cov[i * n_features + j] /= (n_samples - 1).max(1) as f32;
            }
        }

        let mut eigenvalues = Vec::new();
        let mut eigenvectors = Vec::new();
        let mut a = cov.clone();

        for _ in 0..self.n_components.min(n_features) {
            // Power iteration for the dominant eigenvector of the deflated matrix.
            let mut v = vec![1.0f32; n_features];
            normalize(&mut v);

            for _ in 0..100 {
                let mut av = vec![0.0f32; n_features];
                for i in 0..n_features {
                    for j in 0..n_features {
                        av[i] += a[i * n_features + j] * v[j];
                    }
                }
                let norm: f32 = av.iter().map(|&x| x * x).sum::<f32>().sqrt();
                if norm < 1e-10 { break; }
                for vi in &mut av { *vi /= norm; }
                v = av;
            }

            // Rayleigh quotient v^T A v gives the eigenvalue for unit-norm v.
            let mut eigenvalue = 0.0f32;
            for i in 0..n_features {
                let mut av = 0.0f32;
                for j in 0..n_features {
                    av += a[i * n_features + j] * v[j];
                }
                eigenvalue += v[i] * av;
            }

            // Floor the eigenvalue so the later 1/sqrt scaling stays finite.
            eigenvalues.push(eigenvalue.max(1e-10));
            eigenvectors.push(v.clone());

            // Deflation: subtract this component so the next pass finds the
            // next-largest eigenpair.
            for i in 0..n_features {
                for j in 0..n_features {
                    a[i * n_features + j] -= eigenvalue * v[i] * v[j];
                }
            }
        }

        // Whitening rows: eigenvectors scaled by 1/sqrt(eigenvalue).
        let mut whitening = vec![vec![0.0f32; n_features]; self.n_components];
        for i in 0..self.n_components {
            let scale = 1.0 / eigenvalues[i].sqrt();
            for j in 0..n_features {
                whitening[i][j] = scale * eigenvectors[i][j];
            }
        }

        // Project every sample into the whitened coordinate system.
        let mut x_white = vec![0.0f32; n_samples * self.n_components];
        for i in 0..n_samples {
            for k in 0..self.n_components {
                for j in 0..n_features {
                    x_white[i * self.n_components + k] += whitening[k][j] * x[i * n_features + j];
                }
            }
        }

        (x_white, whitening)
    }

    /// Recovers estimated sources for `x`: centers by the training mean and
    /// applies the stored feature-space projection.
    ///
    /// Panics if the model has not been fitted.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mixing = self.mixing_.as_ref().expect("Not fitted");
        let mean = self.mean_.as_ref().unwrap();

        let mut result = vec![0.0f32; n_samples * self.n_components];
        for i in 0..n_samples {
            for k in 0..self.n_components {
                for j in 0..n_features {
                    result[i * self.n_components + k] +=
                        mixing[j][k] * (x_data[i * n_features + j] - mean[j]);
                }
            }
        }

        Tensor::from_slice(&result, &[n_samples, self.n_components]).unwrap()
    }

    /// Fits on `x`, then transforms the same data.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }
}
397
/// Sparse PCA: principal components with an L1 penalty that drives loadings
/// to exact zeros via soft-thresholding.
pub struct SparsePCA {
    /// Number of components to extract.
    pub n_components: usize,
    /// L1 penalty strength; larger values produce sparser components.
    pub alpha: f32,
    /// Maximum number of update iterations (default 1000).
    pub max_iter: usize,
    /// Convergence threshold on the max absolute change of the components.
    pub tol: f32,
    // Learned components, [n_components][n_features]; None until fitted.
    components_: Option<Vec<Vec<f32>>>,
    // Per-feature training mean; None until fitted.
    mean_: Option<Vec<f32>>,
}
407
408impl SparsePCA {
409 pub fn new(n_components: usize) -> Self {
410 SparsePCA {
411 n_components,
412 alpha: 1.0,
413 max_iter: 1000,
414 tol: 1e-8,
415 components_: None,
416 mean_: None,
417 }
418 }
419
420 pub fn alpha(mut self, a: f32) -> Self {
421 self.alpha = a;
422 self
423 }
424
425 pub fn fit(&mut self, x: &Tensor) {
426 let x_data = x.data_f32();
427 let n_samples = x.dims()[0];
428 let n_features = x.dims()[1];
429
430 let mean: Vec<f32> = (0..n_features)
431 .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
432 .collect();
433
434 let x_centered: Vec<f32> = (0..n_samples)
435 .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - mean[j]).collect::<Vec<_>>())
436 .collect();
437
438 let mut rng = thread_rng();
439 let mut components: Vec<Vec<f32>> = (0..self.n_components)
440 .map(|_| {
441 let mut v: Vec<f32> = (0..n_features).map(|_| rng.gen::<f32>() - 0.5).collect();
442 normalize(&mut v);
443 v
444 })
445 .collect();
446
447 for _ in 0..self.max_iter {
449 let prev = components.clone();
450
451 for k in 0..self.n_components {
452 let mut grad = vec![0.0f32; n_features];
454 for j in 0..n_features {
455 for i in 0..n_samples {
456 let mut xv = 0.0f32;
457 for l in 0..n_features {
458 xv += x_centered[i * n_features + l] * components[k][l];
459 }
460 grad[j] += x_centered[i * n_features + j] * xv;
461 }
462 }
463
464 for j in 0..n_features {
466 let val = grad[j] / n_samples as f32;
467 components[k][j] = soft_threshold(val, self.alpha);
468 }
469
470 normalize(&mut components[k]);
471 }
472
473 let mut max_change = 0.0f32;
474 for k in 0..self.n_components {
475 for j in 0..n_features {
476 max_change = max_change.max((components[k][j] - prev[k][j]).abs());
477 }
478 }
479 if max_change < self.tol {
480 break;
481 }
482 }
483
484 self.components_ = Some(components);
485 self.mean_ = Some(mean);
486 }
487
488 pub fn transform(&self, x: &Tensor) -> Tensor {
489 let x_data = x.data_f32();
490 let n_samples = x.dims()[0];
491 let n_features = x.dims()[1];
492
493 let components = self.components_.as_ref().expect("Not fitted");
494 let mean = self.mean_.as_ref().unwrap();
495
496 let mut result = vec![0.0f32; n_samples * self.n_components];
497 for i in 0..n_samples {
498 for k in 0..self.n_components {
499 for j in 0..n_features {
500 result[i * self.n_components + k] +=
501 components[k][j] * (x_data[i * n_features + j] - mean[j]);
502 }
503 }
504 }
505
506 Tensor::from_slice(&result, &[n_samples, self.n_components]).unwrap()
507 }
508
509 pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
510 self.fit(x);
511 self.transform(x)
512 }
513}
514
/// Dictionary learning: finds a set of unit-norm atoms such that each sample
/// is approximated by a sparse linear combination of atoms (L1-penalized).
pub struct DictionaryLearning {
    /// Number of dictionary atoms to learn.
    pub n_components: usize,
    /// L1 penalty used for sparse coding; larger values give sparser codes.
    pub alpha: f32,
    /// Maximum number of alternating-minimization iterations (default 1000).
    pub max_iter: usize,
    /// Convergence threshold on the max absolute change of the atoms.
    pub tol: f32,
    // Learned dictionary atoms, [n_components][n_features]; None until fitted.
    components_: Option<Vec<Vec<f32>>>,
    // Per-feature training mean; None until fitted.
    mean_: Option<Vec<f32>>,
}
524
impl DictionaryLearning {
    /// Creates a dictionary-learning model with `n_components` atoms and
    /// defaults (`alpha = 1.0`, `max_iter = 1000`, `tol = 1e-8`).
    pub fn new(n_components: usize) -> Self {
        DictionaryLearning {
            n_components,
            alpha: 1.0,
            max_iter: 1000,
            tol: 1e-8,
            components_: None,
            mean_: None,
        }
    }

    /// Learns a dictionary for `x` (`[n_samples, n_features]`, row-major) by
    /// alternating minimization: sparse-code every sample against the current
    /// dictionary, then refit each atom from the residuals of the samples
    /// that use it. Stops when the atoms change by less than `tol` or after
    /// `max_iter` rounds.
    pub fn fit(&mut self, x: &Tensor) {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Per-feature mean for centering.
        let mean: Vec<f32> = (0..n_features)
            .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
            .collect();

        let x_centered: Vec<f32> = (0..n_samples)
            .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - mean[j]).collect::<Vec<_>>())
            .collect();

        let mut rng = thread_rng();

        // Random unit-norm initialization of the atoms.
        let mut dictionary: Vec<Vec<f32>> = (0..self.n_components)
            .map(|_| {
                let mut d: Vec<f32> = (0..n_features).map(|_| rng.gen::<f32>() - 0.5).collect();
                normalize(&mut d);
                d
            })
            .collect();

        for _ in 0..self.max_iter {
            let prev = dictionary.clone();

            // Sparse-coding step: L1-penalized codes for every sample.
            let mut codes = vec![vec![0.0f32; self.n_components]; n_samples];
            for i in 0..n_samples {
                let x_i: Vec<f32> = (0..n_features).map(|j| x_centered[i * n_features + j]).collect();
                codes[i] = sparse_encode(&x_i, &dictionary, self.alpha);
            }

            // Dictionary-update step: refit atom k to the residuals of the
            // samples that actually use it (nonzero code), then renormalize.
            for k in 0..self.n_components {
                let mut num = vec![0.0f32; n_features];
                let mut denom = 0.0f32;

                for i in 0..n_samples {
                    if codes[i][k].abs() > 1e-10 {
                        for j in 0..n_features {
                            // Residual of sample i with every atom except k removed.
                            let mut residual = x_centered[i * n_features + j];
                            for l in 0..self.n_components {
                                if l != k {
                                    residual -= codes[i][l] * dictionary[l][j];
                                }
                            }
                            num[j] += codes[i][k] * residual;
                        }
                        denom += codes[i][k] * codes[i][k];
                    }
                }

                // Skip unused atoms (denom ~ 0) to avoid dividing by zero;
                // such atoms keep their previous value.
                if denom > 1e-10 {
                    for j in 0..n_features {
                        dictionary[k][j] = num[j] / denom;
                    }
                    normalize(&mut dictionary[k]);
                }
            }

            // Converged when no atom entry moved by more than tol.
            let mut max_change = 0.0f32;
            for k in 0..self.n_components {
                for j in 0..n_features {
                    max_change = max_change.max((dictionary[k][j] - prev[k][j]).abs());
                }
            }
            if max_change < self.tol {
                break;
            }
        }

        self.components_ = Some(dictionary);
        self.mean_ = Some(mean);
    }

    /// Sparse-codes each (centered) row of `x` against the learned dictionary.
    ///
    /// Panics if the model has not been fitted.
    pub fn transform(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let dictionary = self.components_.as_ref().expect("Not fitted");
        let mean = self.mean_.as_ref().unwrap();

        let mut result = vec![0.0f32; n_samples * self.n_components];
        for i in 0..n_samples {
            let x_i: Vec<f32> = (0..n_features).map(|j| x_data[i * n_features + j] - mean[j]).collect();
            let code = sparse_encode(&x_i, dictionary, self.alpha);
            for k in 0..self.n_components {
                result[i * self.n_components + k] = code[k];
            }
        }

        Tensor::from_slice(&result, &[n_samples, self.n_components]).unwrap()
    }

    /// Fits on `x`, then transforms the same data.
    pub fn fit_transform(&mut self, x: &Tensor) -> Tensor {
        self.fit(x);
        self.transform(x)
    }
}
640
/// Inverts an `n x n` matrix with Gauss-Jordan elimination and partial
/// pivoting. Near-zero pivots (|pivot| <= 1e-10) are left unnormalized, so
/// the output for a singular input is not a true inverse.
fn invert_matrix(m: &[Vec<f32>], n: usize) -> Vec<Vec<f32>> {
    let mut work: Vec<Vec<f32>> = m.to_vec();
    // Start the result as the identity; the same row operations applied to
    // both matrices turn it into the inverse.
    let mut result = vec![vec![0.0f32; n]; n];
    for (idx, row) in result.iter_mut().enumerate() {
        row[idx] = 1.0;
    }

    for col in 0..n {
        // Partial pivoting: move the largest-magnitude entry into the pivot slot.
        let mut pivot_row = col;
        for r in (col + 1)..n {
            if work[r][col].abs() > work[pivot_row][col].abs() {
                pivot_row = r;
            }
        }
        work.swap(col, pivot_row);
        result.swap(col, pivot_row);

        // Normalize the pivot row (skipped for near-singular pivots).
        let pivot = work[col][col];
        if pivot.abs() > 1e-10 {
            for c in 0..n {
                work[col][c] /= pivot;
                result[col][c] /= pivot;
            }
        }

        // Eliminate this column from every other row.
        for r in 0..n {
            if r == col {
                continue;
            }
            let factor = work[r][col];
            for c in 0..n {
                work[r][c] -= factor * work[col][c];
                result[r][c] -= factor * result[col][c];
            }
        }
    }
    result
}
675
/// Rescales `v` in place to unit Euclidean length. Vectors whose norm is at
/// most 1e-10 are left untouched to avoid dividing by (near) zero.
fn normalize(v: &mut [f32]) {
    let squared_sum = v.iter().fold(0.0f32, |acc, &x| acc + x * x);
    let length = squared_sum.sqrt();
    if length > 1e-10 {
        v.iter_mut().for_each(|x| *x /= length);
    }
}
682
683fn orthogonalize(w: &mut [Vec<f32>], n: usize) {
684 for i in 0..n {
685 for j in 0..i {
686 let dot: f32 = (0..n).map(|k| w[i][k] * w[j][k]).sum();
687 for k in 0..n {
688 w[i][k] -= dot * w[j][k];
689 }
690 }
691 normalize(&mut w[i]);
692 }
693}
694
/// Soft-thresholding (shrinkage) operator: moves `x` toward zero by `lambda`,
/// clamping anything within `[-lambda, lambda]` to exactly zero.
fn soft_threshold(x: f32, lambda: f32) -> f32 {
    match x {
        v if v > lambda => v - lambda,
        v if v < -lambda => v + lambda,
        _ => 0.0,
    }
}
700
701fn sparse_encode(x: &[f32], dictionary: &[Vec<f32>], alpha: f32) -> Vec<f32> {
702 let n_components = dictionary.len();
703 let n_features = x.len();
704 let mut code = vec![0.0f32; n_components];
705
706 for _ in 0..100 {
708 for k in 0..n_components {
709 let mut residual_dot = 0.0f32;
710 for j in 0..n_features {
711 let mut residual = x[j];
712 for l in 0..n_components {
713 if l != k {
714 residual -= code[l] * dictionary[l][j];
715 }
716 }
717 residual_dot += residual * dictionary[k][j];
718 }
719 code[k] = soft_threshold(residual_dot, alpha);
720 }
721 }
722 code
723}
724
#[cfg(test)]
mod tests {
    use super::*;

    // All models initialize randomly, so these tests only verify the output
    // shape of fit_transform, not specific values.

    #[test]
    fn test_factor_analysis() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], &[3, 3]).unwrap();
        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&x);
        assert_eq!(result.dims(), &[3, 2]);
    }

    #[test]
    fn test_fast_ica() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], &[4, 2]).unwrap();
        let mut ica = FastICA::new(2);
        let result = ica.fit_transform(&x);
        assert_eq!(result.dims(), &[4, 2]);
    }

    #[test]
    fn test_sparse_pca() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]).unwrap();
        let mut spca = SparsePCA::new(2);
        let result = spca.fit_transform(&x);
        assert_eq!(result.dims(), &[2, 2]);
    }
}
752}