1use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix2};
24use scirs2_core::numeric::{Float, NumCast};
25use scirs2_core::validation::{check_positive, checkshape};
26use scirs2_linalg::svd;
27
28use crate::error::{Result, TransformError};
29
/// Rotation applied to the factor loading matrix after extraction.
#[derive(Debug, Clone, PartialEq)]
pub enum RotationMethod {
    /// No rotation; keep the raw maximum-likelihood loadings.
    None,
    /// Orthogonal varimax rotation (maximizes variance of squared loadings).
    Varimax,
    /// Oblique promax rotation (varimax followed by a power-target fit).
    Promax,
}
40
/// Data needed to draw a scree plot for choosing the number of factors.
#[derive(Debug, Clone)]
pub struct ScreePlotData {
    /// Eigenvalues of the covariance/correlation matrix, sorted descending.
    pub eigenvalues: Array1<f64>,
    /// Running sum of `variance_proportions` (last entry is ~1.0 when the
    /// eigenvalue total is positive).
    pub cumulative_variance: Array1<f64>,
    /// Each eigenvalue divided by the eigenvalue total.
    pub variance_proportions: Array1<f64>,
    /// Kaiser criterion cutoff: 1.0 for correlation input, the mean
    /// eigenvalue for covariance input.
    pub kaiser_threshold: f64,
}
57
/// Output of a fitted factor-analysis model.
#[derive(Debug, Clone)]
pub struct FactorAnalysisResult {
    /// Factor loading matrix, shape (n_features, n_factors).
    pub loadings: Array2<f64>,
    /// Factor scores of the training samples, shape (n_samples, n_factors).
    pub scores: Array2<f64>,
    /// Per-feature unique (noise) variances.
    pub uniquenesses: Array1<f64>,
    /// Per-feature variance explained by the factors (row sums of squared
    /// loadings).
    pub communalities: Array1<f64>,
    /// Average of the uniquenesses; scalar summary of residual noise.
    pub noise_variance: f64,
    /// Number of EM iterations actually run.
    pub n_iter: usize,
    /// Log-likelihood of the data under the fitted model.
    pub log_likelihood: f64,
}
76
/// Maximum-likelihood factor analysis fitted by EM, with optional rotation.
#[derive(Debug, Clone)]
pub struct FactorAnalysis {
    /// Number of latent factors to extract.
    n_factors: usize,
    /// Rotation applied to the loadings after EM converges.
    rotation: RotationMethod,
    /// Maximum number of EM iterations.
    max_iter: usize,
    /// Convergence tolerance on the change in log-likelihood.
    tol: f64,
    /// Exponent used to build the promax target matrix (clamped to >= 2).
    promax_power: usize,
    /// Iteration cap for the pairwise varimax sweeps.
    varimax_max_iter: usize,
    /// Whether to subtract column means before fitting.
    center: bool,
    /// Column means of the training data (set after fit).
    mean: Option<Array1<f64>>,
    /// Fitted (possibly rotated) loadings (set after fit).
    loadings: Option<Array2<f64>>,
    /// Fitted uniquenesses (set after fit).
    uniquenesses: Option<Array1<f64>>,
    /// Centered training data retained after fit.
    training_data: Option<Array2<f64>>,
}
119
120impl FactorAnalysis {
121 pub fn new(n_factors: usize) -> Self {
126 FactorAnalysis {
127 n_factors,
128 rotation: RotationMethod::None,
129 max_iter: 1000,
130 tol: 1e-8,
131 promax_power: 4,
132 varimax_max_iter: 100,
133 center: true,
134 mean: None,
135 loadings: None,
136 uniquenesses: None,
137 training_data: None,
138 }
139 }
140
141 pub fn with_rotation(mut self, rotation: RotationMethod) -> Self {
143 self.rotation = rotation;
144 self
145 }
146
147 pub fn with_max_iter(mut self, max_iter: usize) -> Self {
149 self.max_iter = max_iter;
150 self
151 }
152
153 pub fn with_tol(mut self, tol: f64) -> Self {
155 self.tol = tol;
156 self
157 }
158
159 pub fn with_promax_power(mut self, power: usize) -> Self {
161 self.promax_power = power.max(2);
162 self
163 }
164
165 pub fn with_center(mut self, center: bool) -> Self {
167 self.center = center;
168 self
169 }
170
171 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<FactorAnalysisResult>
179 where
180 S: Data,
181 S::Elem: Float + NumCast,
182 {
183 let x_f64 = x.mapv(|v| <f64 as NumCast>::from(v).unwrap_or_default());
184 let (n_samples, n_features) = x_f64.dim();
185
186 check_positive(self.n_factors, "n_factors")?;
188 checkshape(&x_f64, &[n_samples, n_features], "x")?;
189
190 if n_samples < 2 {
191 return Err(TransformError::InvalidInput(
192 "Need at least 2 samples for factor analysis".to_string(),
193 ));
194 }
195
196 if self.n_factors > n_features {
197 return Err(TransformError::InvalidInput(format!(
198 "n_factors={} must be <= n_features={}",
199 self.n_factors, n_features
200 )));
201 }
202
203 let mean = x_f64.mean_axis(Axis(0)).ok_or_else(|| {
205 TransformError::ComputationError("Failed to compute mean".to_string())
206 })?;
207
208 let x_centered = if self.center {
209 let mut centered = x_f64.clone();
210 for i in 0..n_samples {
211 for j in 0..n_features {
212 centered[[i, j]] -= mean[j];
213 }
214 }
215 centered
216 } else {
217 x_f64.clone()
218 };
219
220 let mut cov: Array2<f64> = Array2::zeros((n_features, n_features));
222 for i in 0..n_features {
223 for j in 0..n_features {
224 let mut sum = 0.0;
225 for k in 0..n_samples {
226 sum += x_centered[[k, i]] * x_centered[[k, j]];
227 }
228 cov[[i, j]] = sum / (n_samples - 1) as f64;
229 }
230 }
231
232 let (loadings_init, uniquenesses_init) =
234 self.initialize_loadings(&x_centered, &cov, n_samples)?;
235
236 let (loadings, uniquenesses, n_iter, log_likelihood) = self.em_algorithm(
238 &cov,
239 loadings_init,
240 uniquenesses_init,
241 n_samples,
242 n_features,
243 )?;
244
245 let loadings_rotated = match &self.rotation {
247 RotationMethod::None => loadings,
248 RotationMethod::Varimax => self.varimax_rotation(&loadings)?,
249 RotationMethod::Promax => self.promax_rotation(&loadings)?,
250 };
251
252 let scores = self.compute_scores(&x_centered, &loadings_rotated, &uniquenesses)?;
255
256 let mut communalities: Array1<f64> = Array1::zeros(n_features);
258 for j in 0..n_features {
259 for f in 0..self.n_factors {
260 communalities[j] += loadings_rotated[[j, f]] * loadings_rotated[[j, f]];
261 }
262 }
263
264 let noise_variance = if uniquenesses.is_empty() {
265 0.0
266 } else {
267 uniquenesses.sum() / uniquenesses.len() as f64
268 };
269
270 self.mean = Some(mean);
272 self.loadings = Some(loadings_rotated.clone());
273 self.uniquenesses = Some(uniquenesses.clone());
274 self.training_data = Some(x_centered);
275
276 Ok(FactorAnalysisResult {
277 loadings: loadings_rotated,
278 scores,
279 uniquenesses,
280 communalities,
281 noise_variance,
282 n_iter,
283 log_likelihood,
284 })
285 }
286
    /// Builds starting values for EM from a thin SVD of the centered data.
    ///
    /// Loadings are seeded from the top `n_factors` right singular vectors
    /// scaled to covariance units (singular value / sqrt(n - 1)); each
    /// uniqueness starts as the residual feature variance not captured by
    /// those loadings, floored at 1e-6 to keep it strictly positive.
    fn initialize_loadings(
        &self,
        x_centered: &Array2<f64>,
        _cov: &Array2<f64>,
        n_samples: usize,
    ) -> Result<(Array2<f64>, Array1<f64>)> {
        let n_features = x_centered.shape()[1];

        let (_u, s, vt) = svd::<f64>(&x_centered.view(), true, None)
            .map_err(|e| TransformError::LinalgError(e))?;

        // Convert singular values of the data matrix to covariance scale.
        let scale = 1.0 / (n_samples as f64 - 1.0).sqrt();
        let mut loadings: Array2<f64> = Array2::zeros((n_features, self.n_factors));
        for j in 0..n_features {
            for f in 0..self.n_factors {
                // Guard: the SVD may return fewer components than n_factors.
                if f < s.len() && f < vt.shape()[0] {
                    loadings[[j, f]] = vt[[f, j]] * s[f] * scale;
                }
            }
        }

        let mut uniquenesses: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            let mut communality = 0.0;
            for f in 0..self.n_factors {
                communality += loadings[[j, f]] * loadings[[j, f]];
            }
            // Sample variance of feature j (unbiased denominator).
            let mut var_j = 0.0;
            for k in 0..n_samples {
                var_j += x_centered[[k, j]] * x_centered[[k, j]];
            }
            var_j /= (n_samples - 1) as f64;

            // Uniqueness = leftover variance, floored to stay positive.
            uniquenesses[j] = (var_j - communality).max(1e-6);
        }

        Ok((loadings, uniquenesses))
    }
330
    /// Runs the EM algorithm for the factor model Sigma = W W^T + Psi,
    /// operating entirely on the sample covariance `cov`.
    ///
    /// Each iteration forms the latent-factor posterior given the current
    /// `(W, Psi)` (E-step) and then re-solves for the loadings and
    /// uniquenesses in closed form (M-step). Iteration stops when the change
    /// in log-likelihood drops below `self.tol` or `self.max_iter` is hit.
    ///
    /// Returns `(loadings, uniquenesses, iterations_run, log_likelihood)`.
    fn em_algorithm(
        &self,
        cov: &Array2<f64>,
        mut loadings: Array2<f64>,
        mut uniquenesses: Array1<f64>,
        n_samples: usize,
        n_features: usize,
    ) -> Result<(Array2<f64>, Array1<f64>, usize, f64)> {
        let n_factors = self.n_factors;
        let mut prev_ll = f64::NEG_INFINITY;
        let mut n_iter = 0;

        for iter in 0..self.max_iter {
            n_iter = iter + 1;

            // Psi^{-1} W, reused throughout the iteration. Uniquenesses are
            // floored at 1e-12 to avoid division by zero.
            let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
            for j in 0..n_features {
                let psi_inv = 1.0 / uniquenesses[j].max(1e-12);
                for f in 0..n_factors {
                    psi_inv_w[[j, f]] = psi_inv * loadings[[j, f]];
                }
            }

            // W^T Psi^{-1} W (n_factors x n_factors).
            let mut wt_psi_inv_w: Array2<f64> = Array2::zeros((n_factors, n_factors));
            for i in 0..n_factors {
                for j in 0..n_factors {
                    for k in 0..n_features {
                        wt_psi_inv_w[[i, j]] += loadings[[k, i]] * psi_inv_w[[k, j]];
                    }
                }
            }

            // Add the identity: M = I + W^T Psi^{-1} W.
            for i in 0..n_factors {
                wt_psi_inv_w[[i, i]] += 1.0;
            }

            // Posterior factor covariance: Sigma_z = M^{-1}.
            let sigma_z = self.invert_small_matrix(&wt_psi_inv_w)?;

            // beta = Sigma_z W^T Psi^{-1} maps observations to posterior
            // factor means.
            let mut beta: Array2<f64> = Array2::zeros((n_factors, n_features));
            for i in 0..n_factors {
                for j in 0..n_features {
                    for k in 0..n_factors {
                        beta[[i, j]] += sigma_z[[i, k]] * psi_inv_w[[j, k]];
                    }
                }
            }

            // Expected cross-moment aggregated over samples: beta S.
            let mut ez_xt: Array2<f64> = Array2::zeros((n_factors, n_features));
            for i in 0..n_factors {
                for j in 0..n_features {
                    for k in 0..n_features {
                        ez_xt[[i, j]] += beta[[i, k]] * cov[[k, j]];
                    }
                }
            }

            // E[z z^T] = Sigma_z + beta S beta^T (uses symmetry of S).
            let mut ez_zt = sigma_z.clone();
            for i in 0..n_factors {
                for j in 0..n_factors {
                    for k in 0..n_features {
                        ez_zt[[i, j]] += beta[[i, k]] * ez_xt[[j, k]];
                    }
                }
            }

            let ez_zt_inv = self.invert_small_matrix(&ez_zt)?;

            // S beta^T (n_features x n_factors).
            let mut s_beta_t: Array2<f64> = Array2::zeros((n_features, n_factors));
            for i in 0..n_features {
                for j in 0..n_factors {
                    for k in 0..n_features {
                        s_beta_t[[i, j]] += cov[[i, k]] * beta[[j, k]];
                    }
                }
            }

            // M-step loadings: W_new = S beta^T (E[z z^T])^{-1}.
            let mut new_loadings: Array2<f64> = Array2::zeros((n_features, n_factors));
            for i in 0..n_features {
                for j in 0..n_factors {
                    for k in 0..n_factors {
                        new_loadings[[i, j]] += s_beta_t[[i, k]] * ez_zt_inv[[k, j]];
                    }
                }
            }

            // M-step uniquenesses: diag(S - W_new (beta S)), floored at 1e-6.
            let mut new_uniquenesses: Array1<f64> = Array1::zeros(n_features);
            for j in 0..n_features {
                let mut correction = 0.0;
                for f in 0..n_factors {
                    correction += new_loadings[[j, f]] * ez_xt[[f, j]];
                }
                new_uniquenesses[j] = (cov[[j, j]] - correction).max(1e-6);
            }

            loadings = new_loadings;
            uniquenesses = new_uniquenesses;

            let ll =
                self.compute_log_likelihood(cov, &loadings, &uniquenesses, n_samples, n_features);

            // Converged once the log-likelihood change falls below tolerance.
            if (ll - prev_ll).abs() < self.tol {
                break;
            }
            prev_ll = ll;
        }

        // Final log-likelihood for the parameters actually returned.
        let ll = self.compute_log_likelihood(cov, &loadings, &uniquenesses, n_samples, n_features);

        Ok((loadings, uniquenesses, n_iter, ll))
    }
470
471 fn compute_log_likelihood(
473 &self,
474 cov: &Array2<f64>,
475 loadings: &Array2<f64>,
476 uniquenesses: &Array1<f64>,
477 n_samples: usize,
478 n_features: usize,
479 ) -> f64 {
480 let n_factors = self.n_factors;
481
482 let mut sigma: Array2<f64> = Array2::zeros((n_features, n_features));
484 for i in 0..n_features {
485 for j in 0..n_features {
486 for f in 0..n_factors {
487 sigma[[i, j]] += loadings[[i, f]] * loadings[[j, f]];
488 }
489 if i == j {
490 sigma[[i, j]] += uniquenesses[i];
491 }
492 }
493 }
494
495 let mut log_det_psi = 0.0;
498 for j in 0..n_features {
499 log_det_psi += uniquenesses[j].max(1e-12).ln();
500 }
501
502 let mut m: Array2<f64> = Array2::zeros((n_factors, n_factors));
504 for i in 0..n_factors {
505 for j in 0..n_factors {
506 for k in 0..n_features {
507 m[[i, j]] += loadings[[k, i]] * loadings[[k, j]] / uniquenesses[k].max(1e-12);
508 }
509 if i == j {
510 m[[i, j]] += 1.0;
511 }
512 }
513 }
514
515 let log_det_m = match scirs2_linalg::eigh::<f64>(&m.view(), None) {
517 Ok((eigenvalues, _)) => eigenvalues.iter().map(|&e| e.max(1e-12).ln()).sum::<f64>(),
518 Err(_) => 0.0,
519 };
520
521 let log_det_sigma = log_det_psi + log_det_m;
522
523 let m_inv = match self.invert_small_matrix(&m) {
526 Ok(inv) => inv,
527 Err(_) => Array2::eye(n_factors),
528 };
529
530 let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
532 for j in 0..n_features {
533 for f in 0..n_factors {
534 psi_inv_w[[j, f]] = loadings[[j, f]] / uniquenesses[j].max(1e-12);
535 }
536 }
537
538 let mut psi_inv_w_m_inv: Array2<f64> = Array2::zeros((n_features, n_factors));
540 for i in 0..n_features {
541 for j in 0..n_factors {
542 for k in 0..n_factors {
543 psi_inv_w_m_inv[[i, j]] += psi_inv_w[[i, k]] * m_inv[[k, j]];
544 }
545 }
546 }
547
548 let mut trace_psi_inv_s = 0.0;
550 for j in 0..n_features {
551 trace_psi_inv_s += cov[[j, j]] / uniquenesses[j].max(1e-12);
552 }
553
554 let mut trace_correction = 0.0;
555 for i in 0..n_features {
556 for j in 0..n_features {
557 let mut sigma_inv_ij = 0.0;
558 for f in 0..n_factors {
559 sigma_inv_ij += psi_inv_w_m_inv[[i, f]] * psi_inv_w[[j, f]];
560 }
561 trace_correction += sigma_inv_ij * cov[[j, i]];
562 }
563 }
564
565 let trace_sigma_inv_s = trace_psi_inv_s - trace_correction;
566
567 let n = n_samples as f64;
569 let p = n_features as f64;
570 -n / 2.0 * (p * (2.0 * std::f64::consts::PI).ln() + log_det_sigma + trace_sigma_inv_s)
571 }
572
573 fn invert_small_matrix(&self, a: &Array2<f64>) -> Result<Array2<f64>> {
575 let n = a.shape()[0];
576 let mut augmented: Array2<f64> = Array2::zeros((n, 2 * n));
577
578 for i in 0..n {
580 for j in 0..n {
581 augmented[[i, j]] = a[[i, j]];
582 }
583 augmented[[i, n + i]] = 1.0;
584 }
585
586 for i in 0..n {
588 let mut max_row = i;
589 for k in i + 1..n {
590 if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
591 max_row = k;
592 }
593 }
594
595 if max_row != i {
596 for j in 0..2 * n {
597 let temp = augmented[[i, j]];
598 augmented[[i, j]] = augmented[[max_row, j]];
599 augmented[[max_row, j]] = temp;
600 }
601 }
602
603 let pivot = augmented[[i, i]];
604 if pivot.abs() < 1e-14 {
605 augmented[[i, i]] += 1e-8;
607 let pivot = augmented[[i, i]];
608 for j in 0..2 * n {
609 augmented[[i, j]] /= pivot;
610 }
611 } else {
612 for j in 0..2 * n {
613 augmented[[i, j]] /= pivot;
614 }
615 }
616
617 for k in 0..n {
618 if k != i {
619 let factor = augmented[[k, i]];
620 for j in 0..2 * n {
621 augmented[[k, j]] -= factor * augmented[[i, j]];
622 }
623 }
624 }
625 }
626
627 let mut inv: Array2<f64> = Array2::zeros((n, n));
629 for i in 0..n {
630 for j in 0..n {
631 inv[[i, j]] = augmented[[i, n + j]];
632 }
633 }
634
635 Ok(inv)
636 }
637
    /// Computes factor scores as the posterior mean of the latent factors:
    /// scores = X_c Psi^{-1} W (I + W^T Psi^{-1} W)^{-1}, i.e. the same
    /// projection used by the EM E-step.
    fn compute_scores(
        &self,
        x_centered: &Array2<f64>,
        loadings: &Array2<f64>,
        uniquenesses: &Array1<f64>,
    ) -> Result<Array2<f64>> {
        let n_samples = x_centered.shape()[0];
        let n_features = x_centered.shape()[1];
        let n_factors = self.n_factors;

        // Psi^{-1} W, with uniquenesses floored at 1e-12.
        let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
        for j in 0..n_features {
            let psi_inv = 1.0 / uniquenesses[j].max(1e-12);
            for f in 0..n_factors {
                psi_inv_w[[j, f]] = psi_inv * loadings[[j, f]];
            }
        }

        // M = I + W^T Psi^{-1} W.
        let mut m: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    m[[i, j]] += loadings[[k, i]] * psi_inv_w[[k, j]];
                }
                if i == j {
                    m[[i, j]] += 1.0;
                }
            }
        }

        let m_inv = self.invert_small_matrix(&m)?;

        // Scoring matrix: Psi^{-1} W M^{-1} (n_features x n_factors).
        let mut scoring: Array2<f64> = Array2::zeros((n_features, n_factors));
        for i in 0..n_features {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    scoring[[i, j]] += psi_inv_w[[i, k]] * m_inv[[k, j]];
                }
            }
        }

        // Project each centered sample onto the scoring matrix.
        let mut scores: Array2<f64> = Array2::zeros((n_samples, n_factors));
        for i in 0..n_samples {
            for j in 0..n_factors {
                for k in 0..n_features {
                    scores[[i, j]] += x_centered[[i, k]] * scoring[[k, j]];
                }
            }
        }

        Ok(scores)
    }
698
    /// Applies varimax rotation: an orthogonal rotation that maximizes the
    /// variance of the squared loadings within each factor, pushing each
    /// variable toward loading strongly on few factors.
    ///
    /// Uses Kaiser normalization (rows scaled to unit communality before
    /// rotating, rescaled after) and iterated pairwise planar rotations
    /// until no pair needs an angle larger than ~1e-10 radians or
    /// `varimax_max_iter` sweeps have run.
    fn varimax_rotation(&self, loadings: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_features, n_factors) = loadings.dim();

        // Rotating a single factor is a no-op.
        if n_factors < 2 {
            return Ok(loadings.clone());
        }

        let mut rotated = loadings.clone();

        // Kaiser normalization factors: per-row communality roots, floored
        // to avoid division by zero.
        let mut h: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            for f in 0..n_factors {
                h[j] += rotated[[j, f]] * rotated[[j, f]];
            }
            h[j] = h[j].sqrt().max(1e-10);
        }

        let mut normalized = rotated.clone();
        for j in 0..n_features {
            for f in 0..n_factors {
                normalized[[j, f]] /= h[j];
            }
        }

        for _iter in 0..self.varimax_max_iter {
            let mut changed = false;

            // Sweep every factor pair and rotate within its plane.
            for p in 0..n_factors {
                for q in (p + 1)..n_factors {
                    let n = n_features as f64;

                    let mut sum_a: f64 = 0.0;
                    let mut sum_b: f64 = 0.0;
                    let mut sum_c: f64 = 0.0;
                    let mut sum_d: f64 = 0.0;

                    for j in 0..n_features {
                        let xj = normalized[[j, p]];
                        let yj = normalized[[j, q]];

                        let a_j = xj * xj - yj * yj;
                        let b_j = 2.0 * xj * yj;

                        sum_a += a_j;
                        sum_b += b_j;
                        sum_c += a_j * a_j - b_j * b_j;
                        sum_d += 2.0 * a_j * b_j;
                    }

                    // Closed-form optimal angle for the varimax criterion on
                    // this pair of columns.
                    let u = sum_d - 2.0 * sum_a * sum_b / n;
                    let v = sum_c - (sum_a * sum_a - sum_b * sum_b) / n;

                    let angle = 0.25 * u.atan2(v);

                    if angle.abs() < 1e-10 {
                        continue;
                    }

                    changed = true;

                    let cos_a = angle.cos();
                    let sin_a = angle.sin();

                    // Apply the planar (Givens) rotation to columns p and q.
                    for j in 0..n_features {
                        let xj = normalized[[j, p]];
                        let yj = normalized[[j, q]];
                        normalized[[j, p]] = cos_a * xj + sin_a * yj;
                        normalized[[j, q]] = -sin_a * xj + cos_a * yj;
                    }
                }
            }

            // Converged: the last full sweep made no rotation.
            if !changed {
                break;
            }
        }

        // Undo the Kaiser normalization.
        for j in 0..n_features {
            for f in 0..n_factors {
                rotated[[j, f]] = normalized[[j, f]] * h[j];
            }
        }

        Ok(rotated)
    }
795
    /// Applies promax rotation: starts from the varimax solution, builds a
    /// target matrix by raising each loading to `promax_power` (keeping its
    /// sign), then least-squares fits the varimax loadings to that target,
    /// yielding an oblique rotation with sharper simple structure.
    fn promax_rotation(&self, loadings: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_features, n_factors) = loadings.dim();

        if n_factors < 2 {
            return Ok(loadings.clone());
        }

        let varimax_loadings = self.varimax_rotation(loadings)?;

        // Target: element-wise |l|^power with the original sign preserved.
        let mut target: Array2<f64> = Array2::zeros((n_features, n_factors));
        let power = self.promax_power as f64;

        for j in 0..n_features {
            for f in 0..n_factors {
                let val = varimax_loadings[[j, f]];
                let sign = if val >= 0.0 { 1.0 } else { -1.0 };
                target[[j, f]] = sign * val.abs().powf(power);
            }
        }

        // Normal equations for T = (A^T A)^{-1} A^T target, where A is the
        // varimax loading matrix.
        let mut at_a: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    at_a[[i, j]] += varimax_loadings[[k, i]] * varimax_loadings[[k, j]];
                }
            }
        }

        let at_a_inv = self.invert_small_matrix(&at_a)?;

        let mut at_target: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    at_target[[i, j]] += varimax_loadings[[k, i]] * target[[k, j]];
                }
            }
        }

        let mut t_mat: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    t_mat[[i, j]] += at_a_inv[[i, k]] * at_target[[k, j]];
                }
            }
        }

        // Rotated loadings: A T.
        let mut result: Array2<f64> = Array2::zeros((n_features, n_factors));
        for i in 0..n_features {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    result[[i, j]] += varimax_loadings[[i, k]] * t_mat[[k, j]];
                }
            }
        }

        Ok(result)
    }
871
    /// Returns the fitted (rotated) loading matrix, or `None` if
    /// `fit_transform` has not been called yet.
    pub fn loadings(&self) -> Option<&Array2<f64>> {
        self.loadings.as_ref()
    }
876
    /// Returns the fitted per-feature uniquenesses, or `None` if
    /// `fit_transform` has not been called yet.
    pub fn uniquenesses(&self) -> Option<&Array1<f64>> {
        self.uniquenesses.as_ref()
    }
881
    /// Computes scree-plot data (sorted eigenvalues plus variance fractions)
    /// for `x`, using the correlation matrix when `use_correlation` is true
    /// and the sample covariance matrix otherwise.
    ///
    /// The Kaiser threshold is 1.0 for correlation input and the mean
    /// eigenvalue for covariance input.
    ///
    /// # Errors
    /// Returns an error for fewer than 2 samples, or if the column mean or
    /// the eigendecomposition cannot be computed.
    pub fn scree_plot_data<S>(x: &ArrayBase<S, Ix2>, use_correlation: bool) -> Result<ScreePlotData>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        // Promote the input to f64; non-representable values become 0.0.
        let x_f64 = x.mapv(|v| <f64 as NumCast>::from(v).unwrap_or_default());
        let (n_samples, n_features) = x_f64.dim();

        if n_samples < 2 {
            return Err(TransformError::InvalidInput(
                "Need at least 2 samples for scree plot".to_string(),
            ));
        }

        let mean = x_f64.mean_axis(Axis(0)).ok_or_else(|| {
            TransformError::ComputationError("Failed to compute mean".to_string())
        })?;

        let mut x_centered = x_f64.clone();
        for i in 0..n_samples {
            for j in 0..n_features {
                x_centered[[i, j]] -= mean[j];
            }
        }

        // Sample covariance with the unbiased (n - 1) denominator.
        let mut cov: Array2<f64> = Array2::zeros((n_features, n_features));
        for i in 0..n_features {
            for j in 0..n_features {
                let mut sum = 0.0;
                for k in 0..n_samples {
                    sum += x_centered[[k, i]] * x_centered[[k, j]];
                }
                cov[[i, j]] = sum / (n_samples - 1) as f64;
            }
        }

        // Optionally rescale the covariance into a correlation matrix.
        if use_correlation {
            let mut stds: Array1<f64> = Array1::zeros(n_features);
            for j in 0..n_features {
                stds[j] = cov[[j, j]].sqrt().max(1e-12);
            }
            for i in 0..n_features {
                for j in 0..n_features {
                    cov[[i, j]] /= stds[i] * stds[j];
                }
            }
        }

        let (eigenvalues_raw, _eigenvectors) =
            scirs2_linalg::eigh::<f64>(&cov.view(), None).map_err(TransformError::LinalgError)?;

        // Clamp tiny negatives to zero and sort descending.
        let n = eigenvalues_raw.len();
        let mut eig_vec: Vec<f64> = eigenvalues_raw.iter().map(|&e| e.max(0.0)).collect();
        eig_vec.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
        let mut eigenvalues: Array1<f64> = Array1::zeros(n);
        for i in 0..n {
            eigenvalues[i] = eig_vec[i];
        }

        let total: f64 = eigenvalues.sum();
        let mut variance_proportions: Array1<f64> = Array1::zeros(n);
        let mut cumulative_variance: Array1<f64> = Array1::zeros(n);

        // Degenerate case: all-zero eigenvalues leave the proportions at 0.
        if total > 0.0 {
            let mut cumsum = 0.0;
            for i in 0..n {
                variance_proportions[i] = eigenvalues[i] / total;
                cumsum += variance_proportions[i];
                cumulative_variance[i] = cumsum;
            }
        }

        // Kaiser criterion: eigenvalue > 1 (correlation) or > mean (covariance).
        let kaiser_threshold = if use_correlation {
            1.0
        } else if n > 0 {
            total / n as f64
        } else {
            0.0
        };

        Ok(ScreePlotData {
            eigenvalues,
            cumulative_variance,
            variance_proportions,
            kaiser_threshold,
        })
    }
987}
988
989pub fn factor_analysis<S>(
998 data: &ArrayBase<S, Ix2>,
999 n_factors: usize,
1000) -> Result<FactorAnalysisResult>
1001where
1002 S: Data,
1003 S::Elem: Float + NumCast,
1004{
1005 let mut fa = FactorAnalysis::new(n_factors);
1006 fa.fit_transform(data)
1007}
1008
/// Convenience wrapper around [`FactorAnalysis::scree_plot_data`].
///
/// Computes eigenvalue/variance data for a scree plot, using the correlation
/// matrix when `use_correlation` is true and the covariance matrix otherwise.
pub fn scree_plot_data<S>(data: &ArrayBase<S, Ix2>, use_correlation: bool) -> Result<ScreePlotData>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    FactorAnalysis::scree_plot_data(data, use_correlation)
}
1028
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array;

    /// Builds a synthetic dataset with a planted loading structure: feature j
    /// loads 0.8 on factor (j mod n_factors) and 0.1 on all others, plus
    /// uniform noise drawn from [-0.3, 0.3).
    fn generate_factor_data(n_samples: usize, n_features: usize, n_factors: usize) -> Array2<f64> {
        let mut rng = scirs2_core::random::rng();
        let mut data: Array2<f64> = Array2::zeros((n_samples, n_features));

        // Planted ground-truth loading matrix.
        let mut true_loadings: Array2<f64> = Array2::zeros((n_features, n_factors));
        for j in 0..n_features {
            let factor_idx = j % n_factors;
            true_loadings[[j, factor_idx]] = 0.8;
            for f in 0..n_factors {
                if f != factor_idx {
                    true_loadings[[j, f]] = 0.1;
                }
            }
        }

        for i in 0..n_samples {
            // Latent factor values drawn uniformly from [-2, 2).
            let mut factors: Array1<f64> = Array1::zeros(n_factors);
            for f in 0..n_factors {
                factors[f] = scirs2_core::random::Rng::random_range(&mut rng, -2.0..2.0);
            }

            for j in 0..n_features {
                let mut val = 0.0;
                for f in 0..n_factors {
                    val += true_loadings[[j, f]] * factors[f];
                }
                val += scirs2_core::random::Rng::random_range(&mut rng, -0.3..0.3);
                data[[i, j]] = val;
            }
        }

        data
    }

    // Shapes and finiteness of the basic fit output.
    #[test]
    fn test_factor_analysis_basic() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        assert_eq!(result.loadings.shape(), &[6, 2]);
        assert_eq!(result.scores.shape(), &[100, 2]);
        assert_eq!(result.uniquenesses.len(), 6);
        assert_eq!(result.communalities.len(), 6);

        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
        for val in result.scores.iter() {
            assert!(val.is_finite());
        }
    }

    // Uniquenesses must be strictly positive; communalities non-negative.
    #[test]
    fn test_factor_analysis_uniquenesses() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        for &u in result.uniquenesses.iter() {
            assert!(u > 0.0, "Uniqueness should be positive, got {}", u);
        }

        for &c in result.communalities.iter() {
            assert!(c >= 0.0, "Communality should be non-negative, got {}", c);
        }
    }

    // Varimax rotation still produces finite loadings of the right shape.
    #[test]
    fn test_factor_analysis_varimax() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2).with_rotation(RotationMethod::Varimax);
        let result = fa
            .fit_transform(&data)
            .expect("FA varimax fit_transform failed");

        assert_eq!(result.loadings.shape(), &[6, 2]);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
    }

    // Promax rotation still produces finite loadings of the right shape.
    #[test]
    fn test_factor_analysis_promax() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2).with_rotation(RotationMethod::Promax);
        let result = fa
            .fit_transform(&data)
            .expect("FA promax fit_transform failed");

        assert_eq!(result.loadings.shape(), &[6, 2]);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
    }

    // EM stops within the iteration budget and yields a finite likelihood.
    #[test]
    fn test_factor_analysis_convergence() {
        let data = generate_factor_data(50, 4, 2);

        let mut fa = FactorAnalysis::new(2).with_max_iter(500).with_tol(1e-6);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        assert!(
            result.n_iter <= 500,
            "Should converge within max_iter, got {}",
            result.n_iter
        );

        assert!(
            result.log_likelihood.is_finite(),
            "Log-likelihood should be finite, got {}",
            result.log_likelihood
        );
    }

    // Degenerate single-factor model.
    #[test]
    fn test_factor_analysis_single_factor() {
        let data = generate_factor_data(50, 4, 1);

        let mut fa = FactorAnalysis::new(1);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        assert_eq!(result.loadings.shape(), &[4, 1]);
        assert_eq!(result.scores.shape(), &[50, 1]);
    }

    // Free-function wrapper behaves like the builder API.
    #[test]
    fn test_factor_analysis_convenience_fn() {
        let data = generate_factor_data(50, 6, 2);

        let result = factor_analysis(&data, 2).expect("factor_analysis failed");
        assert_eq!(result.loadings.shape(), &[6, 2]);
    }

    // n_factors > n_features must be rejected.
    #[test]
    fn test_factor_analysis_invalid_params() {
        let data = Array2::<f64>::zeros((10, 3));

        let mut fa = FactorAnalysis::new(5);
        assert!(fa.fit_transform(&data).is_err());
    }

    // Scree plot from the covariance matrix: ordering and normalization.
    #[test]
    fn test_scree_plot_data_covariance() {
        let data = generate_factor_data(100, 6, 2);

        let scree = FactorAnalysis::scree_plot_data(&data, false).expect("scree_plot_data failed");

        assert_eq!(scree.eigenvalues.len(), 6);
        assert_eq!(scree.variance_proportions.len(), 6);
        assert_eq!(scree.cumulative_variance.len(), 6);

        // Eigenvalues sorted descending (small tolerance for ties).
        for i in 1..scree.eigenvalues.len() {
            assert!(
                scree.eigenvalues[i] <= scree.eigenvalues[i - 1] + 1e-6,
                "Eigenvalues should be in descending order: eigenvalue[{}]={} > eigenvalue[{}]={}",
                i,
                scree.eigenvalues[i],
                i - 1,
                scree.eigenvalues[i - 1]
            );
        }
        for &e in scree.eigenvalues.iter() {
            assert!(e >= 0.0, "Eigenvalue should be non-negative, got {}", e);
        }

        let total: f64 = scree.variance_proportions.sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Variance proportions should sum to 1, got {}",
            total
        );

        let last_idx = scree.cumulative_variance.len() - 1;
        assert!(
            (scree.cumulative_variance[last_idx] - 1.0).abs() < 1e-6,
            "Cumulative variance should end at 1"
        );
    }

    // Scree plot from the correlation matrix: Kaiser threshold is exactly 1.
    #[test]
    fn test_scree_plot_data_correlation() {
        let data = generate_factor_data(200, 6, 2);

        let scree = FactorAnalysis::scree_plot_data(&data, true)
            .expect("scree_plot_data correlation failed");

        assert!(
            (scree.kaiser_threshold - 1.0).abs() < 1e-10,
            "Kaiser threshold for correlation matrix should be 1.0, got {}",
            scree.kaiser_threshold
        );

        for &e in scree.eigenvalues.iter() {
            assert!(
                e >= 0.0 && e.is_finite(),
                "Eigenvalue should be finite non-negative, got {}",
                e
            );
        }

        assert!(
            scree.eigenvalues[0] >= scree.eigenvalues[scree.eigenvalues.len() - 1],
            "Eigenvalues should be in descending order"
        );

        let total: f64 = scree.variance_proportions.sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Variance proportions should sum to 1, got {}",
            total
        );
    }

    // Free-function scree wrapper.
    #[test]
    fn test_scree_plot_convenience_fn() {
        let data = generate_factor_data(50, 4, 2);

        let scree = scree_plot_data(&data, false).expect("scree_plot_data convenience fn failed");
        assert_eq!(scree.eigenvalues.len(), 4);
        assert!(scree.eigenvalues.iter().all(|e| e.is_finite()));
    }

    // communality + uniqueness should roughly reconstruct each feature's
    // sample variance (loose 50% relative tolerance: EM on noisy data).
    #[test]
    fn test_factor_analysis_communalities_sum_constraint() {
        let data = generate_factor_data(200, 4, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA failed");

        // Recompute per-feature sample variances independently.
        let n_samples = data.shape()[0];
        let n_features = data.shape()[1];
        let mean = data.mean_axis(Axis(0)).expect("mean computation failed");
        let mut variances: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            let mut var = 0.0;
            for i in 0..n_samples {
                let diff = data[[i, j]] - mean[j];
                var += diff * diff;
            }
            variances[j] = var / (n_samples - 1) as f64;
        }

        for j in 0..n_features {
            let total = result.communalities[j] + result.uniquenesses[j];
            let variance = variances[j];
            let rel_err = (total - variance).abs() / variance.max(1e-12);
            assert!(
                rel_err < 0.5,
                "Feature {}: communality ({}) + uniqueness ({}) = {} should approximate variance ({})",
                j, result.communalities[j], result.uniquenesses[j], total, variance
            );
        }
    }

    // A single sample cannot support a fit.
    #[test]
    fn test_factor_analysis_too_few_samples() {
        let data = Array2::<f64>::zeros((1, 3));
        let mut fa = FactorAnalysis::new(1);
        let result = fa.fit_transform(&data);
        assert!(result.is_err());
    }

    // Builder methods chain without fitting; getters stay None until fit.
    #[test]
    fn test_factor_analysis_builder_methods() {
        let fa = FactorAnalysis::new(3)
            .with_rotation(RotationMethod::Varimax)
            .with_max_iter(200)
            .with_tol(1e-5)
            .with_promax_power(6)
            .with_center(false);

        assert!(fa.loadings().is_none());
        assert!(fa.uniquenesses().is_none());
    }

    // Scree plot also requires at least 2 samples.
    #[test]
    fn test_scree_plot_too_few_samples() {
        let data = Array2::<f64>::zeros((1, 3));
        let result = scree_plot_data(&data, false);
        assert!(result.is_err());
    }

    // noise_variance equals the mean of the uniquenesses.
    #[test]
    fn test_factor_analysis_noise_variance() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA failed");

        assert!(result.noise_variance > 0.0);
        assert!(result.noise_variance.is_finite());

        let expected = result.uniquenesses.sum() / result.uniquenesses.len() as f64;
        assert!(
            (result.noise_variance - expected).abs() < 1e-10,
            "Noise variance should be average of uniquenesses"
        );
    }
}