// scirs2_transform/reduction/factor_analysis.rs
1//! Factor Analysis for dimensionality reduction and latent variable modeling
2//!
3//! Factor Analysis is a statistical method that describes variability among observed,
4//! correlated variables in terms of a potentially lower number of unobserved variables
5//! called factors. It differs from PCA in that it models noise separately for each
6//! observed variable (uniquenesses/specific variances).
7//!
8//! ## Algorithm (EM-based)
9//!
10//! The factor model is: x = W * z + mu + epsilon
11//! where z ~ N(0, I) are the latent factors, W is the loading matrix,
12//! and epsilon_i ~ N(0, psi_i) are the uniquenesses.
13//!
14//! The EM algorithm alternates between:
15//! - **E-step**: Compute expected sufficient statistics of latent factors given observed data
16//! - **M-step**: Update loadings (W) and uniquenesses (psi) to maximize expected log-likelihood
17//!
18//! ## Rotation Methods
19//!
20//! - **Varimax**: Orthogonal rotation maximizing variance of squared loadings
21//! - **Promax**: Oblique rotation based on raised varimax solution
22
23use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix2};
24use scirs2_core::numeric::{Float, NumCast};
25use scirs2_core::validation::{check_positive, checkshape};
26use scirs2_linalg::svd;
27
28use crate::error::{Result, TransformError};
29
/// Rotation method for factor loadings
///
/// Rotations redistribute variance among the fitted factors to aid
/// interpretation; they do not change the overall model fit.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RotationMethod {
    /// No rotation; loadings are used exactly as estimated by EM
    None,
    /// Varimax (orthogonal) rotation
    Varimax,
    /// Promax (oblique) rotation
    Promax,
}
40
/// Scree plot data for selecting the number of factors
///
/// Contains eigenvalues of the covariance/correlation matrix, which can
/// be used to construct a scree plot for determining the appropriate
/// number of factors (look for the "elbow" in the eigenvalue curve).
/// All arrays are ordered to match `eigenvalues` (descending).
#[derive(Debug, Clone)]
pub struct ScreePlotData {
    /// Eigenvalues in descending order
    pub eigenvalues: Array1<f64>,
    /// Cumulative proportion of variance explained (monotone, ends near 1.0)
    pub cumulative_variance: Array1<f64>,
    /// Proportion of variance explained by each component
    pub variance_proportions: Array1<f64>,
    /// Kaiser criterion threshold (eigenvalue = 1.0 for correlation matrix)
    pub kaiser_threshold: f64,
}
57
/// Factor Analysis results
///
/// Produced by [`FactorAnalysis::fit_transform`]; bundles the fitted
/// model parameters with per-sample scores and fit diagnostics.
#[derive(Debug, Clone)]
pub struct FactorAnalysisResult {
    /// Factor loadings matrix, shape (n_features, n_factors)
    pub loadings: Array2<f64>,
    /// Factor scores for training data, shape (n_samples, n_factors)
    pub scores: Array2<f64>,
    /// Uniquenesses (specific variances) for each feature, shape (n_features,)
    pub uniquenesses: Array1<f64>,
    /// Communalities for each feature (sum of squared loadings), shape (n_features,)
    pub communalities: Array1<f64>,
    /// Noise variance (average uniqueness across features)
    pub noise_variance: f64,
    /// Number of EM iterations performed
    pub n_iter: usize,
    /// Log-likelihood at convergence
    pub log_likelihood: f64,
}
76
/// Factor Analysis for dimensionality reduction
///
/// Factor Analysis assumes observed data is generated from a linear model
/// with latent factors plus Gaussian noise with feature-specific variances.
///
/// Defaults (see [`FactorAnalysis::new`]): no rotation, `max_iter = 1000`,
/// `tol = 1e-8`, `promax_power = 4`, centering enabled. All settings are
/// adjustable through the `with_*` builder methods.
///
/// # Example
///
/// ```rust,no_run
/// use scirs2_transform::reduction::factor_analysis::FactorAnalysis;
/// use scirs2_core::ndarray::Array2;
///
/// let data = Array2::<f64>::zeros((100, 10));
/// let mut fa = FactorAnalysis::new(3);
/// let result = fa.fit_transform(&data).expect("fit_transform failed");
/// println!("Loadings shape: {:?}", result.loadings.shape());
/// println!("Scores shape: {:?}", result.scores.shape());
/// ```
#[derive(Debug, Clone)]
pub struct FactorAnalysis {
    /// Number of factors to extract
    n_factors: usize,
    /// Rotation method to apply after EM convergence
    rotation: RotationMethod,
    /// Maximum number of EM iterations
    max_iter: usize,
    /// Convergence tolerance for log-likelihood change between EM iterations
    tol: f64,
    /// Power parameter for promax rotation (clamped to >= 2 by the setter)
    promax_power: usize,
    /// Maximum iterations for varimax rotation
    varimax_max_iter: usize,
    /// Whether to center the data before fitting
    center: bool,
    /// Mean of training data (None until fitted)
    mean: Option<Array1<f64>>,
    /// Factor loadings (None until fitted)
    loadings: Option<Array2<f64>>,
    /// Uniquenesses / specific variances (None until fitted)
    uniquenesses: Option<Array1<f64>>,
    /// Training data after centering (None until fitted)
    training_data: Option<Array2<f64>>,
}
119
120impl FactorAnalysis {
121    /// Creates a new FactorAnalysis instance
122    ///
123    /// # Arguments
124    /// * `n_factors` - Number of latent factors to extract
125    pub fn new(n_factors: usize) -> Self {
126        FactorAnalysis {
127            n_factors,
128            rotation: RotationMethod::None,
129            max_iter: 1000,
130            tol: 1e-8,
131            promax_power: 4,
132            varimax_max_iter: 100,
133            center: true,
134            mean: None,
135            loadings: None,
136            uniquenesses: None,
137            training_data: None,
138        }
139    }
140
141    /// Set the rotation method
142    pub fn with_rotation(mut self, rotation: RotationMethod) -> Self {
143        self.rotation = rotation;
144        self
145    }
146
147    /// Set the maximum number of EM iterations
148    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
149        self.max_iter = max_iter;
150        self
151    }
152
153    /// Set the convergence tolerance
154    pub fn with_tol(mut self, tol: f64) -> Self {
155        self.tol = tol;
156        self
157    }
158
159    /// Set the promax power parameter
160    pub fn with_promax_power(mut self, power: usize) -> Self {
161        self.promax_power = power.max(2);
162        self
163    }
164
165    /// Set whether to center the data
166    pub fn with_center(mut self, center: bool) -> Self {
167        self.center = center;
168        self
169    }
170
    /// Fit the factor analysis model and return results
    ///
    /// Pipeline: validate -> (optionally) center -> sample covariance ->
    /// SVD-based initialization -> EM -> optional rotation -> regression scores.
    ///
    /// # Arguments
    /// * `x` - Input data, shape (n_samples, n_features)
    ///
    /// # Returns
    /// * `Result<FactorAnalysisResult>` - Factor analysis results
    ///
    /// # Errors
    /// * `TransformError::InvalidInput` if fewer than 2 samples or
    ///   `n_factors > n_features`; helper failures are propagated.
    pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<FactorAnalysisResult>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        // Convert to f64; values that fail the cast silently become 0.0.
        let x_f64 = x.mapv(|v| <f64 as NumCast>::from(v).unwrap_or_default());
        let (n_samples, n_features) = x_f64.dim();

        // Input validation
        check_positive(self.n_factors, "n_factors")?;
        // NOTE(review): the expected shape is taken from x itself, so this
        // check cannot fail as written — presumably intended to validate
        // against an externally expected shape; confirm intent.
        checkshape(&x_f64, &[n_samples, n_features], "x")?;

        if n_samples < 2 {
            return Err(TransformError::InvalidInput(
                "Need at least 2 samples for factor analysis".to_string(),
            ));
        }

        if self.n_factors > n_features {
            return Err(TransformError::InvalidInput(format!(
                "n_factors={} must be <= n_features={}",
                self.n_factors, n_features
            )));
        }

        // Column means; computed (and later stored) even when centering is off.
        let mean = x_f64.mean_axis(Axis(0)).ok_or_else(|| {
            TransformError::ComputationError("Failed to compute mean".to_string())
        })?;

        // Center data (skipped entirely when `center == false`)
        let x_centered = if self.center {
            let mut centered = x_f64.clone();
            for i in 0..n_samples {
                for j in 0..n_features {
                    centered[[i, j]] -= mean[j];
                }
            }
            centered
        } else {
            x_f64.clone()
        };

        // Sample covariance with Bessel's correction (divide by n - 1)
        let mut cov: Array2<f64> = Array2::zeros((n_features, n_features));
        for i in 0..n_features {
            for j in 0..n_features {
                let mut sum = 0.0;
                for k in 0..n_samples {
                    sum += x_centered[[k, i]] * x_centered[[k, j]];
                }
                cov[[i, j]] = sum / (n_samples - 1) as f64;
            }
        }

        // Initialize using SVD of centered data
        let (loadings_init, uniquenesses_init) =
            self.initialize_loadings(&x_centered, &cov, n_samples)?;

        // Run EM algorithm
        let (loadings, uniquenesses, n_iter, log_likelihood) = self.em_algorithm(
            &cov,
            loadings_init,
            uniquenesses_init,
            n_samples,
            n_features,
        )?;

        // Apply rotation if requested
        let loadings_rotated = match &self.rotation {
            RotationMethod::None => loadings,
            RotationMethod::Varimax => self.varimax_rotation(&loadings)?,
            RotationMethod::Promax => self.promax_rotation(&loadings)?,
        };

        // Compute factor scores using regression method
        // scores = X_centered * Psi^{-1} * W * (W^T * Psi^{-1} * W + I)^{-1}
        let scores = self.compute_scores(&x_centered, &loadings_rotated, &uniquenesses)?;

        // Communalities: h_j^2 = sum of squared (rotated) loadings for feature j
        let mut communalities: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            for f in 0..self.n_factors {
                communalities[j] += loadings_rotated[[j, f]] * loadings_rotated[[j, f]];
            }
        }

        // Summary noise level: mean uniqueness (0.0 for the degenerate empty case)
        let noise_variance = if uniquenesses.is_empty() {
            0.0
        } else {
            uniquenesses.sum() / uniquenesses.len() as f64
        };

        // Store fitted parameters for later inspection via the accessors
        self.mean = Some(mean);
        self.loadings = Some(loadings_rotated.clone());
        self.uniquenesses = Some(uniquenesses.clone());
        self.training_data = Some(x_centered);

        Ok(FactorAnalysisResult {
            loadings: loadings_rotated,
            scores,
            uniquenesses,
            communalities,
            noise_variance,
            n_iter,
            log_likelihood,
        })
    }
286
287    /// Initialize loadings using SVD
288    fn initialize_loadings(
289        &self,
290        x_centered: &Array2<f64>,
291        _cov: &Array2<f64>,
292        n_samples: usize,
293    ) -> Result<(Array2<f64>, Array1<f64>)> {
294        let n_features = x_centered.shape()[1];
295
296        // SVD of centered data
297        let (_u, s, vt) = svd::<f64>(&x_centered.view(), true, None)
298            .map_err(|e| TransformError::LinalgError(e))?;
299
300        // Initial loadings from the top singular vectors
301        let scale = 1.0 / (n_samples as f64 - 1.0).sqrt();
302        let mut loadings: Array2<f64> = Array2::zeros((n_features, self.n_factors));
303        for j in 0..n_features {
304            for f in 0..self.n_factors {
305                if f < s.len() && f < vt.shape()[0] {
306                    loadings[[j, f]] = vt[[f, j]] * s[f] * scale;
307                }
308            }
309        }
310
311        // Initial uniquenesses: diagonal of cov minus communalities
312        let mut uniquenesses: Array1<f64> = Array1::zeros(n_features);
313        for j in 0..n_features {
314            let mut communality = 0.0;
315            for f in 0..self.n_factors {
316                communality += loadings[[j, f]] * loadings[[j, f]];
317            }
318            // Variance of j-th feature
319            let mut var_j = 0.0;
320            for k in 0..n_samples {
321                var_j += x_centered[[k, j]] * x_centered[[k, j]];
322            }
323            var_j /= (n_samples - 1) as f64;
324
325            uniquenesses[j] = (var_j - communality).max(1e-6);
326        }
327
328        Ok((loadings, uniquenesses))
329    }
330
    /// EM algorithm for factor analysis
    ///
    /// The generative model is: x ~ N(0, W*W^T + Psi)
    /// where W is the loading matrix (n_features x n_factors)
    /// and Psi = diag(psi_1, ..., psi_p) is the uniqueness matrix.
    ///
    /// Iterates E/M updates on the sample covariance `cov` until the absolute
    /// change in log-likelihood drops below `self.tol` or `self.max_iter`
    /// iterations are reached. Returns (loadings, uniquenesses, n_iter, ll).
    fn em_algorithm(
        &self,
        cov: &Array2<f64>,
        mut loadings: Array2<f64>,
        mut uniquenesses: Array1<f64>,
        n_samples: usize,
        n_features: usize,
    ) -> Result<(Array2<f64>, Array1<f64>, usize, f64)> {
        let n_factors = self.n_factors;
        let mut prev_ll = f64::NEG_INFINITY;
        let mut n_iter = 0;

        for iter in 0..self.max_iter {
            n_iter = iter + 1;

            // E-step: Compute E[z|x] and E[z*z^T|x]
            // sigma_z = (I + W^T * Psi^{-1} * W)^{-1}
            // E[z|x] = sigma_z * W^T * Psi^{-1} * x

            // Compute Psi^{-1} * W (uniquenesses clamped to avoid division by zero)
            let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
            for j in 0..n_features {
                let psi_inv = 1.0 / uniquenesses[j].max(1e-12);
                for f in 0..n_factors {
                    psi_inv_w[[j, f]] = psi_inv * loadings[[j, f]];
                }
            }

            // Compute W^T * Psi^{-1} * W
            let mut wt_psi_inv_w: Array2<f64> = Array2::zeros((n_factors, n_factors));
            for i in 0..n_factors {
                for j in 0..n_factors {
                    for k in 0..n_features {
                        wt_psi_inv_w[[i, j]] += loadings[[k, i]] * psi_inv_w[[k, j]];
                    }
                }
            }

            // sigma_z = (I + W^T * Psi^{-1} * W)^{-1}
            // (add I in place, then invert)
            for i in 0..n_factors {
                wt_psi_inv_w[[i, i]] += 1.0;
            }

            // Invert sigma_z^{-1}; only an (n_factors x n_factors) inverse is needed
            let sigma_z = self.invert_small_matrix(&wt_psi_inv_w)?;

            // E[z*z^T] = sigma_z + E[z]*E[z]^T
            // For EM, we need: expected_zzt = sigma_z + sigma_z * W^T * Psi^{-1} * S * Psi^{-1} * W * sigma_z
            // where S is the sample covariance
            // This simplifies to computing the M-step directly

            // M-step: Update W and Psi
            // The sufficient statistics needed are:
            // beta = sigma_z * W^T * Psi^{-1}  (n_factors x n_features)
            let mut beta: Array2<f64> = Array2::zeros((n_factors, n_features));
            for i in 0..n_factors {
                for j in 0..n_features {
                    for k in 0..n_factors {
                        beta[[i, j]] += sigma_z[[i, k]] * psi_inv_w[[j, k]]; // note: psi_inv_w is (n_features, n_factors)
                    }
                }
            }

            // E[z*x^T] = beta * S  (n_factors x n_features)
            let mut ez_xt: Array2<f64> = Array2::zeros((n_factors, n_features));
            for i in 0..n_factors {
                for j in 0..n_features {
                    for k in 0..n_features {
                        ez_xt[[i, j]] += beta[[i, k]] * cov[[k, j]];
                    }
                }
            }

            // E[z*z^T] = sigma_z + beta * S * beta^T  (n_factors x n_factors)
            // beta * S is already in ez_xt, so accumulate beta[[i,k]] * (beta*S)[[j,k]]
            let mut ez_zt = sigma_z.clone();
            for i in 0..n_factors {
                for j in 0..n_factors {
                    for k in 0..n_features {
                        ez_zt[[i, j]] += beta[[i, k]] * ez_xt[[j, k]];
                    }
                }
            }

            // New W: S * beta^T * E[z*z^T]^{-1}
            let ez_zt_inv = self.invert_small_matrix(&ez_zt)?;

            // Compute S * beta^T
            let mut s_beta_t: Array2<f64> = Array2::zeros((n_features, n_factors));
            for i in 0..n_features {
                for j in 0..n_factors {
                    for k in 0..n_features {
                        s_beta_t[[i, j]] += cov[[i, k]] * beta[[j, k]];
                    }
                }
            }

            // New loadings = S * beta^T * E[z*z^T]^{-1}
            let mut new_loadings: Array2<f64> = Array2::zeros((n_features, n_factors));
            for i in 0..n_features {
                for j in 0..n_factors {
                    for k in 0..n_factors {
                        new_loadings[[i, j]] += s_beta_t[[i, k]] * ez_zt_inv[[k, j]];
                    }
                }
            }

            // New uniquenesses: diag(S - W_new * E[z*x^T]), floored at 1e-6
            let mut new_uniquenesses: Array1<f64> = Array1::zeros(n_features);
            for j in 0..n_features {
                let mut correction = 0.0;
                for f in 0..n_factors {
                    correction += new_loadings[[j, f]] * ez_xt[[f, j]];
                }
                new_uniquenesses[j] = (cov[[j, j]] - correction).max(1e-6);
            }

            loadings = new_loadings;
            uniquenesses = new_uniquenesses;

            // Compute log-likelihood for the updated parameters
            let ll =
                self.compute_log_likelihood(cov, &loadings, &uniquenesses, n_samples, n_features);

            // Check convergence (absolute change in log-likelihood)
            if (ll - prev_ll).abs() < self.tol {
                break;
            }
            prev_ll = ll;
        }

        // Final log-likelihood for the returned parameters.
        // NOTE(review): this recomputes the value already obtained in the last
        // loop iteration; harmless but redundant work.
        let ll = self.compute_log_likelihood(cov, &loadings, &uniquenesses, n_samples, n_features);

        Ok((loadings, uniquenesses, n_iter, ll))
    }
470
471    /// Compute log-likelihood of the factor model
472    fn compute_log_likelihood(
473        &self,
474        cov: &Array2<f64>,
475        loadings: &Array2<f64>,
476        uniquenesses: &Array1<f64>,
477        n_samples: usize,
478        n_features: usize,
479    ) -> f64 {
480        let n_factors = self.n_factors;
481
482        // Sigma = W * W^T + Psi
483        let mut sigma: Array2<f64> = Array2::zeros((n_features, n_features));
484        for i in 0..n_features {
485            for j in 0..n_features {
486                for f in 0..n_factors {
487                    sigma[[i, j]] += loadings[[i, f]] * loadings[[j, f]];
488                }
489                if i == j {
490                    sigma[[i, j]] += uniquenesses[i];
491                }
492            }
493        }
494
495        // Use Woodbury identity for efficient computation
496        // log|Sigma| = log|Psi| + log|I + W^T * Psi^{-1} * W|
497        let mut log_det_psi = 0.0;
498        for j in 0..n_features {
499            log_det_psi += uniquenesses[j].max(1e-12).ln();
500        }
501
502        // Compute W^T * Psi^{-1} * W + I
503        let mut m: Array2<f64> = Array2::zeros((n_factors, n_factors));
504        for i in 0..n_factors {
505            for j in 0..n_factors {
506                for k in 0..n_features {
507                    m[[i, j]] += loadings[[k, i]] * loadings[[k, j]] / uniquenesses[k].max(1e-12);
508                }
509                if i == j {
510                    m[[i, j]] += 1.0;
511                }
512            }
513        }
514
515        // log|M| using eigendecomposition
516        let log_det_m = match scirs2_linalg::eigh::<f64>(&m.view(), None) {
517            Ok((eigenvalues, _)) => eigenvalues.iter().map(|&e| e.max(1e-12).ln()).sum::<f64>(),
518            Err(_) => 0.0,
519        };
520
521        let log_det_sigma = log_det_psi + log_det_m;
522
523        // tr(Sigma^{-1} * S) using Woodbury
524        // Sigma^{-1} = Psi^{-1} - Psi^{-1} * W * M^{-1} * W^T * Psi^{-1}
525        let m_inv = match self.invert_small_matrix(&m) {
526            Ok(inv) => inv,
527            Err(_) => Array2::eye(n_factors),
528        };
529
530        // Compute Psi^{-1} * W
531        let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
532        for j in 0..n_features {
533            for f in 0..n_factors {
534                psi_inv_w[[j, f]] = loadings[[j, f]] / uniquenesses[j].max(1e-12);
535            }
536        }
537
538        // Compute Psi^{-1} * W * M^{-1}
539        let mut psi_inv_w_m_inv: Array2<f64> = Array2::zeros((n_features, n_factors));
540        for i in 0..n_features {
541            for j in 0..n_factors {
542                for k in 0..n_factors {
543                    psi_inv_w_m_inv[[i, j]] += psi_inv_w[[i, k]] * m_inv[[k, j]];
544                }
545            }
546        }
547
548        // tr(Sigma^{-1} * S) = tr(Psi^{-1} * S) - tr(Psi^{-1} * W * M^{-1} * W^T * Psi^{-1} * S)
549        let mut trace_psi_inv_s = 0.0;
550        for j in 0..n_features {
551            trace_psi_inv_s += cov[[j, j]] / uniquenesses[j].max(1e-12);
552        }
553
554        let mut trace_correction = 0.0;
555        for i in 0..n_features {
556            for j in 0..n_features {
557                let mut sigma_inv_ij = 0.0;
558                for f in 0..n_factors {
559                    sigma_inv_ij += psi_inv_w_m_inv[[i, f]] * psi_inv_w[[j, f]];
560                }
561                trace_correction += sigma_inv_ij * cov[[j, i]];
562            }
563        }
564
565        let trace_sigma_inv_s = trace_psi_inv_s - trace_correction;
566
567        // Log-likelihood = -n/2 * (p * log(2*pi) + log|Sigma| + tr(Sigma^{-1} * S))
568        let n = n_samples as f64;
569        let p = n_features as f64;
570        -n / 2.0 * (p * (2.0 * std::f64::consts::PI).ln() + log_det_sigma + trace_sigma_inv_s)
571    }
572
573    /// Invert a small matrix using Gauss-Jordan elimination
574    fn invert_small_matrix(&self, a: &Array2<f64>) -> Result<Array2<f64>> {
575        let n = a.shape()[0];
576        let mut augmented: Array2<f64> = Array2::zeros((n, 2 * n));
577
578        // Build augmented matrix [A | I]
579        for i in 0..n {
580            for j in 0..n {
581                augmented[[i, j]] = a[[i, j]];
582            }
583            augmented[[i, n + i]] = 1.0;
584        }
585
586        // Forward elimination with partial pivoting
587        for i in 0..n {
588            let mut max_row = i;
589            for k in i + 1..n {
590                if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
591                    max_row = k;
592                }
593            }
594
595            if max_row != i {
596                for j in 0..2 * n {
597                    let temp = augmented[[i, j]];
598                    augmented[[i, j]] = augmented[[max_row, j]];
599                    augmented[[max_row, j]] = temp;
600                }
601            }
602
603            let pivot = augmented[[i, i]];
604            if pivot.abs() < 1e-14 {
605                // Add regularization
606                augmented[[i, i]] += 1e-8;
607                let pivot = augmented[[i, i]];
608                for j in 0..2 * n {
609                    augmented[[i, j]] /= pivot;
610                }
611            } else {
612                for j in 0..2 * n {
613                    augmented[[i, j]] /= pivot;
614                }
615            }
616
617            for k in 0..n {
618                if k != i {
619                    let factor = augmented[[k, i]];
620                    for j in 0..2 * n {
621                        augmented[[k, j]] -= factor * augmented[[i, j]];
622                    }
623                }
624            }
625        }
626
627        // Extract inverse
628        let mut inv: Array2<f64> = Array2::zeros((n, n));
629        for i in 0..n {
630            for j in 0..n {
631                inv[[i, j]] = augmented[[i, n + j]];
632            }
633        }
634
635        Ok(inv)
636    }
637
    /// Compute factor scores using the regression method (Thomson's method)
    ///
    /// scores = X * Psi^{-1} * W * (W^T * Psi^{-1} * W + I)^{-1}
    ///
    /// # Arguments
    /// * `x_centered` - Centered data, shape (n_samples, n_features)
    /// * `loadings` - Loading matrix W, shape (n_features, n_factors)
    /// * `uniquenesses` - Diagonal of Psi, shape (n_features,)
    ///
    /// # Returns
    /// * Factor scores, shape (n_samples, n_factors)
    fn compute_scores(
        &self,
        x_centered: &Array2<f64>,
        loadings: &Array2<f64>,
        uniquenesses: &Array1<f64>,
    ) -> Result<Array2<f64>> {
        let n_samples = x_centered.shape()[0];
        let n_features = x_centered.shape()[1];
        let n_factors = self.n_factors;

        // Compute Psi^{-1} * W (uniquenesses clamped to avoid division by zero)
        let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
        for j in 0..n_features {
            let psi_inv = 1.0 / uniquenesses[j].max(1e-12);
            for f in 0..n_factors {
                psi_inv_w[[j, f]] = psi_inv * loadings[[j, f]];
            }
        }

        // Compute M = W^T * Psi^{-1} * W + I  (n_factors x n_factors)
        let mut m: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    m[[i, j]] += loadings[[k, i]] * psi_inv_w[[k, j]];
                }
                if i == j {
                    m[[i, j]] += 1.0;
                }
            }
        }

        // Invert M (small system)
        let m_inv = self.invert_small_matrix(&m)?;

        // Compute scoring matrix: Psi^{-1} * W * M^{-1}
        let mut scoring: Array2<f64> = Array2::zeros((n_features, n_factors));
        for i in 0..n_features {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    scoring[[i, j]] += psi_inv_w[[i, k]] * m_inv[[k, j]];
                }
            }
        }

        // Compute scores: X * scoring
        let mut scores: Array2<f64> = Array2::zeros((n_samples, n_factors));
        for i in 0..n_samples {
            for j in 0..n_factors {
                for k in 0..n_features {
                    scores[[i, j]] += x_centered[[i, k]] * scoring[[k, j]];
                }
            }
        }

        Ok(scores)
    }
698
    /// Varimax rotation
    ///
    /// Maximizes the sum of variances of the squared loadings.
    /// This makes each factor either have a high or low loading on each variable,
    /// making interpretation easier.
    ///
    /// Implementation: Kaiser-normalized pairwise plane rotations, iterated
    /// until no pair needs a rotation or `varimax_max_iter` is reached.
    fn varimax_rotation(&self, loadings: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_features, n_factors) = loadings.dim();

        // Nothing to rotate with a single factor
        if n_factors < 2 {
            return Ok(loadings.clone());
        }

        let mut rotated = loadings.clone();

        // Kaiser normalization: divide each row by the square root of its
        // communality so every variable has equal weight in the criterion
        let mut h: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            for f in 0..n_factors {
                h[j] += rotated[[j, f]] * rotated[[j, f]];
            }
            // Floor avoids division by zero for all-zero rows
            h[j] = h[j].sqrt().max(1e-10);
        }

        let mut normalized = rotated.clone();
        for j in 0..n_features {
            for f in 0..n_factors {
                normalized[[j, f]] /= h[j];
            }
        }

        // Iterate rotations between pairs of factors
        for _iter in 0..self.varimax_max_iter {
            let mut changed = false;

            for p in 0..n_factors {
                for q in (p + 1)..n_factors {
                    // Compute the optimal rotation angle for the (p, q) plane
                    let n = n_features as f64;

                    let mut sum_a: f64 = 0.0;
                    let mut sum_b: f64 = 0.0;
                    let mut sum_c: f64 = 0.0;
                    let mut sum_d: f64 = 0.0;

                    for j in 0..n_features {
                        let xj = normalized[[j, p]];
                        let yj = normalized[[j, q]];

                        let a_j = xj * xj - yj * yj;
                        let b_j = 2.0 * xj * yj;

                        sum_a += a_j;
                        sum_b += b_j;
                        sum_c += a_j * a_j - b_j * b_j;
                        sum_d += 2.0 * a_j * b_j;
                    }

                    // Standard varimax angle: theta = atan2(u, v) / 4
                    let u = sum_d - 2.0 * sum_a * sum_b / n;
                    let v = sum_c - (sum_a * sum_a - sum_b * sum_b) / n;

                    // Rotation angle
                    let angle = 0.25 * u.atan2(v);

                    // Skip negligible rotations; convergence when all pairs skip
                    if angle.abs() < 1e-10 {
                        continue;
                    }

                    changed = true;

                    let cos_a = angle.cos();
                    let sin_a = angle.sin();

                    // Apply the plane (Givens) rotation to columns p and q
                    for j in 0..n_features {
                        let xj = normalized[[j, p]];
                        let yj = normalized[[j, q]];
                        normalized[[j, p]] = cos_a * xj + sin_a * yj;
                        normalized[[j, q]] = -sin_a * xj + cos_a * yj;
                    }
                }
            }

            if !changed {
                break;
            }
        }

        // De-normalize: restore original row scales
        for j in 0..n_features {
            for f in 0..n_factors {
                rotated[[j, f]] = normalized[[j, f]] * h[j];
            }
        }

        Ok(rotated)
    }
795
    /// Promax rotation (oblique)
    ///
    /// Starts with varimax rotation, then raises the loadings to a power
    /// to create a target matrix, and rotates towards that target via an
    /// (unnormalized) least-squares Procrustes fit.
    fn promax_rotation(&self, loadings: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_features, n_factors) = loadings.dim();

        // Nothing to rotate with a single factor
        if n_factors < 2 {
            return Ok(loadings.clone());
        }

        // Step 1: Get varimax rotation
        let varimax_loadings = self.varimax_rotation(loadings)?;

        // Step 2: Create target matrix by raising to a power while preserving signs.
        // Higher powers exaggerate large loadings and shrink small ones.
        let mut target: Array2<f64> = Array2::zeros((n_features, n_factors));
        let power = self.promax_power as f64;

        for j in 0..n_features {
            for f in 0..n_factors {
                let val = varimax_loadings[[j, f]];
                let sign = if val >= 0.0 { 1.0 } else { -1.0 };
                target[[j, f]] = sign * val.abs().powf(power);
            }
        }

        // Step 3: Rotate towards target using Procrustes rotation
        // Minimize ||A * T - target||^2 for rotation matrix T
        // T = (A^T * A)^{-1} * A^T * target  (ordinary least squares)

        // Compute A^T * A where A = varimax_loadings
        let mut at_a: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    at_a[[i, j]] += varimax_loadings[[k, i]] * varimax_loadings[[k, j]];
                }
            }
        }

        // Compute (A^T * A)^{-1}
        let at_a_inv = self.invert_small_matrix(&at_a)?;

        // Compute A^T * target
        let mut at_target: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    at_target[[i, j]] += varimax_loadings[[k, i]] * target[[k, j]];
                }
            }
        }

        // T = (A^T * A)^{-1} * A^T * target
        let mut t_mat: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    t_mat[[i, j]] += at_a_inv[[i, k]] * at_target[[k, j]];
                }
            }
        }

        // Result = A * T: oblique loadings
        let mut result: Array2<f64> = Array2::zeros((n_features, n_factors));
        for i in 0..n_features {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    result[[i, j]] += varimax_loadings[[i, k]] * t_mat[[k, j]];
                }
            }
        }

        Ok(result)
    }
871
    /// Returns the factor loadings
    ///
    /// `None` until the model has been fitted.
    pub fn loadings(&self) -> Option<&Array2<f64>> {
        self.loadings.as_ref()
    }
876
    /// Returns the uniquenesses (per-feature specific variances, psi)
    ///
    /// `None` until the model has been fitted.
    pub fn uniquenesses(&self) -> Option<&Array1<f64>> {
        self.uniquenesses.as_ref()
    }
881
882    /// Compute scree plot data from a data matrix
883    ///
884    /// Computes eigenvalues of the covariance (or correlation) matrix,
885    /// which can be used for scree plots to determine the number of factors.
886    ///
887    /// # Arguments
888    /// * `x` - Input data, shape (n_samples, n_features)
889    /// * `use_correlation` - If true, use the correlation matrix instead of covariance
890    ///
891    /// # Returns
892    /// * `Result<ScreePlotData>` - Eigenvalues and variance proportions
893    pub fn scree_plot_data<S>(x: &ArrayBase<S, Ix2>, use_correlation: bool) -> Result<ScreePlotData>
894    where
895        S: Data,
896        S::Elem: Float + NumCast,
897    {
898        let x_f64 = x.mapv(|v| <f64 as NumCast>::from(v).unwrap_or_default());
899        let (n_samples, n_features) = x_f64.dim();
900
901        if n_samples < 2 {
902            return Err(TransformError::InvalidInput(
903                "Need at least 2 samples for scree plot".to_string(),
904            ));
905        }
906
907        // Center data
908        let mean = x_f64.mean_axis(Axis(0)).ok_or_else(|| {
909            TransformError::ComputationError("Failed to compute mean".to_string())
910        })?;
911
912        let mut x_centered = x_f64.clone();
913        for i in 0..n_samples {
914            for j in 0..n_features {
915                x_centered[[i, j]] -= mean[j];
916            }
917        }
918
919        // Compute covariance matrix
920        let mut cov: Array2<f64> = Array2::zeros((n_features, n_features));
921        for i in 0..n_features {
922            for j in 0..n_features {
923                let mut sum = 0.0;
924                for k in 0..n_samples {
925                    sum += x_centered[[k, i]] * x_centered[[k, j]];
926                }
927                cov[[i, j]] = sum / (n_samples - 1) as f64;
928            }
929        }
930
931        // If using correlation, standardize by dividing by standard deviations
932        if use_correlation {
933            let mut stds: Array1<f64> = Array1::zeros(n_features);
934            for j in 0..n_features {
935                stds[j] = cov[[j, j]].sqrt().max(1e-12);
936            }
937            for i in 0..n_features {
938                for j in 0..n_features {
939                    cov[[i, j]] /= stds[i] * stds[j];
940                }
941            }
942        }
943
944        // Compute eigenvalues using symmetric eigendecomposition
945        let (eigenvalues_raw, _eigenvectors) =
946            scirs2_linalg::eigh::<f64>(&cov.view(), None).map_err(TransformError::LinalgError)?;
947
948        // Sort eigenvalues in descending order
949        let n = eigenvalues_raw.len();
950        let mut eig_vec: Vec<f64> = eigenvalues_raw.iter().map(|&e| e.max(0.0)).collect();
951        eig_vec.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
952        let mut eigenvalues: Array1<f64> = Array1::zeros(n);
953        for i in 0..n {
954            eigenvalues[i] = eig_vec[i];
955        }
956
957        // Compute variance proportions
958        let total: f64 = eigenvalues.sum();
959        let mut variance_proportions: Array1<f64> = Array1::zeros(n);
960        let mut cumulative_variance: Array1<f64> = Array1::zeros(n);
961
962        if total > 0.0 {
963            let mut cumsum = 0.0;
964            for i in 0..n {
965                variance_proportions[i] = eigenvalues[i] / total;
966                cumsum += variance_proportions[i];
967                cumulative_variance[i] = cumsum;
968            }
969        }
970
971        // Kaiser threshold: 1.0 for correlation matrix, average eigenvalue for covariance
972        let kaiser_threshold = if use_correlation {
973            1.0
974        } else if n > 0 {
975            total / n as f64
976        } else {
977            0.0
978        };
979
980        Ok(ScreePlotData {
981            eigenvalues,
982            cumulative_variance,
983            variance_proportions,
984            kaiser_threshold,
985        })
986    }
987}
988
989/// Convenience function for factor analysis
990///
991/// # Arguments
992/// * `data` - Input data, shape (n_samples, n_features)
993/// * `n_factors` - Number of factors to extract
994///
995/// # Returns
996/// * `Result<FactorAnalysisResult>` - Factor analysis results
997pub fn factor_analysis<S>(
998    data: &ArrayBase<S, Ix2>,
999    n_factors: usize,
1000) -> Result<FactorAnalysisResult>
1001where
1002    S: Data,
1003    S::Elem: Float + NumCast,
1004{
1005    let mut fa = FactorAnalysis::new(n_factors);
1006    fa.fit_transform(data)
1007}
1008
/// Convenience function to compute scree plot data for factor selection
///
/// Returns eigenvalues and variance proportions that can be used to
/// determine the appropriate number of factors via the scree plot method
/// (look for the "elbow") or Kaiser criterion (eigenvalues > threshold).
///
/// Thin wrapper around [`FactorAnalysis::scree_plot_data`].
///
/// # Arguments
/// * `data` - Input data, shape (n_samples, n_features)
/// * `use_correlation` - If true, use correlation matrix; if false, use covariance
///
/// # Returns
/// * `Result<ScreePlotData>` - Eigenvalues and variance information
///
/// # Errors
/// Propagates any error from [`FactorAnalysis::scree_plot_data`]
/// (e.g. fewer than 2 samples).
pub fn scree_plot_data<S>(data: &ArrayBase<S, Ix2>, use_correlation: bool) -> Result<ScreePlotData>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    FactorAnalysis::scree_plot_data(data, use_correlation)
}
1028
#[cfg(test)]
mod tests {
    // Unit tests for FactorAnalysis fitting, rotations, parameter validation,
    // and the scree-plot helpers. Test data is randomized, so assertions check
    // structural properties (shapes, finiteness, ordering) rather than exact values.
    use super::*;
    use scirs2_core::ndarray::Array;

    /// Generate test data with known factor structure
    ///
    /// Each feature loads 0.8 on one factor (assigned round-robin by feature
    /// index) and 0.1 on every other factor; factor scores are uniform in
    /// [-2, 2) and uniform noise in [-0.3, 0.3) is added per observation.
    fn generate_factor_data(n_samples: usize, n_features: usize, n_factors: usize) -> Array2<f64> {
        let mut rng = scirs2_core::random::rng();
        let mut data: Array2<f64> = Array2::zeros((n_samples, n_features));

        // Create factor loadings
        let mut true_loadings: Array2<f64> = Array2::zeros((n_features, n_factors));
        for j in 0..n_features {
            let factor_idx = j % n_factors;
            true_loadings[[j, factor_idx]] = 0.8;
            // Add cross-loadings
            for f in 0..n_factors {
                if f != factor_idx {
                    true_loadings[[j, f]] = 0.1;
                }
            }
        }

        // Generate data: x = W * z + noise
        for i in 0..n_samples {
            // Random factors
            let mut factors: Array1<f64> = Array1::zeros(n_factors);
            for f in 0..n_factors {
                factors[f] = scirs2_core::random::Rng::random_range(&mut rng, -2.0..2.0);
            }

            for j in 0..n_features {
                let mut val = 0.0;
                for f in 0..n_factors {
                    val += true_loadings[[j, f]] * factors[f];
                }
                // Add noise
                val += scirs2_core::random::Rng::random_range(&mut rng, -0.3..0.3);
                data[[i, j]] = val;
            }
        }

        data
    }

    // Default fit: output shapes match (n_features, n_factors) / (n_samples,
    // n_factors) and all results are finite.
    #[test]
    fn test_factor_analysis_basic() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        assert_eq!(result.loadings.shape(), &[6, 2]);
        assert_eq!(result.scores.shape(), &[100, 2]);
        assert_eq!(result.uniquenesses.len(), 6);
        assert_eq!(result.communalities.len(), 6);

        // All values should be finite
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
        for val in result.scores.iter() {
            assert!(val.is_finite());
        }
    }

    // Uniquenesses must stay strictly positive; communalities non-negative.
    #[test]
    fn test_factor_analysis_uniquenesses() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        // Uniquenesses should be positive
        for &u in result.uniquenesses.iter() {
            assert!(u > 0.0, "Uniqueness should be positive, got {}", u);
        }

        // Communalities should be between 0 and data variance
        for &c in result.communalities.iter() {
            assert!(c >= 0.0, "Communality should be non-negative, got {}", c);
        }
    }

    // Varimax rotation still produces finite loadings of the right shape.
    #[test]
    fn test_factor_analysis_varimax() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2).with_rotation(RotationMethod::Varimax);
        let result = fa
            .fit_transform(&data)
            .expect("FA varimax fit_transform failed");

        assert_eq!(result.loadings.shape(), &[6, 2]);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
    }

    // Promax (oblique) rotation still produces finite loadings of the right shape.
    #[test]
    fn test_factor_analysis_promax() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2).with_rotation(RotationMethod::Promax);
        let result = fa
            .fit_transform(&data)
            .expect("FA promax fit_transform failed");

        assert_eq!(result.loadings.shape(), &[6, 2]);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
    }

    // EM should converge within max_iter and report a finite log-likelihood.
    #[test]
    fn test_factor_analysis_convergence() {
        let data = generate_factor_data(50, 4, 2);

        let mut fa = FactorAnalysis::new(2).with_max_iter(500).with_tol(1e-6);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        // Should converge in reasonable number of iterations
        assert!(
            result.n_iter <= 500,
            "Should converge within max_iter, got {}",
            result.n_iter
        );

        // Log-likelihood should be finite
        assert!(
            result.log_likelihood.is_finite(),
            "Log-likelihood should be finite, got {}",
            result.log_likelihood
        );
    }

    // Degenerate case: a single factor still works.
    #[test]
    fn test_factor_analysis_single_factor() {
        let data = generate_factor_data(50, 4, 1);

        let mut fa = FactorAnalysis::new(1);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");

        assert_eq!(result.loadings.shape(), &[4, 1]);
        assert_eq!(result.scores.shape(), &[50, 1]);
    }

    // The free-function wrapper behaves like the struct API.
    #[test]
    fn test_factor_analysis_convenience_fn() {
        let data = generate_factor_data(50, 6, 2);

        let result = factor_analysis(&data, 2).expect("factor_analysis failed");
        assert_eq!(result.loadings.shape(), &[6, 2]);
    }

    // Requesting more factors than features must be rejected.
    #[test]
    fn test_factor_analysis_invalid_params() {
        let data = Array2::<f64>::zeros((10, 3));

        // Too many factors
        let mut fa = FactorAnalysis::new(5);
        assert!(fa.fit_transform(&data).is_err());
    }

    // Covariance-based scree data: descending non-negative eigenvalues and
    // variance proportions that sum to 1.
    #[test]
    fn test_scree_plot_data_covariance() {
        let data = generate_factor_data(100, 6, 2);

        let scree = FactorAnalysis::scree_plot_data(&data, false).expect("scree_plot_data failed");

        // Should have eigenvalues for all features
        assert_eq!(scree.eigenvalues.len(), 6);
        assert_eq!(scree.variance_proportions.len(), 6);
        assert_eq!(scree.cumulative_variance.len(), 6);

        // Eigenvalues should be in approximately descending order and non-negative
        for i in 1..scree.eigenvalues.len() {
            assert!(
                scree.eigenvalues[i] <= scree.eigenvalues[i - 1] + 1e-6,
                "Eigenvalues should be in descending order: eigenvalue[{}]={} > eigenvalue[{}]={}",
                i,
                scree.eigenvalues[i],
                i - 1,
                scree.eigenvalues[i - 1]
            );
        }
        for &e in scree.eigenvalues.iter() {
            assert!(e >= 0.0, "Eigenvalue should be non-negative, got {}", e);
        }

        // Variance proportions should sum to ~1
        let total: f64 = scree.variance_proportions.sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Variance proportions should sum to 1, got {}",
            total
        );

        // Cumulative variance should end at ~1
        let last_idx = scree.cumulative_variance.len() - 1;
        assert!(
            (scree.cumulative_variance[last_idx] - 1.0).abs() < 1e-6,
            "Cumulative variance should end at 1"
        );
    }

    // Correlation-based scree data: Kaiser threshold is exactly 1.0 and
    // eigenvalues are finite, non-negative, descending.
    #[test]
    fn test_scree_plot_data_correlation() {
        let data = generate_factor_data(200, 6, 2);

        let scree = FactorAnalysis::scree_plot_data(&data, true)
            .expect("scree_plot_data correlation failed");

        // Kaiser threshold for correlation matrix should be 1.0
        assert!(
            (scree.kaiser_threshold - 1.0).abs() < 1e-10,
            "Kaiser threshold for correlation matrix should be 1.0, got {}",
            scree.kaiser_threshold
        );

        // All eigenvalues should be finite and non-negative
        for &e in scree.eigenvalues.iter() {
            assert!(
                e >= 0.0 && e.is_finite(),
                "Eigenvalue should be finite non-negative, got {}",
                e
            );
        }

        // First eigenvalue should be largest (descending order guaranteed by sort)
        assert!(
            scree.eigenvalues[0] >= scree.eigenvalues[scree.eigenvalues.len() - 1],
            "Eigenvalues should be in descending order"
        );

        // Variance proportions should sum to ~1
        let total: f64 = scree.variance_proportions.sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Variance proportions should sum to 1, got {}",
            total
        );
    }

    // The free-function scree wrapper delegates correctly.
    #[test]
    fn test_scree_plot_convenience_fn() {
        let data = generate_factor_data(50, 4, 2);

        let scree = scree_plot_data(&data, false).expect("scree_plot_data convenience fn failed");
        assert_eq!(scree.eigenvalues.len(), 4);
        assert!(scree.eigenvalues.iter().all(|e| e.is_finite()));
    }

    // Variance decomposition: communality + uniqueness per feature should
    // roughly reconstruct that feature's sample variance.
    #[test]
    fn test_factor_analysis_communalities_sum_constraint() {
        // Communality + uniqueness should approximate the feature variance
        let data = generate_factor_data(200, 4, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA failed");

        // Compute feature variances
        let n_samples = data.shape()[0];
        let n_features = data.shape()[1];
        let mean = data.mean_axis(Axis(0)).expect("mean computation failed");
        let mut variances: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            let mut var = 0.0;
            for i in 0..n_samples {
                let diff = data[[i, j]] - mean[j];
                var += diff * diff;
            }
            variances[j] = var / (n_samples - 1) as f64;
        }

        // communality + uniqueness should be approximately equal to the feature variance
        for j in 0..n_features {
            let total = result.communalities[j] + result.uniquenesses[j];
            let variance = variances[j];
            // Allow tolerance since EM may not exactly decompose variance
            let rel_err = (total - variance).abs() / variance.max(1e-12);
            assert!(
                rel_err < 0.5,
                "Feature {}: communality ({}) + uniqueness ({}) = {} should approximate variance ({})",
                j, result.communalities[j], result.uniquenesses[j], total, variance
            );
        }
    }

    // A single sample is not enough to fit the model.
    #[test]
    fn test_factor_analysis_too_few_samples() {
        let data = Array2::<f64>::zeros((1, 3));
        let mut fa = FactorAnalysis::new(1);
        let result = fa.fit_transform(&data);
        assert!(result.is_err());
    }

    // Builder methods chain without panicking; accessors are None pre-fit.
    #[test]
    fn test_factor_analysis_builder_methods() {
        let fa = FactorAnalysis::new(3)
            .with_rotation(RotationMethod::Varimax)
            .with_max_iter(200)
            .with_tol(1e-5)
            .with_promax_power(6)
            .with_center(false);

        // Just verify builder doesn't panic and struct is created
        assert!(fa.loadings().is_none()); // Not yet fitted
        assert!(fa.uniquenesses().is_none());
    }

    // Scree plot also rejects fewer than 2 samples.
    #[test]
    fn test_scree_plot_too_few_samples() {
        let data = Array2::<f64>::zeros((1, 3));
        let result = scree_plot_data(&data, false);
        assert!(result.is_err());
    }

    // noise_variance is defined as the mean of the uniquenesses.
    #[test]
    fn test_factor_analysis_noise_variance() {
        let data = generate_factor_data(100, 6, 2);

        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA failed");

        // Noise variance should be positive and finite
        assert!(result.noise_variance > 0.0);
        assert!(result.noise_variance.is_finite());

        // Noise variance should be the average of uniquenesses
        let expected = result.uniquenesses.sum() / result.uniquenesses.len() as f64;
        assert!(
            (result.noise_variance - expected).abs() < 1e-10,
            "Noise variance should be average of uniquenesses"
        );
    }
}
1365}