use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::validation::{check_positive, checkshape};
use scirs2_linalg::svd;
use crate::error::{Result, TransformError};
/// Rotation applied to the factor loadings after EM estimation.
#[derive(Debug, Clone, PartialEq)]
pub enum RotationMethod {
    /// Keep the unrotated maximum-likelihood loadings.
    None,
    /// Orthogonal varimax rotation (maximizes variance of squared loadings).
    Varimax,
    /// Oblique promax rotation, built on top of a varimax solution.
    Promax,
}
/// Eigenvalue summary data used to draw a scree plot.
#[derive(Debug, Clone)]
pub struct ScreePlotData {
    /// Eigenvalues of the covariance/correlation matrix, sorted descending
    /// (negative values are clamped to zero).
    pub eigenvalues: Array1<f64>,
    /// Running sum of `variance_proportions`.
    pub cumulative_variance: Array1<f64>,
    /// Each eigenvalue divided by the total of all eigenvalues.
    pub variance_proportions: Array1<f64>,
    /// Kaiser retention cut-off: 1.0 for a correlation matrix, otherwise the
    /// mean eigenvalue.
    pub kaiser_threshold: f64,
}
/// Output of a fitted factor-analysis model.
#[derive(Debug, Clone)]
pub struct FactorAnalysisResult {
    /// Factor loadings, shape `(n_features, n_factors)`.
    pub loadings: Array2<f64>,
    /// Factor scores for the training data, shape `(n_samples, n_factors)`.
    pub scores: Array2<f64>,
    /// Per-feature specific variances (diagonal of Psi).
    pub uniquenesses: Array1<f64>,
    /// Per-feature variance explained by the factors
    /// (row-wise sum of squared loadings).
    pub communalities: Array1<f64>,
    /// Mean of the uniquenesses (0.0 when there are no features).
    pub noise_variance: f64,
    /// Number of EM iterations actually performed.
    pub n_iter: usize,
    /// Log-likelihood of the model at the final parameters.
    pub log_likelihood: f64,
}
/// Maximum-likelihood factor analysis estimated with EM, with optional
/// varimax/promax rotation of the resulting loadings.
#[derive(Debug, Clone)]
pub struct FactorAnalysis {
    // Number of latent factors to extract.
    n_factors: usize,
    // Rotation applied to the loadings after fitting.
    rotation: RotationMethod,
    // Maximum number of EM iterations.
    max_iter: usize,
    // Convergence tolerance on the change in log-likelihood.
    tol: f64,
    // Exponent used to build the promax target matrix (clamped to >= 2).
    promax_power: usize,
    // Maximum number of varimax pairwise-rotation sweeps.
    varimax_max_iter: usize,
    // Whether to subtract the column means before fitting.
    center: bool,
    // Fitted state (None until fit_transform succeeds):
    mean: Option<Array1<f64>>,
    loadings: Option<Array2<f64>>,
    uniquenesses: Option<Array1<f64>>,
    training_data: Option<Array2<f64>>,
}
impl FactorAnalysis {
pub fn new(n_factors: usize) -> Self {
FactorAnalysis {
n_factors,
rotation: RotationMethod::None,
max_iter: 1000,
tol: 1e-8,
promax_power: 4,
varimax_max_iter: 100,
center: true,
mean: None,
loadings: None,
uniquenesses: None,
training_data: None,
}
}
pub fn with_rotation(mut self, rotation: RotationMethod) -> Self {
self.rotation = rotation;
self
}
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
pub fn with_tol(mut self, tol: f64) -> Self {
self.tol = tol;
self
}
pub fn with_promax_power(mut self, power: usize) -> Self {
self.promax_power = power.max(2);
self
}
pub fn with_center(mut self, center: bool) -> Self {
self.center = center;
self
}
/// Fit the factor model to `x` and return loadings, scores, uniquenesses,
/// communalities, and fit diagnostics.
///
/// # Errors
/// Returns `InvalidInput` when `n_factors` is zero or exceeds the number of
/// features, or when fewer than 2 samples are supplied; `ComputationError`
/// when the column means cannot be computed.
pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<FactorAnalysisResult>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    // Convert to f64 once; values that cannot be represented fall back to 0.0.
    let x_f64 = x.mapv(|v| <f64 as NumCast>::from(v).unwrap_or_default());
    let (n_samples, n_features) = x_f64.dim();
    check_positive(self.n_factors, "n_factors")?;
    // NOTE(review): this checks the array against its own dimensions, so it
    // always passes; kept for parity with project validation conventions.
    checkshape(&x_f64, &[n_samples, n_features], "x")?;
    if n_samples < 2 {
        return Err(TransformError::InvalidInput(
            "Need at least 2 samples for factor analysis".to_string(),
        ));
    }
    if self.n_factors > n_features {
        return Err(TransformError::InvalidInput(format!(
            "n_factors={} must be <= n_features={}",
            self.n_factors, n_features
        )));
    }
    let mean = x_f64.mean_axis(Axis(0)).ok_or_else(|| {
        TransformError::ComputationError("Failed to compute mean".to_string())
    })?;
    // x_f64 is not needed after this point, so move it and center in place
    // instead of cloning (the previous version cloned in both branches).
    let mut x_centered = x_f64;
    if self.center {
        for i in 0..n_samples {
            for j in 0..n_features {
                x_centered[[i, j]] -= mean[j];
            }
        }
    }
    // Sample covariance S = X^T X / (n - 1); it is symmetric, so compute the
    // upper triangle once and mirror it.
    let mut cov: Array2<f64> = Array2::zeros((n_features, n_features));
    for i in 0..n_features {
        for j in i..n_features {
            let mut sum = 0.0;
            for k in 0..n_samples {
                sum += x_centered[[k, i]] * x_centered[[k, j]];
            }
            let c = sum / (n_samples - 1) as f64;
            cov[[i, j]] = c;
            cov[[j, i]] = c;
        }
    }
    // Warm-start EM from an SVD-based (PCA-like) solution.
    let (loadings_init, uniquenesses_init) =
        self.initialize_loadings(&x_centered, &cov, n_samples)?;
    let (loadings, uniquenesses, n_iter, log_likelihood) = self.em_algorithm(
        &cov,
        loadings_init,
        uniquenesses_init,
        n_samples,
        n_features,
    )?;
    // Optional rotation of the ML loadings.
    let loadings_rotated = match &self.rotation {
        RotationMethod::None => loadings,
        RotationMethod::Varimax => self.varimax_rotation(&loadings)?,
        RotationMethod::Promax => self.promax_rotation(&loadings)?,
    };
    let scores = self.compute_scores(&x_centered, &loadings_rotated, &uniquenesses)?;
    // Communality of feature j = sum of its squared rotated loadings.
    let mut communalities: Array1<f64> = Array1::zeros(n_features);
    for j in 0..n_features {
        for f in 0..self.n_factors {
            communalities[j] += loadings_rotated[[j, f]] * loadings_rotated[[j, f]];
        }
    }
    // Scalar noise estimate: average uniqueness (guard against empty input).
    let noise_variance = if uniquenesses.is_empty() {
        0.0
    } else {
        uniquenesses.sum() / uniquenesses.len() as f64
    };
    // Persist fitted state on the estimator.
    self.mean = Some(mean);
    self.loadings = Some(loadings_rotated.clone());
    self.uniquenesses = Some(uniquenesses.clone());
    self.training_data = Some(x_centered);
    Ok(FactorAnalysisResult {
        loadings: loadings_rotated,
        scores,
        uniquenesses,
        communalities,
        noise_variance,
        n_iter,
        log_likelihood,
    })
}
/// Initialize loadings and uniquenesses from a truncated SVD of the centered
/// data — a PCA-style warm start for the EM iterations.
fn initialize_loadings(
    &self,
    x_centered: &Array2<f64>,
    _cov: &Array2<f64>,
    n_samples: usize,
) -> Result<(Array2<f64>, Array1<f64>)> {
    let n_features = x_centered.shape()[1];
    // Thin SVD of the (n_samples x n_features) centered data matrix.
    let (_u, s, vt) = svd::<f64>(&x_centered.view(), true, None)
        .map_err(|e| TransformError::LinalgError(e))?;
    // Singular values of X relate to sample standard deviations via
    // 1 / sqrt(n - 1).
    let scale = 1.0 / (n_samples as f64 - 1.0).sqrt();
    let mut loadings: Array2<f64> = Array2::zeros((n_features, self.n_factors));
    for j in 0..n_features {
        for f in 0..self.n_factors {
            // Guard against requesting more factors than available singular
            // values/components; extra columns stay zero.
            if f < s.len() && f < vt.shape()[0] {
                loadings[[j, f]] = vt[[f, j]] * s[f] * scale;
            }
        }
    }
    // Uniqueness = sample variance not explained by the initial loadings,
    // floored at 1e-6 to keep Psi strictly positive.
    let mut uniquenesses: Array1<f64> = Array1::zeros(n_features);
    for j in 0..n_features {
        let mut communality = 0.0;
        for f in 0..self.n_factors {
            communality += loadings[[j, f]] * loadings[[j, f]];
        }
        let mut var_j = 0.0;
        for k in 0..n_samples {
            var_j += x_centered[[k, j]] * x_centered[[k, j]];
        }
        var_j /= (n_samples - 1) as f64;
        uniquenesses[j] = (var_j - communality).max(1e-6);
    }
    Ok((loadings, uniquenesses))
}
/// EM iterations for the factor model Sigma = W W^T + Psi, driven by the
/// sample covariance `cov`.
///
/// Returns the fitted loadings W, the uniquenesses diag(Psi), the number of
/// iterations performed, and the final log-likelihood. Convergence is
/// declared when the absolute change in log-likelihood drops below `tol`.
fn em_algorithm(
    &self,
    cov: &Array2<f64>,
    mut loadings: Array2<f64>,
    mut uniquenesses: Array1<f64>,
    n_samples: usize,
    n_features: usize,
) -> Result<(Array2<f64>, Array1<f64>, usize, f64)> {
    let n_factors = self.n_factors;
    let mut prev_ll = f64::NEG_INFINITY;
    let mut n_iter = 0;
    for iter in 0..self.max_iter {
        n_iter = iter + 1;
        // psi_inv_w = Psi^{-1} W; uniquenesses floored at 1e-12 to avoid
        // division by (near) zero.
        let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
        for j in 0..n_features {
            let psi_inv = 1.0 / uniquenesses[j].max(1e-12);
            for f in 0..n_factors {
                psi_inv_w[[j, f]] = psi_inv * loadings[[j, f]];
            }
        }
        // wt_psi_inv_w = I + W^T Psi^{-1} W.
        let mut wt_psi_inv_w: Array2<f64> = Array2::zeros((n_factors, n_factors));
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    wt_psi_inv_w[[i, j]] += loadings[[k, i]] * psi_inv_w[[k, j]];
                }
            }
        }
        for i in 0..n_factors {
            wt_psi_inv_w[[i, i]] += 1.0;
        }
        // E-step: posterior factor covariance sigma_z = (I + W^T Psi^{-1} W)^{-1}.
        let sigma_z = self.invert_small_matrix(&wt_psi_inv_w)?;
        // beta = sigma_z W^T Psi^{-1} maps observations to expected factors.
        let mut beta: Array2<f64> = Array2::zeros((n_factors, n_features));
        for i in 0..n_factors {
            for j in 0..n_features {
                for k in 0..n_factors {
                    beta[[i, j]] += sigma_z[[i, k]] * psi_inv_w[[j, k]];
                }
            }
        }
        // ez_xt = beta S: expected cross moment E[z x^T].
        let mut ez_xt: Array2<f64> = Array2::zeros((n_factors, n_features));
        for i in 0..n_factors {
            for j in 0..n_features {
                for k in 0..n_features {
                    ez_xt[[i, j]] += beta[[i, k]] * cov[[k, j]];
                }
            }
        }
        // ez_zt = sigma_z + beta S beta^T: expected second moment E[z z^T]
        // (uses ez_xt[j, k] = (beta S)[j, k] and the symmetry of S).
        let mut ez_zt = sigma_z.clone();
        for i in 0..n_factors {
            for j in 0..n_factors {
                for k in 0..n_features {
                    ez_zt[[i, j]] += beta[[i, k]] * ez_xt[[j, k]];
                }
            }
        }
        let ez_zt_inv = self.invert_small_matrix(&ez_zt)?;
        // s_beta_t = S beta^T.
        let mut s_beta_t: Array2<f64> = Array2::zeros((n_features, n_factors));
        for i in 0..n_features {
            for j in 0..n_factors {
                for k in 0..n_features {
                    s_beta_t[[i, j]] += cov[[i, k]] * beta[[j, k]];
                }
            }
        }
        // M-step loading update: W_new = S beta^T (E[z z^T])^{-1}.
        let mut new_loadings: Array2<f64> = Array2::zeros((n_features, n_factors));
        for i in 0..n_features {
            for j in 0..n_factors {
                for k in 0..n_factors {
                    new_loadings[[i, j]] += s_beta_t[[i, k]] * ez_zt_inv[[k, j]];
                }
            }
        }
        // M-step uniqueness update: psi_j = S_jj - (W_new E[z x^T])_jj,
        // floored at 1e-6.
        let mut new_uniquenesses: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            let mut correction = 0.0;
            for f in 0..n_factors {
                correction += new_loadings[[j, f]] * ez_xt[[f, j]];
            }
            new_uniquenesses[j] = (cov[[j, j]] - correction).max(1e-6);
        }
        loadings = new_loadings;
        uniquenesses = new_uniquenesses;
        let ll =
            self.compute_log_likelihood(cov, &loadings, &uniquenesses, n_samples, n_features);
        // Stop once the log-likelihood has stabilized.
        if (ll - prev_ll).abs() < self.tol {
            break;
        }
        prev_ll = ll;
    }
    // Recompute the likelihood at the final parameters for the return value.
    let ll = self.compute_log_likelihood(cov, &loadings, &uniquenesses, n_samples, n_features);
    Ok((loadings, uniquenesses, n_iter, ll))
}
/// Gaussian log-likelihood of the sample covariance `cov` under the factor
/// model Sigma = W W^T + Psi.
///
/// Uses log|Sigma| = log|Psi| + log|I + W^T Psi^{-1} W| (matrix determinant
/// lemma) and the Woodbury identity for tr(Sigma^{-1} S), so only
/// n_factors x n_factors matrices are inverted/decomposed.
///
/// Note: the previous version also assembled the full p x p `sigma` matrix
/// and never read it; that dead O(p^2 k) computation has been removed.
fn compute_log_likelihood(
    &self,
    cov: &Array2<f64>,
    loadings: &Array2<f64>,
    uniquenesses: &Array1<f64>,
    n_samples: usize,
    n_features: usize,
) -> f64 {
    let n_factors = self.n_factors;
    // log|Psi|, with uniquenesses floored to keep the log finite.
    let mut log_det_psi = 0.0;
    for j in 0..n_features {
        log_det_psi += uniquenesses[j].max(1e-12).ln();
    }
    // M = I + W^T Psi^{-1} W.
    let mut m: Array2<f64> = Array2::zeros((n_factors, n_factors));
    for i in 0..n_factors {
        for j in 0..n_factors {
            for k in 0..n_features {
                m[[i, j]] += loadings[[k, i]] * loadings[[k, j]] / uniquenesses[k].max(1e-12);
            }
            if i == j {
                m[[i, j]] += 1.0;
            }
        }
    }
    // log|M| via the eigenvalues of the symmetric matrix M; fall back to 0
    // if the decomposition fails.
    let log_det_m = match scirs2_linalg::eigh::<f64>(&m.view(), None) {
        Ok((eigenvalues, _)) => eigenvalues.iter().map(|&e| e.max(1e-12).ln()).sum::<f64>(),
        Err(_) => 0.0,
    };
    let log_det_sigma = log_det_psi + log_det_m;
    // M^{-1}; fall back to identity on failure so the result stays finite.
    let m_inv = match self.invert_small_matrix(&m) {
        Ok(inv) => inv,
        Err(_) => Array2::eye(n_factors),
    };
    // psi_inv_w = Psi^{-1} W.
    let mut psi_inv_w: Array2<f64> = Array2::zeros((n_features, n_factors));
    for j in 0..n_features {
        for f in 0..n_factors {
            psi_inv_w[[j, f]] = loadings[[j, f]] / uniquenesses[j].max(1e-12);
        }
    }
    let mut psi_inv_w_m_inv: Array2<f64> = Array2::zeros((n_features, n_factors));
    for i in 0..n_features {
        for j in 0..n_factors {
            for k in 0..n_factors {
                psi_inv_w_m_inv[[i, j]] += psi_inv_w[[i, k]] * m_inv[[k, j]];
            }
        }
    }
    // tr(Psi^{-1} S).
    let mut trace_psi_inv_s = 0.0;
    for j in 0..n_features {
        trace_psi_inv_s += cov[[j, j]] / uniquenesses[j].max(1e-12);
    }
    // Woodbury correction term: tr(Psi^{-1} W M^{-1} W^T Psi^{-1} S).
    let mut trace_correction = 0.0;
    for i in 0..n_features {
        for j in 0..n_features {
            let mut sigma_inv_ij = 0.0;
            for f in 0..n_factors {
                sigma_inv_ij += psi_inv_w_m_inv[[i, f]] * psi_inv_w[[j, f]];
            }
            trace_correction += sigma_inv_ij * cov[[j, i]];
        }
    }
    let trace_sigma_inv_s = trace_psi_inv_s - trace_correction;
    let n = n_samples as f64;
    let p = n_features as f64;
    // -n/2 [ p ln(2 pi) + ln|Sigma| + tr(Sigma^{-1} S) ]
    -n / 2.0 * (p * (2.0 * std::f64::consts::PI).ln() + log_det_sigma + trace_sigma_inv_s)
}
/// Invert a small square matrix by Gauss–Jordan elimination with partial
/// pivoting on the augmented system [A | I].
///
/// Near-zero pivots are regularized by adding 1e-8 to the diagonal entry
/// rather than failing, so the result for (near-)singular inputs is an
/// approximate pseudo-inverse. The previous version duplicated the
/// row-normalization loop in both pivot branches; it is now written once.
fn invert_small_matrix(&self, a: &Array2<f64>) -> Result<Array2<f64>> {
    let n = a.shape()[0];
    // Build the augmented matrix [A | I].
    let mut augmented: Array2<f64> = Array2::zeros((n, 2 * n));
    for i in 0..n {
        for j in 0..n {
            augmented[[i, j]] = a[[i, j]];
        }
        augmented[[i, n + i]] = 1.0;
    }
    for i in 0..n {
        // Partial pivoting: pick the row with the largest |entry| in column i.
        let mut max_row = i;
        for k in i + 1..n {
            if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
                max_row = k;
            }
        }
        if max_row != i {
            for j in 0..2 * n {
                let temp = augmented[[i, j]];
                augmented[[i, j]] = augmented[[max_row, j]];
                augmented[[max_row, j]] = temp;
            }
        }
        // Regularize a (near-)zero pivot, then normalize the pivot row once.
        let mut pivot = augmented[[i, i]];
        if pivot.abs() < 1e-14 {
            augmented[[i, i]] += 1e-8;
            pivot = augmented[[i, i]];
        }
        for j in 0..2 * n {
            augmented[[i, j]] /= pivot;
        }
        // Eliminate column i from every other row.
        for k in 0..n {
            if k != i {
                let factor = augmented[[k, i]];
                for j in 0..2 * n {
                    augmented[[k, j]] -= factor * augmented[[i, j]];
                }
            }
        }
    }
    // The right half of the augmented matrix now holds A^{-1}.
    let mut inv: Array2<f64> = Array2::zeros((n, n));
    for i in 0..n {
        for j in 0..n {
            inv[[i, j]] = augmented[[i, n + j]];
        }
    }
    Ok(inv)
}
/// Estimate factor scores with the regression (Thomson) method:
/// scores = X Psi^{-1} W (I + W^T Psi^{-1} W)^{-1}.
fn compute_scores(
    &self,
    x_centered: &Array2<f64>,
    loadings: &Array2<f64>,
    uniquenesses: &Array1<f64>,
) -> Result<Array2<f64>> {
    let (n_samples, n_features) = x_centered.dim();
    let n_factors = self.n_factors;
    // w_scaled = Psi^{-1} W, with uniquenesses floored at 1e-12.
    let mut w_scaled: Array2<f64> = Array2::zeros((n_features, n_factors));
    for row in 0..n_features {
        let inv_psi = 1.0 / uniquenesses[row].max(1e-12);
        for f in 0..n_factors {
            w_scaled[[row, f]] = inv_psi * loadings[[row, f]];
        }
    }
    // m = I + W^T Psi^{-1} W.
    let mut m: Array2<f64> = Array2::zeros((n_factors, n_factors));
    for a in 0..n_factors {
        for b in 0..n_factors {
            let mut acc = 0.0;
            for row in 0..n_features {
                acc += loadings[[row, a]] * w_scaled[[row, b]];
            }
            m[[a, b]] = acc;
        }
        m[[a, a]] += 1.0;
    }
    let m_inv = self.invert_small_matrix(&m)?;
    // scoring = Psi^{-1} W M^{-1}, shape (n_features, n_factors).
    let mut scoring: Array2<f64> = Array2::zeros((n_features, n_factors));
    for row in 0..n_features {
        for f in 0..n_factors {
            let mut acc = 0.0;
            for k in 0..n_factors {
                acc += w_scaled[[row, k]] * m_inv[[k, f]];
            }
            scoring[[row, f]] = acc;
        }
    }
    // scores = X_centered * scoring.
    let mut scores: Array2<f64> = Array2::zeros((n_samples, n_factors));
    for sample in 0..n_samples {
        for f in 0..n_factors {
            let mut acc = 0.0;
            for feat in 0..n_features {
                acc += x_centered[[sample, feat]] * scoring[[feat, f]];
            }
            scores[[sample, f]] = acc;
        }
    }
    Ok(scores)
}
/// Varimax rotation: an orthogonal rotation that maximizes the variance of
/// the squared loadings, implemented as iterated pairwise planar rotations
/// with Kaiser (row) normalization.
fn varimax_rotation(&self, loadings: &Array2<f64>) -> Result<Array2<f64>> {
    let (n_features, n_factors) = loadings.dim();
    // Rotation is only meaningful with at least two factors.
    if n_factors < 2 {
        return Ok(loadings.clone());
    }
    let mut rotated = loadings.clone();
    // Kaiser normalization: h[j] is the row norm (floored so division below
    // is safe); rows are scaled to unit length before rotating.
    let mut h: Array1<f64> = Array1::zeros(n_features);
    for j in 0..n_features {
        for f in 0..n_factors {
            h[j] += rotated[[j, f]] * rotated[[j, f]];
        }
        h[j] = h[j].sqrt().max(1e-10);
    }
    let mut normalized = rotated.clone();
    for j in 0..n_features {
        for f in 0..n_factors {
            normalized[[j, f]] /= h[j];
        }
    }
    // Sweep all factor-column pairs until no pair needs rotating.
    for _iter in 0..self.varimax_max_iter {
        let mut changed = false;
        for p in 0..n_factors {
            for q in (p + 1)..n_factors {
                let n = n_features as f64;
                let mut sum_a: f64 = 0.0;
                let mut sum_b: f64 = 0.0;
                let mut sum_c: f64 = 0.0;
                let mut sum_d: f64 = 0.0;
                for j in 0..n_features {
                    let xj = normalized[[j, p]];
                    let yj = normalized[[j, q]];
                    let a_j = xj * xj - yj * yj;
                    let b_j = 2.0 * xj * yj;
                    sum_a += a_j;
                    sum_b += b_j;
                    sum_c += a_j * a_j - b_j * b_j;
                    sum_d += 2.0 * a_j * b_j;
                }
                // Closed-form optimal angle for this column pair.
                let u = sum_d - 2.0 * sum_a * sum_b / n;
                let v = sum_c - (sum_a * sum_a - sum_b * sum_b) / n;
                let angle = 0.25 * u.atan2(v);
                // Skip negligible rotations.
                if angle.abs() < 1e-10 {
                    continue;
                }
                changed = true;
                let cos_a = angle.cos();
                let sin_a = angle.sin();
                // Apply the planar rotation to columns p and q.
                for j in 0..n_features {
                    let xj = normalized[[j, p]];
                    let yj = normalized[[j, q]];
                    normalized[[j, p]] = cos_a * xj + sin_a * yj;
                    normalized[[j, q]] = -sin_a * xj + cos_a * yj;
                }
            }
        }
        if !changed {
            break;
        }
    }
    // Undo the Kaiser normalization to restore the original row norms.
    for j in 0..n_features {
        for f in 0..n_factors {
            rotated[[j, f]] = normalized[[j, f]] * h[j];
        }
    }
    Ok(rotated)
}
/// Promax rotation: start from the varimax solution, raise its loadings to
/// `promax_power` (keeping the sign) to form a sparse target, then fit an
/// oblique transformation to that target by least squares.
///
/// NOTE(review): classic promax also normalizes the columns of the
/// transformation matrix; this implementation applies the raw least-squares
/// transform — confirm whether that is intended.
fn promax_rotation(&self, loadings: &Array2<f64>) -> Result<Array2<f64>> {
    let (n_features, n_factors) = loadings.dim();
    if n_factors < 2 {
        return Ok(loadings.clone());
    }
    let varimax_loadings = self.varimax_rotation(loadings)?;
    // Target matrix: element-wise sign(a) * |a|^power sharpens the varimax
    // pattern toward simple structure.
    let mut target: Array2<f64> = Array2::zeros((n_features, n_factors));
    let power = self.promax_power as f64;
    for j in 0..n_features {
        for f in 0..n_factors {
            let val = varimax_loadings[[j, f]];
            let sign = if val >= 0.0 { 1.0 } else { -1.0 };
            target[[j, f]] = sign * val.abs().powf(power);
        }
    }
    // Least-squares transform T = (A^T A)^{-1} A^T target, where A is the
    // varimax loading matrix.
    let mut at_a: Array2<f64> = Array2::zeros((n_factors, n_factors));
    for i in 0..n_factors {
        for j in 0..n_factors {
            for k in 0..n_features {
                at_a[[i, j]] += varimax_loadings[[k, i]] * varimax_loadings[[k, j]];
            }
        }
    }
    let at_a_inv = self.invert_small_matrix(&at_a)?;
    let mut at_target: Array2<f64> = Array2::zeros((n_factors, n_factors));
    for i in 0..n_factors {
        for j in 0..n_factors {
            for k in 0..n_features {
                at_target[[i, j]] += varimax_loadings[[k, i]] * target[[k, j]];
            }
        }
    }
    let mut t_mat: Array2<f64> = Array2::zeros((n_factors, n_factors));
    for i in 0..n_factors {
        for j in 0..n_factors {
            for k in 0..n_factors {
                t_mat[[i, j]] += at_a_inv[[i, k]] * at_target[[k, j]];
            }
        }
    }
    // Rotated loadings = A T.
    let mut result: Array2<f64> = Array2::zeros((n_features, n_factors));
    for i in 0..n_features {
        for j in 0..n_factors {
            for k in 0..n_factors {
                result[[i, j]] += varimax_loadings[[i, k]] * t_mat[[k, j]];
            }
        }
    }
    Ok(result)
}
/// Fitted factor loadings, or `None` if `fit_transform` has not succeeded yet.
pub fn loadings(&self) -> Option<&Array2<f64>> {
    self.loadings.as_ref()
}
/// Fitted uniquenesses, or `None` if `fit_transform` has not succeeded yet.
pub fn uniquenesses(&self) -> Option<&Array1<f64>> {
    self.uniquenesses.as_ref()
}
/// Compute scree-plot data (sorted eigenvalues, variance proportions,
/// cumulative variance, Kaiser threshold) from the sample covariance — or,
/// when `use_correlation` is true, the correlation matrix — of `x`.
///
/// # Errors
/// Returns `InvalidInput` for fewer than 2 samples and `ComputationError`
/// if the column means cannot be computed.
pub fn scree_plot_data<S>(x: &ArrayBase<S, Ix2>, use_correlation: bool) -> Result<ScreePlotData>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let x_f64 = x.mapv(|v| <f64 as NumCast>::from(v).unwrap_or_default());
    let (n_samples, n_features) = x_f64.dim();
    if n_samples < 2 {
        return Err(TransformError::InvalidInput(
            "Need at least 2 samples for scree plot".to_string(),
        ));
    }
    let mean = x_f64.mean_axis(Axis(0)).ok_or_else(|| {
        TransformError::ComputationError("Failed to compute mean".to_string())
    })?;
    // x_f64 is not used afterwards, so move it and center in place instead
    // of cloning.
    let mut x_centered = x_f64;
    for i in 0..n_samples {
        for j in 0..n_features {
            x_centered[[i, j]] -= mean[j];
        }
    }
    // Sample covariance; symmetric, so compute the upper triangle and mirror.
    let mut cov: Array2<f64> = Array2::zeros((n_features, n_features));
    for i in 0..n_features {
        for j in i..n_features {
            let mut sum = 0.0;
            for k in 0..n_samples {
                sum += x_centered[[k, i]] * x_centered[[k, j]];
            }
            let c = sum / (n_samples - 1) as f64;
            cov[[i, j]] = c;
            cov[[j, i]] = c;
        }
    }
    if use_correlation {
        // Rescale to a correlation matrix using per-feature std deviations
        // (floored to avoid division by zero for constant features).
        let mut stds: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            stds[j] = cov[[j, j]].sqrt().max(1e-12);
        }
        for i in 0..n_features {
            for j in 0..n_features {
                cov[[i, j]] /= stds[i] * stds[j];
            }
        }
    }
    let (eigenvalues_raw, _eigenvectors) =
        scirs2_linalg::eigh::<f64>(&cov.view(), None).map_err(TransformError::LinalgError)?;
    // Clamp tiny negative eigenvalues (numerical noise) to zero and sort
    // descending. Stability is irrelevant for plain f64 keys, so
    // sort_unstable_by is the right choice.
    let mut eig_vec: Vec<f64> = eigenvalues_raw.iter().map(|&e| e.max(0.0)).collect();
    eig_vec.sort_unstable_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
    let n = eig_vec.len();
    let eigenvalues = Array1::from_vec(eig_vec);
    let total: f64 = eigenvalues.sum();
    let mut variance_proportions: Array1<f64> = Array1::zeros(n);
    let mut cumulative_variance: Array1<f64> = Array1::zeros(n);
    // If all eigenvalues are zero the proportions stay zero.
    if total > 0.0 {
        let mut cumsum = 0.0;
        for i in 0..n {
            variance_proportions[i] = eigenvalues[i] / total;
            cumsum += variance_proportions[i];
            cumulative_variance[i] = cumsum;
        }
    }
    // Kaiser rule: retain components with eigenvalue above 1.0 (correlation)
    // or above the mean eigenvalue (covariance).
    let kaiser_threshold = if use_correlation {
        1.0
    } else if n > 0 {
        total / n as f64
    } else {
        0.0
    };
    Ok(ScreePlotData {
        eigenvalues,
        cumulative_variance,
        variance_proportions,
        kaiser_threshold,
    })
}
}
/// Convenience function: fit a default-configured [`FactorAnalysis`] model
/// with `n_factors` factors to `data` and return the full result.
pub fn factor_analysis<S>(
    data: &ArrayBase<S, Ix2>,
    n_factors: usize,
) -> Result<FactorAnalysisResult>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    // The estimator is only needed for this one call, so fit on a temporary.
    FactorAnalysis::new(n_factors).fit_transform(data)
}
/// Convenience wrapper around [`FactorAnalysis::scree_plot_data`].
pub fn scree_plot_data<S>(data: &ArrayBase<S, Ix2>, use_correlation: bool) -> Result<ScreePlotData>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    FactorAnalysis::scree_plot_data(data, use_correlation)
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array;

    /// Build synthetic data with a known simple structure: feature j loads
    /// 0.8 on factor j % n_factors and 0.1 on the others, with uniform
    /// factor scores in [-2, 2) and uniform noise in [-0.3, 0.3).
    fn generate_factor_data(n_samples: usize, n_features: usize, n_factors: usize) -> Array2<f64> {
        let mut rng = scirs2_core::random::rng();
        let mut data: Array2<f64> = Array2::zeros((n_samples, n_features));
        let mut true_loadings: Array2<f64> = Array2::zeros((n_features, n_factors));
        for j in 0..n_features {
            let factor_idx = j % n_factors;
            true_loadings[[j, factor_idx]] = 0.8;
            for f in 0..n_factors {
                if f != factor_idx {
                    true_loadings[[j, f]] = 0.1;
                }
            }
        }
        for i in 0..n_samples {
            let mut factors: Array1<f64> = Array1::zeros(n_factors);
            for f in 0..n_factors {
                factors[f] = scirs2_core::random::RngExt::random_range(&mut rng, -2.0..2.0);
            }
            for j in 0..n_features {
                let mut val = 0.0;
                for f in 0..n_factors {
                    val += true_loadings[[j, f]] * factors[f];
                }
                val += scirs2_core::random::RngExt::random_range(&mut rng, -0.3..0.3);
                data[[i, j]] = val;
            }
        }
        data
    }

    // Output shapes and finiteness of a basic (unrotated) fit.
    #[test]
    fn test_factor_analysis_basic() {
        let data = generate_factor_data(100, 6, 2);
        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");
        assert_eq!(result.loadings.shape(), &[6, 2]);
        assert_eq!(result.scores.shape(), &[100, 2]);
        assert_eq!(result.uniquenesses.len(), 6);
        assert_eq!(result.communalities.len(), 6);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
        for val in result.scores.iter() {
            assert!(val.is_finite());
        }
    }

    // Sign constraints: uniquenesses strictly positive (floored in EM),
    // communalities non-negative (sums of squares).
    #[test]
    fn test_factor_analysis_uniquenesses() {
        let data = generate_factor_data(100, 6, 2);
        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");
        for &u in result.uniquenesses.iter() {
            assert!(u > 0.0, "Uniqueness should be positive, got {}", u);
        }
        for &c in result.communalities.iter() {
            assert!(c >= 0.0, "Communality should be non-negative, got {}", c);
        }
    }

    // Varimax rotation produces finite loadings with unchanged shape.
    #[test]
    fn test_factor_analysis_varimax() {
        let data = generate_factor_data(100, 6, 2);
        let mut fa = FactorAnalysis::new(2).with_rotation(RotationMethod::Varimax);
        let result = fa
            .fit_transform(&data)
            .expect("FA varimax fit_transform failed");
        assert_eq!(result.loadings.shape(), &[6, 2]);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
    }

    // Promax rotation produces finite loadings with unchanged shape.
    #[test]
    fn test_factor_analysis_promax() {
        let data = generate_factor_data(100, 6, 2);
        let mut fa = FactorAnalysis::new(2).with_rotation(RotationMethod::Promax);
        let result = fa
            .fit_transform(&data)
            .expect("FA promax fit_transform failed");
        assert_eq!(result.loadings.shape(), &[6, 2]);
        for val in result.loadings.iter() {
            assert!(val.is_finite());
        }
    }

    // Iteration count respects max_iter and the likelihood is finite.
    #[test]
    fn test_factor_analysis_convergence() {
        let data = generate_factor_data(50, 4, 2);
        let mut fa = FactorAnalysis::new(2).with_max_iter(500).with_tol(1e-6);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");
        assert!(
            result.n_iter <= 500,
            "Should converge within max_iter, got {}",
            result.n_iter
        );
        assert!(
            result.log_likelihood.is_finite(),
            "Log-likelihood should be finite, got {}",
            result.log_likelihood
        );
    }

    // Edge case: a single factor (rotation paths short-circuit here).
    #[test]
    fn test_factor_analysis_single_factor() {
        let data = generate_factor_data(50, 4, 1);
        let mut fa = FactorAnalysis::new(1);
        let result = fa.fit_transform(&data).expect("FA fit_transform failed");
        assert_eq!(result.loadings.shape(), &[4, 1]);
        assert_eq!(result.scores.shape(), &[50, 1]);
    }

    // The free-function wrapper delegates to the estimator.
    #[test]
    fn test_factor_analysis_convenience_fn() {
        let data = generate_factor_data(50, 6, 2);
        let result = factor_analysis(&data, 2).expect("factor_analysis failed");
        assert_eq!(result.loadings.shape(), &[6, 2]);
    }

    // n_factors > n_features must be rejected.
    #[test]
    fn test_factor_analysis_invalid_params() {
        let data = Array2::<f64>::zeros((10, 3));
        let mut fa = FactorAnalysis::new(5);
        assert!(fa.fit_transform(&data).is_err());
    }

    // Scree data from the covariance matrix: descending non-negative
    // eigenvalues, proportions summing to 1, cumulative ending at 1.
    #[test]
    fn test_scree_plot_data_covariance() {
        let data = generate_factor_data(100, 6, 2);
        let scree = FactorAnalysis::scree_plot_data(&data, false).expect("scree_plot_data failed");
        assert_eq!(scree.eigenvalues.len(), 6);
        assert_eq!(scree.variance_proportions.len(), 6);
        assert_eq!(scree.cumulative_variance.len(), 6);
        for i in 1..scree.eigenvalues.len() {
            assert!(
                scree.eigenvalues[i] <= scree.eigenvalues[i - 1] + 1e-6,
                "Eigenvalues should be in descending order: eigenvalue[{}]={} > eigenvalue[{}]={}",
                i,
                scree.eigenvalues[i],
                i - 1,
                scree.eigenvalues[i - 1]
            );
        }
        for &e in scree.eigenvalues.iter() {
            assert!(e >= 0.0, "Eigenvalue should be non-negative, got {}", e);
        }
        let total: f64 = scree.variance_proportions.sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Variance proportions should sum to 1, got {}",
            total
        );
        let last_idx = scree.cumulative_variance.len() - 1;
        assert!(
            (scree.cumulative_variance[last_idx] - 1.0).abs() < 1e-6,
            "Cumulative variance should end at 1"
        );
    }

    // Scree data from the correlation matrix: Kaiser threshold fixed at 1.0.
    #[test]
    fn test_scree_plot_data_correlation() {
        let data = generate_factor_data(200, 6, 2);
        let scree = FactorAnalysis::scree_plot_data(&data, true)
            .expect("scree_plot_data correlation failed");
        assert!(
            (scree.kaiser_threshold - 1.0).abs() < 1e-10,
            "Kaiser threshold for correlation matrix should be 1.0, got {}",
            scree.kaiser_threshold
        );
        for &e in scree.eigenvalues.iter() {
            assert!(
                e >= 0.0 && e.is_finite(),
                "Eigenvalue should be finite non-negative, got {}",
                e
            );
        }
        assert!(
            scree.eigenvalues[0] >= scree.eigenvalues[scree.eigenvalues.len() - 1],
            "Eigenvalues should be in descending order"
        );
        let total: f64 = scree.variance_proportions.sum();
        assert!(
            (total - 1.0).abs() < 1e-6,
            "Variance proportions should sum to 1, got {}",
            total
        );
    }

    // The free-function scree wrapper delegates correctly.
    #[test]
    fn test_scree_plot_convenience_fn() {
        let data = generate_factor_data(50, 4, 2);
        let scree = scree_plot_data(&data, false).expect("scree_plot_data convenience fn failed");
        assert_eq!(scree.eigenvalues.len(), 4);
        assert!(scree.eigenvalues.iter().all(|e| e.is_finite()));
    }

    // Variance decomposition: communality + uniqueness should roughly equal
    // the per-feature sample variance (loose 50% relative tolerance because
    // the fit is approximate on random data).
    #[test]
    fn test_factor_analysis_communalities_sum_constraint() {
        let data = generate_factor_data(200, 4, 2);
        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA failed");
        let n_samples = data.shape()[0];
        let n_features = data.shape()[1];
        let mean = data.mean_axis(Axis(0)).expect("mean computation failed");
        let mut variances: Array1<f64> = Array1::zeros(n_features);
        for j in 0..n_features {
            let mut var = 0.0;
            for i in 0..n_samples {
                let diff = data[[i, j]] - mean[j];
                var += diff * diff;
            }
            variances[j] = var / (n_samples - 1) as f64;
        }
        for j in 0..n_features {
            let total = result.communalities[j] + result.uniquenesses[j];
            let variance = variances[j];
            let rel_err = (total - variance).abs() / variance.max(1e-12);
            assert!(
                rel_err < 0.5,
                "Feature {}: communality ({}) + uniqueness ({}) = {} should approximate variance ({})",
                j, result.communalities[j], result.uniquenesses[j], total, variance
            );
        }
    }

    // Fewer than 2 samples must be rejected by fit_transform.
    #[test]
    fn test_factor_analysis_too_few_samples() {
        let data = Array2::<f64>::zeros((1, 3));
        let mut fa = FactorAnalysis::new(1);
        let result = fa.fit_transform(&data);
        assert!(result.is_err());
    }

    // Builder chain compiles and leaves the fitted state empty before fitting.
    #[test]
    fn test_factor_analysis_builder_methods() {
        let fa = FactorAnalysis::new(3)
            .with_rotation(RotationMethod::Varimax)
            .with_max_iter(200)
            .with_tol(1e-5)
            .with_promax_power(6)
            .with_center(false);
        assert!(fa.loadings().is_none());
        assert!(fa.uniquenesses().is_none());
    }

    // Fewer than 2 samples must be rejected by scree_plot_data.
    #[test]
    fn test_scree_plot_too_few_samples() {
        let data = Array2::<f64>::zeros((1, 3));
        let result = scree_plot_data(&data, false);
        assert!(result.is_err());
    }

    // noise_variance must equal the mean of the uniquenesses.
    #[test]
    fn test_factor_analysis_noise_variance() {
        let data = generate_factor_data(100, 6, 2);
        let mut fa = FactorAnalysis::new(2);
        let result = fa.fit_transform(&data).expect("FA failed");
        assert!(result.noise_variance > 0.0);
        assert!(result.noise_variance.is_finite());
        let expected = result.uniquenesses.sum() / result.uniquenesses.len() as f64;
        assert!(
            (result.noise_variance - expected).abs() < 1e-10,
            "Noise variance should be average of uniquenesses"
        );
    }
}