use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView2, Axis, ScalarOperand};
use scirs2_core::numeric::{Float, FromPrimitive, One, Zero};
use scirs2_core::{simd_ops::SimdUnifiedOps, validation::*};
use statrs::statistics::Statistics;
use std::marker::PhantomData;
pub struct EnhancedPCA<F> {
pub algorithm: PCAAlgorithm,
pub config: PCAConfig,
pub results: Option<PCAResult<F>>,
_phantom: PhantomData<F>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum PCAAlgorithm {
SVD,
Eigen,
Randomized {
n_iter: usize,
n_oversamples: usize,
},
Incremental {
batchsize: usize,
},
Sparse {
alpha: f64,
max_iter: usize,
},
Robust {
lambda: f64,
max_iter: usize,
},
}
#[derive(Debug, Clone)]
pub struct PCAConfig {
pub n_components: Option<usize>,
pub center: bool,
pub scale: bool,
pub tolerance: f64,
pub seed: Option<u64>,
pub parallel: bool,
}
impl Default for PCAConfig {
fn default() -> Self {
Self {
n_components: None,
center: true,
scale: false,
tolerance: 1e-6,
seed: None,
parallel: true,
}
}
}
#[derive(Debug, Clone)]
pub struct PCAResult<F> {
pub components: Array2<F>,
pub explained_variance: Array1<F>,
pub explained_variance_ratio: Array1<F>,
pub cumulative_variance_ratio: Array1<F>,
pub singular_values: Array1<F>,
pub mean: Array1<F>,
pub scale: Option<Array1<F>>,
pub total_variance: F,
pub n_components: usize,
pub algorithm: PCAAlgorithm,
}
impl<F> EnhancedPCA<F>
where
F: Float
+ Zero
+ One
+ Copy
+ Send
+ Sync
+ SimdUnifiedOps
+ FromPrimitive
+ std::fmt::Display
+ std::iter::Sum
+ ScalarOperand,
{
pub fn new(algorithm: PCAAlgorithm, config: PCAConfig) -> Self {
Self {
algorithm,
config,
results: None,
_phantom: PhantomData,
}
}
pub fn fit(&mut self, data: &ArrayView2<F>) -> StatsResult<&PCAResult<F>> {
checkarray_finite(data, "data")?;
let (n_samples, n_features) = data.dim();
if n_samples == 0 || n_features == 0 {
return Err(StatsError::InvalidArgument(
"Data cannot be empty".to_string(),
));
}
let n_components = self
.config
.n_components
.unwrap_or_else(|| n_features.min(n_samples));
if n_components > n_features.min(n_samples) {
return Err(StatsError::InvalidArgument(format!(
"n_components ({}) cannot exceed min(n_samples, n_features) ({})",
n_components,
n_features.min(n_samples)
)));
}
let (preprocesseddata, mean, scale) = self.preprocessdata(data)?;
let results = match &self.algorithm {
PCAAlgorithm::SVD => self.fit_svd(&preprocesseddata, n_components, mean, scale)?,
PCAAlgorithm::Eigen => self.fit_eigen(&preprocesseddata, n_components, mean, scale)?,
PCAAlgorithm::Randomized {
n_iter,
n_oversamples,
} => self.fit_randomized(
&preprocesseddata,
n_components,
*n_iter,
*n_oversamples,
mean,
scale,
)?,
PCAAlgorithm::Incremental { batchsize } => {
self.fit_incremental(&preprocesseddata, n_components, *batchsize, mean, scale)?
}
PCAAlgorithm::Sparse { alpha, max_iter } => self.fit_sparse(
&preprocesseddata,
n_components,
*alpha,
*max_iter,
mean,
scale,
)?,
PCAAlgorithm::Robust { lambda, max_iter } => self.fit_robust(
&preprocesseddata,
n_components,
*lambda,
*max_iter,
mean,
scale,
)?,
};
self.results = Some(results);
Ok(self.results.as_ref().expect("Operation failed"))
}
fn preprocessdata(
&self,
data: &ArrayView2<F>,
) -> StatsResult<(Array2<F>, Array1<F>, Option<Array1<F>>)> {
let mut processeddata = data.to_owned();
let n_features = data.ncols();
let mean = if self.config.center {
let mean = data.mean_axis(Axis(0)).expect("Operation failed");
for mut row in processeddata.rows_mut() {
for (i, &m) in mean.iter().enumerate() {
row[i] = row[i] - m;
}
}
mean
} else {
Array1::zeros(n_features)
};
let scale = if self.config.scale {
let mut std_dev = Array1::zeros(n_features);
for (j, mut col) in processeddata.columns_mut().into_iter().enumerate() {
let var = col.mapv(|x| x * x).mean().expect("Operation failed");
std_dev[j] = var.sqrt();
if std_dev[j] > F::from(1e-12).expect("Failed to convert constant to float") {
for x in col.iter_mut() {
*x = *x / std_dev[j];
}
}
}
Some(std_dev)
} else {
None
};
Ok((processeddata, mean, scale))
}
fn fit_svd(
&self,
data: &Array2<F>,
n_components: usize,
mean: Array1<F>,
scale: Option<Array1<F>>,
) -> StatsResult<PCAResult<F>> {
let (n_samples, n_features) = data.dim();
let data_f64 = data.mapv(|x| x.to_f64().expect("Operation failed"));
let (u, s, vt) = scirs2_linalg::svd(&data_f64.view(), true, None)
.map_err(|e| StatsError::ComputationError(format!("SVD failed: {}", e)))?;
let singular_values = s.slice(scirs2_core::ndarray::s![..n_components]).to_owned();
let components = vt
.slice(scirs2_core::ndarray::s![..n_components, ..])
.to_owned();
let total_variance_f64 = s.mapv(|x| x * x).sum() / (n_samples - 1) as f64;
let explained_variance_f64 = singular_values.mapv(|x| x * x / (n_samples - 1) as f64);
let explained_variance_ratio_f64 = &explained_variance_f64 / total_variance_f64;
let mut cumulative_variance_ratio_f64 = Array1::zeros(n_components);
let mut cumsum = 0.0;
for i in 0..n_components {
cumsum += explained_variance_ratio_f64[i];
cumulative_variance_ratio_f64[i] = cumsum;
}
let components_f = components.mapv(|x| F::from(x).expect("Failed to convert to float"));
let singular_values_f =
singular_values.mapv(|x| F::from(x).expect("Failed to convert to float"));
let explained_variance_f =
explained_variance_f64.mapv(|x| F::from(x).expect("Failed to convert to float"));
let explained_variance_ratio_f =
explained_variance_ratio_f64.mapv(|x| F::from(x).expect("Failed to convert to float"));
let cumulative_variance_ratio_f =
cumulative_variance_ratio_f64.mapv(|x| F::from(x).expect("Failed to convert to float"));
let total_variance_f = F::from(total_variance_f64).expect("Failed to convert to float");
Ok(PCAResult {
components: components_f,
explained_variance: explained_variance_f,
explained_variance_ratio: explained_variance_ratio_f,
cumulative_variance_ratio: cumulative_variance_ratio_f,
singular_values: singular_values_f,
mean,
scale,
total_variance: total_variance_f,
n_components,
algorithm: self.algorithm.clone(),
})
}
fn fit_eigen(
&self,
data: &Array2<F>,
n_components: usize,
mean: Array1<F>,
scale: Option<Array1<F>>,
) -> StatsResult<PCAResult<F>> {
let (n_samples, n_features) = data.dim();
let data_f64 = data.mapv(|x| x.to_f64().expect("Operation failed"));
let cov_matrix = data_f64.t().dot(&data_f64) / (n_samples - 1) as f64;
let (eigenvalues, eigenvectors) =
scirs2_linalg::eigh(&cov_matrix.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Eigendecomposition failed: {}", e))
})?;
let mut eigen_pairs: Vec<(f64, scirs2_core::ndarray::ArrayView1<f64>)> = eigenvalues
.iter()
.zip(eigenvectors.columns())
.map(|(&val, vec)| (val, vec))
.collect();
eigen_pairs.sort_by(|a, b| b.0.partial_cmp(&a.0).expect("Operation failed"));
let selected_eigenvalues: Vec<f64> = eigen_pairs[..n_components]
.iter()
.map(|(val, _)| *val)
.collect();
let mut selected_eigenvectors = Array2::zeros((data.ncols(), n_components));
for (i, (_, eigenvec)) in eigen_pairs[..n_components].iter().enumerate() {
selected_eigenvectors.column_mut(i).assign(eigenvec);
}
let components = selected_eigenvectors.t().to_owned();
let total_variance_f64 = eigenvalues.sum();
let explained_variance_f64 = Array1::from_vec(selected_eigenvalues);
let explained_variance_ratio_f64 = &explained_variance_f64 / total_variance_f64;
let mut cumulative_variance_ratio_f64 = Array1::zeros(n_components);
let mut cumsum = 0.0;
for i in 0..n_components {
cumsum += explained_variance_ratio_f64[i];
cumulative_variance_ratio_f64[i] = cumsum;
}
let components_f = components.mapv(|x| F::from(x).expect("Failed to convert to float"));
let singular_values_f =
explained_variance_f64.mapv(|x| F::from(x.sqrt()).expect("Operation failed"));
let explained_variance_f =
explained_variance_f64.mapv(|x| F::from(x).expect("Failed to convert to float"));
let explained_variance_ratio_f =
explained_variance_ratio_f64.mapv(|x| F::from(x).expect("Failed to convert to float"));
let cumulative_variance_ratio_f =
cumulative_variance_ratio_f64.mapv(|x| F::from(x).expect("Failed to convert to float"));
let total_variance_f = F::from(total_variance_f64).expect("Failed to convert to float");
Ok(PCAResult {
components: components_f,
explained_variance: explained_variance_f,
explained_variance_ratio: explained_variance_ratio_f,
cumulative_variance_ratio: cumulative_variance_ratio_f,
singular_values: singular_values_f,
mean,
scale,
total_variance: total_variance_f,
n_components,
algorithm: self.algorithm.clone(),
})
}
fn fit_randomized(
&self,
data: &Array2<F>,
n_components: usize,
_n_iter: usize,
_oversamples: usize,
mean: Array1<F>,
scale: Option<Array1<F>>,
) -> StatsResult<PCAResult<F>> {
self.fit_svd(data, n_components, mean, scale)
}
fn fit_incremental(
&self,
data: &Array2<F>,
n_components: usize,
_batchsize: usize,
mean: Array1<F>,
scale: Option<Array1<F>>,
) -> StatsResult<PCAResult<F>> {
self.fit_svd(data, n_components, mean, scale)
}
fn fit_sparse(
&self,
data: &Array2<F>,
n_components: usize,
_alpha: f64,
_max_iter: usize,
mean: Array1<F>,
scale: Option<Array1<F>>,
) -> StatsResult<PCAResult<F>> {
self.fit_svd(data, n_components, mean, scale)
}
fn fit_robust(
&self,
data: &Array2<F>,
n_components: usize,
_lambda: f64,
_max_iter: usize,
mean: Array1<F>,
scale: Option<Array1<F>>,
) -> StatsResult<PCAResult<F>> {
self.fit_svd(data, n_components, mean, scale)
}
pub fn transform(&self, data: &ArrayView2<F>) -> StatsResult<Array2<F>> {
let results = self.results.as_ref().ok_or_else(|| {
StatsError::InvalidArgument("PCA must be fitted before transform".to_string())
})?;
checkarray_finite(data, "data")?;
if data.ncols() != results.mean.len() {
return Err(StatsError::DimensionMismatch(format!(
"Data columns ({}) must match fitted features ({})",
data.ncols(),
results.mean.len()
)));
}
let mut processeddata = data.to_owned();
if self.config.center {
for mut row in processeddata.rows_mut() {
for (i, &m) in results.mean.iter().enumerate() {
row[i] = row[i] - m;
}
}
}
if let Some(ref scale) = results.scale {
for (j, mut col) in processeddata.columns_mut().into_iter().enumerate() {
if scale[j] > F::from(1e-12).expect("Failed to convert constant to float") {
for x in col.iter_mut() {
*x = *x / scale[j];
}
}
}
}
let transformed = processeddata.dot(&results.components.t());
Ok(transformed)
}
pub fn fit_transform(&mut self, data: &ArrayView2<F>) -> StatsResult<Array2<F>> {
self.fit(data)?;
self.transform(data)
}
pub fn inverse_transform(&self, transformeddata: &ArrayView2<F>) -> StatsResult<Array2<F>> {
let results = self.results.as_ref().ok_or_else(|| {
StatsError::InvalidArgument("PCA must be fitted before inverse_transform".to_string())
})?;
checkarray_finite(transformeddata, "transformeddata")?;
if transformeddata.ncols() != results.n_components {
return Err(StatsError::DimensionMismatch(format!(
"Transformed data columns ({}) must match n_components ({})",
transformeddata.ncols(),
results.n_components
)));
}
let mut reconstructed = transformeddata.dot(&results.components);
if let Some(ref scale) = results.scale {
for (j, mut col) in reconstructed.columns_mut().into_iter().enumerate() {
if scale[j] > F::from(1e-12).expect("Failed to convert constant to float") {
for x in col.iter_mut() {
*x = *x * scale[j];
}
}
}
}
if self.config.center {
for mut row in reconstructed.rows_mut() {
for (i, &m) in results.mean.iter().enumerate() {
row[i] = row[i] + m;
}
}
}
Ok(reconstructed)
}
pub fn explained_variance_ratio(&self) -> Option<&Array1<F>> {
self.results.as_ref().map(|r| &r.explained_variance_ratio)
}
pub fn cumulative_variance_ratio(&self) -> Option<&Array1<F>> {
self.results.as_ref().map(|r| &r.cumulative_variance_ratio)
}
pub fn components(&self) -> Option<&Array2<F>> {
self.results.as_ref().map(|r| &r.components)
}
}
pub struct EnhancedFactorAnalysis<F> {
pub n_factors: usize,
pub config: FactorAnalysisConfig,
pub results: Option<FactorAnalysisResult<F>>,
_phantom: PhantomData<F>,
}
#[derive(Debug, Clone)]
pub struct FactorAnalysisConfig {
pub max_iter: usize,
pub tolerance: f64,
pub rotation: RotationMethod,
pub seed: Option<u64>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum RotationMethod {
None,
Varimax,
Quartimax,
Promax,
}
#[derive(Debug, Clone)]
pub struct FactorAnalysisResult<F> {
pub loadings: Array2<F>,
pub uniquenesses: Array1<F>,
pub scores: Option<Array2<F>>,
pub communalities: Array1<F>,
pub explained_variance: Array1<F>,
pub log_likelihood: Option<F>,
}
impl Default for FactorAnalysisConfig {
fn default() -> Self {
Self {
max_iter: 1000,
tolerance: 1e-6,
rotation: RotationMethod::Varimax,
seed: None,
}
}
}
impl<F> EnhancedFactorAnalysis<F>
where
F: Float
+ Zero
+ One
+ Copy
+ Send
+ Sync
+ SimdUnifiedOps
+ FromPrimitive
+ std::fmt::Display
+ std::iter::Sum
+ ScalarOperand,
{
pub fn new(n_factors: usize, config: FactorAnalysisConfig) -> StatsResult<Self> {
check_positive(n_factors, "n_factors")?;
Ok(Self {
n_factors,
config,
results: None,
_phantom: PhantomData,
})
}
pub fn fit(&mut self, data: &ArrayView2<F>) -> StatsResult<&FactorAnalysisResult<F>> {
checkarray_finite(data, "data")?;
let (n_samples, n_features) = data.dim();
if self.n_factors >= n_features {
return Err(StatsError::InvalidArgument(format!(
"n_factors ({}) must be less than n_features ({})",
self.n_factors, n_features
)));
}
if n_samples < 2 {
return Err(StatsError::InvalidArgument(
"At least 2 samples are required for factor analysis".to_string(),
));
}
let mean = data.mean_axis(Axis(0)).ok_or_else(|| {
StatsError::ComputationError("Failed to compute data mean".to_string())
})?;
let mut x_centered = data.to_owned();
for mut row in x_centered.rows_mut() {
for (i, &m) in mean.iter().enumerate() {
row[i] = row[i] - m;
}
}
let mut x_f64_vec = Vec::with_capacity(n_samples * n_features);
for &val in x_centered.iter() {
let v = val.to_f64().ok_or_else(|| {
StatsError::ComputationError("Failed to convert to f64".to_string())
})?;
x_f64_vec.push(v);
}
let x_f64 = Array2::from_shape_vec((n_samples, n_features), x_f64_vec).map_err(|e| {
StatsError::ComputationError(format!("Failed to reshape f64 data: {}", e))
})?;
let cov_matrix = self.compute_sample_covariance(&x_f64)?;
let (init_loadings_f64, _eigenvals) = self.initial_loadings_from_cov(&cov_matrix)?;
let eps = 1e-6_f64;
let ll_t = init_loadings_f64.dot(&init_loadings_f64.t());
let mut psi_f64 = Array1::<f64>::zeros(n_features);
for j in 0..n_features {
let val = cov_matrix[[j, j]] - ll_t[[j, j]];
psi_f64[j] = val.max(eps).min(1.0_f64);
}
let mut loadings_f64 = init_loadings_f64;
let mut prev_log_lik = f64::NEG_INFINITY;
let n_s = n_samples as f64;
let tol = self.config.tolerance;
let max_iter = self.config.max_iter;
for _iter in 0..max_iter {
let sigma = self.build_sigma(&loadings_f64, &psi_f64, n_features);
let sigma_inv = scirs2_linalg::inv(&sigma.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Sigma inversion failed: {}", e))
})?;
let beta = loadings_f64.t().dot(&sigma_inv);
let ez_x = x_f64.dot(&beta.t());
let i_k = Array2::<f64>::eye(self.n_factors);
let beta_lambda = beta.dot(&loadings_f64);
let i_minus_beta_lambda = &i_k - &beta_lambda;
let ezz_x = i_minus_beta_lambda * n_s + ez_x.t().dot(&ez_x);
let ezz_x_inv = scirs2_linalg::inv(&ezz_x.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Ezz_x inversion failed: {}", e))
})?;
let xt_ez_x = x_f64.t().dot(&ez_x); let new_loadings = xt_ez_x.dot(&ezz_x_inv);
let mut new_psi = Array1::<f64>::zeros(n_features);
for j in 0..n_features {
let col_j = x_f64.column(j);
let l_new_j = new_loadings.row(j);
let xj_t_ez = col_j.dot(&ez_x) / n_s; let val = cov_matrix[[j, j]] - l_new_j.dot(&xj_t_ez);
new_psi[j] = val.max(eps);
}
loadings_f64 = new_loadings;
psi_f64 = new_psi;
let sigma_new = self.build_sigma(&loadings_f64, &psi_f64, n_features);
let log_lik = self.compute_log_likelihood_f64(&cov_matrix, &sigma_new, n_samples)?;
if (log_lik - prev_log_lik).abs() < tol {
break;
}
prev_log_lik = log_lik;
}
let mut loadings: Array2<F> = Array2::zeros((n_features, self.n_factors));
for j in 0..n_features {
for k in 0..self.n_factors {
loadings[[j, k]] = F::from(loadings_f64[[j, k]]).ok_or_else(|| {
StatsError::ComputationError("Failed to convert loadings to F".to_string())
})?;
}
}
let mut uniquenesses: Array1<F> = Array1::zeros(n_features);
for j in 0..n_features {
uniquenesses[j] = F::from(psi_f64[j]).ok_or_else(|| {
StatsError::ComputationError("Failed to convert uniquenesses to F".to_string())
})?;
}
if self.config.rotation != RotationMethod::None {
loadings = self.apply_rotation(loadings)?;
}
let sigma_final = self.build_sigma(&loadings_f64, &psi_f64, n_features);
let sigma_final_inv = scirs2_linalg::inv(&sigma_final.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Final sigma inversion failed: {}", e))
})?;
let scores_f64 = x_f64.dot(&sigma_final_inv).dot(&loadings_f64);
let mut scores: Array2<F> = Array2::zeros((n_samples, self.n_factors));
for i in 0..n_samples {
for k in 0..self.n_factors {
scores[[i, k]] = F::from(scores_f64[[i, k]]).ok_or_else(|| {
StatsError::ComputationError("Failed to convert scores to F".to_string())
})?;
}
}
let sigma_final2 = self.build_sigma(&loadings_f64, &psi_f64, n_features);
let log_lik_final =
self.compute_log_likelihood_f64(&cov_matrix, &sigma_final2, n_samples)?;
let log_likelihood_f = F::from(log_lik_final).ok_or_else(|| {
StatsError::ComputationError("Failed to convert log-likelihood to F".to_string())
})?;
let communalities = loadings
.rows()
.into_iter()
.map(|row| row.mapv(|x| x * x).sum())
.collect::<Array1<F>>();
let explained_variance = loadings
.columns()
.into_iter()
.map(|col| col.mapv(|x| x * x).sum())
.collect::<Array1<F>>();
let results = FactorAnalysisResult {
loadings,
uniquenesses,
scores: Some(scores),
communalities,
explained_variance,
log_likelihood: Some(log_likelihood_f),
};
self.results = Some(results);
Ok(self
.results
.as_ref()
.ok_or_else(|| StatsError::ComputationError("Results not set after fit".to_string()))?)
}
fn build_sigma(
&self,
loadings: &Array2<f64>,
psi: &Array1<f64>,
n_features: usize,
) -> Array2<f64> {
let mut sigma = loadings.dot(&loadings.t());
for j in 0..n_features {
sigma[[j, j]] += psi[j];
}
sigma
}
fn compute_log_likelihood_f64(
&self,
cov: &Array2<f64>,
sigma: &Array2<f64>,
n_samples: usize,
) -> StatsResult<f64> {
let n = n_samples as f64;
let p = cov.nrows() as f64;
let det_sigma = scirs2_linalg::det(&sigma.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Determinant computation failed: {}", e))
})?;
if det_sigma <= 0.0 {
return Ok(f64::NEG_INFINITY);
}
let sigma_inv = scirs2_linalg::inv(&sigma.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Sigma inversion for LL failed: {}", e))
})?;
let sigma_inv_cov = sigma_inv.dot(cov);
let trace_term: f64 = (0..cov.nrows()).map(|i| sigma_inv_cov[[i, i]]).sum();
let log_lik =
-0.5 * n * (p * (2.0 * std::f64::consts::PI).ln() + det_sigma.ln() + trace_term);
Ok(log_lik)
}
fn compute_sample_covariance(&self, x_centered: &Array2<f64>) -> StatsResult<Array2<f64>> {
let n_samples = x_centered.nrows();
if n_samples < 2 {
return Err(StatsError::InvalidArgument(
"Need at least 2 samples for covariance".to_string(),
));
}
let cov = x_centered.t().dot(x_centered) / (n_samples - 1) as f64;
Ok(cov)
}
fn initial_loadings_from_cov(
&self,
cov: &Array2<f64>,
) -> StatsResult<(Array2<f64>, Array1<f64>)> {
let n_features = cov.nrows();
let (eigenvalues, eigenvectors) = scirs2_linalg::eigh(&cov.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Eigendecomposition failed: {}", e))
})?;
let mut pairs: Vec<(f64, usize)> = eigenvalues
.iter()
.enumerate()
.map(|(i, &v)| (v, i))
.collect();
pairs.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
let mut loadings = Array2::<f64>::zeros((n_features, self.n_factors));
let mut evals = Array1::<f64>::zeros(self.n_factors);
for (k, (eval, orig_idx)) in pairs[..self.n_factors].iter().enumerate() {
let sqrt_eval = eval.max(0.0).sqrt();
for j in 0..n_features {
loadings[[j, k]] = eigenvectors[[j, *orig_idx]] * sqrt_eval;
}
evals[k] = *eval;
}
Ok((loadings, evals))
}
fn apply_rotation(&self, loadings: Array2<F>) -> StatsResult<Array2<F>> {
match self.config.rotation {
RotationMethod::Varimax => self.varimax_rotation(loadings),
RotationMethod::Quartimax => self.quartimax_rotation(loadings),
RotationMethod::Promax => self.promax_rotation(loadings),
RotationMethod::None => Ok(loadings),
}
}
fn varimax_rotation(&self, loadings: Array2<F>) -> StatsResult<Array2<F>> {
Ok(loadings)
}
fn quartimax_rotation(&self, loadings: Array2<F>) -> StatsResult<Array2<F>> {
Ok(loadings)
}
fn promax_rotation(&self, loadings: Array2<F>) -> StatsResult<Array2<F>> {
Ok(loadings)
}
pub fn loadings(&self) -> Option<&Array2<F>> {
self.results.as_ref().map(|r| &r.loadings)
}
pub fn communalities(&self) -> Option<&Array1<F>> {
self.results.as_ref().map(|r| &r.communalities)
}
}
#[allow(dead_code)]
pub fn enhanced_pca<F>(
data: &ArrayView2<F>,
n_components: Option<usize>,
algorithm: Option<PCAAlgorithm>,
) -> StatsResult<PCAResult<F>>
where
F: Float
+ Zero
+ One
+ Copy
+ Send
+ Sync
+ SimdUnifiedOps
+ FromPrimitive
+ std::fmt::Display
+ std::iter::Sum
+ ScalarOperand,
{
let algorithm = algorithm.unwrap_or(PCAAlgorithm::SVD);
let config = PCAConfig {
n_components,
..Default::default()
};
let mut pca = EnhancedPCA::new(algorithm, config);
Ok(pca.fit(data)?.clone())
}
#[allow(dead_code)]
pub fn enhanced_factor_analysis<F>(
data: &ArrayView2<F>,
n_factors: usize,
rotation: Option<RotationMethod>,
) -> StatsResult<FactorAnalysisResult<F>>
where
F: Float
+ Zero
+ One
+ Copy
+ Send
+ Sync
+ SimdUnifiedOps
+ FromPrimitive
+ std::fmt::Display
+ std::iter::Sum
+ ScalarOperand,
{
let config = FactorAnalysisConfig {
rotation: rotation.unwrap_or(RotationMethod::Varimax),
..Default::default()
};
let mut fa = EnhancedFactorAnalysis::new(n_factors, config)?;
Ok(fa.fit(data)?.clone())
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
fn make_two_factor_data() -> Array2<f64> {
let n = 200_usize;
let p = 5_usize;
let mut data = Array2::<f64>::zeros((n, p));
for i in 0..n {
let f1 = ((i as f64) * 0.1_f64).sin();
let f2 = ((i as f64) * 0.15_f64).cos();
data[[i, 0]] = 0.8 * f1 + 0.01 * ((i as f64) * 7.0).sin();
data[[i, 1]] = 0.7 * f1 + 0.01 * ((i as f64) * 11.0).cos();
data[[i, 2]] = 0.9 * f1 + 0.01 * ((i as f64) * 13.0).sin();
data[[i, 3]] = 0.75 * f2 + 0.01 * ((i as f64) * 17.0).cos();
data[[i, 4]] = 0.85 * f2 + 0.01 * ((i as f64) * 19.0).sin();
}
data
}
#[test]
fn test_em_factor_analysis_loadings_shape() {
let data = make_two_factor_data();
let config = FactorAnalysisConfig {
max_iter: 200,
tolerance: 1e-6,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa.fit(&data.view()).expect("EM fit failed");
let (n_features, n_factors) = result.loadings.dim();
assert_eq!(n_features, 5, "loadings should have 5 rows (features)");
assert_eq!(n_factors, 2, "loadings should have 2 columns (factors)");
}
#[test]
fn test_em_factor_analysis_uniquenesses_positive() {
let data = make_two_factor_data();
let config = FactorAnalysisConfig {
max_iter: 200,
tolerance: 1e-6,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa.fit(&data.view()).expect("EM fit failed");
for &psi in result.uniquenesses.iter() {
assert!(psi > 0.0, "All uniquenesses must be positive, got {}", psi);
}
}
#[test]
fn test_em_factor_analysis_log_likelihood_present_and_negative() {
let data = make_two_factor_data();
let config = FactorAnalysisConfig {
max_iter: 200,
tolerance: 1e-6,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa.fit(&data.view()).expect("EM fit failed");
let ll = result
.log_likelihood
.expect("log_likelihood must be Some(_)");
assert!(ll.is_finite(), "Log-likelihood must be finite, got {}", ll);
}
#[test]
fn test_em_factor_analysis_scores_shape() {
let data = make_two_factor_data();
let n_samples = data.nrows();
let config = FactorAnalysisConfig {
max_iter: 200,
tolerance: 1e-6,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa.fit(&data.view()).expect("EM fit failed");
let scores = result.scores.as_ref().expect("scores must be Some(_)");
assert_eq!(
scores.dim(),
(n_samples, 2),
"scores must have shape (n_samples, n_factors)"
);
}
#[test]
fn test_em_factor_analysis_convergence() {
let data = make_two_factor_data();
let max_iter = 1000_usize;
let config = FactorAnalysisConfig {
max_iter,
tolerance: 1e-8,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa
.fit(&data.view())
.expect("EM fit converged without error");
let ll = result.log_likelihood.expect("log_likelihood must be Some");
assert!(
ll.is_finite(),
"Log-likelihood should be finite after convergence"
);
}
#[test]
fn test_em_communalities_nonnegative() {
let data = make_two_factor_data();
let config = FactorAnalysisConfig {
max_iter: 200,
tolerance: 1e-6,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa.fit(&data.view()).expect("EM fit failed");
for &h2 in result.communalities.iter() {
assert!(h2 >= 0.0, "Communalities must be non-negative, got {}", h2);
}
}
#[test]
fn test_em_explained_variance_shape() {
let data = make_two_factor_data();
let config = FactorAnalysisConfig {
max_iter: 200,
tolerance: 1e-6,
rotation: RotationMethod::None,
seed: None,
};
let mut fa = EnhancedFactorAnalysis::<f64>::new(2, config).expect("Failed to create FA");
let result = fa.fit(&data.view()).expect("EM fit failed");
assert_eq!(
result.explained_variance.len(),
2,
"explained_variance must have length n_factors"
);
}
#[test]
fn test_em_rejects_too_many_factors() {
let data = make_two_factor_data(); let config = FactorAnalysisConfig::default();
let result = EnhancedFactorAnalysis::<f64>::new(5, config);
if let Ok(mut fa) = result {
assert!(fa.fit(&data.view()).is_err());
}
}
}