use crate::compute::stats::correlation::covariance_matrix;
use crate::compute::stats::descriptive::mean_vertical;
use crate::matrix::{Axis, Matrix, SeriesOps};
pub struct PCA {
pub components: Matrix<f64>, pub mean: Matrix<f64>, }
impl PCA {
pub fn fit(x: &Matrix<f64>, n_components: usize, _iters: usize) -> Self {
let mean = mean_vertical(x); let broadcasted_mean = mean.broadcast_row_to_target_shape(x.rows(), x.cols());
let centered_data = x.zip(&broadcasted_mean, |x_i, mean_i| x_i - mean_i);
let covariance_matrix = covariance_matrix(¢ered_data, Axis::Col);
let mut components = Matrix::zeros(n_components, x.cols());
for i in 0..n_components {
if i < covariance_matrix.rows() {
components.row_copy_from_slice(i, &covariance_matrix.row(i));
} else {
break;
}
}
PCA { components, mean }
}
pub fn transform(&self, x: &Matrix<f64>) -> Matrix<f64> {
let broadcasted_mean = self.mean.broadcast_row_to_target_shape(x.rows(), x.cols());
let centered_data = x.zip(&broadcasted_mean, |x_i, mean_i| x_i - mean_i);
centered_data.matrix_mul(&self.components.transpose())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::matrix::Matrix;
const EPSILON: f64 = 1e-8;
#[test]
fn test_pca_basic() {
let data = Matrix::from_rows_vec(vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0], 3, 2);
let (_n_samples, _n_features) = data.shape();
let pca = PCA::fit(&data, 1, 0);
println!("Data shape: {:?}", data.shape());
println!("PCA mean shape: {:?}", pca.mean.shape());
println!("PCA components shape: {:?}", pca.components.shape());
assert!((pca.mean.get(0, 0) - 2.0).abs() < EPSILON);
assert!((pca.mean.get(0, 1) - 2.0).abs() < EPSILON);
assert!((pca.components.get(0, 0) - 1.0).abs() < EPSILON);
assert!((pca.components.get(0, 1) - 1.0).abs() < EPSILON);
let transformed_data = pca.transform(&data);
assert_eq!(transformed_data.rows(), 3);
assert_eq!(transformed_data.cols(), 1);
assert!((transformed_data.get(0, 0) - -2.0).abs() < EPSILON);
assert!((transformed_data.get(1, 0) - 0.0).abs() < EPSILON);
assert!((transformed_data.get(2, 0) - 2.0).abs() < EPSILON);
}
#[test]
fn test_pca_fit_break_branch() {
let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2);
let (_n_samples, n_features) = data.shape();
let n_components_large = n_features + 1;
let pca = PCA::fit(&data, n_components_large, 0);
assert_eq!(pca.components.rows(), n_components_large);
assert_eq!(pca.components.cols(), n_features);
for i in n_features..n_components_large {
for j in 0..n_features {
assert!((pca.components.get(i, j) - 0.0).abs() < EPSILON);
}
}
}
}