rs_ml/dimensionality_reduction/
pca.rs

1//! Principal Component Analysis
2
3use std::num::NonZeroUsize;
4
5use ndarray::{Array1, Array2, ArrayView2, Axis, ScalarOperand};
6use ndarray_linalg::{Eigh, Lapack};
7use num_traits::{Float, FromPrimitive};
8
9use crate::{transformer::Transformer, Estimator};
10
11/// Estimator to reduce dimension of input data using principal component analysis. returns a
12/// fitted [`PCATransformer`]
13///
14/// ```rust
15/// # use std::error::Error;
16/// # use ndarray::arr2;
17/// # use rs_ml::Estimator;
18/// # use rs_ml::transformer::FitTransform;
19/// # use rs_ml::dimensionality_reduction::pca::{PCAEstimator, PCATransformer};
20/// # use std::num::NonZeroUsize;
21/// # fn main() -> Result<(), Box<dyn Error>> {
22/// let arr = arr2(&[
23///     [0., 1., 3.],
24///     [1., 2., 3.],
25///     [2., 3., 3.]
26/// ]);
27///
28/// let pca_estimator = PCAEstimator::new(NonZeroUsize::new(1).unwrap());
29/// let pca = pca_estimator
30///         .fit_transform(&arr)
31///         .ok_or(Box::<dyn Error>::from("Error fitting data"))?;
32///
33/// assert!(
34///     pca.abs_diff_eq(
35///         &arr2(&[[-f64::sqrt(2.)], [0.], [f64::sqrt(2.)]]),
36///         1e-10
37///     )
38/// );
39/// # Ok(())
40/// # }
41/// ```
42#[derive(Clone, Debug, Copy)]
43pub struct PCAEstimator {
44    dimensions: NonZeroUsize,
45}
46
47impl PCAEstimator {
48    /// define PCA estimator
49    pub fn new(dim: NonZeroUsize) -> PCAEstimator {
50        PCAEstimator { dimensions: dim }
51    }
52}
53
54/// Fitted PCA reduction transformer
55#[derive(Debug, Clone)]
56pub struct PCATransformer<F: Lapack + Clone> {
57    means: Array1<F>,
58    eigen_vectors: Array2<F>,
59}
60
61impl<F: FromPrimitive + ScalarOperand + Float + Lapack<Real = F>> Estimator<Array2<F>>
62    for PCAEstimator
63{
64    type Estimator = PCATransformer<F>;
65
66    fn fit(&self, input: &Array2<F>) -> Option<Self::Estimator> {
67        let ddof = F::from_usize(input.ncols())?;
68        let means: Array1<F> = input.mean_axis(Axis(0))?;
69        let translated: Array2<F> = input - &means;
70        let t: ArrayView2<F> = translated.t();
71        let cov: Array2<F> = t.dot(&translated) / ddof;
72
73        let (eigen_values, eigen_vectors) = cov.eigh(ndarray_linalg::UPLO::Upper).ok()?;
74
75        let mut eigen_ordering: Vec<_> = (0..eigen_values.len()).collect();
76
77        eigen_ordering.sort_by(|i, j| eigen_values[*j].partial_cmp(&eigen_values[*i]).unwrap());
78
79        let indexes: Vec<_> = eigen_ordering
80            .into_iter()
81            .take(self.dimensions.into())
82            .collect();
83
84        let eigen_vec = eigen_vectors.select(Axis(1), &indexes);
85
86        Some(PCATransformer {
87            means,
88            eigen_vectors: eigen_vec.t().to_owned(),
89        })
90    }
91}
92
93impl<F: FromPrimitive + ScalarOperand + Float + ndarray_linalg::Lapack>
94    Transformer<Array2<F>, Array2<F>> for PCATransformer<F>
95{
96    fn transform(&self, input: &Array2<F>) -> Option<Array2<F>> {
97        let binding = input - &self.means;
98        let normalized: ArrayView2<F> = binding.t();
99
100        let pca: Array2<F> = self.eigen_vectors.dot(&normalized);
101        let ret: Array2<F> = pca.t().to_owned();
102
103        Some(ret)
104    }
105}