rs_ml/dimensionality_reduction/
pca.rs

1//! Principal Component Analysis
2
3use std::num::NonZeroUsize;
4
5use ndarray::{Array1, Array2, ArrayView2, Axis, ScalarOperand};
6use ndarray_linalg::{Eigh, Lapack};
7use num_traits::{Float, FromPrimitive};
8
9use crate::{transformer::Transformer, Estimator};
10
11/// Estimator to reduce dimension of a input variable
12#[derive(Clone, Debug, Copy)]
13pub struct PCAEstimator {
14    dimensions: NonZeroUsize,
15}
16
17impl PCAEstimator {
18    /// define PCA estimator
19    pub fn new(dim: NonZeroUsize) -> PCAEstimator {
20        PCAEstimator { dimensions: dim }
21    }
22}
23
24/// Fitted PCA reduction transformer
25#[derive(Debug, Clone)]
26pub struct PCATransformer<F: Lapack + Clone> {
27    means: Array1<F>,
28    eigen_vectors: Array2<F>,
29}
30
31impl<F: FromPrimitive + ScalarOperand + Float + Lapack<Real = F>> Estimator<Array2<F>>
32    for PCAEstimator
33{
34    type Estimator = PCATransformer<F>;
35
36    fn fit(&self, input: &Array2<F>) -> Option<Self::Estimator> {
37        let ddof = F::from_usize(input.nrows())?;
38        let means: Array1<F> = input.mean_axis(Axis(1))?;
39        let translated: Array2<F> = input - &means;
40        let t: ArrayView2<F> = translated.t();
41        let cov: Array2<F> = t.dot(&translated) / ddof;
42
43        let (eigen_values, eigen_vectors) = cov.eigh(ndarray_linalg::UPLO::Upper).ok()?;
44
45        let mut eigen_ordering: Vec<_> = (0..eigen_values.len()).collect();
46
47        eigen_ordering.sort_by(|i, j| eigen_values[*i].partial_cmp(&eigen_values[*j]).unwrap());
48
49        let indexes: Vec<_> = eigen_ordering
50            .into_iter()
51            .rev()
52            .take(self.dimensions.into())
53            .collect();
54
55        let eigen_vec = eigen_vectors.select(Axis(1), &indexes);
56
57        Some(PCATransformer {
58            means,
59            eigen_vectors: eigen_vec.t().to_owned(),
60        })
61    }
62}
63
64impl<F: FromPrimitive + ScalarOperand + Float + ndarray_linalg::Lapack>
65    Transformer<Array2<F>, Array2<F>> for PCATransformer<F>
66{
67    fn transform(&self, input: &Array2<F>) -> Option<Array2<F>> {
68        let binding = input - &self.means;
69        let normalized: ArrayView2<F> = binding.t();
70
71        let pca: Array2<F> = self.eigen_vectors.dot(&normalized);
72        let ret: Array2<F> = pca.t().to_owned();
73
74        Some(ret)
75    }
76}