rs_ml/dimensionality_reduction/
pca.rs1use std::num::NonZeroUsize;
4
5use ndarray::{Array1, Array2, ArrayView2, Axis, ScalarOperand};
6use ndarray_linalg::{Eigh, Lapack};
7use num_traits::{Float, FromPrimitive};
8
9use crate::{transformer::Transformer, Estimator};
10
11#[derive(Clone, Debug, Copy)]
13pub struct PCAEstimator {
14 dimensions: NonZeroUsize,
15}
16
17impl PCAEstimator {
18 pub fn new(dim: NonZeroUsize) -> PCAEstimator {
20 PCAEstimator { dimensions: dim }
21 }
22}
23
24#[derive(Debug, Clone)]
26pub struct PCATransformer<F: Lapack + Clone> {
27 means: Array1<F>,
28 eigen_vectors: Array2<F>,
29}
30
31impl<F: FromPrimitive + ScalarOperand + Float + Lapack<Real = F>> Estimator<Array2<F>>
32 for PCAEstimator
33{
34 type Estimator = PCATransformer<F>;
35
36 fn fit(&self, input: &Array2<F>) -> Option<Self::Estimator> {
37 let ddof = F::from_usize(input.nrows())?;
38 let means: Array1<F> = input.mean_axis(Axis(1))?;
39 let translated: Array2<F> = input - &means;
40 let t: ArrayView2<F> = translated.t();
41 let cov: Array2<F> = t.dot(&translated) / ddof;
42
43 let (eigen_values, eigen_vectors) = cov.eigh(ndarray_linalg::UPLO::Upper).ok()?;
44
45 let mut eigen_ordering: Vec<_> = (0..eigen_values.len()).collect();
46
47 eigen_ordering.sort_by(|i, j| eigen_values[*i].partial_cmp(&eigen_values[*j]).unwrap());
48
49 let indexes: Vec<_> = eigen_ordering
50 .into_iter()
51 .rev()
52 .take(self.dimensions.into())
53 .collect();
54
55 let eigen_vec = eigen_vectors.select(Axis(1), &indexes);
56
57 Some(PCATransformer {
58 means,
59 eigen_vectors: eigen_vec.t().to_owned(),
60 })
61 }
62}
63
64impl<F: FromPrimitive + ScalarOperand + Float + ndarray_linalg::Lapack>
65 Transformer<Array2<F>, Array2<F>> for PCATransformer<F>
66{
67 fn transform(&self, input: &Array2<F>) -> Option<Array2<F>> {
68 let binding = input - &self.means;
69 let normalized: ArrayView2<F> = binding.t();
70
71 let pca: Array2<F> = self.eigen_vectors.dot(&normalized);
72 let ret: Array2<F> = pca.t().to_owned();
73
74 Some(ret)
75 }
76}