rs_ml/dimensionality_reduction/
pca.rs1use std::num::NonZeroUsize;
4
5use ndarray::{Array1, Array2, ArrayView2, Axis, ScalarOperand};
6use ndarray_linalg::{Eigh, Lapack};
7use num_traits::{Float, FromPrimitive};
8
9use crate::{transformer::Transformer, Estimator};
10
11#[derive(Clone, Debug, Copy)]
43pub struct PCAEstimator {
44 dimensions: NonZeroUsize,
45}
46
47impl PCAEstimator {
48 pub fn new(dim: NonZeroUsize) -> PCAEstimator {
50 PCAEstimator { dimensions: dim }
51 }
52}
53
54#[derive(Debug, Clone)]
56pub struct PCATransformer<F: Lapack + Clone> {
57 means: Array1<F>,
58 eigen_vectors: Array2<F>,
59}
60
61impl<F: FromPrimitive + ScalarOperand + Float + Lapack<Real = F>> Estimator<Array2<F>>
62 for PCAEstimator
63{
64 type Estimator = PCATransformer<F>;
65
66 fn fit(&self, input: &Array2<F>) -> Option<Self::Estimator> {
67 let ddof = F::from_usize(input.ncols())?;
68 let means: Array1<F> = input.mean_axis(Axis(0))?;
69 let translated: Array2<F> = input - &means;
70 let t: ArrayView2<F> = translated.t();
71 let cov: Array2<F> = t.dot(&translated) / ddof;
72
73 let (eigen_values, eigen_vectors) = cov.eigh(ndarray_linalg::UPLO::Upper).ok()?;
74
75 let mut eigen_ordering: Vec<_> = (0..eigen_values.len()).collect();
76
77 eigen_ordering.sort_by(|i, j| eigen_values[*j].partial_cmp(&eigen_values[*i]).unwrap());
78
79 let indexes: Vec<_> = eigen_ordering
80 .into_iter()
81 .take(self.dimensions.into())
82 .collect();
83
84 let eigen_vec = eigen_vectors.select(Axis(1), &indexes);
85
86 Some(PCATransformer {
87 means,
88 eigen_vectors: eigen_vec.t().to_owned(),
89 })
90 }
91}
92
93impl<F: FromPrimitive + ScalarOperand + Float + ndarray_linalg::Lapack>
94 Transformer<Array2<F>, Array2<F>> for PCATransformer<F>
95{
96 fn transform(&self, input: &Array2<F>) -> Option<Array2<F>> {
97 let binding = input - &self.means;
98 let normalized: ArrayView2<F> = binding.t();
99
100 let pca: Array2<F> = self.eigen_vectors.dot(&normalized);
101 let ret: Array2<F> = pca.t().to_owned();
102
103 Some(ret)
104 }
105}