1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix2};
use ndarray_linalg::{TruncatedOrder, TruncatedSvd};
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};
use linfa::{
traits::{Fit, Predict},
DatasetBase, Float,
};
#[cfg_attr(
feature = "serde",
derive(Serialize, Deserialize),
serde(crate = "serde_crate")
)]
pub struct PrincipalComponentAnalysisParams {
embedding_size: usize,
}
impl<'a> Fit<'a, Array2<f64>, ()> for PrincipalComponentAnalysisParams {
type Object = Pca<f64>;
fn fit(&self, dataset: &DatasetBase<Array2<f64>, ()>) -> Pca<f64> {
let mut x = dataset.records().to_owned();
let mean = x.mean_axis(Axis(0)).unwrap();
x -= &mean;
let result = TruncatedSvd::new(x, TruncatedOrder::Largest)
.decompose(self.embedding_size)
.unwrap();
let (_, sigma, v_t) = result.values_vectors();
let explained_variance = sigma.mapv(|x| x * x / (sigma.len() as f64 - 1.0));
Pca {
embedding: v_t,
explained_variance,
mean,
}
}
}
#[cfg_attr(
feature = "serde",
derive(Serialize, Deserialize),
serde(crate = "serde_crate")
)]
#[derive(Debug, Clone)]
pub struct Pca<F> {
embedding: Array2<F>,
explained_variance: Array1<F>,
mean: Array1<F>,
}
impl Pca<f64> {
pub fn params(size: usize) -> PrincipalComponentAnalysisParams {
PrincipalComponentAnalysisParams {
embedding_size: size,
}
}
pub fn explained_variance(&self) -> Array1<f64> {
self.explained_variance.clone()
}
pub fn explained_variance_ratio(&self) -> Array1<f64> {
&self.explained_variance / self.explained_variance.sum()
}
}
impl<F: Float, D: Data<Elem = F>> Predict<ArrayBase<D, Ix2>, Array2<F>> for Pca<F> {
fn predict(&self, x: ArrayBase<D, Ix2>) -> Array2<F> {
(&x - &self.mean).dot(&self.embedding.t())
}
}