//! # Partial Least Squares
//!
//! `linfa-pls` provides an implementation of methods in the PLS (Partial Least Squares) family.
//! PLS is a statistical method that finds a linear relationship between
//! input and output variables by projecting them onto a new subspace spanned
//! by newly derived variables (also called latent variables), which are linear
//! combinations of the input variables. The subspace is chosen to maximize the
//! covariance between the responses and the independent variables.
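//!
//! Concretely, for centered data matrices `X` (inputs) and `Y` (outputs), the
//! first pair of weight vectors `(u, v)` solves the standard PLS problem
//!
//! ```text
//! maximize   cov(X u, Y v)
//! subject to ||u|| = ||v|| = 1
//! ```
//!
//! with subsequent components solving the same problem on deflated (residual)
//! matrices. This is the textbook formulation, not a detail specific to this crate.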
//!
//! This approach is particularly useful when the original data are characterized by
//! a large number of highly collinear variables measured on a small number of samples.
//!
//! The implementation is a port of the scikit-learn 0.24 cross-decomposition module.
//!
//! ## References
//!
//! * [A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block case JA Wegelin](https://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf)
//! * [Scikit-Learn User Guide](https://scikit-learn.org/stable/modules/cross_decomposition.html#cross-decomposition)
//!
//! ## Example
//!
//! ```rust, ignore
//! use linfa::prelude::*;
//! use linfa_datasets::linnerud;
//! use linfa_pls::{errors::Result, PlsRegression};
//! use ndarray::array;
//!
//! // Load the linnerud dataset: 20 samples, 3 input features, 3 output features
//! let ds = linnerud();
//!
//! // Fit the PLS2 method using 2 principal components (latent variables)
//! let pls = PlsRegression::params(2).fit(&ds)?;
//!
//! // We can either apply the dimension reduction to a dataset ...
//! let reduced_ds = pls.transform(ds);
//!
//! // ... or predict outputs for new input samples.
//! let exercises = array![[14., 146., 61.], [6., 80., 60.]];
//! let physio_measures = pls.predict(exercises);
//! ```
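//!
//! A fitted model can also map a dataset from the latent space back to the
//! original feature space. A small sketch continuing the example above:
//!
//! ```rust, ignore
//! // Recover an approximation of the original records and targets from the
//! // reduced (projected) dataset
//! let recovered_ds = pls.inverse_transform(reduced_ds);
//! ```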
mod errors;
mod hyperparams;
mod pls_generic;
mod pls_svd;
mod utils;

pub use errors::*;
pub use hyperparams::*;
pub use pls_generic::*;
pub use pls_svd::*;

use linfa::{traits::Fit, traits::PredictInplace, traits::Transformer, DatasetBase, Float};
use ndarray::{Array2, ArrayBase, Data, Ix2};
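
// Generates a concrete estimator (e.g. `PlsRegression`) wrapping the generic
// `Pls` model, together with `Fit`, `Transformer` and `PredictInplace`
// implementations that delegate to the inner model.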
macro_rules! pls_algo { ($name:ident) => {
    paste::item! {
        /// PLS model computed with the corresponding algorithm, wrapping the
        /// generic `Pls` estimator
        pub struct [<Pls $name>]<F: Float>(Pls<F>);

        impl<F: Float> [<Pls $name>]<F> {
            /// Create the set of parameters, keeping `n_components` principal
            /// components (latent variables)
            pub fn params(n_components: usize) -> [<Pls $name Params>]<F> {
                [<Pls $name Params>]([<Pls $name ValidParams>](Pls::[<$name:lower>](n_components).0))
            }

            /// Singular vectors of the cross-covariance matrices
            pub fn weights(&self) -> (&Array2<F>, &Array2<F>) {
                self.0.weights()
            }

            /// Loadings of records and targets
            pub fn loadings(&self) -> (&Array2<F>, &Array2<F>) {
                self.0.loadings()
            }

            /// Projection matrices used to transform records and targets
            pub fn rotations(&self) -> (&Array2<F>, &Array2<F>) {
                self.0.rotations()
            }

            /// The coefficients of the linear model such that `Y` is approximated
            /// as `Y = X.coefficients`
            pub fn coefficients(&self) -> &Array2<F> {
                self.0.coefficients()
            }

            /// Transform the given dataset from the projected space back to the
            /// original space.
            pub fn inverse_transform(
                &self,
                dataset: DatasetBase<
                    ArrayBase<impl Data<Elem = F>, Ix2>,
                    ArrayBase<impl Data<Elem = F>, Ix2>,
                >,
            ) -> DatasetBase<Array2<F>, Array2<F>> {
                self.0.inverse_transform(dataset)
            }
        }

        impl<F: Float, D: Data<Elem = F>> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, PlsError>
            for [<Pls $name ValidParams>]<F>
        {
            type Object = [<Pls $name>]<F>;

            fn fit(
                &self,
                dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>>,
            ) -> Result<Self::Object> {
                let pls = self.0.fit(dataset)?;
                Ok([<Pls $name>](pls))
            }
        }

        impl<F: Float, D: Data<Elem = F>> Transformer<
            DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>>,
            DatasetBase<Array2<F>, Array2<F>>,
        > for [<Pls $name>]<F>
        {
            /// Apply the dimension reduction to the given dataset
            fn transform(
                &self,
                dataset: DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>>,
            ) -> DatasetBase<Array2<F>, Array2<F>> {
                self.0.transform(dataset)
            }
        }

        impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array2<F>>
            for [<Pls $name>]<F>
        {
            /// Given an input matrix `X` with shape `(n_samples, n_features)`,
            /// `predict` returns the target variables according to the method
            /// learned from the training data distribution.
            fn predict_inplace<'a>(&'a self, x: &ArrayBase<D, Ix2>, y: &mut Array2<F>) {
                self.0.predict_inplace(x, y);
            }

            fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array2<F> {
                self.0.default_target(x)
            }
        }
    }
}}
pls_algo!(Regression);
pls_algo!(Canonical);
pls_algo!(Cca);
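
// The invocations above generate the three concrete estimators exported by this
// crate: `PlsRegression`, `PlsCanonical` and `PlsCca`. A minimal usage sketch,
// assuming the `linnerud` dataset from `linfa_datasets` as in the tests below:
//
//     let ds = linfa_datasets::linnerud();
//     let model = PlsCca::<f64>::params(2).fit(&ds)?;
//     let projected = model.transform(ds);
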
#[cfg(test)]
mod test {
    use super::*;
    use approx::assert_abs_diff_eq;
    use linfa::{traits::Fit, traits::Predict, traits::Transformer};
    use linfa_datasets::linnerud;
    use ndarray::array;

    macro_rules! test_pls_algo {
        (Svd) => {
            paste::item! {
                #[test]
                fn [<test_pls_svd>]() -> Result<()> {
                    let ds = linnerud();
                    let pls = PlsSvd::<f64>::params(3).fit(&ds)?;
                    let _ds1 = pls.transform(ds);
                    Ok(())
                }
            }
        };

        ($name:ident, $expected:expr) => {
            paste::item! {
                #[test]
                fn [<test_pls_$name:lower>]() -> Result<()> {
                    let ds = linnerud();
                    let pls = [<Pls $name>]::<f64>::params(2).fit(&ds)?;
                    let _ds1 = pls.transform(ds);
                    let exercises = array![[14., 146., 61.], [6., 80., 60.]];
                    let physios = pls.predict(exercises);
                    assert_abs_diff_eq!($expected, physios.targets(), epsilon = 1e-2);
                    Ok(())
                }
            }
        };
    }

    // Prediction values were checked against scikit-learn 0.24.1
    test_pls_algo!(
        Canonical,
        array![
            [180.56979423, 33.29543984, 56.90850758],
            [190.854022, 38.91963398, 53.26914489]
        ]
    );
    test_pls_algo!(
        Regression,
        array![
            [172.39580643, 34.11919145, 57.15430526],
            [192.11167813, 38.05058858, 53.99844922]
        ]
    );
    test_pls_algo!(
        Cca,
        array![
            [181.56238421, 34.42502589, 57.31447865],
            [205.11767414, 40.23445194, 52.26494323]
        ]
    );
    test_pls_algo!(Svd);

    #[test]
    fn test_one_component_equivalence() -> Result<()> {
        // PlsRegression, PlsSvd and PlsCanonical should all be equivalent
        // when n_components is 1
        let ds = linnerud();
        let regression = PlsRegression::params(1).fit(&ds)?.transform(linnerud());
        let canonical = PlsCanonical::params(1).fit(&ds)?.transform(linnerud());
        let svd = PlsSvd::<f64>::params(1).fit(&ds)?.transform(linnerud());

        assert_abs_diff_eq!(regression.records(), canonical.records(), epsilon = 1e-5);
        assert_abs_diff_eq!(svd.records(), canonical.records(), epsilon = 1e-5);
        Ok(())
    }
}