//! Cross-decomposition estimators: `PLSSVD`, `PLSRegression`, `PLSCanonical` and
//! `CCA`, built on an SVD of the cross-covariance matrix and the NIPALS algorithm,
//! together with the shared centring/scaling and linear-algebra helpers they use.

use ferrolearn_core::backend::Backend;
use ferrolearn_core::backend_faer::NdarrayFaerBackend;
use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::{Fit, Predict, Transform};
use ndarray::{Array1, Array2};
use num_traits::Float;
use std::any::TypeId;
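/// Thin SVD triple `(U, S, Vᵀ)` with `U: (m, k)`, `S: (k,)`, `Vᵀ: (k, n)` for `k = min(m, n)`.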
type SvdResult<F> = Result<(Array2<F>, Array1<F>, Array2<F>), FerroError>;
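/// Centres each column of `x` to zero mean; when `scale` is true, also divides by the
/// sample standard deviation (ddof = 1), clamping near-zero deviations to 1 so constant
/// columns pass through unchanged. Returns `(transformed, means, optional std_devs)`.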
fn centre_scale<F: Float + Send + Sync + 'static>(
x: &Array2<F>,
scale: bool,
) -> (Array2<F>, Array1<F>, Option<Array1<F>>) {
let (n_samples, n_features) = x.dim();
let n_f = F::from(n_samples).unwrap();
let mean = Array1::from_shape_fn(n_features, |j| {
x.column(j).iter().copied().fold(F::zero(), |a, b| a + b) / n_f
});
let mut xc = x.to_owned();
for mut row in xc.rows_mut() {
for (v, &m) in row.iter_mut().zip(mean.iter()) {
*v = *v - m;
}
}
if scale {
let n_minus_1 = F::from(n_samples.saturating_sub(1).max(1)).unwrap();
let std_dev = Array1::from_shape_fn(n_features, |j| {
let var = xc
.column(j)
.iter()
.copied()
.fold(F::zero(), |a, b| a + b * b)
/ n_minus_1;
let s = var.sqrt();
if s < F::epsilon() { F::one() } else { s }
});
for mut row in xc.rows_mut() {
for (v, &s) in row.iter_mut().zip(std_dev.iter()) {
*v = *v / s;
}
}
(xc, mean, Some(std_dev))
} else {
(xc, mean, None)
}
}
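/// Applies a previously fitted centring (and optional scaling) to new data, returning
/// `ShapeMismatch` when the column count differs from the fitted means.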
fn apply_centre_scale<F: Float + Send + Sync + 'static>(
x: &Array2<F>,
mean: &Array1<F>,
std_dev: &Option<Array1<F>>,
context: &str,
) -> Result<Array2<F>, FerroError> {
if x.ncols() != mean.len() {
return Err(FerroError::ShapeMismatch {
expected: vec![x.nrows(), mean.len()],
actual: vec![x.nrows(), x.ncols()],
context: context.into(),
});
}
let mut xc = x.to_owned();
for mut row in xc.rows_mut() {
for (v, &m) in row.iter_mut().zip(mean.iter()) {
*v = *v - m;
}
}
if let Some(ref s) = *std_dev {
for mut row in xc.rows_mut() {
for (v, &sd) in row.iter_mut().zip(s.iter()) {
*v = *v / sd;
}
}
}
Ok(xc)
}
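/// Computes a thin SVD, dispatching on the concrete float type: `f64` goes straight to
/// the faer backend, `f32` is promoted to `f64` and narrowed back, and any other
/// `Float` uses the Jacobi eigendecomposition fallback.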
fn svd_dispatch<F: Float + Send + Sync + 'static>(a: &Array2<F>) -> SvdResult<F> {
    if TypeId::of::<F>() == TypeId::of::<f64>() {
        // SAFETY: the TypeId check guarantees F == f64, so this reference cast is a no-op.
        let a_f64: &Array2<f64> = unsafe { &*(std::ptr::from_ref(a).cast::<Array2<f64>>()) };
        let (u, s, vt) = NdarrayFaerBackend::svd(a_f64)?;
        let k = s.len();
        let u_thin = u.slice(ndarray::s![.., ..k]).to_owned();
        let vt_thin = vt.slice(ndarray::s![..k, ..]).to_owned();
        // SAFETY: F == f64 per the check above. `transmute_copy` duplicates ownership of
        // each array's buffer, so the originals are forgotten below to avoid a double free.
        let u_f: Array2<F> = unsafe { std::mem::transmute_copy::<Array2<f64>, Array2<F>>(&u_thin) };
        let s_f: Array1<F> = unsafe { std::mem::transmute_copy::<Array1<f64>, Array1<F>>(&s) };
        let vt_f: Array2<F> =
            unsafe { std::mem::transmute_copy::<Array2<f64>, Array2<F>>(&vt_thin) };
        std::mem::forget(u_thin);
        std::mem::forget(s);
        std::mem::forget(vt_thin);
        Ok((u_f, s_f, vt_f))
    } else if TypeId::of::<F>() == TypeId::of::<f32>() {
        // Promote f32 input to f64 for the backend SVD, then narrow the factors back.
        let (m, n) = a.dim();
        let a_f64 =
            Array2::<f64>::from_shape_fn((m, n), |(i, j)| a[[i, j]].to_f64().unwrap_or(0.0));
        let (u_full, s64, vt_full) = NdarrayFaerBackend::svd(&a_f64)?;
        let k = s64.len();
        let u_thin = u_full.slice(ndarray::s![.., ..k]).to_owned();
        let vt_thin = vt_full.slice(ndarray::s![..k, ..]).to_owned();
let u_f =
Array2::<F>::from_shape_fn(u_thin.dim(), |(i, j)| F::from(u_thin[[i, j]]).unwrap());
let s_f = Array1::<F>::from_shape_fn(s64.len(), |i| F::from(s64[i]).unwrap());
let vt_f =
Array2::<F>::from_shape_fn(vt_thin.dim(), |(i, j)| F::from(vt_thin[[i, j]]).unwrap());
Ok((u_f, s_f, vt_f))
} else {
svd_via_eigen(a)
}
}
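/// Generic-float SVD fallback: eigendecomposes `AᵀA = V·Σ²·Vᵀ`, takes the top
/// `k = min(m, n)` eigenpairs, sets `S = sqrt(Σ²)` and recovers `U = A·V·S⁻¹`
/// (columns with vanishing singular values are left zero).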
fn svd_via_eigen<F: Float + Send + Sync + 'static>(a: &Array2<F>) -> SvdResult<F> {
let (m, n) = a.dim();
let k = m.min(n);
let ata = a.t().dot(a);
let max_iter = n * n * 100 + 1000;
let (eigenvalues, eigenvectors) = jacobi_eigen_symmetric(&ata, max_iter)?;
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_by(|&i, &j| {
eigenvalues[j]
.partial_cmp(&eigenvalues[i])
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut s = Array1::<F>::zeros(k);
let mut v = Array2::<F>::zeros((n, k));
for (col, &idx) in indices.iter().take(k).enumerate() {
let eval = eigenvalues[idx];
s[col] = if eval > F::zero() {
eval.sqrt()
} else {
F::zero()
};
for row in 0..n {
v[[row, col]] = eigenvectors[[row, idx]];
}
}
let av = a.dot(&v);
let mut u = Array2::<F>::zeros((m, k));
for col in 0..k {
if s[col] > F::epsilon() {
let inv_s = F::one() / s[col];
for row in 0..m {
u[[row, col]] = av[[row, col]] * inv_s;
}
}
}
let mut vt = Array2::<F>::zeros((k, n));
for i in 0..k {
for j in 0..n {
vt[[i, j]] = v[[j, i]];
}
}
Ok((u, s, vt))
}
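/// Classical Jacobi eigendecomposition of a symmetric matrix: repeatedly applies a
/// Givens rotation that zeroes the largest off-diagonal entry until every off-diagonal
/// magnitude falls below tolerance. Returns `(eigenvalues, eigenvectors)` with
/// eigenvectors stored as columns, or `ConvergenceFailure` after `max_iter` rotations.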
fn jacobi_eigen_symmetric<F: Float + Send + Sync + 'static>(
a: &Array2<F>,
max_iter: usize,
) -> Result<(Array1<F>, Array2<F>), FerroError> {
let n = a.nrows();
let mut mat = a.to_owned();
let mut v = Array2::<F>::zeros((n, n));
for i in 0..n {
v[[i, i]] = F::one();
}
let tol = F::from(1e-12).unwrap_or_else(F::epsilon);
for _iteration in 0..max_iter {
let mut max_off = F::zero();
let mut p = 0;
let mut q = 1;
for i in 0..n {
for j in (i + 1)..n {
let val = mat[[i, j]].abs();
if val > max_off {
max_off = val;
p = i;
q = j;
}
}
}
if max_off < tol {
let eigenvalues = Array1::from_shape_fn(n, |i| mat[[i, i]]);
return Ok((eigenvalues, v));
}
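        // Rotation angle that zeroes mat[[p, q]]: cot(2θ) = (a_qq - a_pp) / (2 a_pq),
        // computed via the numerically stable tan(θ) expression.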
let app = mat[[p, p]];
let aqq = mat[[q, q]];
let apq = mat[[p, q]];
let theta = if (app - aqq).abs() < tol {
F::from(std::f64::consts::FRAC_PI_4).unwrap_or_else(F::one)
} else {
let tau = (aqq - app) / (F::from(2.0).unwrap() * apq);
let t = if tau >= F::zero() {
F::one() / (tau.abs() + (F::one() + tau * tau).sqrt())
} else {
-F::one() / (tau.abs() + (F::one() + tau * tau).sqrt())
};
t.atan()
};
let c = theta.cos();
let s = theta.sin();
let mut new_mat = mat.clone();
for i in 0..n {
if i != p && i != q {
let mip = mat[[i, p]];
let miq = mat[[i, q]];
new_mat[[i, p]] = c * mip - s * miq;
new_mat[[p, i]] = new_mat[[i, p]];
new_mat[[i, q]] = s * mip + c * miq;
new_mat[[q, i]] = new_mat[[i, q]];
}
}
new_mat[[p, p]] = c * c * app - F::from(2.0).unwrap() * s * c * apq + s * s * aqq;
new_mat[[q, q]] = s * s * app + F::from(2.0).unwrap() * s * c * apq + c * c * aqq;
new_mat[[p, q]] = F::zero();
new_mat[[q, p]] = F::zero();
mat = new_mat;
for i in 0..n {
let vip = v[[i, p]];
let viq = v[[i, q]];
v[[i, p]] = c * vip - s * viq;
v[[i, q]] = s * vip + c * viq;
}
}
Err(FerroError::ConvergenceFailure {
iterations: max_iter,
message: "Jacobi eigendecomposition did not converge in cross_decomposition SVD fallback"
.into(),
})
}
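/// Euclidean (ℓ²) norm of a vector.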
fn norm<F: Float>(v: &Array1<F>) -> F {
v.iter().copied().fold(F::zero(), |a, b| a + b * b).sqrt()
}
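/// Dot product of two equal-length vectors.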
fn dot<F: Float>(a: &Array1<F>, b: &Array1<F>) -> F {
a.iter()
.copied()
.zip(b.iter().copied())
.fold(F::zero(), |acc, (x, y)| acc + x * y)
}
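/// Inverts a square matrix via Gauss–Jordan elimination with partial pivoting. Pivots
/// smaller than a scale-relative tolerance are replaced by that tolerance, regularising
/// near-singular inputs instead of rejecting them.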
fn invert_square<F: Float + Send + Sync + 'static>(a: &Array2<F>) -> Result<Array2<F>, FerroError> {
let n = a.nrows();
if n != a.ncols() {
return Err(FerroError::ShapeMismatch {
expected: vec![n, n],
actual: vec![a.nrows(), a.ncols()],
context: "invert_square: matrix must be square".into(),
});
}
let mut aug = Array2::<F>::zeros((n, 2 * n));
for i in 0..n {
for j in 0..n {
aug[[i, j]] = a[[i, j]];
}
aug[[i, n + i]] = F::one();
}
let max_abs = a.iter().copied().fold(F::zero(), |m, v| {
let abs = v.abs();
if abs > m { abs } else { m }
});
let regularise_tol = max_abs * F::from(1e-12).unwrap_or_else(F::epsilon)
+ F::from(1e-15).unwrap_or_else(F::epsilon);
for col in 0..n {
let mut max_val = aug[[col, col]].abs();
let mut max_row = col;
for row in (col + 1)..n {
let val = aug[[row, col]].abs();
if val > max_val {
max_val = val;
max_row = row;
}
}
        if max_val < regularise_tol {
            // Near-singular pivot: nudge it up to the tolerance rather than failing.
            aug[[col, col]] = regularise_tol;
        } else if max_row != col {
            for j in 0..(2 * n) {
                let tmp = aug[[col, j]];
                aug[[col, j]] = aug[[max_row, j]];
                aug[[max_row, j]] = tmp;
            }
        }
let pivot = aug[[col, col]];
for row in (col + 1)..n {
let factor = aug[[row, col]] / pivot;
for j in col..(2 * n) {
let above = aug[[col, j]];
aug[[row, j]] = aug[[row, j]] - factor * above;
}
}
}
for col in (0..n).rev() {
let pivot = aug[[col, col]];
for j in 0..(2 * n) {
aug[[col, j]] = aug[[col, j]] / pivot;
}
for row in 0..col {
let factor = aug[[row, col]];
for j in 0..(2 * n) {
let below = aug[[col, j]];
aug[[row, j]] = aug[[row, j]] - factor * below;
}
}
}
let mut inv = Array2::<F>::zeros((n, n));
for i in 0..n {
for j in 0..n {
inv[[i, j]] = aug[[i, n + j]];
}
}
Ok(inv)
}
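/// Partial Least Squares via a single SVD of the cross-covariance matrix
/// `C = Xcᵀ·Yc` of the centred (and optionally scaled) blocks, in the spirit of
/// scikit-learn's `PLSSVD`.
///
/// A minimal usage sketch; the `ferrolearn::cross_decomposition` path is an assumption
/// about where this module is re-exported, so the example is marked `ignore`:
///
/// ```ignore
/// use ferrolearn::cross_decomposition::PLSSVD;
/// use ferrolearn_core::traits::{Fit, Transform};
/// use ndarray::array;
///
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
/// let y = array![[1.0], [2.0], [3.0], [4.0]];
/// let fitted = PLSSVD::<f64>::new(1).fit(&x, &y).unwrap();
/// let x_scores = fitted.transform(&x).unwrap(); // shape (4, 1)
/// let y_scores = fitted.transform_y(&y).unwrap(); // shape (4, 1)
/// ```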
#[derive(Debug, Clone)]
pub struct PLSSVD<F> {
n_components: usize,
scale: bool,
_marker: std::marker::PhantomData<F>,
}
impl<F: Float + Send + Sync + 'static> PLSSVD<F> {
#[must_use]
pub fn new(n_components: usize) -> Self {
Self {
n_components,
scale: true,
_marker: std::marker::PhantomData,
}
}
#[must_use]
pub fn with_scale(mut self, scale: bool) -> Self {
self.scale = scale;
self
}
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
}
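/// PLSSVD model produced by `PLSSVD::fit`: X/Y weight matrices plus the centring and
/// scaling statistics needed to project new data.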
#[derive(Debug, Clone)]
pub struct FittedPLSSVD<F> {
x_weights_: Array2<F>,
y_weights_: Array2<F>,
x_mean_: Array1<F>,
y_mean_: Array1<F>,
x_std_: Option<Array1<F>>,
y_std_: Option<Array1<F>>,
}
impl<F: Float + Send + Sync + 'static> FittedPLSSVD<F> {
#[must_use]
pub fn x_weights(&self) -> &Array2<F> {
&self.x_weights_
}
#[must_use]
pub fn y_weights(&self) -> &Array2<F> {
&self.y_weights_
}
#[must_use]
pub fn x_mean(&self) -> &Array1<F> {
&self.x_mean_
}
#[must_use]
pub fn y_mean(&self) -> &Array1<F> {
&self.y_mean_
}
pub fn transform_y(&self, y: &Array2<F>) -> Result<Array2<F>, FerroError> {
let yc = apply_centre_scale(y, &self.y_mean_, &self.y_std_, "FittedPLSSVD::transform_y")?;
Ok(yc.dot(&self.y_weights_))
}
}
impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array2<F>> for PLSSVD<F> {
type Fitted = FittedPLSSVD<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, y: &Array2<F>) -> Result<FittedPLSSVD<F>, FerroError> {
let (n_samples_x, n_features_x) = x.dim();
let (n_samples_y, n_features_y) = y.dim();
if n_samples_x != n_samples_y {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples_x, n_features_y],
actual: vec![n_samples_y, n_features_y],
context: "PLSSVD::fit: X and Y must have the same number of rows".into(),
});
}
if self.n_components == 0 {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: "must be at least 1".into(),
});
}
let max_components = n_features_x.min(n_features_y);
if self.n_components > max_components {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: format!(
"n_components ({}) exceeds min(n_features_x, n_features_y) ({})",
self.n_components, max_components
),
});
}
if n_samples_x < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n_samples_x,
context: "PLSSVD::fit requires at least 2 samples".into(),
});
}
let (xc, x_mean, x_std) = centre_scale(x, self.scale);
let (yc, y_mean, y_std) = centre_scale(y, self.scale);
let c = xc.t().dot(&yc);
let (u, _s, vt) = svd_dispatch(&c)?;
let nc = self.n_components;
let x_weights = u.slice(ndarray::s![.., ..nc]).to_owned();
let y_weights = vt.t().slice(ndarray::s![.., ..nc]).to_owned();
Ok(FittedPLSSVD {
x_weights_: x_weights,
y_weights_: y_weights,
x_mean_: x_mean,
y_mean_: y_mean,
x_std_: x_std,
y_std_: y_std,
})
}
}
impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPLSSVD<F> {
type Output = Array2<F>;
type Error = FerroError;
fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let xc = apply_centre_scale(x, &self.x_mean_, &self.x_std_, "FittedPLSSVD::transform")?;
Ok(xc.dot(&self.x_weights_))
}
}
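/// Y-deflation strategy for NIPALS: `Regression` removes the X-score direction
/// `t·qᵀ` from Y (PLS2), while `Canonical` deflates Y with its own scores `u·cᵀ`.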
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NipalsMode {
Regression,
Canonical,
}
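/// Whether Y loadings and both score vectors are rescaled to unit norm/variance per
/// component, as CCA requires; plain PLS uses `None`.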
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScoreNorm {
None,
UnitVariance,
}
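/// Per-component output of the NIPALS loop: weights, loadings, scores and the
/// iteration count for each extracted component.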
#[derive(Debug, Clone)]
struct NipalsResult<F> {
x_weights: Array2<F>,
x_loadings: Array2<F>,
x_scores: Array2<F>,
y_loadings: Array2<F>,
y_scores: Array2<F>,
n_iter: Vec<usize>,
}
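/// Shared NIPALS loop behind `PLSRegression`, `PLSCanonical` and `CCA`. Extracts
/// `n_components` latent components one at a time, then deflates the X block (and the
/// Y block according to `mode`) before moving on to the next component.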
fn nipals<F: Float + Send + Sync + 'static>(
x: &Array2<F>,
y: &Array2<F>,
n_components: usize,
max_iter: usize,
tol: F,
mode: NipalsMode,
score_norm: ScoreNorm,
) -> Result<NipalsResult<F>, FerroError> {
let (n_samples, n_features_x) = x.dim();
let n_features_y = y.ncols();
let mut xk = x.to_owned();
let mut yk = y.to_owned();
let mut x_weights = Array2::<F>::zeros((n_features_x, n_components));
let mut x_loadings = Array2::<F>::zeros((n_features_x, n_components));
let mut x_scores = Array2::<F>::zeros((n_samples, n_components));
let mut y_loadings = Array2::<F>::zeros((n_features_y, n_components));
let mut y_scores = Array2::<F>::zeros((n_samples, n_components));
let mut n_iter_vec = Vec::with_capacity(n_components);
for k in 0..n_components {
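        // Seed u with the Y column carrying the largest sum of squares.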
let best_col = (0..n_features_y)
.max_by(|&a, &b| {
let var_a: F = yk
.column(a)
.iter()
.copied()
.fold(F::zero(), |s, v| s + v * v);
let var_b: F = yk
.column(b)
.iter()
.copied()
.fold(F::zero(), |s, v| s + v * v);
var_a
.partial_cmp(&var_b)
.unwrap_or(std::cmp::Ordering::Equal)
})
.unwrap_or(0);
let mut u = yk.column(best_col).to_owned();
let mut converged = false;
let mut iters = 0;
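        // Alternate w ∝ Xᵀu, t = Xw, q ∝ Yᵀt, u ∝ Yq until u stabilises.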
for iteration in 0..max_iter {
iters = iteration + 1;
let utu = dot(&u, &u);
let mut w = xk.t().dot(&u);
if utu > F::epsilon() {
w.mapv_inplace(|v| v / utu);
}
let w_norm = norm(&w);
if w_norm < F::epsilon() {
break;
}
w.mapv_inplace(|v| v / w_norm);
let t = xk.dot(&w);
let ttt = dot(&t, &t);
let mut q = yk.t().dot(&t);
if ttt > F::epsilon() {
q.mapv_inplace(|v| v / ttt);
}
if score_norm == ScoreNorm::UnitVariance {
let q_norm = norm(&q);
if q_norm > F::epsilon() {
q.mapv_inplace(|v| v / q_norm);
}
}
let qtq = dot(&q, &q);
let mut u_new = yk.dot(&q);
if qtq > F::epsilon() {
u_new.mapv_inplace(|v| v / qtq);
}
let diff_norm = {
let diff: Array1<F> = &u_new - &u;
norm(&diff)
};
let u_new_norm = norm(&u_new);
u = u_new;
if u_new_norm > F::epsilon() && diff_norm / u_new_norm < tol {
converged = true;
let utu2 = dot(&u, &u);
w = xk.t().dot(&u);
if utu2 > F::epsilon() {
w.mapv_inplace(|v| v / utu2);
}
let w_norm2 = norm(&w);
if w_norm2 > F::epsilon() {
w.mapv_inplace(|v| v / w_norm2);
}
let t_final = xk.dot(&w);
let ttt2 = dot(&t_final, &t_final);
q = yk.t().dot(&t_final);
if ttt2 > F::epsilon() {
q.mapv_inplace(|v| v / ttt2);
}
if score_norm == ScoreNorm::UnitVariance {
let q_norm2 = norm(&q);
if q_norm2 > F::epsilon() {
q.mapv_inplace(|v| v / q_norm2);
}
}
let qtq2 = dot(&q, &q);
u = yk.dot(&q);
if qtq2 > F::epsilon() {
u.mapv_inplace(|v| v / qtq2);
}
break;
}
}
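        // Recompute the component's weights, scores and loadings from the final u so the
        // converged and max-iteration paths share the same bookkeeping below.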
let utu_final = dot(&u, &u);
let mut w_final = xk.t().dot(&u);
if utu_final > F::epsilon() {
w_final.mapv_inplace(|v| v / utu_final);
}
let w_norm_final = norm(&w_final);
if w_norm_final > F::epsilon() {
w_final.mapv_inplace(|v| v / w_norm_final);
}
let mut t_final = xk.dot(&w_final);
let ttt_final = dot(&t_final, &t_final);
let mut p = xk.t().dot(&t_final);
if ttt_final > F::epsilon() {
p.mapv_inplace(|v| v / ttt_final);
}
let mut q_final = yk.t().dot(&t_final);
if ttt_final > F::epsilon() {
q_final.mapv_inplace(|v| v / ttt_final);
}
if score_norm == ScoreNorm::UnitVariance {
let q_norm = norm(&q_final);
if q_norm > F::epsilon() {
q_final.mapv_inplace(|v| v / q_norm);
}
}
let qtq_final = dot(&q_final, &q_final);
let mut u_final = yk.dot(&q_final);
if qtq_final > F::epsilon() {
u_final.mapv_inplace(|v| v / qtq_final);
}
if score_norm == ScoreNorm::UnitVariance {
let t_std = {
let t_mean = t_final.iter().copied().fold(F::zero(), |a, b| a + b)
/ F::from(n_samples).unwrap();
let var = t_final
.iter()
.copied()
.fold(F::zero(), |a, b| a + (b - t_mean) * (b - t_mean))
/ F::from(n_samples.saturating_sub(1).max(1)).unwrap();
var.sqrt()
};
if t_std > F::epsilon() {
t_final.mapv_inplace(|v| v / t_std);
}
let u_std = {
let u_mean = u_final.iter().copied().fold(F::zero(), |a, b| a + b)
/ F::from(n_samples).unwrap();
let var = u_final
.iter()
.copied()
.fold(F::zero(), |a, b| a + (b - u_mean) * (b - u_mean))
/ F::from(n_samples.saturating_sub(1).max(1)).unwrap();
var.sqrt()
};
if u_std > F::epsilon() {
u_final.mapv_inplace(|v| v / u_std);
}
}
x_weights.column_mut(k).assign(&w_final);
x_loadings.column_mut(k).assign(&p);
x_scores.column_mut(k).assign(&t_final);
y_loadings.column_mut(k).assign(&q_final);
y_scores.column_mut(k).assign(&u_final);
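        // Deflate X by the rank-one approximation t·pᵀ before the next component.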
for i in 0..n_samples {
let ti = t_final[i];
for j in 0..n_features_x {
xk[[i, j]] = xk[[i, j]] - ti * p[j];
}
}
match mode {
NipalsMode::Regression => {
for i in 0..n_samples {
let ti = t_final[i];
for j in 0..n_features_y {
yk[[i, j]] = yk[[i, j]] - ti * q_final[j];
}
}
}
NipalsMode::Canonical => {
let utu_c = dot(&u_final, &u_final);
let mut c = yk.t().dot(&u_final);
if utu_c > F::epsilon() {
c.mapv_inplace(|v| v / utu_c);
}
for i in 0..n_samples {
let ui = u_final[i];
for j in 0..n_features_y {
yk[[i, j]] = yk[[i, j]] - ui * c[j];
}
}
}
}
n_iter_vec.push(iters);
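        // Convergence is only enforced for multi-target Y: with a single column the
        // first pass already uses the exact weight direction w ∝ Xᵀy.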
if !converged && n_features_y > 1 && iters == max_iter {
return Err(FerroError::ConvergenceFailure {
iterations: max_iter,
message: format!("NIPALS did not converge for component {k}"),
});
}
}
Ok(NipalsResult {
x_weights,
x_loadings,
x_scores,
y_loadings,
y_scores,
n_iter: n_iter_vec,
})
}
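/// PLS regression (PLS2) via NIPALS with regression-mode deflation. After fitting,
/// predictions use the coefficient matrix `B = W·(PᵀW)⁻¹·Qᵀ` on centred/scaled
/// inputs, with the Y centring/scaling undone afterwards.
///
/// A usage sketch under the same (assumed) re-export path as above:
///
/// ```ignore
/// use ferrolearn::cross_decomposition::PLSRegression;
/// use ferrolearn_core::traits::{Fit, Predict};
/// use ndarray::array;
///
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0]];
/// let y = array![[3.0], [7.0], [11.0], [15.0], [19.0]];
/// let fitted = PLSRegression::<f64>::new(1)
///     .with_max_iter(1000)
///     .with_tol(1e-8)
///     .fit(&x, &y)
///     .unwrap();
/// let y_pred = fitted.predict(&x).unwrap(); // recovers y on this collinear data
/// ```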
#[derive(Debug, Clone)]
pub struct PLSRegression<F> {
n_components: usize,
max_iter: usize,
tol: F,
scale: bool,
_marker: std::marker::PhantomData<F>,
}
impl<F: Float + Send + Sync + 'static> PLSRegression<F> {
#[must_use]
pub fn new(n_components: usize) -> Self {
Self {
n_components,
max_iter: 500,
tol: F::from(1e-6).unwrap_or_else(F::epsilon),
scale: true,
_marker: std::marker::PhantomData,
}
}
#[must_use]
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
#[must_use]
pub fn with_tol(mut self, tol: F) -> Self {
self.tol = tol;
self
}
#[must_use]
pub fn with_scale(mut self, scale: bool) -> Self {
self.scale = scale;
self
}
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
}
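/// PLSRegression model produced by `PLSRegression::fit`, including the regression
/// coefficients, per-component weights/loadings/scores and iteration counts.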
#[derive(Debug, Clone)]
pub struct FittedPLSRegression<F> {
x_weights_: Array2<F>,
x_loadings_: Array2<F>,
y_loadings_: Array2<F>,
coefficients_: Array2<F>,
x_scores_: Array2<F>,
y_scores_: Array2<F>,
n_iter_: Vec<usize>,
x_mean_: Array1<F>,
y_mean_: Array1<F>,
x_std_: Option<Array1<F>>,
y_std_: Option<Array1<F>>,
}
impl<F: Float + Send + Sync + 'static> FittedPLSRegression<F> {
#[must_use]
pub fn x_weights(&self) -> &Array2<F> {
&self.x_weights_
}
#[must_use]
pub fn x_loadings(&self) -> &Array2<F> {
&self.x_loadings_
}
#[must_use]
pub fn y_loadings(&self) -> &Array2<F> {
&self.y_loadings_
}
#[must_use]
pub fn coefficients(&self) -> &Array2<F> {
&self.coefficients_
}
#[must_use]
pub fn x_scores(&self) -> &Array2<F> {
&self.x_scores_
}
#[must_use]
pub fn y_scores(&self) -> &Array2<F> {
&self.y_scores_
}
#[must_use]
pub fn n_iter(&self) -> &[usize] {
&self.n_iter_
}
}
impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array2<F>> for PLSRegression<F> {
type Fitted = FittedPLSRegression<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, y: &Array2<F>) -> Result<FittedPLSRegression<F>, FerroError> {
let (n_samples_x, n_features_x) = x.dim();
let (n_samples_y, n_features_y) = y.dim();
if n_samples_x != n_samples_y {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples_x, n_features_y],
actual: vec![n_samples_y, n_features_y],
context: "PLSRegression::fit: X and Y must have the same number of rows".into(),
});
}
if self.n_components == 0 {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: "must be at least 1".into(),
});
}
let max_components = n_features_x.min(n_features_y).min(n_samples_x);
if self.n_components > max_components {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: format!(
"n_components ({}) exceeds min(n_features_x, n_features_y, n_samples) ({})",
self.n_components, max_components
),
});
}
if n_samples_x < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n_samples_x,
context: "PLSRegression::fit requires at least 2 samples".into(),
});
}
let (xc, x_mean, x_std) = centre_scale(x, self.scale);
let (yc, y_mean, y_std) = centre_scale(y, self.scale);
let result = nipals(
&xc,
&yc,
self.n_components,
self.max_iter,
self.tol,
NipalsMode::Regression,
ScoreNorm::None,
)?;
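        // B = W·(PᵀW)⁻¹·Qᵀ maps centred/scaled X directly onto centred/scaled Y.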
let ptw = result.x_loadings.t().dot(&result.x_weights);
let ptw_inv = invert_square(&ptw)?;
let coefficients = result.x_weights.dot(&ptw_inv).dot(&result.y_loadings.t());
Ok(FittedPLSRegression {
x_weights_: result.x_weights,
x_loadings_: result.x_loadings,
y_loadings_: result.y_loadings,
coefficients_: coefficients,
x_scores_: result.x_scores,
y_scores_: result.y_scores,
n_iter_: result.n_iter,
x_mean_: x_mean,
y_mean_: y_mean,
x_std_: x_std,
y_std_: y_std,
})
}
}
impl<F: Float + Send + Sync + 'static> Predict<Array2<F>> for FittedPLSRegression<F> {
type Output = Array2<F>;
type Error = FerroError;
fn predict(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let xc = apply_centre_scale(
x,
&self.x_mean_,
&self.x_std_,
"FittedPLSRegression::predict",
)?;
let mut y_pred = xc.dot(&self.coefficients_);
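        // Undo the Y scaling, then the Y centring, so predictions are in original units.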
if let Some(ref ys) = self.y_std_ {
for mut row in y_pred.rows_mut() {
for (v, &s) in row.iter_mut().zip(ys.iter()) {
*v = *v * s;
}
}
}
for mut row in y_pred.rows_mut() {
for (v, &m) in row.iter_mut().zip(self.y_mean_.iter()) {
*v = *v + m;
}
}
Ok(y_pred)
}
}
impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPLSRegression<F> {
type Output = Array2<F>;
type Error = FerroError;
fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let xc = apply_centre_scale(
x,
&self.x_mean_,
&self.x_std_,
"FittedPLSRegression::transform",
)?;
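        // Rotation W·(PᵀW)⁻¹ sends centred X to the same scores produced at fit time.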
let ptw = self.x_loadings_.t().dot(&self.x_weights_);
let ptw_inv = invert_square(&ptw)?;
let rotation = self.x_weights_.dot(&ptw_inv);
Ok(xc.dot(&rotation))
}
}
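/// Canonical PLS: symmetric NIPALS where Y is deflated with its own scores, analogous
/// to scikit-learn's `PLSCanonical` with the NIPALS algorithm.
///
/// A usage sketch under the same (assumed) re-export path:
///
/// ```ignore
/// use ferrolearn::cross_decomposition::PLSCanonical;
/// use ferrolearn_core::traits::{Fit, Transform};
/// use ndarray::array;
///
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
/// let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
/// let fitted = PLSCanonical::<f64>::new(1).fit(&x, &y).unwrap();
/// let x_scores = fitted.transform(&x).unwrap();
/// let y_scores = fitted.transform_y(&y).unwrap();
/// ```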
#[derive(Debug, Clone)]
pub struct PLSCanonical<F> {
n_components: usize,
max_iter: usize,
tol: F,
scale: bool,
_marker: std::marker::PhantomData<F>,
}
impl<F: Float + Send + Sync + 'static> PLSCanonical<F> {
#[must_use]
pub fn new(n_components: usize) -> Self {
Self {
n_components,
max_iter: 500,
tol: F::from(1e-6).unwrap_or_else(F::epsilon),
scale: true,
_marker: std::marker::PhantomData,
}
}
#[must_use]
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
#[must_use]
pub fn with_tol(mut self, tol: F) -> Self {
self.tol = tol;
self
}
#[must_use]
pub fn with_scale(mut self, scale: bool) -> Self {
self.scale = scale;
self
}
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
}
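/// PLSCanonical model produced by `PLSCanonical::fit`.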
#[derive(Debug, Clone)]
pub struct FittedPLSCanonical<F> {
x_weights_: Array2<F>,
x_loadings_: Array2<F>,
y_loadings_: Array2<F>,
x_scores_: Array2<F>,
y_scores_: Array2<F>,
n_iter_: Vec<usize>,
x_mean_: Array1<F>,
y_mean_: Array1<F>,
x_std_: Option<Array1<F>>,
y_std_: Option<Array1<F>>,
}
impl<F: Float + Send + Sync + 'static> FittedPLSCanonical<F> {
#[must_use]
pub fn x_weights(&self) -> &Array2<F> {
&self.x_weights_
}
#[must_use]
pub fn x_loadings(&self) -> &Array2<F> {
&self.x_loadings_
}
#[must_use]
pub fn y_loadings(&self) -> &Array2<F> {
&self.y_loadings_
}
#[must_use]
pub fn x_scores(&self) -> &Array2<F> {
&self.x_scores_
}
#[must_use]
pub fn y_scores(&self) -> &Array2<F> {
&self.y_scores_
}
#[must_use]
pub fn n_iter(&self) -> &[usize] {
&self.n_iter_
}
pub fn transform_y(&self, y: &Array2<F>) -> Result<Array2<F>, FerroError> {
let yc = apply_centre_scale(
y,
&self.y_mean_,
&self.y_std_,
"FittedPLSCanonical::transform_y",
)?;
Ok(yc.dot(&self.y_loadings_))
}
}
impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array2<F>> for PLSCanonical<F> {
type Fitted = FittedPLSCanonical<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, y: &Array2<F>) -> Result<FittedPLSCanonical<F>, FerroError> {
let (n_samples_x, n_features_x) = x.dim();
let (n_samples_y, n_features_y) = y.dim();
if n_samples_x != n_samples_y {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples_x, n_features_y],
actual: vec![n_samples_y, n_features_y],
context: "PLSCanonical::fit: X and Y must have the same number of rows".into(),
});
}
if self.n_components == 0 {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: "must be at least 1".into(),
});
}
let max_components = n_features_x.min(n_features_y).min(n_samples_x);
if self.n_components > max_components {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: format!(
"n_components ({}) exceeds min(n_features_x, n_features_y, n_samples) ({})",
self.n_components, max_components
),
});
}
if n_samples_x < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n_samples_x,
context: "PLSCanonical::fit requires at least 2 samples".into(),
});
}
let (xc, x_mean, x_std) = centre_scale(x, self.scale);
let (yc, y_mean, y_std) = centre_scale(y, self.scale);
let result = nipals(
&xc,
&yc,
self.n_components,
self.max_iter,
self.tol,
NipalsMode::Canonical,
ScoreNorm::None,
)?;
Ok(FittedPLSCanonical {
x_weights_: result.x_weights,
x_loadings_: result.x_loadings,
y_loadings_: result.y_loadings,
x_scores_: result.x_scores,
y_scores_: result.y_scores,
n_iter_: result.n_iter,
x_mean_: x_mean,
y_mean_: y_mean,
x_std_: x_std,
y_std_: y_std,
})
}
}
impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPLSCanonical<F> {
type Output = Array2<F>;
type Error = FerroError;
fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let xc = apply_centre_scale(
x,
&self.x_mean_,
&self.x_std_,
"FittedPLSCanonical::transform",
)?;
let ptw = self.x_loadings_.t().dot(&self.x_weights_);
let ptw_inv = invert_square(&ptw)?;
let rotation = self.x_weights_.dot(&ptw_inv);
Ok(xc.dot(&rotation))
}
}
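/// Canonical Correlation Analysis, implemented as canonical-mode NIPALS with
/// unit-variance score normalisation (cf. scikit-learn's `CCA`).
///
/// A usage sketch under the same (assumed) re-export path:
///
/// ```ignore
/// use ferrolearn::cross_decomposition::CCA;
/// use ferrolearn_core::traits::{Fit, Transform};
/// use ndarray::array;
///
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
/// let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
/// let fitted = CCA::<f64>::new(1).fit(&x, &y).unwrap();
/// let x_scores = fitted.transform(&x).unwrap();
/// let y_scores = fitted.transform_y(&y).unwrap();
/// ```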
#[derive(Debug, Clone)]
pub struct CCA<F> {
n_components: usize,
max_iter: usize,
tol: F,
scale: bool,
_marker: std::marker::PhantomData<F>,
}
impl<F: Float + Send + Sync + 'static> CCA<F> {
#[must_use]
pub fn new(n_components: usize) -> Self {
Self {
n_components,
max_iter: 500,
tol: F::from(1e-6).unwrap_or_else(F::epsilon),
scale: true,
_marker: std::marker::PhantomData,
}
}
#[must_use]
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
#[must_use]
pub fn with_tol(mut self, tol: F) -> Self {
self.tol = tol;
self
}
#[must_use]
pub fn with_scale(mut self, scale: bool) -> Self {
self.scale = scale;
self
}
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
}
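/// CCA model produced by `CCA::fit`.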
#[derive(Debug, Clone)]
pub struct FittedCCA<F> {
x_weights_: Array2<F>,
x_loadings_: Array2<F>,
y_loadings_: Array2<F>,
x_scores_: Array2<F>,
y_scores_: Array2<F>,
n_iter_: Vec<usize>,
x_mean_: Array1<F>,
y_mean_: Array1<F>,
x_std_: Option<Array1<F>>,
y_std_: Option<Array1<F>>,
}
impl<F: Float + Send + Sync + 'static> FittedCCA<F> {
#[must_use]
pub fn x_weights(&self) -> &Array2<F> {
&self.x_weights_
}
#[must_use]
pub fn x_loadings(&self) -> &Array2<F> {
&self.x_loadings_
}
#[must_use]
pub fn y_loadings(&self) -> &Array2<F> {
&self.y_loadings_
}
#[must_use]
pub fn x_scores(&self) -> &Array2<F> {
&self.x_scores_
}
#[must_use]
pub fn y_scores(&self) -> &Array2<F> {
&self.y_scores_
}
#[must_use]
pub fn n_iter(&self) -> &[usize] {
&self.n_iter_
}
pub fn transform_y(&self, y: &Array2<F>) -> Result<Array2<F>, FerroError> {
let yc = apply_centre_scale(y, &self.y_mean_, &self.y_std_, "FittedCCA::transform_y")?;
Ok(yc.dot(&self.y_loadings_))
}
}
impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, Array2<F>> for CCA<F> {
type Fitted = FittedCCA<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, y: &Array2<F>) -> Result<FittedCCA<F>, FerroError> {
let (n_samples_x, n_features_x) = x.dim();
let (n_samples_y, n_features_y) = y.dim();
if n_samples_x != n_samples_y {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples_x, n_features_y],
actual: vec![n_samples_y, n_features_y],
context: "CCA::fit: X and Y must have the same number of rows".into(),
});
}
if self.n_components == 0 {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: "must be at least 1".into(),
});
}
let max_components = n_features_x.min(n_features_y).min(n_samples_x);
if self.n_components > max_components {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: format!(
"n_components ({}) exceeds min(n_features_x, n_features_y, n_samples) ({})",
self.n_components, max_components
),
});
}
if n_samples_x < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n_samples_x,
context: "CCA::fit requires at least 2 samples".into(),
});
}
let (xc, x_mean, x_std) = centre_scale(x, self.scale);
let (yc, y_mean, y_std) = centre_scale(y, self.scale);
let result = nipals(
&xc,
&yc,
self.n_components,
self.max_iter,
self.tol,
NipalsMode::Canonical,
ScoreNorm::UnitVariance,
)?;
Ok(FittedCCA {
x_weights_: result.x_weights,
x_loadings_: result.x_loadings,
y_loadings_: result.y_loadings,
x_scores_: result.x_scores,
y_scores_: result.y_scores,
n_iter_: result.n_iter,
x_mean_: x_mean,
y_mean_: y_mean,
x_std_: x_std,
y_std_: y_std,
})
}
}
impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedCCA<F> {
type Output = Array2<F>;
type Error = FerroError;
fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let xc = apply_centre_scale(x, &self.x_mean_, &self.x_std_, "FittedCCA::transform")?;
let ptw = self.x_loadings_.t().dot(&self.x_weights_);
let ptw_inv = invert_square(&ptw)?;
let rotation = self.x_weights_.dot(&ptw_inv);
Ok(xc.dot(&rotation))
}
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use ndarray::array;
#[test]
fn test_plssvd_basic_fit_transform() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let svd = PLSSVD::<f64>::new(1);
let fitted = svd.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.dim(), (5, 1));
}
#[test]
fn test_plssvd_two_components() {
let x = array![
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[7.0, 8.0, 9.0],
[10.0, 11.0, 12.0],
];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
let svd = PLSSVD::<f64>::new(2);
let fitted = svd.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.dim(), (4, 2));
}
#[test]
fn test_plssvd_transform_y() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
let svd = PLSSVD::<f64>::new(1);
let fitted = svd.fit(&x, &y).unwrap();
let y_scores = fitted.transform_y(&y).unwrap();
assert_eq!(y_scores.ncols(), 1);
}
#[test]
fn test_plssvd_no_scale() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let svd = PLSSVD::<f64>::new(1).with_scale(false);
let fitted = svd.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.ncols(), 1);
}
#[test]
fn test_plssvd_x_weights_shape() {
let x = array![
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[7.0, 8.0, 9.0],
[10.0, 11.0, 12.0],
];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
let svd = PLSSVD::<f64>::new(2);
let fitted = svd.fit(&x, &y).unwrap();
assert_eq!(fitted.x_weights().dim(), (3, 2));
assert_eq!(fitted.y_weights().dim(), (2, 2));
}
#[test]
fn test_plssvd_invalid_zero_components() {
let x = array![[1.0, 2.0], [3.0, 4.0]];
let y = array![[1.0], [2.0]];
let svd = PLSSVD::<f64>::new(0);
assert!(svd.fit(&x, &y).is_err());
}
#[test]
fn test_plssvd_too_many_components() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let svd = PLSSVD::<f64>::new(2);
assert!(svd.fit(&x, &y).is_err());
}
#[test]
fn test_plssvd_row_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0]];
let svd = PLSSVD::<f64>::new(1);
assert!(svd.fit(&x, &y).is_err());
}
#[test]
fn test_plssvd_insufficient_samples() {
let x = array![[1.0, 2.0]];
let y = array![[1.0]];
let svd = PLSSVD::<f64>::new(1);
assert!(svd.fit(&x, &y).is_err());
}
#[test]
fn test_plssvd_transform_shape_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let svd = PLSSVD::<f64>::new(1);
let fitted = svd.fit(&x, &y).unwrap();
let x_bad = array![[1.0, 2.0, 3.0]];
assert!(fitted.transform(&x_bad).is_err());
}
#[test]
fn test_plssvd_n_components_getter() {
let svd = PLSSVD::<f64>::new(3);
assert_eq!(svd.n_components(), 3);
}
#[test]
fn test_plssvd_f32() {
let x: Array2<f32> = array![
[1.0f32, 2.0],
[3.0, 4.0],
[5.0, 6.0],
[7.0, 8.0],
[9.0, 10.0],
];
let y: Array2<f32> = array![[1.0f32], [2.0], [3.0], [4.0], [5.0]];
let svd = PLSSVD::<f32>::new(1);
let fitted = svd.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.ncols(), 1);
}
#[test]
fn test_plsregression_basic_fit_predict() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let pls = PLSRegression::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let y_pred = fitted.predict(&x).unwrap();
assert_eq!(y_pred.dim(), (5, 1));
}
#[test]
fn test_plsregression_prediction_quality() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[3.0], [7.0], [11.0], [15.0], [19.0]];
let pls = PLSRegression::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let y_pred = fitted.predict(&x).unwrap();
for (pred, actual) in y_pred.column(0).iter().zip(y.column(0).iter()) {
assert_abs_diff_eq!(pred, actual, epsilon = 1e-6);
}
}
#[test]
fn test_plsregression_multi_target() {
let x = array![
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[7.0, 8.0, 9.0],
[10.0, 11.0, 12.0],
[13.0, 14.0, 15.0],
];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0], [5.0, 2.5],];
let pls = PLSRegression::<f64>::new(2);
let fitted = pls.fit(&x, &y).unwrap();
let y_pred = fitted.predict(&x).unwrap();
assert_eq!(y_pred.dim(), (5, 2));
}
#[test]
fn test_plsregression_transform() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let pls = PLSRegression::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.dim(), (5, 1));
}
#[test]
fn test_plsregression_coefficients_shape() {
let x = array![
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[7.0, 8.0, 9.0],
[10.0, 11.0, 12.0],
];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
let pls = PLSRegression::<f64>::new(2);
let fitted = pls.fit(&x, &y).unwrap();
assert_eq!(fitted.coefficients().dim(), (3, 2));
}
#[test]
fn test_plsregression_no_scale() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let pls = PLSRegression::<f64>::new(1).with_scale(false);
let fitted = pls.fit(&x, &y).unwrap();
let y_pred = fitted.predict(&x).unwrap();
assert_eq!(y_pred.dim(), (5, 1));
}
#[test]
fn test_plsregression_builder() {
let pls = PLSRegression::<f64>::new(2)
.with_max_iter(1000)
.with_tol(1e-8)
.with_scale(false);
assert_eq!(pls.n_components(), 2);
}
#[test]
fn test_plsregression_invalid_zero_components() {
let x = array![[1.0, 2.0], [3.0, 4.0]];
let y = array![[1.0], [2.0]];
let pls = PLSRegression::<f64>::new(0);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plsregression_too_many_components() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let pls = PLSRegression::<f64>::new(2);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plsregression_row_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0]];
let pls = PLSRegression::<f64>::new(1);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plsregression_insufficient_samples() {
let x = array![[1.0, 2.0]];
let y = array![[1.0]];
let pls = PLSRegression::<f64>::new(1);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plsregression_predict_shape_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let pls = PLSRegression::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let x_bad = array![[1.0, 2.0, 3.0]];
assert!(fitted.predict(&x_bad).is_err());
}
#[test]
fn test_plsregression_transform_shape_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let pls = PLSRegression::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let x_bad = array![[1.0, 2.0, 3.0]];
assert!(fitted.transform(&x_bad).is_err());
}
#[test]
fn test_plsregression_x_scores_shape() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
let y = array![[1.0], [2.0], [3.0], [4.0]];
let pls = PLSRegression::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
assert_eq!(fitted.x_scores().dim(), (4, 1));
assert_eq!(fitted.y_scores().dim(), (4, 1));
assert_eq!(fitted.n_iter().len(), 1);
}
#[test]
fn test_plsregression_f32() {
let x: Array2<f32> = array![
[1.0f32, 2.0],
[3.0, 4.0],
[5.0, 6.0],
[7.0, 8.0],
[9.0, 10.0],
];
let y: Array2<f32> = array![[1.0f32], [2.0], [3.0], [4.0], [5.0]];
let pls = PLSRegression::<f32>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let y_pred = fitted.predict(&x).unwrap();
assert_eq!(y_pred.ncols(), 1);
}
#[test]
fn test_plscanonical_basic_fit_transform() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0], [5.0, 2.5],];
let pls = PLSCanonical::<f64>::new(2);
let fitted = pls.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.dim(), (5, 2));
}
#[test]
fn test_plscanonical_single_component() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let pls = PLSCanonical::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.ncols(), 1);
}
#[test]
fn test_plscanonical_scores_shape() {
let x = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5]];
let pls = PLSCanonical::<f64>::new(2);
let fitted = pls.fit(&x, &y).unwrap();
assert_eq!(fitted.x_scores().dim(), (3, 2));
assert_eq!(fitted.y_scores().dim(), (3, 2));
assert_eq!(fitted.x_weights().dim(), (3, 2));
assert_eq!(fitted.x_loadings().dim(), (3, 2));
assert_eq!(fitted.y_loadings().dim(), (2, 2));
}
#[test]
fn test_plscanonical_transform_y() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
let pls = PLSCanonical::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let y_scores = fitted.transform_y(&y).unwrap();
assert_eq!(y_scores.ncols(), 1);
}
#[test]
fn test_plscanonical_builder() {
let pls = PLSCanonical::<f64>::new(2)
.with_max_iter(1000)
.with_tol(1e-8)
.with_scale(false);
assert_eq!(pls.n_components(), 2);
}
#[test]
fn test_plscanonical_invalid_zero_components() {
let x = array![[1.0, 2.0], [3.0, 4.0]];
let y = array![[1.0, 0.5], [2.0, 1.0]];
let pls = PLSCanonical::<f64>::new(0);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plscanonical_too_many_components() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let pls = PLSCanonical::<f64>::new(2);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plscanonical_row_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5]];
let pls = PLSCanonical::<f64>::new(1);
assert!(pls.fit(&x, &y).is_err());
}
#[test]
fn test_plscanonical_transform_shape_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5]];
let pls = PLSCanonical::<f64>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let x_bad = array![[1.0, 2.0, 3.0]];
assert!(fitted.transform(&x_bad).is_err());
}
#[test]
fn test_plscanonical_f32() {
let x: Array2<f32> = array![
[1.0f32, 2.0],
[3.0, 4.0],
[5.0, 6.0],
[7.0, 8.0],
[9.0, 10.0],
];
let y: Array2<f32> = array![
[1.0f32, 0.5],
[2.0, 1.0],
[3.0, 1.5],
[4.0, 2.0],
[5.0, 2.5],
];
let pls = PLSCanonical::<f32>::new(1);
let fitted = pls.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.ncols(), 1);
}
#[test]
fn test_cca_basic_fit_transform() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0], [5.0, 2.5],];
let cca = CCA::<f64>::new(2);
let fitted = cca.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.dim(), (5, 2));
}
#[test]
fn test_cca_single_component() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0],];
let y = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
let cca = CCA::<f64>::new(1);
let fitted = cca.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.ncols(), 1);
}
#[test]
fn test_cca_scores_shape() {
let x = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5]];
let cca = CCA::<f64>::new(2);
let fitted = cca.fit(&x, &y).unwrap();
assert_eq!(fitted.x_scores().dim(), (3, 2));
assert_eq!(fitted.y_scores().dim(), (3, 2));
assert_eq!(fitted.x_weights().dim(), (3, 2));
assert_eq!(fitted.x_loadings().dim(), (3, 2));
assert_eq!(fitted.y_loadings().dim(), (2, 2));
}
#[test]
fn test_cca_transform_y() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0]];
let cca = CCA::<f64>::new(1);
let fitted = cca.fit(&x, &y).unwrap();
let y_scores = fitted.transform_y(&y).unwrap();
assert_eq!(y_scores.ncols(), 1);
}
#[test]
fn test_cca_builder() {
let cca = CCA::<f64>::new(2)
.with_max_iter(1000)
.with_tol(1e-8)
.with_scale(false);
assert_eq!(cca.n_components(), 2);
}
#[test]
fn test_cca_invalid_zero_components() {
let x = array![[1.0, 2.0], [3.0, 4.0]];
let y = array![[1.0, 0.5], [2.0, 1.0]];
let cca = CCA::<f64>::new(0);
assert!(cca.fit(&x, &y).is_err());
}
#[test]
fn test_cca_too_many_components() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0], [2.0], [3.0]];
let cca = CCA::<f64>::new(2);
assert!(cca.fit(&x, &y).is_err());
}
#[test]
fn test_cca_row_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5]];
let cca = CCA::<f64>::new(1);
assert!(cca.fit(&x, &y).is_err());
}
#[test]
fn test_cca_transform_shape_mismatch() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5]];
let cca = CCA::<f64>::new(1);
let fitted = cca.fit(&x, &y).unwrap();
let x_bad = array![[1.0, 2.0, 3.0]];
assert!(fitted.transform(&x_bad).is_err());
}
#[test]
fn test_cca_f32() {
let x: Array2<f32> = array![
[1.0f32, 2.0],
[3.0, 4.0],
[5.0, 6.0],
[7.0, 8.0],
[9.0, 10.0],
];
let y: Array2<f32> = array![
[1.0f32, 0.5],
[2.0, 1.0],
[3.0, 1.5],
[4.0, 2.0],
[5.0, 2.5],
];
let cca = CCA::<f32>::new(1);
let fitted = cca.fit(&x, &y).unwrap();
let scores = fitted.transform(&x).unwrap();
assert_eq!(scores.ncols(), 1);
}
#[test]
fn test_pls_regression_and_canonical_give_different_scores() {
let x = array![
[1.0, 2.0, 0.5],
[3.0, 1.0, 2.5],
[5.0, 6.0, 1.0],
[7.0, 3.0, 4.5],
[9.0, 10.0, 2.0],
];
let y = array![[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 2.0], [5.0, 2.5],];
let pls_reg = PLSRegression::<f64>::new(2);
let fitted_reg = pls_reg.fit(&x, &y).unwrap();
let scores_reg = fitted_reg.transform(&x).unwrap();
let pls_can = PLSCanonical::<f64>::new(2);
let fitted_can = pls_can.fit(&x, &y).unwrap();
let scores_can = fitted_can.transform(&x).unwrap();
let diff: f64 = scores_reg
.iter()
.zip(scores_can.iter())
.map(|(a, b)| (a - b).abs())
.sum();
        assert_eq!(scores_reg.dim(), scores_can.dim());
        // Smoke check only: both deflation modes run end to end and must yield finite
        // scores of matching shape; the exact values are not pinned here.
        assert!(diff.is_finite());
}
#[test]
fn test_centre_scale_helper() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let (xc, mean, std_dev) = centre_scale(&x, true);
assert_abs_diff_eq!(mean[0], 3.0, epsilon = 1e-10);
assert_abs_diff_eq!(mean[1], 4.0, epsilon = 1e-10);
assert!(std_dev.is_some());
let col_mean_0: f64 = xc.column(0).iter().sum::<f64>() / 3.0;
assert_abs_diff_eq!(col_mean_0, 0.0, epsilon = 1e-10);
}
#[test]
fn test_centre_scale_no_scale() {
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let (_xc, _mean, std_dev) = centre_scale(&x, false);
assert!(std_dev.is_none());
}
#[test]
fn test_invert_square_identity() {
let eye = Array2::<f64>::from_shape_fn((3, 3), |(i, j)| if i == j { 1.0 } else { 0.0 });
let inv = invert_square(&eye).unwrap();
for i in 0..3 {
for j in 0..3 {
let expected = if i == j { 1.0 } else { 0.0 };
assert_abs_diff_eq!(inv[[i, j]], expected, epsilon = 1e-10);
}
}
}
#[test]
fn test_invert_square_2x2() {
let a = array![[4.0, 7.0], [2.0, 6.0]];
let inv = invert_square(&a).unwrap();
let prod = a.dot(&inv);
for i in 0..2 {
for j in 0..2 {
let expected = if i == j { 1.0 } else { 0.0 };
assert_abs_diff_eq!(prod[[i, j]], expected, epsilon = 1e-10);
}
}
}
}