use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::Fit;
use ndarray::Array2;
use rand::{Rng, SeedableRng};
fn reject_non_finite(x: &Array2<f64>) -> Result<(), FerroError> {
if x.iter().any(|v| !v.is_finite()) {
return Err(FerroError::InvalidParameter {
name: "X".into(),
reason: "Input X contains NaN or infinity.".into(),
});
}
Ok(())
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dissimilarity {
Euclidean,
Precomputed,
}
#[derive(Debug, Clone)]
pub struct MDS {
n_components: usize,
dissimilarity: Dissimilarity,
init: Option<Array2<f64>>,
n_init: usize,
max_iter: usize,
eps: f64,
normalized_stress: bool,
random_state: Option<u64>,
}
impl MDS {
#[must_use]
pub fn new(n_components: usize) -> Self {
Self {
n_components,
dissimilarity: Dissimilarity::Euclidean,
init: None,
n_init: 4,
max_iter: 300,
eps: 1e-3,
normalized_stress: false,
random_state: None,
}
}
#[must_use]
pub fn with_dissimilarity(mut self, d: Dissimilarity) -> Self {
self.dissimilarity = d;
self
}
#[must_use]
pub fn with_init(mut self, init: Array2<f64>) -> Self {
self.init = Some(init);
self
}
#[must_use]
pub fn with_n_init(mut self, n_init: usize) -> Self {
self.n_init = n_init;
self
}
#[must_use]
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
#[must_use]
pub fn with_eps(mut self, eps: f64) -> Self {
self.eps = eps;
self
}
#[must_use]
pub fn with_normalized_stress(mut self, normalized: bool) -> Self {
self.normalized_stress = normalized;
self
}
#[must_use]
pub fn with_random_state(mut self, seed: u64) -> Self {
self.random_state = Some(seed);
self
}
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
#[must_use]
pub fn dissimilarity(&self) -> Dissimilarity {
self.dissimilarity
}
}
#[derive(Debug, Clone)]
pub struct FittedMDS {
embedding_: Array2<f64>,
stress_: f64,
dissimilarity_matrix_: Array2<f64>,
n_iter_: usize,
}
impl FittedMDS {
#[must_use]
pub fn embedding(&self) -> &Array2<f64> {
&self.embedding_
}
#[must_use]
pub fn stress(&self) -> f64 {
self.stress_
}
#[must_use]
pub fn dissimilarity_matrix(&self) -> &Array2<f64> {
&self.dissimilarity_matrix_
}
#[must_use]
pub fn n_iter(&self) -> usize {
self.n_iter_
}
}
pub(crate) fn pairwise_sq_distances(x: &Array2<f64>) -> Array2<f64> {
let n = x.nrows();
let mut d = Array2::<f64>::zeros((n, n));
for i in 0..n {
for j in (i + 1)..n {
let mut sq = 0.0;
for k in 0..x.ncols() {
let diff = x[[i, k]] - x[[j, k]];
sq += diff * diff;
}
d[[i, j]] = sq;
d[[j, i]] = sq;
}
}
d
}
fn kruskal_stress(dist_orig: &Array2<f64>, embedding: &Array2<f64>) -> f64 {
let n = embedding.nrows();
let mut numerator = 0.0;
let mut denominator = 0.0;
for i in 0..n {
for j in (i + 1)..n {
let d_orig = dist_orig[[i, j]].sqrt();
let mut sq = 0.0;
for k in 0..embedding.ncols() {
let diff = embedding[[i, k]] - embedding[[j, k]];
sq += diff * diff;
}
let d_embed = sq.sqrt();
let diff = d_orig - d_embed;
numerator += diff * diff;
denominator += d_orig * d_orig;
}
}
if denominator > 0.0 {
(numerator / denominator).sqrt()
} else {
0.0
}
}
fn euclidean_distances(x: &Array2<f64>) -> Array2<f64> {
let n = x.nrows();
let mut d = Array2::<f64>::zeros((n, n));
for i in 0..n {
for j in (i + 1)..n {
let mut sq = 0.0;
for k in 0..x.ncols() {
let diff = x[[i, k]] - x[[j, k]];
sq += diff * diff;
}
let dist = sq.sqrt();
d[[i, j]] = dist;
d[[j, i]] = dist;
}
}
d
}
fn smacof_single(
disparities: &Array2<f64>,
init: &Array2<f64>,
max_iter: usize,
eps: f64,
normalized_stress: bool,
) -> (Array2<f64>, f64, usize) {
let n = disparities.nrows();
let n_f = n as f64;
let mut x = init.clone();
let mut old_stress: Option<f64> = None;
let mut stress = 0.0_f64;
let mut iters = 0_usize;
for it in 0..max_iter {
iters = it + 1;
let dis = euclidean_distances(&x);
let mut raw = 0.0_f64;
for i in 0..n {
for j in 0..n {
let diff = dis[[i, j]] - disparities[[i, j]];
raw += diff * diff;
}
}
raw /= 2.0;
stress = if normalized_stress {
let mut denom = 0.0_f64;
for i in 0..n {
for j in 0..n {
denom += disparities[[i, j]] * disparities[[i, j]];
}
}
denom /= 2.0;
if denom > 0.0 {
(raw / denom).sqrt()
} else {
0.0
}
} else {
raw
};
let mut b = Array2::<f64>::zeros((n, n));
for i in 0..n {
let mut row_sum = 0.0_f64;
for j in 0..n {
let dis_safe = if dis[[i, j]] == 0.0 {
1e-5
} else {
dis[[i, j]]
};
let ratio = disparities[[i, j]] / dis_safe;
b[[i, j]] = -ratio;
row_sum += ratio;
}
b[[i, i]] += row_sum;
}
let bx = b.dot(&x);
x = bx.mapv(|v| v / n_f);
let mut norm = 0.0_f64;
for i in 0..n {
let mut sq = 0.0_f64;
for k in 0..x.ncols() {
sq += x[[i, k]] * x[[i, k]];
}
norm += sq.sqrt();
}
let normed = stress / norm;
if let Some(prev) = old_stress
&& (prev - normed) < eps
{
break;
}
old_stress = Some(normed);
}
(x, stress, iters)
}
#[allow(
clippy::too_many_arguments,
reason = "mirrors sklearn smacof's parameter set (dissimilarities, n_components, init, n_init, max_iter, eps, normalized_stress, random_state)"
)]
fn smacof(
disparities: &Array2<f64>,
n_components: usize,
init: Option<&Array2<f64>>,
n_init: usize,
max_iter: usize,
eps: f64,
normalized_stress: bool,
random_state: Option<u64>,
) -> (Array2<f64>, f64, usize) {
let n = disparities.nrows();
if let Some(x0) = init {
return smacof_single(disparities, x0, max_iter, eps, normalized_stress);
}
let seed = random_state.unwrap_or(0);
let mut rng = rand_xoshiro::Xoshiro256PlusPlus::seed_from_u64(seed);
let runs = n_init.max(1);
let mut best: Option<(Array2<f64>, f64, usize)> = None;
for _ in 0..runs {
let x0 = Array2::<f64>::from_shape_fn((n, n_components), |_| rng.random::<f64>());
let (pos, stress, n_iter) =
smacof_single(disparities, &x0, max_iter, eps, normalized_stress);
match &best {
Some((_, best_stress, _)) if stress >= *best_stress => {}
_ => best = Some((pos, stress, n_iter)),
}
}
best.unwrap_or_else(|| {
let x0 = Array2::<f64>::zeros((n, n_components));
smacof_single(disparities, &x0, max_iter, eps, normalized_stress)
})
}
pub(crate) fn eigh_faer(a: &Array2<f64>) -> Result<(Vec<f64>, Array2<f64>), FerroError> {
let n = a.nrows();
let mat = faer::Mat::from_fn(n, n, |i, j| a[[i, j]]);
let decomp = mat.self_adjoint_eigen(faer::Side::Lower).map_err(|e| {
FerroError::NumericalInstability {
message: format!("Symmetric eigendecomposition failed: {e:?}"),
}
})?;
let eigenvalues: Vec<f64> = decomp.S().column_vector().iter().copied().collect();
let eigenvectors = Array2::from_shape_fn((n, n), |(i, j)| decomp.U()[(i, j)]);
Ok((eigenvalues, eigenvectors))
}
pub(crate) fn classical_mds(
sq_dist: &Array2<f64>,
n_components: usize,
) -> Result<(Array2<f64>, f64), FerroError> {
let n = sq_dist.nrows();
let n_f = n as f64;
let mut row_means = vec![0.0; n];
let mut col_means = vec![0.0; n];
let mut grand_mean = 0.0;
for i in 0..n {
for j in 0..n {
row_means[i] += sq_dist[[i, j]];
col_means[j] += sq_dist[[i, j]];
grand_mean += sq_dist[[i, j]];
}
}
for i in 0..n {
row_means[i] /= n_f;
col_means[i] /= n_f;
}
grand_mean /= n_f * n_f;
let mut b = Array2::<f64>::zeros((n, n));
for i in 0..n {
for j in 0..n {
b[[i, j]] = -0.5 * (sq_dist[[i, j]] - row_means[i] - col_means[j] + grand_mean);
}
}
let (eigenvalues, eigenvectors) = eigh_faer(&b)?;
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_by(|&a, &b_idx| {
eigenvalues[b_idx]
.partial_cmp(&eigenvalues[a])
.unwrap_or(std::cmp::Ordering::Equal)
});
let n_comp = n_components.min(n);
let mut embedding = Array2::<f64>::zeros((n, n_comp));
for (k, &idx) in indices.iter().take(n_comp).enumerate() {
let eigval = eigenvalues[idx].max(0.0);
let scale = eigval.sqrt();
for i in 0..n {
embedding[[i, k]] = eigenvectors[[i, idx]] * scale;
}
}
let stress = kruskal_stress(sq_dist, &embedding);
Ok((embedding, stress))
}
impl Fit<Array2<f64>, ()> for MDS {
type Fitted = FittedMDS;
type Error = FerroError;
fn fit(&self, x: &Array2<f64>, _y: &()) -> Result<FittedMDS, FerroError> {
if self.n_components == 0 {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: "must be at least 1".into(),
});
}
reject_non_finite(x)?;
let dissimilarity_matrix = match self.dissimilarity {
Dissimilarity::Euclidean => {
let n_samples = x.nrows();
if n_samples < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n_samples,
context: "MDS::fit requires at least 2 samples".into(),
});
}
if self.n_components > n_samples {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: format!(
"n_components ({}) exceeds n_samples ({})",
self.n_components, n_samples
),
});
}
euclidean_distances(x)
}
Dissimilarity::Precomputed => {
if x.nrows() != x.ncols() {
return Err(FerroError::ShapeMismatch {
expected: vec![x.nrows(), x.nrows()],
actual: vec![x.nrows(), x.ncols()],
context: "MDS with Precomputed dissimilarity requires a square matrix"
.into(),
});
}
let n = x.nrows();
if n < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n,
context: "MDS::fit requires at least 2 samples".into(),
});
}
if self.n_components > n {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: format!(
"n_components ({}) exceeds n_samples ({})",
self.n_components, n
),
});
}
x.clone()
}
};
let n_samples = dissimilarity_matrix.nrows();
if let Some(init) = &self.init
&& init.nrows() != n_samples
{
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples, self.n_components],
actual: vec![init.nrows(), init.ncols()],
context: "MDS init must have shape (n_samples, n_components)".into(),
});
}
let n_components = self
.init
.as_ref()
.map_or(self.n_components, ndarray::Array2::ncols);
let (embedding, stress, n_iter) = smacof(
&dissimilarity_matrix,
n_components,
self.init.as_ref(),
self.n_init,
self.max_iter,
self.eps,
self.normalized_stress,
self.random_state,
);
Ok(FittedMDS {
embedding_: embedding,
stress_: stress,
dissimilarity_matrix_: dissimilarity_matrix,
n_iter_: n_iter,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use ndarray::array;
fn square_data() -> Array2<f64> {
array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0],]
}
#[test]
fn test_mds_basic_embedding_shape() {
let mds = MDS::new(2);
let x = square_data();
let fitted = mds.fit(&x, &()).unwrap();
assert_eq!(fitted.embedding().dim(), (4, 2));
}
#[test]
fn test_mds_1d_embedding() {
let mds = MDS::new(1);
let x = array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0], [3.0, 0.0],];
let fitted = mds.fit(&x, &()).unwrap();
assert_eq!(fitted.embedding().ncols(), 1);
}
#[test]
fn test_mds_stress_non_negative() {
let mds = MDS::new(2);
let x = square_data();
let fitted = mds.fit(&x, &()).unwrap();
assert!(fitted.stress() >= 0.0);
}
fn square_init() -> Array2<f64> {
array![[0.1, 0.2], [0.3, -0.1], [-0.2, 0.4], [0.5, 0.05]]
}
#[test]
#[allow(
clippy::assertions_on_constants,
reason = "assert!(false, ...) reports the unexpected fit Err with diagnostics"
)]
fn test_mds_perfect_embedding_low_stress() {
let mds = MDS::new(2).with_init(square_init());
let Ok(fitted) = mds.fit(&square_data(), &()) else {
assert!(false, "fit failed");
return;
};
assert!(fitted.stress() < 0.1, "stress = {}", fitted.stress());
}
#[test]
#[allow(
clippy::assertions_on_constants,
reason = "assert!(false, ...) reports the unexpected fit Err with diagnostics"
)]
fn test_mds_preserves_distances() {
let mds = MDS::new(2).with_init(square_init());
let x = square_data();
let Ok(fitted) = mds.fit(&x, &()) else {
assert!(false, "fit failed");
return;
};
let emb = fitted.embedding();
let orig = pairwise_sq_distances(&x);
for i in 0..4 {
for j in (i + 1)..4 {
let d_orig = orig[[i, j]].sqrt();
let mut sq = 0.0;
for k in 0..emb.ncols() {
let diff = emb[[i, k]] - emb[[j, k]];
sq += diff * diff;
}
let d_emb = sq.sqrt();
assert_abs_diff_eq!(d_orig, d_emb, epsilon = 0.05);
}
}
}
#[test]
fn test_mds_precomputed() {
let x = square_data();
let sq = pairwise_sq_distances(&x);
let dist = sq.mapv(f64::sqrt);
let mds = MDS::new(2).with_dissimilarity(Dissimilarity::Precomputed);
let fitted = mds.fit(&dist, &()).unwrap();
assert_eq!(fitted.embedding().dim(), (4, 2));
}
#[test]
fn test_mds_invalid_n_components_zero() {
let mds = MDS::new(0);
let x = square_data();
assert!(mds.fit(&x, &()).is_err());
}
#[test]
fn test_mds_invalid_n_components_too_large() {
let mds = MDS::new(10);
let x = square_data(); assert!(mds.fit(&x, &()).is_err());
}
#[test]
fn test_mds_insufficient_samples() {
let mds = MDS::new(1);
let x = array![[1.0, 2.0]]; assert!(mds.fit(&x, &()).is_err());
}
#[test]
fn test_mds_precomputed_not_square() {
let mds = MDS::new(1).with_dissimilarity(Dissimilarity::Precomputed);
let x = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
assert!(mds.fit(&x, &()).is_err());
}
#[test]
fn test_mds_collinear_data() {
let mds = MDS::new(1);
let x = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0],];
let fitted = mds.fit(&x, &()).unwrap();
assert_eq!(fitted.embedding().ncols(), 1);
let emb = fitted.embedding();
let mut vals: Vec<f64> = (0..5).map(|i| emb[[i, 0]]).collect();
vals.sort_by(|a, b| a.partial_cmp(b).unwrap());
let diffs: Vec<f64> = vals.windows(2).map(|w| (w[1] - w[0]).abs()).collect();
for d in &diffs {
assert_abs_diff_eq!(d, &diffs[0], epsilon = 0.1);
}
}
#[test]
fn test_mds_getters() {
let mds = MDS::new(3).with_dissimilarity(Dissimilarity::Precomputed);
assert_eq!(mds.n_components(), 3);
assert_eq!(mds.dissimilarity(), Dissimilarity::Precomputed);
}
#[test]
fn test_mds_larger_dataset() {
let n = 20;
let d = 5;
let mut data = Array2::<f64>::zeros((n, d));
for i in 0..n {
for j in 0..d {
data[[i, j]] = (i * d + j) as f64 / (n * d) as f64;
}
}
let mds = MDS::new(2);
let fitted = mds.fit(&data, &()).unwrap();
assert_eq!(fitted.embedding().dim(), (20, 2));
assert!(fitted.stress() >= 0.0);
}
fn fixture_d() -> Array2<f64> {
array![
[0.0, 2.0, 5.0, 9.0],
[2.0, 0.0, 3.0, 4.0],
[5.0, 3.0, 0.0, 6.0],
[9.0, 4.0, 6.0, 0.0],
]
}
fn fixed_init() -> Array2<f64> {
array![[0.1, 0.2], [0.3, -0.1], [-0.2, 0.4], [0.5, 0.05]]
}
#[test]
#[allow(
clippy::assertions_on_constants,
reason = "assert!(false, ...) reports the unexpected fit Err with diagnostics"
)]
fn smacof_fixed_init_precomputed_parity() {
const SK_STRESS: f64 = 3.148_219_331_054_871;
const SK_N_ITER: usize = 13;
let sk_emb: Array2<f64> = array![
[-3.333_717_200_034, -1.658_330_631_573],
[-0.431_085_112_947, -0.700_165_295_708],
[-0.786_750_476_78, 2.465_105_803_376],
[4.551_552_789_761, -0.106_609_876_095],
];
let mds = MDS::new(2)
.with_dissimilarity(Dissimilarity::Precomputed)
.with_init(fixed_init());
let Ok(fitted) = mds.fit(&fixture_d(), &()) else {
assert!(false, "fit failed");
return;
};
assert!(
(fitted.stress() - SK_STRESS).abs() <= 1e-6,
"stress {} vs sklearn {SK_STRESS}",
fitted.stress()
);
assert_eq!(fitted.n_iter(), SK_N_ITER, "n_iter mismatch");
let emb = fitted.embedding();
assert_eq!(emb.dim(), (4, 2));
for i in 0..4 {
for k in 0..2 {
assert_abs_diff_eq!(emb[[i, k]], sk_emb[[i, k]], epsilon = 1e-6);
}
}
}
#[test]
#[allow(
clippy::assertions_on_constants,
reason = "assert!(false, ...) reports the unexpected fit Err with diagnostics"
)]
fn smacof_fixed_init_euclidean_parity() {
const SK_STRESS: f64 = 0.001_311_184_699_657_248_8;
const SK_N_ITER: usize = 13;
let sk_emb: Array2<f64> = array![
[-2.164_424_557_023, -1.234_049_962_647],
[0.576_638_876_45, -2.435_876_213_413],
[-0.587_315_085_045, 2.433_308_813_391],
[2.175_100_765_618, 1.236_617_362_669],
];
let x = array![[0.0, 0.0], [3.0, 0.0], [0.0, 4.0], [3.0, 4.0]];
let mds = MDS::new(2).with_init(fixed_init());
let Ok(fitted) = mds.fit(&x, &()) else {
assert!(false, "fit failed");
return;
};
assert!(
(fitted.stress() - SK_STRESS).abs() <= 1e-6,
"stress {} vs sklearn {SK_STRESS}",
fitted.stress()
);
assert_eq!(fitted.n_iter(), SK_N_ITER, "n_iter mismatch");
let emb = fitted.embedding();
for i in 0..4 {
for k in 0..2 {
assert_abs_diff_eq!(emb[[i, k]], sk_emb[[i, k]], epsilon = 1e-6);
}
}
}
#[test]
#[allow(
clippy::assertions_on_constants,
reason = "assert!(false, ...) reports the unexpected fit Err with diagnostics"
)]
fn smacof_dissimilarity_matrix_is_euclidean() {
let x = array![[0.0, 0.0], [3.0, 0.0], [0.0, 4.0], [3.0, 4.0]];
let mds = MDS::new(2).with_init(fixed_init());
let Ok(fitted) = mds.fit(&x, &()) else {
assert!(false, "fit failed");
return;
};
let expected: Array2<f64> = array![
[0.0, 3.0, 4.0, 5.0],
[3.0, 0.0, 5.0, 4.0],
[4.0, 5.0, 0.0, 3.0],
[5.0, 4.0, 3.0, 0.0],
];
let dm = fitted.dissimilarity_matrix();
for i in 0..4 {
for j in 0..4 {
assert_abs_diff_eq!(dm[[i, j]], expected[[i, j]], epsilon = 1e-12);
}
}
}
#[test]
#[allow(
clippy::assertions_on_constants,
reason = "assert!(false, ...) reports the unexpected fit Err with diagnostics"
)]
fn smacof_normalized_stress_is_kruskal1() {
let mds_raw = MDS::new(2)
.with_dissimilarity(Dissimilarity::Precomputed)
.with_init(fixed_init());
let mds_norm = MDS::new(2)
.with_dissimilarity(Dissimilarity::Precomputed)
.with_init(fixed_init())
.with_normalized_stress(true);
let Ok(raw) = mds_raw.fit(&fixture_d(), &()) else {
assert!(false, "raw fit failed");
return;
};
let Ok(norm) = mds_norm.fit(&fixture_d(), &()) else {
assert!(false, "norm fit failed");
return;
};
assert!(raw.stress() > 1.0, "raw stress = {}", raw.stress());
assert!(
norm.stress() > 0.0 && norm.stress() < 1.0,
"normalized stress = {}",
norm.stress()
);
}
}