use crate::mds::{classical_mds, eigh_faer, pairwise_sq_distances};
use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::{Fit, Transform};
use ndarray::Array2;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
/// Isomap (Isometric Mapping) nonlinear dimensionality reduction.
///
/// Builds a k-nearest-neighbor graph over the training samples, estimates
/// geodesic distances via shortest paths on that graph, and embeds the
/// points with classical MDS on the geodesic distance matrix.
#[derive(Debug, Clone)]
pub struct Isomap {
    /// Target dimensionality of the embedding.
    n_components: usize,
    /// Neighborhood size for the kNN graph (defaults to 5 in `new`).
    n_neighbors: usize,
}
impl Isomap {
    /// Create an Isomap estimator targeting `n_components` output
    /// dimensions; the neighborhood size defaults to 5.
    #[must_use]
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            n_neighbors: 5,
        }
    }

    /// Builder-style setter for the kNN neighborhood size `k`.
    #[must_use]
    pub fn with_n_neighbors(mut self, k: usize) -> Self {
        self.n_neighbors = k;
        self
    }

    /// Number of embedding dimensions this estimator will produce.
    #[must_use]
    pub fn n_components(&self) -> usize {
        self.n_components
    }

    /// Number of nearest neighbors used to build the graph.
    #[must_use]
    pub fn n_neighbors(&self) -> usize {
        self.n_neighbors
    }
}
/// Result of fitting [`Isomap`]: the training embedding plus everything
/// needed to project new points in `transform`.
#[derive(Debug, Clone)]
pub struct FittedIsomap {
    /// Training-set embedding, shape (n_samples, n_components).
    embedding_: Array2<f64>,
    /// Training data retained for distance computations in `transform`.
    x_train_: Array2<f64>,
    /// Neighborhood size used at fit time (kept for provenance; unused after fitting).
    _n_neighbors: usize,
    /// Top eigenvalues of the double-centered squared-geodesic kernel, clamped to >= 0.
    eigenvalues_: Vec<f64>,
    /// Matching eigenvectors, shape (n_samples, n_components).
    eigenvectors_: Array2<f64>,
    /// Per-row means of the squared geodesic distance matrix (for centering in `transform`).
    geo_sq_row_means_: Vec<f64>,
    /// Grand mean of the squared geodesic distance matrix.
    geo_sq_grand_mean_: f64,
}
impl FittedIsomap {
    /// Borrow the training-set embedding (n_samples x n_components).
    #[must_use]
    pub fn embedding(&self) -> &Array2<f64> {
        &self.embedding_
    }
}
/// Priority-queue entry for Dijkstra: tentative `cost` to reach `node`.
/// Ordered by reversed cost so `BinaryHeap` (a max-heap) acts as a min-heap.
#[derive(Clone, PartialEq)]
struct State {
    cost: f64,
    node: usize,
}
// `cost` is an f64, which is only `PartialEq`; total equality is asserted
// here because heap costs are finite sums of finite edge weights.
impl Eq for State {}

impl Ord for State {
    /// Reversed cost ordering so the max-heap pops the smallest tentative
    /// distance first. Incomparable costs (NaN) compare as equal.
    fn cmp(&self, other: &Self) -> Ordering {
        self.cost
            .partial_cmp(&other.cost)
            .unwrap_or(Ordering::Equal)
            .reverse()
    }
}

impl PartialOrd for State {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
/// Build a symmetric k-nearest-neighbor graph from squared pairwise
/// distances. Each point's list holds `(neighbor, dist)` edges with
/// Euclidean (non-squared) weights; edges are mirrored so the graph is
/// undirected, with duplicates collapsed to the smaller weight.
fn build_knn_graph(sq_dist: &Array2<f64>, k: usize) -> Vec<Vec<(usize, f64)>> {
    let n = sq_dist.nrows();

    // Directed phase: each point links to its k closest other points.
    let mut directed: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
    for i in 0..n {
        let mut candidates: Vec<(f64, usize)> = (0..n)
            .filter(|&j| j != i)
            .map(|j| (sq_dist[[i, j]].sqrt(), j))
            .collect();
        // Stable sort keeps the original index order among equal distances,
        // which keeps neighbor selection deterministic.
        candidates.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
        directed[i].extend(candidates.into_iter().take(k).map(|(d, j)| (j, d)));
    }

    // Symmetrize: every directed edge appears in both endpoints' lists.
    let mut graph: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
    for (i, edges) in directed.iter().enumerate() {
        for &(j, d) in edges {
            graph[i].push((j, d));
            graph[j].push((i, d));
        }
    }

    // Collapse duplicate edges to a single entry with the minimum weight.
    for edges in &mut graph {
        edges.sort_by_key(|e| e.0);
        edges.dedup_by(|a, b| {
            if a.0 == b.0 {
                b.1 = b.1.min(a.1);
                true
            } else {
                false
            }
        });
    }
    graph
}
/// Single-source shortest path distances over a weighted adjacency list.
/// Unreachable nodes keep a distance of `f64::INFINITY`.
fn dijkstra(adj: &[Vec<(usize, f64)>], source: usize) -> Vec<f64> {
    // Function-local heap entry: reversed f64 ordering turns the std
    // max-heap into a min-heap on tentative distance; NaNs compare equal.
    struct Entry(f64, usize);
    impl PartialEq for Entry {
        fn eq(&self, other: &Self) -> bool {
            self.0 == other.0
        }
    }
    impl Eq for Entry {}
    impl PartialOrd for Entry {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }
    impl Ord for Entry {
        fn cmp(&self, other: &Self) -> Ordering {
            other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
        }
    }

    let mut best = vec![f64::INFINITY; adj.len()];
    best[source] = 0.0;
    let mut queue = BinaryHeap::new();
    queue.push(Entry(0.0, source));
    while let Some(Entry(d, u)) = queue.pop() {
        // Stale entry: a shorter path to `u` was already settled.
        if d > best[u] {
            continue;
        }
        for &(v, w) in &adj[u] {
            let candidate = d + w;
            if candidate < best[v] {
                best[v] = candidate;
                queue.push(Entry(candidate, v));
            }
        }
    }
    best
}
/// Geodesic distance matrix: one Dijkstra run per source vertex.
fn all_pairs_shortest_paths(adj: &[Vec<(usize, f64)>]) -> Array2<f64> {
    let n = adj.len();
    let mut geodesic = Array2::<f64>::zeros((n, n));
    for (src, mut row) in geodesic.outer_iter_mut().enumerate() {
        for (cell, d) in row.iter_mut().zip(dijkstra(adj, src)) {
            *cell = d;
        }
    }
    geodesic
}
impl Fit<Array2<f64>, ()> for Isomap {
    type Fitted = FittedIsomap;
    type Error = FerroError;

    /// Fit Isomap to `x` (n_samples x n_features).
    ///
    /// Pipeline: validate parameters, build a symmetric kNN graph,
    /// approximate geodesic distances with all-pairs shortest paths, embed
    /// via classical MDS, and separately eigendecompose the double-centered
    /// squared-geodesic kernel so `transform` can project new points.
    ///
    /// # Errors
    /// - `InvalidParameter` when `n_components == 0`, `n_neighbors == 0`,
    ///   `n_neighbors >= n_samples`, or `n_components > n_samples`.
    /// - `InsufficientSamples` when fewer than 2 samples are given.
    /// - `NumericalInstability` when the kNN graph is disconnected.
    fn fit(&self, x: &Array2<f64>, _y: &()) -> Result<FittedIsomap, FerroError> {
        let n_samples = x.nrows();
        if self.n_components == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_components".into(),
                reason: "must be at least 1".into(),
            });
        }
        if self.n_neighbors == 0 {
            return Err(FerroError::InvalidParameter {
                name: "n_neighbors".into(),
                reason: "must be at least 1".into(),
            });
        }
        if n_samples < 2 {
            return Err(FerroError::InsufficientSamples {
                required: 2,
                actual: n_samples,
                context: "Isomap::fit requires at least 2 samples".into(),
            });
        }
        if self.n_neighbors >= n_samples {
            return Err(FerroError::InvalidParameter {
                name: "n_neighbors".into(),
                reason: format!(
                    "n_neighbors ({}) must be less than n_samples ({})",
                    self.n_neighbors, n_samples
                ),
            });
        }
        if self.n_components > n_samples {
            return Err(FerroError::InvalidParameter {
                name: "n_components".into(),
                reason: format!(
                    "n_components ({}) exceeds n_samples ({})",
                    self.n_components, n_samples
                ),
            });
        }
        let sq_dist = pairwise_sq_distances(x);
        let adj = build_knn_graph(&sq_dist, self.n_neighbors);
        let geodesic = all_pairs_shortest_paths(&adj);
        // A disconnected graph leaves some geodesic distances infinite,
        // which would poison MDS; fail early with an actionable message.
        for i in 0..n_samples {
            for j in 0..n_samples {
                if geodesic[[i, j]].is_infinite() {
                    return Err(FerroError::NumericalInstability {
                        message: format!(
                            "kNN graph is disconnected (no path from point {i} to {j}). \
                             Try increasing n_neighbors."
                        ),
                    });
                }
            }
        }
        // Squared geodesic distances feed both classical MDS and the
        // double-centering below.
        let geo_sq = geodesic.mapv(|v| v * v);
        let n = n_samples;
        let n_f = n as f64;
        // Row means and grand mean of geo_sq are cached on the fitted model
        // for the out-of-sample projection in `transform`.
        let mut row_means = vec![0.0; n];
        let mut grand_mean = 0.0;
        for i in 0..n {
            for j in 0..n {
                row_means[i] += geo_sq[[i, j]];
                grand_mean += geo_sq[[i, j]];
            }
            row_means[i] /= n_f;
        }
        // grand_mean accumulated all n*n entries above, hence /= n^2.
        grand_mean /= n_f * n_f;
        // Training embedding from classical MDS; the stress value is dropped.
        let (embedding, _stress) = classical_mds(&geo_sq, self.n_components)?;
        // Build the double-centered kernel B and eigendecompose it here as
        // well: `transform` needs the raw eigenpairs, which classical_mds
        // does not expose.
        let mut b = Array2::<f64>::zeros((n, n));
        // NOTE(review): geo_sq should be symmetric (undirected graph), so
        // col_means should equal row_means up to rounding; they are computed
        // independently here — confirm before consolidating.
        let mut col_means = vec![0.0; n];
        for j in 0..n {
            for i in 0..n {
                col_means[j] += geo_sq[[i, j]];
            }
            col_means[j] /= n_f;
        }
        for i in 0..n {
            for j in 0..n {
                b[[i, j]] = -0.5 * (geo_sq[[i, j]] - row_means[i] - col_means[j] + grand_mean);
            }
        }
        let (eigenvalues, eigenvectors) = eigh_faer(&b)?;
        // Sort eigenpair indices by descending eigenvalue.
        let mut indices: Vec<usize> = (0..n).collect();
        indices.sort_by(|&a, &b_idx| {
            eigenvalues[b_idx]
                .partial_cmp(&eigenvalues[a])
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        // Keep the top n_components eigenpairs; clamp small negative
        // eigenvalues (numerical noise) to zero.
        let n_comp = self.n_components.min(n);
        let mut top_eigenvalues = Vec::with_capacity(n_comp);
        let mut top_eigenvectors = Array2::<f64>::zeros((n, n_comp));
        for (k, &idx) in indices.iter().take(n_comp).enumerate() {
            top_eigenvalues.push(eigenvalues[idx].max(0.0));
            for i in 0..n {
                top_eigenvectors[[i, k]] = eigenvectors[[i, idx]];
            }
        }
        Ok(FittedIsomap {
            embedding_: embedding,
            x_train_: x.to_owned(),
            _n_neighbors: self.n_neighbors,
            eigenvalues_: top_eigenvalues,
            eigenvectors_: top_eigenvectors,
            geo_sq_row_means_: row_means,
            geo_sq_grand_mean_: grand_mean,
        })
    }
}
impl Transform<Array2<f64>> for FittedIsomap {
    type Output = Array2<f64>;
    type Error = FerroError;

    /// Project new points into the fitted embedding via the Nystrom-style
    /// out-of-sample formula: for each test point, center its squared
    /// distances to the training set against the cached training means and
    /// project onto the stored eigenvectors, scaled by 1/sqrt(eigenvalue).
    ///
    /// Note: Euclidean distances to the training points are used here as an
    /// approximation of geodesic distances for out-of-sample data.
    ///
    /// Fix vs. previous version: the per-point distance list was sorted even
    /// though the order was never used (the mean is order-independent and
    /// the projection re-indexes by training row), and distances went
    /// through a sqrt-then-square roundtrip. Both the dead O(n log n) sort
    /// and the precision-losing roundtrip are removed.
    ///
    /// # Errors
    /// - `ShapeMismatch` when `x` has a different feature count than the
    ///   training data.
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>, FerroError> {
        let n_features = self.x_train_.ncols();
        if x.ncols() != n_features {
            return Err(FerroError::ShapeMismatch {
                expected: vec![x.nrows(), n_features],
                actual: vec![x.nrows(), x.ncols()],
                context: "FittedIsomap::transform".into(),
            });
        }
        let n_test = x.nrows();
        let n_train = self.x_train_.nrows();
        let n_comp = self.eigenvalues_.len();
        let mut result = Array2::<f64>::zeros((n_test, n_comp));
        for t in 0..n_test {
            // Squared Euclidean distance from test point t to each training
            // point, indexed by training row.
            let delta_sq: Vec<f64> = (0..n_train)
                .map(|i| {
                    (0..n_features)
                        .map(|k| {
                            let diff = x[[t, k]] - self.x_train_[[i, k]];
                            diff * diff
                        })
                        .sum()
                })
                .collect();
            let delta_sq_mean: f64 = delta_sq.iter().sum::<f64>() / n_train as f64;
            for k in 0..n_comp {
                let eigval = self.eigenvalues_[k];
                // Skip near-zero (or clamped) modes: 1/sqrt(eigval) would
                // blow up and the component carries no signal.
                if eigval <= 1e-12 {
                    continue;
                }
                let scale = 1.0 / eigval.sqrt();
                let mut sum = 0.0;
                for (i, &dsq_i) in delta_sq.iter().enumerate() {
                    // Double-centered kernel value between the test point and
                    // training point i, using the cached training-set means.
                    let b_i = -0.5
                        * (dsq_i - self.geo_sq_row_means_[i] - delta_sq_mean
                            + self.geo_sq_grand_mean_);
                    sum += self.eigenvectors_[[i, k]] * b_i;
                }
                result[[t, k]] = sum * scale;
            }
        }
        Ok(result)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    /// Five evenly spaced collinear points along the x-axis.
    fn line_data() -> Array2<f64> {
        array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0], [3.0, 0.0], [4.0, 0.0],]
    }

    /// A 3x3 unit-spaced grid in the plane.
    fn grid_data() -> Array2<f64> {
        array![
            [0.0, 0.0],
            [1.0, 0.0],
            [2.0, 0.0],
            [0.0, 1.0],
            [1.0, 1.0],
            [2.0, 1.0],
            [0.0, 2.0],
            [1.0, 2.0],
            [2.0, 2.0],
        ]
    }

    #[test]
    fn test_isomap_basic_shape() {
        let fitted = Isomap::new(2)
            .with_n_neighbors(3)
            .fit(&grid_data(), &())
            .unwrap();
        assert_eq!(fitted.embedding().dim(), (9, 2));
    }

    #[test]
    fn test_isomap_1d() {
        let fitted = Isomap::new(1)
            .with_n_neighbors(2)
            .fit(&line_data(), &())
            .unwrap();
        assert_eq!(fitted.embedding().ncols(), 1);
    }

    #[test]
    fn test_isomap_preserves_ordering() {
        let fitted = Isomap::new(1)
            .with_n_neighbors(2)
            .fit(&line_data(), &())
            .unwrap();
        let emb = fitted.embedding();
        let vals: Vec<f64> = (0..5).map(|i| emb[[i, 0]]).collect();
        // A 1-D embedding of collinear points must be monotonic in one
        // direction or the other (eigenvector sign is arbitrary).
        let ascending = vals.windows(2).all(|w| w[0] <= w[1] + 1e-10);
        let descending = vals.windows(2).all(|w| w[0] >= w[1] - 1e-10);
        assert!(
            ascending || descending,
            "embedding should be monotonic: {vals:?}"
        );
    }

    #[test]
    fn test_isomap_transform_new_data() {
        let fitted = Isomap::new(2)
            .with_n_neighbors(3)
            .fit(&grid_data(), &())
            .unwrap();
        let projected = fitted.transform(&array![[0.5, 0.5], [1.5, 1.5]]).unwrap();
        assert_eq!(projected.dim(), (2, 2));
    }

    #[test]
    fn test_isomap_transform_shape_mismatch() {
        let fitted = Isomap::new(2)
            .with_n_neighbors(3)
            .fit(&grid_data(), &())
            .unwrap();
        // A test matrix with the wrong feature count must be rejected.
        assert!(fitted.transform(&array![[1.0, 2.0, 3.0]]).is_err());
    }

    #[test]
    fn test_isomap_transform_recovers_training() {
        let x = grid_data();
        let fitted = Isomap::new(2).with_n_neighbors(3).fit(&x, &()).unwrap();
        let projected = fitted.transform(&x).unwrap();
        assert_eq!(projected.dim(), fitted.embedding().dim());
    }

    #[test]
    fn test_isomap_invalid_n_components_zero() {
        assert!(Isomap::new(0).fit(&grid_data(), &()).is_err());
    }

    #[test]
    fn test_isomap_invalid_n_neighbors_zero() {
        assert!(Isomap::new(2)
            .with_n_neighbors(0)
            .fit(&grid_data(), &())
            .is_err());
    }

    #[test]
    fn test_isomap_n_neighbors_too_large() {
        // More neighbors than available samples is invalid.
        assert!(Isomap::new(2)
            .with_n_neighbors(100)
            .fit(&grid_data(), &())
            .is_err());
    }

    #[test]
    fn test_isomap_insufficient_samples() {
        let single = array![[1.0, 2.0]];
        assert!(Isomap::new(1).with_n_neighbors(1).fit(&single, &()).is_err());
    }

    #[test]
    fn test_isomap_getters() {
        let iso = Isomap::new(3).with_n_neighbors(7);
        assert_eq!(iso.n_components(), 3);
        assert_eq!(iso.n_neighbors(), 7);
    }

    #[test]
    fn test_isomap_default_n_neighbors() {
        assert_eq!(Isomap::new(2).n_neighbors(), 5);
    }

    #[test]
    fn test_isomap_n_components_too_large() {
        // Cannot request more embedding dimensions than samples.
        assert!(Isomap::new(50).fit(&grid_data(), &()).is_err());
    }
}