// Numeric/geometric code uses index-based loops extensively for clarity.
#![allow(clippy::needless_range_loop, clippy::manual_memcpy)]
//! t-distributed Stochastic Neighbor Embedding (t-SNE).
//!
//! [`Tsne`] performs non-linear dimensionality reduction by modelling pairwise
//! similarities in the high-dimensional space as conditional probabilities and
//! finding a low-dimensional embedding that preserves those similarities under
//! a Student-t distribution.
//!
//! # Algorithm
//!
//! 1. Compute pairwise affinities in the input space using a Gaussian kernel
//! whose bandwidth (sigma) is set per point via binary search to match a
//! target perplexity.
//! 2. Symmetrise the affinity matrix: `P_ij = (P_i|j + P_j|i) / (2n)`.
//! 3. Initialise a low-dimensional embedding (random Gaussian).
//! 4. Optimise with gradient descent using:
//! - **Early exaggeration** (multiply P by a factor) for the first 250
//! iterations.
//! - **Momentum** (0.5 early, 0.8 later).
//! - **Barnes-Hut approximation** with a spatial tree for O(n log n)
//! repulsive force computation.
//!
//! # Important
//!
//! t-SNE does **not** support projecting new data — there is no `Transform`
//! implementation. Use [`FittedTsne::embedding`] to retrieve the result.
//!
//! # Examples
//!
//! ```
//! use ferrolearn_decomp::Tsne;
//! use ferrolearn_core::traits::Fit;
//! use ndarray::Array2;
//!
//! let x = Array2::<f64>::zeros((20, 5));
//! let tsne = Tsne::new().with_perplexity(5.0).with_random_state(42);
//! let fitted = tsne.fit(&x, &()).unwrap();
//! assert_eq!(fitted.embedding().dim(), (20, 2));
//! ```
//!
//! ## REQ status
//!
//! Design: `.design/decomp/tsne.md`. Tracking: #1596. Each REQ is BINARY — SHIPPED
//! (impl + non-test consumer + tests + green verification) or NOT-STARTED (concrete
//! open blocker). Non-test consumer: crate re-export (`lib.rs:102`); there is NO
//! PyO3 binding and NO `Transform` (t-SNE has no out-of-sample projection — matches
//! sklearn). Oracle = live sklearn 1.5.2 (`manifold/_t_sne.py`), run from `/tmp`
//! (R-CHAR-3). The embedding is RNG-coupled (random init) + non-convex → identifiable
//! only up to rotation/reflection/translation/local-optimum, so exact values are a
//! carve-out; the meaningful correctness check is cluster separation.
//!
//! | REQ | Scope | Status | Evidence / Blocker |
//! |---|---|---|---|
//! | REQ-1 | Structural: embedding shape + early-exag/momentum/Barnes-Hut(or exact) GD runs + determinism | SHIPPED (scoped) | `fit` (`tsne.rs:538`); `test_tsne_basic_shape`/`_3d_embedding`/`_exact_mode`/`_reproducibility` |
//! | REQ-2 | Perplexity binary-search affinities → valid symmetric joint P | SHIPPED (scoped) | `compute_pij_row` (`:410`) + `compute_joint_probabilities` (`:467`); P internal (no accessor). `/2n` ≡ sklearn `/sum(P)` (conditional rows sum to 1, verified) |
//! | REQ-3 | Error/parameter contracts (n_components 0, n_samples<2, perplexity ≤0/≥n, learning_rate ≤0, theta <0, NON-FINITE rejection) | SHIPPED (scoped) | `fit` guards. NON-FINITE: `fit` calls `reject_non_finite` (`tsne.rs` symbol `reject_non_finite`) BEFORE the affinity/gradient-descent math, returning the CLEAN finiteness `InvalidParameter{name:"X", reason:"Input X contains NaN or infinity."}` = sklearn `_validate_data(force_all_finite=True)` (`_t_sne.py:884`,`:891`,`utils/validation.py:147-154`). `tests/divergence_nonfinite_spillover.rs::divergence_tsne_fit_nan` matches the live sklearn 1.5.2 oracle (#2290). FLAG: sklearn raises `InvalidParameterError`, enforces `early_exaggeration in [1,inf)`, `max_iter>=250`, `angle in [0,1]`, `n_components<4` for barnes_hut |
//! | REQ-4 | Cluster SEPARATION (the "did t-SNE work" check) + finite kl_divergence_≥0 + n_iter_ | SHIPPED (scoped) | `test_tsne_separates_clusters` (3-NN recovery > 0.8) + `compute_kl_divergence` (`:496`); green-guard `separates_clusters_knn_recovery` |
//! | REQ-5 | EXACT `embedding_`/`kl_divergence_` value parity | NOT-STARTED | CARVE-OUT (R-DEFER-3): random Xoshiro init vs sklearn deterministic PCA default + non-convex local minima + learning_rate 200-vs-auto + Barnes-Hut — blocker #1597 |
//! | REQ-6 | `init='pca'` default + `init='random'`(`1e-4*randn`) + ndarray init | NOT-STARTED | sklearn `_t_sne.py:837,:1019-1036`; ferrolearn always random — blocker #1598 |
//! | REQ-7 | `learning_rate='auto'` default (`max(N/early_ex/4, 50)`) | NOT-STARTED | sklearn `_t_sne.py:876-879`; ferrolearn fixed 200 — blocker #1599 |
//! | REQ-8 | `n_iter_without_progress`/`min_grad_norm` early stopping | NOT-STARTED | sklearn `_t_sne.py:431,:439`; ferrolearn fixed n_iter — blocker #1600 |
//! | REQ-9 | `metric` (precomputed/non-euclidean) + `metric_params` | NOT-STARTED | sklearn `_t_sne.py:835`; ferrolearn squared-Euclidean only — blocker #1601 |
//! | REQ-10 | `method='exact'` (sparse-P barnes_hut) + `/sum(P)` normalization | NOT-STARTED | sklearn `_t_sne.py:840`; ferrolearn dense-P only (`/2n` ≡ `/sum(P)`) — blocker #1602 |
//! | REQ-11 | `n_iter`→`max_iter` rename + deprecation + `max_iter≥250` | NOT-STARTED | sklearn `_t_sne.py:832,:843`; ferrolearn `n_iter` — blocker #1603 |
//! | REQ-12 | fitted attrs `learning_rate_`/`n_features_in_` | NOT-STARTED | blocker #1604 |
//! | REQ-13 | generic `F`/f32 (currently f64-only) | NOT-STARTED | blocker #1605 |
//! | REQ-14 | PyO3 binding | NOT-STARTED | absent; only consumer re-export `lib.rs:102` — blocker #1606 |
//! | REQ-15 | ferray substrate | NOT-STARTED | `ndarray` + `rand` — blocker #1607 |
//!
//! Count: **4 SHIPPED (REQ-1,2,3,4) / 11 NOT-STARTED (REQ-5..15)**.
use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::Fit;
use ndarray::Array2;
use rand::SeedableRng;
use rand_distr::{Distribution, Normal};
use rand_xoshiro::Xoshiro256PlusPlus;
/// Reject non-finite input the way sklearn's `_validate_data` does.
///
/// sklearn runs `check_array` with the default `force_all_finite=True` at the
/// top of `TSNE._fit` for both the barnes_hut and exact branches
/// (`sklearn/manifold/_t_sne.py:884`,`:891`), raising
/// `ValueError("Input X contains NaN.")` / `"... contains infinity ..."`
/// (`sklearn/utils/validation.py:147-154`) BEFORE the affinity / gradient-descent
/// math. NaN AND infinity are both rejected. The message names "NaN" and
/// "infinity" to mirror sklearn's `ValueError`. Never panics (R-CODE-2).
fn reject_non_finite(x: &Array2<f64>) -> Result<(), FerroError> {
if x.iter().any(|v| !v.is_finite()) {
return Err(FerroError::InvalidParameter {
name: "X".into(),
reason: "Input X contains NaN or infinity.".into(),
});
}
Ok(())
}
// ---------------------------------------------------------------------------
// Tsne (unfitted)
// ---------------------------------------------------------------------------
/// t-SNE configuration.
///
/// Holds hyperparameters for the t-SNE algorithm. Calling [`Fit::fit`]
/// computes the embedding and returns a [`FittedTsne`].
#[derive(Debug, Clone)]
pub struct Tsne {
/// Number of embedding dimensions (default 2).
n_components: usize,
/// Perplexity target (default 30.0).
perplexity: f64,
/// Learning rate for gradient descent (default 200.0).
learning_rate: f64,
/// Number of gradient descent iterations (default 1000).
n_iter: usize,
/// Early exaggeration factor (default 12.0).
early_exaggeration: f64,
/// Barnes-Hut theta parameter controlling the accuracy/speed trade-off
/// (default 0.5). A value of 0 disables the approximation.
theta: f64,
/// Optional random seed for reproducibility.
random_state: Option<u64>,
}
impl Tsne {
/// Create a new `Tsne` with default parameters.
///
/// Defaults: `n_components=2`, `perplexity=30.0`, `learning_rate=200.0`,
/// `n_iter=1000`, `early_exaggeration=12.0`, `theta=0.5`.
#[must_use]
pub fn new() -> Self {
Self {
n_components: 2,
perplexity: 30.0,
learning_rate: 200.0,
n_iter: 1000,
early_exaggeration: 12.0,
theta: 0.5,
random_state: None,
}
}
/// Set the number of embedding dimensions.
#[must_use]
pub fn with_n_components(mut self, n: usize) -> Self {
self.n_components = n;
self
}
/// Set the perplexity.
#[must_use]
pub fn with_perplexity(mut self, perp: f64) -> Self {
self.perplexity = perp;
self
}
/// Set the learning rate.
#[must_use]
pub fn with_learning_rate(mut self, lr: f64) -> Self {
self.learning_rate = lr;
self
}
/// Set the number of iterations.
#[must_use]
pub fn with_n_iter(mut self, n: usize) -> Self {
self.n_iter = n;
self
}
/// Set the early exaggeration factor.
#[must_use]
pub fn with_early_exaggeration(mut self, ex: f64) -> Self {
self.early_exaggeration = ex;
self
}
/// Set the Barnes-Hut theta parameter. Use 0 for exact computation.
#[must_use]
pub fn with_theta(mut self, theta: f64) -> Self {
self.theta = theta;
self
}
/// Set the random seed for reproducible results.
#[must_use]
pub fn with_random_state(mut self, seed: u64) -> Self {
self.random_state = Some(seed);
self
}
/// Return the configured number of components.
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
/// Return the configured perplexity.
#[must_use]
pub fn perplexity(&self) -> f64 {
self.perplexity
}
/// Return the configured learning rate.
#[must_use]
pub fn learning_rate(&self) -> f64 {
self.learning_rate
}
/// Return the configured number of iterations.
#[must_use]
pub fn n_iter(&self) -> usize {
self.n_iter
}
/// Return the configured early exaggeration factor.
#[must_use]
pub fn early_exaggeration(&self) -> f64 {
self.early_exaggeration
}
/// Return the configured theta.
#[must_use]
pub fn theta(&self) -> f64 {
self.theta
}
/// Return the configured random state, if any.
#[must_use]
pub fn random_state(&self) -> Option<u64> {
self.random_state
}
}
impl Default for Tsne {
fn default() -> Self {
Self::new()
}
}
// ---------------------------------------------------------------------------
// FittedTsne
// ---------------------------------------------------------------------------
/// A fitted t-SNE model holding the learned embedding.
///
/// Created by calling [`Fit::fit`] on a [`Tsne`]. There is **no**
/// `Transform` implementation because t-SNE does not support out-of-sample
/// projection.
#[derive(Debug, Clone)]
pub struct FittedTsne {
/// The embedding, shape `(n_samples, n_components)`.
embedding_: Array2<f64>,
/// Final Kullback-Leibler divergence.
kl_divergence_: f64,
/// Number of iterations actually performed.
n_iter_: usize,
}
impl FittedTsne {
/// The embedding coordinates, shape `(n_samples, n_components)`.
#[must_use]
pub fn embedding(&self) -> &Array2<f64> {
&self.embedding_
}
/// The Kullback-Leibler divergence at convergence.
#[must_use]
pub fn kl_divergence(&self) -> f64 {
self.kl_divergence_
}
/// Number of iterations actually performed.
#[must_use]
pub fn n_iter(&self) -> usize {
self.n_iter_
}
}
// ---------------------------------------------------------------------------
// Barnes-Hut spatial tree (quad-tree / oct-tree generalisation)
// ---------------------------------------------------------------------------
/// A spatial tree node for Barnes-Hut approximation.
///
/// This is a generic k-d tree where each internal node stores the centre of
/// mass and total count of the points in its region, and each leaf stores a
/// single point (or is empty). The tree recursively subdivides space into
/// 2^d children (d = number of embedding dimensions, typically 2 or 3).
struct BHTree {
/// Centre of mass of all points in this node.
center_of_mass: Vec<f64>,
/// Number of points contained.
count: usize,
/// Width of this cell (side length).
width: f64,
/// Children nodes. Empty means leaf.
children: Vec<BHTree>,
/// Dimensionality.
dim: usize,
}
impl BHTree {
/// Create an empty tree node centred at `center` with given `width`.
fn new(_center: Vec<f64>, width: f64, dim: usize) -> Self {
Self {
center_of_mass: vec![0.0; dim],
count: 0,
width,
children: Vec::new(),
dim,
}
}
/// Insert a point into the tree.
fn insert(&mut self, point: &[f64], center: &[f64]) {
if self.count == 0 && self.children.is_empty() {
// Empty leaf: store the point here.
self.center_of_mass = point.to_vec();
self.count = 1;
return;
}
if self.children.is_empty() && self.count == 1 {
// Occupied leaf: subdivide and re-insert existing point.
self.subdivide(center);
let old = self.center_of_mass.clone();
self.reinsert(&old, center);
}
// Update aggregate centre of mass.
let c = self.count as f64;
for d in 0..self.dim {
self.center_of_mass[d] = (self.center_of_mass[d] * c + point[d]) / (c + 1.0);
}
self.count += 1;
// Insert into appropriate child.
if !self.children.is_empty() {
let idx = self.child_index(point, center);
let child_center = self.child_center(center, idx);
self.children[idx].insert(point, &child_center);
}
}
/// Determine which child quadrant a point belongs to.
fn child_index(&self, point: &[f64], center: &[f64]) -> usize {
let mut idx = 0;
for d in 0..self.dim {
if point[d] >= center[d] {
idx |= 1 << d;
}
}
idx
}
/// Compute the centre of a child quadrant.
fn child_center(&self, parent_center: &[f64], idx: usize) -> Vec<f64> {
let quarter = self.width / 4.0;
let mut c = parent_center.to_vec();
for d in 0..self.dim {
if idx & (1 << d) != 0 {
c[d] += quarter;
} else {
c[d] -= quarter;
}
}
c
}
/// Subdivide this node into 2^dim children.
fn subdivide(&mut self, _center: &[f64]) {
let n_children = 1 << self.dim;
let child_width = self.width / 2.0;
self.children.reserve_exact(n_children);
for _ in 0..n_children {
self.children
.push(BHTree::new(vec![0.0; self.dim], child_width, self.dim));
}
}
/// Re-insert a point that was stored in this node into a child.
fn reinsert(&mut self, point: &[f64], center: &[f64]) {
let idx = self.child_index(point, center);
let child_center = self.child_center(center, idx);
self.children[idx].insert(point, &child_center);
}
/// Compute the repulsive force on a point using the Barnes-Hut criterion.
///
/// Accumulates into `force` (length = dim) and returns the sum of
/// 1/(1 + ||y_i - y_j||^2) for normalisation (the Z factor).
fn compute_repulsive(
&self,
point: &[f64],
theta: f64,
force: &mut [f64],
center: &[f64],
) -> f64 {
if self.count == 0 {
return 0.0;
}
// Squared distance from point to this node's centre of mass.
let mut dist_sq = 0.0;
for d in 0..self.dim {
let diff = point[d] - self.center_of_mass[d];
dist_sq += diff * diff;
}
// If this is a leaf with a single point that is not the query itself
// (dist > 0), or the cell is far enough away, use the approximation.
if self.children.is_empty() || (self.width / dist_sq.sqrt() < theta && dist_sq > 1e-16) {
let inv = 1.0 / (1.0 + dist_sq);
let mult = (self.count as f64) * inv * inv;
for d in 0..self.dim {
force[d] += mult * (point[d] - self.center_of_mass[d]);
}
return (self.count as f64) * inv;
}
// Otherwise, recurse into children.
let mut z = 0.0;
for (idx, child) in self.children.iter().enumerate() {
let child_center = self.child_center(center, idx);
z += child.compute_repulsive(point, theta, force, &child_center);
}
z
}
}
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/// Compute pairwise squared Euclidean distances.
fn pairwise_sq_distances(x: &Array2<f64>) -> Vec<Vec<f64>> {
let n = x.nrows();
let d = x.ncols();
let mut dist = vec![vec![0.0; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let mut sq = 0.0;
for k in 0..d {
let diff = x[[i, k]] - x[[j, k]];
sq += diff * diff;
}
dist[i][j] = sq;
dist[j][i] = sq;
}
}
dist
}
/// Binary search for the sigma that produces the target perplexity for point i.
///
/// Returns a row of conditional probabilities `P(j|i)` for all j.
fn compute_pij_row(dist_sq: &[f64], i: usize, target_perp: f64) -> Vec<f64> {
let n = dist_sq.len();
let target_entropy = target_perp.ln();
let mut lo = 1e-20_f64;
let mut hi = 1e5_f64;
let mut beta = 1.0; // beta = 1 / (2 * sigma^2)
let mut p = vec![0.0; n];
for _iter in 0..100 {
// Compute un-normalised probabilities.
let mut sum = 0.0;
for j in 0..n {
if j == i {
p[j] = 0.0;
} else {
p[j] = (-beta * dist_sq[j]).exp();
sum += p[j];
}
}
if sum < 1e-16 {
// All probabilities near zero — widen the kernel.
hi = beta;
beta = f64::midpoint(lo, hi);
continue;
}
// Normalise and compute entropy.
let inv_sum = 1.0 / sum;
let mut entropy = 0.0;
for j in 0..n {
p[j] *= inv_sum;
if p[j] > 1e-16 {
entropy -= p[j] * p[j].ln();
}
}
let diff = entropy - target_entropy;
if diff.abs() < 1e-5 {
break;
}
if diff > 0.0 {
// Entropy too high — need narrower kernel (larger beta).
lo = beta;
} else {
// Entropy too low — need wider kernel (smaller beta).
hi = beta;
}
beta = f64::midpoint(lo, hi);
}
p
}
/// Compute the symmetric joint probability matrix P.
fn compute_joint_probabilities(x: &Array2<f64>, perplexity: f64) -> Vec<Vec<f64>> {
let n = x.nrows();
let dist = pairwise_sq_distances(x);
let mut p = vec![vec![0.0; n]; n];
for i in 0..n {
let p_row = compute_pij_row(&dist[i], i, perplexity);
for j in 0..n {
p[i][j] = p_row[j];
}
}
// Symmetrise: P_ij = (P_i|j + P_j|i) / (2n)
let inv_2n = 1.0 / (2.0 * n as f64);
for i in 0..n {
for j in (i + 1)..n {
let sym = (p[i][j] + p[j][i]) * inv_2n;
// Clamp to small positive value for numerical stability.
let sym = sym.max(1e-12);
p[i][j] = sym;
p[j][i] = sym;
}
p[i][i] = 0.0;
}
p
}
/// Compute the Kullback-Leibler divergence KL(P || Q).
fn compute_kl_divergence(p: &[Vec<f64>], y: &Array2<f64>) -> f64 {
let n = p.len();
// Compute Q (student-t distribution).
let mut z = 0.0;
let mut q_raw = vec![vec![0.0; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let mut dist_sq = 0.0;
for d in 0..y.ncols() {
let diff = y[[i, d]] - y[[j, d]];
dist_sq += diff * diff;
}
let val = 1.0 / (1.0 + dist_sq);
q_raw[i][j] = val;
q_raw[j][i] = val;
z += 2.0 * val;
}
}
let mut kl = 0.0;
if z > 0.0 {
let inv_z = 1.0 / z;
for i in 0..n {
for j in 0..n {
if i == j {
continue;
}
let p_ij = p[i][j];
let q_ij = (q_raw[i][j] * inv_z).max(1e-16);
if p_ij > 1e-16 {
kl += p_ij * (p_ij / q_ij).ln();
}
}
}
}
kl
}
// ---------------------------------------------------------------------------
// Trait implementations
// ---------------------------------------------------------------------------
impl Fit<Array2<f64>, ()> for Tsne {
type Fitted = FittedTsne;
type Error = FerroError;
/// Fit t-SNE by computing the low-dimensional embedding.
///
/// # Errors
///
/// - [`FerroError::InvalidParameter`] if `n_components` is zero,
/// perplexity is non-positive, or learning rate is non-positive.
/// - [`FerroError::InsufficientSamples`] if there are fewer than 2
/// samples or perplexity is too large for the number of samples.
fn fit(&self, x: &Array2<f64>, _y: &()) -> Result<FittedTsne, FerroError> {
let n = x.nrows();
// Validate parameters.
if self.n_components == 0 {
return Err(FerroError::InvalidParameter {
name: "n_components".into(),
reason: "must be at least 1".into(),
});
}
if n < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: n,
context: "Tsne::fit requires at least 2 samples".into(),
});
}
if self.perplexity <= 0.0 {
return Err(FerroError::InvalidParameter {
name: "perplexity".into(),
reason: "must be positive".into(),
});
}
if self.perplexity >= n as f64 {
return Err(FerroError::InvalidParameter {
name: "perplexity".into(),
reason: format!(
"perplexity ({}) must be less than n_samples ({})",
self.perplexity, n
),
});
}
if self.learning_rate <= 0.0 {
return Err(FerroError::InvalidParameter {
name: "learning_rate".into(),
reason: "must be positive".into(),
});
}
if self.theta < 0.0 {
return Err(FerroError::InvalidParameter {
name: "theta".into(),
reason: "must be non-negative".into(),
});
}
// Reject NaN/Inf BEFORE the affinity / gradient-descent math (sklearn's
// `_validate_data(force_all_finite=True)` at `_t_sne.py:884` (barnes_hut)
// / `:891` (exact), `utils/validation.py:147-154`).
reject_non_finite(x)?;
let dim = self.n_components;
// Step 1: Compute joint probabilities.
let p = compute_joint_probabilities(x, self.perplexity);
// Step 2: Initialise embedding from a small Gaussian.
let seed = self.random_state.unwrap_or(0);
let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed);
let normal = Normal::new(0.0, 1e-4).unwrap();
let mut y = Array2::<f64>::zeros((n, dim));
for elem in &mut y {
*elem = normal.sample(&mut rng);
}
// Gradient descent state.
let mut gains = Array2::from_elem((n, dim), 1.0);
let mut velocity = Array2::<f64>::zeros((n, dim));
let early_exag_end = 250.min(self.n_iter);
let lr = self.learning_rate;
let use_bh = self.theta > 0.0 && dim <= 3;
for iteration in 0..self.n_iter {
let momentum = if iteration < early_exag_end { 0.5 } else { 0.8 };
let exaggeration = if iteration < early_exag_end {
self.early_exaggeration
} else {
1.0
};
// Compute gradient.
let mut gradient = Array2::<f64>::zeros((n, dim));
if use_bh {
// Barnes-Hut: attractive forces from P, repulsive from tree.
self.bh_gradient(&p, &y, exaggeration, &mut gradient);
} else {
// Exact gradient computation.
exact_gradient(&p, &y, exaggeration, &mut gradient);
}
// Update with momentum and adaptive gains.
for i in 0..n {
for d in 0..dim {
let g = gradient[[i, d]];
let v = velocity[[i, d]];
// Adaptive gain: increase if gradient and velocity disagree.
if (g > 0.0) == (v > 0.0) {
gains[[i, d]] = (gains[[i, d]] * 0.8_f64).max(0.01);
} else {
gains[[i, d]] += 0.2;
}
velocity[[i, d]] = momentum * v - lr * gains[[i, d]] * g;
y[[i, d]] += velocity[[i, d]];
}
}
// Centre the embedding.
for d in 0..dim {
let mean: f64 = y.column(d).sum() / n as f64;
for i in 0..n {
y[[i, d]] -= mean;
}
}
}
let kl = compute_kl_divergence(&p, &y);
Ok(FittedTsne {
embedding_: y,
kl_divergence_: kl,
n_iter_: self.n_iter,
})
}
}
impl Tsne {
/// Compute the gradient using Barnes-Hut approximation.
fn bh_gradient(
&self,
p: &[Vec<f64>],
y: &Array2<f64>,
exaggeration: f64,
gradient: &mut Array2<f64>,
) {
let n = y.nrows();
let dim = y.ncols();
// Build the spatial tree.
let mut min_vals = vec![f64::MAX; dim];
let mut max_vals = vec![f64::MIN; dim];
for i in 0..n {
for d in 0..dim {
min_vals[d] = min_vals[d].min(y[[i, d]]);
max_vals[d] = max_vals[d].max(y[[i, d]]);
}
}
let mut width = 0.0_f64;
let mut tree_center = vec![0.0; dim];
for d in 0..dim {
let range = max_vals[d] - min_vals[d];
width = width.max(range);
tree_center[d] = f64::midpoint(min_vals[d], max_vals[d]);
}
width *= 1.01; // Small padding.
let mut tree = BHTree::new(vec![0.0; dim], width, dim);
for i in 0..n {
let point: Vec<f64> = (0..dim).map(|d| y[[i, d]]).collect();
tree.insert(&point, &tree_center);
}
// Attractive forces: iterate over non-zero P entries.
// Also compute repulsive forces via tree.
let mut repulsive = Array2::<f64>::zeros((n, dim));
let mut z_sum = 0.0;
for i in 0..n {
let point: Vec<f64> = (0..dim).map(|d| y[[i, d]]).collect();
let mut rep_force = vec![0.0; dim];
let z_i = tree.compute_repulsive(&point, self.theta, &mut rep_force, &tree_center);
z_sum += z_i;
for d in 0..dim {
repulsive[[i, d]] = rep_force[d];
}
}
// Attractive gradient.
for i in 0..n {
for j in 0..n {
if i == j {
continue;
}
let p_ij = p[i][j] * exaggeration;
if p_ij < 1e-16 {
continue;
}
let mut dist_sq = 0.0;
for d in 0..dim {
let diff = y[[i, d]] - y[[j, d]];
dist_sq += diff * diff;
}
let inv = 1.0 / (1.0 + dist_sq);
for d in 0..dim {
gradient[[i, d]] += 4.0 * p_ij * inv * (y[[i, d]] - y[[j, d]]);
}
}
}
// Repulsive gradient (normalised by Z).
if z_sum > 0.0 {
let inv_z = 1.0 / z_sum;
for i in 0..n {
for d in 0..dim {
gradient[[i, d]] -= 4.0 * inv_z * repulsive[[i, d]];
}
}
}
}
}
/// Exact gradient computation (no Barnes-Hut).
fn exact_gradient(p: &[Vec<f64>], y: &Array2<f64>, exaggeration: f64, gradient: &mut Array2<f64>) {
let n = y.nrows();
let dim = y.ncols();
// Compute Q distribution (Student-t).
let mut q_raw = vec![vec![0.0; n]; n];
let mut z = 0.0;
for i in 0..n {
for j in (i + 1)..n {
let mut dist_sq = 0.0;
for d in 0..dim {
let diff = y[[i, d]] - y[[j, d]];
dist_sq += diff * diff;
}
let val = 1.0 / (1.0 + dist_sq);
q_raw[i][j] = val;
q_raw[j][i] = val;
z += 2.0 * val;
}
}
if z < 1e-16 {
return;
}
let inv_z = 1.0 / z;
// Gradient: 4 * sum_j (p_ij - q_ij) * (y_i - y_j) / (1 + ||y_i - y_j||^2)
for i in 0..n {
for j in 0..n {
if i == j {
continue;
}
let p_ij = p[i][j] * exaggeration;
let q_ij = q_raw[i][j] * inv_z;
let inv = q_raw[i][j]; // = 1/(1+dist_sq)
let mult = 4.0 * (p_ij - q_ij) * inv;
for d in 0..dim {
gradient[[i, d]] += mult * (y[[i, d]] - y[[j, d]]);
}
}
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use ndarray::Array2;
use rand::SeedableRng;
use rand_distr::{Distribution, Normal};
use rand_xoshiro::Xoshiro256PlusPlus;
/// Generate small blobs dataset: 3 clusters of 10 points each in 5D.
fn make_blobs(seed: u64) -> (Array2<f64>, Vec<usize>) {
let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed);
let normal = Normal::new(0.0, 0.3).unwrap();
let n_per_cluster = 10;
let n_features = 5;
let centers = [
vec![0.0, 0.0, 0.0, 0.0, 0.0],
vec![5.0, 5.0, 5.0, 5.0, 5.0],
vec![10.0, 0.0, 10.0, 0.0, 10.0],
];
let n = centers.len() * n_per_cluster;
let mut x = Array2::<f64>::zeros((n, n_features));
let mut labels = Vec::with_capacity(n);
for (c_idx, center) in centers.iter().enumerate() {
for i in 0..n_per_cluster {
let row = c_idx * n_per_cluster + i;
for (f, &c) in center.iter().enumerate() {
x[[row, f]] = c + normal.sample(&mut rng);
}
labels.push(c_idx);
}
}
(x, labels)
}
#[test]
fn test_tsne_basic_shape() {
let x = Array2::<f64>::zeros((20, 5));
let tsne = Tsne::new()
.with_perplexity(5.0)
.with_n_iter(50)
.with_random_state(42);
let fitted = tsne.fit(&x, &()).unwrap();
assert_eq!(fitted.embedding().dim(), (20, 2));
}
#[test]
fn test_tsne_3d_embedding() {
let x = Array2::<f64>::zeros((15, 4));
let tsne = Tsne::new()
.with_n_components(3)
.with_perplexity(4.0)
.with_n_iter(50)
.with_random_state(42);
let fitted = tsne.fit(&x, &()).unwrap();
assert_eq!(fitted.embedding().ncols(), 3);
}
#[test]
fn test_tsne_separates_clusters() {
let (x, labels) = make_blobs(42);
let tsne = Tsne::new()
.with_perplexity(5.0)
.with_n_iter(500)
.with_random_state(42);
let fitted = tsne.fit(&x, &()).unwrap();
let emb = fitted.embedding();
// Check cluster separation with k-NN accuracy (k=3).
let n = emb.nrows();
let mut correct = 0;
for i in 0..n {
let mut dists: Vec<(f64, usize)> = (0..n)
.filter(|&j| j != i)
.map(|j| {
let mut d = 0.0;
for dd in 0..emb.ncols() {
let diff = emb[[i, dd]] - emb[[j, dd]];
d += diff * diff;
}
(d, labels[j])
})
.collect();
dists.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
// Majority vote of 3 nearest.
let mut votes = [0usize; 3];
for &(_, lbl) in dists.iter().take(3) {
votes[lbl] += 1;
}
let pred = votes.iter().enumerate().max_by_key(|&(_, v)| v).unwrap().0;
if pred == labels[i] {
correct += 1;
}
}
let accuracy = f64::from(correct) / n as f64;
assert!(
accuracy > 0.8,
"t-SNE k-NN accuracy should be > 80%, got {:.1}%",
accuracy * 100.0
);
}
#[test]
fn test_tsne_kl_divergence_non_negative() {
let (x, _) = make_blobs(42);
let tsne = Tsne::new()
.with_perplexity(5.0)
.with_n_iter(100)
.with_random_state(42);
let fitted = tsne.fit(&x, &()).unwrap();
assert!(
fitted.kl_divergence() >= 0.0,
"KL divergence should be non-negative, got {}",
fitted.kl_divergence()
);
}
#[test]
fn test_tsne_reproducibility() {
let x = Array2::<f64>::from_shape_fn((10, 3), |(i, j)| (i + j) as f64);
let t1 = Tsne::new()
.with_perplexity(3.0)
.with_n_iter(50)
.with_random_state(42);
let t2 = Tsne::new()
.with_perplexity(3.0)
.with_n_iter(50)
.with_random_state(42);
let f1 = t1.fit(&x, &()).unwrap();
let f2 = t2.fit(&x, &()).unwrap();
for (a, b) in f1.embedding().iter().zip(f2.embedding().iter()) {
assert!(
(a - b).abs() < 1e-10,
"embeddings should be identical with same seed"
);
}
}
#[test]
fn test_tsne_exact_mode() {
// theta=0 forces exact computation
let x = Array2::<f64>::from_shape_fn((10, 3), |(i, j)| (i + j) as f64);
let tsne = Tsne::new()
.with_theta(0.0)
.with_perplexity(3.0)
.with_n_iter(50)
.with_random_state(42);
let fitted = tsne.fit(&x, &()).unwrap();
assert_eq!(fitted.embedding().dim(), (10, 2));
}
#[test]
fn test_tsne_invalid_n_components_zero() {
let x = Array2::<f64>::zeros((10, 3));
let tsne = Tsne::new().with_n_components(0);
assert!(tsne.fit(&x, &()).is_err());
}
#[test]
fn test_tsne_invalid_perplexity_zero() {
let x = Array2::<f64>::zeros((10, 3));
let tsne = Tsne::new().with_perplexity(0.0);
assert!(tsne.fit(&x, &()).is_err());
}
#[test]
fn test_tsne_perplexity_too_large() {
let x = Array2::<f64>::zeros((10, 3));
let tsne = Tsne::new().with_perplexity(10.0);
assert!(tsne.fit(&x, &()).is_err());
}
#[test]
fn test_tsne_invalid_learning_rate() {
let x = Array2::<f64>::zeros((10, 3));
let tsne = Tsne::new().with_learning_rate(-1.0);
assert!(tsne.fit(&x, &()).is_err());
}
#[test]
fn test_tsne_invalid_theta() {
let x = Array2::<f64>::zeros((10, 3));
let tsne = Tsne::new().with_theta(-0.1);
assert!(tsne.fit(&x, &()).is_err());
}
#[test]
fn test_tsne_insufficient_samples() {
let x = Array2::<f64>::zeros((1, 3));
let tsne = Tsne::new();
assert!(tsne.fit(&x, &()).is_err());
}
#[test]
fn test_tsne_getters() {
let tsne = Tsne::new()
.with_n_components(3)
.with_perplexity(20.0)
.with_learning_rate(100.0)
.with_n_iter(500)
.with_early_exaggeration(8.0)
.with_theta(0.3)
.with_random_state(99);
assert_eq!(tsne.n_components(), 3);
assert!((tsne.perplexity() - 20.0).abs() < 1e-10);
assert!((tsne.learning_rate() - 100.0).abs() < 1e-10);
assert_eq!(tsne.n_iter(), 500);
assert!((tsne.early_exaggeration() - 8.0).abs() < 1e-10);
assert!((tsne.theta() - 0.3).abs() < 1e-10);
assert_eq!(tsne.random_state(), Some(99));
}
#[test]
fn test_tsne_default() {
let tsne = Tsne::default();
assert_eq!(tsne.n_components(), 2);
assert!((tsne.perplexity() - 30.0).abs() < 1e-10);
}
#[test]
fn test_tsne_n_iter_recorded() {
let x = Array2::<f64>::zeros((10, 3));
let tsne = Tsne::new()
.with_perplexity(3.0)
.with_n_iter(50)
.with_random_state(42);
let fitted = tsne.fit(&x, &()).unwrap();
assert_eq!(fitted.n_iter(), 50);
}
}