#[derive(Debug, Clone, Serialize, Deserialize)]
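/// t-distributed Stochastic Neighbor Embedding (t-SNE) for non-linear
/// dimensionality reduction.
///
/// Pairwise similarities in the input space are modeled with Gaussian
/// kernels calibrated to a target perplexity, similarities in the embedding
/// with a Student-t kernel, and the embedding is found by minimizing the KL
/// divergence between the two with momentum gradient descent.
///
/// A minimal usage sketch (assumes the surrounding crate's `Matrix` and
/// `Transformer` as used in `fit` below):
///
/// ```ignore
/// let x = Matrix::from_vec(4, 3, vec![0.0; 12])?;
/// let mut tsne = TSNE::new(2).with_perplexity(2.0).with_random_state(42);
/// let embedding = tsne.fit_transform(&x)?;
/// assert_eq!(embedding.shape(), (4, 2));
/// ```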
pub struct TSNE {
n_components: usize,
perplexity: f32,
learning_rate: f32,
n_iter: usize,
random_state: Option<u64>,
embedding: Option<Matrix<f32>>,
}
impl Default for TSNE {
fn default() -> Self {
Self::new(2)
}
}
impl TSNE {
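    /// Creates a t-SNE model embedding into `n_components` dimensions, with
    /// reference defaults: perplexity 30, learning rate 200, 1000 iterations.
    ///
    /// ```ignore
    /// let tsne = TSNE::new(2);
    /// assert_eq!(tsne.n_components(), 2);
    /// ```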
#[must_use]
pub fn new(n_components: usize) -> Self {
Self {
n_components,
perplexity: 30.0,
learning_rate: 200.0,
n_iter: 1000,
random_state: None,
embedding: None,
}
}
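    /// Sets the perplexity, roughly the effective number of neighbors each
    /// point considers. Typical values are 5-50, and it should be smaller
    /// than the number of samples.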
#[must_use]
pub fn with_perplexity(mut self, perplexity: f32) -> Self {
self.perplexity = perplexity;
self
}
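    /// Sets the gradient-descent learning rate.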
#[must_use]
pub fn with_learning_rate(mut self, learning_rate: f32) -> Self {
self.learning_rate = learning_rate;
self
}
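    /// Sets the number of gradient-descent iterations.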
#[must_use]
pub fn with_n_iter(mut self, n_iter: usize) -> Self {
self.n_iter = n_iter;
self
}
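    /// Fixes the seed for the random embedding initialization, making runs
    /// reproducible.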
#[must_use]
pub fn with_random_state(mut self, seed: u64) -> Self {
self.random_state = Some(seed);
self
}
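    /// Returns the number of output dimensions.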
#[must_use]
pub fn n_components(&self) -> usize {
self.n_components
}
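    /// Returns `true` once `fit` has stored an embedding.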
#[must_use]
pub fn is_fitted(&self) -> bool {
self.embedding.is_some()
}
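    /// Computes all pairwise *squared* Euclidean distances, flattened
    /// row-major: `distances[i * n_samples + j] = ||x_i - x_j||^2`.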
#[allow(clippy::unused_self)]
fn compute_pairwise_distances(&self, x: &Matrix<f32>) -> Vec<f32> {
let (n_samples, n_features) = x.shape();
let mut distances = vec![0.0; n_samples * n_samples];
for i in 0..n_samples {
for j in 0..n_samples {
if i == j {
distances[i * n_samples + j] = 0.0;
continue;
}
let mut dist_sq = 0.0;
for k in 0..n_features {
let diff = x.get(i, k) - x.get(j, k);
dist_sq += diff * diff;
}
distances[i * n_samples + j] = dist_sq;
}
}
distances
}
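    /// Computes the conditional similarities
    /// `p_{j|i} = exp(-beta_i * d_ij^2) / sum_{k != i} exp(-beta_i * d_ik^2)`,
    /// choosing each point's precision `beta_i` by bisection (up to 50 steps)
    /// so that the row entropy matches `ln(perplexity)`. A larger `beta`
    /// means a narrower Gaussian and lower entropy, so `beta` is raised while
    /// the entropy is too high and lowered while it is too low.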
fn compute_p_conditional(&self, distances: &[f32], n_samples: usize) -> Vec<f32> {
let mut p_conditional = vec![0.0; n_samples * n_samples];
let target_entropy = self.perplexity.ln();
for i in 0..n_samples {
let mut beta_min = -f32::INFINITY;
let mut beta_max = f32::INFINITY;
let mut beta = 1.0;
for _ in 0..50 {
let mut sum_p = 0.0;
let mut entropy = 0.0;
for j in 0..n_samples {
if i == j {
p_conditional[i * n_samples + j] = 0.0;
continue;
}
let p_ji = (-beta * distances[i * n_samples + j]).exp();
p_conditional[i * n_samples + j] = p_ji;
sum_p += p_ji;
}
if sum_p > 0.0 {
for j in 0..n_samples {
if i != j {
let p_normalized = p_conditional[i * n_samples + j] / sum_p;
p_conditional[i * n_samples + j] = p_normalized;
if p_normalized > 1e-12 {
entropy -= p_normalized * p_normalized.ln();
}
}
}
}
let entropy_diff = entropy - target_entropy;
if entropy_diff.abs() < 1e-5 {
break;
}
if entropy_diff > 0.0 {
beta_min = beta;
beta = if beta_max.is_infinite() {
beta * 2.0
} else {
f32::midpoint(beta, beta_max)
};
} else {
beta_max = beta;
beta = if beta_min.is_infinite() {
beta / 2.0
} else {
f32::midpoint(beta, beta_min)
};
}
}
}
p_conditional
}
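    /// Symmetrizes the conditionals into the joint distribution
    /// `p_ij = (p_{j|i} + p_{i|j}) / (2n)`, clamped to at least `1e-12` for
    /// numerical safety.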
#[allow(clippy::unused_self)]
fn compute_p_joint(&self, p_conditional: &[f32], n_samples: usize) -> Vec<f32> {
let mut p_joint = vec![0.0; n_samples * n_samples];
let normalizer = 2.0 * n_samples as f32;
for i in 0..n_samples {
for j in 0..n_samples {
p_joint[i * n_samples + j] = (p_conditional[i * n_samples + j]
+ p_conditional[j * n_samples + i])
/ normalizer;
p_joint[i * n_samples + j] = p_joint[i * n_samples + j].max(1e-12);
}
}
p_joint
}
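    /// Computes the embedding-space affinities with a Student-t kernel
    /// (one degree of freedom):
    /// `q_ij = (1 + ||y_i - y_j||^2)^{-1} / sum_{k != l} (1 + ||y_k - y_l||^2)^{-1}`.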
fn compute_q(&self, y: &[f32], n_samples: usize) -> Vec<f32> {
let mut q = vec![0.0; n_samples * n_samples];
let mut sum_q = 0.0;
for i in 0..n_samples {
for j in 0..n_samples {
if i == j {
q[i * n_samples + j] = 0.0;
continue;
}
let mut dist_sq = 0.0;
for k in 0..self.n_components {
let diff = y[i * self.n_components + k] - y[j * self.n_components + k];
dist_sq += diff * diff;
}
let q_ij = 1.0 / (1.0 + dist_sq);
q[i * n_samples + j] = q_ij;
sum_q += q_ij;
}
}
if sum_q > 0.0 {
for q_val in &mut q {
*q_val /= sum_q;
                *q_val = q_val.max(1e-12);
            }
}
q
}
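    /// Computes the gradient of `KL(P || Q)` with respect to the embedding:
    /// `dC/dy_i = 4 * sum_j (p_ij - q_ij) * (y_i - y_j) / (1 + ||y_i - y_j||^2)`.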
fn compute_gradient(&self, y: &[f32], p: &[f32], q: &[f32], n_samples: usize) -> Vec<f32> {
let mut gradient = vec![0.0; n_samples * self.n_components];
for i in 0..n_samples {
for j in 0..n_samples {
if i == j {
continue;
}
let p_ij = p[i * n_samples + j];
let q_ij = q[i * n_samples + j];
let mut dist_sq = 0.0;
for k in 0..self.n_components {
let diff = y[i * self.n_components + k] - y[j * self.n_components + k];
dist_sq += diff * diff;
}
let factor = 4.0 * (p_ij - q_ij) / (1.0 + dist_sq);
for k in 0..self.n_components {
let diff = y[i * self.n_components + k] - y[j * self.n_components + k];
gradient[i * self.n_components + k] += factor * diff;
}
}
}
gradient
}
}
impl Transformer for TSNE {
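    /// Runs the full t-SNE pipeline: pairwise distances, perplexity-calibrated
    /// joint `P`, small random initialization, then `n_iter` steps of gradient
    /// descent with momentum.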
fn fit(&mut self, x: &Matrix<f32>) -> Result<()> {
let (n_samples, _n_features) = x.shape();
let distances = self.compute_pairwise_distances(x);
let p_conditional = self.compute_p_conditional(&distances, n_samples);
let p_joint = self.compute_p_joint(&p_conditional, n_samples);
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
        // Seed from the configured random state, or hash the current time so
        // unseeded runs differ.
        let seed = self.random_state.unwrap_or_else(|| {
            let mut hasher = DefaultHasher::new();
            std::time::SystemTime::now().hash(&mut hasher);
            hasher.finish()
        });
        let mut rng_state = seed;
        // Simple linear congruential generator (Numerical Recipes constants).
        // Taking 16 bits from the middle of the state yields a uniform value
        // in [-0.5, 0.5).
        let mut rand = || -> f32 {
            rng_state = rng_state.wrapping_mul(1664525).wrapping_add(1013904223);
            (((rng_state >> 16) & 0xFFFF) as f32 / 65536.0) - 0.5
        };
        // Initialize the embedding with small random values, as in the
        // reference implementation.
        let mut y = vec![0.0; n_samples * self.n_components];
        for val in &mut y {
            *val = rand() * 0.0001;
        }
        let mut velocity = vec![0.0; n_samples * self.n_components];
        // Momentum schedule from the original t-SNE paper: 0.5 during the
        // first 250 iterations, 0.8 afterwards.
        let momentum = 0.5;
        let final_momentum = 0.8;
        let momentum_switch_iter = 250;
for iter in 0..self.n_iter {
let q = self.compute_q(&y, n_samples);
let gradient = self.compute_gradient(&y, &p_joint, &q, n_samples);
let current_momentum = if iter < momentum_switch_iter {
momentum
} else {
final_momentum
};
for i in 0..(n_samples * self.n_components) {
velocity[i] = current_momentum * velocity[i] - self.learning_rate * gradient[i];
y[i] += velocity[i];
}
            // The reference implementation stops "early exaggeration" around
            // iteration 100; no exaggeration is applied here, so there is
            // nothing to undo.
}
self.embedding = Some(Matrix::from_vec(n_samples, self.n_components, y)?);
Ok(())
}
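    /// Returns the embedding computed by `fit`. This implementation has no
    /// out-of-sample extension, so the input matrix is ignored.
    ///
    /// # Panics
    ///
    /// Panics if the model has not been fitted.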
fn transform(&self, _x: &Matrix<f32>) -> Result<Matrix<f32>> {
assert!(self.is_fitted(), "Model not fitted. Call fit() first.");
Ok(self
.embedding
.as_ref()
.expect("embedding should exist after is_fitted() check")
.clone())
}
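    /// Fits the model on `x` and returns the resulting embedding.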
fn fit_transform(&mut self, x: &Matrix<f32>) -> Result<Matrix<f32>> {
self.fit(x)?;
self.transform(x)
}
}
#[cfg(test)]
mod tests;