use rayon::prelude::*;
use rustc_hash::{FxHashMap, FxHashSet};
use thousands::*;
use crate::data::structures::*;
use crate::prelude::*;
#[derive(Clone, Debug)]
pub struct UmapGraphParams<T> {
pub bandwidth: T,
pub local_connectivity: T,
pub mix_weight: T,
}
impl<T> Default for UmapGraphParams<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
local_connectivity: T::from(1.0).unwrap(),
bandwidth: T::from(1e-5).unwrap(),
mix_weight: T::from(1.0).unwrap(),
}
}
}
pub fn smooth_knn_dist<T>(
dist: &[Vec<T>],
k: usize,
local_connectivity: T,
bandwidth: T,
n_iter: usize,
) -> (Vec<T>, Vec<T>)
where
T: ManifoldsFloat,
{
dist.par_iter()
.map(|dists| {
let target = (k as f64).ln();
let rho = if local_connectivity > T::zero() {
let idx = (local_connectivity - T::one())
.max(T::zero())
.floor()
.to_usize()
.unwrap()
.min(dists.len() - 1);
let fraction = (local_connectivity - T::one()).max(T::zero())
- (local_connectivity - T::one()).max(T::zero()).floor();
if fraction > T::zero() && idx + 1 < dists.len() {
dists[idx] * (T::one() - fraction) + dists[idx + 1] * fraction
} else {
dists[idx]
}
} else {
T::zero()
};
let mut lo = T::zero();
let mut hi = T::max_value();
let mut mid = T::one();
for _ in 0..n_iter {
let mut val = T::zero();
for &d in dists.iter() {
let adjusted = (d - rho).max(T::zero());
val += (-(adjusted / mid)).exp();
}
if (val.to_f64().unwrap() - target).abs() < bandwidth.to_f64().unwrap() {
break;
}
if val.to_f64().unwrap() > target {
hi = mid;
mid = (lo + hi) / (T::one() + T::one());
} else {
lo = mid;
if hi == T::max_value() {
mid *= T::one() + T::one();
} else {
mid = (lo + hi) / (T::one() + T::one());
}
}
}
(mid, rho)
})
.unzip()
}
pub fn knn_to_coo<T>(
knn_indices: &[Vec<usize>],
knn_dists: &[Vec<T>],
sigmas: &[T],
rhos: &[T],
) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let n = knn_indices.len();
let capacity: usize = knn_indices.iter().map(|v| v.len()).sum();
let mut row_indices = Vec::with_capacity(capacity);
let mut col_indices = Vec::with_capacity(capacity);
let mut values = Vec::with_capacity(capacity);
for (i, (neighbours, dists)) in knn_indices.iter().zip(knn_dists.iter()).enumerate() {
let sigma = sigmas[i];
let rho = rhos[i];
for (&j, &dist) in neighbours.iter().zip(dists.iter()) {
if i == j {
continue;
}
let adjusted = (dist - rho).max(T::zero());
let weight = if sigma > T::zero() {
(-(adjusted / sigma)).exp()
} else if adjusted > T::zero() {
T::zero()
} else {
T::one()
};
row_indices.push(i);
col_indices.push(j);
values.push(weight);
}
}
CoordinateList {
row_indices,
col_indices,
values,
n_samples: n,
}
}
pub fn symmetrise_graph<T>(graph: CoordinateList<T>, mix_weight: T) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let mut forward: Vec<FxHashMap<usize, T>> = vec![FxHashMap::default(); n];
let mut backward: Vec<FxHashMap<usize, T>> = vec![FxHashMap::default(); n];
for ((&i, &j), &w) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
forward[i].insert(j, w);
backward[j].insert(i, w);
}
let edges: Vec<Vec<(usize, T)>> = (0..n)
.into_par_iter()
.map(|i| {
let mut combined = FxHashMap::default();
for &j in forward[i].keys().chain(backward[i].keys()) {
let w_ij = forward[i].get(&j).copied().unwrap_or(T::zero());
let w_ji = backward[i].get(&j).copied().unwrap_or(T::zero());
let union = w_ij + w_ji - w_ij * w_ji;
let w_sym = mix_weight * union + (T::one() - mix_weight) * w_ij;
if w_sym > T::zero() {
combined.insert(j, w_sym);
}
}
let mut result: Vec<(usize, T)> = combined.into_iter().collect();
result.sort_unstable_by_key(|&(idx, _)| idx);
result
})
.collect();
let capacity: usize = edges.iter().map(|v| v.len()).sum();
let mut row_indices = Vec::with_capacity(capacity);
let mut col_indices = Vec::with_capacity(capacity);
let mut values = Vec::with_capacity(capacity);
for (i, neighbours) in edges.into_iter().enumerate() {
for (j, w) in neighbours {
row_indices.push(i);
col_indices.push(j);
values.push(w);
}
}
CoordinateList {
row_indices,
col_indices,
values,
n_samples: n,
}
}
pub fn coo_to_adjacency_list<T>(graph: &CoordinateList<T>) -> Vec<Vec<(usize, T)>>
where
T: ManifoldsFloat,
{
let mut adj = vec![Vec::new(); graph.n_samples];
for ((&i, &j), &w) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
adj[i].push((j, w));
}
adj
}
pub fn filter_weak_edges<T>(
graph: CoordinateList<T>,
n_epochs: usize,
verbose: usize,
) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let verbosity = parse_verbosity_level(verbose);
let max_weight = graph
.values
.iter()
.copied()
.fold(T::zero(), |acc, w| if w > acc { w } else { acc });
let original_edge_no = graph.col_indices.len();
let threshold = max_weight / T::from(n_epochs).unwrap();
let mut filtered_rows = Vec::new();
let mut filtered_cols = Vec::new();
let mut filtered_vals = Vec::new();
for ((&i, &j), &w) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
if w >= threshold {
filtered_rows.push(i);
filtered_cols.push(j);
filtered_vals.push(w);
}
}
let filtered_edge_no = filtered_cols.len();
if verbosity.detailed_verbosity() {
println!(
" Filtered out {} weak edges.",
(original_edge_no - filtered_edge_no).separate_with_underscores(),
);
}
CoordinateList {
row_indices: filtered_rows,
col_indices: filtered_cols,
values: filtered_vals,
n_samples: graph.n_samples,
}
}
pub fn gaussian_knn_affinities<T>(
knn_indices: &[Vec<usize>],
knn_dists: &[Vec<T>],
perplexity: T,
tol: T,
max_iter: usize,
distances_squared: bool,
) -> Result<CoordinateList<T>, ManifoldsError>
where
T: ManifoldsFloat,
{
let n = knn_indices.len();
let min_k = knn_indices.iter().map(|idx| idx.len()).min().unwrap_or(0);
let perp_f64 = perplexity.to_f64().unwrap_or(f64::NAN);
if perp_f64 >= min_k as f64 {
return Err(ManifoldsError::PerplexityTooLarge {
perplexity: perp_f64,
k: min_k,
});
}
let target_entropy = perplexity.log2();
let machine_epsilon = T::epsilon();
let results: Vec<Vec<T>> = knn_indices
.par_iter()
.zip(knn_dists.par_iter())
.map(|(_, dists)| {
let mut beta = T::one();
let mut min_beta = T::neg_infinity();
let mut max_beta = T::infinity();
let mut current_probs = vec![T::zero(); dists.len()];
for _ in 0..max_iter {
let mut sum_p = T::zero();
for (j, &d) in dists.iter().enumerate() {
if d < T::epsilon() {
continue;
}
let d_sq = if distances_squared { d } else { d * d };
let p = (-beta * d_sq).exp();
current_probs[j] = p;
sum_p += p;
}
if sum_p.abs() < machine_epsilon {
sum_p = machine_epsilon;
}
let mut entropy = T::zero();
for p in current_probs.iter_mut() {
*p /= sum_p;
if *p > machine_epsilon {
entropy -= *p * p.log2();
}
}
let entropy_diff = entropy - target_entropy;
if entropy_diff.abs() < tol {
break;
}
if entropy_diff > T::zero() {
min_beta = beta;
if max_beta.is_infinite() {
beta *= T::one() + T::one();
} else {
beta = (beta + max_beta) / (T::one() + T::one());
}
} else {
max_beta = beta;
if min_beta.is_infinite() {
beta /= T::one() + T::one();
} else {
beta = (beta + min_beta) / (T::one() + T::one());
}
}
}
current_probs
})
.collect();
let capacity: usize = results.iter().map(|p| p.len()).sum();
let mut row_indices = Vec::with_capacity(capacity);
let mut col_indices = Vec::with_capacity(capacity);
let mut values = Vec::with_capacity(capacity);
for (i, probs) in results.into_iter().enumerate() {
for (&j, p) in knn_indices[i].iter().zip(probs) {
if p > machine_epsilon && j != i {
row_indices.push(i);
col_indices.push(j);
values.push(p);
}
}
}
Ok(CoordinateList {
row_indices,
col_indices,
values,
n_samples: n,
})
}
pub fn symmetrise_affinities_tsne<T>(graph: CoordinateList<T>) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let n_float = T::from_usize(n).unwrap();
let two = T::from_f64(2.0).unwrap();
let normalization = two * n_float;
let mut adj: Vec<FxHashMap<usize, T>> = vec![FxHashMap::default(); n];
for ((&i, &j), &w) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
adj[i].insert(j, w);
}
let mut pairs_set: FxHashSet<(usize, usize)> = FxHashSet::default();
for ((&i, &j), _) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
let pair = if i < j {
(i, j)
} else if i > j {
(j, i)
} else {
(i, i)
};
pairs_set.insert(pair);
}
let pairs: Vec<(usize, usize)> = pairs_set.into_iter().collect();
let edges: Vec<Vec<(usize, usize, T)>> = pairs
.par_iter()
.map(|&(i, j)| {
let w_ij = adj[i].get(&j).copied().unwrap_or_else(T::zero);
let w_ji = adj[j].get(&i).copied().unwrap_or_else(T::zero);
let p_sym = (w_ij + w_ji) / normalization;
let mut local_edges = Vec::new();
if p_sym > T::epsilon() {
local_edges.push((i, j, p_sym));
if i != j {
local_edges.push((j, i, p_sym));
}
}
local_edges
})
.collect();
let total_capacity: usize = edges.iter().map(|v| v.len()).sum();
let mut rows = Vec::with_capacity(total_capacity);
let mut cols = Vec::with_capacity(total_capacity);
let mut vals = Vec::with_capacity(total_capacity);
for edge_vec in edges {
for (i, j, w) in edge_vec {
rows.push(i);
cols.push(j);
vals.push(w);
}
}
CoordinateList {
row_indices: rows,
col_indices: cols,
values: vals,
n_samples: n,
}
}
#[derive(Default)]
pub enum PhateGraphSymmetrisation {
#[default]
Additive,
Multiplicative,
Mnn,
None,
}
pub fn parse_phate_symmetrisation(s: &str) -> Option<PhateGraphSymmetrisation> {
match s.to_lowercase().as_str() {
"additive" | "add" => Some(PhateGraphSymmetrisation::Additive),
"multiplicative" | "mult" | "multiply" => Some(PhateGraphSymmetrisation::Multiplicative),
"mnn" => Some(PhateGraphSymmetrisation::Mnn),
"none" => Some(PhateGraphSymmetrisation::None),
_ => None,
}
}
fn symmetrise_additive<T>(graph: &mut CoordinateList<T>)
where
T: ManifoldsFloat,
{
let two = T::one() + T::one();
let edge_map: FxHashMap<(usize, usize), T> = graph
.row_indices
.par_iter()
.zip(&graph.col_indices)
.zip(&graph.values)
.fold(
FxHashMap::default, |mut local_map, ((&i, &j), &v)| {
*local_map.entry((i, j)).or_insert(T::zero()) += v;
*local_map.entry((j, i)).or_insert(T::zero()) += v;
local_map
},
)
.reduce(
FxHashMap::default, |mut map1, map2| {
for (key, val) in map2 {
*map1.entry(key).or_insert(T::zero()) += val;
}
map1
},
);
graph.row_indices.clear();
graph.col_indices.clear();
graph.values.clear();
graph.row_indices.reserve(edge_map.len());
graph.col_indices.reserve(edge_map.len());
graph.values.reserve(edge_map.len());
for ((i, j), v) in edge_map {
graph.row_indices.push(i);
graph.col_indices.push(j);
graph.values.push(v / two);
}
}
fn symmetrise_multiplicative<T>(graph: &mut CoordinateList<T>)
where
T: ManifoldsFloat,
{
let forward_map: FxHashMap<(usize, usize), T> = graph
.row_indices
.par_iter()
.zip(&graph.col_indices)
.zip(&graph.values)
.fold(FxHashMap::default, |mut map, ((&i, &j), &v)| {
map.insert((i, j), v);
map
})
.reduce(FxHashMap::default, |mut map1, map2| {
map1.extend(map2);
map1
});
let backward_map: FxHashMap<(usize, usize), T> = graph
.row_indices
.par_iter()
.zip(&graph.col_indices)
.zip(&graph.values)
.fold(FxHashMap::default, |mut map, ((&i, &j), &v)| {
map.insert((j, i), v);
map
})
.reduce(FxHashMap::default, |mut map1, map2| {
map1.extend(map2);
map1
});
let products: Vec<(usize, usize, T)> = forward_map
.par_iter()
.filter_map(|(&(i, j), &v_ij)| backward_map.get(&(i, j)).map(|&v_ji| (i, j, v_ij * v_ji)))
.collect();
graph.row_indices.clear();
graph.col_indices.clear();
graph.values.clear();
graph.row_indices.reserve(products.len());
graph.col_indices.reserve(products.len());
graph.values.reserve(products.len());
for (i, j, v) in products {
graph.row_indices.push(i);
graph.col_indices.push(j);
graph.values.push(v);
}
}
fn symmetrise_mnn<T>(graph: &mut CoordinateList<T>, theta: T)
where
T: ManifoldsFloat,
{
let edge_map: FxHashMap<(usize, usize), (Option<T>, Option<T>)> = graph
.row_indices
.par_iter()
.zip(&graph.col_indices)
.zip(&graph.values)
.fold(FxHashMap::default, |mut map, ((&i, &j), &v)| {
map.entry((i, j)).or_insert((None, None)).0 = Some(v);
map.entry((j, i)).or_insert((None, None)).1 = Some(v);
map
})
.reduce(FxHashMap::default, |mut map1, map2| {
for (key, (v_ij, v_ji)) in map2 {
let entry = map1.entry(key).or_insert((None, None));
if v_ij.is_some() {
entry.0 = v_ij;
}
if v_ji.is_some() {
entry.1 = v_ji;
}
}
map1
});
let one_minus_theta = T::one() - theta;
let results: Vec<(usize, usize, T)> = edge_map
.par_iter()
.filter_map(|(&(i, j), &(v_ij, v_ji))| {
let v_ij = v_ij.unwrap_or(T::zero());
let v_ji = v_ji.unwrap_or(T::zero());
let min_val = v_ij.min(v_ji);
let max_val = v_ij.max(v_ji);
let combined = theta * min_val + one_minus_theta * max_val;
if combined > T::epsilon() {
Some((i, j, combined))
} else {
None
}
})
.collect();
graph.row_indices.clear();
graph.col_indices.clear();
graph.values.clear();
graph.row_indices.reserve(results.len());
graph.col_indices.reserve(results.len());
graph.values.reserve(results.len());
for (i, j, v) in results {
graph.row_indices.push(i);
graph.col_indices.push(j);
graph.values.push(v);
}
}
fn binary_knn_connectivity<T>(
knn_indices: &[Vec<usize>],
knn: usize,
symmetrise: PhateGraphSymmetrisation,
) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let n = knn_indices.len();
let k_actual = knn.min(knn_indices[0].len());
let mut row_indices = Vec::new();
let mut col_indices = Vec::new();
let mut values = Vec::new();
for (i, indices) in knn_indices.iter().enumerate() {
for &j in indices.iter().take(k_actual) {
if j != i {
row_indices.push(i);
col_indices.push(j);
values.push(T::one());
}
}
}
let mut graph = CoordinateList {
row_indices,
col_indices,
values,
n_samples: n,
};
if !matches!(symmetrise, PhateGraphSymmetrisation::None) {
symmetrise_additive(&mut graph);
}
graph
}
#[allow(clippy::too_many_arguments)]
pub fn phate_alpha_decay_affinities<T>(
knn_indices: &[Vec<usize>],
knn_dists: &[Vec<T>],
knn: usize,
decay: Option<T>,
bandwidth_scale: T,
thresh: T,
symmetrise: &str,
distances_squared: bool,
) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let n = knn_indices.len();
let machine_epsilon = T::epsilon();
let symmetrise = parse_phate_symmetrisation(symmetrise).unwrap_or_default();
if decay.is_none() {
return binary_knn_connectivity(knn_indices, knn, symmetrise);
}
let decay_val = decay.unwrap();
let results: Vec<(Vec<usize>, Vec<T>)> = knn_indices
.par_iter()
.zip(knn_dists.par_iter())
.enumerate()
.map(|(i, (indices, dists))| {
let bandwidth_dist = if knn > 0 && knn <= dists.len() {
if distances_squared {
dists[knn - 1].sqrt() } else {
dists[knn - 1] }
} else {
if distances_squared {
dists[dists.len() - 1].sqrt()
} else {
dists[dists.len() - 1]
}
};
let bandwidth = bandwidth_dist * bandwidth_scale;
let bandwidth = bandwidth.max(machine_epsilon);
let mut neighbor_indices = Vec::with_capacity(indices.len());
let mut neighbor_values = Vec::with_capacity(indices.len());
for (&j, &dist_val) in indices.iter().zip(dists.iter()) {
if j == i {
continue;
}
if dist_val < machine_epsilon {
neighbor_indices.push(j);
neighbor_values.push(T::one());
continue;
}
let d = if distances_squared {
dist_val.sqrt() } else {
dist_val };
let scaled = d / bandwidth;
let powered = scaled.powf(decay_val);
let affinity = (-powered).exp();
if affinity >= thresh {
neighbor_indices.push(j);
neighbor_values.push(affinity);
}
}
(neighbor_indices, neighbor_values)
})
.collect();
let capacity: usize = results.iter().map(|(idx, _)| idx.len()).sum();
let mut row_indices = Vec::with_capacity(capacity);
let mut col_indices = Vec::with_capacity(capacity);
let mut values = Vec::with_capacity(capacity);
for (i, (indices, vals)) in results.into_iter().enumerate() {
for (&j, v) in indices.iter().zip(vals) {
row_indices.push(i);
col_indices.push(j);
values.push(v);
}
}
let mut graph = CoordinateList {
row_indices,
col_indices,
values,
n_samples: n,
};
match symmetrise {
PhateGraphSymmetrisation::Additive => symmetrise_additive(&mut graph),
PhateGraphSymmetrisation::Multiplicative => symmetrise_multiplicative(&mut graph),
PhateGraphSymmetrisation::Mnn => symmetrise_mnn(&mut graph, T::one()),
PhateGraphSymmetrisation::None => {}
};
graph
}
#[cfg(test)]
mod test_data_gen {
use super::*;
use approx::assert_relative_eq;
use num_traits::Float;
#[test]
fn test_smooth_knn_dist_basic() {
let dist = vec![vec![1.0, 2.0], vec![1.5, 3.0], vec![0.5, 1.5]];
let (sigmas, rhos) = smooth_knn_dist(&dist, 2, 1.0, 1e-5, 64);
assert_eq!(sigmas.len(), 3);
assert_eq!(rhos.len(), 3);
assert_relative_eq!(rhos[0], 1.0, epsilon = 1e-4);
assert_relative_eq!(rhos[1], 1.5, epsilon = 1e-4);
assert_relative_eq!(rhos[2], 0.5, epsilon = 1e-4);
for sigma in sigmas.iter() {
assert!(*sigma > 0.0);
}
}
#[test]
fn test_smooth_knn_dist_zero_local_connectivity() {
let dist = vec![vec![0.0, 1.0, 2.0], vec![0.0, 1.0, 2.0]];
let (sigmas, rhos) = smooth_knn_dist(&dist, 2, 0.0, 1e-5, 64);
assert!(rhos.iter().all(|&r| r == 0.0));
assert_eq!(sigmas.len(), 2);
}
#[test]
fn test_knn_to_coo_basic() {
let knn_indices = vec![vec![1, 2], vec![0, 2], vec![0, 1]];
let knn_dists = vec![vec![1.0, 2.0], vec![1.0, 1.5], vec![2.0, 1.5]];
let sigmas = vec![1.0, 1.0, 1.0];
let rhos = vec![0.0, 0.0, 0.0];
let graph = knn_to_coo(&knn_indices, &knn_dists, &sigmas, &rhos);
assert_eq!(graph.n_samples, 3);
assert_eq!(graph.row_indices.len(), 6); assert_eq!(graph.col_indices.len(), 6);
assert_eq!(graph.values.len(), 6);
for &w in &graph.values {
assert!((0.0..=1.0).contains(&w));
}
}
#[test]
fn test_knn_to_coo_self_loop_excluded() {
let knn_indices = vec![vec![0, 1], vec![1, 0]];
let knn_dists = vec![vec![0.0, 1.0], vec![0.0, 1.0]];
let sigmas = vec![1.0, 1.0];
let rhos = vec![0.0, 0.0];
let graph = knn_to_coo(&knn_indices, &knn_dists, &sigmas, &rhos);
assert_eq!(graph.values.len(), 2); assert!(graph
.row_indices
.iter()
.zip(&graph.col_indices)
.all(|(&i, &j)| i != j));
}
#[test]
fn test_symmetrise_graph_full_union() {
let graph = CoordinateList {
row_indices: vec![0, 1],
col_indices: vec![1, 0],
values: vec![0.8, 0.6],
n_samples: 2,
};
let sym_graph = symmetrise_graph(graph, 0.5);
assert_eq!(sym_graph.n_samples, 2);
let edges = sym_graph.to_edge_list();
assert_eq!(edges.len(), 2);
let edge_01 = edges.iter().find(|&&(i, j, _)| i == 0 && j == 1).unwrap();
let edge_10 = edges.iter().find(|&&(i, j, _)| i == 1 && j == 0).unwrap();
assert_relative_eq!(edge_01.2, 0.86, epsilon = 1e-6);
assert_relative_eq!(edge_10.2, 0.76, epsilon = 1e-6);
}
#[test]
fn test_symmetrise_graph_directed() {
let graph = CoordinateList {
row_indices: vec![0, 1],
col_indices: vec![1, 0],
values: vec![0.8, 0.6],
n_samples: 2,
};
let sym_graph = symmetrise_graph(graph.clone(), 1.0);
let edges = sym_graph.to_edge_list();
assert_eq!(edges.len(), 2);
let union = 0.8 + 0.6 - 0.8 * 0.6;
for (_, _, w) in edges {
assert_relative_eq!(w, union, epsilon = 1e-6);
}
}
#[test]
fn test_coo_to_adjacency_list() {
let graph = CoordinateList {
row_indices: vec![0, 0, 1, 2],
col_indices: vec![1, 2, 2, 0],
values: vec![0.5, 0.3, 0.8, 0.9],
n_samples: 3,
};
let adj = coo_to_adjacency_list(&graph);
assert_eq!(adj.len(), 3);
assert_eq!(adj[0].len(), 2); assert_eq!(adj[1].len(), 1); assert_eq!(adj[2].len(), 1);
assert!(adj[0].contains(&(1, 0.5)));
assert!(adj[0].contains(&(2, 0.3)));
assert!(adj[1].contains(&(2, 0.8)));
assert!(adj[2].contains(&(0, 0.9)));
}
#[test]
fn test_coo_to_adjacency_list_empty() {
let graph: CoordinateList<f64> = CoordinateList {
row_indices: vec![],
col_indices: vec![],
values: vec![],
n_samples: 3,
};
let adj = coo_to_adjacency_list(&graph);
assert_eq!(adj.len(), 3);
assert!(adj[0].is_empty());
assert!(adj[1].is_empty());
assert!(adj[2].is_empty());
}
fn graph_to_adj<T: Float + Copy>(graph: &CoordinateList<T>) -> Vec<Vec<(usize, T)>> {
let mut adj = vec![Vec::new(); graph.n_samples];
for ((&i, &j), &w) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
adj[i].push((j, w));
}
adj
}
fn entropy(probs: &[f64]) -> f64 {
probs
.iter()
.filter(|&&p| p > 1e-12)
.map(|&p| -p * p.log2())
.sum()
}
#[test]
fn test_row_probabilities_sum_to_one() {
let knn_indices = vec![
vec![1, 2, 3, 4],
vec![0, 2, 3, 4],
vec![0, 1, 3, 4],
vec![0, 1, 2, 4],
vec![0, 1, 2, 3],
];
let knn_dists = vec![
vec![1.0, 4.0, 9.0, 16.0],
vec![1.0, 1.0, 4.0, 9.0],
vec![4.0, 1.0, 1.0, 4.0],
vec![9.0, 4.0, 1.0, 1.0],
vec![16.0, 9.0, 4.0, 1.0],
];
let perplexity = 2.0;
let graph =
gaussian_knn_affinities(&knn_indices, &knn_dists, perplexity, 1e-5, 200, true).unwrap();
let adj = graph_to_adj(&graph);
for (i, neighbours) in adj.iter().enumerate() {
let sum: f64 = neighbours.iter().map(|(_, w)| *w).sum();
assert_relative_eq!(sum, 1.0, epsilon = 1e-4, max_relative = 1e-4);
println!("Row {}: sum = {:.6}", i, sum);
}
}
#[test]
fn test_entropy_matches_target_perplexity() {
let knn_indices = vec![
vec![1, 2, 3, 4, 5, 6, 7],
vec![0, 2, 3, 4, 5, 6, 7],
vec![0, 1, 3, 4, 5, 6, 7],
vec![0, 1, 2, 4, 5, 6, 7],
vec![0, 1, 2, 3, 5, 6, 7],
vec![0, 1, 2, 3, 4, 6, 7],
vec![0, 1, 2, 3, 4, 5, 7],
vec![0, 1, 2, 3, 4, 5, 6],
];
let knn_dists: Vec<Vec<f64>> = (0..8)
.map(|i| {
(0..7)
.map(|j| ((j + 1) as f64) * (1.0 + 0.1 * (i as f64)))
.collect()
})
.collect();
let perplexity = 3.0;
let target_entropy = perplexity.log2();
let graph =
gaussian_knn_affinities(&knn_indices, &knn_dists, perplexity, 1e-5, 200, true).unwrap();
let adj = graph_to_adj(&graph);
for (i, neighbours) in adj.iter().enumerate() {
let probs: Vec<f64> = neighbours.iter().map(|(_, w)| *w).collect();
let h = entropy(&probs);
println!(
"Row {}: entropy = {:.4}, target = {:.4}, diff = {:.6}",
i,
h,
target_entropy,
(h - target_entropy).abs()
);
assert_relative_eq!(h, target_entropy, epsilon = 1e-3, max_relative = 1e-3);
}
}
#[test]
fn test_squared_vs_unsquared_equivalence() {
let knn_indices = vec![
vec![1, 2, 3, 4],
vec![0, 2, 3, 4],
vec![0, 1, 3, 4],
vec![0, 1, 2, 4],
vec![0, 1, 2, 3],
];
let unsquared: Vec<Vec<f64>> = vec![
vec![1.0, 2.0, 3.0, 4.0],
vec![1.0, 1.0, 2.0, 3.0],
vec![2.0, 1.0, 1.0, 2.0],
vec![3.0, 2.0, 1.0, 1.0],
vec![4.0, 3.0, 2.0, 1.0],
];
let squared: Vec<Vec<f64>> = unsquared
.iter()
.map(|row| row.iter().map(|d| d * d).collect())
.collect();
let perplexity = 2.0;
let graph_unsq =
gaussian_knn_affinities(&knn_indices, &unsquared, perplexity, 1e-5, 200, false)
.unwrap();
let graph_sq =
gaussian_knn_affinities(&knn_indices, &squared, perplexity, 1e-5, 200, true).unwrap();
let adj_unsq = graph_to_adj(&graph_unsq);
let adj_sq = graph_to_adj(&graph_sq);
for i in 0..5 {
assert_eq!(adj_unsq[i].len(), adj_sq[i].len());
for (a, b) in adj_unsq[i].iter().zip(adj_sq[i].iter()) {
assert_eq!(a.0, b.0); assert_relative_eq!(a.1, b.1, epsilon = 1e-10);
}
}
println!("Squared vs unsquared: results match!");
}
#[test]
fn test_self_loops_excluded() {
let knn_indices = vec![
vec![0, 1, 2, 3], vec![1, 0, 2, 3], vec![2, 0, 1, 3], vec![3, 0, 1, 2], ];
let knn_dists = vec![
vec![0.0, 1.0, 4.0, 9.0], vec![0.0, 1.0, 1.0, 4.0],
vec![0.0, 4.0, 1.0, 1.0],
vec![0.0, 9.0, 4.0, 1.0],
];
let graph =
gaussian_knn_affinities(&knn_indices, &knn_dists, 2.0, 1e-5, 200, true).unwrap();
for (&i, &j) in graph.row_indices.iter().zip(&graph.col_indices) {
assert_ne!(i, j, "Self-loop found: {} -> {}", i, j);
}
println!("No self-loops in output graph.");
}
#[test]
fn test_closer_neighbours_have_higher_probability() {
let knn_indices = vec![vec![1, 2, 3, 4]];
let knn_dists = vec![vec![1.0, 4.0, 9.0, 16.0]];
let graph =
gaussian_knn_affinities(&knn_indices, &knn_dists, 2.0, 1e-5, 200, true).unwrap();
let adj = graph_to_adj(&graph);
let probs: Vec<(usize, f64)> = adj[0].clone();
println!("Probabilities: {:?}", probs);
let p1 = probs.iter().find(|(j, _)| *j == 1).unwrap().1;
let p2 = probs.iter().find(|(j, _)| *j == 2).unwrap().1;
let p3 = probs.iter().find(|(j, _)| *j == 3).unwrap().1;
let p4 = probs.iter().find(|(j, _)| *j == 4).unwrap().1;
assert!(p1 > p2, "p1={} should be > p2={}", p1, p2);
assert!(p2 > p3, "p2={} should be > p3={}", p2, p3);
assert!(p3 > p4, "p3={} should be > p4={}", p3, p4);
}
#[test]
fn test_uniform_distances_give_uniform_probs() {
let knn_indices = vec![vec![1, 2, 3, 4]];
let knn_dists = vec![vec![4.0, 4.0, 4.0, 4.0]];
let perplexity = 3.99999999; let graph =
gaussian_knn_affinities(&knn_indices, &knn_dists, perplexity, 1e-5, 200, true).unwrap();
let adj = graph_to_adj(&graph);
let probs: Vec<f64> = adj[0].iter().map(|(_, p)| *p).collect();
let expected = 0.25;
for (i, &p) in probs.iter().enumerate() {
assert_relative_eq!(p, expected, epsilon = 1e-1); println!("Neighbour {}: p = {:.6}", i, p);
}
}
#[test]
fn test_perplexity_affects_distribution_spread() {
let knn_indices = vec![vec![1, 2, 3, 4, 5, 6, 7]];
let knn_dists = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]];
let graph_low =
gaussian_knn_affinities(&knn_indices, &knn_dists, 1.5, 1e-5, 200, false).unwrap();
let adj_low = graph_to_adj(&graph_low);
let probs_low: Vec<f64> = adj_low[0].iter().map(|(_, p)| *p).collect();
let entropy_low = entropy(&probs_low);
let graph_high =
gaussian_knn_affinities(&knn_indices, &knn_dists, 4.0, 1e-5, 200, false).unwrap();
let adj_high = graph_to_adj(&graph_high);
let probs_high: Vec<f64> = adj_high[0].iter().map(|(_, p)| *p).collect();
let entropy_high = entropy(&probs_high);
println!("Low perplexity (1.5): entropy = {:.4}", entropy_low);
println!("High perplexity (4.0): entropy = {:.4}", entropy_high);
assert!(
entropy_high > entropy_low,
"Higher perplexity should give higher entropy"
);
}
#[test]
fn test_phate_basic_affinity_computation() {
let knn_indices = vec![
vec![0, 1, 2, 3],
vec![1, 0, 2, 3],
vec![2, 0, 1, 3],
vec![3, 0, 1, 2],
];
let knn_dists = vec![
vec![0.0, 1.0, 4.0, 9.0],
vec![0.0, 1.0, 4.0, 9.0],
vec![0.0, 4.0, 1.0, 9.0],
vec![0.0, 9.0, 4.0, 1.0],
];
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2, Some(40.0), 1.0, 1e-4, "none", true,
);
assert_eq!(graph.n_samples, 4);
assert!(!graph.row_indices.is_empty());
assert_eq!(graph.row_indices.len(), graph.col_indices.len());
assert_eq!(graph.row_indices.len(), graph.values.len());
for &v in &graph.values {
assert!((0.0..=1.0).contains(&v), "Affinity {} out of range", v);
}
println!("Basic test passed: {} edges created", graph.values.len());
}
#[test]
fn test_phate_self_loops_excluded() {
let knn_indices = vec![vec![0, 1, 2], vec![1, 0, 2], vec![2, 0, 1]];
let knn_dists = vec![
vec![0.0, 1.0, 4.0],
vec![0.0, 1.0, 4.0],
vec![0.0, 4.0, 1.0],
];
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(40.0),
1.0,
1e-4,
"none",
true,
);
for (&i, &j) in graph.row_indices.iter().zip(&graph.col_indices) {
assert_ne!(i, j, "Self-loop found: {} -> {}", i, j);
}
println!("No self-loops in PHATE graph");
}
#[test]
fn test_phate_closer_neighbours_higher_affinity() {
let knn_indices = vec![vec![0, 1, 2, 3, 4]];
let knn_dists = vec![vec![0.0, 1.0, 4.0, 9.0, 16.0]];
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let adj = graph_to_adj(&graph);
let affinities: Vec<(usize, f64)> = adj[0].clone();
let a1 = affinities.iter().find(|(j, _)| *j == 1).unwrap().1;
let a2 = affinities.iter().find(|(j, _)| *j == 2).unwrap().1;
let a3 = affinities.iter().find(|(j, _)| *j == 3).unwrap().1;
let a4 = affinities.iter().find(|(j, _)| *j == 4).unwrap().1;
println!(
"Affinities: a1={:.6}, a2={:.6}, a3={:.6}, a4={:.6}",
a1, a2, a3, a4
);
assert!(a1 > a2, "a1={} should be > a2={}", a1, a2);
assert!(a2 > a3, "a2={} should be > a3={}", a2, a3);
assert!(a3 > a4, "a3={} should be > a4={}", a3, a4);
}
#[test]
fn test_phate_bandwidth_from_kth_neighbor() {
let knn_indices = vec![vec![0, 1, 2, 3]];
let knn_dists = vec![vec![0.0, 1.0, 4.0, 16.0]];
let graph_k2 = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let graph_k3 = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
3,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let adj_k2 = graph_to_adj(&graph_k2);
let adj_k3 = graph_to_adj(&graph_k3);
let a1_k2 = adj_k2[0].iter().find(|(j, _)| *j == 1).unwrap().1; let a2_k2 = adj_k2[0].iter().find(|(j, _)| *j == 2).unwrap().1;
let a1_k3 = adj_k3[0].iter().find(|(j, _)| *j == 1).unwrap().1; let a2_k3 = adj_k3[0].iter().find(|(j, _)| *j == 2).unwrap().1;
println!(
"k=2: a1={:.6}, a2={:.6}, ratio={:.6}",
a1_k2,
a2_k2,
a1_k2 / a2_k2
);
println!(
"k=3: a1={:.6}, a2={:.6}, ratio={:.6}",
a1_k3,
a2_k3,
a1_k3 / a2_k3
);
let ratio_k2 = a1_k2 / a2_k2;
let ratio_k3 = a1_k3 / a2_k3;
assert!(
ratio_k2 > ratio_k3,
"Smaller bandwidth should give higher ratio (more peaked): ratio_k2={:.6} vs ratio_k3={:.6}",
ratio_k2, ratio_k3
);
assert!(
a1_k3 > a1_k2,
"Larger bandwidth should give higher absolute affinity to nearest neighbor"
);
}
#[test]
fn test_phate_bandwidth_scale_effect() {
let knn_indices = vec![vec![0, 1, 2, 3]];
let knn_dists = vec![vec![0.0, 1.0, 4.0, 9.0]];
let graph_scale1 = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let graph_scale2 = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
2.0,
1e-10,
"none",
true,
);
let adj1 = graph_to_adj(&graph_scale1);
let adj2 = graph_to_adj(&graph_scale2);
let a3_scale1 = adj1[0].iter().find(|(j, _)| *j == 3).unwrap().1;
let a3_scale2 = adj2[0].iter().find(|(j, _)| *j == 3).unwrap().1;
println!(
"scale=1.0: a3={:.6}, scale=2.0: a3={:.6}",
a3_scale1, a3_scale2
);
assert!(
a3_scale2 > a3_scale1,
"Larger bandwidth scale should give higher affinity to distant neighbors"
);
}
#[test]
fn test_phate_decay_parameter_effect() {
let knn_indices = vec![vec![0, 1, 2, 3, 4]];
let knn_dists = vec![vec![0.0, 1.0, 4.0, 9.0, 16.0]];
let graph_low = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(1.0),
1.0,
1e-10,
"none",
true,
);
let graph_high = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let adj_low = graph_to_adj(&graph_low);
let adj_high = graph_to_adj(&graph_high);
if let Some(&(_, a4_low)) = adj_low[0].iter().find(|(j, _)| *j == 4) {
if let Some(&(_, a4_high)) = adj_high[0].iter().find(|(j, _)| *j == 4) {
println!("decay=10: a4={:.6}, decay=80: a4={:.6}", a4_low, a4_high);
assert!(
a4_low > a4_high,
"Lower decay should give higher affinity to distant neighbors"
);
}
}
}
#[test]
fn test_phate_thresholding() {
let knn_indices = vec![vec![0, 1, 2, 3, 4, 5]];
let knn_dists = vec![vec![0.0, 1.0, 4.0, 9.0, 16.0, 25.0]];
let graph_strict = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-5, "none",
true,
);
let graph_lenient = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10, "none",
true,
);
println!(
"Strict threshold edges: {}, Lenient threshold edges: {}",
graph_strict.values.len(),
graph_lenient.values.len()
);
assert!(
graph_lenient.values.len() >= graph_strict.values.len(),
"Lenient threshold should produce at least as many edges"
);
}
#[test]
fn test_phate_binary_connectivity() {
let knn_indices = vec![
vec![0, 1, 2, 3],
vec![1, 0, 2, 3],
vec![2, 0, 1, 3],
vec![3, 0, 1, 2],
];
let knn_dists = vec![
vec![0.0, 1.0, 4.0, 9.0],
vec![0.0, 1.0, 4.0, 9.0],
vec![0.0, 4.0, 1.0, 9.0],
vec![0.0, 9.0, 4.0, 1.0],
];
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
None, 1.0,
1e-4,
"none",
true,
);
for &v in &graph.values {
assert_relative_eq!(v, 1.0, epsilon = 1e-10);
}
println!(
"Binary connectivity: all {} edges have weight 1.0",
graph.values.len()
);
}
#[test]
fn test_phate_additive_symmetrisation() {
let knn_indices = vec![
vec![0, 1, 2],
vec![1, 2, 0], vec![2, 1, 0],
];
let knn_dists = vec![
vec![0.0, 1.0, 4.0],
vec![0.0, 1.0, 9.0],
vec![0.0, 1.0, 4.0],
];
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-4,
"add", true,
);
let edges: std::collections::HashMap<(usize, usize), f64> = graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
.map(|((&i, &j), &v)| ((i, j), v))
.collect();
for (i, j) in [(0, 1), (0, 2), (1, 2)] {
if let (Some(&v_ij), Some(&v_ji)) = (edges.get(&(i, j)), edges.get(&(j, i))) {
assert_relative_eq!(v_ij, v_ji, epsilon = 1e-6, max_relative = 1e-6);
println!(
"Symmetric edge ({},{}): v_ij={:.6}, v_ji={:.6}",
i, j, v_ij, v_ji
);
}
}
}
#[test]
fn test_phate_multiplicative_symmetrisation() {
let knn_indices = vec![vec![0, 1, 2], vec![1, 0, 2], vec![2, 0, 1]];
let knn_dists = vec![
vec![0.0, 1.0, 4.0],
vec![0.0, 1.0, 4.0],
vec![0.0, 4.0, 1.0],
];
let graph_asym = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let graph_sym = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"multiply",
true,
);
assert!(
graph_sym.values.len() <= graph_asym.values.len(),
"Multiplicative symmetrisation should have <= edges"
);
let edges: std::collections::HashMap<(usize, usize), f64> = graph_sym
.row_indices
.iter()
.zip(&graph_sym.col_indices)
.zip(&graph_sym.values)
.map(|((&i, &j), &v)| ((i, j), v))
.collect();
for (&(i, j), &v_ij) in &edges {
if let Some(&v_ji) = edges.get(&(j, i)) {
assert_relative_eq!(v_ij, v_ji, epsilon = 1e-6);
}
}
println!(
"Multiplicative symmetrisation: {} edges",
graph_sym.values.len()
);
}
#[test]
fn test_phate_symmetrisation_comparison() {
let knn_indices = vec![
vec![0, 1, 2, 3],
vec![1, 0, 2, 3],
vec![2, 1, 0, 3],
vec![3, 0, 1, 2],
];
let knn_dists = vec![
vec![0.0, 1.0, 4.0, 9.0],
vec![0.0, 2.0, 3.0, 8.0],
vec![0.0, 1.0, 5.0, 7.0],
vec![0.0, 6.0, 7.0, 8.0],
];
let graph_none = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"none",
true,
);
let graph_add = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"add",
true,
);
let graph_mult = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(2.0),
1.0,
1e-10,
"multiply",
true,
);
println!("Asymmetric: {} edges", graph_none.values.len());
println!("Additive: {} edges", graph_add.values.len());
println!("Multiplicative: {} edges", graph_mult.values.len());
assert!(graph_add.values.len() >= graph_mult.values.len());
assert!(!graph_add.values.is_empty());
assert!(!graph_mult.values.is_empty());
}
#[test]
fn test_phate_affinity_formula_verification() {
let knn_indices = vec![vec![0, 1]];
let knn_dists = vec![vec![0.0, 4.0]];
let decay = 2.0;
let bandwidth_scale = 1.0;
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(decay),
bandwidth_scale,
1e-10,
"none",
true,
);
let expected = (-1.0_f64).exp();
let actual = graph.values[0];
println!("Expected affinity: {:.10}", expected);
println!("Actual affinity: {:.10}", actual);
assert_relative_eq!(actual, expected, epsilon = 1e-6);
}
#[test]
fn test_phate_zero_distance_handling() {
let knn_indices = vec![vec![0, 1, 2]];
let knn_dists = vec![vec![0.0, 0.0, 1.0]];
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
2,
Some(40.0),
1.0,
1e-4,
"none",
true,
);
let adj = graph_to_adj(&graph);
if let Some(&(j, v)) = adj[0].iter().find(|(j, _)| *j == 1) {
println!("Zero-distance neighbor {} has affinity {:.6}", j, v);
assert_relative_eq!(v, 1.0, epsilon = 1e-6,);
}
}
#[test]
fn test_phate_large_dataset_edge_count() {
let n = 100;
let k = 10;
let knn_indices: Vec<Vec<usize>> = (0..n)
.map(|i| {
let mut indices: Vec<usize> = (0..k).map(|j| (i + j) % n).collect();
indices[0] = i; indices
})
.collect();
let knn_dists: Vec<Vec<f64>> = (0..n)
.map(|_| {
let mut dists: Vec<f64> = (0..k).map(|j| (j as f64).powi(2)).collect();
dists[0] = 0.0; dists
})
.collect();
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dists,
5,
Some(2.0),
1.0,
1e-10,
"add",
true,
);
println!(
"Large dataset: {} vertices, {} edges",
n,
graph.values.len()
);
assert!(!graph.values.is_empty());
assert!(graph.values.len() <= n * (k - 1) * 2); }
}