use faer::MatRef;
use rand::{
rngs::StdRng,
{Rng, SeedableRng},
};
use rand_distr::{Distribution, StandardNormal};
use std::collections::VecDeque;
use crate::data::structures::*;
use crate::prelude::*;
use crate::utils::math::*;
pub const SPECTRAL_RANGE: f64 = 10.0;
pub const RANDOM_RANGE: f64 = 10.0;
pub const PCA_RANGE: f64 = 1.0;
#[derive(Clone, Debug)]
pub enum EmbdInit<T> {
SpectralInit {
range: Option<T>,
},
RandomInit {
range: Option<T>,
},
PcaInit {
range: Option<T>,
randomised: bool,
},
}
pub fn parse_initilisation<T>(s: &str, randomised: bool, range: Option<T>) -> Option<EmbdInit<T>>
where
T: ManifoldsFloat,
{
match s.to_lowercase().as_str() {
"spectral" => Some(EmbdInit::SpectralInit { range }),
"pca" => Some(EmbdInit::PcaInit { randomised, range }),
"random" => Some(EmbdInit::RandomInit { range }),
_ => None,
}
}
fn graph_to_normalised_laplacian<T>(graph: &CoordinateList<T>) -> CompressedSparseData<f64>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let mut adj: Vec<Vec<(usize, f64)>> = vec![vec![]; n];
let mut degrees = vec![0.0; n];
for ((&i, &j), &w) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
let w_f64 = w.to_f64().unwrap();
adj[i].push((j, w_f64));
degrees[i] += w_f64;
}
let d_inv_sqrt: Vec<f64> = degrees
.iter()
.map(|&d| if d > 1e-8 { 1.0 / d.sqrt() } else { 0.0 })
.collect();
let mut data = Vec::new();
let mut indices = Vec::new();
let mut indptr = vec![0];
for i in 0..n {
let mut row_entries = vec![];
for &(j, w) in &adj[i] {
if i != j {
let normalised_weight = -d_inv_sqrt[i] * w * d_inv_sqrt[j];
row_entries.push((j, normalised_weight));
}
}
row_entries.sort_unstable_by_key(|&(idx, _)| idx);
for (idx, val) in row_entries {
indices.push(idx);
data.push(val);
}
indptr.push(data.len());
}
CompressedSparseData::new_csr(&data, &indices, &indptr, (n, n))
}
fn single_component_spectral_raw<T>(
graph: &CoordinateList<T>,
n_comp: usize,
seed: u64,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let mut embedding = vec![vec![T::zero(); n_comp]; n];
if n <= n_comp + 1 {
let mut rng = StdRng::seed_from_u64(seed);
for row in &mut embedding {
for v in row.iter_mut() {
*v = T::from_f64(rng.random_range(-1.0..1.0)).unwrap();
}
}
return Ok(embedding);
}
let laplacian = graph_to_normalised_laplacian(graph);
let n_eigs = (n_comp + 1).min(n);
let (_, evecs) = compute_smallest_eigenpairs_lanczos(&laplacian, n_eigs, seed)?;
for comp_idx in 0..n_comp {
let evec_idx = comp_idx + 1;
if evec_idx < evecs[0].len() {
for i in 0..n {
embedding[i][comp_idx] = T::from_f32(evecs[i][evec_idx]).unwrap();
}
} else {
let mut rng = StdRng::seed_from_u64(seed + comp_idx as u64);
for i in 0..n {
embedding[i][comp_idx] = T::from_f64(rng.random_range(-1.0..1.0)).unwrap();
}
}
}
Ok(embedding)
}
fn find_connected_components<T>(graph: &CoordinateList<T>) -> Vec<Vec<usize>>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let mut adj: Vec<Vec<usize>> = vec![vec![]; n];
for (&i, &j) in graph.row_indices.iter().zip(&graph.col_indices) {
adj[i].push(j);
}
let mut visited = vec![false; n];
let mut components = Vec::new();
for start in 0..n {
if visited[start] {
continue;
}
let mut component = Vec::new();
let mut queue = VecDeque::new();
queue.push_back(start);
visited[start] = true;
while let Some(node) = queue.pop_front() {
component.push(node);
for &neighbor in &adj[node] {
if !visited[neighbor] {
visited[neighbor] = true;
queue.push_back(neighbor);
}
}
}
components.push(component);
}
components
}
fn multi_component_init<T>(
graph: &CoordinateList<T>,
components: &[Vec<usize>],
n_comp: usize,
seed: u64,
range: T,
data: Option<MatRef<T>>,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let mut embedding = vec![vec![T::zero(); n_comp]; n];
let mut rng = StdRng::seed_from_u64(seed);
let meta = component_meta_layout(components, n_comp, data, seed)?;
for (label, component) in components.iter().enumerate() {
let centroid = &meta[label];
let data_range = meta_data_range(&meta, label);
if component.len() < 2 * n_comp {
for &global in component {
for d in 0..n_comp {
let u = T::from_f64(rng.random_range(-1.0..1.0)).unwrap();
embedding[global][d] = centroid[d] + u * data_range;
}
}
continue;
}
let subgraph = extract_subgraph(graph, component);
let sub = single_component_spectral_raw(&subgraph, n_comp, seed + label as u64)?;
let max_abs = sub
.iter()
.flat_map(|v| v.iter())
.fold(T::zero(), |acc, &x| {
let a = x.abs();
if a > acc {
a
} else {
acc
}
});
let expansion = if max_abs > T::from_f64(1e-8).unwrap() {
data_range / max_abs
} else {
T::one()
};
for (local, &global) in component.iter().enumerate() {
for d in 0..n_comp {
embedding[global][d] = centroid[d] + sub[local][d] * expansion;
}
}
}
let n_t = T::from_usize(n).unwrap();
for d in 0..n_comp {
let mean = embedding.iter().map(|v| v[d]).sum::<T>() / n_t;
for row in &mut embedding {
row[d] -= mean;
}
}
let max_abs = embedding
.iter()
.flat_map(|v| v.iter())
.fold(T::zero(), |acc, &x| {
let a = x.abs();
if a > acc {
a
} else {
acc
}
});
if max_abs > T::from_f64(1e-8).unwrap() {
let s = range / max_abs;
for row in &mut embedding {
for v in row {
*v *= s;
}
}
}
Ok(embedding)
}
fn component_meta_layout<T>(
components: &[Vec<usize>],
n_comp: usize,
data: Option<MatRef<T>>,
seed: u64,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
{
let n_components = components.len();
let mut meta = if n_components > 2 * n_comp {
match data {
Some(d) => component_spectral_meta(components, n_comp, d, seed)?,
None => component_simplex_meta(n_components, n_comp),
}
} else {
component_simplex_meta(n_components, n_comp)
};
let mut max_abs = meta
.iter()
.flat_map(|v| v.iter())
.fold(T::zero(), |acc, &x| {
let a = x.abs();
if a > acc {
a
} else {
acc
}
});
if max_abs <= T::from_f64(1e-8).unwrap() {
meta = component_simplex_meta(n_components, n_comp);
max_abs = meta
.iter()
.flat_map(|v| v.iter())
.fold(T::zero(), |acc, &x| {
let a = x.abs();
if a > acc {
a
} else {
acc
}
});
}
if max_abs > T::from_f64(1e-8).unwrap() {
for row in &mut meta {
for v in row {
*v /= max_abs;
}
}
}
Ok(meta)
}
fn component_simplex_meta<T>(n_components: usize, n_comp: usize) -> Vec<Vec<T>>
where
T: ManifoldsFloat,
{
let k = n_components.div_ceil(2);
let mut meta = vec![vec![T::zero(); n_comp]; n_components];
for label in 0..n_components {
let (idx, sign) = if label < k {
(label, T::one())
} else {
(label - k, -T::one())
};
if idx < n_comp {
meta[label][idx] = sign;
}
}
meta
}
fn component_spectral_meta<T>(
components: &[Vec<usize>],
n_comp: usize,
data: MatRef<T>,
seed: u64,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
{
let n_components = components.len();
let n_features = data.ncols();
let mut centroids = vec![vec![T::zero(); n_features]; n_components];
for (l, comp) in components.iter().enumerate() {
let inv = T::one() / T::from_usize(comp.len()).unwrap();
for &i in comp {
for f in 0..n_features {
centroids[l][f] += data[(i, f)];
}
}
for f in 0..n_features {
centroids[l][f] *= inv;
}
}
let mut sq_dists = vec![vec![T::zero(); n_components]; n_components];
let mut max_d2 = T::zero();
for a in 0..n_components {
for b in (a + 1)..n_components {
let mut d2 = T::zero();
for f in 0..n_features {
let diff = centroids[a][f] - centroids[b][f];
d2 += diff * diff;
}
sq_dists[a][b] = d2;
sq_dists[b][a] = d2;
if d2 > max_d2 {
max_d2 = d2;
}
}
}
let scale = if max_d2 > T::from_f64(1e-12).unwrap() {
T::one() / max_d2
} else {
T::one()
};
let mut row_indices = Vec::new();
let mut col_indices = Vec::new();
let mut values = Vec::new();
for a in 0..n_components {
for b in 0..n_components {
if a == b {
continue;
}
let aff = T::from_f64((-(sq_dists[a][b] * scale).to_f64().unwrap()).exp()).unwrap();
row_indices.push(a);
col_indices.push(b);
values.push(aff);
}
}
let affinity = CoordinateList {
row_indices,
col_indices,
values,
n_samples: n_components,
};
single_component_spectral_raw(&affinity, n_comp, seed)
}
fn meta_data_range<T>(meta: &[Vec<T>], label: usize) -> T
where
T: ManifoldsFloat,
{
let mut max_d = T::zero();
for other in 0..meta.len() {
let mut d2 = T::zero();
for d in 0..meta[label].len() {
let diff = meta[label][d] - meta[other][d];
d2 += diff * diff;
}
let dist = d2.sqrt();
if dist > max_d {
max_d = dist;
}
}
max_d / T::from_f64(2.0).unwrap()
}
fn extract_subgraph<T>(graph: &CoordinateList<T>, component: &[usize]) -> CoordinateList<T>
where
T: ManifoldsFloat,
{
let n = graph.n_samples;
let mut global_to_local = vec![None; n];
for (local, &global) in component.iter().enumerate() {
global_to_local[global] = Some(local);
}
let mut row_indices = Vec::new();
let mut col_indices = Vec::new();
let mut values = Vec::new();
for ((&i, &j), &v) in graph
.row_indices
.iter()
.zip(&graph.col_indices)
.zip(&graph.values)
{
if let (Some(local_i), Some(local_j)) = (global_to_local[i], global_to_local[j]) {
row_indices.push(local_i);
col_indices.push(local_j);
values.push(v);
}
}
CoordinateList {
row_indices,
col_indices,
values,
n_samples: component.len(),
}
}
fn single_component_spectral<T>(
graph: &CoordinateList<T>,
n_comp: usize,
seed: u64,
range: T,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
{
let raw = single_component_spectral_raw(graph, n_comp, seed)?;
Ok(finalise_spectral_embedding(raw, n_comp, range, seed))
}
fn finalise_spectral_embedding<T>(
mut embedding: Vec<Vec<T>>,
n_comp: usize,
range: T,
seed: u64,
) -> Vec<Vec<T>>
where
T: ManifoldsFloat,
{
let n = embedding.len();
let n_t = T::from_usize(n).unwrap();
for comp in 0..n_comp {
let mean: T = embedding.iter().map(|v| v[comp]).sum::<T>() / n_t;
for i in 0..n {
embedding[i][comp] -= mean;
}
}
let max_abs: T = embedding
.iter()
.flat_map(|v| v.iter())
.fold(T::zero(), |acc, &x| {
let abs_x = x.abs();
if abs_x > acc {
abs_x
} else {
acc
}
});
if max_abs > T::from_f64(1e-8).unwrap() {
let scale = range / max_abs;
for row in &mut embedding {
for val in row {
*val *= scale;
}
}
}
let mut rng = StdRng::seed_from_u64(seed + 9999);
let noise_std = T::from_f64(1e-4).unwrap();
for row in &mut embedding {
for val in row {
let noise = T::from_f64(rng.sample::<f64, _>(StandardNormal)).unwrap() * noise_std;
*val += noise;
}
}
embedding
}
pub fn spectral_layout<T>(
graph: &CoordinateList<T>,
n_comp: usize,
seed: u64,
range: Option<T>,
data: Option<MatRef<T>>,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
{
let range = range.unwrap_or(T::from_f64(SPECTRAL_RANGE).unwrap());
let components = find_connected_components(graph);
if components.len() > 1 {
return multi_component_init(graph, &components, n_comp, seed, range, data);
}
single_component_spectral(graph, n_comp, seed, range)
}
pub fn random_layout<T>(n_samples: usize, n_comp: usize, seed: u64, range: Option<T>) -> Vec<Vec<T>>
where
T: ManifoldsFloat,
{
let range = range
.unwrap_or(T::from_f64(RANDOM_RANGE).unwrap())
.to_f64()
.unwrap();
let mut rng = StdRng::seed_from_u64(seed);
let mut embedding = vec![vec![T::zero(); n_comp]; n_samples];
for i in 0..n_samples {
for j in 0..n_comp {
embedding[i][j] = T::from_f64(rng.random_range(-range..range)).unwrap();
}
}
embedding
}
pub fn pca_layout<T>(
data: MatRef<T>,
n_comp: usize,
randomised: bool,
range: Option<T>,
seed: u64,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
StandardNormal: Distribution<T>,
{
let target_std = range.unwrap_or(T::from_f64(PCA_RANGE).unwrap());
let (n_samples, n_features) = (data.nrows(), data.ncols());
let mut centred = data.to_owned();
for j in 0..n_features {
let mean = (0..n_samples).map(|i| data[(i, j)]).sum::<T>() / T::from(n_samples).unwrap();
for i in 0..n_samples {
centred[(i, j)] -= mean;
}
}
let svd_result = if randomised {
randomised_svd(centred.as_ref(), n_comp, seed as usize, None, None)?
} else {
let svd = centred
.thin_svd()
.map_err(|_| ManifoldsError::FaerSvdError)?;
RandomSvdResults {
u: svd.U().cloned(),
v: svd.V().cloned(),
s: svd.S().column_vector().iter().copied().collect(),
}
};
let u_truncated = svd_result.u.get(.., ..n_comp);
let s_diagonal = faer::Mat::from_fn(n_comp, n_comp, |i, j| {
if i == j {
svd_result.s[i]
} else {
T::zero()
}
});
let pca_scores = u_truncated * s_diagonal;
let mut embedding = vec![vec![T::zero(); n_comp]; n_samples];
for comp in 0..n_comp {
let col: Vec<T> = (0..n_samples).map(|i| pca_scores[(i, comp)]).collect();
let mean = col.iter().copied().sum::<T>() / T::from(n_samples).unwrap();
let variance =
col.iter().map(|&x| (x - mean) * (x - mean)).sum::<T>() / T::from(n_samples).unwrap();
let current_std = variance.sqrt();
let scale_factor = if current_std > T::from_f64(1e-8).unwrap() {
target_std / current_std
} else {
T::one()
};
for i in 0..n_samples {
embedding[i][comp] = (pca_scores[(i, comp)] - mean) * scale_factor;
}
}
Ok(embedding)
}
pub fn initialise_embedding<T>(
init_method: &EmbdInit<T>,
n_comp: usize,
seed: u64,
graph: &CoordinateList<T>,
data: MatRef<T>,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
StandardNormal: Distribution<T>,
{
match init_method {
EmbdInit::SpectralInit { range } => {
spectral_layout(graph, n_comp, seed, *range, Some(data))
}
EmbdInit::RandomInit { range } => {
let n_samples = data.nrows();
Ok(random_layout(n_samples, n_comp, seed, *range))
}
EmbdInit::PcaInit { randomised, range } => {
Ok(pca_layout(data, n_comp, *randomised, *range, seed)?)
}
}
}
#[cfg(test)]
mod test_init {
use super::*;
use approx::assert_relative_eq;
#[test]
fn test_parse_initialisation() {
assert!(matches!(
parse_initilisation::<f32>("spectral", false, None),
Some(EmbdInit::SpectralInit { range: None })
));
assert!(matches!(
parse_initilisation::<f32>("SPECTRAL", false, None),
Some(EmbdInit::SpectralInit { range: None })
));
assert!(matches!(
parse_initilisation::<f32>("spectral", false, Some(0.01)),
Some(EmbdInit::SpectralInit { range: Some(0.01) })
));
assert!(matches!(
parse_initilisation::<f32>("random", false, None),
Some(EmbdInit::RandomInit { range: None })
));
assert!(matches!(
parse_initilisation::<f32>("random", false, Some(0.01)),
Some(EmbdInit::RandomInit { range: Some(0.01) })
));
assert!(matches!(
parse_initilisation::<f32>("pca", false, None),
Some(EmbdInit::PcaInit {
randomised: false,
range: None
})
));
assert!(matches!(
parse_initilisation::<f32>("pca", true, None),
Some(EmbdInit::PcaInit {
randomised: true,
range: None
})
));
assert!(matches!(
parse_initilisation::<f32>("pca", false, Some(0.01)),
Some(EmbdInit::PcaInit {
randomised: false,
range: Some(0.01)
})
));
assert!(parse_initilisation::<f32>("invalid", false, None).is_none());
}
#[test]
fn test_graph_to_normalised_laplacian_simple() {
let graph = CoordinateList {
row_indices: vec![0, 1],
col_indices: vec![1, 0],
values: vec![1.0, 1.0],
n_samples: 2,
};
let laplacian = graph_to_normalised_laplacian(&graph);
assert_eq!(laplacian.shape(), (2, 2));
assert!(laplacian.cs_type.is_csr());
assert_eq!(laplacian.get_nnz(), 2);
}
#[test]
fn test_graph_to_normalised_laplacian_isolated_vertex() {
let graph = CoordinateList {
row_indices: vec![0],
col_indices: vec![1],
values: vec![1.0],
n_samples: 3,
};
let laplacian = graph_to_normalised_laplacian(&graph);
assert_eq!(laplacian.shape(), (3, 3));
}
#[test]
fn test_spectral_layout_basic() {
let graph = CoordinateList {
row_indices: vec![0, 0, 1, 1, 2, 2],
col_indices: vec![1, 2, 0, 2, 0, 1],
values: vec![1.0, 0.5, 1.0, 1.0, 0.5, 1.0],
n_samples: 3,
};
let embedding = spectral_layout(&graph, 2, 42, None, None).unwrap();
assert_eq!(embedding.len(), 3); assert_eq!(embedding[0].len(), 2);
for point in &embedding {
for &coord in point {
assert!((-10.01..=10.01).contains(&coord));
}
}
for dim in 0..2 {
let mean: f64 = embedding.iter().map(|p| p[dim]).sum::<f64>() / 3.0;
assert_relative_eq!(mean, 0.0, epsilon = 0.01);
}
}
#[test]
fn test_spectral_layout_range_bound() {
let graph = CoordinateList {
row_indices: vec![0, 0, 1, 1, 2, 2],
col_indices: vec![1, 2, 0, 2, 0, 1],
values: vec![1.0, 0.5, 1.0, 1.0, 0.5, 1.0],
n_samples: 3,
};
let embedding = spectral_layout(&graph, 2, 42, Some(1.0), None).unwrap();
assert_eq!(embedding.len(), 3); assert_eq!(embedding[0].len(), 2);
for point in &embedding {
for &coord in point {
assert!((-1.01..=1.01).contains(&coord));
}
}
for dim in 0..2 {
let mean: f64 = embedding.iter().map(|p| p[dim]).sum::<f64>() / 3.0;
assert_relative_eq!(mean, 0.0, epsilon = 0.01);
}
}
#[test]
fn test_spectral_layout_reproducibility() {
let graph = CoordinateList {
row_indices: vec![0, 1, 2],
col_indices: vec![1, 2, 0],
values: vec![1.0, 1.0, 1.0],
n_samples: 3,
};
let embd1 = spectral_layout(&graph, 2, 42, None, None).unwrap();
let embd2 = spectral_layout(&graph, 2, 42, None, None).unwrap();
assert_eq!(embd1, embd2);
}
#[test]
fn test_spectral_layout_higher_dimensions() {
let graph = CoordinateList {
row_indices: vec![0, 1, 2, 3],
col_indices: vec![1, 2, 3, 0],
values: vec![1.0; 4],
n_samples: 4,
};
let embedding = spectral_layout(&graph, 3, 42, None, None).unwrap();
assert_eq!(embedding.len(), 4);
assert_eq!(embedding[0].len(), 3);
}
#[test]
fn test_spectral_layout_disconnected() {
let graph = CoordinateList {
row_indices: vec![0, 1, 2, 3],
col_indices: vec![1, 0, 3, 2],
values: vec![1.0, 1.0, 1.0, 1.0],
n_samples: 4,
};
let embedding = spectral_layout::<f64>(&graph, 2, 42, None, None).unwrap();
assert_eq!(embedding.len(), 4);
assert_eq!(embedding[0].len(), 2);
for dim in 0..2 {
let mean: f64 = embedding.iter().map(|p| p[dim]).sum::<f64>() / 4.0;
assert_relative_eq!(mean, 0.0, epsilon = 0.01);
}
for point in &embedding {
for &coord in point {
assert!((-10.01..=10.01).contains(&coord));
}
}
}
#[test]
fn test_spectral_layout_disconnected_reproducibility() {
let graph = CoordinateList {
row_indices: vec![0, 1, 2, 3],
col_indices: vec![1, 0, 3, 2],
values: vec![1.0, 1.0, 1.0, 1.0],
n_samples: 4,
};
let embd1 = spectral_layout::<f64>(&graph, 2, 42, None, None).unwrap();
let embd2 = spectral_layout::<f64>(&graph, 2, 42, None, None).unwrap();
assert_eq!(embd1, embd2);
}
#[test]
fn test_random_layout_basic() {
let embedding = random_layout::<f64>(10, 2, 42, None);
assert_eq!(embedding.len(), 10);
assert_eq!(embedding[0].len(), 2);
for point in &embedding {
for &coord in point {
assert!((-10.01..=10.01).contains(&coord));
}
}
}
#[test]
fn test_random_layout_basic_rangebound() {
let embedding = random_layout::<f64>(10, 2, 42, Some(1.0));
assert_eq!(embedding.len(), 10);
assert_eq!(embedding[0].len(), 2);
for point in &embedding {
for &coord in point {
assert!((-1.01..=1.01).contains(&coord));
}
}
}
#[test]
fn test_random_layout_reproducibility() {
let embd1 = random_layout::<f64>(10, 2, 42, None);
let embd2 = random_layout::<f64>(10, 2, 42, None);
assert_eq!(embd1, embd2);
}
#[test]
fn test_random_layout_different_seeds() {
let embd1 = random_layout::<f64>(10, 2, 42, None);
let embd2 = random_layout::<f64>(10, 2, 999, None);
assert_ne!(embd1, embd2);
}
#[test]
fn test_random_layout_dimensions() {
let embedding = random_layout::<f32>(5, 3, 42, None);
assert_eq!(embedding.len(), 5);
assert_eq!(embedding[0].len(), 3);
}
#[test]
fn test_spectral_layout_single_vertex() {
let graph: CoordinateList<f32> = CoordinateList {
row_indices: vec![],
col_indices: vec![],
values: vec![],
n_samples: 1,
};
let embedding = spectral_layout(&graph, 2, 42, None, None).unwrap();
assert_eq!(embedding.len(), 1);
assert_eq!(embedding[0].len(), 2);
}
#[test]
fn test_pca_layout_basic() {
let data = faer::mat![
[1.0, 2.0, 1.0],
[2.0, 3.0, 2.0],
[3.0, 4.0, 1.5],
[4.0, 5.0, 2.5],
[5.0, 6.0, 2.0],
];
let embedding = pca_layout(data.as_ref(), 2, false, None, 42).unwrap();
assert_eq!(embedding.len(), 5);
assert_eq!(embedding[0].len(), 2);
for dim in 0..2 {
let mean: f64 = embedding.iter().map(|p| p[dim]).sum::<f64>() / 5.0;
assert_relative_eq!(mean, 0.0, epsilon = 1e-6);
}
let mean_0: f64 = embedding.iter().map(|p| p[0]).sum::<f64>() / 5.0;
let variance_0: f64 = embedding
.iter()
.map(|p| (p[0] - mean_0).powi(2))
.sum::<f64>()
/ 5.0;
let std_0 = variance_0.sqrt();
assert_relative_eq!(std_0, PCA_RANGE, epsilon = 1e-4);
}
#[test]
fn test_pca_layout_custom_range() {
let data = faer::mat![
[1.0, 2.0, 1.0],
[2.0, 3.0, 2.0],
[3.0, 4.0, 1.5],
[4.0, 5.0, 2.5],
[5.0, 6.0, 2.0],
];
let custom_range = 1.0;
let embedding = pca_layout(data.as_ref(), 2, false, Some(custom_range), 42).unwrap();
assert_eq!(embedding.len(), 5);
assert_eq!(embedding[0].len(), 2);
let mean_0: f64 = embedding.iter().map(|p| p[0]).sum::<f64>() / 5.0;
let variance_0: f64 = embedding
.iter()
.map(|p| (p[0] - mean_0).powi(2))
.sum::<f64>()
/ 5.0;
let std_0 = variance_0.sqrt();
assert_relative_eq!(std_0, custom_range, epsilon = 1e-4);
}
#[test]
fn test_pca_layout_reproducibility() {
let data = faer::mat![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0],];
let embd1 = pca_layout(data.as_ref(), 2, false, None, 42).unwrap();
let embd2 = pca_layout(data.as_ref(), 2, false, None, 42).unwrap();
assert_eq!(embd1, embd2);
}
#[test]
fn test_pca_layout_randomised() {
let data = faer::mat![
[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[9.0, 10.0, 11.0, 12.0],
];
let embd_standard = pca_layout(data.as_ref(), 2, false, None, 42).unwrap();
let embd_randomised = pca_layout(data.as_ref(), 2, true, None, 42).unwrap();
assert_eq!(embd_standard.len(), 3);
assert_eq!(embd_randomised.len(), 3);
}
#[test]
fn test_initialise_embedding_spectral() {
let graph = CoordinateList {
row_indices: vec![0, 1],
col_indices: vec![1, 0],
values: vec![1.0, 1.0],
n_samples: 2,
};
let data = faer::mat![[1.0, 2.0], [3.0, 4.0],];
let embedding = initialise_embedding(
&EmbdInit::SpectralInit { range: None },
2,
42,
&graph,
data.as_ref(),
)
.unwrap();
assert_eq!(embedding.len(), 2);
assert_eq!(embedding[0].len(), 2);
}
#[test]
fn test_initialise_embedding_spectral_disconnected() {
let graph = CoordinateList {
row_indices: vec![0, 1, 2, 3],
col_indices: vec![1, 0, 3, 2],
values: vec![1.0, 1.0, 1.0, 1.0],
n_samples: 4,
};
let data = faer::mat![[1.0, 2.0], [1.5, 2.5], [8.0, 9.0], [8.5, 9.5],];
let embedding = initialise_embedding(
&EmbdInit::SpectralInit { range: None },
2,
42,
&graph,
data.as_ref(),
)
.unwrap();
assert_eq!(embedding.len(), 4);
assert_eq!(embedding[0].len(), 2);
}
#[test]
fn test_initialise_embedding_random() {
let graph = CoordinateList {
row_indices: vec![],
col_indices: vec![],
values: vec![],
n_samples: 3,
};
let data = faer::mat![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0],];
let embedding = initialise_embedding(
&EmbdInit::RandomInit { range: None },
2,
42,
&graph,
data.as_ref(),
)
.unwrap();
assert_eq!(embedding.len(), 3);
assert_eq!(embedding[0].len(), 2);
}
#[test]
fn test_initialise_embedding_pca() {
let graph = CoordinateList {
row_indices: vec![],
col_indices: vec![],
values: vec![],
n_samples: 3,
};
let data = faer::mat![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],];
let embedding = initialise_embedding(
&EmbdInit::PcaInit {
randomised: false,
range: None,
},
2,
42,
&graph,
data.as_ref(),
)
.unwrap();
assert_eq!(embedding.len(), 3);
assert_eq!(embedding[0].len(), 2);
}
#[test]
fn test_spectral_layout_disconnected_within_component_spectral() {
let graph = CoordinateList {
row_indices: vec![
0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 5, 6, 6, 7, 7, 8, 8, 9, 9, 5, ],
col_indices: vec![
1, 0, 2, 1, 3, 2, 4, 3, 0, 4, 6, 5, 7, 6, 8, 7, 9, 8, 5, 9, ],
values: vec![1.0; 20],
n_samples: 10,
};
let embedding = spectral_layout::<f64>(&graph, 2, 42, None, None).unwrap();
assert_eq!(embedding.len(), 10);
assert_eq!(embedding[0].len(), 2);
for dim in 0..2 {
let mean: f64 = embedding.iter().map(|p| p[dim]).sum::<f64>() / 10.0;
assert_relative_eq!(mean, 0.0, epsilon = 0.01);
}
for point in &embedding {
for &coord in point {
assert!((-10.01..=10.01).contains(&coord));
}
}
let c1: Vec<f64> = (0..5).map(|i| embedding[i][0]).collect();
let c2: Vec<f64> = (5..10).map(|i| embedding[i][0]).collect();
let mean1: f64 = c1.iter().sum::<f64>() / 5.0;
let mean2: f64 = c2.iter().sum::<f64>() / 5.0;
assert!((mean1 - mean2).abs() > 1e-3);
}
}