use rand::RngExt;
use rand::SeedableRng;
use rand::rngs::SmallRng;
use rayon::prelude::*;
use crate::graph::embedding::evoc_embedding;
use crate::prelude::*;
use crate::utils::sparse::{CoordinateList, Csr, mat_to_vecs, vecs_to_mat};
fn label_prop_iteration<T: EvocFloat>(csr: &Csr<T>, labels: &[i64]) -> Vec<i64> {
(0..csr.nrows)
.into_par_iter()
.map(|i| {
if labels[i] >= 0 {
return labels[i];
}
let mut votes: Vec<(i64, T)> = Vec::new();
for idx in csr.indptr[i]..csr.indptr[i + 1] {
let l = labels[csr.indices[idx]];
let w = csr.data[idx];
if let Some(entry) = votes.iter_mut().find(|e| e.0 == l) {
entry.1 += w;
} else {
votes.push((l, w));
}
}
let mut best = -1i64;
let mut max_vote = T::one(); let mut _tie_count = 1usize;
for &(l, v) in &votes {
if l < 0 {
continue;
}
if v > max_vote {
max_vote = v;
best = l;
_tie_count = 1;
} else if v == max_vote {
_tie_count += 1;
best = l;
}
}
best
})
.collect()
}
fn label_outliers<T: EvocFloat>(csr: &Csr<T>, labels: &mut [i64], seed: u64) {
let max_label = match labels.iter().copied().reduce(i64::max) {
Some(m) if m >= 0 => m,
_ => return, };
for i in 0..csr.nrows {
if labels[i] >= 0 {
continue;
}
let mut rng = SmallRng::seed_from_u64(seed.wrapping_add(i as u64));
let mut queue = vec![i];
let mut found = false;
for _ in 0..64 {
if queue.is_empty() {
break;
}
let node = queue.pop().unwrap();
for idx in csr.indptr[node]..csr.indptr[node + 1] {
let j = csr.indices[idx];
if labels[j] >= 0 {
labels[i] = labels[j];
found = true;
break;
}
queue.push(j);
}
if found {
break;
}
}
if !found {
labels[i] = rng.random_range(0..=max_label);
}
}
}
fn remap_labels(labels: &mut [i64]) -> usize {
let mut sorted: Vec<i64> = labels.to_vec();
sorted.sort_unstable();
sorted.dedup();
sorted.retain(|&l| l >= 0);
let mut map = std::collections::HashMap::with_capacity(sorted.len());
for (new_id, &old_id) in sorted.iter().enumerate() {
map.insert(old_id, new_id as i64);
}
let mut next = sorted.len() as i64;
for l in labels.iter_mut() {
if *l < 0 {
*l = next;
next += 1;
} else {
*l = map[l];
}
}
next as usize
}
fn label_prop_loop<T: EvocFloat>(
csr: &Csr<T>,
approx_n_parts: usize,
n_iter: usize,
seed: u64,
) -> (Vec<usize>, usize) {
let n = csr.nrows;
let mut rng = SmallRng::seed_from_u64(seed);
let mut labels = vec![-1i64; n];
for i in 0..approx_n_parts {
let idx = rng.random_range(0..n);
labels[idx] = i as i64;
}
for _ in 0..n_iter {
labels = label_prop_iteration(csr, &labels);
}
label_outliers(csr, &mut labels, seed.wrapping_add(2000));
let n_parts = remap_labels(&mut labels);
let partition: Vec<usize> = labels.iter().map(|&l| l as usize).collect();
(partition, n_parts)
}
fn normalise_to_unit_range<T: EvocFloat>(data: &mut [Vec<T>]) {
if data.is_empty() {
return;
}
let d = data[0].len();
for j in 0..d {
let mut lo = T::infinity();
let mut hi = T::neg_infinity();
for row in data.iter() {
if row[j] < lo {
lo = row[j];
}
if row[j] > hi {
hi = row[j];
}
}
let range = hi - lo;
if range > T::zero() {
let mid = (hi + lo) / T::from(2.0).unwrap();
let half_range = range / T::from(2.0).unwrap();
for row in data.iter_mut() {
row[j] = (row[j] - mid) / half_range;
}
}
}
}
fn pca_init<T: EvocFloat>(data: &[Vec<T>], n_components: usize) -> Vec<Vec<T>> {
let n = data.len();
let d = data[0].len();
let k = n_components.min(n).min(d);
let mut means = vec![0.0f64; d];
for row in data {
for (j, &v) in row.iter().enumerate() {
means[j] += v.to_f64().unwrap();
}
}
let inv_n = 1.0 / n as f64;
for m in &mut means {
*m *= inv_n;
}
let mat = faer::Mat::<f64>::from_fn(n, d, |i, j| data[i][j].to_f64().unwrap() - means[j]);
let svd = mat.thin_svd().unwrap();
let u = svd.U();
let s = svd.S();
let mut result: Vec<Vec<T>> = (0..n)
.map(|i| (0..k).map(|j| T::from(u[(i, j)] * s[j]).unwrap()).collect())
.collect();
normalise_to_unit_range(&mut result);
result
}
fn random_init<T: EvocFloat>(n: usize, n_components: usize, seed: u64) -> Vec<Vec<T>> {
let mut rng = SmallRng::seed_from_u64(seed);
let mut result: Vec<Vec<T>> = (0..n)
.map(|_| {
(0..n_components)
.map(|_| T::from(rng.random::<f64>() * 2.0 - 1.0).unwrap())
.collect()
})
.collect();
for row in &mut result {
let norm: f64 = row
.iter()
.map(|v| v.to_f64().unwrap().powi(2))
.sum::<f64>()
.sqrt();
if norm > 0.0 {
for v in row.iter_mut() {
*v = T::from(v.to_f64().unwrap() / norm).unwrap();
}
}
}
result
}
#[allow(clippy::too_many_arguments)]
fn label_prop_init_inner<T: EvocFloat>(
graph: &Csr<T>,
n_components: usize,
data: Option<&[Vec<T>]>,
n_embedding_epochs: usize,
approx_n_parts: usize,
n_label_prop_iter: usize,
base_init_threshold: usize,
scaling: f64,
seed: u64,
verbose: bool,
) -> Vec<Vec<T>> {
let n = graph.nrows;
if n < base_init_threshold {
return match data {
Some(d) if d.len() == n && n_components <= d[0].len() => pca_init(d, n_components),
_ => random_init(n, n_components, seed),
};
}
let (partition, n_parts) = label_prop_loop(graph, approx_n_parts, n_label_prop_iter, seed);
if verbose {
println!(" Label prop: {} nodes -> {} partitions", n, n_parts);
}
let base_reduction_map = Csr::<T>::from_partition(&partition, n_parts);
let norm_reduction_map = base_reduction_map.normalise_cols_l2();
let data_reducer = norm_reduction_map.transpose().normalise_rows_l1();
let nrt = norm_reduction_map.transpose();
let mut reduced_graph = nrt.matmul(graph).matmul(&base_reduction_map);
reduced_graph.clip_values(T::zero(), T::one());
let reduced_data: Option<Vec<Vec<T>>> = data.map(|d| {
let d_mat = vecs_to_mat(d);
let rd_mat = data_reducer.matmul_dense(&d_mat.as_ref());
mat_to_vecs(&rd_mat)
});
let reduced_init = label_prop_init_inner(
&reduced_graph,
n_components,
reduced_data.as_deref(),
n_embedding_epochs.min(255),
approx_n_parts / 4,
n_label_prop_iter,
base_init_threshold,
scaling,
seed.wrapping_add(3000),
verbose,
);
let adj_list = reduced_graph.to_adjacency_list();
let emb_params = EvocEmbeddingParams {
n_epochs: n_embedding_epochs.min(255),
initial_alpha: T::from(0.001 * n_embedding_epochs as f64).unwrap(),
..Default::default()
};
let reduced_layout = evoc_embedding(
&adj_list,
n_components,
&emb_params,
Some(&reduced_init),
seed.wrapping_add(4000),
verbose,
);
let graph_t = graph.transpose();
let sym = graph.elementwise_mul(&graph_t);
let data_expander = sym.matmul(&norm_reduction_map).normalise_rows_l1();
let nrm_l1 = norm_reduction_map.normalise_rows_l1();
let layout_mat = vecs_to_mat(&reduced_layout);
let expanded = data_expander.matmul_dense(&layout_mat.as_ref());
let direct = nrm_l1.matmul_dense(&layout_mat.as_ref());
let half = T::from(0.5).unwrap();
let mut result = faer::Mat::from_fn(n, n_components, |i, j| {
(expanded[(i, j)] + direct[(i, j)]) * half
});
let inv_n = T::from(1.0 / n as f64).unwrap();
for j in 0..n_components {
let mut sum = T::zero();
for i in 0..n {
sum += result[(i, j)];
}
let mean = sum * inv_n;
for i in 0..n {
result[(i, j)] = result[(i, j)] - mean;
}
}
let scale = T::from(scaling).unwrap();
let out: Vec<Vec<T>> = (0..n)
.map(|i| (0..n_components).map(|j| scale * result[(i, j)]).collect())
.collect();
out
}
pub fn label_propagation_init<T: EvocFloat>(
graph: &CoordinateList<T>,
n_components: usize,
data: Option<&[Vec<T>]>,
seed: u64,
verbose: bool,
) -> Vec<Vec<T>> {
let csr = Csr::from_coo(graph);
let n = csr.nrows;
let approx_n_parts = ((8.0 * (n as f64).sqrt()) as usize)
.clamp(256, 16384)
.min(n);
label_prop_init_inner(
&csr,
n_components,
data,
50, approx_n_parts,
20, 64, 0.1, seed,
verbose,
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn iteration_skips_labelled_nodes() {
let csr = Csr::new(vec![0, 1, 2, 3], vec![1, 0, 1], vec![1.0, 1.0, 1.0], 3, 3);
let labels = vec![0i64, 1, 2];
let result = label_prop_iteration(&csr, &labels);
assert_eq!(result, labels, "Labelled nodes must not change");
}
#[test]
fn iteration_propagates_strong_label() {
let csr = Csr::new(vec![0, 2, 2, 2], vec![1, 2], vec![2.0, 0.3], 3, 3);
let labels = vec![-1i64, 5, 7];
let result = label_prop_iteration(&csr, &labels);
assert_eq!(result[0], 5);
}
#[test]
fn iteration_stays_unlabelled_below_threshold() {
let csr = Csr::new(vec![0, 1, 1], vec![1], vec![0.5], 2, 2);
let labels = vec![-1i64, 3];
let result = label_prop_iteration(&csr, &labels);
assert_eq!(result[0], -1, "Weight below threshold should not propagate");
}
#[test]
fn iteration_ignores_unlabelled_neighbours() {
let csr = Csr::new(vec![0, 1, 1], vec![1], vec![5.0], 2, 2);
let labels = vec![-1i64, -1];
let result = label_prop_iteration(&csr, &labels);
assert_eq!(result[0], -1);
}
#[test]
fn outliers_finds_labelled_neighbour() {
let csr = Csr::new(vec![0, 1, 3, 4], vec![1, 0, 2, 1], vec![1.0; 4], 3, 3);
let mut labels = vec![-1i64, -1, 42];
label_outliers(&csr, &mut labels, 99);
assert!(labels[0] >= 0, "Node 0 should be labelled");
assert_eq!(labels[1], 42, "Node 1 adjacent to node 2");
}
#[test]
fn outliers_random_fallback_for_isolated() {
let data: Vec<f64> = Vec::new();
let csr = Csr::new(vec![0, 0, 0], Vec::new(), data, 2, 2);
let mut labels = vec![-1i64, 5];
label_outliers(&csr, &mut labels, 99);
assert!(labels[0] >= 0 && labels[0] <= 5);
}
#[test]
fn remap_contiguous() {
let mut labels = vec![10, 10, 20, 20, 10];
let n = remap_labels(&mut labels);
assert_eq!(n, 2); assert_eq!(labels, vec![0, 0, 1, 1, 0]);
}
#[test]
fn remap_handles_negatives() {
let mut labels = vec![5, -1, 5, -1];
let n = remap_labels(&mut labels);
assert_eq!(n, 3);
assert_eq!(labels[0], 0);
assert_eq!(labels[2], 0);
assert!(labels[1] >= 1 && labels[1] <= 2);
assert!(labels[3] >= 1 && labels[3] <= 2);
assert_ne!(labels[1], labels[3]); }
#[test]
fn loop_returns_valid_partition() {
let n = 5;
let mut rows = Vec::new();
let mut cols = Vec::new();
let mut vals = Vec::new();
for i in 0..n {
let j = (i + 1) % n;
rows.push(i);
cols.push(j);
vals.push(1.0f64);
rows.push(j);
cols.push(i);
vals.push(1.0);
}
let coo = CoordinateList {
row_indices: rows,
col_indices: cols,
values: vals,
n_samples: n,
};
let csr = Csr::from_coo(&coo);
let (partition, n_parts) = label_prop_loop(&csr, 3, 10, 42);
assert_eq!(partition.len(), n);
assert!(n_parts > 0);
for &p in &partition {
assert!(p < n_parts);
}
}
#[test]
fn unit_range_normalisation() {
let mut data: Vec<Vec<f64>> = vec![vec![0.0, 10.0], vec![4.0, 20.0], vec![8.0, 30.0]];
normalise_to_unit_range(&mut data);
assert!(
data.iter()
.flatten()
.all(|&v| (-1.0 - 1e-12..=1.0 + 1e-12).contains(&v))
);
assert!((data[0][0] - (-1.0)).abs() < 1e-12);
assert!((data[2][0] - 1.0).abs() < 1e-12);
assert!((data[0][1] - (-1.0)).abs() < 1e-12);
assert!((data[2][1] - 1.0).abs() < 1e-12);
}
#[test]
fn init_small_graph_random_base() {
let n = 10;
let mut rows = Vec::new();
let mut cols = Vec::new();
let mut vals = Vec::new();
for i in 0..n {
for j in (i + 1)..n {
rows.push(i);
cols.push(j);
vals.push(0.5f64);
rows.push(j);
cols.push(i);
vals.push(0.5);
}
}
let coo = CoordinateList {
row_indices: rows,
col_indices: cols,
values: vals,
n_samples: n,
};
let result = label_propagation_init(&coo, 4, None, 42, false);
assert_eq!(result.len(), n);
for row in &result {
assert_eq!(row.len(), 4);
for &v in row {
assert!(v.is_finite());
}
}
}
}