use faer::{Mat, MatMut, MatRef};
use num_traits::ToPrimitive;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand_distr::{Distribution, Normal, StandardNormal};
use rayon::prelude::*;
use crate::assert_same_len;
use crate::data::structures::*;
use crate::prelude::*;
const VNE_DENSE_THRESHOLD: usize = 5000;
const VNE_SPARSE_RANK: usize = 100;
#[derive(Clone, Debug)]
pub struct RandomSvdResults<T> {
pub u: faer::Mat<T>,
pub v: faer::Mat<T>,
pub s: Vec<T>,
}
pub fn randomised_svd<T>(
x: MatRef<T>,
rank: usize,
seed: usize,
oversampling: Option<usize>,
n_power_iter: Option<usize>,
) -> Result<RandomSvdResults<T>, ManifoldsError>
where
T: ManifoldsFloat,
StandardNormal: Distribution<T>,
{
let ncol = x.ncols();
let nrow = x.nrows();
let os = oversampling.unwrap_or({
if rank < 10 {
rank } else {
10
}
});
let sample_size = (rank + os).min(ncol.min(nrow));
let n_iter = n_power_iter.unwrap_or(2);
let mut rng = StdRng::seed_from_u64(seed as u64);
let normal = Normal::new(T::from(0.0).unwrap(), T::from(1.0).unwrap()).unwrap();
let omega = Mat::from_fn(ncol, sample_size, |_, _| normal.sample(&mut rng));
let y = x * omega;
let mut q = y.qr().compute_thin_Q();
for _ in 0..n_iter {
let z = x.transpose() * q;
q = (x * z).qr().compute_thin_Q();
}
let b = q.transpose() * x;
let svd = b.thin_svd().map_err(|_| ManifoldsError::FaerSvdError)?;
Ok(RandomSvdResults {
u: q * svd.U(),
v: svd.V().cloned(), s: svd.S().column_vector().iter().copied().collect(),
})
}
pub fn sparse_randomised_svd<T>(
matrix: &CompressedSparseData<T>,
rank: usize,
seed: u64,
oversampling: Option<usize>,
n_power_iter: Option<usize>,
) -> Result<RandomSvdResults<T>, ManifoldsError>
where
T: Into<T> + ManifoldsFloat,
{
let (n, m) = matrix.shape;
let os = oversampling.unwrap_or(10);
let sample_size = (rank + os).min(m).min(n);
let n_iter = n_power_iter.unwrap_or(2);
let ncols = sample_size;
let (csr, csc);
let csr_owned;
let csc_owned;
match matrix.cs_type {
CompressedSparseFormat::Csr => {
csr = matrix;
csc_owned = matrix.transform();
csc = &csc_owned;
}
CompressedSparseFormat::Csc => {
csc = matrix;
csr_owned = matrix.transform();
csr = &csr_owned;
}
};
let data_csr = csr.data.as_slice();
let data_csc = csc.data.as_slice();
let spmm = |sparse: &CompressedSparseData<T>, data: &[T], x_flat: &[T], y_flat: &mut [T]| {
let n_outer = sparse.indptr.len() - 1;
y_flat
.par_chunks_exact_mut(ncols)
.take(n_outer)
.enumerate()
.for_each(|(i, y_row)| {
y_row.fill(T::zero());
for idx in sparse.indptr[i]..sparse.indptr[i + 1] {
let j = sparse.indices[idx];
let a_val = data[idx];
let x_row = &x_flat[j * ncols..(j + 1) * ncols];
for col in 0..ncols {
y_row[col] += a_val * x_row[col];
}
}
});
};
let to_row_major = |mat: MatRef<T>| -> Vec<T> {
let rows = mat.nrows();
let mut flat = vec![T::zero(); rows * ncols];
flat.par_chunks_exact_mut(ncols)
.enumerate()
.for_each(|(i, row_slice)| {
for col in 0..ncols {
row_slice[col] = mat[(i, col)];
}
});
flat
};
let from_row_major = |flat: &[T], mut mat: MatMut<T>| {
let rows = mat.nrows();
for i in 0..rows {
let row_slice = &flat[i * ncols..(i + 1) * ncols];
for col in 0..ncols {
mat[(i, col)] = row_slice[col];
}
}
};
let mut rng = StdRng::seed_from_u64(seed);
let normal = Normal::new(0.0, 1.0).unwrap();
let omega = Mat::from_fn(m, sample_size, |_, _| {
T::from_f64(normal.sample(&mut rng)).unwrap()
});
let mut x_flat = to_row_major(omega.as_ref());
let mut y_flat = vec![T::zero(); n * ncols];
let mut z_flat = vec![T::zero(); m * ncols];
spmm(csr, data_csr, &x_flat, &mut y_flat);
let mut y = Mat::<T>::zeros(n, sample_size);
from_row_major(&y_flat, y.as_mut());
let mut q = y.qr().compute_thin_Q();
for _ in 0..n_iter {
x_flat = to_row_major(q.as_ref());
spmm(csc, data_csc, &x_flat, &mut z_flat);
spmm(csr, data_csr, &z_flat, &mut y_flat);
from_row_major(&y_flat, y.as_mut());
q = y.qr().compute_thin_Q();
}
x_flat = to_row_major(q.as_ref());
let mut b_t_flat = vec![T::zero(); m * ncols];
spmm(csc, data_csc, &x_flat, &mut b_t_flat);
let mut b_t = Mat::<T>::zeros(m, sample_size);
from_row_major(&b_t_flat, b_t.as_mut());
let b = b_t.transpose().to_owned();
let svd = b.thin_svd().map_err(|_| ManifoldsError::FaerSvdError)?;
let u = (&q * svd.U()).get(.., ..rank).to_owned();
let s: Vec<T> = svd.S().column_vector().iter().copied().take(rank).collect();
let v = svd.V().get(.., ..rank).to_owned();
Ok(RandomSvdResults { u, s, v })
}
struct LanczosEigendecomp {
tri_evals: Vec<f64>,
tri_evecs: Mat<f64>,
v_basis: Vec<Vec<f64>>,
n: usize,
}
fn dot<T>(a: &[T], b: &[T]) -> T
where
T: ManifoldsFloat,
{
assert_same_len!(a, b);
T::dot_simd(a, b)
}
fn norm<T>(v: &[T]) -> T
where
T: ManifoldsFloat,
{
dot(v, v).sqrt()
}
fn normalise<T>(v: &mut [T])
where
T: ManifoldsFloat,
{
let n = norm(v);
v.par_iter_mut().for_each(|x| *x /= n);
}
fn tridiag_eig<T>(alpha: &[T], beta: &[T]) -> Result<(Vec<T::Real>, Mat<T>), ManifoldsError>
where
T: ManifoldsFloat,
{
let n = alpha.len();
let mut t = Mat::<T>::zeros(n, n);
for i in 0..n {
t[(i, i)] = alpha[i];
if i < n - 1 {
t[(i, i + 1)] = beta[i];
t[(i + 1, i)] = beta[i];
}
}
let eig = t
.self_adjoint_eigen(faer::Side::Lower)
.map_err(|_| ManifoldsError::FaerEigenError)?;
let evals = eig.S().column_vector().iter().copied().collect();
let evecs = eig.U().to_owned();
Ok((evals, evecs))
}
fn lanczos_eigendecomp<T>(
matrix: &CompressedSparseData<T>,
n_components: usize,
seed: u64,
) -> Result<LanczosEigendecomp, ManifoldsError>
where
T: ManifoldsFloat,
{
let n = matrix.shape.0;
let n_iter = (n_components * 4 + 30).min(n);
let csr = match matrix.cs_type {
CompressedSparseFormat::Csr => matrix.clone(),
CompressedSparseFormat::Csc => matrix.transform(),
};
let data_f64: Vec<f64> = csr.data.iter().map(|v| v.to_f64().unwrap()).collect();
let matvec = |x: &[f64], y: &mut [f64]| {
y.fill(0.0);
for i in 0..n {
for idx in csr.indptr[i]..csr.indptr[i + 1] {
let j = csr.indices[idx];
y[i] += data_f64[idx] * x[j];
}
}
};
let mut v = vec![0.0; n];
let mut v_old = vec![0.0; n];
let mut w = vec![0.0; n];
let mut v_basis = vec![vec![0.0; n]; n_iter];
let mut rng = StdRng::seed_from_u64(seed);
for i in 0..n {
v[i] = rng.random::<f64>() - 0.5;
}
normalise(&mut v);
let mut alpha = vec![0.0; n_iter];
let mut beta = vec![0.0; n_iter];
for j in 0..n_iter {
v_basis[j].copy_from_slice(&v);
matvec(&v, &mut w);
alpha[j] = dot(&w, &v);
for i in 0..n {
w[i] -= alpha[j] * v[i];
if j > 0 {
w[i] -= beta[j - 1] * v_old[i];
}
}
for k in 0..=j {
let coeff = dot(&w, &v_basis[k]);
for i in 0..n {
w[i] -= coeff * v_basis[k][i];
}
}
beta[j] = norm(&w);
if beta[j] < 1e-12 {
break;
}
v_old.copy_from_slice(&v);
v.copy_from_slice(&w);
normalise(&mut v);
}
let (tri_evals, tri_evecs) = tridiag_eig(&alpha[..n_iter], &beta[..n_iter - 1])?;
Ok(LanczosEigendecomp {
tri_evals,
tri_evecs,
v_basis,
n,
})
}
fn select_eigenpairs(
decomp: &LanczosEigendecomp,
n_components: usize,
largest: bool,
) -> (Vec<f32>, Vec<Vec<f32>>) {
let n = decomp.n;
let n_iter = decomp.v_basis.len();
let mut indices: Vec<usize> = (0..decomp.tri_evals.len()).collect();
if largest {
indices.sort_by(|&i, &j| {
decomp.tri_evals[j]
.partial_cmp(&decomp.tri_evals[i])
.unwrap()
});
} else {
indices.sort_by(|&i, &j| {
decomp.tri_evals[i]
.partial_cmp(&decomp.tri_evals[j])
.unwrap()
});
}
let mut sel_evals: Vec<f32> = Vec::with_capacity(n_components);
let mut sel_evecs: Vec<Vec<f32>> = Vec::with_capacity(n_components);
for &idx in indices.iter().take(n_components) {
let mut evec = vec![0.0; n];
for i in 0..n {
for j in 0..n_iter {
evec[i] += decomp.v_basis[j][i] * decomp.tri_evecs[(j, idx)].to_f64().unwrap();
}
}
let norm_e: f64 = evec.iter().map(|x| x * x).sum::<f64>().sqrt();
if norm_e > 0.0 {
for x in &mut evec {
*x /= norm_e;
}
}
sel_evals.push(decomp.tri_evals[idx].to_f64().unwrap() as f32);
sel_evecs.push(evec.iter().map(|&x| x as f32).collect());
}
let mut transposed = vec![vec![0.0f32; n_components]; n];
for comp_idx in 0..n_components {
for point_idx in 0..n {
transposed[point_idx][comp_idx] = sel_evecs[comp_idx][point_idx];
}
}
(sel_evals, transposed)
}
pub fn compute_smallest_eigenpairs_lanczos<T>(
matrix: &CompressedSparseData<T>,
n_components: usize,
seed: u64,
) -> Result<(Vec<f32>, Vec<Vec<f32>>), ManifoldsError>
where
T: ManifoldsFloat,
{
let decomp = lanczos_eigendecomp(matrix, n_components, seed)?;
Ok(select_eigenpairs(&decomp, n_components, false))
}
pub fn compute_largest_eigenpairs_lanczos<T>(
matrix: &CompressedSparseData<T>,
n_components: usize,
seed: u64,
) -> Result<(Vec<f32>, Vec<Vec<f32>>), ManifoldsError>
where
T: ManifoldsFloat,
{
let decomp = lanczos_eigendecomp(matrix, n_components, seed)?;
Ok(select_eigenpairs(&decomp, n_components, true))
}
pub fn landmark_von_neumann_entropy<T>(
operator: &CompressedSparseData<T>,
t_max: usize,
) -> Result<Vec<T>, ManifoldsError>
where
T: ManifoldsFloat,
{
let (n, _) = operator.shape;
let eigenvalues: Vec<T> = if n < VNE_DENSE_THRESHOLD {
let dense = operator.to_dense();
let svd = dense.thin_svd().map_err(|_| ManifoldsError::FaerSvdError)?;
svd.S().column_vector().iter().copied().collect()
} else {
let rank = VNE_SPARSE_RANK.min(n.saturating_sub(1));
let svd = sparse_randomised_svd(operator, rank, 42, None, None)?;
svd.s
};
let mut eigenvalues_t = eigenvalues.clone();
let mut entropy = Vec::with_capacity(t_max);
for _ in 0..t_max {
let sum: T = eigenvalues_t.iter().copied().sum();
if sum <= T::zero() {
entropy.push(T::zero());
} else {
let h: T = eigenvalues_t
.iter()
.map(|&v| {
let prob = v / sum + T::epsilon();
-prob * prob.ln()
})
.sum();
entropy.push(h);
}
for (vt, &v) in eigenvalues_t.iter_mut().zip(&eigenvalues) {
*vt *= v;
}
}
Ok(entropy)
}
#[cfg(test)]
mod test_utils_math {
use super::*;
use approx::assert_relative_eq;
#[test]
fn test_dot_product() {
let a = vec![1.0, 2.0, 3.0];
let b = vec![4.0, 5.0, 6.0];
let result = dot(&a, &b);
assert_relative_eq!(result, 32.0, epsilon = 1e-6);
}
#[test]
fn test_dot_product_zero() {
let a = vec![1.0, 2.0, 3.0];
let b = vec![0.0, 0.0, 0.0];
let result = dot(&a, &b);
assert_relative_eq!(result, 0.0, epsilon = 1e-6);
}
#[test]
fn test_norm() {
let v = vec![3.0, 4.0];
let result = norm(&v);
assert_relative_eq!(result, 5.0, epsilon = 1e-6); }
#[test]
fn test_normalise() {
let mut v = vec![3.0, 4.0];
normalise(&mut v);
assert_relative_eq!(v[0], 0.6, epsilon = 1e-6);
assert_relative_eq!(v[1], 0.8, epsilon = 1e-6);
let new_norm = norm(&v);
assert_relative_eq!(new_norm, 1.0, epsilon = 1e-6);
}
#[test]
fn test_tridiag_eig_simple() {
let alpha = vec![2.0, 2.0];
let beta = vec![1.0];
let (evals, _) = tridiag_eig(&alpha, &beta).unwrap();
assert_eq!(evals.len(), 2);
let mut sorted_evals = evals.clone();
sorted_evals.sort_by(|a: &f64, b: &f64| a.partial_cmp(b).unwrap());
assert_relative_eq!(sorted_evals[0], 1.0, epsilon = 1e-5);
assert_relative_eq!(sorted_evals[1], 3.0, epsilon = 1e-5);
}
#[test]
fn test_compute_largest_eigenpairs_identity() {
let n = 3;
let data = vec![2.0, -1.0, -1.0, 2.0, -1.0, -1.0, 2.0];
let indices = vec![0, 1, 0, 1, 2, 1, 2];
let indptr = vec![0, 2, 5, 7];
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (n, n));
let (evals, evecs) = compute_smallest_eigenpairs_lanczos(&matrix, 2, 42).unwrap();
assert_eq!(evals.len(), 2);
assert_eq!(evecs.len(), n);
assert_eq!(evecs[0].len(), 2);
for eval in evals {
assert!(eval > 0.0);
}
}
#[test]
fn test_compute_smallest_eigenpairs_diagonal() {
let n = 5;
let data = vec![5.0, 4.0, 3.0, 2.0, 1.0];
let indices = vec![0, 1, 2, 3, 4];
let indptr = vec![0, 1, 2, 3, 4, 5];
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (n, n));
let (evals, evecs) = compute_smallest_eigenpairs_lanczos(&matrix, 3, 42).unwrap();
assert_eq!(evals.len(), 3);
assert_eq!(evecs.len(), n);
let mut sorted_evals = evals.clone();
sorted_evals.sort_by(|a, b| b.partial_cmp(a).unwrap());
assert_relative_eq!(sorted_evals[0], 3.0, epsilon = 0.1);
assert_relative_eq!(sorted_evals[1], 2.0, epsilon = 0.1);
assert_relative_eq!(sorted_evals[2], 1.0, epsilon = 0.1);
}
#[test]
fn test_lanczos_reproducibility() {
let n = 10;
let mut data = Vec::new();
let mut indices = Vec::new();
let mut indptr = vec![0];
for i in 0..n {
if i > 0 {
data.push(-1.0);
indices.push(i - 1);
}
data.push(2.0);
indices.push(i);
if i < n - 1 {
data.push(-1.0);
indices.push(i + 1);
}
indptr.push(data.len());
}
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (n, n));
let (evals1, evecs1) = compute_smallest_eigenpairs_lanczos(&matrix, 3, 42).unwrap();
let (evals2, evecs2) = compute_smallest_eigenpairs_lanczos(&matrix, 3, 42).unwrap();
assert_eq!(evals1, evals2);
assert_eq!(evecs1, evecs2);
}
#[test]
#[should_panic]
fn test_dot_product_different_lengths() {
let a = vec![1.0, 2.0];
let b = vec![1.0, 2.0, 3.0];
let _ = dot(&a, &b);
}
#[test]
fn test_compute_largest_eigenpairs_diagonal() {
let data = vec![5.0, 4.0, 3.0, 2.0, 1.0];
let indices = vec![0, 1, 2, 3, 4];
let indptr = vec![0, 1, 2, 3, 4, 5];
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (5, 5));
let (evals, evecs) = compute_largest_eigenpairs_lanczos(&matrix, 3, 42).unwrap();
assert_eq!(evals.len(), 3);
assert_eq!(evecs.len(), 5);
assert_eq!(evecs[0].len(), 3);
let mut sorted = evals.clone();
sorted.sort_by(|a, b| b.partial_cmp(a).unwrap());
assert_relative_eq!(sorted[0], 5.0, epsilon = 0.1);
assert_relative_eq!(sorted[1], 4.0, epsilon = 0.1);
assert_relative_eq!(sorted[2], 3.0, epsilon = 0.1);
}
#[test]
fn test_compute_largest_eigenpairs_path_laplacian() {
let data = vec![2.0, -1.0, -1.0, 2.0, -1.0, -1.0, 2.0];
let indices = vec![0, 1, 0, 1, 2, 1, 2];
let indptr = vec![0, 2, 5, 7];
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (3, 3));
let (evals, _) = compute_largest_eigenpairs_lanczos(&matrix, 2, 42).unwrap();
let mut sorted = evals.clone();
sorted.sort_by(|a, b| b.partial_cmp(a).unwrap());
assert_relative_eq!(sorted[0], 2.0 + 2.0_f32.sqrt(), epsilon = 0.05);
assert_relative_eq!(sorted[1], 2.0, epsilon = 0.05);
}
#[test]
fn test_largest_eigenpairs_reproducibility() {
let n = 10;
let mut data = Vec::new();
let mut indices = Vec::new();
let mut indptr = vec![0];
for i in 0..n {
if i > 0 {
data.push(-1.0);
indices.push(i - 1);
}
data.push(2.0);
indices.push(i);
if i < n - 1 {
data.push(-1.0);
indices.push(i + 1);
}
indptr.push(data.len());
}
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (n, n));
let (e1, v1) = compute_largest_eigenpairs_lanczos(&matrix, 3, 42).unwrap();
let (e2, v2) = compute_largest_eigenpairs_lanczos(&matrix, 3, 42).unwrap();
assert_eq!(e1, e2);
assert_eq!(v1, v2);
}
#[test]
fn test_smallest_and_largest_agree_on_diagonal() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let indices = vec![0, 1, 2, 3, 4];
let indptr = vec![0, 1, 2, 3, 4, 5];
let matrix = CompressedSparseData::new_csr(&data, &indices, &indptr, (5, 5));
let (small, _) = compute_smallest_eigenpairs_lanczos(&matrix, 2, 42).unwrap();
let (large, _) = compute_largest_eigenpairs_lanczos(&matrix, 2, 42).unwrap();
let mut s_sorted = small.clone();
s_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mut l_sorted = large.clone();
l_sorted.sort_by(|a, b| b.partial_cmp(a).unwrap());
assert_relative_eq!(s_sorted[0], 1.0, epsilon = 0.1);
assert_relative_eq!(s_sorted[1], 2.0, epsilon = 0.1);
assert_relative_eq!(l_sorted[0], 5.0, epsilon = 0.1);
assert_relative_eq!(l_sorted[1], 4.0, epsilon = 0.1);
}
}