use crate::graph::{GraphLaplacian, GraphParams};
use dashmap::DashMap;
use smartcore::algorithm::neighbour::cosinepair::CosinePair;
use smartcore::api::{Transformer, UnsupervisedEstimator};
use smartcore::linalg::basic::arrays::Array;
use smartcore::linalg::basic::matrix::DenseMatrix;
use smartcore::preprocessing::numerical::{StandardScaler, StandardScalerParameters};
use log::{debug, info, trace};
use rayon::prelude::*;
use sprs::{CsMat, TriMat};
pub fn build_laplacian_matrix(
transposed: DenseMatrix<f64>, params: &GraphParams, n_items: Option<usize>,
energy: bool,
) -> GraphLaplacian {
let (d, n) = transposed.shape();
assert!(
n >= 2 && d >= 2,
"items should be at least of shape (2,2): ({},{})",
d,
n
);
info!(
"Building Laplacian matrix for {} items with {} features",
n, d
);
debug!(
"Graph parameters: eps={}, k={}, p={}, sigma={:?}, normalise={}",
params.eps, params.k, params.p, params.sigma, params.normalise
);
let mut items = if params.normalise {
debug!("Normalizing items to unit norm");
let scaler = StandardScaler::fit(&transposed, StandardScalerParameters::default()).unwrap();
let scaled = scaler.transform(&transposed).unwrap();
trace!("Items normalized successfully");
scaled
} else {
debug!("Skipping normalization - using raw item magnitudes");
transposed
};
let triplets = _main_laplacian(&mut items, params);
let sparse_matrix: CsMat<f64> = triplets.to_csr();
let graph_laplacian = GraphLaplacian {
init_data: items, matrix: sparse_matrix,
nnodes: match n_items {
Some(n_items) => n_items,
None => n,
},
graph_params: params.clone(),
energy,
};
info!(
"Successfully built sparse Laplacian matrix ({}x{}) with {} non-zeros",
graph_laplacian.matrix.shape().0,
graph_laplacian.matrix.shape().1,
graph_laplacian.matrix.nnz()
);
graph_laplacian
}
fn _main_laplacian(
items: &mut DenseMatrix<f64>,
params: &GraphParams,
) -> sprs::TriMatBase<Vec<usize>, Vec<f64>> {
let n = items.shape().0;
let start = std::time::Instant::now();
let adj_rows = _build_adjacency(items, params, n);
let sym = _symmetrise_adjancency(adj_rows, n);
let triplets = _build_sparse_laplacian(sym, n);
info!("Total Laplacian construction time: {:?}", start.elapsed());
triplets
}
fn _build_adjacency(
items: &mut DenseMatrix<f64>,
params: &GraphParams,
n: usize,
) -> Vec<Vec<(usize, f64)>> {
info!("Building CosinePair data structure");
#[allow(clippy::unnecessary_mut_passed)]
let fastpair = CosinePair::with_top_k(items, params.topk + 1).unwrap();
debug!("CosinePair structure built for {} items", n);
info!("Computing degrees for inline sparsification");
let degrees: Vec<usize> = (0..n)
.into_par_iter()
.map(|i| {
fastpair
.query_row_top_k(i, params.topk + 1)
.unwrap()
.iter()
.filter(|(dist, j)| i != *j && *dist <= params.eps)
.count()
})
.collect();
let avg_degree = degrees.iter().sum::<usize>() as f64 / n as f64;
let sparsify = avg_degree > 10.0;
if sparsify {
info!(
"Inline sparsification enabled (avg degree {:.1})",
avg_degree
);
} else {
debug!("Skipping sparsification (avg degree {:.1})", avg_degree);
}
info!("Computing k-NN with CosinePair: k={}", params.topk + 1);
let adj_rows: Vec<Vec<(usize, f64)>> = (0..n)
.into_par_iter()
.map(|i| {
let neighbors = fastpair.query_row_top_k(i, params.topk + 1).unwrap();
let mut valid_neighbors: Vec<(usize, f64, f64)> = neighbors
.iter()
.filter_map(|(distance, j)| {
if i != *j && *distance <= params.eps {
let weight =
1.0 / (1.0 + (distance / params.sigma.unwrap_or(1.0)).powf(params.p));
if weight > 1e-12 {
let score = if sparsify {
weight * ((degrees[i] * degrees[*j]) as f64).sqrt()
} else {
weight };
Some((*j, weight, score))
} else {
None
}
} else {
None
}
})
.collect();
if sparsify && valid_neighbors.len() > 2 {
valid_neighbors.sort_unstable_by(|a, b| {
b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal)
});
let keep_count = (valid_neighbors.len() / 2).max(1);
valid_neighbors.truncate(keep_count);
}
valid_neighbors
.into_iter()
.map(|(j, w, _)| (j, w))
.collect()
})
.collect();
debug!("Built adjacency rows for {} items", n);
adj_rows
}
pub(crate) fn _symmetrise_adjancency(
adj_rows: Vec<Vec<(usize, f64)>>,
n: usize,
) -> Vec<Vec<(usize, f64)>> {
trace!("Symmetrizing adjacency matrix");
let all_edges: Vec<(usize, usize, f64)> = adj_rows
.par_iter()
.enumerate()
.flat_map(|(i, row)| {
row.par_iter()
.map(move |&(j, w)| (i, j, w))
.collect::<Vec<_>>()
})
.collect();
let edge_map: DashMap<(usize, usize), f64> = DashMap::new();
all_edges.par_iter().for_each(|&(i, j, w)| {
edge_map.insert((i, j), w);
edge_map.insert((j, i), w);
});
let sym: Vec<Vec<(usize, f64)>> = (0..n)
.into_par_iter()
.map(|i| {
let mut neighbors: Vec<(usize, f64)> = edge_map
.iter()
.filter_map(|entry| {
let &(src, dst) = entry.key();
let &w = entry.value();
if src == i && src != dst {
Some((dst, w))
} else {
None
}
})
.collect();
neighbors.sort_unstable_by_key(|&(j, _)| j);
neighbors
})
.collect();
sym
}
pub(crate) fn _build_sparse_laplacian(
sym: Vec<Vec<(usize, f64)>>,
n: usize,
) -> sprs::TriMatBase<Vec<usize>, Vec<f64>> {
info!("Converting adjacency to sparse Laplacian matrix (DashMap batched)");
let start = std::time::Instant::now();
let triplet_map: DashMap<(usize, usize), f64> =
DashMap::with_capacity(sym.iter().map(|s| s.len() + 1).sum());
let total_edges = sym
.par_iter()
.enumerate()
.map(|(i, s)| {
let mut local_edge_count = 0;
let degree: f64 = s.iter().map(|&(_j, w)| w).sum();
triplet_map.insert((i, i), degree);
for &(j, w) in s {
if i != j {
triplet_map.insert((i, j), -w);
if i < j {
local_edge_count += 1;
}
}
}
local_edge_count
})
.sum::<usize>();
trace!("DashMap population completed in {:?}", start.elapsed());
debug!(
"Total triplets: {}, edges: {}",
triplet_map.len(),
total_edges
);
let conversion_start = std::time::Instant::now();
let mut triplets: Vec<((usize, usize), f64)> = triplet_map.into_iter().collect();
triplets.par_sort_unstable_by_key(|&((i, j), _)| (i, j));
trace!(
"Sorted {} triplets in {:?}",
triplets.len(),
conversion_start.elapsed()
);
let insert_start = std::time::Instant::now();
let mut trimat = TriMat::with_capacity((n, n), triplets.len());
for ((i, j), val) in triplets {
trimat.add_triplet(i, j, val);
}
debug!("Inserted triplets in {:?}", insert_start.elapsed());
info!("Sparse Laplacian construction time: {:?}", start.elapsed());
trimat
}
fn mean(data: &[f64]) -> Option<f32> {
let sum = data.iter().sum::<f64>() as f32;
let count = data.len();
match count {
positive if positive > 0 => Some(sum / count as f32),
_ => None,
}
}
pub fn std_deviation(data: &[f64]) -> Option<f32> {
match (mean(data), data.len()) {
(Some(data_mean), count) if count > 0 => {
let variance = data
.iter()
.map(|value| {
let diff = data_mean - (*value as f32);
diff * diff
})
.sum::<f32>()
/ count as f32;
Some(variance.sqrt())
}
_ => None,
}
}