use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use crate::error::{ClusteringError, Result};
/// A filtration: a function assigning one real "filter" value to every
/// sample (row) of a data matrix.
///
/// Implementors are `Send + Sync` so a filtration can be shared across
/// threads.
pub trait Filtration: Send + Sync {
/// Compute the filtration value for each row of `data`.
///
/// On success the returned vector has length `data.nrows()`; implementors
/// return an error for unsuitable input (e.g. empty data).
fn apply(&self, data: ArrayView2<f64>) -> Result<Array1<f64>>;
}
/// Eccentricity filtration: each point's p-mean of Euclidean distances to
/// all other points.
#[derive(Debug, Clone)]
pub struct EccentricityFiltration {
    /// Exponent of the generalized mean (p = 1 is the arithmetic mean).
    pub p: f64,
}

impl Default for EccentricityFiltration {
    fn default() -> Self {
        // Arithmetic-mean eccentricity by default.
        EccentricityFiltration { p: 1.0 }
    }
}

impl EccentricityFiltration {
    /// Build an eccentricity filtration with mean exponent `p`.
    pub fn new(p: f64) -> Self {
        EccentricityFiltration { p }
    }
}
impl Filtration for EccentricityFiltration {
    /// Eccentricity of each point: `(1/n · Σ_j d(i, j)^p)^(1/p)` over all
    /// other points `j`. The self term contributes 0 to the sum, but the
    /// mean is still taken over all `n` points, matching the original
    /// definition used here.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if `data` is empty, or if `p` is zero or
    /// non-finite (either would make `powf(1.0 / p)` produce NaN/inf
    /// silently instead of a diagnosable error).
    fn apply(&self, data: ArrayView2<f64>) -> Result<Array1<f64>> {
        let n = data.nrows();
        let d = data.ncols();
        if n == 0 || d == 0 {
            return Err(ClusteringError::InvalidInput(
                "eccentricity: data must be non-empty".into(),
            ));
        }
        let p = self.p;
        // Guard the exponent: p = 0 makes 1/p infinite, NaN/inf propagate.
        if !p.is_finite() || p == 0.0 {
            return Err(ClusteringError::InvalidInput(format!(
                "eccentricity: exponent p must be finite and non-zero, got {p}"
            )));
        }
        let mut ecc = Array1::zeros(n);
        for i in 0..n {
            let mut sum = 0.0_f64;
            for j in 0..n {
                if i == j {
                    continue;
                }
                // Squared Euclidean distance between rows i and j.
                let dist_sq: f64 = data
                    .row(i)
                    .iter()
                    .zip(data.row(j).iter())
                    .map(|(&a, &b)| (a - b) * (a - b))
                    .sum();
                let dist = dist_sq.sqrt();
                sum += dist.powf(p);
            }
            // Mean over all n points (self term is 0), then the p-th root.
            let mean = sum / (n as f64);
            ecc[i] = mean.powf(1.0 / p);
        }
        Ok(ecc)
    }
}
/// k-nearest-neighbor density estimate used as a filtration.
#[derive(Debug, Clone)]
pub struct DensityFiltration {
    /// Number of neighbors used in the kNN density estimate.
    pub k: usize,
    /// When true, densities are negated (useful for sublevel-set orderings).
    pub negate: bool,
}

impl Default for DensityFiltration {
    fn default() -> Self {
        DensityFiltration {
            k: 5,
            negate: false,
        }
    }
}

impl DensityFiltration {
    /// Build a density filtration with `k` neighbors, optionally negated.
    pub fn new(k: usize, negate: bool) -> Self {
        DensityFiltration { k, negate }
    }
}
impl Filtration for DensityFiltration {
    /// kNN density estimate per point: `k / (n · r_k^d)`, where `r_k` is the
    /// distance to the point's k-th nearest neighbor. The sign is flipped
    /// when `self.negate` is set.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` for empty data or fewer than 2 samples. (The
    /// kNN radius is undefined for a single point; previously `n == 1`
    /// reached `dists[k - 1]` on an empty vector and panicked.)
    fn apply(&self, data: ArrayView2<f64>) -> Result<Array1<f64>> {
        let n = data.nrows();
        let d = data.ncols();
        if n == 0 || d == 0 {
            return Err(ClusteringError::InvalidInput(
                "density filtration: data must be non-empty".into(),
            ));
        }
        // With one sample there is no neighbor to define r_k.
        if n < 2 {
            return Err(ClusteringError::InvalidInput(
                "density filtration: need at least 2 samples".into(),
            ));
        }
        // Clamp k into [1, n - 1] so the k-th neighbor always exists.
        let k = self.k.min(n - 1).max(1);
        let mut densities = Array1::zeros(n);
        for i in 0..n {
            // Euclidean distances from point i to every other point.
            let mut dists: Vec<f64> = (0..n)
                .filter(|&j| j != i)
                .map(|j| {
                    data.row(i)
                        .iter()
                        .zip(data.row(j).iter())
                        .map(|(&a, &b)| (a - b) * (a - b))
                        .sum::<f64>()
                        .sqrt()
                })
                .collect();
            dists.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            // Floor r_k so duplicate points cannot cause division by zero.
            let r_k = dists[k - 1].max(1e-15);
            let vol = r_k.powi(d as i32);
            let density = (k as f64) / ((n as f64) * vol);
            densities[i] = if self.negate { -density } else { density };
        }
        Ok(densities)
    }
}
/// Filtration that projects samples onto one principal component,
/// estimated by power iteration.
#[derive(Debug, Clone)]
pub struct PcaFiltration {
    /// 1-based index of the principal component to project onto.
    pub component: usize,
    /// Maximum number of power-iteration steps per component.
    pub max_iter: usize,
}

impl Default for PcaFiltration {
    fn default() -> Self {
        // First principal component, 200 iterations.
        PcaFiltration {
            component: 1,
            max_iter: 200,
        }
    }
}

impl PcaFiltration {
    /// Build a PCA filtration for the given (1-based) component; values
    /// below 1 are clamped up to 1.
    pub fn new(component: usize) -> Self {
        PcaFiltration {
            component: component.max(1),
            max_iter: 200,
        }
    }
}
impl Filtration for PcaFiltration {
    /// Project each (mean-centered) sample onto the `self.component`-th
    /// principal direction, found by power iteration on `XᵀX` with
    /// Gram–Schmidt deflation against previously extracted components.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` with fewer than 2 samples or zero features.
    fn apply(&self, data: ArrayView2<f64>) -> Result<Array1<f64>> {
        let n = data.nrows();
        let d = data.ncols();
        if n < 2 || d == 0 {
            return Err(ClusteringError::InvalidInput(
                "PCA filtration: need at least 2 samples".into(),
            ));
        }
        // Center each column: principal directions are defined on
        // mean-centered data.
        let mut centered = data.to_owned();
        for j in 0..d {
            let mean: f64 = centered.column(j).iter().sum::<f64>() / n as f64;
            for i in 0..n {
                centered[[i, j]] -= mean;
            }
        }
        // Clamp to >= 1: `component` is a public field, so `component: 0`
        // is constructible and previously made `eigenvecs[k - 1]` panic.
        let k = self.component.max(1);
        let mut eigenvecs: Vec<Vec<f64>> = Vec::with_capacity(k);
        for pc_idx in 0..k {
            // Deterministic, component-dependent starting vector.
            let mut v: Vec<f64> = (0..d).map(|i| ((i + pc_idx + 1) as f64).sin()).collect();
            normalise_vec(&mut v);
            for _ in 0..self.max_iter {
                // Deflate: keep v orthogonal to already-found components.
                for ev in &eigenvecs {
                    let dot: f64 = v.iter().zip(ev.iter()).map(|(a, b)| a * b).sum();
                    for j in 0..d {
                        v[j] -= dot * ev[j];
                    }
                }
                // One power step: mv = Xᵀ(Xv), the (unscaled) covariance
                // applied to v, computed without forming XᵀX.
                let xv: Vec<f64> = (0..n)
                    .map(|i| {
                        centered
                            .row(i)
                            .iter()
                            .zip(v.iter())
                            .map(|(a, b)| a * b)
                            .sum::<f64>()
                    })
                    .collect();
                let mut mv = vec![0.0f64; d];
                for i in 0..n {
                    for j in 0..d {
                        mv[j] += centered[[i, j]] * xv[i];
                    }
                }
                // Re-deflate after the product to counter numerical drift.
                for ev in &eigenvecs {
                    let dot: f64 = mv.iter().zip(ev.iter()).map(|(a, b)| a * b).sum();
                    for j in 0..d {
                        mv[j] -= dot * ev[j];
                    }
                }
                let norm = normalise_vec(&mut mv);
                if norm < 1e-12 {
                    // Degenerate direction (rank-deficient data); keep v.
                    break;
                }
                let change: f64 = v.iter().zip(mv.iter()).map(|(a, b)| (a - b).abs()).sum();
                v = mv;
                if change < 1e-8 {
                    break;
                }
            }
            eigenvecs.push(v);
        }
        let ev = &eigenvecs[k - 1];
        // Project every centered sample onto the requested component.
        let projections: Vec<f64> = (0..n)
            .map(|i| {
                centered
                    .row(i)
                    .iter()
                    .zip(ev.iter())
                    .map(|(a, b)| a * b)
                    .sum()
            })
            .collect();
        // `from_vec` already yields a length-n Array1; the previous
        // `into_shape_with_order(n)` round-trip was a no-op.
        Ok(Array1::from_vec(projections))
    }
}
/// Normalize `v` to unit Euclidean length in place and return its original
/// L2 norm. Vectors with norm below `1e-12` are left unchanged (to avoid
/// dividing by ~0); the tiny norm is still returned so callers can detect
/// the degenerate case.
///
/// Takes `&mut [f64]` rather than `&mut Vec<f64>` (the idiomatic slice
/// form); existing `&mut Vec<f64>` call sites coerce automatically.
fn normalise_vec(v: &mut [f64]) -> f64 {
    let norm: f64 = v.iter().map(|x| x * x).sum::<f64>().sqrt();
    if norm > 1e-12 {
        for x in v.iter_mut() {
            *x /= norm;
        }
    }
    norm
}
/// Filtration given by an eigenvector of the normalized similarity matrix
/// of a kNN Gaussian-kernel graph built over the data.
#[derive(Debug, Clone)]
pub struct LaplacianEigenvectorFiltration {
    /// Neighbors per point when building the similarity graph.
    pub k_neighbors: usize,
    /// Eigenvector offset: `component + 1` eigenvectors are computed and
    /// the last one is returned.
    pub component: usize,
    /// Gaussian kernel bandwidth; `None` selects it from the data.
    pub sigma: Option<f64>,
    /// Maximum power-iteration steps per eigenvector.
    pub max_iter: usize,
}

impl Default for LaplacianEigenvectorFiltration {
    fn default() -> Self {
        LaplacianEigenvectorFiltration {
            k_neighbors: 7,
            component: 1,
            sigma: None,
            max_iter: 300,
        }
    }
}

impl LaplacianEigenvectorFiltration {
    /// Build the filtration from a neighbor count and eigenvector index;
    /// bandwidth stays automatic and `max_iter` keeps its default of 300.
    pub fn new(k_neighbors: usize, component: usize) -> Self {
        LaplacianEigenvectorFiltration {
            k_neighbors,
            component,
            sigma: None,
            max_iter: 300,
        }
    }
}
impl Filtration for LaplacianEigenvectorFiltration {
    /// Return one eigenvector of the normalized similarity matrix
    /// `S = D^{-1/2} W D^{-1/2}` of a symmetrized kNN Gaussian-kernel graph,
    /// computed by power iteration with deflation. `self.component + 1`
    /// eigenvectors are computed and the last one is returned.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` for fewer than 3 samples or zero features.
    fn apply(&self, data: ArrayView2<f64>) -> Result<Array1<f64>> {
        let n = data.nrows();
        let d = data.ncols();
        if n < 3 || d == 0 {
            return Err(ClusteringError::InvalidInput(
                "Laplacian eigenvector: need ≥ 3 samples".into(),
            ));
        }
        let k = self.k_neighbors.min(n - 1).max(1);
        // Pairwise squared Euclidean distances (symmetric, zero diagonal).
        let mut sq_dists: Vec<Vec<f64>> = vec![vec![0.0; n]; n];
        for i in 0..n {
            for j in (i + 1)..n {
                let d2: f64 = data
                    .row(i)
                    .iter()
                    .zip(data.row(j).iter())
                    .map(|(&a, &b)| (a - b) * (a - b))
                    .sum();
                sq_dists[i][j] = d2;
                sq_dists[j][i] = d2;
            }
        }
        // Kernel bandwidth: user-supplied sigma, or the median k-th-NN
        // distance (a common self-tuning heuristic), floored for safety.
        let sigma_sq = match self.sigma {
            Some(s) => s * s,
            None => {
                let mut knn_dists: Vec<f64> = Vec::with_capacity(n);
                for i in 0..n {
                    let mut row: Vec<f64> = sq_dists[i].clone();
                    row.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
                    // row[0] is the self-distance 0, so row[k] is the k-th NN.
                    knn_dists.push(row[k].sqrt());
                }
                knn_dists.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
                let median = knn_dists[n / 2];
                (median * median).max(1e-10)
            }
        };
        // Symmetrized kNN graph with Gaussian edge weights.
        let mut w = vec![vec![0.0f64; n]; n];
        for i in 0..n {
            let mut sorted_idx: Vec<usize> = (0..n).filter(|&j| j != i).collect();
            sorted_idx.sort_by(|&a, &b| {
                sq_dists[i][a]
                    .partial_cmp(&sq_dists[i][b])
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
            for &j in sorted_idx.iter().take(k) {
                let wij = (-sq_dists[i][j] / sigma_sq).exp();
                w[i][j] = wij;
                w[j][i] = wij;
            }
        }
        // Degrees, floored so the normalization below never divides by 0.
        let deg: Vec<f64> = (0..n).map(|i| w[i].iter().sum::<f64>().max(1e-15)).collect();
        let d_inv_sqrt: Vec<f64> = deg.iter().map(|&di| 1.0 / di.sqrt()).collect();
        // Apply (I + S) with S = D^{-1/2} W D^{-1/2}. S's eigenvalues lie in
        // [-1, 1] (standard spectral graph theory for non-negative weights),
        // so the +I shift makes the spectrum non-negative without changing
        // the eigenvectors or their ranking. Power iteration then converges
        // to the eigenvector of the algebraically largest eigenvalue; the
        // previous unshifted iteration converged to the eigenvalue of
        // largest *magnitude*, which can be a negative one.
        let matvec = |v: &[f64]| -> Vec<f64> {
            let scaled: Vec<f64> = v
                .iter()
                .zip(d_inv_sqrt.iter())
                .map(|(vi, di)| vi * di)
                .collect();
            let mut result = vec![0.0f64; n];
            for i in 0..n {
                let mut s = 0.0;
                for j in 0..n {
                    s += w[i][j] * scaled[j];
                }
                result[i] = v[i] + d_inv_sqrt[i] * s;
            }
            result
        };
        // Compute component + 1 eigenvectors and keep the last.
        let comp = self.component + 1;
        let mut eigenvecs: Vec<Vec<f64>> = Vec::with_capacity(comp);
        for pc_idx in 0..comp {
            // Deterministic, index-dependent starting vector.
            let mut v: Vec<f64> = (0..n)
                .map(|i| ((i + pc_idx + 1) as f64).sin())
                .collect();
            let mut v_norm = vec_norm(&v);
            if v_norm > 1e-12 {
                for x in v.iter_mut() {
                    *x /= v_norm;
                }
            }
            for _ in 0..self.max_iter {
                let mut mv = matvec(&v);
                // Deflate against previously found eigenvectors.
                for ev in &eigenvecs {
                    let dot: f64 = mv.iter().zip(ev.iter()).map(|(a, b)| a * b).sum();
                    for j in 0..n {
                        mv[j] -= dot * ev[j];
                    }
                }
                v_norm = vec_norm(&mv);
                if v_norm < 1e-12 {
                    break;
                }
                for x in mv.iter_mut() {
                    *x /= v_norm;
                }
                let change: f64 = v.iter().zip(mv.iter()).map(|(a, b)| (a - b).abs()).sum();
                v = mv;
                if change < 1e-8 {
                    break;
                }
            }
            eigenvecs.push(v);
        }
        let ev = &eigenvecs[comp - 1];
        Ok(Array1::from_vec(ev.clone()))
    }
}
/// Euclidean (L2) norm of a slice.
fn vec_norm(v: &[f64]) -> f64 {
    let sum_sq = v.iter().fold(0.0_f64, |acc, &x| acc + x * x);
    sum_sq.sqrt()
}
/// Filtration by graph-geodesic distance from a fixed reference sample.
#[derive(Debug, Clone)]
pub struct GeodesicDistanceFiltration {
    /// Neighbors per point when building the kNN graph.
    pub k_neighbors: usize,
    /// Row index of the reference point distances are measured from.
    pub reference: usize,
}

impl Default for GeodesicDistanceFiltration {
    fn default() -> Self {
        GeodesicDistanceFiltration {
            k_neighbors: 7,
            reference: 0,
        }
    }
}

impl GeodesicDistanceFiltration {
    /// Build the filtration from a neighbor count and reference index.
    pub fn new(k_neighbors: usize, reference: usize) -> Self {
        GeodesicDistanceFiltration {
            k_neighbors,
            reference,
        }
    }
}
impl Filtration for GeodesicDistanceFiltration {
    /// Approximate geodesic distance from `self.reference` to every point:
    /// Dijkstra shortest paths on a symmetrized kNN graph with Euclidean
    /// edge weights. Points unreachable from the reference get a sentinel
    /// of twice the largest finite distance (or 1.0 when every finite
    /// distance is 0, so unreachable points stay distinguishable from the
    /// reference itself — previously they collapsed onto 0 in that case).
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` for empty data or an out-of-range reference.
    fn apply(&self, data: ArrayView2<f64>) -> Result<Array1<f64>> {
        let n = data.nrows();
        if n == 0 {
            return Err(ClusteringError::InvalidInput(
                "geodesic filtration: data must be non-empty".into(),
            ));
        }
        if self.reference >= n {
            return Err(ClusteringError::InvalidInput(format!(
                "geodesic filtration: reference index {} out of range (n={})",
                self.reference, n
            )));
        }
        let k = self.k_neighbors.min(n - 1).max(1);
        // Symmetrized kNN adjacency list. Mutual neighbors may add the same
        // edge twice; Dijkstra's relaxation makes duplicates harmless.
        let mut adj: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
        for i in 0..n {
            let mut dists: Vec<(usize, f64)> = (0..n)
                .filter(|&j| j != i)
                .map(|j| {
                    let d = data
                        .row(i)
                        .iter()
                        .zip(data.row(j).iter())
                        .map(|(&a, &b)| (a - b) * (a - b))
                        .sum::<f64>()
                        .sqrt();
                    (j, d)
                })
                .collect();
            dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
            for (j, d) in dists.into_iter().take(k) {
                adj[i].push((j, d));
                adj[j].push((i, d));
            }
        }
        // Dijkstra from the reference. BinaryHeap is a max-heap, so
        // distances are stored negated to pop the closest node first.
        let mut dist = vec![f64::MAX; n];
        dist[self.reference] = 0.0;
        let mut heap: BinaryHeap<DijkstraEntry> = BinaryHeap::new();
        heap.push(DijkstraEntry {
            neg_dist: 0.0,
            node: self.reference,
        });
        while let Some(DijkstraEntry { neg_dist, node }) = heap.pop() {
            let d = -neg_dist;
            // Stale entry: a shorter path to this node was already settled.
            if d > dist[node] {
                continue;
            }
            for &(nb, w) in &adj[node] {
                let nd = d + w;
                if nd < dist[nb] {
                    dist[nb] = nd;
                    heap.push(DijkstraEntry {
                        neg_dist: -nd,
                        node: nb,
                    });
                }
            }
        }
        // Sentinel for unreachable points: 2× the largest finite distance,
        // with a 1.0 fallback when all finite distances are 0 (e.g. the
        // reference sits in a cluster of duplicate points disconnected from
        // the rest) so "unreachable" never maps to the reference's value.
        let max_finite = dist
            .iter()
            .copied()
            .filter(|&x| x < f64::MAX)
            .fold(0.0f64, f64::max);
        let sentinel = if max_finite > 0.0 { max_finite * 2.0 } else { 1.0 };
        let result: Vec<f64> = dist
            .into_iter()
            .map(|x| if x == f64::MAX { sentinel } else { x })
            .collect();
        Ok(Array1::from_vec(result))
    }
}
use std::collections::BinaryHeap;
/// Min-priority entry for Dijkstra on `std`'s max-heap `BinaryHeap`:
/// distances are stored negated so the smallest distance pops first.
#[derive(Debug)]
struct DijkstraEntry {
    /// Negated path distance (larger = closer).
    neg_dist: f64,
    /// Graph node index.
    node: usize,
}

// A single total order drives Ord, PartialOrd, and PartialEq so the three
// stay mutually consistent. The previous version derived PartialEq (compared
// both fields) while Ord compared only `neg_dist`, so two entries could be
// `cmp == Equal` yet unequal — a violation of the Ord contract. `total_cmp`
// also gives a well-defined order for every f64, including NaN, instead of
// collapsing incomparable values to Equal.
impl Ord for DijkstraEntry {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.neg_dist
            .total_cmp(&other.neg_dist)
            .then_with(|| self.node.cmp(&other.node))
    }
}

impl PartialOrd for DijkstraEntry {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl PartialEq for DijkstraEntry {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == std::cmp::Ordering::Equal
    }
}

impl Eq for DijkstraEntry {}
// Unit tests for the filtration implementations.
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
// Fixture: six 2-D points in two tight, well-separated clusters
// (three near the origin, three near (10, 10)).
fn two_blobs() -> Array2<f64> {
Array2::from_shape_vec(
(6, 2),
vec![
0.0, 0.0, 0.1, 0.1, -0.1, 0.1, 10.0, 10.0, 10.1, 9.9, 9.9, 10.1,
],
)
.expect("shape ok")
}
// Eccentricity must be defined and strictly positive for every point.
#[test]
fn test_eccentricity_two_blobs() {
let data = two_blobs();
let filt = EccentricityFiltration::default();
let vals = filt.apply(data.view()).expect("ok");
assert_eq!(vals.len(), 6);
for &v in vals.iter() {
assert!(v > 0.0, "expected positive eccentricity, got {v}");
}
}
// kNN density (non-negated) must be strictly positive everywhere.
#[test]
fn test_density_two_blobs() {
let data = two_blobs();
let filt = DensityFiltration::new(2, false);
let vals = filt.apply(data.view()).expect("ok");
assert_eq!(vals.len(), 6);
for &v in vals.iter() {
assert!(v > 0.0, "expected positive density, got {v}");
}
}
// Projection onto the first principal component should separate the two
// blobs by a clear margin in the blob means.
#[test]
fn test_pca_projection() {
let data = two_blobs();
let filt = PcaFiltration::new(1);
let vals = filt.apply(data.view()).expect("ok");
assert_eq!(vals.len(), 6);
let blob0: f64 = vals.iter().take(3).copied().sum::<f64>() / 3.0;
let blob1: f64 = vals.iter().skip(3).copied().sum::<f64>() / 3.0;
assert!(
(blob0 - blob1).abs() > 1.0,
"PCA should separate blobs: blob0={blob0:.3} blob1={blob1:.3}"
);
}
// Geodesic distance: zero at the reference, small within its blob.
#[test]
fn test_geodesic_distance() {
let data = two_blobs();
let filt = GeodesicDistanceFiltration::new(2, 0);
let vals = filt.apply(data.view()).expect("ok");
assert_eq!(vals.len(), 6);
assert!(vals[0].abs() < 1e-10, "self-distance should be 0");
assert!(vals[1] < 1.0, "within-blob geodesic should be small");
}
// Smoke test: the Laplacian eigenvector filtration runs and returns one
// value per sample.
#[test]
fn test_laplacian_eigenvector() {
let data = two_blobs();
let filt = LaplacianEigenvectorFiltration::new(3, 1);
let vals = filt.apply(data.view()).expect("ok");
assert_eq!(vals.len(), 6);
}
}