use scirs2_core::ndarray::{s, Array1, Array2, ArrayView2, ScalarOperand};
use scirs2_core::numeric::{Float, FromPrimitive};
use scirs2_linalg::{eigh, smallest_k_eigh};
use std::fmt::Debug;
use crate::error::{ClusteringError, Result};
use crate::vq::{kmeans_with_options, KMeansInit, KMeansOptions};
/// Strategy used to construct the affinity (similarity) matrix that defines
/// the graph on which spectral clustering operates.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AffinityMode {
    /// Symmetrized binary k-nearest-neighbour graph built from the data.
    NearestNeighbors,
    /// Gaussian (RBF) kernel: exp(-gamma * ||x_i - x_j||^2).
    RBF,
    /// The input matrix is itself an (n, n) affinity matrix.
    Precomputed,
}
/// Estimates a cluster count via the eigengap heuristic: the 1-based index of
/// the largest gap between consecutive Laplacian eigenvalues is a good
/// estimate of the number of clusters.
///
/// # Arguments
/// * `eigenvalues` - Laplacian eigenvalues, assumed sorted ascending (the
///   heuristic reads consecutive gaps — TODO confirm ordering at call sites).
/// * `max_clusters` - Upper bound on the number of gaps inspected.
///
/// # Returns
/// The suggested number of clusters (always at least 1).
#[allow(dead_code)]
fn eigengap_heuristic<F>(eigenvalues: &[F], max_clusters: usize) -> usize
where
    F: Float + FromPrimitive + Debug + PartialOrd,
{
    let n = eigenvalues.len();
    // BUG FIX: with fewer than two eigenvalues there is no gap to measure, and
    // the original `n - 1` underflowed (usize) for an empty slice, panicking.
    // Fall back to a single cluster instead.
    if n < 2 {
        return 1;
    }
    let mut max_gap = F::zero();
    let mut max_gap_idx = 1;
    for i in 0..(max_clusters.min(n - 1)) {
        let gap = eigenvalues[i + 1] - eigenvalues[i];
        if gap > max_gap {
            max_gap = gap;
            max_gap_idx = i + 1;
        }
    }
    max_gap_idx
}
/// Builds the symmetric normalized graph Laplacian
/// `L_sym = I - D^{-1/2} A D^{-1/2}` from a square affinity matrix `A`.
///
/// # Errors
/// * `InvalidInput` if the affinity matrix is not square.
/// * `ComputationError` if any vertex has (near-)zero degree, i.e. the graph
///   may be disconnected and `D^{-1/2}` is undefined.
#[allow(dead_code)]
fn normalized_laplacian<F>(affinity: &Array2<F>) -> Result<Array2<F>>
where
    F: Float + FromPrimitive + Debug + PartialOrd,
{
    let n = affinity.shape()[0];
    if affinity.shape()[1] != n {
        return Err(ClusteringError::InvalidInput(
            "Affinity matrix must be square".to_string(),
        ));
    }
    // Vertex degrees are the row sums of the affinity matrix.
    let degrees: Vec<F> = (0..n).map(|i| affinity.row(i).sum()).collect();
    // Precompute D^{-1/2}; reject isolated vertices up front.
    let mut d_inv_sqrt = vec![F::zero(); n];
    for (i, &deg) in degrees.iter().enumerate() {
        if deg <= F::epsilon() {
            return Err(ClusteringError::ComputationError(
                "Degree matrix contains zero values, graph may be disconnected".to_string(),
            ));
        }
        d_inv_sqrt[i] = F::one() / deg.sqrt();
    }
    // Assemble L_sym entry by entry; the diagonal carries the identity term.
    let mut laplacian = Array2::zeros((n, n));
    for i in 0..n {
        for j in 0..n {
            let scaled = affinity[[i, j]] * d_inv_sqrt[i] * d_inv_sqrt[j];
            laplacian[[i, j]] = if i == j { F::one() - scaled } else { -scaled };
        }
    }
    Ok(laplacian)
}
/// Builds a symmetrized binary k-nearest-neighbour affinity matrix.
///
/// An edge (weight 1) connects each point to its `n_neighbors` closest points
/// under Euclidean distance; edges are mirrored so the graph is undirected.
///
/// # Errors
/// Returns `InvalidInput` if `n_neighbors >= n_samples`.
#[allow(dead_code)]
fn knn_affinity<F>(data: ArrayView2<F>, n_neighbors: usize) -> Result<Array2<F>>
where
    F: Float + FromPrimitive + Debug + PartialOrd,
{
    let n_samples = data.shape()[0];
    let n_features = data.shape()[1];
    if n_neighbors >= n_samples {
        return Err(ClusteringError::InvalidInput(format!(
            "n_neighbors ({}) must be less than the number of samples ({})",
            n_neighbors, n_samples
        )));
    }
    // Dense symmetric Euclidean distance matrix (upper triangle mirrored).
    let mut dist_matrix = Array2::zeros((n_samples, n_samples));
    for i in 0..n_samples {
        for j in (i + 1)..n_samples {
            let dist_sq = (0..n_features).fold(F::zero(), |acc, k| {
                let diff = data[[i, k]] - data[[j, k]];
                acc + diff * diff
            });
            let dist = dist_sq.sqrt();
            dist_matrix[[i, j]] = dist;
            dist_matrix[[j, i]] = dist;
        }
    }
    let mut affinity = Array2::zeros((n_samples, n_samples));
    for i in 0..n_samples {
        // Rank every other point by distance from point i. The stable sort
        // keeps index order on ties, preserving deterministic selection.
        let mut ranked: Vec<(usize, F)> = (0..n_samples)
            .filter(|&j| j != i)
            .map(|j| (j, dist_matrix[[i, j]]))
            .collect();
        ranked.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        // Connect i to its k nearest points and symmetrize.
        for &(j, _) in ranked.iter().take(n_neighbors.min(ranked.len())) {
            affinity[[i, j]] = F::one();
            affinity[[j, i]] = F::one();
        }
    }
    Ok(affinity)
}
/// Computes a dense RBF (Gaussian kernel) affinity matrix.
///
/// Entry (i, j) is `exp(-gamma * ||x_i - x_j||^2)`; the diagonal is 1 since
/// every point is maximally similar to itself.
///
/// # Errors
/// Returns `InvalidInput` if `gamma` is not strictly positive.
#[allow(dead_code)]
fn rbf_affinity<F>(data: ArrayView2<F>, gamma: F) -> Result<Array2<F>>
where
    F: Float + FromPrimitive + Debug + PartialOrd,
{
    let n_samples = data.shape()[0];
    let n_features = data.shape()[1];
    if gamma <= F::zero() {
        return Err(ClusteringError::InvalidInput(
            "gamma must be positive".to_string(),
        ));
    }
    let mut affinity = Array2::zeros((n_samples, n_samples));
    for i in 0..n_samples {
        affinity[[i, i]] = F::one();
        for j in (i + 1)..n_samples {
            // Squared Euclidean distance between samples i and j.
            let dist_sq = (0..n_features).fold(F::zero(), |acc, k| {
                let diff = data[[i, k]] - data[[j, k]];
                acc + diff * diff
            });
            let value = (-gamma * dist_sq).exp();
            // The kernel is symmetric, so fill both triangles at once.
            affinity[[i, j]] = value;
            affinity[[j, i]] = value;
        }
    }
    Ok(affinity)
}
/// Configuration options for spectral clustering.
#[derive(Debug, Clone)]
pub struct SpectralClusteringOptions<F: Float> {
    /// How the affinity (similarity) matrix is constructed from the input.
    pub affinity: AffinityMode,
    /// Neighbour count used by `AffinityMode::NearestNeighbors`.
    pub n_neighbors: usize,
    /// Kernel width used by `AffinityMode::RBF`; larger values make
    /// similarity decay faster with distance.
    pub gamma: F,
    /// If true, use the symmetric normalized Laplacian; otherwise L = D - A.
    pub normalized_laplacian: bool,
    /// Maximum iterations for the k-means step on the spectral embedding.
    pub max_iter: usize,
    /// Number of k-means restarts; the best run is kept.
    pub n_init: usize,
    /// Convergence tolerance for the k-means step.
    pub tol: F,
    /// Optional seed for reproducible k-means initialization.
    pub random_seed: Option<u64>,
    /// Eigensolver identifier (e.g. "arpack").
    /// NOTE(review): not consulted anywhere in this file — verify callers.
    pub eigen_solver: String,
    /// If true, pick the cluster count via the eigengap heuristic
    /// (capped by the requested `n_clusters`).
    pub auto_n_clusters: bool,
}
impl<F: Float + FromPrimitive> Default for SpectralClusteringOptions<F> {
    /// Defaults: RBF affinity with gamma = 1.0, normalized Laplacian,
    /// 300 k-means iterations, 10 restarts, tolerance 1e-4, no fixed seed,
    /// and a fixed (not auto-detected) cluster count.
    fn default() -> Self {
        Self {
            affinity: AffinityMode::RBF,
            n_neighbors: 10,
            gamma: F::from(1.0).expect("Failed to convert constant to float"),
            normalized_laplacian: true,
            max_iter: 300,
            n_init: 10,
            tol: F::from(1e-4).expect("Failed to convert constant to float"),
            random_seed: None,
            eigen_solver: "arpack".to_string(),
            auto_n_clusters: false,
        }
    }
}
/// Performs spectral clustering on the given data.
///
/// Pipeline:
/// 1. Build an affinity graph from `data` according to `options.affinity`.
/// 2. Form the (optionally normalized) graph Laplacian.
/// 3. Embed samples using eigenvectors of the smallest eigenvalues.
/// 4. Run k-means on the embedding to obtain labels.
///
/// # Arguments
/// * `data` - An (n_samples, n_features) data matrix, or for
///   `AffinityMode::Precomputed` an (n_samples, n_samples) affinity matrix.
/// * `n_clusters` - Requested number of clusters (must be >= 2).
/// * `options` - Optional tuning parameters (defaults used when `None`).
///
/// # Returns
/// The spectral embedding (one row per sample) and the cluster label of each
/// sample.
///
/// # Errors
/// `InvalidInput` for malformed inputs; `ComputationError` when the degree
/// normalization fails (e.g. disconnected graph) or an eigensolve fails.
#[allow(dead_code)]
pub fn spectral_clustering<F>(
    data: ArrayView2<F>,
    n_clusters: usize,
    options: Option<SpectralClusteringOptions<F>>,
) -> Result<(Array2<F>, Array1<usize>)>
where
    F: Float
        + FromPrimitive
        + Debug
        + PartialOrd
        + ScalarOperand
        + 'static
        + std::iter::Sum
        + std::ops::AddAssign
        + std::ops::SubAssign
        + std::ops::MulAssign
        + std::ops::DivAssign
        + std::ops::RemAssign
        + std::fmt::Display
        + Send
        + Sync,
{
    let opts = options.unwrap_or_default();
    let n_samples = data.shape()[0];
    scirs2_core::validation::clustering::validate_clustering_data(
        &data,
        "Spectral clustering",
        false,
        Some(2),
    )
    .map_err(|e| ClusteringError::InvalidInput(format!("Spectral clustering: {}", e)))?;
    if n_clusters < 2 {
        // BUG FIX: message previously read "number of _clusters" (stray
        // underscore in a user-facing error).
        return Err(ClusteringError::InvalidInput(format!(
            "Spectral clustering: number of clusters must be >= 2, got {}",
            n_clusters
        )));
    }
    scirs2_core::validation::clustering::check_n_clusters_bounds(
        &data,
        n_clusters,
        "Spectral clustering",
    )
    .map_err(|e| ClusteringError::InvalidInput(format!("{}", e)))?;
    // Build the affinity matrix according to the requested mode.
    // BUG FIX: NearestNeighbors and RBF previously treated any *square* data
    // matrix as a precomputed affinity, so datasets with
    // n_samples == n_features could never be clustered in those modes.
    // Precomputed input must now use AffinityMode::Precomputed explicitly.
    let affinity = match opts.affinity {
        AffinityMode::NearestNeighbors => knn_affinity(data, opts.n_neighbors)?,
        AffinityMode::RBF => rbf_affinity(data, opts.gamma)?,
        AffinityMode::Precomputed => {
            if data.shape()[0] != data.shape()[1] {
                return Err(ClusteringError::InvalidInput(
                    "For precomputed affinity, data must be a square matrix".to_string(),
                ));
            }
            data.to_owned()
        }
    };
    // Graph Laplacian: normalized L_sym = I - D^{-1/2} A D^{-1/2}, or the
    // unnormalized L = D - A.
    let laplacian = if opts.normalized_laplacian {
        normalized_laplacian(&affinity)?
    } else {
        let mut lap = Array2::zeros((n_samples, n_samples));
        for i in 0..n_samples {
            // Diagonal holds the vertex degree (row sum of the affinity).
            lap[[i, i]] = affinity.row(i).sum();
        }
        for i in 0..n_samples {
            for j in 0..n_samples {
                lap[[i, j]] -= affinity[[i, j]];
            }
        }
        lap
    };
    // Tiny diagonal shift to keep the eigensolver numerically stable on
    // (near-)singular Laplacians. (The unshifted Laplacian is not needed
    // afterwards, so no clone is required.)
    let n = laplacian.nrows();
    let mut stabilized_laplacian = laplacian;
    let shift = F::from(1e-10).expect("Failed to convert constant to float");
    for i in 0..n {
        stabilized_laplacian[[i, i]] += shift;
    }
    // Eigendecomposition: full solve for tiny problems, otherwise only the
    // smallest k = n_clusters + 2 eigenpairs (extra columns cover the eigengap
    // heuristic and the skipped trivial eigenvector below).
    let (eigenvalues, eigenvectors) = if n <= 3 {
        eigh(&stabilized_laplacian.view(), None)?
    } else {
        let k = (n_clusters + 2).min(n);
        let max_iter = 1000;
        let tolerance = F::from(1e-10).expect("Failed to convert constant to float");
        smallest_k_eigh(&stabilized_laplacian.view(), k, max_iter, tolerance)?
    };
    // Optionally choose the cluster count from the largest eigenvalue gap.
    let actual_n_clusters = if opts.auto_n_clusters {
        eigengap_heuristic(&eigenvalues.to_vec(), n_clusters)
    } else {
        n_clusters
    };
    // Spectral embedding: with L_sym take the first `actual_n_clusters`
    // eigenvectors; with L = D - A skip the trivial constant eigenvector
    // (column 0).
    let embedding = if opts.normalized_laplacian {
        eigenvectors.slice(s![.., ..actual_n_clusters]).to_owned()
    } else {
        eigenvectors
            .slice(s![.., 1..(actual_n_clusters + 1)])
            .to_owned()
    };
    // Row-normalize the embedding (Ng-Jordan-Weiss style) when using L_sym;
    // rows with (near-)zero norm are left untouched to avoid division by zero.
    let normalized_embedding = if opts.normalized_laplacian {
        let mut norm_embedding = embedding.clone();
        for i in 0..n_samples {
            let row = embedding.row(i);
            let norm: F = row.iter().map(|&x| x * x).sum::<F>().sqrt();
            if norm > F::epsilon() {
                for j in 0..actual_n_clusters {
                    norm_embedding[[i, j]] = embedding[[i, j]] / norm;
                }
            }
        }
        norm_embedding
    } else {
        embedding
    };
    // Final assignment: k-means in the embedded space.
    let kmeans_opts = KMeansOptions {
        max_iter: opts.max_iter,
        tol: opts.tol,
        random_seed: opts.random_seed,
        n_init: opts.n_init,
        init_method: KMeansInit::KMeansPlusPlus,
    };
    let (_, labels) = kmeans_with_options(
        normalized_embedding.view(),
        actual_n_clusters,
        Some(kmeans_opts),
    )?;
    Ok((normalized_embedding, labels))
}
/// Convenience wrapper that splits the data into exactly two clusters via
/// [`spectral_clustering`], returning only the labels.
///
/// # Errors
/// Propagates any error produced by `spectral_clustering`.
#[allow(dead_code)]
pub fn spectral_bipartition<F>(
    data: ArrayView2<F>,
    options: Option<SpectralClusteringOptions<F>>,
) -> Result<Array1<usize>>
where
    F: Float
        + FromPrimitive
        + Debug
        + PartialOrd
        + ScalarOperand
        + 'static
        + std::iter::Sum
        + std::ops::AddAssign
        + std::ops::SubAssign
        + std::ops::MulAssign
        + std::ops::DivAssign
        + std::ops::RemAssign
        + std::fmt::Display
        + Send
        + Sync,
{
    // Delegate with a fixed cluster count of 2 and discard the embedding.
    spectral_clustering(data, 2, options).map(|(_, labels)| labels)
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// Two well-separated point groups should yield a valid two-way
    /// clustering with an RBF affinity and the unnormalized Laplacian.
    #[test]
    fn test_spectral_clustering_basic() {
        // 6 points in 2-D: three near (1, 1) and three near (5, 5).
        let data = Array2::from_shape_vec(
            (6, 2),
            vec![
                1.0, 1.0, 1.1, 1.1, 0.9, 0.9, 5.0, 5.0, 5.1, 5.1, 4.9, 4.9,
            ],
        )
        .expect("Operation failed");
        let options = SpectralClusteringOptions {
            affinity: AffinityMode::RBF,
            gamma: 0.1,
            normalized_laplacian: false,
            ..Default::default()
        };
        let result = spectral_clustering(data.view(), 2, Some(options));
        assert!(
            result.is_ok(),
            "Spectral clustering should not fail: {:?}",
            result.err()
        );
        let (embeddings, labels) = result.expect("Operation failed");
        // One embedding row and one label per sample.
        assert_eq!(embeddings.shape()[0], 6);
        assert_eq!(labels.len(), 6);
        // Labels must come from {0, 1} with at most two distinct values.
        let unique_labels: std::collections::HashSet<_> = labels.iter().cloned().collect();
        assert!(
            unique_labels.len() <= 2,
            "Should have at most 2 clusters, got: {:?}",
            unique_labels
        );
        assert!(
            labels.iter().all(|&l| l == 0 || l == 1),
            "All labels should be 0 or 1, got: {:?}",
            labels
        );
    }

    /// Two concentric rings (radii 1 and 3): spectral clustering should
    /// produce a non-trivial partition (not all points in one cluster).
    #[test]
    fn test_spectral_clustering_ring() {
        // 16 points in 2-D: 8 on the inner ring, 8 on the outer ring.
        let data = Array2::from_shape_vec(
            (16, 2),
            vec![
                1.0, 0.0, 0.7, 0.7, 0.0, 1.0, -0.7, 0.7, -1.0, 0.0, -0.7, -0.7, 0.0, -1.0, 0.7,
                -0.7, 3.0, 0.0, 2.1, 2.1, 0.0, 3.0, -2.1, 2.1, -3.0, 0.0, -2.1, -2.1, 0.0, -3.0, 2.1,
                -2.1,
            ],
        )
        .expect("Operation failed");
        // Small gamma keeps within-ring similarities high relative to
        // between-ring similarities.
        let options = SpectralClusteringOptions {
            affinity: AffinityMode::RBF,
            gamma: 0.05,
            n_init: 5,
            normalized_laplacian: false,
            ..Default::default()
        };
        let result = spectral_clustering(data.view(), 2, Some(options));
        assert!(
            result.is_ok(),
            "Spectral clustering should not fail: {:?}",
            result.err()
        );
        let (_, labels) = result.expect("Operation failed");
        let unique_labels: std::collections::HashSet<_> = labels.iter().cloned().collect();
        assert!(
            unique_labels.len() <= 2,
            "Should have at most 2 clusters, got: {:?}",
            unique_labels
        );
        // The partition must be non-degenerate: at least two labels in use.
        let first_label = labels[0];
        let all_same = labels.iter().all(|&l| l == first_label);
        assert!(!all_same, "All points should not be in the same cluster");
        assert!(
            labels.iter().all(|&l| l == 0 || l == 1),
            "All labels should be 0 or 1"
        );
    }
}