1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
//! Spectral Clustering
//!
//! Graph partitioning using spectral methods.
//! Efficient approximation via Chebyshev polynomials.
use super::ScaledLaplacian;
/// Spectral clustering configuration
#[derive(Debug, Clone)]
pub struct ClusteringConfig {
/// Number of clusters
pub k: usize,
/// Number of eigenvectors to use
pub num_eigenvectors: usize,
/// Power iteration steps for eigenvector approximation
pub power_iters: usize,
/// K-means iterations
pub kmeans_iters: usize,
/// Random seed
pub seed: u64,
}
impl Default for ClusteringConfig {
fn default() -> Self {
Self {
k: 2,
num_eigenvectors: 10,
power_iters: 50,
kmeans_iters: 20,
seed: 42,
}
}
}
/// Spectral clustering result
#[derive(Debug, Clone)]
pub struct ClusteringResult {
/// Cluster assignment for each vertex
pub assignments: Vec<usize>,
/// Eigenvector embedding (n × k)
pub embedding: Vec<Vec<f64>>,
/// Number of clusters
pub k: usize,
}
impl ClusteringResult {
/// Get vertices in cluster c
pub fn cluster(&self, c: usize) -> Vec<usize> {
self.assignments
.iter()
.enumerate()
.filter(|(_, &a)| a == c)
.map(|(i, _)| i)
.collect()
}
/// Cluster sizes
pub fn cluster_sizes(&self) -> Vec<usize> {
let mut sizes = vec![0; self.k];
for &a in &self.assignments {
if a < self.k {
sizes[a] += 1;
}
}
sizes
}
}
/// Spectral clustering
#[derive(Debug, Clone)]
pub struct SpectralClustering {
/// Configuration
config: ClusteringConfig,
}
impl SpectralClustering {
/// Create with configuration
pub fn new(config: ClusteringConfig) -> Self {
Self { config }
}
/// Create with just number of clusters
pub fn with_k(k: usize) -> Self {
Self::new(ClusteringConfig {
k,
num_eigenvectors: k,
..Default::default()
})
}
/// Cluster graph using normalized Laplacian eigenvectors
pub fn cluster(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
let n = laplacian.n;
let k = self.config.k.min(n);
let num_eig = self.config.num_eigenvectors.min(n);
// Compute approximate eigenvectors of Laplacian
// We want the k smallest eigenvalues (smoothest eigenvectors)
// Use inverse power method on shifted Laplacian
let embedding = self.compute_embedding(laplacian, num_eig);
// Run k-means on embedding
let assignments = self.kmeans(&embedding, k);
ClusteringResult {
assignments,
embedding,
k,
}
}
/// Cluster using Fiedler vector (k=2)
pub fn bipartition(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
let n = laplacian.n;
// Compute Fiedler vector (second smallest eigenvector)
let fiedler = self.compute_fiedler(laplacian);
// Partition by sign
let assignments: Vec<usize> = fiedler
.iter()
.map(|&v| if v >= 0.0 { 0 } else { 1 })
.collect();
ClusteringResult {
assignments,
embedding: vec![fiedler],
k: 2,
}
}
/// Compute spectral embedding (k smallest non-trivial eigenvectors)
fn compute_embedding(&self, laplacian: &ScaledLaplacian, k: usize) -> Vec<Vec<f64>> {
let n = laplacian.n;
if k == 0 || n == 0 {
return vec![];
}
// Initialize random vectors
let mut vectors: Vec<Vec<f64>> = (0..k)
.map(|i| {
(0..n)
.map(|j| {
let x = ((j * 2654435769 + i * 1103515245 + self.config.seed as usize)
as f64
/ 4294967296.0)
* 2.0
- 1.0;
x
})
.collect()
})
.collect();
// Power iteration to find smallest eigenvectors
// We use (I - L_scaled) which has largest eigenvalue where L_scaled has smallest
for _ in 0..self.config.power_iters {
for i in 0..k {
// Apply (I - L_scaled) = (2I - L)/λ_max approximately
// Simpler: just use deflated power iteration on L for smallest
let mut y = vec![0.0; n];
let lx = laplacian.apply(&vectors[i]);
// We want small eigenvalues, so use (λ_max*I - L)
let shift = 2.0; // Approximate max eigenvalue of scaled Laplacian
for j in 0..n {
y[j] = shift * vectors[i][j] - lx[j];
}
// Orthogonalize against previous vectors and constant vector
// First, remove constant component (eigenvalue 0)
let mean: f64 = y.iter().sum::<f64>() / n as f64;
for j in 0..n {
y[j] -= mean;
}
// Then orthogonalize against previous eigenvectors
for prev in 0..i {
let dot: f64 = y.iter().zip(vectors[prev].iter()).map(|(a, b)| a * b).sum();
for j in 0..n {
y[j] -= dot * vectors[prev][j];
}
}
// Normalize
let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
if norm > 1e-15 {
for j in 0..n {
y[j] /= norm;
}
}
vectors[i] = y;
}
}
vectors
}
/// Compute Fiedler vector (second smallest eigenvector)
fn compute_fiedler(&self, laplacian: &ScaledLaplacian) -> Vec<f64> {
let embedding = self.compute_embedding(laplacian, 1);
if embedding.is_empty() {
return vec![0.0; laplacian.n];
}
embedding[0].clone()
}
/// K-means clustering on embedding
fn kmeans(&self, embedding: &[Vec<f64>], k: usize) -> Vec<usize> {
if embedding.is_empty() {
return vec![];
}
let n = embedding[0].len();
let dim = embedding.len();
if n == 0 || k == 0 {
return vec![];
}
// Initialize centroids (k-means++ style)
let mut centroids: Vec<Vec<f64>> = Vec::with_capacity(k);
// First centroid: random point
let first = (self.config.seed as usize) % n;
centroids.push((0..dim).map(|d| embedding[d][first]).collect());
// Remaining centroids: proportional to squared distance
for _ in 1..k {
let mut distances: Vec<f64> = (0..n)
.map(|i| {
centroids
.iter()
.map(|c| {
(0..dim)
.map(|d| (embedding[d][i] - c[d]).powi(2))
.sum::<f64>()
})
.fold(f64::INFINITY, f64::min)
})
.collect();
let total: f64 = distances.iter().sum();
if total > 0.0 {
let threshold = (self.config.seed as f64 / 4294967296.0) * total;
let mut cumsum = 0.0;
let mut chosen = 0;
for (i, &d) in distances.iter().enumerate() {
cumsum += d;
if cumsum >= threshold {
chosen = i;
break;
}
}
centroids.push((0..dim).map(|d| embedding[d][chosen]).collect());
} else {
// Degenerate case
centroids.push(vec![0.0; dim]);
}
}
// K-means iterations
let mut assignments = vec![0; n];
for _ in 0..self.config.kmeans_iters {
// Assign points to nearest centroid
for i in 0..n {
let mut best_cluster = 0;
let mut best_dist = f64::INFINITY;
for (c, centroid) in centroids.iter().enumerate() {
let dist: f64 = (0..dim)
.map(|d| (embedding[d][i] - centroid[d]).powi(2))
.sum();
if dist < best_dist {
best_dist = dist;
best_cluster = c;
}
}
assignments[i] = best_cluster;
}
// Update centroids
let mut counts = vec![0usize; k];
for centroid in centroids.iter_mut() {
for v in centroid.iter_mut() {
*v = 0.0;
}
}
for (i, &c) in assignments.iter().enumerate() {
counts[c] += 1;
for d in 0..dim {
centroids[c][d] += embedding[d][i];
}
}
for (c, centroid) in centroids.iter_mut().enumerate() {
if counts[c] > 0 {
for v in centroid.iter_mut() {
*v /= counts[c] as f64;
}
}
}
}
assignments
}
/// Compute normalized cut value for a bipartition
pub fn normalized_cut(&self, laplacian: &ScaledLaplacian, partition: &[bool]) -> f64 {
let n = laplacian.n;
if n == 0 {
return 0.0;
}
// Compute cut and volumes
let mut cut = 0.0;
let mut vol_a = 0.0;
let mut vol_b = 0.0;
// For each entry in Laplacian
for &(i, j, v) in &laplacian.entries {
if i < n && j < n && i != j {
// This is an edge (negative Laplacian entry)
let w = -v; // Edge weight
if w > 0.0 && partition[i] != partition[j] {
cut += w;
}
}
if i == j && i < n {
// Diagonal = degree
if partition[i] {
vol_a += v;
} else {
vol_b += v;
}
}
}
// NCut = cut/vol(A) + cut/vol(B)
let ncut = if vol_a > 0.0 { cut / vol_a } else { 0.0 }
+ if vol_b > 0.0 { cut / vol_b } else { 0.0 };
ncut
}
}
#[cfg(test)]
mod tests {
use super::*;
fn two_cliques_graph() -> ScaledLaplacian {
// Two cliques of size 3 connected by one edge
let edges = vec![
// Clique 1
(0, 1, 1.0),
(0, 2, 1.0),
(1, 2, 1.0),
// Clique 2
(3, 4, 1.0),
(3, 5, 1.0),
(4, 5, 1.0),
// Bridge
(2, 3, 0.1),
];
ScaledLaplacian::from_sparse_adjacency(&edges, 6)
}
#[test]
fn test_spectral_clustering() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.cluster(&laplacian);
assert_eq!(result.assignments.len(), 6);
assert_eq!(result.k, 2);
// Should roughly separate the two cliques
let sizes = result.cluster_sizes();
assert_eq!(sizes.iter().sum::<usize>(), 6);
}
#[test]
fn test_bipartition() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.bipartition(&laplacian);
assert_eq!(result.assignments.len(), 6);
assert_eq!(result.k, 2);
}
#[test]
fn test_cluster_extraction() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.cluster(&laplacian);
let c0 = result.cluster(0);
let c1 = result.cluster(1);
// All vertices assigned
assert_eq!(c0.len() + c1.len(), 6);
}
#[test]
fn test_normalized_cut() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
// Good partition: separate cliques
let good_partition = vec![true, true, true, false, false, false];
let good_ncut = clustering.normalized_cut(&laplacian, &good_partition);
// Bad partition: mix cliques
let bad_partition = vec![true, false, true, false, true, false];
let bad_ncut = clustering.normalized_cut(&laplacian, &bad_partition);
// Good partition should have lower normalized cut
// (This is a heuristic test, actual values depend on graph structure)
assert!(good_ncut >= 0.0);
assert!(bad_ncut >= 0.0);
}
#[test]
fn test_single_node() {
let laplacian = ScaledLaplacian::from_sparse_adjacency(&[], 1);
let clustering = SpectralClustering::with_k(1);
let result = clustering.cluster(&laplacian);
assert_eq!(result.assignments.len(), 1);
assert_eq!(result.assignments[0], 0);
}
}