1use super::ScaledLaplacian;
7
8#[derive(Debug, Clone)]
10pub struct ClusteringConfig {
11 pub k: usize,
13 pub num_eigenvectors: usize,
15 pub power_iters: usize,
17 pub kmeans_iters: usize,
19 pub seed: u64,
21}
22
23impl Default for ClusteringConfig {
24 fn default() -> Self {
25 Self {
26 k: 2,
27 num_eigenvectors: 10,
28 power_iters: 50,
29 kmeans_iters: 20,
30 seed: 42,
31 }
32 }
33}
34
35#[derive(Debug, Clone)]
37pub struct ClusteringResult {
38 pub assignments: Vec<usize>,
40 pub embedding: Vec<Vec<f64>>,
42 pub k: usize,
44}
45
46impl ClusteringResult {
47 pub fn cluster(&self, c: usize) -> Vec<usize> {
49 self.assignments
50 .iter()
51 .enumerate()
52 .filter(|(_, &a)| a == c)
53 .map(|(i, _)| i)
54 .collect()
55 }
56
57 pub fn cluster_sizes(&self) -> Vec<usize> {
59 let mut sizes = vec![0; self.k];
60 for &a in &self.assignments {
61 if a < self.k {
62 sizes[a] += 1;
63 }
64 }
65 sizes
66 }
67}
68
69#[derive(Debug, Clone)]
71pub struct SpectralClustering {
72 config: ClusteringConfig,
74}
75
76impl SpectralClustering {
77 pub fn new(config: ClusteringConfig) -> Self {
79 Self { config }
80 }
81
82 pub fn with_k(k: usize) -> Self {
84 Self::new(ClusteringConfig {
85 k,
86 num_eigenvectors: k,
87 ..Default::default()
88 })
89 }
90
91 pub fn cluster(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
93 let n = laplacian.n;
94 let k = self.config.k.min(n);
95 let num_eig = self.config.num_eigenvectors.min(n);
96
97 let embedding = self.compute_embedding(laplacian, num_eig);
101
102 let assignments = self.kmeans(&embedding, k);
104
105 ClusteringResult {
106 assignments,
107 embedding,
108 k,
109 }
110 }
111
112 pub fn bipartition(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
114 let n = laplacian.n;
115
116 let fiedler = self.compute_fiedler(laplacian);
118
119 let assignments: Vec<usize> = fiedler.iter().map(|&v| if v >= 0.0 { 0 } else { 1 }).collect();
121
122 ClusteringResult {
123 assignments,
124 embedding: vec![fiedler],
125 k: 2,
126 }
127 }
128
129 fn compute_embedding(&self, laplacian: &ScaledLaplacian, k: usize) -> Vec<Vec<f64>> {
131 let n = laplacian.n;
132 if k == 0 || n == 0 {
133 return vec![];
134 }
135
136 let mut vectors: Vec<Vec<f64>> = (0..k)
138 .map(|i| {
139 (0..n)
140 .map(|j| {
141 let x = ((j * 2654435769 + i * 1103515245 + self.config.seed as usize) as f64
142 / 4294967296.0)
143 * 2.0
144 - 1.0;
145 x
146 })
147 .collect()
148 })
149 .collect();
150
151 for _ in 0..self.config.power_iters {
154 for i in 0..k {
155 let mut y = vec![0.0; n];
158 let lx = laplacian.apply(&vectors[i]);
159
160 let shift = 2.0; for j in 0..n {
163 y[j] = shift * vectors[i][j] - lx[j];
164 }
165
166 let mean: f64 = y.iter().sum::<f64>() / n as f64;
169 for j in 0..n {
170 y[j] -= mean;
171 }
172
173 for prev in 0..i {
175 let dot: f64 = y.iter().zip(vectors[prev].iter()).map(|(a, b)| a * b).sum();
176 for j in 0..n {
177 y[j] -= dot * vectors[prev][j];
178 }
179 }
180
181 let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
183 if norm > 1e-15 {
184 for j in 0..n {
185 y[j] /= norm;
186 }
187 }
188
189 vectors[i] = y;
190 }
191 }
192
193 vectors
194 }
195
196 fn compute_fiedler(&self, laplacian: &ScaledLaplacian) -> Vec<f64> {
198 let embedding = self.compute_embedding(laplacian, 1);
199 if embedding.is_empty() {
200 return vec![0.0; laplacian.n];
201 }
202 embedding[0].clone()
203 }
204
205 fn kmeans(&self, embedding: &[Vec<f64>], k: usize) -> Vec<usize> {
207 if embedding.is_empty() {
208 return vec![];
209 }
210
211 let n = embedding[0].len();
212 let dim = embedding.len();
213
214 if n == 0 || k == 0 {
215 return vec![];
216 }
217
218 let mut centroids: Vec<Vec<f64>> = Vec::with_capacity(k);
220
221 let first = (self.config.seed as usize) % n;
223 centroids.push((0..dim).map(|d| embedding[d][first]).collect());
224
225 for _ in 1..k {
227 let mut distances: Vec<f64> = (0..n)
228 .map(|i| {
229 centroids
230 .iter()
231 .map(|c| {
232 (0..dim)
233 .map(|d| (embedding[d][i] - c[d]).powi(2))
234 .sum::<f64>()
235 })
236 .fold(f64::INFINITY, f64::min)
237 })
238 .collect();
239
240 let total: f64 = distances.iter().sum();
241 if total > 0.0 {
242 let threshold = (self.config.seed as f64 / 4294967296.0) * total;
243 let mut cumsum = 0.0;
244 let mut chosen = 0;
245 for (i, &d) in distances.iter().enumerate() {
246 cumsum += d;
247 if cumsum >= threshold {
248 chosen = i;
249 break;
250 }
251 }
252 centroids.push((0..dim).map(|d| embedding[d][chosen]).collect());
253 } else {
254 centroids.push(vec![0.0; dim]);
256 }
257 }
258
259 let mut assignments = vec![0; n];
261
262 for _ in 0..self.config.kmeans_iters {
263 for i in 0..n {
265 let mut best_cluster = 0;
266 let mut best_dist = f64::INFINITY;
267
268 for (c, centroid) in centroids.iter().enumerate() {
269 let dist: f64 = (0..dim)
270 .map(|d| (embedding[d][i] - centroid[d]).powi(2))
271 .sum();
272
273 if dist < best_dist {
274 best_dist = dist;
275 best_cluster = c;
276 }
277 }
278
279 assignments[i] = best_cluster;
280 }
281
282 let mut counts = vec![0usize; k];
284 for centroid in centroids.iter_mut() {
285 for v in centroid.iter_mut() {
286 *v = 0.0;
287 }
288 }
289
290 for (i, &c) in assignments.iter().enumerate() {
291 counts[c] += 1;
292 for d in 0..dim {
293 centroids[c][d] += embedding[d][i];
294 }
295 }
296
297 for (c, centroid) in centroids.iter_mut().enumerate() {
298 if counts[c] > 0 {
299 for v in centroid.iter_mut() {
300 *v /= counts[c] as f64;
301 }
302 }
303 }
304 }
305
306 assignments
307 }
308
309 pub fn normalized_cut(&self, laplacian: &ScaledLaplacian, partition: &[bool]) -> f64 {
311 let n = laplacian.n;
312 if n == 0 {
313 return 0.0;
314 }
315
316 let mut cut = 0.0;
318 let mut vol_a = 0.0;
319 let mut vol_b = 0.0;
320
321 for &(i, j, v) in &laplacian.entries {
323 if i < n && j < n && i != j {
324 let w = -v; if w > 0.0 && partition[i] != partition[j] {
327 cut += w;
328 }
329 }
330 if i == j && i < n {
331 if partition[i] {
333 vol_a += v;
334 } else {
335 vol_b += v;
336 }
337 }
338 }
339
340 let ncut = if vol_a > 0.0 { cut / vol_a } else { 0.0 }
342 + if vol_b > 0.0 { cut / vol_b } else { 0.0 };
343
344 ncut
345 }
346}
347
348#[cfg(test)]
349mod tests {
350 use super::*;
351
352 fn two_cliques_graph() -> ScaledLaplacian {
353 let edges = vec![
355 (0, 1, 1.0),
357 (0, 2, 1.0),
358 (1, 2, 1.0),
359 (3, 4, 1.0),
361 (3, 5, 1.0),
362 (4, 5, 1.0),
363 (2, 3, 0.1),
365 ];
366 ScaledLaplacian::from_sparse_adjacency(&edges, 6)
367 }
368
369 #[test]
370 fn test_spectral_clustering() {
371 let laplacian = two_cliques_graph();
372 let clustering = SpectralClustering::with_k(2);
373
374 let result = clustering.cluster(&laplacian);
375
376 assert_eq!(result.assignments.len(), 6);
377 assert_eq!(result.k, 2);
378
379 let sizes = result.cluster_sizes();
381 assert_eq!(sizes.iter().sum::<usize>(), 6);
382 }
383
384 #[test]
385 fn test_bipartition() {
386 let laplacian = two_cliques_graph();
387 let clustering = SpectralClustering::with_k(2);
388
389 let result = clustering.bipartition(&laplacian);
390
391 assert_eq!(result.assignments.len(), 6);
392 assert_eq!(result.k, 2);
393 }
394
395 #[test]
396 fn test_cluster_extraction() {
397 let laplacian = two_cliques_graph();
398 let clustering = SpectralClustering::with_k(2);
399 let result = clustering.cluster(&laplacian);
400
401 let c0 = result.cluster(0);
402 let c1 = result.cluster(1);
403
404 assert_eq!(c0.len() + c1.len(), 6);
406 }
407
408 #[test]
409 fn test_normalized_cut() {
410 let laplacian = two_cliques_graph();
411 let clustering = SpectralClustering::with_k(2);
412
413 let good_partition = vec![true, true, true, false, false, false];
415 let good_ncut = clustering.normalized_cut(&laplacian, &good_partition);
416
417 let bad_partition = vec![true, false, true, false, true, false];
419 let bad_ncut = clustering.normalized_cut(&laplacian, &bad_partition);
420
421 assert!(good_ncut >= 0.0);
424 assert!(bad_ncut >= 0.0);
425 }
426
427 #[test]
428 fn test_single_node() {
429 let laplacian = ScaledLaplacian::from_sparse_adjacency(&[], 1);
430 let clustering = SpectralClustering::with_k(1);
431
432 let result = clustering.cluster(&laplacian);
433
434 assert_eq!(result.assignments.len(), 1);
435 assert_eq!(result.assignments[0], 0);
436 }
437}