1use super::ScaledLaplacian;
7
8#[derive(Debug, Clone)]
10pub struct ClusteringConfig {
11 pub k: usize,
13 pub num_eigenvectors: usize,
15 pub power_iters: usize,
17 pub kmeans_iters: usize,
19 pub seed: u64,
21}
22
23impl Default for ClusteringConfig {
24 fn default() -> Self {
25 Self {
26 k: 2,
27 num_eigenvectors: 10,
28 power_iters: 50,
29 kmeans_iters: 20,
30 seed: 42,
31 }
32 }
33}
34
35#[derive(Debug, Clone)]
37pub struct ClusteringResult {
38 pub assignments: Vec<usize>,
40 pub embedding: Vec<Vec<f64>>,
42 pub k: usize,
44}
45
46impl ClusteringResult {
47 pub fn cluster(&self, c: usize) -> Vec<usize> {
49 self.assignments
50 .iter()
51 .enumerate()
52 .filter(|(_, &a)| a == c)
53 .map(|(i, _)| i)
54 .collect()
55 }
56
57 pub fn cluster_sizes(&self) -> Vec<usize> {
59 let mut sizes = vec![0; self.k];
60 for &a in &self.assignments {
61 if a < self.k {
62 sizes[a] += 1;
63 }
64 }
65 sizes
66 }
67}
68
69#[derive(Debug, Clone)]
71pub struct SpectralClustering {
72 config: ClusteringConfig,
74}
75
76impl SpectralClustering {
77 pub fn new(config: ClusteringConfig) -> Self {
79 Self { config }
80 }
81
82 pub fn with_k(k: usize) -> Self {
84 Self::new(ClusteringConfig {
85 k,
86 num_eigenvectors: k,
87 ..Default::default()
88 })
89 }
90
91 pub fn cluster(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
93 let n = laplacian.n;
94 let k = self.config.k.min(n);
95 let num_eig = self.config.num_eigenvectors.min(n);
96
97 let embedding = self.compute_embedding(laplacian, num_eig);
101
102 let assignments = self.kmeans(&embedding, k);
104
105 ClusteringResult {
106 assignments,
107 embedding,
108 k,
109 }
110 }
111
112 pub fn bipartition(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
114 let n = laplacian.n;
115
116 let fiedler = self.compute_fiedler(laplacian);
118
119 let assignments: Vec<usize> = fiedler
121 .iter()
122 .map(|&v| if v >= 0.0 { 0 } else { 1 })
123 .collect();
124
125 ClusteringResult {
126 assignments,
127 embedding: vec![fiedler],
128 k: 2,
129 }
130 }
131
132 fn compute_embedding(&self, laplacian: &ScaledLaplacian, k: usize) -> Vec<Vec<f64>> {
134 let n = laplacian.n;
135 if k == 0 || n == 0 {
136 return vec![];
137 }
138
139 let mut vectors: Vec<Vec<f64>> = (0..k)
141 .map(|i| {
142 (0..n)
143 .map(|j| {
144 let x = ((j * 2654435769 + i * 1103515245 + self.config.seed as usize)
145 as f64
146 / 4294967296.0)
147 * 2.0
148 - 1.0;
149 x
150 })
151 .collect()
152 })
153 .collect();
154
155 for _ in 0..self.config.power_iters {
158 for i in 0..k {
159 let mut y = vec![0.0; n];
162 let lx = laplacian.apply(&vectors[i]);
163
164 let shift = 2.0; for j in 0..n {
167 y[j] = shift * vectors[i][j] - lx[j];
168 }
169
170 let mean: f64 = y.iter().sum::<f64>() / n as f64;
173 for j in 0..n {
174 y[j] -= mean;
175 }
176
177 for prev in 0..i {
179 let dot: f64 = y.iter().zip(vectors[prev].iter()).map(|(a, b)| a * b).sum();
180 for j in 0..n {
181 y[j] -= dot * vectors[prev][j];
182 }
183 }
184
185 let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
187 if norm > 1e-15 {
188 for j in 0..n {
189 y[j] /= norm;
190 }
191 }
192
193 vectors[i] = y;
194 }
195 }
196
197 vectors
198 }
199
200 fn compute_fiedler(&self, laplacian: &ScaledLaplacian) -> Vec<f64> {
202 let embedding = self.compute_embedding(laplacian, 1);
203 if embedding.is_empty() {
204 return vec![0.0; laplacian.n];
205 }
206 embedding[0].clone()
207 }
208
209 fn kmeans(&self, embedding: &[Vec<f64>], k: usize) -> Vec<usize> {
211 if embedding.is_empty() {
212 return vec![];
213 }
214
215 let n = embedding[0].len();
216 let dim = embedding.len();
217
218 if n == 0 || k == 0 {
219 return vec![];
220 }
221
222 let mut centroids: Vec<Vec<f64>> = Vec::with_capacity(k);
224
225 let first = (self.config.seed as usize) % n;
227 centroids.push((0..dim).map(|d| embedding[d][first]).collect());
228
229 for _ in 1..k {
231 let mut distances: Vec<f64> = (0..n)
232 .map(|i| {
233 centroids
234 .iter()
235 .map(|c| {
236 (0..dim)
237 .map(|d| (embedding[d][i] - c[d]).powi(2))
238 .sum::<f64>()
239 })
240 .fold(f64::INFINITY, f64::min)
241 })
242 .collect();
243
244 let total: f64 = distances.iter().sum();
245 if total > 0.0 {
246 let threshold = (self.config.seed as f64 / 4294967296.0) * total;
247 let mut cumsum = 0.0;
248 let mut chosen = 0;
249 for (i, &d) in distances.iter().enumerate() {
250 cumsum += d;
251 if cumsum >= threshold {
252 chosen = i;
253 break;
254 }
255 }
256 centroids.push((0..dim).map(|d| embedding[d][chosen]).collect());
257 } else {
258 centroids.push(vec![0.0; dim]);
260 }
261 }
262
263 let mut assignments = vec![0; n];
265
266 for _ in 0..self.config.kmeans_iters {
267 for i in 0..n {
269 let mut best_cluster = 0;
270 let mut best_dist = f64::INFINITY;
271
272 for (c, centroid) in centroids.iter().enumerate() {
273 let dist: f64 = (0..dim)
274 .map(|d| (embedding[d][i] - centroid[d]).powi(2))
275 .sum();
276
277 if dist < best_dist {
278 best_dist = dist;
279 best_cluster = c;
280 }
281 }
282
283 assignments[i] = best_cluster;
284 }
285
286 let mut counts = vec![0usize; k];
288 for centroid in centroids.iter_mut() {
289 for v in centroid.iter_mut() {
290 *v = 0.0;
291 }
292 }
293
294 for (i, &c) in assignments.iter().enumerate() {
295 counts[c] += 1;
296 for d in 0..dim {
297 centroids[c][d] += embedding[d][i];
298 }
299 }
300
301 for (c, centroid) in centroids.iter_mut().enumerate() {
302 if counts[c] > 0 {
303 for v in centroid.iter_mut() {
304 *v /= counts[c] as f64;
305 }
306 }
307 }
308 }
309
310 assignments
311 }
312
313 pub fn normalized_cut(&self, laplacian: &ScaledLaplacian, partition: &[bool]) -> f64 {
315 let n = laplacian.n;
316 if n == 0 {
317 return 0.0;
318 }
319
320 let mut cut = 0.0;
322 let mut vol_a = 0.0;
323 let mut vol_b = 0.0;
324
325 for &(i, j, v) in &laplacian.entries {
327 if i < n && j < n && i != j {
328 let w = -v; if w > 0.0 && partition[i] != partition[j] {
331 cut += w;
332 }
333 }
334 if i == j && i < n {
335 if partition[i] {
337 vol_a += v;
338 } else {
339 vol_b += v;
340 }
341 }
342 }
343
344 let ncut = if vol_a > 0.0 { cut / vol_a } else { 0.0 }
346 + if vol_b > 0.0 { cut / vol_b } else { 0.0 };
347
348 ncut
349 }
350}
351
352#[cfg(test)]
353mod tests {
354 use super::*;
355
356 fn two_cliques_graph() -> ScaledLaplacian {
357 let edges = vec![
359 (0, 1, 1.0),
361 (0, 2, 1.0),
362 (1, 2, 1.0),
363 (3, 4, 1.0),
365 (3, 5, 1.0),
366 (4, 5, 1.0),
367 (2, 3, 0.1),
369 ];
370 ScaledLaplacian::from_sparse_adjacency(&edges, 6)
371 }
372
373 #[test]
374 fn test_spectral_clustering() {
375 let laplacian = two_cliques_graph();
376 let clustering = SpectralClustering::with_k(2);
377
378 let result = clustering.cluster(&laplacian);
379
380 assert_eq!(result.assignments.len(), 6);
381 assert_eq!(result.k, 2);
382
383 let sizes = result.cluster_sizes();
385 assert_eq!(sizes.iter().sum::<usize>(), 6);
386 }
387
388 #[test]
389 fn test_bipartition() {
390 let laplacian = two_cliques_graph();
391 let clustering = SpectralClustering::with_k(2);
392
393 let result = clustering.bipartition(&laplacian);
394
395 assert_eq!(result.assignments.len(), 6);
396 assert_eq!(result.k, 2);
397 }
398
399 #[test]
400 fn test_cluster_extraction() {
401 let laplacian = two_cliques_graph();
402 let clustering = SpectralClustering::with_k(2);
403 let result = clustering.cluster(&laplacian);
404
405 let c0 = result.cluster(0);
406 let c1 = result.cluster(1);
407
408 assert_eq!(c0.len() + c1.len(), 6);
410 }
411
412 #[test]
413 fn test_normalized_cut() {
414 let laplacian = two_cliques_graph();
415 let clustering = SpectralClustering::with_k(2);
416
417 let good_partition = vec![true, true, true, false, false, false];
419 let good_ncut = clustering.normalized_cut(&laplacian, &good_partition);
420
421 let bad_partition = vec![true, false, true, false, true, false];
423 let bad_ncut = clustering.normalized_cut(&laplacian, &bad_partition);
424
425 assert!(good_ncut >= 0.0);
428 assert!(bad_ncut >= 0.0);
429 }
430
431 #[test]
432 fn test_single_node() {
433 let laplacian = ScaledLaplacian::from_sparse_adjacency(&[], 1);
434 let clustering = SpectralClustering::with_k(1);
435
436 let result = clustering.cluster(&laplacian);
437
438 assert_eq!(result.assignments.len(), 1);
439 assert_eq!(result.assignments[0], 0);
440 }
441}