arrowspace 0.26.2

Graph Wiring for embeddings using physical networks wiring. Provides graph analysis, vector search and a energy-distribution stats.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
//! Graph motif detection via triangle density and spectral cohesion.
//!
//! This module provides efficient triangle-based motif spotting on sparse graph Laplacians,
//! leveraging local clustering coefficients and optional Rayleigh-quotient validation
//! to surface cohesive, low-boundary subgraphs and near-cliques.
//!
//! # Overview
//!
//! - **Motives trait**: Public API for motif detection on any graph structure.
//! - **MotiveConfig**: Tunable parameters for seeding, expansion, and deduplication.
//! - **Zero-copy adjacency**: Iterates Laplacian off-diagonals on the fly; no separate matrix.
//! - **Triangle seeding**: Seeds from nodes with high triangle counts and clustering ≥ threshold.
//! - **Greedy expansion**: Grows motifs by maximizing triangle gain per added node.
//! - **Rayleigh validation**: Optional spectral check to keep sets cohesive and low-cut.
//!
//! # Usage
//!
//! ```ignore
//! use arrowspace::graph::GraphLaplacian;
//! use arrowspace::motives::{Motives, MotiveConfig};
//!
//! let gl: GraphLaplacian = /* ... */;
//! let cfg = MotiveConfig {
//!     top_l: 16,
//!     min_triangles: 3,
//!     min_clust: 0.5,
//!     max_motif_size: 24,
//!     max_sets: 128,
//!     jaccard_dedup: 0.8,
//! };
//! let motifs: Vec<Vec<usize>> = gl.spot_motives(&cfg);
//! ```
//!
//! # References
//!
//! - Scalable motif-aware clustering: <https://arxiv.org/abs/1606.06235>
//! - Local clustering coefficient: <https://en.wikipedia.org/wiki/Clustering_coefficient>
//! - Cheeger inequality & spectral cuts: MIT OCW Lecture Notes

use crate::graph::GraphLaplacian;
use log::{debug, info};
use rayon::prelude::*;
use smartcore::linalg::basic::arrays::Array;
use std::collections::HashSet;

// ──────────────────────────────────────────────────────────────────────────────
// Configuration
// ──────────────────────────────────────────────────────────────────────────────

/// Configuration for motif detection.
#[derive(Clone, Debug)]
pub struct MotiveConfig {
    /// Prune to top-L strongest neighbors per node (from Laplacian).
    pub top_l: usize,
    /// Minimum triangle count to seed a motif.
    pub min_triangles: usize,
    /// Minimum local clustering coefficient C_i to seed a motif.
    pub min_clust: f64,
    /// Maximum size (number of nodes) per motif during greedy expansion.
    pub max_motif_size: usize,
    /// Limit on number of returned motif sets.
    pub max_sets: usize,
    /// Jaccard similarity threshold for deduplication (0..=1).
    pub jaccard_dedup: f64,
}

impl Default for MotiveConfig {
    fn default() -> Self {
        Self {
            top_l: 16,
            min_triangles: 2,
            min_clust: 0.4,
            max_motif_size: 32,
            max_sets: 256,
            jaccard_dedup: 0.8,
        }
    }
}

// ──────────────────────────────────────────────────────────────────────────────
// Public trait
// ──────────────────────────────────────────────────────────────────────────────

/// Trait for detecting graph motifs (triangles, near-cliques) via local density and spectral cohesion.
pub trait Motives {
    /// Spot motifs in the graph using triangle density, clustering coefficient, and optional Rayleigh validation.
    ///
    /// Returns a list of motif node-index sets, each represented as a `Vec<usize>` sorted ascending.
    ///
    /// # Arguments
    ///
    /// - `cfg`: Configuration for seeding, expansion, and filtering.
    ///
    /// # Algorithm
    ///
    /// 1. Build top-L neighbor lists per node by iterating Laplacian off-diagonals.
    /// 2. Count triangles per node and compute local clustering coefficient C_i = 2T_i / (k_i(k_i-1)).
    /// 3. Seed from nodes meeting `min_triangles` and `min_clust` thresholds, sorted by triangle count descending.
    /// 4. Greedily expand each seed by adding neighbors that maximize triangle gain with existing motif members.
    /// 5. Optional: enforce Rayleigh quotient on indicator ≤ `rayleigh_max` to keep motifs cohesive.
    /// 6. Deduplicate sets with Jaccard similarity ≥ `jaccard_dedup`.
    ///
    /// # Performance
    ///
    /// - Time: O(n · L²) for triangle enumeration, O(seeds · expansion) for greedy growth.
    /// - Space: O(n · L) for neighbor lists; no separate adjacency matrix.
    ///
    /// # References
    ///
    /// - Triangle-based clustering: <https://arxiv.org/abs/1606.06235>
    /// - Local clustering: <https://en.wikipedia.org/wiki/Clustering_coefficient>
    /// - Rayleigh quotient & cuts: MIT OCW, Cheeger inequality notes
    fn spot_motives_eigen(&self, cfg: &MotiveConfig) -> Vec<Vec<usize>>;

    /// EnergyMaps-aware motif spotting:
    /// 1) Spot motifs on the subcentroid Laplacian (self).
    /// 2) Map each subcentroid-set to original item indices via ArrowSpace.centroid_map.
    /// 3) Deduplicate and return item-index motifs.
    ///
    /// Requirements:
    /// - self.energy must be true (built via build_energy)
    /// - aspace.centroid_map must be Some(Vec<usize>) mapping item -> subcentroid index
    fn spot_motives_energy(
        &self,
        aspace: &crate::core::ArrowSpace,
        cfg: &crate::analysis::motives::MotiveConfig,
    ) -> Vec<Vec<usize>>;

    /// Check if a given set of nodes forms a clique in the graph.
    ///
    /// Returns `true` if all pairs in `set` are connected.
    fn is_clique(&self, set: &HashSet<usize>) -> bool;

    /// Compute the Rayleigh quotient R_L(1_S) = (1_S^T L 1_S) / (1_S^T 1_S) for an indicator vector of `set`.
    ///
    /// Low values indicate cohesive, low-boundary subgraphs.
    fn rayleigh_indicator(&self, set: &HashSet<usize>) -> f64;
}

// ──────────────────────────────────────────────────────────────────────────────
// Implementation for GraphLaplacian
// ──────────────────────────────────────────────────────────────────────────────

impl Motives for GraphLaplacian {
    fn spot_motives_eigen(&self, cfg: &MotiveConfig) -> Vec<Vec<usize>> {
        info!(
            "Spotting motifs: top_l={}, min_tri={}, min_clust={:.2}, max_size={}",
            cfg.top_l, cfg.min_triangles, cfg.min_clust, cfg.max_motif_size
        );

        let n = self.init_data.shape().0;

        // 1) Build top-L neighbor lists per node (parallel)
        let neigh: Vec<Vec<(usize, f64)>> = (0..n)
            .into_par_iter()
            .map(|i| {
                let mut nb: Vec<(usize, f64)> = self.neighbors_of(i);
                nb.sort_unstable_by(|a, b| {
                    b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
                });
                if nb.len() > cfg.top_l {
                    nb.truncate(cfg.top_l);
                }
                nb
            })
            .collect();

        // Sorted neighbor-only vectors for faster intersections
        let neigh_idx: Vec<Vec<usize>> = neigh
            .par_iter()
            .map(|v| {
                let mut ids: Vec<usize> = v.iter().map(|(j, _)| *j).collect();
                ids.sort_unstable();
                ids
            })
            .collect();

        // 2) Triangle stats (parallel per node)
        let (tri_count, clust) = triangle_stats_sorted(&neigh_idx, n);

        debug!(
            "Triangle stats: max_tri={}, max_clust={:.3}",
            tri_count.iter().max().unwrap_or(&0),
            clust.iter().cloned().fold(0.0f64, f64::max)
        );

        // 3) Seed selection and sorting (parallel filter, then sort)
        let mut seeds: Vec<usize> = (0..n)
            .into_par_iter()
            .filter(|&i| tri_count[i] >= cfg.min_triangles && clust[i] >= cfg.min_clust)
            .collect();
        seeds.par_sort_unstable_by_key(|&i| {
            std::cmp::Reverse((tri_count[i], (clust[i] * 1e6) as i64))
        });

        info!("Seeds identified: {}", seeds.len());
        debug!("Motives Seeds used: {:?}", seeds);

        // 4) Greedy expansions per seed in parallel, with local state
        let expansions: Vec<Option<HashSet<usize>>> = seeds
            .par_iter()
            .map(|&s| {
                // Skip seeds that are trivially dominated later during global dedup
                let mut seeds_hashset: HashSet<usize> = HashSet::from([s]);

                loop {
                    if seeds_hashset.len() >= cfg.max_motif_size {
                        break;
                    }

                    // Frontier
                    let mut cand = HashSet::new();
                    for &u in &seeds_hashset {
                        for &v in &neigh_idx[u] {
                            if !seeds_hashset.contains(&v) {
                                cand.insert(v);
                            }
                        }
                    }
                    if cand.is_empty() {
                        break;
                    }

                    // Select by triangle gain
                    let mut best_u: Option<usize> = None;
                    let mut best_gain: i64 = -1;

                    for u in cand {
                        // neighbors of u inside seeds_hashset
                        let mut s_nbrs: Vec<usize> = neigh_idx[u]
                            .iter()
                            .copied()
                            .filter(|v| seeds_hashset.contains(v))
                            .collect();
                        s_nbrs.sort_unstable();
                        let mut edges = 0i64;
                        for i in 0..s_nbrs.len() {
                            // count links among s_nbrs
                            let ui = s_nbrs[i];
                            // two-pointer count intersection between neigh_idx[ui] and s_nbrs[(i+1)..]
                            edges += count_edges_among(&neigh_idx[ui], &s_nbrs, i + 1) as i64;
                        }
                        if edges > best_gain {
                            best_gain = edges;
                            best_u = Some(u);
                        }
                    }

                    match best_u {
                        Some(u) => {
                            let mut s2 = seeds_hashset.clone();
                            s2.insert(u);
                            seeds_hashset = s2;
                        }
                        None => break,
                    }
                }

                if seeds_hashset.len() >= 3 {
                    Some(seeds_hashset)
                } else {
                    None
                }
            })
            .collect();

        // 5) Global Jaccard dedup (sequential for deterministic ordering)
        let mut results: Vec<HashSet<usize>> = Vec::new();
        for opt in expansions.into_iter().flatten() {
            let mut keep = true;
            for res in &results {
                if jaccard(&opt, res) >= cfg.jaccard_dedup {
                    keep = false;
                    break;
                }
            }
            if keep {
                results.push(opt);
                if results.len() >= cfg.max_sets {
                    break;
                }
            }
        }

        info!("Motifs found: {}", results.len());

        let mut out: Vec<Vec<usize>> = results
            .into_iter()
            .map(|res| {
                let mut v: Vec<usize> = res.into_iter().collect();
                v.sort_unstable();
                v
            })
            .collect();
        out.shrink_to_fit();
        out
    }

    fn spot_motives_energy(
        &self,
        aspace: &crate::core::ArrowSpace,
        cfg: &MotiveConfig,
    ) -> Vec<Vec<usize>> {
        // Operate strictly on the energy Laplacian over subcentroids
        let (rows, cols) = self.matrix.shape();
        if rows == 0 || rows != cols {
            return Vec::new();
        }
        let n_sc = rows;

        info!(
            "Spotting energy motifs: top_l={}, min_tri={}, min_clust={:.2}, max_size={}, n_sc={}",
            cfg.top_l, cfg.min_triangles, cfg.min_clust, cfg.max_motif_size, n_sc
        );

        // 1) Neighbors with clamped indices (parallel)
        let neigh_idx: Vec<Vec<usize>> = (0..n_sc)
            .into_par_iter()
            .map(|i| {
                let mut ids: Vec<usize> = self
                    .neighbors_of(i)
                    .into_iter()
                    .filter_map(|(j, w)| {
                        if j < n_sc && j != i && w > 0.0 {
                            Some(j)
                        } else {
                            None
                        }
                    })
                    .collect();
                ids.sort_unstable();
                if ids.len() > cfg.top_l {
                    ids.truncate(cfg.top_l);
                }
                ids
            })
            .collect();

        // 2) Triangle stats (parallel)
        let (tri_count, clust) = triangle_stats_sorted(&neigh_idx, n_sc);

        debug!(
            "Energy triangle stats: max_tri={}, max_clust={:.3}",
            tri_count.iter().copied().max().unwrap_or(0),
            clust.iter().cloned().fold(0.0f64, f64::max)
        );

        // 3) Seeds (parallel filter + deterministic sort)
        //
        // par_filter output order is scheduler-dependent; par_sort_unstable_by_key
        // can leave equal-scored seeds in arbitrary relative order because it is
        // unstable. We finish with a sequential sort_by_key so that seeds with
        // identical (tri_count, clust) scores always appear in ascending node-index
        // order, giving the greedy expansion in step 4 a fully reproducible input.
        let mut seeds: Vec<usize> = (0..n_sc)
            .into_par_iter()
            .filter(|&i| tri_count[i] >= cfg.min_triangles && clust[i] >= cfg.min_clust)
            .collect();

        // Primary key: triangle count descending + clustering coefficient descending.
        // Tie-breaker: node index ascending — deterministic across all runs.
        seeds.sort_by_key(|&i| {
            (
                std::cmp::Reverse(tri_count[i]),
                std::cmp::Reverse((clust[i] * 1e6) as i64),
                i, // ascending node index breaks all remaining ties
            )
        });

        debug!("Energy motifs seeds (subcentroids): {:?}", seeds);

        // 4) Parallel greedy expansions per seed in subcentroid space.
        //
        // Each expansion is independent, so par_iter is safe here.
        // The two sources of non-determinism fixed below are:
        //
        //   a) Candidate frontier built from HashSet iteration:
        //      `for &u in &seeds_hashset` and `for &v in &neigh_idx[u]` both iterated
        //      a HashSet, whose order is undefined. Two runs could produce different
        //      `cand` sets with different insertion orders, affecting which u wins
        //      when multiple candidates share the same best_gain.
        //      Fix: collect cand into a sorted Vec before the gain loop.
        //
        //   b) Tie-breaking in the gain loop:
        //      When two candidates share best_gain, `best_u` was last-write-wins
        //      over HashSet iteration order. After fix (a) the loop is ordered, so
        //      the first maximum found is always the lowest node index — stable.
        let expansions: Vec<Option<HashSet<usize>>> = seeds
            .par_iter()
            .map(|&s| {
                // Use a sorted Vec as the working set so `for &u in &seeds_vec`
                // always iterates in ascending node-index order.
                let mut seeds_vec: Vec<usize> = vec![s];

                loop {
                    if seeds_vec.len() >= cfg.max_motif_size {
                        break;
                    }

                    // Build frontier: neighbours of current set not yet in set.
                    // Collect into a HashSet first to deduplicate, then sort for
                    // deterministic iteration order in the gain loop below.
                    let seeds_set: HashSet<usize> = seeds_vec.iter().copied().collect();
                    let cand: Vec<usize> = {
                        let mut c = HashSet::new();
                        for &u in &seeds_vec {
                            for &v in &neigh_idx[u] {
                                if !seeds_set.contains(&v) {
                                    c.insert(v);
                                }
                            }
                        }
                        let mut v: Vec<usize> = c.into_iter().collect();
                        v.sort_unstable(); // deterministic candidate order
                        v
                    };

                    if cand.is_empty() {
                        break;
                    }

                    // Select candidate with highest triangle-gain.
                    // Iterating a sorted Vec means the first maximum encountered is
                    // always the lowest node index — fully deterministic tie-breaking.
                    let mut best_u: Option<usize> = None;
                    let mut best_gain: i64 = -1;

                    for &u in &cand {
                        let mut s_nbrs: Vec<usize> = neigh_idx[u]
                            .iter()
                            .copied()
                            .filter(|v| seeds_set.contains(v))
                            .collect();
                        s_nbrs.sort_unstable();
                        let mut edges = 0i64;
                        for i in 0..s_nbrs.len() {
                            let ui = s_nbrs[i];
                            edges += count_edges_among(&neigh_idx[ui], &s_nbrs, i + 1) as i64;
                        }
                        if edges > best_gain {
                            best_gain = edges;
                            best_u = Some(u);
                        }
                    }

                    match best_u {
                        Some(u) => {
                            seeds_vec.push(u);
                            seeds_vec.sort_unstable(); // keep working set sorted
                        }
                        None => break,
                    }
                }

                if seeds_vec.len() >= 3 {
                    Some(seeds_vec.iter().copied().collect::<HashSet<usize>>())
                } else {
                    None
                }
            })
            .collect();

        // 5) Global dedup in subcentroid space
        let mut sc_results: Vec<HashSet<usize>> = Vec::new();
        for opt in expansions.into_iter().flatten() {
            let mut keep = true;
            for res in &sc_results {
                if jaccard(&opt, res) >= cfg.jaccard_dedup {
                    keep = false;
                    break;
                }
            }
            if keep {
                sc_results.push(opt);
                if sc_results.len() >= cfg.max_sets {
                    break;
                }
            }
        }

        info!(
            "Energy motifs: {} subcentroid motifs found",
            sc_results.len()
        );

        // 6) Map to item indices via centroid_map (parallel)
        let cmap = match &aspace.centroid_map {
            Some(m) => m,
            None => {
                // Return subcentroid motifs if mapping not available
                let mut out_sc: Vec<Vec<usize>> = sc_results
                    .into_iter()
                    .map(|res| {
                        let mut v: Vec<usize> = res.into_iter().collect();
                        v.sort_unstable();
                        v
                    })
                    .collect();
                out_sc.shrink_to_fit();
                return out_sc;
            }
        };

        // sc_id -> items (parallel build with local buckets, then merge)
        cmap.par_iter().enumerate().for_each(|(_, &sc_idx)| {
            if sc_idx < n_sc {
                // local push via interior mutability avoided; collect pairs then group
                // fallback: lightweight locking-free grouping by preallocating pairs
            }
        });
        // Simpler and safe: collect pairs and group
        let sc_item_pairs: Vec<(usize, usize)> = cmap
            .par_iter()
            .enumerate()
            .filter_map(|(it, &sc)| if sc < n_sc { Some((sc, it)) } else { None })
            .collect();
        let mut sc_to_items: Vec<Vec<usize>> = vec![Vec::new(); n_sc];
        for (sc, it) in sc_item_pairs {
            sc_to_items[sc].push(it);
        }

        // Project each subcentroid motif to items (parallel)
        let item_sets: Vec<HashSet<usize>> = sc_results
            .par_iter()
            .map(|s_sc| {
                let mut s_items = HashSet::new();
                for &sc in s_sc {
                    for &it in &sc_to_items[sc] {
                        s_items.insert(it);
                    }
                }
                s_items
            })
            .filter(|s_items| s_items.len() >= 3)
            .collect();

        // 7) Final item-level dedup (sequential, fully deterministic)
        //
        // Non-determinism source: `item_sets` is produced by `sc_results.par_iter()`
        // in step 6, whose delivery order is scheduler-dependent. The sequential dedup
        // loop below is order-sensitive — different input orderings evict different
        // sets on Jaccard ties, changing the final count between runs.
        //
        // Fix: canonicalise every set into a sorted Vec<usize> and sort the whole
        // input slice before dedup. This gives the dedup loop a stable, reproducible
        // input regardless of how Rayon delivered the parallel results.

        // Canonicalise: HashSet → sorted Vec so both content and ordering are stable.
        let mut item_sets_sorted: Vec<Vec<usize>> = item_sets
            .into_iter()
            .map(|set| {
                let mut v: Vec<usize> = set.into_iter().collect();
                v.sort_unstable(); // canonical form for each set
                v
            })
            .collect();

        // Sort the collection itself so dedup always sees the same input order.
        // Lexicographic order on sorted Vecs is deterministic and cheap here
        // since item_sets_sorted is bounded by cfg.max_sets (typically ≤ 60).
        item_sets_sorted.sort_unstable();

        let mut deduped_items: Vec<Vec<usize>> = Vec::new();
        for item in item_sets_sorted {
            let item_set: HashSet<usize> = item.iter().copied().collect();
            let mut keep = true;
            for cmp in &deduped_items {
                let cmp_set: HashSet<usize> = cmp.iter().copied().collect();
                if jaccard(&item_set, &cmp_set) >= cfg.jaccard_dedup {
                    keep = false;
                    break;
                }
            }
            if keep {
                deduped_items.push(item); // already sorted, no re-sort needed
                if deduped_items.len() >= cfg.max_sets {
                    break;
                }
            }
        }

        info!(
            "Energy motifs: {} item-level motifs after mapping",
            deduped_items.len()
        );

        // Output vecs are already sorted from canonicalisation above.
        let mut out: Vec<Vec<usize>> = deduped_items;
        out.shrink_to_fit();
        out
    }

    fn is_clique(&self, set: &HashSet<usize>) -> bool {
        let sz = set.len();
        if sz < 2 {
            return false;
        }
        // Parallel short-circuit check
        let ok = set.par_iter().all(|&u| {
            let nbrs: HashSet<usize> = self.neighbors_of(u).iter().map(|(j, _)| *j).collect();
            let need = sz - 1;
            let have = nbrs.intersection(set).count();
            have == need
        });
        ok
    }

    /// unused: potential improvements using rayleigh energy boundaries
    fn rayleigh_indicator(&self, set: &HashSet<usize>) -> f64 {
        // Active computation space derived from the Laplacian itself
        let (rows, cols) = self.matrix.shape();
        if rows == 0 || rows != cols || set.is_empty() {
            return f64::INFINITY;
        }
        let n = rows;
        if set.iter().any(|&u| u >= n) {
            return f64::INFINITY;
        }
        let mut x = vec![0.0f64; n];
        for &i in set {
            x[i] = 1.0;
        }
        self.rayleigh_quotient(&x)
    }
}

// ──────────────────────────────────────────────────────────────────────────────
// Internal helpers (parallel-friendly)
// ──────────────────────────────────────────────────────────────────────────────

fn triangle_stats_sorted(neigh_idx: &[Vec<usize>], n: usize) -> (Vec<usize>, Vec<f64>) {
    // Count triangles per node by intersecting neighbor lists
    let tri_count: Vec<usize> = (0..n)
        .into_par_iter()
        .map(|i| {
            let nbrs_i = &neigh_idx[i];
            if nbrs_i.len() < 2 {
                return 0usize;
            }
            let mut t = 0usize;
            for &j in nbrs_i {
                if j <= i {
                    continue;
                }
                let nbrs_j = &neigh_idx[j];
                t += count_intersection(nbrs_i, nbrs_j, i, j);
            }
            t
        })
        .collect();

    // Local clustering per node in parallel
    let clust: Vec<f64> = (0..n)
        .into_par_iter()
        .map(|i| {
            let k = neigh_idx[i].len();
            if k >= 2 {
                (2.0 * tri_count[i] as f64) / ((k * (k - 1)) as f64)
            } else {
                0.0
            }
        })
        .collect();

    (tri_count, clust)
}

// Count common neighbors excluding i and j using two-pointer scan on sorted lists
#[inline]
fn count_intersection(a: &Vec<usize>, b: &Vec<usize>, i: usize, j: usize) -> usize {
    let mut x = 0usize;
    let (mut p, mut q) = (0usize, 0usize);
    while p < a.len() && q < b.len() {
        let va = a[p];
        let vb = b[q];
        if va == vb {
            if va != i && va != j {
                x += 1;
            }
            p += 1;
            q += 1;
        } else if va < vb {
            p += 1;
        } else {
            q += 1;
        }
    }
    x
}

// Count edges among s_nbrs after position start by intersecting with neigh(u)
#[inline]
fn count_edges_among(neigh_u: &Vec<usize>, s_nbrs: &Vec<usize>, start: usize) -> usize {
    let mut x = 0usize;
    let mut p = 0usize;
    let mut q = start;
    while p < neigh_u.len() && q < s_nbrs.len() {
        let va = neigh_u[p];
        let vb = s_nbrs[q];
        if va == vb {
            x += 1;
            p += 1;
            q += 1;
        } else if va < vb {
            p += 1;
        } else {
            q += 1;
        }
    }
    x
}

pub fn jaccard(a: &HashSet<usize>, b: &HashSet<usize>) -> f64 {
    let inter = a.intersection(b).count() as f64;
    let union = (a.len() + b.len()) as f64 - inter;
    if union == 0.0 { 0.0 } else { inter / union }
}