Skip to main content

reddb_server/storage/engine/
graph_algorithms.rs

1//! Pure graph algorithms operating on abstract node/edge inputs.
2//!
3//! This module is deliberately self-contained: it has NO dependency on
4//! catalog, storage, cluster, or `GraphStore` types. Callers (e.g. the query
5//! executor) materialize a node-id list plus weighted edges and call into
6//! these pure functions. This keeps the algorithms easy to test and reuse,
7//! and separate from the storage-coupled
8//! `crate::storage::engine::algorithms` helpers.
9
10use std::collections::{BTreeMap, BTreeSet};
11
12/// Edge weight type. The storage layer uses `f32` for edge weight, so we
13/// mirror that here. Weight is accepted for API compatibility; connectivity
14/// treats edges as unweighted/undirected.
15pub type Weight = f32;
16
17/// Compute connected components over an abstract graph.
18///
19/// Inputs:
20/// - `nodes`: the declared node universe.
21/// - `edges`: weighted edges `(src, dst, weight)`. Edges are treated as
22///   UNDIRECTED for connectivity. Edge endpoints not present in `nodes` are
23///   still included in the universe (`nodes ∪ all edge endpoints`).
24///
25/// Output: exactly one `(node, island_id)` pair per distinct node in the
26/// universe.
27///
28/// Determinism (guaranteed and tested):
29/// - The id universe is built from a `BTreeSet`, so it is sorted and deduped.
30/// - Union-find unions toward the smaller index, so each component's
31///   representative is its smallest node.
32/// - `island_id`s are assigned in ascending order of each component's
33///   smallest node, yielding `0, 1, 2, ...`.
34/// - Output is ordered by node ascending.
35///
36/// Identical input always produces identical output.
37pub fn connected_components<N: Clone + Ord>(
38    nodes: &[N],
39    edges: &[(N, N, Weight)],
40) -> Vec<(N, usize)> {
41    // Build the deduplicated, sorted node universe (nodes ∪ edge endpoints).
42    let mut universe: BTreeSet<N> = BTreeSet::new();
43    for n in nodes {
44        universe.insert(n.clone());
45    }
46    for (a, b, _w) in edges {
47        universe.insert(a.clone());
48        universe.insert(b.clone());
49    }
50
51    // Stable index for each node (ascending order via BTreeSet iteration).
52    let ordered: Vec<N> = universe.into_iter().collect();
53    let mut index_of: BTreeMap<&N, usize> = BTreeMap::new();
54    for (i, n) in ordered.iter().enumerate() {
55        index_of.insert(n, i);
56    }
57
58    // Union-find parent array. Representative is always the smaller index,
59    // i.e. the component's smallest node.
60    let mut parent: Vec<usize> = (0..ordered.len()).collect();
61
62    fn find(parent: &mut [usize], mut x: usize) -> usize {
63        // Path compression.
64        while parent[x] != x {
65            parent[x] = parent[parent[x]];
66            x = parent[x];
67        }
68        x
69    }
70
71    for (a, b, _w) in edges {
72        let ia = index_of[a];
73        let ib = index_of[b];
74        let ra = find(&mut parent, ia);
75        let rb = find(&mut parent, ib);
76        if ra != rb {
77            // Union toward the smaller index so the representative is the
78            // component's smallest node.
79            let (lo, hi) = if ra < rb { (ra, rb) } else { (rb, ra) };
80            parent[hi] = lo;
81        }
82    }
83
84    // Assign island ids by ascending order of each component's representative
85    // (which is its smallest node). Iterating nodes ascending and assigning a
86    // fresh id the first time a representative is seen yields 0,1,2,... in
87    // ascending-smallest-node order.
88    let mut island_of_root: BTreeMap<usize, usize> = BTreeMap::new();
89    let mut next_island: usize = 0;
90    let mut result: Vec<(N, usize)> = Vec::with_capacity(ordered.len());
91    for (i, n) in ordered.iter().enumerate() {
92        let root = find(&mut parent, i);
93        let island = *island_of_root.entry(root).or_insert_with(|| {
94            let id = next_island;
95            next_island += 1;
96            id
97        });
98        result.push((n.clone(), island));
99    }
100
101    result
102}
103
104/// Build the deduplicated, sorted node universe (nodes ∪ edge endpoints)
105/// plus a stable ascending index for each node. Shared by the algorithms so
106/// node identity → integer index is computed identically everywhere.
107fn node_universe<N: Clone + Ord>(nodes: &[N], edges: &[(N, N, Weight)]) -> Vec<N> {
108    let mut universe: BTreeSet<N> = BTreeSet::new();
109    for n in nodes {
110        universe.insert(n.clone());
111    }
112    for (a, b, _w) in edges {
113        universe.insert(a.clone());
114        universe.insert(b.clone());
115    }
116    universe.into_iter().collect()
117}
118
119/// Modularity Q of a partition over an undirected weighted graph.
120///
121/// `comm[i]` is the community id of the node at index `i` (indices match the
122/// ascending node universe). `resolution` (γ) scales the null-model term; the
123/// classic modularity uses γ = 1.0.
124///
125/// Q = Σ_C [ W_in(C)/m − γ·(Σ_tot(C)/2m)² ], where W_in(C) is the total weight
126/// of edges with both endpoints in C (self-loops included once), Σ_tot(C) is
127/// the summed degree of C, and m is the total edge weight. Returns 0.0 for an
128/// edgeless graph (m = 0), which is the modularity of every partition there.
129fn modularity_of(
130    n: usize,
131    adj: &[Vec<(usize, f64)>],
132    selfloop: &[f64],
133    degree: &[f64],
134    m: f64,
135    comm: &[usize],
136    resolution: f64,
137) -> f64 {
138    if m <= 0.0 {
139        return 0.0;
140    }
141    let two_m = 2.0 * m;
142    // W_in per community: sum edge weights whose endpoints share a community.
143    // Each undirected non-self edge (i, j) appears once in i's adjacency and
144    // once in j's, so summing over adjacency and halving counts it once; the
145    // self-loop weight is added separately (counted once).
146    let mut w_in: BTreeMap<usize, f64> = BTreeMap::new();
147    let mut tot: BTreeMap<usize, f64> = BTreeMap::new();
148    for i in 0..n {
149        *tot.entry(comm[i]).or_insert(0.0) += degree[i];
150        if selfloop[i] != 0.0 {
151            *w_in.entry(comm[i]).or_insert(0.0) += selfloop[i];
152        }
153        for &(j, w) in &adj[i] {
154            if comm[i] == comm[j] {
155                *w_in.entry(comm[i]).or_insert(0.0) += w / 2.0;
156            }
157        }
158    }
159    let mut q = 0.0;
160    for (c, &win) in &w_in {
161        let st = *tot.get(c).unwrap_or(&0.0);
162        q += win / m - resolution * (st / two_m) * (st / two_m);
163    }
164    // Communities with no internal weight still contribute the null term.
165    for (c, &st) in &tot {
166        if !w_in.contains_key(c) {
167            q += -resolution * (st / two_m) * (st / two_m);
168        }
169    }
170    q
171}
172
173/// A level in the Louvain hierarchy: an undirected weighted graph over
174/// `0..n` super-nodes with per-node self-loop weight, degree, and total
175/// edge weight. `adj[i]` lists `(neighbour, weight)` for non-self edges only.
176struct Level {
177    n: usize,
178    adj: Vec<Vec<(usize, f64)>>,
179    selfloop: Vec<f64>,
180    degree: Vec<f64>,
181    m: f64,
182}
183
184impl Level {
185    /// Build the base level directly from the abstract graph using the
186    /// supplied node universe for index assignment. Parallel edges are summed;
187    /// `(a, a, w)` is a self-loop. Edges are undirected.
188    fn base<N: Clone + Ord>(ordered: &[N], edges: &[(N, N, Weight)]) -> Self {
189        let n = ordered.len();
190        let mut index_of: BTreeMap<&N, usize> = BTreeMap::new();
191        for (i, node) in ordered.iter().enumerate() {
192            index_of.insert(node, i);
193        }
194        // Accumulate undirected weights between distinct index pairs, and
195        // self-loops separately, so parallel edges merge deterministically.
196        let mut pair_weight: BTreeMap<(usize, usize), f64> = BTreeMap::new();
197        let mut selfloop = vec![0.0f64; n];
198        for (a, b, w) in edges {
199            let ia = index_of[a];
200            let ib = index_of[b];
201            let w = *w as f64;
202            if ia == ib {
203                selfloop[ia] += w;
204            } else {
205                let key = if ia < ib { (ia, ib) } else { (ib, ia) };
206                *pair_weight.entry(key).or_insert(0.0) += w;
207            }
208        }
209        Self::from_parts(n, pair_weight, selfloop)
210    }
211
212    /// Assemble a level from summed undirected pair weights and self-loops.
213    fn from_parts(
214        n: usize,
215        pair_weight: BTreeMap<(usize, usize), f64>,
216        selfloop: Vec<f64>,
217    ) -> Self {
218        let mut adj: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
219        let mut degree = vec![0.0f64; n];
220        let mut m = 0.0;
221        for (&(lo, hi), &w) in &pair_weight {
222            adj[lo].push((hi, w));
223            adj[hi].push((lo, w));
224            degree[lo] += w;
225            degree[hi] += w;
226            m += w;
227        }
228        for (i, &sl) in selfloop.iter().enumerate() {
229            // A self-loop contributes 2·w to the node degree (A_ii = 2w
230            // convention) and w to the total edge weight m, keeping
231            // Σ_i degree[i] = 2m across aggregation levels.
232            degree[i] += 2.0 * sl;
233            m += sl;
234        }
235        // Neighbour lists are built by ascending pair key, so each adjacency
236        // list is already sorted by neighbour index — relied on for
237        // deterministic community iteration during local moving.
238        Level {
239            n,
240            adj,
241            selfloop,
242            degree,
243            m,
244        }
245    }
246
247    /// One pass of local moving (Louvain phase 1). Each node starts in its own
248    /// community; repeatedly move nodes into the neighbouring community with
249    /// the greatest modularity gain until a full sweep moves nothing. Returns
250    /// the community label per node. Deterministic: nodes are swept in index
251    /// order, candidate communities in ascending neighbour order, ties keep
252    /// the current community / smallest community id.
253    fn local_moving(&self, resolution: f64) -> Vec<usize> {
254        let mut comm: Vec<usize> = (0..self.n).collect();
255        let mut sigma_tot: Vec<f64> = self.degree.clone();
256        if self.m <= 0.0 {
257            return comm; // no edges: every node is its own community.
258        }
259        let two_m = 2.0 * self.m;
260        loop {
261            let mut improved = false;
262            for i in 0..self.n {
263                let ci = comm[i];
264                // Weight from i to each neighbouring community (excludes the
265                // self-loop, which never moves between communities). BTreeMap
266                // keeps candidate communities in ascending id order.
267                let mut w_to: BTreeMap<usize, f64> = BTreeMap::new();
268                for &(j, w) in &self.adj[i] {
269                    *w_to.entry(comm[j]).or_insert(0.0) += w;
270                }
271                // Remove i from its community before scoring candidates.
272                sigma_tot[ci] -= self.degree[i];
273                let ki = self.degree[i];
274                // Gain of (re)joining community c, dropping the constant term:
275                //   ΔQ ∝ w_to[c] − γ·Σ_tot[c]·k_i / 2m.
276                let gain = |c: usize| -> f64 {
277                    w_to.get(&c).copied().unwrap_or(0.0) - resolution * sigma_tot[c] * ki / two_m
278                };
279                let mut best_comm = ci;
280                let mut best_gain = gain(ci);
281                for &c in w_to.keys() {
282                    let g = gain(c);
283                    if g > best_gain {
284                        best_gain = g;
285                        best_comm = c;
286                    }
287                }
288                sigma_tot[best_comm] += self.degree[i];
289                if best_comm != ci {
290                    comm[i] = best_comm;
291                    improved = true;
292                }
293            }
294            if !improved {
295                break;
296            }
297        }
298        comm
299    }
300
301    /// Aggregate communities into a coarser level: every community becomes one
302    /// super-node, intra-community weight folds into its self-loop, and
303    /// inter-community weight becomes edges between super-nodes. Returns the
304    /// coarser level plus the dense renumbering `community id → super-node id`
305    /// (assigned in ascending community-id order).
306    fn aggregate(&self, comm: &[usize]) -> (Level, BTreeMap<usize, usize>) {
307        let mut renumber: BTreeMap<usize, usize> = BTreeMap::new();
308        for &c in comm {
309            let next = renumber.len();
310            renumber.entry(c).or_insert(next);
311        }
312        let nc = renumber.len();
313        let mut pair_weight: BTreeMap<(usize, usize), f64> = BTreeMap::new();
314        let mut selfloop = vec![0.0f64; nc];
315        // Existing self-loops stay internal to their community.
316        for i in 0..self.n {
317            if self.selfloop[i] != 0.0 {
318                selfloop[renumber[&comm[i]]] += self.selfloop[i];
319            }
320        }
321        // Each undirected non-self edge appears twice across adjacency lists,
322        // so halve the accumulated weight.
323        for i in 0..self.n {
324            let ci = renumber[&comm[i]];
325            for &(j, w) in &self.adj[i] {
326                let cj = renumber[&comm[j]];
327                if ci == cj {
328                    selfloop[ci] += w / 2.0;
329                } else {
330                    let key = if ci < cj { (ci, cj) } else { (cj, ci) };
331                    *pair_weight.entry(key).or_insert(0.0) += w / 2.0;
332                }
333            }
334        }
335        (Self::from_parts(nc, pair_weight, selfloop), renumber)
336    }
337}
338
339/// Detect communities with the Louvain modularity-maximisation algorithm over
340/// an abstract undirected weighted graph.
341///
342/// Inputs mirror [`connected_components`]:
343/// - `nodes`: the declared node universe.
344/// - `edges`: weighted edges `(src, dst, weight)`, treated as UNDIRECTED.
345///   Endpoints not present in `nodes` are still included in the universe.
346/// - `resolution` (γ): the modularity resolution parameter (default 1.0 at the
347///   call site). Higher values yield more, smaller communities.
348///
349/// Output: exactly one `(node, community_id)` pair per distinct node in the
350/// universe, ordered by node ascending.
351///
352/// Determinism (guaranteed and tested):
353/// - The node universe is a sorted, deduped `BTreeSet`, giving stable indices.
354/// - Local moving sweeps nodes in index order, considers candidate communities
355///   in ascending id order, and breaks ties toward the current / smallest
356///   community, so the optimisation path is fully reproducible.
357/// - `community_id`s are assigned in ascending order of each community's
358///   smallest node, yielding `0, 1, 2, …` — the same labelling scheme as
359///   `connected_components`.
360///
361/// Identical `(nodes, edges, resolution)` always produces identical output.
362pub fn louvain<N: Clone + Ord>(
363    nodes: &[N],
364    edges: &[(N, N, Weight)],
365    resolution: f64,
366) -> Vec<(N, usize)> {
367    let ordered = node_universe(nodes, edges);
368    let n = ordered.len();
369    if n == 0 {
370        return Vec::new();
371    }
372
373    let base = Level::base(&ordered, edges);
374    // `membership[i]` tracks the current community of base node i across
375    // aggregation levels. Initially each node is its own community.
376    let mut membership: Vec<usize> = (0..n).collect();
377    let mut level = base;
378
379    loop {
380        let comm = level.local_moving(resolution);
381        // Project this level's partition back onto the base nodes.
382        let (next_level, renumber) = level.aggregate(&comm);
383        for c in membership.iter_mut() {
384            *c = renumber[&comm[*c]];
385        }
386        // Converged when local moving collapses nothing further: the number of
387        // communities equals the number of nodes at this level.
388        if next_level.n == level.n {
389            break;
390        }
391        level = next_level;
392    }
393
394    // Relabel communities 0,1,2,… in ascending order of each community's
395    // smallest base-node index, matching the connected_components convention.
396    let mut label_of: BTreeMap<usize, usize> = BTreeMap::new();
397    let mut next_label = 0usize;
398    let mut result: Vec<(N, usize)> = Vec::with_capacity(n);
399    for (i, node) in ordered.iter().enumerate() {
400        let label = *label_of.entry(membership[i]).or_insert_with(|| {
401            let id = next_label;
402            next_label += 1;
403            id
404        });
405        result.push((node.clone(), label));
406    }
407    result
408}
409
410/// Public modularity helper over the abstract graph for a given node→community
411/// labelling (as produced by [`louvain`]). Exposed for tests and callers that
412/// want to score a partition. Edges are treated as undirected; `resolution` is
413/// the γ parameter. Returns 0.0 for an edgeless graph.
414pub fn modularity<N: Clone + Ord>(
415    nodes: &[N],
416    edges: &[(N, N, Weight)],
417    assignment: &[(N, usize)],
418    resolution: f64,
419) -> f64 {
420    let ordered = node_universe(nodes, edges);
421    let n = ordered.len();
422    if n == 0 {
423        return 0.0;
424    }
425    let mut index_of: BTreeMap<&N, usize> = BTreeMap::new();
426    for (i, node) in ordered.iter().enumerate() {
427        index_of.insert(node, i);
428    }
429    let level = Level::base(&ordered, edges);
430    let assigned: BTreeMap<N, usize> = assignment.iter().cloned().collect();
431    let mut comm = vec![0usize; n];
432    for (node, &i) in &index_of {
433        comm[i] = *assigned.get(node).unwrap_or(&i);
434    }
435    modularity_of(
436        n,
437        &level.adj,
438        &level.selfloop,
439        &level.degree,
440        level.m,
441        &comm,
442        resolution,
443    )
444}
445
446/// Single-source single-target shortest path over an abstract undirected
447/// weighted graph.
448///
449/// Inputs mirror [`connected_components`] / [`louvain`]:
450/// - `nodes`: the declared node universe.
451/// - `edges`: weighted edges `(src, dst, weight)`, treated as UNDIRECTED.
452///   Endpoints not present in `nodes` are still included in the universe.
453///   Parallel edges between the same pair collapse to their MINIMUM weight;
454///   self-loops are ignored (they never shorten a path). Edge weights are
455///   assumed NON-NEGATIVE (the storage layer's `f32` weights are); the
456///   shortest-path / triangle-inequality guarantees below hold under that
457///   assumption.
458/// - `src`, `dst`: the path endpoints. Either absent from the universe yields
459///   `None`.
460/// - `max_hops`: an optional cap on the number of EDGES in the path. `None`
461///   means unbounded. The result is the minimum-weight path using at most
462///   `max_hops` edges; if no such path exists, `None`.
463///
464/// Output: `Some(path)` where `path` is the ordered sequence of
465/// `(node, cumulative_weight)` from `src` to `dst` (the first entry is
466/// `(src, 0.0)`, the hop index is the position in the vector, and the last
467/// entry's weight is the total path weight). `None` when `dst` is unreachable
468/// from `src` (within the hop budget) — the executor maps this to an EMPTY
469/// result set, never an error. A zero-length path (`src == dst`) returns the
470/// single entry `(src, 0.0)`.
471///
472/// Algorithm: a hop-limited Bellman-Ford relaxation. Each round relaxes every
473/// (undirected) edge against the previous round's distances, so after `k`
474/// rounds `dist[v]` is the shortest distance to `v` using at most `k` edges.
475/// With `max_hops = None` we run `n - 1` rounds, which suffices for the true
476/// shortest path because — with non-negative weights — a shortest path is
477/// simple and therefore uses at most `n - 1` edges. O(rounds · E).
478///
479/// Determinism (guaranteed and tested): the node universe is a sorted, deduped
480/// `BTreeSet` giving stable indices; relaxation sweeps nodes in index order and
481/// neighbours in ascending index order; ties keep the lower-index predecessor.
482/// Identical `(nodes, edges, src, dst, max_hops)` always produces identical
483/// output. On an undirected graph the distance is symmetric:
484/// `shortest_path(.., a, b, ..)` and `shortest_path(.., b, a, ..)` have equal
485/// total weight.
486pub fn shortest_path<N: Clone + Ord>(
487    nodes: &[N],
488    edges: &[(N, N, Weight)],
489    src: &N,
490    dst: &N,
491    max_hops: Option<usize>,
492) -> Option<Vec<(N, f64)>> {
493    let ordered = node_universe(nodes, edges);
494    let n = ordered.len();
495    if n == 0 {
496        return None;
497    }
498    let mut index_of: BTreeMap<&N, usize> = BTreeMap::new();
499    for (i, node) in ordered.iter().enumerate() {
500        index_of.insert(node, i);
501    }
502    let si = match index_of.get(src) {
503        Some(&i) => i,
504        None => return None,
505    };
506    let di = match index_of.get(dst) {
507        Some(&i) => i,
508        None => return None,
509    };
510
511    // Undirected adjacency keeping the minimum weight between each pair, so
512    // parallel edges collapse deterministically and self-loops are dropped.
513    let mut adj: Vec<BTreeMap<usize, f64>> = vec![BTreeMap::new(); n];
514    for (a, b, w) in edges {
515        let ia = index_of[a];
516        let ib = index_of[b];
517        if ia == ib {
518            continue; // self-loop: never part of a shortest simple path.
519        }
520        let w = *w as f64;
521        let e = adj[ia].entry(ib).or_insert(f64::INFINITY);
522        if w < *e {
523            *e = w;
524        }
525        let e = adj[ib].entry(ia).or_insert(f64::INFINITY);
526        if w < *e {
527            *e = w;
528        }
529    }
530
531    // Hop-limited Bellman-Ford. `rounds` bounds the path length in edges:
532    // `max_hops` when provided, else `n - 1` (enough for any simple path).
533    let rounds = max_hops.unwrap_or(n.saturating_sub(1));
534    let mut dist = vec![f64::INFINITY; n];
535    dist[si] = 0.0;
536    let mut pred: Vec<Option<usize>> = vec![None; n];
537    for _ in 0..rounds {
538        // Relax against the previous round's distances (the clone) so each
539        // round extends every path by at most one edge.
540        let mut next = dist.clone();
541        let mut changed = false;
542        for u in 0..n {
543            if !dist[u].is_finite() {
544                continue;
545            }
546            for (&v, &w) in &adj[u] {
547                let cand = dist[u] + w;
548                if cand < next[v] {
549                    next[v] = cand;
550                    pred[v] = Some(u);
551                    changed = true;
552                }
553            }
554        }
555        dist = next;
556        if !changed {
557            break; // converged early — fewer hops than the budget sufficed.
558        }
559    }
560
561    if !dist[di].is_finite() {
562        return None; // unreachable within the hop budget.
563    }
564
565    // Reconstruct the path dst -> src via predecessors. The cycle guard is
566    // defensive: with non-negative weights each predecessor step is to a
567    // strictly-or-equally smaller distance and the chain terminates at src.
568    let mut path_idx = Vec::new();
569    let mut seen = BTreeSet::new();
570    let mut cur = di;
571    loop {
572        if !seen.insert(cur) {
573            return None; // unexpected cycle — bail rather than loop forever.
574        }
575        path_idx.push(cur);
576        if cur == si {
577            break;
578        }
579        match pred[cur] {
580            Some(p) => cur = p,
581            None => return None, // no predecessor yet not src: unreachable.
582        }
583    }
584    path_idx.reverse();
585
586    // Cumulative weight along the reconstructed path (per-hop running sum of
587    // the min pair weights), so the final entry equals `dist[dst]`.
588    let mut result = Vec::with_capacity(path_idx.len());
589    let mut cum = 0.0;
590    for (i, &idx) in path_idx.iter().enumerate() {
591        if i > 0 {
592            let prev = path_idx[i - 1];
593            cum += adj[prev][&idx];
594        }
595        result.push((ordered[idx].clone(), cum));
596    }
597    Some(result)
598}
599
600/// Build an undirected adjacency list indexed by the ascending node universe.
601///
602/// `adj[i]` lists each neighbour index of node `i`, sorted ascending and
603/// deduplicated (parallel edges collapse; self-loops are dropped — they never
604/// affect betweenness shortest paths and would distort degree counts). Edge
605/// weights are ignored: the centrality family below treats the graph as
606/// unweighted and undirected, matching `connected_components`. Returns the node
607/// universe alongside the adjacency so callers share one index assignment.
608fn undirected_adjacency<N: Clone + Ord>(
609    nodes: &[N],
610    edges: &[(N, N, Weight)],
611) -> (Vec<N>, Vec<Vec<usize>>) {
612    let ordered = node_universe(nodes, edges);
613    let n = ordered.len();
614    let mut index_of: BTreeMap<&N, usize> = BTreeMap::new();
615    for (i, node) in ordered.iter().enumerate() {
616        index_of.insert(node, i);
617    }
618    // BTreeSet per node keeps neighbours ascending and deduplicated, so BFS /
619    // matrix sweeps visit them in a fully reproducible order.
620    let mut sets: Vec<BTreeSet<usize>> = vec![BTreeSet::new(); n];
621    for (a, b, _w) in edges {
622        let ia = index_of[a];
623        let ib = index_of[b];
624        if ia == ib {
625            continue; // self-loop: irrelevant to undirected centrality.
626        }
627        sets[ia].insert(ib);
628        sets[ib].insert(ia);
629    }
630    let adj: Vec<Vec<usize>> = sets.into_iter().map(|s| s.into_iter().collect()).collect();
631    (ordered, adj)
632}
633
634/// Betweenness centrality over an abstract undirected, unweighted graph using
635/// Brandes' algorithm.
636///
637/// Inputs mirror [`connected_components`]: `nodes` declares the universe and
638/// `edges` are treated as UNDIRECTED (weights ignored; endpoints absent from
639/// `nodes` still join the universe). The score of node `v` is the number of
640/// shortest paths between all other unordered pairs `{s, t}` that pass through
641/// `v`. Each unordered pair is counted once (the raw directed accumulation is
642/// halved), so an isolated node and both endpoints of a bridge score `0.0`.
643///
644/// Output: one `(node, score)` pair per distinct node, ordered by node
645/// ascending.
646///
647/// Determinism (guaranteed and tested): the node universe is a sorted, deduped
648/// `BTreeSet`; BFS explores neighbours in ascending index order; the
649/// accumulation visits vertices in reverse-BFS-distance order with stable
650/// per-level ordering. Identical input always produces identical output.
651pub fn betweenness<N: Clone + Ord>(nodes: &[N], edges: &[(N, N, Weight)]) -> Vec<(N, f64)> {
652    let (ordered, adj) = undirected_adjacency(nodes, edges);
653    let n = ordered.len();
654    let mut cb = vec![0.0f64; n];
655
656    for s in 0..n {
657        // Single-source shortest-path counting (unweighted -> BFS).
658        let mut stack: Vec<usize> = Vec::with_capacity(n);
659        let mut preds: Vec<Vec<usize>> = vec![Vec::new(); n];
660        let mut sigma = vec![0.0f64; n]; // # shortest s->v paths.
661        let mut dist = vec![-1i64; n];
662        sigma[s] = 1.0;
663        dist[s] = 0;
664        let mut queue: std::collections::VecDeque<usize> = std::collections::VecDeque::new();
665        queue.push_back(s);
666        while let Some(v) = queue.pop_front() {
667            stack.push(v);
668            for &w in &adj[v] {
669                // First time we reach w: record its distance and enqueue.
670                if dist[w] < 0 {
671                    dist[w] = dist[v] + 1;
672                    queue.push_back(w);
673                }
674                // w found on a shortest path via v.
675                if dist[w] == dist[v] + 1 {
676                    sigma[w] += sigma[v];
677                    preds[w].push(v);
678                }
679            }
680        }
681        // Back-propagation of dependencies (Brandes accumulation).
682        let mut delta = vec![0.0f64; n];
683        while let Some(w) = stack.pop() {
684            for &v in &preds[w] {
685                delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
686            }
687            if w != s {
688                cb[w] += delta[w];
689            }
690        }
691    }
692
693    // Undirected graphs count every unordered pair twice in the directed
694    // accumulation above, so halve to recover the conventional score.
695    let mut result: Vec<(N, f64)> = Vec::with_capacity(n);
696    for (i, node) in ordered.iter().enumerate() {
697        result.push((node.clone(), cb[i] / 2.0));
698    }
699    result
700}
701
702/// Eigenvector centrality over an abstract undirected, unweighted graph via
703/// power iteration on the adjacency matrix.
704///
705/// `max_iterations` caps the power-iteration sweeps; `tolerance` is the L1
706/// convergence threshold on successive (normalised) iterates. The returned
707/// vector is L2-normalised, so every score lies in `[0, 1]` and the
708/// Perron–Frobenius principal eigenvector is non-negative. An isolated node
709/// (no incident edges) scores `0.0` whenever the graph has any edges; an
710/// edgeless graph has no well-defined dominant eigenvector, so every node gets
711/// the uniform value `1/√n` (still L2-normalised).
712///
713/// Output: one `(node, score)` pair per distinct node, ordered by node
714/// ascending. Deterministic: fixed uniform start vector, ascending sweep order,
715/// sign fixed non-negative by construction.
716pub fn eigenvector<N: Clone + Ord>(
717    nodes: &[N],
718    edges: &[(N, N, Weight)],
719    max_iterations: usize,
720    tolerance: f64,
721) -> Vec<(N, f64)> {
722    let (ordered, adj) = undirected_adjacency(nodes, edges);
723    let n = ordered.len();
724    if n == 0 {
725        return Vec::new();
726    }
727    let has_edges = adj.iter().any(|row| !row.is_empty());
728    let mut x = vec![1.0f64 / (n as f64).sqrt(); n];
729    if !has_edges {
730        // No dominant eigenvector: fall back to the uniform unit vector.
731        return ordered.into_iter().zip(x).collect();
732    }
733
734    for _ in 0..max_iterations {
735        // y = (I + A)·x. The identity shift keeps the same principal
736        // eigenvector as A while making every eigenvalue positive, so power
737        // iteration converges on BIPARTITE graphs too (plain A·x oscillates
738        // there because its spectrum is symmetric about zero). Connected nodes
739        // grow by the dominant factor each sweep while isolated nodes grow by
740        // 1, so after normalisation isolated nodes still decay to 0.
741        let mut y = vec![0.0f64; n];
742        for i in 0..n {
743            let mut acc = x[i];
744            for &j in &adj[i] {
745                acc += x[j];
746            }
747            y[i] = acc;
748        }
749        // Normalise to unit L2 norm; non-negative start keeps the sign fixed.
750        let norm: f64 = y.iter().map(|v| v * v).sum::<f64>().sqrt();
751        if norm == 0.0 {
752            break;
753        }
754        for v in y.iter_mut() {
755            *v /= norm;
756        }
757        let diff: f64 = x.iter().zip(&y).map(|(a, b)| (a - b).abs()).sum();
758        x = y;
759        if diff < tolerance {
760            break;
761        }
762    }
763
764    ordered.into_iter().zip(x).collect()
765}
766
767/// PageRank over an abstract undirected, unweighted graph.
768///
769/// Edges are treated as UNDIRECTED — each edge contributes an out-link in both
770/// directions. `damping` is the classic teleport factor (0.85 at the call
771/// site); `max_iterations` caps the sweeps. Rank mass from dangling nodes
772/// (degree 0) is redistributed uniformly each iteration, so the returned scores
773/// always sum to `1.0`. Every score is strictly positive (≥ the teleport floor
774/// `(1−damping)/n`), so an isolated node receives exactly that boundary share.
775///
776/// Output: one `(node, score)` pair per distinct node, ordered by node
777/// ascending. Deterministic: fixed uniform start, ascending sweep order, fixed
778/// iteration cap. Identical input always produces identical output.
779pub fn pagerank<N: Clone + Ord>(
780    nodes: &[N],
781    edges: &[(N, N, Weight)],
782    damping: f64,
783    max_iterations: usize,
784) -> Vec<(N, f64)> {
785    let (ordered, adj) = undirected_adjacency(nodes, edges);
786    let n = ordered.len();
787    if n == 0 {
788        return Vec::new();
789    }
790    let nf = n as f64;
791    let degree: Vec<f64> = adj.iter().map(|row| row.len() as f64).collect();
792    let mut rank = vec![1.0f64 / nf; n];
793    let teleport = (1.0 - damping) / nf;
794
795    for _ in 0..max_iterations {
796        // Mass held by dangling (degree-0) nodes is spread uniformly.
797        let dangling: f64 = (0..n).filter(|&i| degree[i] == 0.0).map(|i| rank[i]).sum();
798        let mut next = vec![teleport + damping * dangling / nf; n];
799        for i in 0..n {
800            if degree[i] == 0.0 {
801                continue;
802            }
803            let share = damping * rank[i] / degree[i];
804            for &j in &adj[i] {
805                next[j] += share;
806            }
807        }
808        rank = next;
809    }
810
811    // Renormalise to defend against floating-point drift so scores sum to 1.0.
812    let total: f64 = rank.iter().sum();
813    if total > 0.0 {
814        for r in rank.iter_mut() {
815            *r /= total;
816        }
817    }
818    ordered.into_iter().zip(rank).collect()
819}
820
821/// Deterministic 2D spectral layout over an abstract undirected, unweighted
822/// graph. Returns one `(node, (x, y))` pair per distinct node with both
823/// coordinates min–max normalised to `[0, 1]`.
824///
825/// The coordinates are the two smallest non-trivial eigenvectors of the graph
826/// Laplacian `L = D − A` (the Fiedler vector and the next one) — the classic
827/// spectral-drawing layout: graph-adjacent nodes land near each other and
828/// disconnected components separate cleanly, so a client force-directed layout
829/// seeded from these hints converges faster and to a stable arrangement. The
830/// hint is purely advisory; callers may seed their own layout with it or ignore
831/// it entirely.
832///
833/// Method: shifted, deflated power iteration. We iterate on `M = cI − L` (whose
834/// DOMINANT eigenvectors are `L`'s SMALLEST), deflating the trivial constant
835/// mode (`L`-eigenvalue 0) and then each found coordinate via Gram–Schmidt, so
836/// successive sweeps recover the next Laplacian eigenvector. `max_iterations`
837/// caps the sweeps per coordinate; `tolerance` is the L1 convergence threshold
838/// on successive normalised iterates.
839///
840/// Determinism (guaranteed and tested): the node universe is a sorted, deduped
841/// `BTreeSet`; the start vectors are fixed index-polynomial seeds (no random
842/// initialisation); sweeps run in ascending index order with a fixed cap. The
843/// eigenvector sign — mathematically arbitrary — is pinned by the fixed seed,
844/// so identical input always produces identical coordinates. An isolated node
845/// or an edgeless / single-node graph still yields finite, deterministic hints
846/// (a degenerate coordinate collapses to `0.5`).
847pub fn spectral_embedding<N: Clone + Ord>(
848    nodes: &[N],
849    edges: &[(N, N, Weight)],
850    max_iterations: usize,
851    tolerance: f64,
852) -> Vec<(N, (f64, f64))> {
853    let (ordered, adj) = undirected_adjacency(nodes, edges);
854    let n = ordered.len();
855    if n == 0 {
856        return Vec::new();
857    }
858    if n == 1 {
859        // A single point has no spread; place it at the centre.
860        return ordered.into_iter().map(|node| (node, (0.5, 0.5))).collect();
861    }
862
863    let degree: Vec<f64> = adj.iter().map(|row| row.len() as f64).collect();
864    // The largest Laplacian eigenvalue is bounded by 2·max_degree (Gershgorin),
865    // so shifting by c just above that makes M = cI − L positive definite and
866    // its dominant eigenvectors L's smallest. +1 guarantees a strict spectral
867    // gap and a valid shift even for an edgeless graph (max_degree = 0).
868    let max_degree = degree.iter().cloned().fold(0.0_f64, f64::max);
869    let c = 2.0 * max_degree + 1.0;
870
871    // (M·v)[i] = (c − deg[i])·v[i] + Σ_{j ~ i} v[j].
872    let apply_m = |v: &[f64]| -> Vec<f64> {
873        let mut out = vec![0.0f64; n];
874        for i in 0..n {
875            let mut acc = (c - degree[i]) * v[i];
876            for &j in &adj[i] {
877                acc += v[j];
878            }
879            out[i] = acc;
880        }
881        out
882    };
883
884    // v0: the constant (L-eigenvalue-0) mode we deflate away so the layout
885    // captures structure rather than the trivial all-equal solution. Unit norm
886    // keeps the Gram–Schmidt basis orthonormal.
887    let inv_sqrt_n = 1.0 / (n as f64).sqrt();
888    let v0 = vec![inv_sqrt_n; n];
889
890    // Distinct non-constant seeds for the two coordinates. Their differing
891    // index-polynomial shapes keep a component along the next eigenvector after
892    // deflation, and being fixed they make the result fully reproducible.
893    let seed_x: Vec<f64> = (0..n).map(|i| (i as f64) + 1.0).collect();
894    let seed_y: Vec<f64> = (0..n).map(|i| ((i as f64) + 1.0).powi(2)).collect();
895
896    let mut basis: Vec<Vec<f64>> = vec![v0];
897    let vx = dominant_orthogonal(&apply_m, &seed_x, &basis, max_iterations, tolerance);
898    basis.push(vx.clone());
899    let vy = dominant_orthogonal(&apply_m, &seed_y, &basis, max_iterations, tolerance);
900
901    let x = normalize_unit_interval(&vx);
902    let y = normalize_unit_interval(&vy);
903    ordered
904        .into_iter()
905        .enumerate()
906        .map(|(i, node)| (node, (x[i], y[i])))
907        .collect()
908}
909
910/// L2 norm of a vector.
911fn l2_norm(v: &[f64]) -> f64 {
912    v.iter().map(|x| x * x).sum::<f64>().sqrt()
913}
914
915/// Subtract the projection of `v` onto each (orthonormal) basis vector,
916/// leaving `v` in the orthogonal complement of the span. Assumes `basis`
917/// vectors are unit-norm and mutually orthogonal.
918fn project_out(v: &mut [f64], basis: &[Vec<f64>]) {
919    for b in basis {
920        let dot: f64 = v.iter().zip(b).map(|(x, y)| x * y).sum();
921        for (x, &bi) in v.iter_mut().zip(b) {
922            *x -= dot * bi;
923        }
924    }
925}
926
927/// Deflated power iteration: the dominant eigenvector of `apply` within the
928/// orthogonal complement of `basis`, started from `seed`. Re-orthogonalises
929/// against `basis` every sweep so the iterate cannot drift back toward an
930/// already-found mode. Returns a unit (or zero, if `seed` lies entirely in the
931/// span of `basis`) vector.
932fn dominant_orthogonal(
933    apply: &impl Fn(&[f64]) -> Vec<f64>,
934    seed: &[f64],
935    basis: &[Vec<f64>],
936    max_iterations: usize,
937    tolerance: f64,
938) -> Vec<f64> {
939    let mut v = seed.to_vec();
940    project_out(&mut v, basis);
941    let norm = l2_norm(&v);
942    if norm == 0.0 {
943        return v; // seed fully explained by the existing basis.
944    }
945    for x in v.iter_mut() {
946        *x /= norm;
947    }
948    for _ in 0..max_iterations {
949        let mut next = apply(&v);
950        project_out(&mut next, basis);
951        let norm = l2_norm(&next);
952        if norm == 0.0 {
953            break;
954        }
955        for x in next.iter_mut() {
956            *x /= norm;
957        }
958        let diff: f64 = v.iter().zip(&next).map(|(a, b)| (a - b).abs()).sum();
959        v = next;
960        if diff < tolerance {
961            break;
962        }
963    }
964    v
965}
966
967/// Min–max normalise a vector into `[0, 1]`. A degenerate (zero-span or
968/// non-finite) vector collapses to the centre `0.5` so a hint is always finite.
969fn normalize_unit_interval(v: &[f64]) -> Vec<f64> {
970    let min = v.iter().cloned().fold(f64::INFINITY, f64::min);
971    let max = v.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
972    let span = max - min;
973    if !span.is_finite() || span <= 0.0 {
974        return vec![0.5; v.len()];
975    }
976    v.iter().map(|x| (x - min) / span).collect()
977}
978
979#[cfg(test)]
980mod tests {
981    use super::*;
982    use proptest::prelude::*;
983    use std::collections::{BTreeMap, BTreeSet, VecDeque};
984
985    fn w(a: &str, b: &str) -> (String, String, Weight) {
986        (a.to_string(), b.to_string(), 1.0)
987    }
988
989    #[test]
990    fn golden_two_disjoint_triangles() {
991        // Triangle {a,b,c} and triangle {d,e,f}, fully disconnected.
992        let nodes: Vec<String> = ["a", "b", "c", "d", "e", "f"]
993            .iter()
994            .map(|s| s.to_string())
995            .collect();
996        let edges = vec![
997            w("a", "b"),
998            w("b", "c"),
999            w("c", "a"),
1000            w("d", "e"),
1001            w("e", "f"),
1002            w("f", "d"),
1003        ];
1004        let got = connected_components(&nodes, &edges);
1005        let map: BTreeMap<String, usize> = got.into_iter().collect();
1006        // Smallest node "a" -> island 0; smallest node "d" -> island 1.
1007        assert_eq!(map["a"], 0);
1008        assert_eq!(map["b"], 0);
1009        assert_eq!(map["c"], 0);
1010        assert_eq!(map["d"], 1);
1011        assert_eq!(map["e"], 1);
1012        assert_eq!(map["f"], 1);
1013    }
1014
1015    #[test]
1016    fn dumbbell_is_one_component() {
1017        // Two triangles joined by a bridge edge c-d => one island.
1018        let nodes: Vec<String> = ["a", "b", "c", "d", "e", "f"]
1019            .iter()
1020            .map(|s| s.to_string())
1021            .collect();
1022        let edges = vec![
1023            w("a", "b"),
1024            w("b", "c"),
1025            w("c", "a"),
1026            w("d", "e"),
1027            w("e", "f"),
1028            w("f", "d"),
1029            w("c", "d"),
1030        ];
1031        let got = connected_components(&nodes, &edges);
1032        let islands: BTreeSet<usize> = got.iter().map(|(_, i)| *i).collect();
1033        assert_eq!(islands.len(), 1);
1034        assert!(got.iter().all(|(_, i)| *i == 0));
1035    }
1036
1037    #[test]
1038    fn isolated_nodes_each_their_own_island() {
1039        let nodes: Vec<String> = ["x", "y", "z"].iter().map(|s| s.to_string()).collect();
1040        let edges: Vec<(String, String, Weight)> = vec![];
1041        let got = connected_components(&nodes, &edges);
1042        let map: BTreeMap<String, usize> = got.into_iter().collect();
1043        assert_eq!(map["x"], 0);
1044        assert_eq!(map["y"], 1);
1045        assert_eq!(map["z"], 2);
1046    }
1047
1048    #[test]
1049    fn edge_endpoints_not_in_node_list_are_included() {
1050        // Edge references "g" which is not declared in nodes.
1051        let nodes: Vec<String> = vec!["a".to_string()];
1052        let edges = vec![w("a", "g")];
1053        let got = connected_components(&nodes, &edges);
1054        assert_eq!(got.len(), 2);
1055        let map: BTreeMap<String, usize> = got.into_iter().collect();
1056        assert_eq!(map["a"], map["g"]);
1057        assert_eq!(map["a"], 0);
1058    }
1059
1060    #[test]
1061    fn determinism_repeated_runs_identical() {
1062        let nodes: Vec<String> = ["n3", "n1", "n2", "n4"]
1063            .iter()
1064            .map(|s| s.to_string())
1065            .collect();
1066        let edges = vec![w("n1", "n2"), w("n3", "n4")];
1067        let a = connected_components(&nodes, &edges);
1068        let b = connected_components(&nodes, &edges);
1069        assert_eq!(a, b);
1070        // Output ordered by node ascending.
1071        let ordered: Vec<String> = a.iter().map(|(n, _)| n.clone()).collect();
1072        let mut sorted = ordered.clone();
1073        sorted.sort();
1074        assert_eq!(ordered, sorted);
1075    }
1076
1077    // Strategy: a small id universe plus random edges between those ids.
1078    fn graph_strategy() -> impl Strategy<Value = (Vec<String>, Vec<(String, String, Weight)>)> {
1079        (1usize..8usize).prop_flat_map(|n| {
1080            let nodes: Vec<String> = (0..n).map(|i| format!("n{i:02}")).collect();
1081            let ids = nodes.clone();
1082            let edge = (0..n, 0..n)
1083                .prop_map(move |(a, b)| (format!("n{a:02}"), format!("n{b:02}"), 1.0f32));
1084            let edges = prop::collection::vec(edge, 0..16);
1085            (Just(ids), edges)
1086        })
1087    }
1088
1089    proptest! {
1090        #![proptest_config(ProptestConfig::with_cases(256))]
1091
1092        // (a) every node in exactly one component; union of components == total
1093        //     distinct node count. (b) no node assigned two island ids.
1094        #[test]
1095        fn every_node_exactly_one_island((nodes, edges) in graph_strategy()) {
1096            let result = connected_components(&nodes, &edges);
1097            // Universe = nodes ∪ edge endpoints.
1098            let mut universe: BTreeSet<String> = BTreeSet::new();
1099            for n in &nodes { universe.insert(n.clone()); }
1100            for (a, b, _) in &edges { universe.insert(a.clone()); universe.insert(b.clone()); }
1101
1102            // One row per distinct node, and each node appears exactly once.
1103            prop_assert_eq!(result.len(), universe.len());
1104            let assigned: BTreeMap<String, usize> = result.iter().cloned().collect();
1105            prop_assert_eq!(assigned.len(), universe.len());
1106            for n in &universe {
1107                prop_assert!(assigned.contains_key(n));
1108            }
1109        }
1110
1111        // (c) each component is internally connected: a BFS over the undirected
1112        //     adjacency from any member reaches exactly that island's members.
1113        #[test]
1114        fn islands_are_connected((nodes, edges) in graph_strategy()) {
1115            let result = connected_components(&nodes, &edges);
1116            let assigned: BTreeMap<String, usize> = result.iter().cloned().collect();
1117
1118            // Build undirected adjacency.
1119            let mut adj: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1120            for n in assigned.keys() {
1121                adj.entry(n.clone()).or_default();
1122            }
1123            for (a, b, _) in &edges {
1124                adj.entry(a.clone()).or_default().insert(b.clone());
1125                adj.entry(b.clone()).or_default().insert(a.clone());
1126            }
1127
1128            // Group nodes by island.
1129            let mut by_island: BTreeMap<usize, BTreeSet<String>> = BTreeMap::new();
1130            for (n, isl) in &assigned {
1131                by_island.entry(*isl).or_default().insert(n.clone());
1132            }
1133
1134            for members in by_island.values() {
1135                // BFS from the first member; reachable set must equal members.
1136                let start = members.iter().next().unwrap().clone();
1137                let mut seen: BTreeSet<String> = BTreeSet::new();
1138                let mut q: VecDeque<String> = VecDeque::new();
1139                q.push_back(start.clone());
1140                seen.insert(start);
1141                while let Some(cur) = q.pop_front() {
1142                    if let Some(ns) = adj.get(&cur) {
1143                        for nb in ns {
1144                            if seen.insert(nb.clone()) {
1145                                q.push_back(nb.clone());
1146                            }
1147                        }
1148                    }
1149                }
1150                prop_assert_eq!(&seen, members);
1151            }
1152
1153            // island ids are contiguous 0..k.
1154            let k = by_island.len();
1155            for id in 0..k {
1156                prop_assert!(by_island.contains_key(&id));
1157            }
1158        }
1159
1160        // determinism property: identical input -> identical output.
1161        #[test]
1162        fn determinism_property((nodes, edges) in graph_strategy()) {
1163            let a = connected_components(&nodes, &edges);
1164            let b = connected_components(&nodes, &edges);
1165            prop_assert_eq!(a, b);
1166        }
1167    }
1168
1169    // ── Louvain community detection (issue #796) ──
1170
1171    /// Number of distinct communities in a labelling.
1172    fn community_count(assignment: &[(String, usize)]) -> usize {
1173        assignment
1174            .iter()
1175            .map(|(_, c)| *c)
1176            .collect::<BTreeSet<usize>>()
1177            .len()
1178    }
1179
1180    /// The singleton partition: every node in its own community. Used as the
1181    /// modularity floor — Louvain must never do worse than this.
1182    fn singleton_partition(assignment: &[(String, usize)]) -> Vec<(String, usize)> {
1183        assignment
1184            .iter()
1185            .enumerate()
1186            .map(|(i, (n, _))| (n.clone(), i))
1187            .collect()
1188    }
1189
1190    /// Two K5 cliques joined by a single bridge edge. Edges all weight 1.
1191    fn two_cliques_bridge() -> (Vec<String>, Vec<(String, String, Weight)>) {
1192        let nodes: Vec<String> = ["a0", "a1", "a2", "a3", "a4", "b0", "b1", "b2", "b3", "b4"]
1193            .iter()
1194            .map(|s| s.to_string())
1195            .collect();
1196        let clique = |p: &str| -> Vec<(String, String, Weight)> {
1197            let mut e = Vec::new();
1198            for i in 0..5 {
1199                for j in (i + 1)..5 {
1200                    e.push((format!("{p}{i}"), format!("{p}{j}"), 1.0));
1201                }
1202            }
1203            e
1204        };
1205        let mut edges = clique("a");
1206        edges.extend(clique("b"));
1207        // Thin bridge between the two cliques.
1208        edges.push(("a0".to_string(), "b0".to_string(), 1.0));
1209        (nodes, edges)
1210    }
1211
1212    #[test]
1213    fn louvain_golden_two_cliques_form_two_communities() {
1214        // Canonical clear-community-structure fixture (Karate-Club-equivalent):
1215        // two dense cliques joined by one thin edge. The literature-known
1216        // community count is unambiguously 2, with each clique its own
1217        // community.
1218        let (nodes, edges) = two_cliques_bridge();
1219        let assignment = louvain(&nodes, &edges, 1.0);
1220        let map: BTreeMap<String, usize> = assignment.iter().cloned().collect();
1221
1222        assert_eq!(community_count(&assignment), 2, "expected two communities");
1223        // Every member of clique A shares a community; likewise clique B.
1224        for i in 1..5 {
1225            assert_eq!(map[&format!("a{i}")], map["a0"], "clique A coheres");
1226            assert_eq!(map[&format!("b{i}")], map["b0"], "clique B coheres");
1227        }
1228        assert_ne!(map["a0"], map["b0"], "the two cliques are distinct");
1229        // Smallest node "a0" anchors community 0 (ascending-smallest-node ids).
1230        assert_eq!(map["a0"], 0);
1231    }
1232
1233    /// Zachary's Karate Club — the canonical community-detection benchmark
1234    /// (Zachary 1977), 34 nodes / 78 undirected edges, 0-indexed and rendered
1235    /// as zero-padded ids so lexicographic order matches node number.
1236    fn karate_club() -> (Vec<String>, Vec<(String, String, Weight)>) {
1237        let pairs: &[(u32, u32)] = &[
1238            (0, 1),
1239            (0, 2),
1240            (0, 3),
1241            (0, 4),
1242            (0, 5),
1243            (0, 6),
1244            (0, 7),
1245            (0, 8),
1246            (0, 10),
1247            (0, 11),
1248            (0, 12),
1249            (0, 13),
1250            (0, 17),
1251            (0, 19),
1252            (0, 21),
1253            (0, 31),
1254            (1, 2),
1255            (1, 3),
1256            (1, 7),
1257            (1, 13),
1258            (1, 17),
1259            (1, 19),
1260            (1, 21),
1261            (1, 30),
1262            (2, 3),
1263            (2, 7),
1264            (2, 8),
1265            (2, 9),
1266            (2, 13),
1267            (2, 27),
1268            (2, 28),
1269            (2, 32),
1270            (3, 7),
1271            (3, 12),
1272            (3, 13),
1273            (4, 6),
1274            (4, 10),
1275            (5, 6),
1276            (5, 10),
1277            (5, 16),
1278            (6, 16),
1279            (8, 30),
1280            (8, 32),
1281            (8, 33),
1282            (9, 33),
1283            (13, 33),
1284            (14, 32),
1285            (14, 33),
1286            (15, 32),
1287            (15, 33),
1288            (18, 32),
1289            (18, 33),
1290            (19, 33),
1291            (20, 32),
1292            (20, 33),
1293            (22, 32),
1294            (22, 33),
1295            (23, 25),
1296            (23, 27),
1297            (23, 29),
1298            (23, 32),
1299            (23, 33),
1300            (24, 25),
1301            (24, 27),
1302            (24, 31),
1303            (25, 31),
1304            (26, 29),
1305            (26, 33),
1306            (27, 33),
1307            (28, 31),
1308            (28, 33),
1309            (29, 32),
1310            (29, 33),
1311            (30, 32),
1312            (30, 33),
1313            (31, 32),
1314            (31, 33),
1315            (32, 33),
1316        ];
1317        let nodes: Vec<String> = (0..34).map(|i| format!("n{i:02}")).collect();
1318        let edges = pairs
1319            .iter()
1320            .map(|(a, b)| (format!("n{a:02}"), format!("n{b:02}"), 1.0f32))
1321            .collect();
1322        (nodes, edges)
1323    }
1324
1325    #[test]
1326    fn louvain_karate_club_recovers_high_modularity_communities() {
1327        // The literature places the Karate Club's modularity-optimal partition
1328        // at Q ≈ 0.42 over 2–4 communities. Assert a strong partition rather
1329        // than a brittle exact count: a handful of communities, modularity
1330        // comfortably above the singleton floor, and the two faction leaders
1331        // (instructor n00, president n33) separated.
1332        let (nodes, edges) = karate_club();
1333        let assignment = louvain(&nodes, &edges, 1.0);
1334        let map: BTreeMap<String, usize> = assignment.iter().cloned().collect();
1335
1336        let k = community_count(&assignment);
1337        assert!((2..=5).contains(&k), "expected 2..=5 communities, got {k}");
1338
1339        let q = modularity(&nodes, &edges, &assignment, 1.0);
1340        assert!(
1341            q >= 0.38,
1342            "modularity {q} should clear the 0.38 literature bar"
1343        );
1344
1345        assert_ne!(
1346            map["n00"], map["n33"],
1347            "the two faction leaders land in different communities"
1348        );
1349    }
1350
1351    #[test]
1352    fn louvain_resolution_higher_yields_more_communities() {
1353        // Raising γ penalises large communities, so the partition is at least
1354        // as fragmented. The two-cliques graph is one community at very low γ
1355        // and two at γ = 1.0.
1356        let (nodes, edges) = two_cliques_bridge();
1357        let low = community_count(&louvain(&nodes, &edges, 0.1));
1358        let high = community_count(&louvain(&nodes, &edges, 2.0));
1359        assert!(
1360            high >= low,
1361            "higher resolution must not reduce community count ({low} -> {high})"
1362        );
1363    }
1364
1365    #[test]
1366    fn louvain_determinism_100_runs_identical() {
1367        // Determinism criterion: 100 runs over identical input are bit-for-bit
1368        // identical. Node ids are deliberately out of order to exercise the
1369        // sorted-universe normalisation.
1370        let (nodes, edges) = karate_club();
1371        let first = louvain(&nodes, &edges, 1.0);
1372        for _ in 0..100 {
1373            assert_eq!(louvain(&nodes, &edges, 1.0), first);
1374        }
1375    }
1376
1377    #[test]
1378    fn louvain_empty_and_isolated_nodes() {
1379        // Empty graph -> empty result.
1380        let empty: Vec<String> = Vec::new();
1381        assert!(louvain(&empty, &[], 1.0).is_empty());
1382
1383        // Three isolated nodes -> three singleton communities 0,1,2.
1384        let nodes: Vec<String> = ["x", "y", "z"].iter().map(|s| s.to_string()).collect();
1385        let got = louvain(&nodes, &[], 1.0);
1386        let map: BTreeMap<String, usize> = got.into_iter().collect();
1387        assert_eq!(map["x"], 0);
1388        assert_eq!(map["y"], 1);
1389        assert_eq!(map["z"], 2);
1390    }
1391
1392    proptest! {
1393        #![proptest_config(ProptestConfig::with_cases(256))]
1394
1395        // (a) modularity of the Louvain partition is >= modularity of the
1396        //     singleton partition (Louvain never regresses below the floor).
1397        // (b) every community is non-empty and node coverage is exact.
1398        #[test]
1399        fn louvain_beats_singleton_and_covers_all((nodes, edges) in graph_strategy()) {
1400            let assignment = louvain(&nodes, &edges, 1.0);
1401
1402            // Universe = nodes ∪ edge endpoints.
1403            let mut universe: BTreeSet<String> = BTreeSet::new();
1404            for n in &nodes { universe.insert(n.clone()); }
1405            for (a, b, _) in &edges { universe.insert(a.clone()); universe.insert(b.clone()); }
1406
1407            // Exactly one row per distinct node.
1408            prop_assert_eq!(assignment.len(), universe.len());
1409            let assigned: BTreeMap<String, usize> = assignment.iter().cloned().collect();
1410            prop_assert_eq!(assigned.len(), universe.len());
1411
1412            // Every community is non-empty by construction; verify community
1413            // ids are contiguous 0..k so no id maps to an empty community.
1414            let ids: BTreeSet<usize> = assignment.iter().map(|(_, c)| *c).collect();
1415            let k = ids.len();
1416            for id in 0..k {
1417                prop_assert!(ids.contains(&id), "community ids contiguous 0..k");
1418            }
1419
1420            // Modularity floor: Louvain >= singleton (allow a tiny epsilon for
1421            // floating-point noise).
1422            let q = modularity(&nodes, &edges, &assignment, 1.0);
1423            let q0 = modularity(&nodes, &edges, &singleton_partition(&assignment), 1.0);
1424            prop_assert!(q + 1e-9 >= q0, "louvain Q {} >= singleton Q {}", q, q0);
1425        }
1426
1427        // determinism property: identical input -> identical output.
1428        #[test]
1429        fn louvain_determinism_property((nodes, edges) in graph_strategy()) {
1430            let a = louvain(&nodes, &edges, 1.0);
1431            let b = louvain(&nodes, &edges, 1.0);
1432            prop_assert_eq!(a, b);
1433        }
1434    }
1435
1436    // ── shortest_path (issue #798) ──
1437
1438    /// Total weight of a path (the last entry's cumulative weight), or `None`
1439    /// when no path exists.
1440    fn path_weight(path: &Option<Vec<(String, f64)>>) -> Option<f64> {
1441        path.as_ref()
1442            .map(|p| p.last().map(|(_, w)| *w).unwrap_or(0.0))
1443    }
1444
1445    #[test]
1446    fn shortest_path_golden_known_path() {
1447        // Diamond: a-b (1), a-c (4), b-c (1), c-d (1), b-d (5).
1448        // a -> d shortest is a-b-c-d with weight 3 (beats a-c-d=5, a-b-d=6).
1449        let nodes: Vec<String> = ["a", "b", "c", "d"].iter().map(|s| s.to_string()).collect();
1450        let edges = vec![
1451            ("a".to_string(), "b".to_string(), 1.0f32),
1452            ("a".to_string(), "c".to_string(), 4.0),
1453            ("b".to_string(), "c".to_string(), 1.0),
1454            ("c".to_string(), "d".to_string(), 1.0),
1455            ("b".to_string(), "d".to_string(), 5.0),
1456        ];
1457        let path = shortest_path(&nodes, &edges, &"a".to_string(), &"d".to_string(), None)
1458            .expect("a reaches d");
1459        let route: Vec<String> = path.iter().map(|(n, _)| n.clone()).collect();
1460        assert_eq!(route, vec!["a", "b", "c", "d"], "min-weight route");
1461        // Hop ordering: first entry is the source at weight 0, cumulative weight
1462        // is monotonically non-decreasing, last entry is the total.
1463        assert_eq!(path[0], ("a".to_string(), 0.0));
1464        assert_eq!(path[1].1, 1.0); // a-b
1465        assert_eq!(path[2].1, 2.0); // +b-c
1466        assert_eq!(path[3].1, 3.0); // +c-d (total)
1467    }
1468
1469    #[test]
1470    fn shortest_path_self_is_zero_length() {
1471        let nodes: Vec<String> = ["a", "b"].iter().map(|s| s.to_string()).collect();
1472        let edges = vec![("a".to_string(), "b".to_string(), 1.0f32)];
1473        let path = shortest_path(&nodes, &edges, &"a".to_string(), &"a".to_string(), None)
1474            .expect("self path exists");
1475        assert_eq!(path, vec![("a".to_string(), 0.0)]);
1476    }
1477
1478    #[test]
1479    fn shortest_path_unreachable_is_none() {
1480        // Two disconnected edges: {a-b} and {c-d}. a cannot reach d.
1481        let nodes: Vec<String> = ["a", "b", "c", "d"].iter().map(|s| s.to_string()).collect();
1482        let edges = vec![
1483            ("a".to_string(), "b".to_string(), 1.0f32),
1484            ("c".to_string(), "d".to_string(), 1.0),
1485        ];
1486        assert!(shortest_path(&nodes, &edges, &"a".to_string(), &"d".to_string(), None).is_none());
1487    }
1488
1489    #[test]
1490    fn shortest_path_missing_endpoint_is_none() {
1491        let nodes: Vec<String> = vec!["a".to_string()];
1492        let edges: Vec<(String, String, Weight)> = vec![];
1493        // dst not in the universe.
1494        assert!(shortest_path(&nodes, &edges, &"a".to_string(), &"z".to_string(), None).is_none());
1495    }
1496
1497    #[test]
1498    fn shortest_path_max_hops_caps_edge_count() {
1499        // Path a-b-c-d needs 3 edges. A direct shortcut a-d (weight 10) needs 1.
1500        let nodes: Vec<String> = ["a", "b", "c", "d"].iter().map(|s| s.to_string()).collect();
1501        let edges = vec![
1502            ("a".to_string(), "b".to_string(), 1.0f32),
1503            ("b".to_string(), "c".to_string(), 1.0),
1504            ("c".to_string(), "d".to_string(), 1.0),
1505            ("a".to_string(), "d".to_string(), 10.0),
1506        ];
1507        // Unbounded: the cheap 3-hop route wins (weight 3).
1508        let unbounded = shortest_path(&nodes, &edges, &"a".to_string(), &"d".to_string(), None);
1509        assert_eq!(path_weight(&unbounded), Some(3.0));
1510        // max_hops = 1: only the direct shortcut fits the budget (weight 10).
1511        let capped = shortest_path(&nodes, &edges, &"a".to_string(), &"d".to_string(), Some(1));
1512        assert_eq!(path_weight(&capped), Some(10.0));
1513        // max_hops = 0: no edges allowed, so distinct endpoints are unreachable.
1514        assert!(
1515            shortest_path(&nodes, &edges, &"a".to_string(), &"d".to_string(), Some(0)).is_none()
1516        );
1517    }
1518
1519    // ── Centrality family: betweenness / eigenvector / pagerank (issue #797) ──
1520
1521    /// Default power-iteration cap and tolerance for eigenvector tests.
1522    const EIG_ITERS: usize = 200;
1523    const EIG_TOL: f64 = 1e-10;
1524    /// Classic PageRank damping and a generous iteration cap for convergence.
1525    const PR_DAMPING: f64 = 0.85;
1526    const PR_ITERS: usize = 200;
1527
1528    fn map_of(rows: Vec<(String, f64)>) -> BTreeMap<String, f64> {
1529        rows.into_iter().collect()
1530    }
1531
1532    /// An undirected star K1,3: centre "c" joined to leaves "l1","l2","l3".
1533    fn star() -> (Vec<String>, Vec<(String, String, Weight)>) {
1534        let nodes: Vec<String> = ["c", "l1", "l2", "l3"]
1535            .iter()
1536            .map(|s| s.to_string())
1537            .collect();
1538        let edges = vec![w("c", "l1"), w("c", "l2"), w("c", "l3")];
1539        (nodes, edges)
1540    }
1541
1542    /// An undirected path P3: a - b - c.
1543    fn path3() -> (Vec<String>, Vec<(String, String, Weight)>) {
1544        let nodes: Vec<String> = ["a", "b", "c"].iter().map(|s| s.to_string()).collect();
1545        let edges = vec![w("a", "b"), w("b", "c")];
1546        (nodes, edges)
1547    }
1548
1549    /// A triangle K3 over a,b,c — fully symmetric.
1550    fn triangle() -> (Vec<String>, Vec<(String, String, Weight)>) {
1551        let nodes: Vec<String> = ["a", "b", "c"].iter().map(|s| s.to_string()).collect();
1552        let edges = vec![w("a", "b"), w("b", "c"), w("c", "a")];
1553        (nodes, edges)
1554    }
1555
1556    // ── betweenness golden + properties ──
1557
1558    #[test]
1559    fn betweenness_golden_star_centre_carries_all_pairs() {
1560        // In K1,3 every shortest path between two leaves passes through the
1561        // centre. There are C(3,2) = 3 leaf pairs, so centre = 3.0 and each
1562        // leaf = 0.0 (no pair routes through a leaf).
1563        let (nodes, edges) = star();
1564        let got = map_of(betweenness(&nodes, &edges));
1565        assert!(
1566            (got["c"] - 3.0).abs() < 1e-9,
1567            "centre = 3.0, got {}",
1568            got["c"]
1569        );
1570        for leaf in ["l1", "l2", "l3"] {
1571            assert!(got[leaf].abs() < 1e-9, "{leaf} = 0.0, got {}", got[leaf]);
1572        }
1573    }
1574
1575    #[test]
1576    fn betweenness_golden_path_middle_is_one() {
1577        // On a - b - c only the pair {a, c} routes through b, so b = 1.0 and
1578        // the two endpoints score 0.0.
1579        let (nodes, edges) = path3();
1580        let got = map_of(betweenness(&nodes, &edges));
1581        assert!(
1582            (got["b"] - 1.0).abs() < 1e-9,
1583            "middle = 1.0, got {}",
1584            got["b"]
1585        );
1586        assert!(got["a"].abs() < 1e-9);
1587        assert!(got["c"].abs() < 1e-9);
1588    }
1589
1590    #[test]
1591    fn betweenness_isolated_node_scores_zero() {
1592        let nodes: Vec<String> = ["a", "b", "iso"].iter().map(|s| s.to_string()).collect();
1593        let edges = vec![w("a", "b")];
1594        let got = map_of(betweenness(&nodes, &edges));
1595        assert!(got["iso"].abs() < 1e-9, "isolated node scores 0.0");
1596        // A single edge contributes no intermediary, so its endpoints are 0 too.
1597        assert!(got["a"].abs() < 1e-9);
1598        assert!(got["b"].abs() < 1e-9);
1599    }
1600
1601    #[test]
1602    fn betweenness_empty_graph_is_empty() {
1603        let empty: Vec<String> = Vec::new();
1604        assert!(betweenness(&empty, &[]).is_empty());
1605    }
1606
1607    // ── eigenvector golden + properties ──
1608
1609    #[test]
1610    fn eigenvector_golden_path3_known_vector() {
1611        // The path P3 adjacency has principal eigenvector (1, √2, 1); L2
1612        // normalised that is (0.5, 1/√2, 0.5).
1613        let (nodes, edges) = path3();
1614        let got = map_of(eigenvector(&nodes, &edges, EIG_ITERS, EIG_TOL));
1615        let inv_sqrt2 = 1.0 / 2.0_f64.sqrt();
1616        assert!(
1617            (got["b"] - inv_sqrt2).abs() < 1e-6,
1618            "centre {} ~ {inv_sqrt2}",
1619            got["b"]
1620        );
1621        assert!(
1622            (got["a"] - 0.5).abs() < 1e-6,
1623            "endpoint a {} ~ 0.5",
1624            got["a"]
1625        );
1626        assert!(
1627            (got["c"] - 0.5).abs() < 1e-6,
1628            "endpoint c {} ~ 0.5",
1629            got["c"]
1630        );
1631        // L2-normalised: Σ x² = 1.
1632        let sumsq: f64 = got.values().map(|v| v * v).sum();
1633        assert!((sumsq - 1.0).abs() < 1e-6, "unit L2 norm, got {sumsq}");
1634    }
1635
1636    #[test]
1637    fn eigenvector_golden_triangle_all_equal() {
1638        // K3 is vertex-transitive, so all three scores equal 1/√3.
1639        let (nodes, edges) = triangle();
1640        let got = map_of(eigenvector(&nodes, &edges, EIG_ITERS, EIG_TOL));
1641        let expect = 1.0 / 3.0_f64.sqrt();
1642        for k in ["a", "b", "c"] {
1643            assert!((got[k] - expect).abs() < 1e-6, "{k} = 1/√3, got {}", got[k]);
1644        }
1645    }
1646
1647    #[test]
1648    fn eigenvector_isolated_node_scores_zero() {
1649        let nodes: Vec<String> = ["a", "b", "iso"].iter().map(|s| s.to_string()).collect();
1650        let edges = vec![w("a", "b")];
1651        let got = map_of(eigenvector(&nodes, &edges, EIG_ITERS, EIG_TOL));
1652        assert!(
1653            got["iso"].abs() < 1e-9,
1654            "isolated node scores 0.0, got {}",
1655            got["iso"]
1656        );
1657    }
1658
1659    #[test]
1660    fn shortest_path_parallel_edges_take_minimum() {
1661        // Two parallel a-b edges (weight 5 and weight 2): the min wins.
1662        let nodes: Vec<String> = ["a", "b"].iter().map(|s| s.to_string()).collect();
1663        let edges = vec![
1664            ("a".to_string(), "b".to_string(), 5.0f32),
1665            ("a".to_string(), "b".to_string(), 2.0),
1666        ];
1667        let path = shortest_path(&nodes, &edges, &"a".to_string(), &"b".to_string(), None);
1668        assert_eq!(path_weight(&path), Some(2.0));
1669    }
1670
1671    /// A small id universe plus random POSITIVE-weight edges between those ids.
1672    /// Weights are integers in 1..=9 (as f32) so distance comparisons are exact
1673    /// and the non-negative-weight precondition holds.
1674    fn weighted_graph_strategy(
1675    ) -> impl Strategy<Value = (Vec<String>, Vec<(String, String, Weight)>)> {
1676        (2usize..7usize).prop_flat_map(|n| {
1677            let nodes: Vec<String> = (0..n).map(|i| format!("n{i:02}")).collect();
1678            let ids = nodes.clone();
1679            let edge = (0..n, 0..n, 1u32..10u32)
1680                .prop_map(move |(a, b, w)| (format!("n{a:02}"), format!("n{b:02}"), w as f32));
1681            let edges = prop::collection::vec(edge, 0..14);
1682            (Just(ids), edges)
1683        })
1684    }
1685
1686    #[test]
1687    fn eigenvector_edgeless_graph_is_uniform_unit() {
1688        let nodes: Vec<String> = ["x", "y", "z", "q"].iter().map(|s| s.to_string()).collect();
1689        let got = map_of(eigenvector(&nodes, &[], EIG_ITERS, EIG_TOL));
1690        let expect = 1.0 / 4.0_f64.sqrt();
1691        for k in ["x", "y", "z", "q"] {
1692            assert!((got[k] - expect).abs() < 1e-9, "uniform 1/√n");
1693        }
1694    }
1695
1696    // ── pagerank golden + properties ──
1697
1698    #[test]
1699    fn pagerank_golden_triangle_all_equal() {
1700        // K3 is symmetric, so PageRank is uniform 1/3 regardless of damping.
1701        let (nodes, edges) = triangle();
1702        let got = map_of(pagerank(&nodes, &edges, PR_DAMPING, PR_ITERS));
1703        for k in ["a", "b", "c"] {
1704            assert!(
1705                (got[k] - 1.0 / 3.0).abs() < 1e-9,
1706                "{k} = 1/3, got {}",
1707                got[k]
1708            );
1709        }
1710        let sum: f64 = got.values().sum();
1711        assert!((sum - 1.0).abs() < 1e-9, "sums to 1");
1712    }
1713
1714    #[test]
1715    fn pagerank_golden_star_centre_ranks_highest() {
1716        // In K1,3 the centre collects rank from all three leaves and outranks
1717        // every (symmetric) leaf; the leaves share one rank.
1718        let (nodes, edges) = star();
1719        let got = map_of(pagerank(&nodes, &edges, PR_DAMPING, PR_ITERS));
1720        for leaf in ["l1", "l2", "l3"] {
1721            assert!(got["c"] > got[leaf], "centre outranks {leaf}");
1722            assert!((got[leaf] - got["l1"]).abs() < 1e-9, "leaves share rank");
1723        }
1724        let sum: f64 = got.values().sum();
1725        assert!((sum - 1.0).abs() < 1e-9, "sums to 1");
1726    }
1727
1728    #[test]
1729    fn pagerank_isolated_node_gets_teleport_floor() {
1730        let nodes: Vec<String> = ["a", "b", "iso"].iter().map(|s| s.to_string()).collect();
1731        let edges = vec![w("a", "b")];
1732        let got = map_of(pagerank(&nodes, &edges, PR_DAMPING, PR_ITERS));
1733        let sum: f64 = got.values().sum();
1734        assert!((sum - 1.0).abs() < 1e-9, "sums to 1");
1735        // Every node clears the teleport floor (1-d)/n.
1736        let floor = (1.0 - PR_DAMPING) / 3.0;
1737        for v in got.values() {
1738            assert!(*v + 1e-12 >= floor, "score {v} >= floor {floor}");
1739        }
1740        // The isolated node ranks below both connected nodes.
1741        assert!(got["iso"] < got["a"]);
1742        assert!(got["iso"] < got["b"]);
1743    }
1744
1745    #[test]
1746    fn centrality_empty_graph_all_empty() {
1747        let empty: Vec<String> = Vec::new();
1748        assert!(eigenvector(&empty, &[], EIG_ITERS, EIG_TOL).is_empty());
1749        assert!(pagerank(&empty, &[], PR_DAMPING, PR_ITERS).is_empty());
1750    }
1751
1752    // ── spectral_embedding (issue #804) ──
1753
1754    const SPECTRAL_ITERS: usize = 200;
1755    const SPECTRAL_TOL: f64 = 1e-9;
1756
1757    fn hint_map(rows: Vec<(String, (f64, f64))>) -> BTreeMap<String, (f64, f64)> {
1758        rows.into_iter().collect()
1759    }
1760
1761    #[test]
1762    fn spectral_embedding_coords_in_unit_interval() {
1763        // Two triangles bridged: every coordinate must land in [0, 1].
1764        let (nodes, edges) = two_cliques_bridge();
1765        let rows = spectral_embedding(&nodes, &edges, SPECTRAL_ITERS, SPECTRAL_TOL);
1766        for (node, (x, y)) in &rows {
1767            assert!(
1768                (0.0..=1.0).contains(x) && (0.0..=1.0).contains(y),
1769                "node {node} hint ({x}, {y}) must be normalised to [0, 1]²"
1770            );
1771        }
1772        // Min–max normalisation pins the extremes of the spread to 0 and 1.
1773        let xs: Vec<f64> = rows.iter().map(|(_, (x, _))| *x).collect();
1774        let min_x = xs.iter().cloned().fold(f64::INFINITY, f64::min);
1775        let max_x = xs.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1776        assert!(min_x.abs() < 1e-9, "x min anchored at 0, got {min_x}");
1777        assert!(
1778            (max_x - 1.0).abs() < 1e-9,
1779            "x max anchored at 1, got {max_x}"
1780        );
1781    }
1782
1783    #[test]
1784    fn spectral_embedding_determinism_100_runs_identical() {
1785        // AC: identical input must produce identical hints (no random init).
1786        let (nodes, edges) = karate_club();
1787        let first = spectral_embedding(&nodes, &edges, SPECTRAL_ITERS, SPECTRAL_TOL);
1788        for _ in 0..100 {
1789            assert_eq!(
1790                spectral_embedding(&nodes, &edges, SPECTRAL_ITERS, SPECTRAL_TOL),
1791                first,
1792                "spectral embedding must be bit-for-bit reproducible"
1793            );
1794        }
1795    }
1796
1797    #[test]
1798    fn spectral_embedding_node_order_independent() {
1799        // Shuffling the input node/edge order must not change the per-node hint:
1800        // the embedding sorts the universe internally before laying out.
1801        let (nodes, edges) = two_cliques_bridge();
1802        let mut shuffled_nodes = nodes.clone();
1803        shuffled_nodes.reverse();
1804        let mut shuffled_edges = edges.clone();
1805        shuffled_edges.reverse();
1806
1807        let a = hint_map(spectral_embedding(
1808            &nodes,
1809            &edges,
1810            SPECTRAL_ITERS,
1811            SPECTRAL_TOL,
1812        ));
1813        let b = hint_map(spectral_embedding(
1814            &shuffled_nodes,
1815            &shuffled_edges,
1816            SPECTRAL_ITERS,
1817            SPECTRAL_TOL,
1818        ));
1819        assert_eq!(a, b, "hint is keyed on node identity, not input order");
1820    }
1821
1822    #[test]
1823    fn spectral_embedding_separates_disconnected_components() {
1824        // Two disjoint triangles: the Fiedler coordinate should place the two
1825        // components in distinct regions of the x axis (graph-distance shows up
1826        // as layout distance), so the means differ noticeably.
1827        let nodes: Vec<String> = ["a", "b", "c", "d", "e", "f"]
1828            .iter()
1829            .map(|s| s.to_string())
1830            .collect();
1831        let edges = vec![
1832            w("a", "b"),
1833            w("b", "c"),
1834            w("c", "a"),
1835            w("d", "e"),
1836            w("e", "f"),
1837            w("f", "d"),
1838        ];
1839        let map = hint_map(spectral_embedding(
1840            &nodes,
1841            &edges,
1842            SPECTRAL_ITERS,
1843            SPECTRAL_TOL,
1844        ));
1845        let left = (map["a"].0 + map["b"].0 + map["c"].0) / 3.0;
1846        let right = (map["d"].0 + map["e"].0 + map["f"].0) / 3.0;
1847        assert!(
1848            (left - right).abs() > 0.25,
1849            "disconnected components should separate on the layout (Δx = {})",
1850            (left - right).abs()
1851        );
1852    }
1853
1854    #[test]
1855    fn spectral_embedding_degenerate_graphs() {
1856        // Empty graph -> empty result.
1857        let empty: Vec<String> = Vec::new();
1858        assert!(spectral_embedding(&empty, &[], SPECTRAL_ITERS, SPECTRAL_TOL).is_empty());
1859
1860        // Single node -> centred, finite hint.
1861        let one = vec!["solo".to_string()];
1862        let rows = spectral_embedding(&one, &[], SPECTRAL_ITERS, SPECTRAL_TOL);
1863        assert_eq!(rows, vec![("solo".to_string(), (0.5, 0.5))]);
1864
1865        // Edgeless multi-node graph -> still finite and in range.
1866        let nodes: Vec<String> = ["x", "y", "z"].iter().map(|s| s.to_string()).collect();
1867        for (_, (x, y)) in spectral_embedding(&nodes, &[], SPECTRAL_ITERS, SPECTRAL_TOL) {
1868            assert!(x.is_finite() && y.is_finite(), "edgeless hint stays finite");
1869            assert!((0.0..=1.0).contains(&x) && (0.0..=1.0).contains(&y));
1870        }
1871    }
1872
1873    proptest! {
1874        #![proptest_config(ProptestConfig::with_cases(256))]
1875
1876        // (a) symmetric distance on undirected graphs:
1877        //     weight(src -> dst) == weight(dst -> src), and reachability agrees.
1878        #[test]
1879        fn shortest_path_is_symmetric((nodes, edges) in weighted_graph_strategy()) {
1880            let universe = node_universe(&nodes, &edges);
1881            for a in &universe {
1882                for b in &universe {
1883                    let fwd = shortest_path(&nodes, &edges, a, b, None);
1884                    let rev = shortest_path(&nodes, &edges, b, a, None);
1885                    prop_assert_eq!(
1886                        path_weight(&fwd), path_weight(&rev),
1887                        "distance {} <-> {} must be symmetric", a, b
1888                    );
1889                }
1890            }
1891        }
1892
1893        // (b) triangle inequality: for reachable a,b,c,
1894        //     dist(a, c) <= dist(a, b) + dist(b, c).
1895        #[test]
1896        fn shortest_path_triangle_inequality((nodes, edges) in weighted_graph_strategy()) {
1897            let universe = node_universe(&nodes, &edges);
1898            for a in &universe {
1899                for b in &universe {
1900                    for c in &universe {
1901                        let ab = path_weight(&shortest_path(&nodes, &edges, a, b, None));
1902                        let bc = path_weight(&shortest_path(&nodes, &edges, b, c, None));
1903                        let ac = path_weight(&shortest_path(&nodes, &edges, a, c, None));
1904                        if let (Some(ab), Some(bc), Some(ac)) = (ab, bc, ac) {
1905                            prop_assert!(
1906                                ac <= ab + bc + 1e-9,
1907                                "triangle: d({},{})={} > d({},{})={} + d({},{})={}",
1908                                a, c, ac, a, b, ab, b, c, bc
1909                            );
1910                        }
1911                    }
1912                }
1913            }
1914        }
1915
1916        // (c) unreachable pairs across distinct connected components yield None.
1917        //     Build two disjoint cliques and assert no cross-component path.
1918        #[test]
1919        fn shortest_path_unreachable_across_components(
1920            (left, right) in (1usize..4usize, 1usize..4usize)
1921        ) {
1922            // Left clique uses ids l0.., right clique uses r0.. — disjoint id
1923            // spaces guarantee no shared node and thus no connecting edge.
1924            let mut nodes = Vec::new();
1925            let mut edges: Vec<(String, String, Weight)> = Vec::new();
1926            for i in 0..left { nodes.push(format!("l{i}")); }
1927            for i in 0..right { nodes.push(format!("r{i}")); }
1928            for i in 0..left {
1929                for j in (i + 1)..left {
1930                    edges.push((format!("l{i}"), format!("l{j}"), 1.0));
1931                }
1932            }
1933            for i in 0..right {
1934                for j in (i + 1)..right {
1935                    edges.push((format!("r{i}"), format!("r{j}"), 1.0));
1936                }
1937            }
1938            // Every left node is unreachable from every right node.
1939            prop_assert!(
1940                shortest_path(&nodes, &edges, &"l0".to_string(), &"r0".to_string(), None).is_none()
1941            );
1942            prop_assert!(
1943                shortest_path(&nodes, &edges, &"r0".to_string(), &"l0".to_string(), None).is_none()
1944            );
1945        }
1946
1947        // determinism: identical input -> identical output.
1948        #[test]
1949        fn shortest_path_determinism((nodes, edges) in weighted_graph_strategy()) {
1950            let universe = node_universe(&nodes, &edges);
1951            if universe.len() >= 2 {
1952                let a = &universe[0];
1953                let b = &universe[universe.len() - 1];
1954                let p1 = shortest_path(&nodes, &edges, a, b, None);
1955                let p2 = shortest_path(&nodes, &edges, a, b, None);
1956                prop_assert_eq!(p1, p2);
1957            }
1958        }
1959
1960        // betweenness: one row per node; all scores >= 0; isolated nodes (no
1961        // incident edge) score exactly 0; deterministic across runs.
1962        #[test]
1963        fn betweenness_range_isolation_and_determinism((nodes, edges) in graph_strategy()) {
1964            let rows = betweenness(&nodes, &edges);
1965
1966            let mut universe: BTreeSet<String> = BTreeSet::new();
1967            for n in &nodes { universe.insert(n.clone()); }
1968            for (a, b, _) in &edges { universe.insert(a.clone()); universe.insert(b.clone()); }
1969            prop_assert_eq!(rows.len(), universe.len());
1970
1971            // Degrees (undirected, self-loops excluded).
1972            let mut deg: BTreeMap<String, usize> = BTreeMap::new();
1973            for n in &universe { deg.insert(n.clone(), 0); }
1974            for (a, b, _) in &edges {
1975                if a != b {
1976                    *deg.get_mut(a).unwrap() += 1;
1977                    *deg.get_mut(b).unwrap() += 1;
1978                }
1979            }
1980            for (node, score) in &rows {
1981                prop_assert!(*score >= -1e-9, "score {} >= 0", score);
1982                if deg[node] == 0 {
1983                    prop_assert!(score.abs() < 1e-9, "isolated {} scores 0", node);
1984                }
1985            }
1986
1987            prop_assert_eq!(betweenness(&nodes, &edges), rows);
1988        }
1989
1990        // eigenvector: L2-normalised (Σx² ≈ 1), every score in [0, 1], and an
1991        // isolated node scores 0 whenever the graph has any edge. Deterministic.
1992        #[test]
1993        fn eigenvector_unit_norm_range_and_isolation((nodes, edges) in graph_strategy()) {
1994            let rows = eigenvector(&nodes, &edges, EIG_ITERS, EIG_TOL);
1995            prop_assume!(!rows.is_empty());
1996
1997            let sumsq: f64 = rows.iter().map(|(_, v)| v * v).sum();
1998            prop_assert!((sumsq - 1.0).abs() < 1e-6, "unit L2 norm, got {}", sumsq);
1999
2000            let mut deg: BTreeMap<String, usize> = BTreeMap::new();
2001            for (n, _) in &rows { deg.insert(n.clone(), 0); }
2002            for (a, b, _) in &edges {
2003                if a != b {
2004                    *deg.get_mut(a).unwrap() += 1;
2005                    *deg.get_mut(b).unwrap() += 1;
2006                }
2007            }
2008            let has_edges = deg.values().any(|d| *d > 0);
2009            for (node, score) in &rows {
2010                prop_assert!(*score >= -1e-9 && *score <= 1.0 + 1e-9, "score {} in [0,1]", score);
2011                if has_edges && deg[node] == 0 {
2012                    prop_assert!(score.abs() < 1e-6, "isolated {} ~ 0", node);
2013                }
2014            }
2015
2016            prop_assert_eq!(eigenvector(&nodes, &edges, EIG_ITERS, EIG_TOL), rows);
2017        }
2018
2019        // pagerank: scores sum to ~1, every score >= the teleport floor
2020        // (1-d)/n (so strictly positive), and the result is deterministic.
2021        #[test]
2022        fn pagerank_sums_to_one_floor_and_determinism((nodes, edges) in graph_strategy()) {
2023            let rows = pagerank(&nodes, &edges, PR_DAMPING, PR_ITERS);
2024            prop_assume!(!rows.is_empty());
2025
2026            let sum: f64 = rows.iter().map(|(_, v)| v).sum();
2027            prop_assert!((sum - 1.0).abs() < 1e-9, "PageRank sums to 1, got {}", sum);
2028
2029            let floor = (1.0 - PR_DAMPING) / rows.len() as f64;
2030            for (_, score) in &rows {
2031                prop_assert!(*score + 1e-12 >= floor, "score {} >= floor {}", score, floor);
2032            }
2033
2034            prop_assert_eq!(pagerank(&nodes, &edges, PR_DAMPING, PR_ITERS), rows);
2035        }
2036
2037        // spectral_embedding: one hint per node, every coordinate finite and in
2038        // [0, 1], and the result is deterministic across runs.
2039        #[test]
2040        fn spectral_embedding_range_coverage_and_determinism((nodes, edges) in graph_strategy()) {
2041            let rows = spectral_embedding(&nodes, &edges, SPECTRAL_ITERS, SPECTRAL_TOL);
2042
2043            let mut universe: BTreeSet<String> = BTreeSet::new();
2044            for n in &nodes { universe.insert(n.clone()); }
2045            for (a, b, _) in &edges { universe.insert(a.clone()); universe.insert(b.clone()); }
2046            prop_assert_eq!(rows.len(), universe.len());
2047
2048            for (node, (x, y)) in &rows {
2049                prop_assert!(x.is_finite() && y.is_finite(), "node {} hint is finite", node);
2050                prop_assert!(
2051                    *x >= -1e-9 && *x <= 1.0 + 1e-9 && *y >= -1e-9 && *y <= 1.0 + 1e-9,
2052                    "node {} hint ({}, {}) in [0,1]^2", node, x, y
2053                );
2054            }
2055
2056            prop_assert_eq!(spectral_embedding(&nodes, &edges, SPECTRAL_ITERS, SPECTRAL_TOL), rows);
2057        }
2058    }
2059}