Skip to main content

rust_igraph/algorithms/community/
walktrap.rs

1//! Walktrap random-walk community detection (ALGO-CO-008).
2//!
3//! Counterpart of `igraph_community_walktrap()` from
4//! `references/igraph/src/community/walktrap/`.
5//!
6//! Pons & Latapy (2005, 2006): two vertices are *close* when short
7//! random walks (length `t`, default 4) starting from each of them end
8//! up with similar probability distributions on the vertex set. The
9//! distance between communities `C1`, `C2` is the L2 norm of the
10//! probability vectors weighted by `1/sqrt(deg(v))`. Walktrap is a
11//! Ward-style hierarchical agglomeration that, at each step, merges the
12//! adjacent community pair whose merger minimises the increase in
13//! variance σ (equivalently, the lower-bound `Δσ`). The dendrogram is
14//! then cut at the level with maximum Newman-Girvan modularity.
15//!
16//! References:
17//! - P. Pons, M. Latapy. *Computing communities in large networks
18//!   using random walks*. J. Graph Algorithms Appl. 10 (2006), 191-218.
19//!   <https://doi.org/10.7155/jgaa.00124>
20//!
21//! Phase-1 scope: **undirected** graphs only — matches upstream C
22//! (`igraph_community_walktrap` does not support directed input). Edge
23//! weights, when given, must be finite and non-negative. Multi-edges
24//! are accepted and folded by weight-sum; self-loops are kept. Per the
25//! igraph #2043 fix, each vertex receives a synthesized self-loop of
26//! weight `mean(incident edge weight)` (or `1.0` if isolated) so
27//! diffusion is well-defined.
28//!
29//! Complexity: `O(t · |E| · nb_merges)` worst case in the present
30//! full-vector implementation. The sparse-vector trick from upstream is
31//! a future optimisation.
32
33#![allow(
34    clippy::cast_possible_truncation,
35    clippy::cast_possible_wrap,
36    clippy::cast_precision_loss,
37    clippy::cast_sign_loss,
38    clippy::float_cmp,
39    clippy::items_after_statements,
40    clippy::many_single_char_names,
41    clippy::needless_range_loop,
42    clippy::too_many_lines
43)]
44
45use crate::core::{Graph, IgraphError, IgraphResult};
46
47/// Default random-walk length matching the igraph reference (`steps = 4`).
48pub const WALKTRAP_DEFAULT_STEPS: u32 = 4;
49
50/// Tuning knobs for [`walktrap_with_options`].
51#[derive(Debug, Clone, Copy)]
52pub struct WalktrapOptions {
53    /// Random-walk length `t`. Must be `>= 1`; igraph default is `4`.
54    pub steps: u32,
55}
56
57impl Default for WalktrapOptions {
58    fn default() -> Self {
59        Self {
60            steps: WALKTRAP_DEFAULT_STEPS,
61        }
62    }
63}
64
65/// Result of [`walktrap`] / [`walktrap_weighted`] /
66/// [`walktrap_with_options`].
67#[derive(Debug, Clone)]
68pub struct WalktrapResult {
69    /// Per-vertex community label of the best-modularity dendrogram cut,
70    /// densified to `0..nb_clusters`.
71    pub membership: Vec<u32>,
72    /// Number of distinct communities in `membership`.
73    pub nb_clusters: u32,
74    /// Merges in dendrogram order. Each row `[c1, c2]` merges clusters
75    /// `c1` and `c2` into the new cluster `n + i` where `i` is the
76    /// merge index. Same encoding as `igraph_community_walktrap`.
77    pub merges: Vec<[u32; 2]>,
78    /// Modularity trajectory. `modularity[i]` is the modularity *after*
79    /// `i` merges starting from the all-singletons partition. Length =
80    /// `merges.len() + 1` when the graph has edges. For an edgeless
81    /// graph the single entry is `NaN`, matching the C convention.
82    pub modularity: Vec<f64>,
83}
84
85/// Run Walktrap on an unweighted, undirected graph with default `steps = 4`.
86///
87/// # Errors
88/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
89///
90/// # Examples
91///
92/// ```
93/// use rust_igraph::{Graph, walktrap};
94///
95/// // Triangle: walktrap puts all three vertices in one community.
96/// let mut g = Graph::with_vertices(3);
97/// for &(u, v) in &[(0, 1), (1, 2), (2, 0)] {
98///     g.add_edge(u, v).unwrap();
99/// }
100/// let r = walktrap(&g).unwrap();
101/// assert_eq!(r.nb_clusters, 1);
102/// assert_eq!(r.membership, vec![0, 0, 0]);
103/// ```
104pub fn walktrap(graph: &Graph) -> IgraphResult<WalktrapResult> {
105    walktrap_with_options(graph, None, WalktrapOptions::default())
106}
107
108/// Run Walktrap on a weighted, undirected graph with default `steps = 4`.
109///
110/// `weights[i]` is the weight of edge id `i`. Must be finite and
111/// non-negative; length must equal `graph.ecount()`.
112///
113/// # Errors
114/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
115/// - [`IgraphError::InvalidArgument`] if `weights.len() != ecount` or
116///   any weight is negative or non-finite.
117pub fn walktrap_weighted(graph: &Graph, weights: &[f64]) -> IgraphResult<WalktrapResult> {
118    walktrap_with_options(graph, Some(weights), WalktrapOptions::default())
119}
120
121/// Run Walktrap with a custom [`WalktrapOptions`].
122///
123/// `weights = None` runs the unweighted variant; otherwise see
124/// [`walktrap_weighted`] for the weight contract.
125///
126/// # Errors
127/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
128/// - [`IgraphError::InvalidArgument`] for `opts.steps == 0`, weight
129///   length mismatch, negative / non-finite weights, or a vertex whose
130///   incident-edge total weight underflows to zero.
131pub fn walktrap_with_options(
132    graph: &Graph,
133    weights: Option<&[f64]>,
134    opts: WalktrapOptions,
135) -> IgraphResult<WalktrapResult> {
136    if graph.is_directed() {
137        return Err(IgraphError::Unsupported(
138            "walktrap is undirected-only (matches igraph C reference)",
139        ));
140    }
141    if opts.steps == 0 {
142        return Err(IgraphError::InvalidArgument(
143            "walktrap: steps must be >= 1".to_string(),
144        ));
145    }
146    if let Some(w) = weights {
147        if w.len() != graph.ecount() {
148            return Err(IgraphError::InvalidArgument(
149                "walktrap: weights.len() must equal graph.ecount()".to_string(),
150            ));
151        }
152        for &v in w {
153            if !v.is_finite() || v < 0.0 {
154                return Err(IgraphError::InvalidArgument(
155                    "walktrap: weights must be finite and non-negative".to_string(),
156                ));
157            }
158        }
159    }
160
161    run(graph, weights, opts.steps)
162}
163
164// ---------------------------------------------------------------------
165// Internal: graph adapter
166// ---------------------------------------------------------------------
167
168#[derive(Clone, Copy)]
169struct AdjEntry {
170    neighbor: u32,
171    weight: f64,
172}
173
174struct InternalGraph {
175    n: u32,
176    /// `vertices[v]` is a sorted-by-neighbor adjacency including a
177    /// synthesized self-loop at position 0 with weight = mean incident
178    /// edge weight (or `1.0` when the vertex has no incident edges).
179    vertices: Vec<Vec<AdjEntry>>,
180    /// `total_weight[v]` includes the synthesized self-loop, matching
181    /// upstream's "strength" used in `P · D⁻¹` diffusion.
182    total_weight: Vec<f64>,
183    /// Sum of all `total_weight[v]`; the global "2m + n·mean-self-loop"
184    /// normalising constant used in the modularity formula.
185    total_weight_global: f64,
186    /// Sum of original (non-synthesised) edge weights; the `m` in the
187    /// classical modularity `Q = Σ (e_ii - a_i²)`.
188    /// We retain it for the dendrogram modularity computed at the end.
189    raw_total_weight: f64,
190}
191
192fn build_internal_graph(graph: &Graph, weights: Option<&[f64]>) -> IgraphResult<InternalGraph> {
193    let n = graph.vcount();
194    let mut vertices: Vec<Vec<AdjEntry>> = vec![Vec::new(); n as usize];
195    let mut deg: Vec<u32> = vec![0; n as usize];
196    let mut total_weight: Vec<f64> = vec![0.0; n as usize];
197    let mut raw_total_weight = 0.0f64;
198
199    let m = graph.ecount();
200    for eid in 0..m {
201        let (u, v) = graph.edge(eid as u32)?;
202        let w = match weights {
203            Some(ws) => ws[eid],
204            None => 1.0,
205        };
206        deg[u as usize] = deg[u as usize].saturating_add(1);
207        deg[v as usize] = deg[v as usize].saturating_add(1);
208        total_weight[u as usize] += w;
209        total_weight[v as usize] += w;
210        raw_total_weight += w;
211
212        vertices[u as usize].push(AdjEntry {
213            neighbor: v,
214            weight: w,
215        });
216        if u != v {
217            vertices[v as usize].push(AdjEntry {
218                neighbor: u,
219                weight: w,
220            });
221        }
222    }
223
224    // Prepend a synthesized self-loop with weight = mean incident edge
225    // weight (or 1.0 if isolated). Mirrors walktrap_graph.cpp lines
226    // 182-190 and the #2043 fix.
227    for v in 0..n as usize {
228        let mean_w = if deg[v] == 0 {
229            1.0
230        } else {
231            total_weight[v] / f64::from(deg[v])
232        };
233        vertices[v].push(AdjEntry {
234            neighbor: v as u32,
235            weight: mean_w,
236        });
237        total_weight[v] += mean_w;
238    }
239
240    // Sort each adjacency by neighbor id and fold parallel edges.
241    for v in 0..n as usize {
242        vertices[v].sort_by_key(|e| e.neighbor);
243        let mut folded: Vec<AdjEntry> = Vec::with_capacity(vertices[v].len());
244        for entry in &vertices[v] {
245            if let Some(last) = folded.last_mut() {
246                if last.neighbor == entry.neighbor {
247                    last.weight += entry.weight;
248                    continue;
249                }
250            }
251            folded.push(*entry);
252        }
253        vertices[v] = folded;
254    }
255
256    let mut total_weight_global = 0.0f64;
257    for v in 0..n as usize {
258        if total_weight[v] <= 0.0 {
259            return Err(IgraphError::InvalidArgument(
260                "walktrap: vertex with zero strength found; all vertices must have positive strength"
261                    .to_string(),
262            ));
263        }
264        total_weight_global += total_weight[v];
265    }
266
267    Ok(InternalGraph {
268        n,
269        vertices,
270        total_weight,
271        total_weight_global,
272        raw_total_weight,
273    })
274}
275
276// ---------------------------------------------------------------------
277// Internal: probabilities
278// ---------------------------------------------------------------------
279
280/// Length-n probability vector representing a community's t-step random
281/// walk distribution. Full (dense) representation; the upstream
282/// sparse/full switch is a deferred optimisation.
283type Probabilities = Vec<f64>;
284
285/// Compute `P^t · D⁻¹ · A` starting from a uniform distribution over
286/// `members`. `D⁻¹` is the diagonal of `1 / total_weight[v]`, and
287/// neighbours are taken from `g.vertices` (which already includes the
288/// synthesized self-loops).
289fn compute_probabilities(g: &InternalGraph, members: &[u32], steps: u32) -> Probabilities {
290    let n = g.n as usize;
291    let mut current = vec![0.0f64; n];
292    let inv_size = 1.0 / members.len() as f64;
293    for &v in members {
294        current[v as usize] = inv_size;
295    }
296    let mut next = vec![0.0f64; n];
297    for _ in 0..steps {
298        for v in 0..n {
299            next[v] = 0.0;
300        }
301        for u in 0..n {
302            let pu = current[u];
303            if pu == 0.0 {
304                continue;
305            }
306            let factor = pu / g.total_weight[u];
307            for entry in &g.vertices[u] {
308                next[entry.neighbor as usize] += factor * entry.weight;
309            }
310        }
311        std::mem::swap(&mut current, &mut next);
312    }
313    current
314}
315
316/// Squared distance between two communities' probability vectors,
317/// `Σ_v (P_a[v] - P_b[v])² / total_weight[v]`.
318fn distance_sq(g: &InternalGraph, a: &Probabilities, b: &Probabilities) -> f64 {
319    let mut acc = 0.0f64;
320    for v in 0..g.n as usize {
321        let d = a[v] - b[v];
322        acc += d * d / g.total_weight[v];
323    }
324    acc
325}
326
327// ---------------------------------------------------------------------
328// Internal: communities + neighbor pool
329// ---------------------------------------------------------------------
330
331#[derive(Clone)]
332struct CommunityState {
333    /// `false` means this community has been merged away.
334    active: bool,
335    size: u32,
336    /// `internal_weight` — sum of original edge weights with both
337    /// endpoints in this community (loops counted once). At the
338    /// singleton level this is the self-loop sum from the original
339    /// graph (NOT the synthesised one).
340    internal_weight: f64,
341    /// `total_weight` — sum of original edge weights incident to this
342    /// community. At the singleton level this is the degree-weight of
343    /// the original vertex (excluding the synthesised self-loop).
344    total_weight: f64,
345    /// Cached `P^t`. `None` until first needed.
346    probabilities: Option<Probabilities>,
347    /// Members of this community, by original vertex id. Built only
348    /// when we need to compute probabilities.
349    members: Vec<u32>,
350}
351
352#[derive(Clone)]
353struct NeighborEntry {
354    c1: u32,
355    c2: u32,
356    /// Lower-bound `Δσ` until `exact` is set.
357    delta_sigma: f64,
358    /// True once `delta_sigma` has been refined via the exact
359    /// L2-distance formula.
360    exact: bool,
361    /// Sum of original edge weights between c1 and c2.
362    weight: f64,
363    /// `false` means this neighbour entry has been superseded by a
364    /// merge and should be skipped on heap pop.
365    alive: bool,
366}
367
368struct Communities {
369    g: InternalGraph,
370    steps: u32,
371    nodes: Vec<CommunityState>,
372    /// Stable storage for neighbour entries. Indices into this vec are
373    /// used as `NeighborId`.
374    neighbors_pool: Vec<NeighborEntry>,
375    /// For each community id, sorted list of `(other_id, neighbor_id)`
376    /// adjacencies. Always `other_id != id`. We keep both halves so we
377    /// can iterate from either community.
378    adj: Vec<Vec<(u32, u32)>>,
379    /// Min-heap on `delta_sigma` of alive neighbour entries.
380    heap: Vec<u32>,
381    merges: Vec<[u32; 2]>,
382    modularity_trajectory: Vec<f64>,
383}
384
385// ---------------------------------------------------------------------
386// Internal: min-heap on `neighbors_pool[id].delta_sigma`
387// ---------------------------------------------------------------------
388
389fn heap_key(pool: &[NeighborEntry], id: u32) -> f64 {
390    pool[id as usize].delta_sigma
391}
392
393fn heap_push(heap: &mut Vec<u32>, pool: &[NeighborEntry], id: u32) {
394    heap.push(id);
395    let last = heap.len() - 1;
396    sift_up(heap, pool, last);
397}
398
399fn heap_pop(heap: &mut Vec<u32>, pool: &[NeighborEntry]) -> Option<u32> {
400    let last = heap.pop()?;
401    if heap.is_empty() {
402        return Some(last);
403    }
404    let top = std::mem::replace(&mut heap[0], last);
405    sift_down(heap, pool, 0);
406    Some(top)
407}
408
409fn sift_up(heap: &mut [u32], pool: &[NeighborEntry], mut i: usize) {
410    while i > 0 {
411        let parent = (i - 1) / 2;
412        if heap_key(pool, heap[i]) < heap_key(pool, heap[parent]) {
413            heap.swap(i, parent);
414            i = parent;
415        } else {
416            break;
417        }
418    }
419}
420
421fn sift_down(heap: &mut [u32], pool: &[NeighborEntry], mut i: usize) {
422    let n = heap.len();
423    loop {
424        let l = 2 * i + 1;
425        let r = 2 * i + 2;
426        let mut smallest = i;
427        if l < n && heap_key(pool, heap[l]) < heap_key(pool, heap[smallest]) {
428            smallest = l;
429        }
430        if r < n && heap_key(pool, heap[r]) < heap_key(pool, heap[smallest]) {
431            smallest = r;
432        }
433        if smallest == i {
434            break;
435        }
436        heap.swap(i, smallest);
437        i = smallest;
438    }
439}
440
441// ---------------------------------------------------------------------
442// Top-level driver (placeholder — full body lands in Step 4).
443// ---------------------------------------------------------------------
444
445fn run(graph: &Graph, weights: Option<&[f64]>, steps: u32) -> IgraphResult<WalktrapResult> {
446    let g = build_internal_graph(graph, weights)?;
447    drive(g, graph, weights, steps)
448}
449
450fn drive(
451    g: InternalGraph,
452    graph: &Graph,
453    weights: Option<&[f64]>,
454    steps: u32,
455) -> IgraphResult<WalktrapResult> {
456    let n = g.n;
457
458    // Trivial cases.
459    if n == 0 {
460        return Ok(WalktrapResult {
461            membership: Vec::new(),
462            nb_clusters: 0,
463            merges: Vec::new(),
464            modularity: vec![f64::NAN],
465        });
466    }
467    if g.raw_total_weight <= 0.0 {
468        // Edgeless: identity membership, NaN modularity, no merges.
469        return Ok(WalktrapResult {
470            membership: (0..n).collect(),
471            nb_clusters: n,
472            merges: Vec::new(),
473            modularity: vec![f64::NAN],
474        });
475    }
476
477    // Build initial singleton communities (one per vertex).
478    let mut nodes: Vec<CommunityState> = Vec::with_capacity(2 * n as usize);
479    for v in 0..n as usize {
480        // For the singleton community {v}, the original-graph internal
481        // weight equals the synthesised loop minus the synthesised
482        // contribution. We need the *original* self-loop weight if any.
483        // We compute these from `graph + weights` directly to keep
484        // book-keeping simple and unambiguous.
485        nodes.push(CommunityState {
486            active: true,
487            size: 1,
488            internal_weight: 0.0,
489            total_weight: 0.0,
490            probabilities: None,
491            members: vec![v as u32],
492        });
493    }
494    // Fill total_weight and internal_weight from the original edges.
495    for eid in 0..graph.ecount() {
496        let (u, v) = graph.edge(eid as u32)?;
497        let w = match weights {
498            Some(ws) => ws[eid],
499            None => 1.0,
500        };
501        nodes[u as usize].total_weight += w;
502        if u == v {
503            nodes[u as usize].internal_weight += w;
504        }
505        if u != v {
506            nodes[v as usize].total_weight += w;
507        }
508    }
509
510    // Initialise neighbour list and heap.
511    let mut neighbors_pool: Vec<NeighborEntry> = Vec::new();
512    let mut adj: Vec<Vec<(u32, u32)>> = vec![Vec::new(); (2 * n) as usize];
513    let mut heap: Vec<u32> = Vec::new();
514
515    // Build community-level adjacency: collapse parallel edges between
516    // distinct endpoints; skip self-loops (a self-loop is internal to
517    // the singleton and not a neighbour pair).
518    use std::collections::BTreeMap;
519    let mut pair_weight: BTreeMap<(u32, u32), f64> = BTreeMap::new();
520    for eid in 0..graph.ecount() {
521        let (u, v) = graph.edge(eid as u32)?;
522        if u == v {
523            continue;
524        }
525        let w = match weights {
526            Some(ws) => ws[eid],
527            None => 1.0,
528        };
529        let (a, b) = if u < v { (u, v) } else { (v, u) };
530        *pair_weight.entry((a, b)).or_insert(0.0) += w;
531    }
532
533    for ((c1, c2), w) in pair_weight {
534        // Lower-bound Δσ = -1 / min(|adj(c1)| as community, |adj(c2)|).
535        // At the singleton level upstream uses 1/min(deg(c1), deg(c2))
536        // where degrees come from the synthesised adjacency.
537        let d1 = g.vertices[c1 as usize].len() as f64;
538        let d2 = g.vertices[c2 as usize].len() as f64;
539        let ds_lower = -1.0 / d1.min(d2);
540        let id = neighbors_pool.len() as u32;
541        neighbors_pool.push(NeighborEntry {
542            c1,
543            c2,
544            delta_sigma: ds_lower,
545            exact: false,
546            weight: w,
547            alive: true,
548        });
549        adj[c1 as usize].push((c2, id));
550        adj[c2 as usize].push((c1, id));
551        heap_push(&mut heap, &neighbors_pool, id);
552    }
553    for v in 0..(2 * n) as usize {
554        adj[v].sort_by_key(|&(other, _)| other);
555    }
556
557    let mut comms = Communities {
558        g,
559        steps,
560        nodes,
561        neighbors_pool,
562        adj,
563        heap,
564        merges: Vec::new(),
565        modularity_trajectory: Vec::new(),
566    };
567
568    // Initial modularity (all singletons): Q_0 = -Σ a_c² where
569    // a_c = total_weight_c / (2m). For loop-free graphs the diagonal
570    // term Σ e_cc = Σ loop_weight / m which is 0; the canonical
571    // Walktrap output matches this.
572    let m = graph_total_edge_weight(graph, weights);
573    comms
574        .modularity_trajectory
575        .push(initial_modularity(&comms, m));
576
577    // Greedy agglomeration loop: pop neighbour entries until none
578    // remain; refine non-exact entries on the fly.
579    while let Some(id) = pop_exact(&mut comms) {
580        merge_pair(&mut comms, id, m);
581    }
582
583    // Recover the partition at argmax modularity.
584    let merges = comms.merges.clone();
585    let modularity = comms.modularity_trajectory.clone();
586    let mut packed = membership_at_best_modularity(n, &merges, &modularity);
587    let nb_clusters = densify_membership(&mut packed);
588
589    Ok(WalktrapResult {
590        membership: packed,
591        nb_clusters,
592        merges,
593        modularity,
594    })
595}
596
597fn graph_total_edge_weight(graph: &Graph, weights: Option<&[f64]>) -> f64 {
598    if let Some(ws) = weights {
599        ws.iter().copied().sum()
600    } else {
601        graph.ecount() as f64
602    }
603}
604
605fn initial_modularity(comms: &Communities, m: f64) -> f64 {
606    if m <= 0.0 {
607        return f64::NAN;
608    }
609    let two_m = 2.0 * m;
610    let mut q = 0.0f64;
611    for c in &comms.nodes {
612        if !c.active {
613            continue;
614        }
615        let a = c.total_weight / two_m;
616        q += c.internal_weight / m - a * a;
617    }
618    q
619}
620
621fn pop_exact(comms: &mut Communities) -> Option<u32> {
622    loop {
623        let id = heap_pop(&mut comms.heap, &comms.neighbors_pool)?;
624        if !comms.neighbors_pool[id as usize].alive {
625            continue;
626        }
627        if comms.neighbors_pool[id as usize].exact {
628            return Some(id);
629        }
630        // Refine: compute the exact Δσ via the L2 distance.
631        refine_delta_sigma(comms, id);
632        comms.neighbors_pool[id as usize].exact = true;
633        heap_push(&mut comms.heap, &comms.neighbors_pool, id);
634    }
635}
636
637fn refine_delta_sigma(comms: &mut Communities, id: u32) {
638    let entry = comms.neighbors_pool[id as usize].clone();
639    let c1 = entry.c1 as usize;
640    let c2 = entry.c2 as usize;
641    ensure_probabilities(comms, entry.c1);
642    ensure_probabilities(comms, entry.c2);
643    let s1 = f64::from(comms.nodes[c1].size);
644    let s2 = f64::from(comms.nodes[c2].size);
645    let p1 = comms.nodes[c1]
646        .probabilities
647        .as_ref()
648        .map_or_else(Vec::new, std::clone::Clone::clone);
649    let p2 = comms.nodes[c2]
650        .probabilities
651        .as_ref()
652        .map_or_else(Vec::new, std::clone::Clone::clone);
653    let d2 = distance_sq(&comms.g, &p1, &p2);
654    // Pons-Latapy: Δσ_{c1, c2} = (s1 * s2 / (s1 + s2)) * d² / N where
655    // N = Σ_v total_weight[v] (the global walk normaliser). We use the
656    // factor exactly as in walktrap_communities.cpp.
657    let n_global = comms.g.total_weight_global;
658    let delta = (s1 * s2 / (s1 + s2)) * d2 / n_global;
659    comms.neighbors_pool[id as usize].delta_sigma = delta;
660}
661
662fn ensure_probabilities(comms: &mut Communities, c: u32) {
663    if comms.nodes[c as usize].probabilities.is_some() {
664        return;
665    }
666    let members = comms.nodes[c as usize].members.clone();
667    let p = compute_probabilities(&comms.g, &members, comms.steps);
668    comms.nodes[c as usize].probabilities = Some(p);
669}
670
671fn merge_pair(comms: &mut Communities, id: u32, m: f64) {
672    let entry = comms.neighbors_pool[id as usize].clone();
673    comms.neighbors_pool[id as usize].alive = false;
674    let c1 = entry.c1;
675    let c2 = entry.c2;
676    let new_id = comms.nodes.len() as u32;
677
678    // Build the merged community.
679    let size = comms.nodes[c1 as usize].size + comms.nodes[c2 as usize].size;
680    let internal = comms.nodes[c1 as usize].internal_weight
681        + comms.nodes[c2 as usize].internal_weight
682        + entry.weight;
683    let total = comms.nodes[c1 as usize].total_weight + comms.nodes[c2 as usize].total_weight;
684    let mut members = Vec::with_capacity(
685        comms.nodes[c1 as usize].members.len() + comms.nodes[c2 as usize].members.len(),
686    );
687    members.extend_from_slice(&comms.nodes[c1 as usize].members);
688    members.extend_from_slice(&comms.nodes[c2 as usize].members);
689
690    // Cache P^t for the merged community using the size-weighted blend.
691    let p_merged: Option<Probabilities> = if let (Some(p1), Some(p2)) = (
692        comms.nodes[c1 as usize].probabilities.as_ref(),
693        comms.nodes[c2 as usize].probabilities.as_ref(),
694    ) {
695        let s1 = f64::from(comms.nodes[c1 as usize].size);
696        let s2 = f64::from(comms.nodes[c2 as usize].size);
697        let denom = s1 + s2;
698        let mut p = vec![0.0f64; comms.g.n as usize];
699        for v in 0..comms.g.n as usize {
700            p[v] = (s1 * p1[v] + s2 * p2[v]) / denom;
701        }
702        Some(p)
703    } else {
704        None
705    };
706
707    comms.nodes.push(CommunityState {
708        active: true,
709        size,
710        internal_weight: internal,
711        total_weight: total,
712        probabilities: p_merged,
713        members,
714    });
715    comms.adj.push(Vec::new());
716    comms.nodes[c1 as usize].active = false;
717    comms.nodes[c2 as usize].active = false;
718    // Free old probability caches to keep peak memory bounded.
719    comms.nodes[c1 as usize].probabilities = None;
720    comms.nodes[c2 as usize].probabilities = None;
721
722    // Rewire adjacency: combine c1's and c2's neighbour lists into the
723    // new community. For each k adjacent to c1, c2, or both: create one
724    // new neighbour entry between new_id and k. Mark the old c1↔k and
725    // c2↔k entries as dead.
726    use std::collections::BTreeMap;
727    let mut combined: BTreeMap<u32, (f64, Option<f64>, Option<f64>)> = BTreeMap::new();
728    // Format: combined[k] = (weight_sum, ds_via_c1, ds_via_c2)
729    for &(k, nid) in &comms.adj[c1 as usize] {
730        let entry_k = &comms.neighbors_pool[nid as usize];
731        if !entry_k.alive {
732            continue;
733        }
734        let ds_known = if entry_k.exact {
735            Some(entry_k.delta_sigma)
736        } else {
737            None
738        };
739        let slot = combined.entry(k).or_insert((0.0, None, None));
740        slot.0 += entry_k.weight;
741        slot.1 = ds_known;
742    }
743    for &(k, nid) in &comms.adj[c2 as usize] {
744        let entry_k = &comms.neighbors_pool[nid as usize];
745        if !entry_k.alive {
746            continue;
747        }
748        let ds_known = if entry_k.exact {
749            Some(entry_k.delta_sigma)
750        } else {
751            None
752        };
753        let slot = combined.entry(k).or_insert((0.0, None, None));
754        slot.0 += entry_k.weight;
755        slot.2 = ds_known;
756    }
757    // Mark all c1's / c2's edges as dead.
758    for &(_, nid) in &comms.adj[c1 as usize] {
759        comms.neighbors_pool[nid as usize].alive = false;
760    }
761    for &(_, nid) in &comms.adj[c2 as usize] {
762        comms.neighbors_pool[nid as usize].alive = false;
763    }
764
765    // Create the merged neighbour entries.
766    for (k, (w, ds_c1, ds_c2)) in combined {
767        if k == new_id || !comms.nodes[k as usize].active {
768            continue;
769        }
770        // Compose a lower bound for Δσ from the three-formula rule.
771        // For the chain cases we don't know an exact value, so fall back
772        // to the singleton-style `-1/min(|adj(new)|, |adj(k)|)` lower
773        // bound which is always ≤ the true value.
774        let (delta, exact) = if let (Some(d1), Some(d2)) = (ds_c1, ds_c2) {
775            // Triangle: both old neighbours were exact.
776            let s1 = f64::from(comms.nodes[c1 as usize].size);
777            let s2 = f64::from(comms.nodes[c2 as usize].size);
778            let sk = f64::from(comms.nodes[k as usize].size);
779            let new_s = s1 + s2;
780            let triangle =
781                ((s1 + sk) * d1 + (s2 + sk) * d2 - sk * entry.delta_sigma) / (new_s + sk);
782            (triangle, true)
783        } else {
784            let d_new = (comms.adj[c1 as usize].len() + comms.adj[c2 as usize].len()) as f64;
785            let d_k = comms.adj[k as usize].len() as f64;
786            let denom = d_new.min(d_k).max(1.0);
787            (-1.0 / denom, false)
788        };
789
790        let id_new = comms.neighbors_pool.len() as u32;
791        comms.neighbors_pool.push(NeighborEntry {
792            c1: new_id.min(k),
793            c2: new_id.max(k),
794            delta_sigma: delta,
795            exact,
796            weight: w,
797            alive: true,
798        });
799        comms.adj[new_id as usize].push((k, id_new));
800        comms.adj[k as usize].push((new_id, id_new));
801        heap_push(&mut comms.heap, &comms.neighbors_pool, id_new);
802    }
803    comms.adj[new_id as usize].sort_by_key(|&(o, _)| o);
804    comms.adj[c1 as usize].clear();
805    comms.adj[c2 as usize].clear();
806
807    // Record merge + modularity step.
808    comms.merges.push([c1, c2]);
809    let q_prev = comms.modularity_trajectory.last().copied().unwrap_or(0.0);
810    // ΔQ = (entry.weight / m) - 2 · a_c1 · a_c2 where a_c = total/2m.
811    let two_m = 2.0 * m;
812    let a1 = comms.nodes[c1 as usize].total_weight / two_m;
813    let a2 = comms.nodes[c2 as usize].total_weight / two_m;
814    let delta_q = entry.weight / m - 2.0 * a1 * a2;
815    comms.modularity_trajectory.push(q_prev + delta_q);
816}
817
818fn membership_at_best_modularity(n: u32, merges: &[[u32; 2]], modularity: &[f64]) -> Vec<u32> {
819    // Find the merge prefix that maximises modularity.
820    let mut best = 0usize;
821    let mut best_q = f64::NEG_INFINITY;
822    for (i, &q) in modularity.iter().enumerate() {
823        if q.is_finite() && q > best_q {
824            best_q = q;
825            best = i;
826        }
827    }
828    // Apply the first `best` merges to a union-find seeded at identity.
829    let mut parent: Vec<u32> = (0..n).collect();
830    let mut rep: Vec<u32> = (0..(n + best as u32)).collect();
831    fn find(rep: &mut [u32], x: u32) -> u32 {
832        let mut r = x;
833        while rep[r as usize] != r {
834            r = rep[r as usize];
835        }
836        let mut cur = x;
837        while rep[cur as usize] != r {
838            let nxt = rep[cur as usize];
839            rep[cur as usize] = r;
840            cur = nxt;
841        }
842        r
843    }
844    for (i, m) in merges.iter().take(best).enumerate() {
845        let new_id = n + i as u32;
846        let r1 = find(&mut rep, m[0]);
847        let r2 = find(&mut rep, m[1]);
848        rep[r1 as usize] = new_id;
849        rep[r2 as usize] = new_id;
850    }
851    for v in 0..n {
852        parent[v as usize] = find(&mut rep, v);
853    }
854    parent
855}
856
857fn densify_membership(membership: &mut [u32]) -> u32 {
858    use std::collections::BTreeMap;
859    let mut remap: BTreeMap<u32, u32> = BTreeMap::new();
860    let mut next = 0u32;
861    for v in membership.iter_mut() {
862        let id = *remap.entry(*v).or_insert_with(|| {
863            let n = next;
864            next += 1;
865            n
866        });
867        *v = id;
868    }
869    next
870}
871
872// ---------------------------------------------------------------------
873// Tests
874// ---------------------------------------------------------------------
875
876#[cfg(test)]
877mod tests {
878    use super::*;
879
880    fn near(a: f64, b: f64, tol: f64) -> bool {
881        (a - b).abs() <= tol || (a.is_nan() && b.is_nan())
882    }
883
884    fn vec_near(a: &[f64], b: &[f64], tol: f64) -> bool {
885        a.len() == b.len() && a.iter().zip(b).all(|(&x, &y)| near(x, y, tol))
886    }
887
888    fn triangle() -> Graph {
889        let mut g = Graph::with_vertices(3);
890        for &(u, v) in &[(0, 1), (1, 2), (2, 0)] {
891            g.add_edge(u, v).expect("add edge");
892        }
893        g
894    }
895
896    /// Mirrors `community_walktrap.out` "Basic test".
897    #[test]
898    fn c_basic_triangle() {
899        let g = triangle();
900        let r = walktrap(&g).expect("walktrap on triangle");
901        assert_eq!(r.merges, vec![[1, 2], [0, 3]]);
902        assert!(vec_near(
903            &r.modularity,
904            &[-1.0 / 3.0, -2.0 / 9.0, 0.0],
905            1e-12
906        ));
907        assert_eq!(r.membership, vec![0, 0, 0]);
908        assert_eq!(r.nb_clusters, 1);
909    }
910
911    /// Mirrors `community_walktrap.out` "Bug 2042".
912    #[test]
913    fn c_bug2042_single_weighted_edge_with_isolate() {
914        let mut g = Graph::with_vertices(3);
915        g.add_edge(0, 1).expect("add edge");
916        let r = walktrap_weighted(&g, &[0.2]).expect("walktrap weighted");
917        assert_eq!(r.merges, vec![[0, 1]]);
918        assert!(vec_near(&r.modularity, &[-0.5, 0.0], 1e-12));
919        // Membership labels are remapped to dense `0..k`; the
920        // partition is `{0,1}` and `{2}`.
921        assert_eq!(r.membership[0], r.membership[1]);
922        assert_ne!(r.membership[0], r.membership[2]);
923        assert_eq!(r.nb_clusters, 2);
924    }
925
926    /// Mirrors `community_walktrap.out` "Small weighted graph".
927    #[test]
928    fn c_ring6_weighted() {
929        let mut g = Graph::with_vertices(6);
930        for &(u, v) in &[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)] {
931            g.add_edge(u, v).expect("add edge");
932        }
933        let weights = [1.0_f64, 0.5, 0.25, 0.75, 1.25, 1.5];
934        let r = walktrap_weighted(&g, &weights).expect("walktrap weighted");
935        assert_eq!(r.merges, vec![[3, 4], [1, 2], [0, 5], [7, 8], [6, 9]]);
936        let expected_mod = [
937            -0.196_145_124_716_553_3,
938            -0.089_569_160_997_732_43,
939            -0.014_739_229_024_943_318,
940            0.146_258_503_401_360_5,
941            0.122_448_979_591_836_7,
942            0.0, // zapsmall(1e-15) in C output
943        ];
944        assert_eq!(r.modularity.len(), expected_mod.len());
945        for (a, b) in r.modularity.iter().zip(&expected_mod) {
946            assert!(near(*a, *b, 1e-12), "mod mismatch: {a} vs {b}");
947        }
948        // Argmax index = 3 → membership at that cut:
949        //   merge 0: c3,c4 → c6; merge 1: c1,c2 → c7; merge 2: c0,c5 → c8.
950        // Three communities at cut: {0,5} {1,2} {3,4}. Densify-friendly assertions:
951        assert_eq!(r.nb_clusters, 3);
952        assert_eq!(r.membership[0], r.membership[5]);
953        assert_eq!(r.membership[1], r.membership[2]);
954        assert_eq!(r.membership[3], r.membership[4]);
955        assert_ne!(r.membership[0], r.membership[1]);
956        assert_ne!(r.membership[0], r.membership[3]);
957        assert_ne!(r.membership[1], r.membership[3]);
958    }
959
960    /// Mirrors `community_walktrap.out` "Isolated vertices".
961    #[test]
962    fn c_five_isolated() {
963        let g = Graph::with_vertices(5);
964        let r = walktrap(&g).expect("walktrap isolated");
965        assert!(r.merges.is_empty());
966        assert_eq!(r.modularity.len(), 1);
967        assert!(r.modularity[0].is_nan());
968        assert_eq!(r.membership, vec![0, 1, 2, 3, 4]);
969        assert_eq!(r.nb_clusters, 5);
970    }
971
972    #[test]
973    fn rejects_directed_graph() {
974        let g = Graph::new(3, true).expect("directed graph");
975        assert!(matches!(walktrap(&g), Err(IgraphError::Unsupported(_))));
976    }
977
978    #[test]
979    fn rejects_steps_zero() {
980        let g = triangle();
981        let opts = WalktrapOptions { steps: 0 };
982        assert!(matches!(
983            walktrap_with_options(&g, None, opts),
984            Err(IgraphError::InvalidArgument(_))
985        ));
986    }
987
988    #[test]
989    fn rejects_negative_weight() {
990        let g = triangle();
991        let w = [1.0_f64, -0.1, 1.0];
992        assert!(matches!(
993            walktrap_weighted(&g, &w),
994            Err(IgraphError::InvalidArgument(_))
995        ));
996    }
997
998    #[test]
999    fn rejects_weight_length_mismatch() {
1000        let g = triangle();
1001        let w = [1.0_f64, 1.0];
1002        assert!(matches!(
1003            walktrap_weighted(&g, &w),
1004            Err(IgraphError::InvalidArgument(_))
1005        ));
1006    }
1007
1008    #[test]
1009    fn rejects_nan_weight() {
1010        let g = triangle();
1011        let w = [1.0_f64, f64::NAN, 1.0];
1012        assert!(matches!(
1013            walktrap_weighted(&g, &w),
1014            Err(IgraphError::InvalidArgument(_))
1015        ));
1016    }
1017
1018    #[test]
1019    fn empty_graph_returns_empty_membership() {
1020        let g = Graph::with_vertices(0);
1021        let r = walktrap(&g).expect("walktrap on empty graph");
1022        assert!(r.membership.is_empty());
1023        assert_eq!(r.nb_clusters, 0);
1024        assert!(r.merges.is_empty());
1025        assert_eq!(r.modularity.len(), 1);
1026        assert!(r.modularity[0].is_nan());
1027    }
1028
1029    #[test]
1030    fn single_vertex_no_edges() {
1031        let g = Graph::with_vertices(1);
1032        let r = walktrap(&g).expect("walktrap on single vertex");
1033        assert_eq!(r.membership, vec![0]);
1034        assert_eq!(r.nb_clusters, 1);
1035        assert!(r.merges.is_empty());
1036        // m == 0 → NaN trajectory.
1037        assert!(r.modularity[0].is_nan());
1038    }
1039
1040    /// Two K4 cliques bridged by a single light edge should split into
1041    /// two communities at the optimal modularity cut.
1042    #[test]
1043    fn two_k4_with_bridge() {
1044        let mut g = Graph::with_vertices(8);
1045        for &(u, v) in &[
1046            (0, 1),
1047            (0, 2),
1048            (0, 3),
1049            (1, 2),
1050            (1, 3),
1051            (2, 3),
1052            (4, 5),
1053            (4, 6),
1054            (4, 7),
1055            (5, 6),
1056            (5, 7),
1057            (6, 7),
1058            (3, 4),
1059        ] {
1060            g.add_edge(u, v).expect("add edge");
1061        }
1062        let r = walktrap(&g).expect("walktrap");
1063        assert_eq!(r.nb_clusters, 2);
1064        for v in 0..4u32 {
1065            assert_eq!(r.membership[v as usize], r.membership[0]);
1066        }
1067        for v in 4..8u32 {
1068            assert_eq!(r.membership[v as usize], r.membership[4]);
1069        }
1070        assert_ne!(r.membership[0], r.membership[4]);
1071    }
1072
1073    #[test]
1074    fn multi_edges_are_folded_by_weight_sum() {
1075        // Two K3s + a doubled bridge. Multi-edge handling shouldn't
1076        // change the partition.
1077        let mut g = Graph::with_vertices(6);
1078        for &(u, v) in &[
1079            (0, 1),
1080            (0, 2),
1081            (1, 2),
1082            (3, 4),
1083            (3, 5),
1084            (4, 5),
1085            (2, 3),
1086            (2, 3), // doubled bridge
1087        ] {
1088            g.add_edge(u, v).expect("add edge");
1089        }
1090        let r = walktrap(&g).expect("walktrap");
1091        // Smoke: must succeed and produce a valid partition.
1092        assert_eq!(r.membership.len(), 6);
1093        assert!((1..=6).contains(&r.nb_clusters));
1094    }
1095
1096    #[cfg(all(test, feature = "proptest-harness"))]
1097    mod prop {
1098        use super::*;
1099        use proptest::prelude::*;
1100
1101        prop_compose! {
1102            fn small_undirected_graph()(n in 2u32..=8u32, edges_seed in any::<u64>()) -> Graph {
1103                let mut g = Graph::with_vertices(n);
1104                let mut rng = edges_seed;
1105                let target_m = ((n * (n - 1)) / 2).min(n + 4) as usize;
1106                let mut added = 0usize;
1107                let mut guard = 0usize;
1108                while added < target_m && guard < target_m * 8 {
1109                    rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
1110                    let u = ((rng >> 33) % u64::from(n)) as u32;
1111                    rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
1112                    let v = ((rng >> 33) % u64::from(n)) as u32;
1113                    guard += 1;
1114                    if u == v { continue; }
1115                    if g.add_edge(u, v).is_ok() { added += 1; }
1116                }
1117                g
1118            }
1119        }
1120
1121        proptest! {
1122            #[test]
1123            fn walktrap_partition_is_valid(g in small_undirected_graph()) {
1124                let n = g.vcount();
1125                let r = walktrap(&g).expect("walktrap");
1126                prop_assert_eq!(r.membership.len(), n as usize);
1127                // Densified labels live in 0..nb_clusters.
1128                for &c in &r.membership {
1129                    prop_assert!(c < r.nb_clusters);
1130                }
1131                // Modularity trajectory length matches merges + 1.
1132                if g.ecount() > 0 {
1133                    prop_assert_eq!(r.modularity.len(), r.merges.len() + 1);
1134                    for &q in &r.modularity {
1135                        prop_assert!(q.is_finite(), "modularity must be finite for connected weighted ops");
1136                    }
1137                }
1138            }
1139
1140            #[test]
1141            fn walktrap_steps_in_range_does_not_crash(
1142                g in small_undirected_graph(),
1143                steps in 1u32..=8,
1144            ) {
1145                let r = walktrap_with_options(&g, None, WalktrapOptions { steps })
1146                    .expect("walktrap with options");
1147                prop_assert_eq!(r.membership.len(), g.vcount() as usize);
1148            }
1149        }
1150    }
1151}