Skip to main content

rust_igraph/algorithms/community/
fast_greedy_modularity.rs

1//! Fast greedy modularity community detection (ALGO-CO-007).
2//!
3//! Counterpart of `igraph_community_fastgreedy()` from
4//! `references/igraph/src/community/fast_modularity.c`.
5//!
6//! Clauset-Newman-Moore (2004): start with every vertex its own
7//! community, then greedily merge the community-pair whose merger
8//! yields the largest improvement in Newman-Girvan modularity. The
9//! Wakita-Tsurumi (2007) data-structure trick maintains a global
10//! max-heap over per-community max-`ΔQ` so each merge runs in
11//! `O((|N(to)| + |N(from)|) log V)`.
12//!
13//! References:
14//! - A. Clauset, M. E. J. Newman, C. Moore. *Finding community
15//!   structure in very large networks*, Phys. Rev. E 70 (2004).
16//!   <https://doi.org/10.1103/PhysRevE.70.066111>
17//! - K. Wakita, T. Tsurumi. *Finding community structure in mega-scale
18//!   social networks*, arXiv:cs/0702048.
19//!
20//! Phase-1 scope: **undirected**, optionally weighted with non-negative
21//! finite weights; rejects multi-edges (matches the upstream constraint)
22//! and directed graphs. Self-loops are accepted and contribute to the
23//! `loop_weight_sum` term, exactly as in C.
24//!
25//! Per-pair `ΔQ` update rules on each merge of `from → to`:
26//! - **Triangle** (both `to` and `from` already know `k`): the new
27//!   pair `(to, k)` has `ΔQ' = ΔQ(to, k) + ΔQ(from, k)`.
28//! - **Chain case 1** (only `to` knows `k`): `ΔQ' = ΔQ(to, k) -
29//!   2 · a[from] · a[k]` (the old `from`-`k` non-edge is now part of
30//!   `to`'s neighbourhood with implicit edge weight 0).
31//! - **Chain case 2** (only `from` knows `k`): `ΔQ' = ΔQ(from, k) -
32//!   2 · a[to] · a[k]`.
33//!
34//! `a[c]` is the fraction of total edge weight incident to community
35//! `c`. The running modularity `q` is updated as `q += ΔQ` on each
36//! accepted merge. The C drives the merge to completion (full
37//! dendrogram of `n-1` rows) and reports the partition at the highest
38//! Q-step; we do the same.
39//!
40//! Complexity: `O(|E| · log V)` per merge × up to `|V|` merges, so
41//! `O(|V| · |E| · log V)` worst case — matches the C reference.
42
43#![allow(
44    clippy::cast_possible_truncation,
45    clippy::cast_possible_wrap,
46    clippy::cast_precision_loss,
47    clippy::cast_sign_loss,
48    clippy::float_cmp,
49    clippy::items_after_statements,
50    clippy::many_single_char_names,
51    clippy::needless_range_loop,
52    clippy::too_many_lines
53)]
54
55use std::cmp::Ordering;
56use std::collections::{BTreeMap, BTreeSet, BinaryHeap};
57
58use crate::algorithms::properties::multiplicity::has_multiple;
59use crate::core::{Graph, IgraphError, IgraphResult};
60
61/// Result of [`fast_greedy_modularity`] / [`fast_greedy_modularity_weighted`].
62#[derive(Debug, Clone)]
63pub struct FastGreedyResult {
64    /// Per-vertex community label of the best-modularity partition,
65    /// densified to `0..nb_clusters`.
66    pub membership: Vec<u32>,
67    /// Number of distinct communities in `membership`.
68    pub nb_clusters: u32,
69    /// Merges in dendrogram order. Each row `[c1, c2]` merges clusters
70    /// `c1` and `c2` into the new cluster `n + i` where `i` is the
71    /// merge index. Same encoding as
72    /// `igraph_community_fastgreedy()` / Walktrap / EB.
73    pub merges: Vec<[u32; 2]>,
74    /// Modularity trajectory. `modularity[i]` is the modularity *after*
75    /// `i` merges have been applied to the all-singletons start
76    /// partition. Length = `merges.len() + 1`. For an edgeless graph
77    /// the single entry is `NaN`, matching the C convention.
78    pub modularity: Vec<f64>,
79}
80
81/// Run fast greedy modularity community detection on `graph` (unweighted).
82///
83/// # Errors
84/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
85/// - [`IgraphError::InvalidArgument`] if the graph has multi-edges.
86///
87/// # Examples
88///
89/// ```
90/// use rust_igraph::{Graph, fast_greedy_modularity};
91///
92/// // Two K5 cliques joined by a single bridge edge (0,5).
93/// let mut g = Graph::with_vertices(10);
94/// for &(u, v) in &[
95///     (0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4),
96///     (2, 3), (2, 4), (3, 4),
97///     (5, 6), (5, 7), (5, 8), (5, 9), (6, 7), (6, 8), (6, 9),
98///     (7, 8), (7, 9), (8, 9),
99///     (0, 5),
100/// ] {
101///     g.add_edge(u, v).unwrap();
102/// }
103/// let r = fast_greedy_modularity(&g).unwrap();
104/// assert_eq!(r.nb_clusters, 2);
105/// assert!((r.modularity.iter().copied().fold(f64::NEG_INFINITY, f64::max) - 0.452_381).abs() < 1e-5);
106/// ```
107pub fn fast_greedy_modularity(graph: &Graph) -> IgraphResult<FastGreedyResult> {
108    fast_greedy_modularity_impl(graph, None)
109}
110
111/// Run fast greedy modularity community detection on `graph` with edge `weights`.
112///
113/// `weights[i]` is the weight of edge id `i`; length must equal
114/// `graph.ecount()`. Weights must be finite and non-negative.
115///
116/// # Errors
117/// - [`IgraphError::Unsupported`] if `graph.is_directed()`.
118/// - [`IgraphError::InvalidArgument`] if the graph has multi-edges,
119///   `weights.len() != graph.ecount()`, or any weight is negative / NaN.
120pub fn fast_greedy_modularity_weighted(
121    graph: &Graph,
122    weights: &[f64],
123) -> IgraphResult<FastGreedyResult> {
124    fast_greedy_modularity_impl(graph, Some(weights))
125}
126
127fn fast_greedy_modularity_impl(
128    graph: &Graph,
129    weights: Option<&[f64]>,
130) -> IgraphResult<FastGreedyResult> {
131    if graph.is_directed() {
132        return Err(IgraphError::Unsupported(
133            "fast_greedy_modularity is undirected-only; directed variant is a follow-up AWU",
134        ));
135    }
136    let n = graph.vcount();
137    let n_us = n as usize;
138    let m_us = graph.ecount();
139
140    if let Some(w) = weights {
141        if w.len() != m_us {
142            return Err(IgraphError::InvalidArgument(
143                "weights length must equal graph.ecount()".into(),
144            ));
145        }
146        for &x in w {
147            if x.is_nan() {
148                return Err(IgraphError::InvalidArgument(
149                    "weights must not be NaN".into(),
150                ));
151            }
152            if x < 0.0 {
153                return Err(IgraphError::InvalidArgument(
154                    "weights must be non-negative".into(),
155                ));
156            }
157        }
158    }
159    if has_multiple(graph)? {
160        return Err(IgraphError::InvalidArgument(
161            "fast_greedy_modularity requires graphs without multi-edges".into(),
162        ));
163    }
164
165    if n == 0 {
166        return Ok(FastGreedyResult {
167            membership: Vec::new(),
168            nb_clusters: 0,
169            merges: Vec::new(),
170            modularity: Vec::new(),
171        });
172    }
173
174    // a[c] starts as raw incident strength; normalised by 2W below.
175    let mut a = vec![0.0_f64; n_us];
176    let mut loop_weight_sum = 0.0_f64;
177    let mut weight_sum = 0.0_f64;
178
179    for e in 0..m_us {
180        let (u, v) = graph.edge(e as u32)?;
181        let w = match weights {
182            Some(ws) => ws[e],
183            None => 1.0,
184        };
185        weight_sum += w;
186        a[u as usize] += w;
187        a[v as usize] += w;
188        if u == v {
189            loop_weight_sum += 2.0 * w;
190        }
191    }
192
193    if m_us == 0 {
194        // No edges: all singletons; Q undefined → NaN (matches C).
195        return Ok(FastGreedyResult {
196            membership: (0..n).collect(),
197            nb_clusters: n,
198            merges: Vec::new(),
199            modularity: vec![f64::NAN],
200        });
201    }
202
203    let two_w = 2.0 * weight_sum;
204    // a[i] /= 2W
205    if two_w > 0.0 {
206        for slot in &mut a {
207            *slot /= two_w;
208        }
209    }
210
211    // Per-community sorted neighbour map (neighbour id -> ΔQ).
212    let mut neis: Vec<BTreeMap<u32, f64>> = vec![BTreeMap::new(); n_us];
213
214    // Initial ΔQ for each non-loop edge.
215    for e in 0..m_us {
216        let (u, v) = graph.edge(e as u32)?;
217        if u == v {
218            continue;
219        }
220        let w = match weights {
221            Some(ws) => ws[e],
222            None => 1.0,
223        };
224        // dq = 2 * (w / 2W - a[u] * a[v])   (after a has been normalised by 2W)
225        let dq = 2.0 * (w / two_w - a[u as usize] * a[v as usize]);
226        // No multi-edges by precondition: assert by inserting.
227        neis[u as usize].insert(v, dq);
228        neis[v as usize].insert(u, dq);
229    }
230
231    // Initial modularity: loop_weight_sum / 2W - sum(a^2).
232    let mut q = if two_w > 0.0 {
233        let mut s = loop_weight_sum / two_w;
234        for &ai in &a {
235            s -= ai * ai;
236        }
237        s
238    } else {
239        0.0
240    };
241
242    // Build heap of (dq, c1, c2) for each canonical (c1 < c2) live edge.
243    let mut heap: BinaryHeap<HeapEntry> = BinaryHeap::new();
244    for c in 0..n_us {
245        for (&k, &dq) in &neis[c] {
246            if (c as u32) < k {
247                heap.push(HeapEntry {
248                    dq,
249                    c1: c as u32,
250                    c2: k,
251                });
252            }
253        }
254    }
255
256    let mut alive = vec![true; n_us];
257    let mut size = vec![1_u32; n_us];
258    let mut id: Vec<u32> = (0..n).collect();
259
260    let total_merges_cap = n_us.saturating_sub(1);
261    let mut merges: Vec<[u32; 2]> = Vec::with_capacity(total_merges_cap);
262    let mut modularity_traj: Vec<f64> = Vec::with_capacity(total_merges_cap + 1);
263    modularity_traj.push(q);
264
265    let mut best_q = q;
266    let mut best_n_merges = 0_usize;
267
268    while merges.len() < total_merges_cap {
269        // Pop until we find a non-stale entry.
270        let chosen = loop {
271            let Some(e) = heap.pop() else {
272                break None;
273            };
274            if !alive[e.c1 as usize] || !alive[e.c2 as usize] {
275                continue;
276            }
277            // Validate the stored dq still matches.
278            if let Some(&cur) = neis[e.c1 as usize].get(&e.c2) {
279                if cur == e.dq {
280                    break Some(e);
281                }
282            }
283        };
284        let Some(entry) = chosen else {
285            // No more inter-community edges (disconnected components remain).
286            break;
287        };
288
289        // Choose `to` = smaller id, `from` = larger id. This matches the
290        // common convention; the dendrogram encoding stores [to, from].
291        let (to, from) = if entry.c1 < entry.c2 {
292            (entry.c1, entry.c2)
293        } else {
294            (entry.c2, entry.c1)
295        };
296
297        q += entry.dq;
298
299        // Take both neighbour maps out so we can iterate freely without
300        // overlapping borrows.
301        let to_neis = std::mem::take(&mut neis[to as usize]);
302        let from_neis = std::mem::take(&mut neis[from as usize]);
303
304        // Collect the union of neighbour keys, skipping the two endpoints.
305        let mut all_keys: BTreeSet<u32> = BTreeSet::new();
306        for &k in to_neis.keys() {
307            if k != from {
308                all_keys.insert(k);
309            }
310        }
311        for &k in from_neis.keys() {
312            if k != to {
313                all_keys.insert(k);
314            }
315        }
316
317        let mut new_to_neis: BTreeMap<u32, f64> = BTreeMap::new();
318        for k in all_keys {
319            let in_to = to_neis.get(&k).copied();
320            let in_from = from_neis.get(&k).copied();
321            let new_dq = match (in_to, in_from) {
322                (Some(t), Some(f)) => t + f, // triangle
323                (Some(t), None) => t - 2.0 * a[from as usize] * a[k as usize], // chain 1
324                (None, Some(f)) => f - 2.0 * a[to as usize] * a[k as usize], // chain 2
325                (None, None) => unreachable!(),
326            };
327            new_to_neis.insert(k, new_dq);
328
329            // Mirror in k's map: drop `from`, set `to` -> new_dq.
330            let km = &mut neis[k as usize];
331            km.remove(&from);
332            km.insert(to, new_dq);
333
334            // Push the updated edge onto the heap in canonical (c1<c2) order.
335            let (c1, c2) = if to < k { (to, k) } else { (k, to) };
336            heap.push(HeapEntry { dq: new_dq, c1, c2 });
337        }
338        neis[to as usize] = new_to_neis;
339
340        alive[from as usize] = false;
341        size[to as usize] += size[from as usize];
342        a[to as usize] += a[from as usize];
343        a[from as usize] = 0.0;
344
345        merges.push([id[to as usize], id[from as usize]]);
346        id[to as usize] = u32::try_from(n_us)
347            .map_err(|_| IgraphError::Internal("vcount exceeds u32::MAX"))?
348            + u32::try_from(merges.len() - 1)
349                .map_err(|_| IgraphError::Internal("merges.len exceeds u32::MAX"))?;
350
351        modularity_traj.push(q);
352
353        if q > best_q {
354            best_q = q;
355            best_n_merges = merges.len();
356        }
357    }
358
359    // Build membership by applying the first best_n_merges merges to a
360    // singleton labelling. label[v] = current cluster label.
361    let mut label: Vec<u32> = (0..n).collect();
362    // Walk merges in chronological order; each merge produces a fresh
363    // cluster id n+i. Rewrite all vertices whose current label matches
364    // either side of the merge to the new id.
365    for (i, m) in merges.iter().take(best_n_merges).enumerate() {
366        let [c_a, c_b] = *m;
367        let new_id = n + u32::try_from(i)
368            .map_err(|_| IgraphError::Internal("merge index exceeds u32::MAX"))?;
369        for slot in &mut label {
370            if *slot == c_a || *slot == c_b {
371                *slot = new_id;
372            }
373        }
374    }
375
376    let (membership, nb_clusters) = densify_labels(&label);
377
378    Ok(FastGreedyResult {
379        membership,
380        nb_clusters,
381        merges,
382        modularity: modularity_traj,
383    })
384}
385
386fn densify_labels(labels: &[u32]) -> (Vec<u32>, u32) {
387    let mut remap: Vec<(u32, u32)> = Vec::new();
388    let mut out = Vec::with_capacity(labels.len());
389    for &l in labels {
390        let dense = if let Some(&(_, d)) = remap.iter().find(|&&(orig, _)| orig == l) {
391            d
392        } else {
393            let d = u32::try_from(remap.len()).expect("dense label fits u32");
394            remap.push((l, d));
395            d
396        };
397        out.push(dense);
398    }
399    let k = u32::try_from(remap.len()).expect("nb_clusters fits u32");
400    (out, k)
401}
402
403/// Heap entry: max-heap on `dq`, ties broken by smaller `(c1, c2)`
404/// winning, so the greedy choice is deterministic across runs.
405#[derive(Debug, Clone, Copy)]
406struct HeapEntry {
407    dq: f64,
408    c1: u32,
409    c2: u32,
410}
411
412impl PartialEq for HeapEntry {
413    fn eq(&self, other: &Self) -> bool {
414        self.dq.to_bits() == other.dq.to_bits() && self.c1 == other.c1 && self.c2 == other.c2
415    }
416}
417impl Eq for HeapEntry {}
418
419impl PartialOrd for HeapEntry {
420    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
421        Some(self.cmp(other))
422    }
423}
424
425impl Ord for HeapEntry {
426    fn cmp(&self, other: &Self) -> Ordering {
427        // primary: higher dq is "greater" (max-heap).
428        match self.dq.partial_cmp(&other.dq) {
429            Some(Ordering::Equal) | None => {
430                // tie: smaller (c1, c2) is "greater" so pops first.
431                other.c1.cmp(&self.c1).then_with(|| other.c2.cmp(&self.c2))
432            }
433            Some(o) => o,
434        }
435    }
436}
437
438#[cfg(test)]
439mod tests {
440    use super::*;
441
442    fn two_k5_bridge() -> Graph {
443        let mut g = Graph::with_vertices(10);
444        for u in 0..5u32 {
445            for v in (u + 1)..5 {
446                g.add_edge(u, v).expect("clique edge");
447            }
448        }
449        for u in 5..10u32 {
450            for v in (u + 1)..10 {
451                g.add_edge(u, v).expect("clique edge");
452            }
453        }
454        g.add_edge(0, 5).expect("bridge");
455        g
456    }
457
458    #[test]
459    fn empty_graph_returns_empty() {
460        let g = Graph::with_vertices(0);
461        let r = fast_greedy_modularity(&g).unwrap();
462        assert!(r.membership.is_empty());
463        assert_eq!(r.nb_clusters, 0);
464        assert!(r.merges.is_empty());
465        assert!(r.modularity.is_empty());
466    }
467
468    #[test]
469    fn edgeless_graph_yields_singletons_with_nan() {
470        let g = Graph::with_vertices(5);
471        let r = fast_greedy_modularity(&g).unwrap();
472        assert_eq!(r.membership, vec![0, 1, 2, 3, 4]);
473        assert_eq!(r.nb_clusters, 5);
474        assert!(r.merges.is_empty());
475        assert_eq!(r.modularity.len(), 1);
476        assert!(r.modularity[0].is_nan());
477    }
478
479    #[test]
480    fn single_vertex_no_edges() {
481        let g = Graph::with_vertices(1);
482        let r = fast_greedy_modularity(&g).unwrap();
483        assert_eq!(r.membership, vec![0]);
484        assert_eq!(r.nb_clusters, 1);
485        assert!(r.merges.is_empty());
486    }
487
488    #[test]
489    fn two_k5_bridge_q_matches_upstream() {
490        let g = two_k5_bridge();
491        let r = fast_greedy_modularity(&g).unwrap();
492        let best_q = r
493            .modularity
494            .iter()
495            .copied()
496            .fold(f64::NEG_INFINITY, f64::max);
497        assert!(
498            (best_q - 0.452_381).abs() < 1e-5,
499            "best Q = {best_q}, expected ≈ 0.452381"
500        );
501        assert_eq!(r.nb_clusters, 2);
502        // Vertices 0..4 share one community, 5..9 the other.
503        for v in 1..5 {
504            assert_eq!(r.membership[v], r.membership[0]);
505        }
506        for v in 6..10 {
507            assert_eq!(r.membership[v], r.membership[5]);
508        }
509        assert_ne!(r.membership[0], r.membership[5]);
510    }
511
512    #[test]
513    fn two_k5_bridge_with_uniform_weights_matches_unweighted() {
514        let g = two_k5_bridge();
515        let weights = vec![2.0_f64; g.ecount()];
516        let r = fast_greedy_modularity_weighted(&g, &weights).unwrap();
517        let best_q = r
518            .modularity
519            .iter()
520            .copied()
521            .fold(f64::NEG_INFINITY, f64::max);
522        assert!((best_q - 0.452_381).abs() < 1e-5);
523        assert_eq!(r.nb_clusters, 2);
524    }
525
526    #[test]
527    fn dendrogram_size_bounded_by_n_minus_1() {
528        let g = two_k5_bridge();
529        let r = fast_greedy_modularity(&g).unwrap();
530        assert!(r.merges.len() <= 9);
531        assert_eq!(r.modularity.len(), r.merges.len() + 1);
532    }
533
534    #[test]
535    fn two_disjoint_k4_yields_two_components() {
536        let mut g = Graph::with_vertices(8);
537        for u in 0..4u32 {
538            for v in (u + 1)..4 {
539                g.add_edge(u, v).unwrap();
540            }
541        }
542        for u in 4..8u32 {
543            for v in (u + 1)..8 {
544                g.add_edge(u, v).unwrap();
545            }
546        }
547        let r = fast_greedy_modularity(&g).unwrap();
548        // Disconnected → algorithm stops before crossing components.
549        assert_eq!(r.nb_clusters, 2);
550        for v in 1..4 {
551            assert_eq!(r.membership[v], r.membership[0]);
552        }
553        for v in 5..8 {
554            assert_eq!(r.membership[v], r.membership[4]);
555        }
556        assert_ne!(r.membership[0], r.membership[4]);
557    }
558
559    #[test]
560    fn rejects_directed_graph() {
561        let mut g = Graph::new(3, true).expect("3v directed graph");
562        g.add_edge(0, 1).expect("0-1");
563        let err = fast_greedy_modularity(&g).unwrap_err();
564        assert!(matches!(err, IgraphError::Unsupported(_)));
565    }
566
567    #[test]
568    fn rejects_multi_edges() {
569        let mut g = Graph::with_vertices(3);
570        g.add_edge(0, 1).unwrap();
571        g.add_edge(0, 1).unwrap();
572        g.add_edge(1, 2).unwrap();
573        let err = fast_greedy_modularity(&g).unwrap_err();
574        assert!(matches!(err, IgraphError::InvalidArgument(_)));
575    }
576
577    #[test]
578    fn rejects_negative_weight() {
579        let mut g = Graph::with_vertices(3);
580        g.add_edge(0, 1).unwrap();
581        g.add_edge(1, 2).unwrap();
582        let err = fast_greedy_modularity_weighted(&g, &[1.0, -0.5]).unwrap_err();
583        assert!(matches!(err, IgraphError::InvalidArgument(_)));
584    }
585
586    #[test]
587    fn rejects_weight_length_mismatch() {
588        let mut g = Graph::with_vertices(3);
589        g.add_edge(0, 1).unwrap();
590        g.add_edge(1, 2).unwrap();
591        let err = fast_greedy_modularity_weighted(&g, &[1.0]).unwrap_err();
592        assert!(matches!(err, IgraphError::InvalidArgument(_)));
593    }
594
595    #[test]
596    fn densify_labels_basic() {
597        let (m, k) = densify_labels(&[5, 5, 7, 5, 9, 7]);
598        assert_eq!(m, vec![0, 0, 1, 0, 2, 1]);
599        assert_eq!(k, 3);
600    }
601}