Skip to main content

rune_leiden/
lib.rs

1//! Leiden community detection — find densely-connected clusters in weighted graphs.
2//!
3//! The Leiden algorithm (Traag, Waltman & van Eck, 2019) partitions the nodes of a
4//! weighted undirected graph into communities that maximise the modularity objective:
5//!
6//! ```text
7//! Q = (1/2m) × Σ_{ij} [A_ij − γ × k_i × k_j / (2m)] × δ(c_i, c_j)
8//! ```
9//!
10//! where `m` is the total edge weight, `k_i` the weighted degree of node `i`, `γ` the
11//! resolution parameter, and `δ` the Kronecker delta. Leiden improves on Louvain by
12//! adding a refinement phase that prevents poorly-connected communities from merging.
13//!
14//! # Features
15//!
16//! - Weighted undirected graphs (parallel edges are summed)
17//! - Configurable resolution parameter `γ` for community granularity
18//! - Deterministic via `random_seed`
19//! - Pure Rust, no unsafe, no external library dependencies
20//!
21//! # Quick Start
22//!
23//! ```rust
24//! use rune_leiden::Leiden;
25//!
26//! // Two triangles connected by a single bridge edge
27//! let edges = vec![
28//!     (0, 1, 1.0), (1, 2, 1.0), (0, 2, 1.0),
29//!     (3, 4, 1.0), (4, 5, 1.0), (3, 5, 1.0),
30//!     (2, 3, 0.01),
31//! ];
32//!
33//! let result = Leiden::new().fit(6, &edges);
34//! assert_eq!(result.n_communities, 2);
35//! assert!(result.modularity >= 0.0);
36//! ```
37//!
38//! # CLI
39//!
40//! ```bash
41//! rune-leiden edges.txt
42//! rune-leiden edges.txt --resolution 0.5 --seed 123
43//! cat edges.txt | rune-leiden -
44//! ```
45
46mod graph;
47mod rng;
48
49use graph::Graph;
50
51/// Builder for configuring and running the Leiden community-detection algorithm.
52///
53/// Construct with [`Leiden::new`], chain optional parameters, then call [`Leiden::fit`].
54///
55/// # Example
56///
57/// ```rust
58/// use rune_leiden::Leiden;
59///
60/// let leiden = Leiden::new()
61///     .resolution(1.0)
62///     .max_iter(100)
63///     .random_seed(42);
64/// ```
65pub struct Leiden {
66    resolution: f64,
67    max_iter: usize,
68    random_seed: u64,
69}
70
71impl Default for Leiden {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77impl Leiden {
78    /// Creates a new `Leiden` instance with default parameters.
79    ///
80    /// | Parameter | Default |
81    /// |---|---|
82    /// | `resolution` | `1.0` |
83    /// | `max_iter` | `100` |
84    /// | `random_seed` | `42` |
85    ///
86    /// # Example
87    ///
88    /// ```rust
89    /// use rune_leiden::Leiden;
90    ///
91    /// let leiden = Leiden::new();
92    /// ```
93    pub fn new() -> Self {
94        Leiden { resolution: 1.0, max_iter: 100, random_seed: 42 }
95    }
96
97    /// Sets the resolution parameter `γ`.
98    ///
99    /// Higher values produce more, smaller communities; lower values produce fewer,
100    /// larger communities. `1.0` corresponds to standard Newman–Girvan modularity.
101    ///
102    /// # Example
103    ///
104    /// ```rust
105    /// use rune_leiden::Leiden;
106    ///
107    /// let leiden = Leiden::new().resolution(0.5);
108    /// ```
109    pub fn resolution(mut self, r: f64) -> Self {
110        self.resolution = r;
111        self
112    }
113
114    /// Sets the maximum number of outer iterations (aggregation rounds).
115    ///
116    /// The algorithm terminates earlier if modularity stops improving. Defaults to `100`,
117    /// which is more than sufficient for most real-world graphs.
118    ///
119    /// # Example
120    ///
121    /// ```rust
122    /// use rune_leiden::Leiden;
123    ///
124    /// let leiden = Leiden::new().max_iter(50);
125    /// ```
126    pub fn max_iter(mut self, n: usize) -> Self {
127        self.max_iter = n;
128        self
129    }
130
131    /// Sets the seed for the internal PRNG. Identical seeds produce identical partitions.
132    ///
133    /// # Example
134    ///
135    /// ```rust
136    /// use rune_leiden::Leiden;
137    ///
138    /// let leiden = Leiden::new().random_seed(123);
139    /// ```
140    pub fn random_seed(mut self, s: u64) -> Self {
141        self.random_seed = s;
142        self
143    }
144
145    /// Partitions `n_nodes` nodes into communities by maximising modularity.
146    ///
147    /// `edges` is a list of undirected weighted edges `(u, v, weight)`. All node
148    /// indices must be in `0..n_nodes`. Parallel edges with the same `(u, v)` pair
149    /// are summed. Self-loops are ignored.
150    ///
151    /// Returns a [`CommunityResult`] with one community id per node, contiguous from `0`.
152    ///
153    /// # Panics
154    ///
155    /// Panics if any node index in `edges` is `≥ n_nodes`.
156    ///
157    /// # Example
158    ///
159    /// ```rust
160    /// use rune_leiden::Leiden;
161    ///
162    /// // Two fully-connected triangles joined by a weak bridge
163    /// let edges = vec![
164    ///     (0, 1, 1.0), (1, 2, 1.0), (0, 2, 1.0),
165    ///     (3, 4, 1.0), (4, 5, 1.0), (3, 5, 1.0),
166    ///     (2, 3, 0.01),
167    /// ];
168    ///
169    /// let result = Leiden::new().fit(6, &edges);
170    /// assert_eq!(result.n_communities, 2);
171    /// assert!(result.communities[0] == result.communities[1]);
172    /// assert!(result.communities[0] != result.communities[3]);
173    /// ```
174    pub fn fit(&self, n_nodes: usize, edges: &[(usize, usize, f64)]) -> CommunityResult {
175        for &(u, v, _) in edges {
176            assert!(u < n_nodes, "node index {u} out of range (n_nodes = {n_nodes})");
177            assert!(v < n_nodes, "node index {v} out of range (n_nodes = {n_nodes})");
178        }
179
180        if n_nodes == 0 {
181            return CommunityResult { communities: vec![], n_communities: 0, modularity: 0.0 };
182        }
183
184        if n_nodes == 1 {
185            return CommunityResult {
186                communities: vec![0],
187                n_communities: 1,
188                modularity: 0.0,
189            };
190        }
191
192        let mut rng = rng::Rng::new(self.random_seed);
193        let base_graph = Graph::new(n_nodes, edges);
194
195        // `node_map[supernode]` = list of original nodes it represents.
196        // Starts as identity: each original node is its own supernode.
197        let mut node_map: Vec<Vec<usize>> = (0..n_nodes).map(|i| vec![i]).collect();
198        let mut current_edges: Vec<(usize, usize, f64)> = edges.to_vec();
199        let mut current_n = n_nodes;
200
201        // `best_partition[original_node]` tracks the best full partition found.
202        let mut best_partition: Vec<usize> = (0..n_nodes).collect();
203        let mut best_modularity = f64::NEG_INFINITY;
204
205        for _iteration in 0..self.max_iter {
206            let graph = Graph::new(current_n, &current_edges);
207
208            let (local_partition, any_moves) =
209                phase1_move_nodes(&graph, self.resolution, &mut rng);
210
211            // Map the local partition back to original nodes for quality assessment.
212            let full_partition = resolve_partition(&node_map, &local_partition, n_nodes);
213            let modularity = graph::modularity(&base_graph, &full_partition, self.resolution);
214
215            if modularity > best_modularity {
216                best_modularity = modularity;
217                best_partition = full_partition;
218            }
219
220            if !any_moves {
221                break;
222            }
223
224            let refined_partition =
225                phase2_refine(&graph, &local_partition, self.resolution, &mut rng);
226
227            let (super_n, super_edges, new_map) =
228                phase3_aggregate(&graph, &refined_partition, &node_map);
229
230            node_map = new_map;
231            current_edges = super_edges;
232            current_n = super_n;
233
234            if current_n == 1 {
235                break;
236            }
237        }
238
239        let communities = renumber(&best_partition);
240        let n_communities = communities.iter().copied().max().map(|m| m + 1).unwrap_or(0);
241        let modularity = graph::modularity(&base_graph, &communities, self.resolution).max(0.0);
242
243        CommunityResult { communities, n_communities, modularity }
244    }
245}
246
247/// Resolves a partition on supernodes back to original-node community ids.
248fn resolve_partition(
249    node_map: &[Vec<usize>],
250    supernode_partition: &[usize],
251    n_nodes: usize,
252) -> Vec<usize> {
253    let mut result = vec![0usize; n_nodes];
254    for (supernode, community) in supernode_partition.iter().enumerate() {
255        for &original in &node_map[supernode] {
256            result[original] = *community;
257        }
258    }
259    result
260}
261
262/// Renumbers community ids to be contiguous starting from `0`.
263fn renumber(partition: &[usize]) -> Vec<usize> {
264    let max_id = partition.iter().copied().max().unwrap_or(0);
265    let mut remap = vec![usize::MAX; max_id + 1];
266    let mut next_id = 0usize;
267    let mut result = vec![0usize; partition.len()];
268    for (i, &c) in partition.iter().enumerate() {
269        if remap[c] == usize::MAX {
270            remap[c] = next_id;
271            next_id += 1;
272        }
273        result[i] = remap[c];
274    }
275    result
276}
277
278/// Phase 1: iteratively move nodes to neighbouring communities to maximise modularity.
279///
280/// Returns `(partition, any_moves)` where `any_moves` is `true` if at least one node
281/// moved during this call. The partition uses contiguous community ids from `0`.
282fn phase1_move_nodes(graph: &Graph, resolution: f64, rng: &mut rng::Rng) -> (Vec<usize>, bool) {
283    let n = graph.n_nodes;
284    let two_m = 2.0 * graph.total_weight;
285
286    let mut partition: Vec<usize> = (0..n).collect();
287    let mut community_total_degree = graph.degree.clone();
288
289    let mut node_order: Vec<usize> = (0..n).collect();
290    rng.shuffle(&mut node_order);
291
292    let mut ever_moved = false;
293
294    loop {
295        let mut moved = false;
296
297        for &v in &node_order {
298            let current_community = partition[v];
299            let k_v = graph.degree[v];
300
301            community_total_degree[current_community] -= k_v;
302
303            let mut community_weights: Vec<(usize, f64)> = Vec::new();
304            for &(nb, w) in graph.neighbours(v) {
305                let c_nb = partition[nb];
306                if let Some(entry) = community_weights.iter_mut().find(|(c, _)| *c == c_nb) {
307                    entry.1 += w;
308                } else {
309                    community_weights.push((c_nb, w));
310                }
311            }
312
313            let mut best_community = current_community;
314            let mut best_delta = delta_q(0.0, k_v, 0.0, two_m, resolution);
315
316            for (candidate_community, k_v_to_c) in community_weights {
317                let sigma_tot = community_total_degree[candidate_community];
318                let delta = delta_q(k_v_to_c, k_v, sigma_tot, two_m, resolution);
319                if delta > best_delta {
320                    best_delta = delta;
321                    best_community = candidate_community;
322                }
323            }
324
325            if best_community != current_community {
326                moved = true;
327                ever_moved = true;
328            }
329
330            partition[v] = best_community;
331            community_total_degree[best_community] += k_v;
332        }
333
334        if !moved {
335            break;
336        }
337    }
338
339    (renumber(&partition), ever_moved)
340}
341
342/// Modularity gain from moving node `v` into community `C`.
343///
344/// `k_v_to_c`: sum of edge weights from v to nodes already in C.
345/// `k_v`: weighted degree of v.
346/// `sigma_tot_c`: sum of degrees of nodes currently in C (before adding v).
347#[inline]
348fn delta_q(k_v_to_c: f64, k_v: f64, sigma_tot_c: f64, two_m: f64, resolution: f64) -> f64 {
349    k_v_to_c / (two_m / 2.0) - resolution * k_v * sigma_tot_c / (two_m * (two_m / 2.0))
350}
351
352/// Phase 2: refine the Phase 1 partition within each community.
353///
354/// For each Phase 1 community, start each constituent node as its own sub-community
355/// and greedily merge sub-communities. Returns a refined partition that is a
356/// sub-partition of `coarse_partition`.
357fn phase2_refine(
358    graph: &Graph,
359    coarse_partition: &[usize],
360    resolution: f64,
361    rng: &mut rng::Rng,
362) -> Vec<usize> {
363    let n = graph.n_nodes;
364    let two_m = 2.0 * graph.total_weight;
365
366    let n_communities = coarse_partition.iter().copied().max().map(|m| m + 1).unwrap_or(n);
367
368    // Collect nodes per coarse community
369    let mut community_members: Vec<Vec<usize>> = vec![Vec::new(); n_communities];
370    for v in 0..n {
371        community_members[coarse_partition[v]].push(v);
372    }
373
374    // Each node starts as its own sub-community
375    let mut sub_partition: Vec<usize> = (0..n).collect();
376    let mut sub_degree: Vec<f64> = graph.degree.clone();
377
378    for members in &community_members {
379        if members.len() <= 1 {
380            continue;
381        }
382
383        let mut order = members.clone();
384        rng.shuffle(&mut order);
385
386        for &v in &order {
387            let current_sub = sub_partition[v];
388            let k_v = graph.degree[v];
389
390            sub_degree[current_sub] -= k_v;
391
392            let mut sub_weights: Vec<(usize, f64)> = Vec::new();
393            for &(nb, w) in graph.neighbours(v) {
394                if coarse_partition[nb] != coarse_partition[v] {
395                    continue;
396                }
397                let s_nb = sub_partition[nb];
398                if let Some(entry) = sub_weights.iter_mut().find(|(s, _)| *s == s_nb) {
399                    entry.1 += w;
400                } else {
401                    sub_weights.push((s_nb, w));
402                }
403            }
404
405            let mut best_sub = current_sub;
406            let mut best_delta = 0.0;
407
408            for (candidate_sub, k_v_to_s) in sub_weights {
409                if candidate_sub == current_sub {
410                    continue;
411                }
412                let sigma_tot = sub_degree[candidate_sub];
413                let delta = delta_q(k_v_to_s, k_v, sigma_tot, two_m, resolution);
414                if delta > best_delta {
415                    best_delta = delta;
416                    best_sub = candidate_sub;
417                }
418            }
419
420            sub_partition[v] = best_sub;
421            sub_degree[best_sub] += k_v;
422        }
423    }
424
425    renumber(&sub_partition)
426}
427
428/// Output of [`phase3_aggregate`]: supernode count, supernode edges, and node map.
429type AggregateResult = (usize, Vec<(usize, usize, f64)>, Vec<Vec<usize>>);
430
431/// Phase 3: build the aggregated (supernode) graph from a refined partition.
432///
433/// Returns `(n_supernodes, supernode_edges, node_map)` where `node_map[supernode]`
434/// lists the original nodes that collapsed into that supernode.
435fn phase3_aggregate(
436    graph: &Graph,
437    refined_partition: &[usize],
438    node_map: &[Vec<usize>],
439) -> AggregateResult {
440    let n_super = refined_partition.iter().copied().max().map(|m| m + 1).unwrap_or(0);
441
442    // Build supernode → original-node mapping
443    let mut new_map: Vec<Vec<usize>> = vec![Vec::new(); n_super];
444    for (v, &community) in refined_partition.iter().enumerate() {
445        for &orig in &node_map[v] {
446            new_map[community].push(orig);
447        }
448    }
449
450    // Accumulate edge weights between supernodes
451    let mut edge_map: std::collections::HashMap<(usize, usize), f64> =
452        std::collections::HashMap::new();
453
454    for v in 0..graph.n_nodes {
455        let sv = refined_partition[v];
456        for &(nb, w) in graph.neighbours(v) {
457            let snb = refined_partition[nb];
458            if sv == snb {
459                continue;
460            }
461            let key = if sv < snb { (sv, snb) } else { (snb, sv) };
462            *edge_map.entry(key).or_insert(0.0) += w;
463        }
464    }
465
466    // Each undirected pair was counted twice (once per direction)
467    let super_edges: Vec<(usize, usize, f64)> =
468        edge_map.into_iter().map(|((u, v), w)| (u, v, w / 2.0)).collect();
469
470    (n_super, super_edges, new_map)
471}
472
473/// Result of a completed Leiden community-detection run.
474pub struct CommunityResult {
475    /// Community id for each input node. Ids are contiguous integers starting at `0`.
476    pub communities: Vec<usize>,
477    /// Number of distinct communities found.
478    pub n_communities: usize,
479    /// Final modularity score `Q`, guaranteed `≥ 0.0`.
480    pub modularity: f64,
481}
482
483#[cfg(test)]
484mod tests {
485    use super::*;
486
487    fn two_blob_edges() -> Vec<(usize, usize, f64)> {
488        let mut edges = Vec::new();
489        for i in 0..5usize {
490            for j in (i + 1)..5 {
491                edges.push((i, j, 1.0));
492            }
493        }
494        for i in 5..10usize {
495            for j in (i + 1)..10 {
496                edges.push((i, j, 1.0));
497            }
498        }
499        // Thin bridge
500        edges.push((4, 5, 0.01));
501        edges
502    }
503
504    #[test]
505    fn two_blobs_produce_two_communities() {
506        let edges = two_blob_edges();
507        let result = Leiden::new().fit(10, &edges);
508        assert_eq!(result.n_communities, 2, "expected 2 communities, got {}", result.n_communities);
509        let c0 = result.communities[0];
510        let c5 = result.communities[5];
511        assert_ne!(c0, c5, "blobs should be in different communities");
512        for i in 0..5 {
513            assert_eq!(result.communities[i], c0, "node {i} should be in blob-0 community");
514        }
515        for i in 5..10 {
516            assert_eq!(result.communities[i], c5, "node {i} should be in blob-1 community");
517        }
518    }
519
520    #[test]
521    fn single_node_produces_one_community() {
522        let result = Leiden::new().fit(1, &[]);
523        assert_eq!(result.n_communities, 1);
524        assert_eq!(result.communities, vec![0]);
525    }
526
527    #[test]
528    fn complete_graph_produces_one_community() {
529        let edges: Vec<(usize, usize, f64)> = (0..5usize)
530            .flat_map(|i| (i + 1..5).map(move |j| (i, j, 1.0)))
531            .collect();
532        let result = Leiden::new().fit(5, &edges);
533        assert_eq!(result.n_communities, 1);
534    }
535
536    #[test]
537    fn triangle_produces_one_community() {
538        let edges = vec![(0, 1, 1.0), (1, 2, 1.0), (0, 2, 1.0)];
539        let result = Leiden::new().fit(3, &edges);
540        assert_eq!(result.n_communities, 1);
541    }
542
543    #[test]
544    fn n_communities_matches_distinct_values() {
545        let edges = two_blob_edges();
546        let result = Leiden::new().fit(10, &edges);
547        let distinct: std::collections::HashSet<usize> =
548            result.communities.iter().copied().collect();
549        assert_eq!(distinct.len(), result.n_communities);
550    }
551
552    #[test]
553    fn modularity_finite_and_nonnegative_for_structured_graph() {
554        let edges = two_blob_edges();
555        let result = Leiden::new().fit(10, &edges);
556        assert!(result.modularity.is_finite(), "modularity must be finite");
557        assert!(result.modularity >= 0.0, "modularity must be ≥ 0 for structured graph");
558    }
559
560    #[test]
561    fn deterministic_with_same_seed() {
562        let edges = two_blob_edges();
563        let r1 = Leiden::new().random_seed(7).fit(10, &edges);
564        let r2 = Leiden::new().random_seed(7).fit(10, &edges);
565        assert_eq!(r1.communities, r2.communities);
566        assert_eq!(r1.n_communities, r2.n_communities);
567    }
568
569    #[test]
570    fn communities_are_contiguous_from_zero() {
571        let edges = two_blob_edges();
572        let result = Leiden::new().fit(10, &edges);
573        let mut sorted: Vec<usize> = result.communities.iter().copied().collect();
574        sorted.sort_unstable();
575        sorted.dedup();
576        assert_eq!(sorted, (0..result.n_communities).collect::<Vec<_>>());
577    }
578
579    #[test]
580    fn isolated_nodes_each_get_own_community() {
581        let result = Leiden::new().fit(4, &[]);
582        assert_eq!(result.n_communities, 4);
583    }
584}