Skip to main content

phago_core/
louvain.rs

1//! Louvain community detection algorithm implementation.
2//!
3//! The Louvain algorithm is a greedy optimization method for detecting
4//! communities in large networks. It optimizes modularity through a
5//! two-phase iterative process:
6//!
7//! 1. **Local Moving**: Greedily move nodes to maximize modularity gain
8//! 2. **Aggregation**: Build a new graph with communities as nodes
9//!
10//! Reference: Blondel et al. (2008) "Fast unfolding of communities in large networks"
11
12use crate::types::NodeId;
13use std::collections::HashMap;
14
15/// Result of Louvain community detection.
16#[derive(Debug, Clone)]
17pub struct LouvainResult {
18    /// Communities as vectors of node IDs.
19    pub communities: Vec<Vec<NodeId>>,
20    /// Final modularity score (0.0 to 1.0, higher = better structure).
21    pub modularity: f64,
22    /// Number of Louvain passes performed.
23    pub passes: usize,
24}
25
26/// Internal node representation for the algorithm.
27#[derive(Clone)]
28struct LouvainNode {
29    /// Original node ID (or aggregated community ID).
30    #[allow(dead_code)]
31    id: usize,
32    /// Current community assignment.
33    community: usize,
34    /// Weighted degree (sum of incident edge weights).
35    weighted_degree: f64,
36    /// Self-loop weight (for aggregated nodes).
37    self_loop: f64,
38}
39
40/// Graph representation optimized for Louvain.
41struct LouvainGraph {
42    /// Nodes with their community assignments.
43    nodes: Vec<LouvainNode>,
44    /// Adjacency list: node_idx -> [(neighbor_idx, weight)].
45    adj: Vec<Vec<(usize, f64)>>,
46    /// Total edge weight (sum of all weights, counting undirected edges once).
47    total_weight: f64,
48}
49
50impl LouvainGraph {
51    /// Create from a list of edges with weights.
52    fn from_edges(node_count: usize, edges: &[(usize, usize, f64)]) -> Self {
53        let mut nodes: Vec<LouvainNode> = (0..node_count)
54            .map(|i| LouvainNode {
55                id: i,
56                community: i, // Each node starts in its own community
57                weighted_degree: 0.0,
58                self_loop: 0.0,
59            })
60            .collect();
61
62        let mut adj: Vec<Vec<(usize, f64)>> = vec![Vec::new(); node_count];
63        let mut total_weight = 0.0;
64
65        for &(from, to, weight) in edges {
66            if from == to {
67                // Self-loop
68                nodes[from].self_loop += weight;
69                nodes[from].weighted_degree += 2.0 * weight;
70                total_weight += weight;
71            } else {
72                adj[from].push((to, weight));
73                adj[to].push((from, weight));
74                nodes[from].weighted_degree += weight;
75                nodes[to].weighted_degree += weight;
76                total_weight += weight;
77            }
78        }
79
80        Self {
81            nodes,
82            adj,
83            total_weight,
84        }
85    }
86
87    /// Compute the modularity of the current partition.
88    fn modularity(&self) -> f64 {
89        if self.total_weight == 0.0 {
90            return 0.0;
91        }
92
93        let m2 = 2.0 * self.total_weight;
94        let mut q = 0.0;
95
96        // Sum of weights and degrees per community
97        let mut comm_internal: HashMap<usize, f64> = HashMap::new();
98        let mut comm_degree: HashMap<usize, f64> = HashMap::new();
99
100        for node in &self.nodes {
101            let c = node.community;
102            *comm_degree.entry(c).or_insert(0.0) += node.weighted_degree;
103            *comm_internal.entry(c).or_insert(0.0) += node.self_loop;
104        }
105
106        // Add internal edge weights
107        for (i, neighbors) in self.adj.iter().enumerate() {
108            let ci = self.nodes[i].community;
109            for &(j, weight) in neighbors {
110                if i < j && self.nodes[j].community == ci {
111                    *comm_internal.entry(ci).or_insert(0.0) += weight;
112                }
113            }
114        }
115
116        // Q = Σc [ (internal_c / m) - (degree_c / 2m)^2 ]
117        for (c, internal) in &comm_internal {
118            let degree = comm_degree.get(c).unwrap_or(&0.0);
119            q += internal / self.total_weight - (degree / m2).powi(2);
120        }
121
122        q
123    }
124
125    /// Compute modularity gain of moving node i to community c.
126    fn modularity_gain(&self, node_idx: usize, target_community: usize) -> f64 {
127        if self.total_weight == 0.0 {
128            return 0.0;
129        }
130
131        let node = &self.nodes[node_idx];
132        if node.community == target_community {
133            return 0.0;
134        }
135
136        let m2 = 2.0 * self.total_weight;
137        let ki = node.weighted_degree;
138
139        // Sum of weights to nodes in target community
140        let mut ki_in = 0.0;
141        for &(j, weight) in &self.adj[node_idx] {
142            if self.nodes[j].community == target_community {
143                ki_in += weight;
144            }
145        }
146
147        // Sum of weighted degrees in target community
148        let mut sigma_tot = 0.0;
149        for other in &self.nodes {
150            if other.community == target_community {
151                sigma_tot += other.weighted_degree;
152            }
153        }
154
155        // ΔQ = ki_in/m - (sigma_tot * ki) / (2m^2)
156        ki_in / self.total_weight - (sigma_tot * ki) / (m2 * self.total_weight)
157    }
158
159    /// Phase 1: Local moving of nodes to maximize modularity.
160    /// Returns true if any improvement was made.
161    fn local_moving(&mut self) -> bool {
162        let n = self.nodes.len();
163        let mut improved = false;
164        let mut changed = true;
165
166        while changed {
167            changed = false;
168
169            for i in 0..n {
170                let current_community = self.nodes[i].community;
171
172                // Find neighboring communities
173                let mut neighbor_communities: HashMap<usize, f64> = HashMap::new();
174                for &(j, weight) in &self.adj[i] {
175                    let c = self.nodes[j].community;
176                    *neighbor_communities.entry(c).or_insert(0.0) += weight;
177                }
178
179                // Always consider staying in current community
180                neighbor_communities.entry(current_community).or_insert(0.0);
181
182                // Find best community
183                let mut best_community = current_community;
184                let mut best_gain = 0.0;
185
186                for &c in neighbor_communities.keys() {
187                    // Temporarily remove from current community for gain calculation
188                    self.nodes[i].community = current_community; // Ensure starting state
189                    let gain = if c == current_community {
190                        0.0
191                    } else {
192                        // Gain of moving to c
193                        let gain_to_c = self.modularity_gain(i, c);
194                        // Loss of leaving current
195                        self.nodes[i].community = c; // Temporarily move
196                        let loss_from_current = -self.modularity_gain(i, current_community);
197                        self.nodes[i].community = current_community; // Restore
198                        gain_to_c + loss_from_current
199                    };
200
201                    if gain > best_gain + 1e-10 {
202                        best_gain = gain;
203                        best_community = c;
204                    }
205                }
206
207                if best_community != current_community {
208                    self.nodes[i].community = best_community;
209                    changed = true;
210                    improved = true;
211                }
212            }
213        }
214
215        improved
216    }
217
218    /// Phase 2: Aggregate communities into super-nodes.
219    fn aggregate(&self) -> (Self, Vec<Vec<usize>>) {
220        // Map old communities to new node indices
221        let mut comm_to_new: HashMap<usize, usize> = HashMap::new();
222        let mut communities: Vec<Vec<usize>> = Vec::new();
223
224        for (i, node) in self.nodes.iter().enumerate() {
225            let c = node.community;
226            if let Some(&new_idx) = comm_to_new.get(&c) {
227                communities[new_idx].push(i);
228            } else {
229                let new_idx = communities.len();
230                comm_to_new.insert(c, new_idx);
231                communities.push(vec![i]);
232            }
233        }
234
235        let new_node_count = communities.len();
236
237        // Compute edges between new nodes
238        let mut new_edges: HashMap<(usize, usize), f64> = HashMap::new();
239
240        for (i, neighbors) in self.adj.iter().enumerate() {
241            let new_i = comm_to_new[&self.nodes[i].community];
242            for &(j, weight) in neighbors {
243                let new_j = comm_to_new[&self.nodes[j].community];
244                let key = if new_i <= new_j {
245                    (new_i, new_j)
246                } else {
247                    (new_j, new_i)
248                };
249                *new_edges.entry(key).or_insert(0.0) += weight;
250            }
251        }
252
253        // Add self-loops from original nodes
254        for node in &self.nodes {
255            let new_idx = comm_to_new[&node.community];
256            *new_edges.entry((new_idx, new_idx)).or_insert(0.0) += node.self_loop;
257        }
258
259        // Convert to edge list (halve weights for undirected, except self-loops)
260        let edges: Vec<(usize, usize, f64)> = new_edges
261            .into_iter()
262            .map(|((a, b), w)| {
263                if a == b {
264                    (a, b, w) // Self-loop: don't halve
265                } else {
266                    (a, b, w / 2.0) // Undirected edge counted twice
267                }
268            })
269            .collect();
270
271        let new_graph = LouvainGraph::from_edges(new_node_count, &edges);
272        (new_graph, communities)
273    }
274}
275
276/// Run Louvain community detection on a graph.
277///
278/// # Arguments
279/// * `node_ids` - The original node IDs in order
280/// * `edges` - Edges as (from_idx, to_idx, weight) where indices correspond to node_ids
281///
282/// # Returns
283/// A `LouvainResult` containing communities (as NodeIds), modularity score, and pass count.
284pub fn louvain_communities(
285    node_ids: &[NodeId],
286    edges: &[(usize, usize, f64)],
287) -> LouvainResult {
288    if node_ids.is_empty() {
289        return LouvainResult {
290            communities: Vec::new(),
291            modularity: 0.0,
292            passes: 0,
293        };
294    }
295
296    // Single node case
297    if node_ids.len() == 1 {
298        return LouvainResult {
299            communities: vec![vec![node_ids[0]]],
300            modularity: 0.0,
301            passes: 0,
302        };
303    }
304
305    let mut graph = LouvainGraph::from_edges(node_ids.len(), edges);
306    let mut dendrogram: Vec<Vec<Vec<usize>>> = Vec::new();
307    let mut passes = 0;
308    const MAX_PASSES: usize = 100;
309
310    loop {
311        passes += 1;
312
313        // Phase 1: Local moving
314        let improved = graph.local_moving();
315
316        if !improved || passes >= MAX_PASSES {
317            // No improvement or max passes reached
318            break;
319        }
320
321        // Phase 2: Aggregate
322        let (new_graph, communities) = graph.aggregate();
323
324        // If only one community (or no change in community count), stop
325        if communities.len() == graph.nodes.len() || communities.len() == 1 {
326            break;
327        }
328
329        dendrogram.push(communities);
330        graph = new_graph;
331    }
332
333    // Extract final communities
334    let final_modularity = graph.modularity();
335
336    // Build community mapping from dendrogram
337    // Start with each node in its own group
338    let mut node_to_community: Vec<usize> = (0..node_ids.len()).collect();
339
340    // Apply each level of the dendrogram
341    for level in &dendrogram {
342        let mut new_mapping = vec![0; node_ids.len()];
343        for (new_comm, old_indices) in level.iter().enumerate() {
344            for &old_idx in old_indices {
345                // Find all original nodes that mapped to old_idx
346                for (orig, &comm) in node_to_community.iter().enumerate() {
347                    if comm == old_idx {
348                        new_mapping[orig] = new_comm;
349                    }
350                }
351            }
352        }
353        node_to_community = new_mapping;
354    }
355
356    // Apply final community assignments from the last graph
357    let mut final_mapping = vec![0; node_ids.len()];
358    if dendrogram.is_empty() {
359        // No aggregation happened, use direct community assignments
360        for (orig, &_) in node_to_community.iter().enumerate() {
361            final_mapping[orig] = graph.nodes[orig].community;
362        }
363    } else {
364        // Map through dendrogram to final communities
365        let last_level = &graph.nodes;
366        for (orig, &intermediate) in node_to_community.iter().enumerate() {
367            if intermediate < last_level.len() {
368                final_mapping[orig] = last_level[intermediate].community;
369            }
370        }
371    }
372
373    // Group nodes by community
374    let mut community_map: HashMap<usize, Vec<NodeId>> = HashMap::new();
375    for (i, &comm) in final_mapping.iter().enumerate() {
376        community_map.entry(comm).or_default().push(node_ids[i]);
377    }
378
379    let communities: Vec<Vec<NodeId>> = community_map.into_values().collect();
380
381    LouvainResult {
382        communities,
383        modularity: final_modularity,
384        passes,
385    }
386}
387
388/// Compute modularity of a given partition.
389///
390/// # Arguments
391/// * `node_count` - Number of nodes
392/// * `edges` - Edges as (from_idx, to_idx, weight)
393/// * `partition` - Community assignment for each node (indexed by node index)
394///
395/// # Returns
396/// Modularity score (typically 0.3-0.7 for good community structure).
397pub fn compute_modularity(
398    node_count: usize,
399    edges: &[(usize, usize, f64)],
400    partition: &[usize],
401) -> f64 {
402    if node_count == 0 || edges.is_empty() {
403        return 0.0;
404    }
405
406    let mut graph = LouvainGraph::from_edges(node_count, edges);
407
408    // Apply the given partition
409    for (i, &comm) in partition.iter().enumerate() {
410        if i < graph.nodes.len() {
411            graph.nodes[i].community = comm;
412        }
413    }
414
415    graph.modularity()
416}
417
418#[cfg(test)]
419mod tests {
420    use super::*;
421
422    fn node_id(seed: u64) -> NodeId {
423        NodeId::from_seed(seed)
424    }
425
426    #[test]
427    fn empty_graph() {
428        let result = louvain_communities(&[], &[]);
429        assert!(result.communities.is_empty());
430        assert_eq!(result.modularity, 0.0);
431    }
432
433    #[test]
434    fn single_node() {
435        let nodes = vec![node_id(1)];
436        let result = louvain_communities(&nodes, &[]);
437        assert_eq!(result.communities.len(), 1);
438        assert_eq!(result.communities[0].len(), 1);
439    }
440
441    #[test]
442    fn two_disconnected_nodes() {
443        let nodes = vec![node_id(1), node_id(2)];
444        let result = louvain_communities(&nodes, &[]);
445        // Each node in its own community
446        assert_eq!(result.communities.len(), 2);
447    }
448
449    #[test]
450    fn two_connected_nodes() {
451        let nodes = vec![node_id(1), node_id(2)];
452        let edges = vec![(0, 1, 1.0)];
453        let result = louvain_communities(&nodes, &edges);
454        // Should merge into one community
455        assert_eq!(result.communities.len(), 1);
456        assert_eq!(result.communities[0].len(), 2);
457    }
458
459    #[test]
460    fn triangle() {
461        let nodes = vec![node_id(1), node_id(2), node_id(3)];
462        let edges = vec![
463            (0, 1, 1.0),
464            (1, 2, 1.0),
465            (0, 2, 1.0),
466        ];
467        let result = louvain_communities(&nodes, &edges);
468        // All in one community
469        assert_eq!(result.communities.len(), 1);
470        assert_eq!(result.communities[0].len(), 3);
471    }
472
473    #[test]
474    fn two_triangles_weakly_connected() {
475        // Triangle 1: 0-1-2
476        // Triangle 2: 3-4-5
477        // Weak bridge: 2-3
478        let nodes: Vec<NodeId> = (0..6).map(|i| node_id(i as u64)).collect();
479        let edges = vec![
480            // Triangle 1
481            (0, 1, 1.0),
482            (1, 2, 1.0),
483            (0, 2, 1.0),
484            // Triangle 2
485            (3, 4, 1.0),
486            (4, 5, 1.0),
487            (3, 5, 1.0),
488            // Weak bridge
489            (2, 3, 0.1),
490        ];
491        let result = louvain_communities(&nodes, &edges);
492
493        // Should detect 2 communities
494        assert_eq!(result.communities.len(), 2);
495
496        // Each community should have 3 nodes
497        let sizes: Vec<usize> = result.communities.iter().map(|c| c.len()).collect();
498        assert!(sizes.contains(&3));
499        assert_eq!(sizes.iter().sum::<usize>(), 6);
500
501        // Modularity should be positive
502        assert!(result.modularity > 0.0, "modularity = {}", result.modularity);
503    }
504
505    #[test]
506    fn karate_club_style() {
507        // Simplified Zachary's karate club: two dense groups with sparse connections
508        // Group A: 0, 1, 2, 3 (densely connected)
509        // Group B: 4, 5, 6, 7 (densely connected)
510        // Sparse inter-group connections
511        let nodes: Vec<NodeId> = (0..8).map(|i| node_id(i as u64)).collect();
512        let edges = vec![
513            // Group A internal (complete)
514            (0, 1, 1.0), (0, 2, 1.0), (0, 3, 1.0),
515            (1, 2, 1.0), (1, 3, 1.0),
516            (2, 3, 1.0),
517            // Group B internal (complete)
518            (4, 5, 1.0), (4, 6, 1.0), (4, 7, 1.0),
519            (5, 6, 1.0), (5, 7, 1.0),
520            (6, 7, 1.0),
521            // Inter-group (sparse)
522            (3, 4, 0.2),
523        ];
524
525        let result = louvain_communities(&nodes, &edges);
526
527        // Should find 2 communities
528        assert_eq!(result.communities.len(), 2, "found {} communities", result.communities.len());
529
530        // Modularity for this structure should be high (> 0.3)
531        assert!(
532            result.modularity > 0.3,
533            "modularity {} should be > 0.3",
534            result.modularity
535        );
536    }
537
538    #[test]
539    fn modularity_calculation() {
540        // Simple case: 4 nodes in 2 pairs
541        let edges = vec![
542            (0, 1, 1.0), // Pair 1
543            (2, 3, 1.0), // Pair 2
544        ];
545        let partition = vec![0, 0, 1, 1]; // Each pair is a community
546
547        let q = compute_modularity(4, &edges, &partition);
548        // With this partition, modularity should be 0.5
549        // Q = 2 * (1/2 - (2/4)^2) = 2 * (0.5 - 0.25) = 0.5
550        assert!((q - 0.5).abs() < 0.01, "modularity = {}, expected ~0.5", q);
551    }
552
553    #[test]
554    fn modularity_all_one_community() {
555        // All nodes in one community should give Q = 0
556        let edges = vec![
557            (0, 1, 1.0),
558            (1, 2, 1.0),
559            (0, 2, 1.0),
560        ];
561        let partition = vec![0, 0, 0]; // All in same community
562
563        let q = compute_modularity(3, &edges, &partition);
564        assert!(q.abs() < 0.01, "modularity = {}, expected ~0", q);
565    }
566
567    #[test]
568    fn weighted_edges() {
569        // Strong edges within communities, weak between
570        let nodes: Vec<NodeId> = (0..4).map(|i| node_id(i as u64)).collect();
571        let edges = vec![
572            (0, 1, 5.0), // Strong within group 1
573            (2, 3, 5.0), // Strong within group 2
574            (1, 2, 0.1), // Weak between groups
575        ];
576
577        let result = louvain_communities(&nodes, &edges);
578
579        // Should detect 2 communities
580        assert_eq!(result.communities.len(), 2);
581
582        // Check that strong pairs are together
583        let comm_map: HashMap<NodeId, usize> = result
584            .communities
585            .iter()
586            .enumerate()
587            .flat_map(|(i, c)| c.iter().map(move |&n| (n, i)))
588            .collect();
589
590        assert_eq!(
591            comm_map.get(&node_id(0)),
592            comm_map.get(&node_id(1)),
593            "nodes 0 and 1 should be in same community"
594        );
595        assert_eq!(
596            comm_map.get(&node_id(2)),
597            comm_map.get(&node_id(3)),
598            "nodes 2 and 3 should be in same community"
599        );
600    }
601}