Skip to main content

scirs2_graph/algorithms/community/
girvan_newman.rs

1//! Girvan-Newman community detection algorithm
2//!
3//! Detects communities by iteratively removing edges with the highest
4//! edge betweenness centrality. This divisive hierarchical approach
5//! reveals community structure at multiple scales.
6//!
7//! # References
8//! - Girvan, M. & Newman, M.E.J. (2002). Community structure in social and
9//!   biological networks. PNAS, 99(12), 7821-7826.
10
11use super::types::CommunityResult;
12use crate::base::{EdgeWeight, Graph, Node};
13use crate::error::{GraphError, Result};
14use std::collections::{HashMap, HashSet, VecDeque};
15use std::hash::Hash;
16
17/// Configuration for Girvan-Newman algorithm
18#[derive(Debug, Clone)]
19pub struct GirvanNewmanConfig {
20    /// Target number of communities (None = find optimal via modularity)
21    pub num_communities: Option<usize>,
22    /// Maximum number of edge removal iterations
23    pub max_iterations: usize,
24}
25
26impl Default for GirvanNewmanConfig {
27    fn default() -> Self {
28        GirvanNewmanConfig {
29            num_communities: None,
30            max_iterations: 10000,
31        }
32    }
33}
34
35/// A dendrogram node representing the hierarchical decomposition
36#[derive(Debug, Clone)]
37pub struct DendrogramLevel<N: Node> {
38    /// Communities at this level
39    pub communities: Vec<HashSet<N>>,
40    /// Number of communities at this level
41    pub num_communities: usize,
42    /// Modularity score at this level
43    pub modularity: f64,
44    /// The edge that was removed to reach this level (source, target)
45    pub removed_edge: Option<(N, N)>,
46}
47
48/// Result of Girvan-Newman algorithm including the full dendrogram
49#[derive(Debug, Clone)]
50pub struct GirvanNewmanResult<N: Node> {
51    /// The best community partition found (maximizing modularity)
52    pub best_partition: CommunityResult<N>,
53    /// The full dendrogram showing hierarchical decomposition
54    pub dendrogram: Vec<DendrogramLevel<N>>,
55}
56
57/// Detect communities using the Girvan-Newman edge betweenness method
58///
59/// # Arguments
60/// * `graph` - The undirected graph to analyze
61/// * `config` - Algorithm configuration
62///
63/// # Returns
64/// A `GirvanNewmanResult` containing the best partition and full dendrogram
65///
66/// # Algorithm
67/// 1. Compute edge betweenness centrality for all edges
68/// 2. Remove the edge with the highest betweenness
69/// 3. Recompute connected components
70/// 4. Repeat until desired number of communities or all edges removed
71///
72/// # Time Complexity
73/// O(m^2 * n) where m is the number of edges and n is the number of nodes
74pub fn girvan_newman_result<N, E, Ix>(
75    graph: &Graph<N, E, Ix>,
76    config: &GirvanNewmanConfig,
77) -> Result<GirvanNewmanResult<N>>
78where
79    N: Node + Clone + Hash + Eq + std::fmt::Debug,
80    E: EdgeWeight + Into<f64> + Clone,
81    Ix: petgraph::graph::IndexType,
82{
83    let n = graph.node_count();
84    if n == 0 {
85        return Err(GraphError::InvalidGraph(
86            "Cannot detect communities in an empty graph".to_string(),
87        ));
88    }
89
90    let nodes: Vec<N> = graph.nodes().into_iter().cloned().collect();
91
92    // Build mutable adjacency representation
93    let mut adjacency: HashMap<N, HashSet<N>> = HashMap::new();
94    let mut edge_weights: HashMap<(N, N), f64> = HashMap::new();
95
96    for node in &nodes {
97        adjacency.insert(node.clone(), HashSet::new());
98    }
99
100    let edges = graph.edges();
101    for edge in &edges {
102        adjacency
103            .entry(edge.source.clone())
104            .or_default()
105            .insert(edge.target.clone());
106        adjacency
107            .entry(edge.target.clone())
108            .or_default()
109            .insert(edge.source.clone());
110
111        let w: f64 = edge.weight.clone().into();
112        edge_weights.insert((edge.source.clone(), edge.target.clone()), w);
113        edge_weights.insert((edge.target.clone(), edge.source.clone()), w);
114    }
115
116    // Compute initial total edge weight (for modularity)
117    let total_weight: f64 = edges
118        .iter()
119        .map(|e| {
120            let w: f64 = e.weight.clone().into();
121            w
122        })
123        .sum();
124
125    // Build degree map (original degrees for modularity computation)
126    let original_degrees: HashMap<N, f64> = nodes
127        .iter()
128        .map(|node| {
129            let deg: f64 = adjacency
130                .get(node)
131                .map(|neighbors| {
132                    neighbors
133                        .iter()
134                        .map(|nb| {
135                            edge_weights
136                                .get(&(node.clone(), nb.clone()))
137                                .copied()
138                                .unwrap_or(1.0)
139                        })
140                        .sum()
141                })
142                .unwrap_or(0.0);
143            (node.clone(), deg)
144        })
145        .collect();
146
147    let mut dendrogram: Vec<DendrogramLevel<N>> = Vec::new();
148    let mut best_modularity = f64::NEG_INFINITY;
149    let mut best_communities: Option<Vec<HashSet<N>>> = None;
150
151    // Record initial state
152    let initial_communities = find_connected_components(&adjacency);
153    let initial_modularity = compute_modularity(
154        &initial_communities,
155        &adjacency,
156        &original_degrees,
157        &edge_weights,
158        total_weight,
159    );
160
161    dendrogram.push(DendrogramLevel {
162        communities: initial_communities.clone(),
163        num_communities: initial_communities.len(),
164        modularity: initial_modularity,
165        removed_edge: None,
166    });
167
168    if initial_modularity > best_modularity {
169        best_modularity = initial_modularity;
170        best_communities = Some(initial_communities);
171    }
172
173    // Iteratively remove edges
174    for _iter in 0..config.max_iterations {
175        // Check if we've reached target number of communities
176        let current_communities = find_connected_components(&adjacency);
177        if let Some(target) = config.num_communities {
178            if current_communities.len() >= target {
179                break;
180            }
181        }
182
183        // Count remaining edges
184        let remaining_edges: usize = adjacency.values().map(|neighbors| neighbors.len()).sum();
185        if remaining_edges == 0 {
186            break;
187        }
188
189        // Compute edge betweenness centrality
190        let betweenness = compute_edge_betweenness(&adjacency, &nodes);
191
192        // Find edge with highest betweenness
193        let mut max_betweenness = f64::NEG_INFINITY;
194        let mut max_edge: Option<(N, N)> = None;
195
196        for ((u, v), &b) in &betweenness {
197            if b > max_betweenness {
198                max_betweenness = b;
199                max_edge = Some((u.clone(), v.clone()));
200            }
201        }
202
203        let (u, v) = match max_edge {
204            Some(edge) => edge,
205            None => break,
206        };
207
208        // Remove the edge
209        if let Some(neighbors) = adjacency.get_mut(&u) {
210            neighbors.remove(&v);
211        }
212        if let Some(neighbors) = adjacency.get_mut(&v) {
213            neighbors.remove(&u);
214        }
215
216        // Find new connected components
217        let new_communities = find_connected_components(&adjacency);
218        let new_modularity = compute_modularity(
219            &new_communities,
220            &adjacency,
221            &original_degrees,
222            &edge_weights,
223            total_weight,
224        );
225
226        dendrogram.push(DendrogramLevel {
227            communities: new_communities.clone(),
228            num_communities: new_communities.len(),
229            modularity: new_modularity,
230            removed_edge: Some((u, v)),
231        });
232
233        if new_modularity > best_modularity {
234            best_modularity = new_modularity;
235            best_communities = Some(new_communities);
236        }
237    }
238
239    // Build the best partition result
240    let communities = best_communities.unwrap_or_else(|| find_connected_components(&adjacency));
241
242    let mut node_communities: HashMap<N, usize> = HashMap::new();
243    for (comm_id, community) in communities.iter().enumerate() {
244        for node in community {
245            node_communities.insert(node.clone(), comm_id);
246        }
247    }
248
249    let mut result = CommunityResult::from_node_map(node_communities);
250    result.quality_score = Some(best_modularity);
251    result
252        .metadata
253        .insert("modularity".to_string(), best_modularity);
254    result
255        .metadata
256        .insert("dendrogram_levels".to_string(), dendrogram.len() as f64);
257
258    Ok(GirvanNewmanResult {
259        best_partition: result,
260        dendrogram,
261    })
262}
263
264/// Simplified interface returning just a CommunityResult
265pub fn girvan_newman_communities_result<N, E, Ix>(
266    graph: &Graph<N, E, Ix>,
267) -> Result<CommunityResult<N>>
268where
269    N: Node + Clone + Hash + Eq + std::fmt::Debug,
270    E: EdgeWeight + Into<f64> + Clone,
271    Ix: petgraph::graph::IndexType,
272{
273    let config = GirvanNewmanConfig::default();
274    let result = girvan_newman_result(graph, &config)?;
275    Ok(result.best_partition)
276}
277
278/// Compute edge betweenness centrality for all edges using Brandes' algorithm
279fn compute_edge_betweenness<N: Node + Clone + Hash + Eq + std::fmt::Debug>(
280    adjacency: &HashMap<N, HashSet<N>>,
281    nodes: &[N],
282) -> HashMap<(N, N), f64> {
283    let mut betweenness: HashMap<(N, N), f64> = HashMap::new();
284
285    // Initialize all edges with zero betweenness
286    for (u, neighbors) in adjacency {
287        for v in neighbors {
288            betweenness.insert((u.clone(), v.clone()), 0.0);
289        }
290    }
291
292    // For each source node, run BFS and accumulate betweenness
293    for source in nodes {
294        if adjacency.get(source).map(|n| n.is_empty()).unwrap_or(true) {
295            continue;
296        }
297
298        // BFS from source
299        let mut stack: Vec<N> = Vec::new();
300        let mut predecessors: HashMap<N, Vec<N>> = HashMap::new();
301        let mut sigma: HashMap<N, f64> = HashMap::new(); // Number of shortest paths
302        let mut dist: HashMap<N, i64> = HashMap::new(); // Distance from source
303
304        for node in nodes {
305            sigma.insert(node.clone(), 0.0);
306            dist.insert(node.clone(), -1);
307        }
308
309        sigma.insert(source.clone(), 1.0);
310        dist.insert(source.clone(), 0);
311
312        let mut queue: VecDeque<N> = VecDeque::new();
313        queue.push_back(source.clone());
314
315        while let Some(v) = queue.pop_front() {
316            stack.push(v.clone());
317
318            let v_dist = dist.get(&v).copied().unwrap_or(-1);
319            if v_dist < 0 {
320                continue;
321            }
322
323            if let Some(neighbors) = adjacency.get(&v) {
324                for w in neighbors {
325                    let w_dist = dist.get(w).copied().unwrap_or(-1);
326
327                    // w found for the first time?
328                    if w_dist < 0 {
329                        dist.insert(w.clone(), v_dist + 1);
330                        queue.push_back(w.clone());
331                    }
332
333                    // Shortest path to w via v?
334                    let current_w_dist = dist.get(w).copied().unwrap_or(-1);
335                    if current_w_dist == v_dist + 1 {
336                        let sigma_v = sigma.get(&v).copied().unwrap_or(0.0);
337                        *sigma.entry(w.clone()).or_insert(0.0) += sigma_v;
338                        predecessors.entry(w.clone()).or_default().push(v.clone());
339                    }
340                }
341            }
342        }
343
344        // Back-propagation of dependencies
345        let mut delta: HashMap<N, f64> = nodes.iter().map(|n| (n.clone(), 0.0)).collect();
346
347        while let Some(w) = stack.pop() {
348            if let Some(preds) = predecessors.get(&w) {
349                let sigma_w = sigma.get(&w).copied().unwrap_or(1.0);
350                let delta_w = delta.get(&w).copied().unwrap_or(0.0);
351
352                for v in preds {
353                    let sigma_v = sigma.get(v).copied().unwrap_or(1.0);
354                    let coeff = (sigma_v / sigma_w) * (1.0 + delta_w);
355
356                    // Add to edge betweenness (both directions)
357                    *betweenness.entry((v.clone(), w.clone())).or_insert(0.0) += coeff;
358
359                    // Accumulate node dependency
360                    *delta.entry(v.clone()).or_insert(0.0) += coeff;
361                }
362            }
363        }
364    }
365
366    // For undirected graphs, each edge is counted twice; normalize
367    for value in betweenness.values_mut() {
368        *value /= 2.0;
369    }
370
371    betweenness
372}
373
374/// Find connected components using BFS
375fn find_connected_components<N: Node + Clone + Hash + Eq>(
376    adjacency: &HashMap<N, HashSet<N>>,
377) -> Vec<HashSet<N>> {
378    let mut visited: HashSet<N> = HashSet::new();
379    let mut components: Vec<HashSet<N>> = Vec::new();
380
381    for node in adjacency.keys() {
382        if visited.contains(node) {
383            continue;
384        }
385
386        let mut component = HashSet::new();
387        let mut queue = VecDeque::new();
388        queue.push_back(node.clone());
389        visited.insert(node.clone());
390
391        while let Some(current) = queue.pop_front() {
392            component.insert(current.clone());
393
394            if let Some(neighbors) = adjacency.get(&current) {
395                for neighbor in neighbors {
396                    if !visited.contains(neighbor) {
397                        visited.insert(neighbor.clone());
398                        queue.push_back(neighbor.clone());
399                    }
400                }
401            }
402        }
403
404        components.push(component);
405    }
406
407    // Sort by size (largest first)
408    components.sort_by_key(|b| std::cmp::Reverse(b.len()));
409    components
410}
411
412/// Compute modularity Q for a partition
413///
414/// Q = (1/2m) * sum_{ij} [A_{ij} - k_i*k_j/(2m)] * delta(c_i, c_j)
415fn compute_modularity<N: Node + Clone + Hash + Eq>(
416    communities: &[HashSet<N>],
417    _adjacency: &HashMap<N, HashSet<N>>,
418    original_degrees: &HashMap<N, f64>,
419    edge_weights: &HashMap<(N, N), f64>,
420    total_weight: f64,
421) -> f64 {
422    if total_weight <= 0.0 {
423        return 0.0;
424    }
425
426    let two_m = 2.0 * total_weight;
427    let mut q = 0.0;
428
429    for community in communities {
430        for u in community {
431            for v in community {
432                let a_uv = edge_weights
433                    .get(&(u.clone(), v.clone()))
434                    .copied()
435                    .unwrap_or(0.0);
436                let k_u = original_degrees.get(u).copied().unwrap_or(0.0);
437                let k_v = original_degrees.get(v).copied().unwrap_or(0.0);
438
439                q += a_uv - (k_u * k_v) / two_m;
440            }
441        }
442    }
443
444    q / two_m
445}
446
447#[cfg(test)]
448mod tests {
449    use super::*;
450
451    fn make_karate_club_like() -> Graph<i32, f64> {
452        // Two clear communities connected by a few bridge edges
453        let mut g = Graph::new();
454        for i in 0..10 {
455            g.add_node(i);
456        }
457
458        // Community A (0-4): dense connections
459        let _ = g.add_edge(0, 1, 1.0);
460        let _ = g.add_edge(0, 2, 1.0);
461        let _ = g.add_edge(0, 3, 1.0);
462        let _ = g.add_edge(1, 2, 1.0);
463        let _ = g.add_edge(1, 3, 1.0);
464        let _ = g.add_edge(2, 3, 1.0);
465        let _ = g.add_edge(2, 4, 1.0);
466        let _ = g.add_edge(3, 4, 1.0);
467
468        // Community B (5-9): dense connections
469        let _ = g.add_edge(5, 6, 1.0);
470        let _ = g.add_edge(5, 7, 1.0);
471        let _ = g.add_edge(5, 8, 1.0);
472        let _ = g.add_edge(6, 7, 1.0);
473        let _ = g.add_edge(6, 8, 1.0);
474        let _ = g.add_edge(7, 8, 1.0);
475        let _ = g.add_edge(7, 9, 1.0);
476        let _ = g.add_edge(8, 9, 1.0);
477
478        // Bridge edge between communities
479        let _ = g.add_edge(4, 5, 1.0);
480
481        g
482    }
483
484    fn make_triangle() -> Graph<i32, f64> {
485        let mut g: Graph<i32, f64> = Graph::new();
486        for i in 0..3 {
487            g.add_node(i);
488        }
489        let _ = g.add_edge(0, 1, 1.0);
490        let _ = g.add_edge(1, 2, 1.0);
491        let _ = g.add_edge(0, 2, 1.0);
492        g
493    }
494
495    #[test]
496    fn test_girvan_newman_basic() {
497        let g = make_karate_club_like();
498        let config = GirvanNewmanConfig::default();
499
500        let result = girvan_newman_result(&g, &config);
501        assert!(result.is_ok(), "Girvan-Newman should succeed");
502
503        let result = result.expect("result should be valid");
504        assert!(
505            result.best_partition.num_communities >= 2,
506            "Should find at least 2 communities, found {}",
507            result.best_partition.num_communities
508        );
509    }
510
511    #[test]
512    fn test_girvan_newman_target_communities() {
513        let g = make_karate_club_like();
514        let config = GirvanNewmanConfig {
515            num_communities: Some(2),
516            max_iterations: 1000,
517        };
518
519        let result = girvan_newman_result(&g, &config);
520        assert!(result.is_ok());
521
522        let result = result.expect("result should be valid");
523        // Should have at least 2 communities
524        assert!(result.best_partition.num_communities >= 2);
525    }
526
527    #[test]
528    fn test_girvan_newman_dendrogram() {
529        let g = make_karate_club_like();
530        let config = GirvanNewmanConfig {
531            num_communities: Some(3),
532            max_iterations: 1000,
533        };
534
535        let result = girvan_newman_result(&g, &config);
536        assert!(result.is_ok());
537
538        let result = result.expect("result should be valid");
539        // Dendrogram should have multiple levels
540        assert!(
541            result.dendrogram.len() >= 2,
542            "Dendrogram should have at least 2 levels, has {}",
543            result.dendrogram.len()
544        );
545
546        // First level should have the initial number of components
547        assert_eq!(result.dendrogram[0].removed_edge, None);
548
549        // Subsequent levels should have removed edges
550        for level in result.dendrogram.iter().skip(1) {
551            assert!(level.removed_edge.is_some());
552        }
553    }
554
555    #[test]
556    fn test_girvan_newman_bridge_removal() {
557        // In a graph with two communities joined by a single bridge,
558        // the bridge should be removed first
559        let g = make_karate_club_like();
560        let config = GirvanNewmanConfig {
561            num_communities: Some(2),
562            max_iterations: 1000,
563        };
564
565        let result = girvan_newman_result(&g, &config);
566        assert!(result.is_ok());
567
568        let result = result.expect("result should be valid");
569
570        // Check that communities separate nodes correctly
571        // Nodes 0-4 should be in one community, 5-9 in another
572        let comm_0 = result.best_partition.get_community(&0);
573        let comm_5 = result.best_partition.get_community(&5);
574
575        assert!(comm_0.is_some());
576        assert!(comm_5.is_some());
577
578        // They should be in different communities
579        // (modularity optimization may produce slight variations, but the bridge
580        //  community separation should hold)
581        if result.best_partition.num_communities >= 2 {
582            // Verify all A-side nodes are together
583            let comm_a = comm_0.expect("node 0 should have community");
584            for node in [1, 2, 3, 4] {
585                let c = result.best_partition.get_community(&node);
586                assert!(c.is_some(), "Node {node} should have community assignment");
587                assert_eq!(
588                    c.expect("community should exist"),
589                    comm_a,
590                    "Node {node} should be in same community as node 0"
591                );
592            }
593        }
594    }
595
596    #[test]
597    fn test_girvan_newman_triangle() {
598        let g = make_triangle();
599        let config = GirvanNewmanConfig::default();
600
601        let result = girvan_newman_result(&g, &config);
602        assert!(result.is_ok());
603
604        let result = result.expect("result should be valid");
605        // All 3 nodes should have community assignments
606        for node in [0, 1, 2] {
607            assert!(
608                result.best_partition.get_community(&node).is_some(),
609                "Node {node} should have a community"
610            );
611        }
612    }
613
614    #[test]
615    fn test_girvan_newman_empty_graph() {
616        let g: Graph<i32, f64> = Graph::new();
617        let config = GirvanNewmanConfig::default();
618
619        let result = girvan_newman_result(&g, &config);
620        assert!(result.is_err(), "Should fail on empty graph");
621    }
622
623    #[test]
624    fn test_girvan_newman_single_node() {
625        let mut g: Graph<i32, f64> = Graph::new();
626        g.add_node(0);
627
628        let config = GirvanNewmanConfig::default();
629        let result = girvan_newman_result(&g, &config);
630        assert!(result.is_ok());
631
632        let result = result.expect("result should be valid");
633        assert_eq!(result.best_partition.num_communities, 1);
634    }
635
636    #[test]
637    fn test_girvan_newman_communities_result_simplified() {
638        let g = make_karate_club_like();
639
640        let result = girvan_newman_communities_result(&g);
641        assert!(result.is_ok());
642
643        let result = result.expect("result should be valid");
644        assert!(result.num_communities >= 1);
645        assert!(result.quality_score.is_some());
646    }
647
648    #[test]
649    fn test_edge_betweenness_computation() {
650        // In a path graph 0-1-2, edge (0,1) and (1,2) should have higher betweenness
651        // than in a triangle where edges are equally important
652        let mut adjacency: HashMap<i32, HashSet<i32>> = HashMap::new();
653        adjacency.insert(0, [1].iter().copied().collect());
654        adjacency.insert(1, [0, 2].iter().copied().collect());
655        adjacency.insert(2, [1].iter().copied().collect());
656
657        let nodes = vec![0, 1, 2];
658        let betweenness = compute_edge_betweenness(&adjacency, &nodes);
659
660        // Edge (0,1) should have positive betweenness
661        let b_01 = betweenness.get(&(0, 1)).copied().unwrap_or(0.0);
662        assert!(
663            b_01 > 0.0,
664            "Edge (0,1) should have positive betweenness, got {b_01}"
665        );
666
667        let b_12 = betweenness.get(&(1, 2)).copied().unwrap_or(0.0);
668        assert!(
669            b_12 > 0.0,
670            "Edge (1,2) should have positive betweenness, got {b_12}"
671        );
672    }
673}