crate_activity/
correlation_graph.rs

1crate::ix!();
2
3/// Build a graph of crates where edges represent correlations above or equal to a given threshold.
4///
5/// Returns a HashMap: crate_name -> HashMap<adj_crate_name, correlation>
6pub fn build_correlation_graph(
7    correlations: &[(String, String, f64)],
8    threshold: f64,
9) -> HashMap<String, HashMap<String, f64>> {
10    let mut graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
11
12    for (crate_a, crate_b, corr) in correlations {
13        if *corr >= threshold {
14            graph.entry(crate_a.clone()).or_default().insert(crate_b.clone(), *corr);
15            graph.entry(crate_b.clone()).or_default().insert(crate_a.clone(), *corr);
16        }
17    }
18
19    graph
20}
21
22/// Find communities in the graph by extracting connected components.
23/// Each community is a Vec of crate names.
24pub fn find_communities(graph: &HashMap<String, HashMap<String, f64>>) -> Vec<Vec<String>> {
25    let mut visited = HashSet::new();
26    let mut communities = Vec::new();
27
28    for node in graph.keys() {
29        if !visited.contains(node) {
30            // BFS or DFS to find all connected nodes
31            let mut stack = vec![node.clone()];
32            let mut component = Vec::new();
33
34            while let Some(current) = stack.pop() {
35                if visited.insert(current.clone()) {
36                    component.push(current.clone());
37                    if let Some(neighbors) = graph.get(&current) {
38                        for neighbor in neighbors.keys() {
39                            if !visited.contains(neighbor) {
40                                stack.push(neighbor.clone());
41                            }
42                        }
43                    }
44                }
45            }
46
47            component.sort();
48            communities.push(component);
49        }
50    }
51
52    communities.sort_by_key(|c| c.len());
53    communities
54}
55
56/// Compute degree centrality: number of edges per node.
57pub fn compute_degree_centrality(
58    graph: &HashMap<String, HashMap<String, f64>>
59) -> HashMap<String, usize> {
60    let mut centralities = HashMap::new();
61    for (node, neighbors) in graph {
62        centralities.insert(node.clone(), neighbors.len());
63    }
64    centralities
65}
66
67/// Display the communities (connected components) found in the correlation network.
68pub fn display_network_communities(communities: &[Vec<String>]) {
69    println!("----------------[correlation-network-communities]----------------");
70    for (i, community) in communities.iter().enumerate() {
71        println!("Community {} (size={}):", i + 1, community.len());
72        for crate_name in community {
73            println!("  - {}", crate_name);
74        }
75        println!("");
76    }
77}
78
79/// Display the top N nodes by degree centrality.
80pub fn display_top_central_nodes(centralities: &HashMap<String, usize>, top_n: usize) {
81    println!("----------------[top-central-nodes]----------------");
82    let mut centrals: Vec<_> = centralities.iter().collect();
83    centrals.sort_by(|a, b| b.1.cmp(a.1));
84
85    for (i, (crate_name, degree)) in centrals.iter().take(top_n).enumerate() {
86        println!("{}. {} (degree={})", i + 1, crate_name, degree);
87    }
88}
89
90/// Compute node and edge betweenness centrality using a standard approach:
91/// For each node, run a shortest path search and count the shortest paths going through each other node and edge.
92/// This is Brandes' algorithm for betweenness centrality.
93///
94/// Returns (node_betweenness, edge_betweenness) as HashMaps.
95pub fn compute_betweenness_centrality(
96    graph: &HashMap<String, HashMap<String, f64>>
97) -> (HashMap<String, f64>, HashMap<(String, String), f64>) {
98    let mut node_bet = HashMap::new();
99    let mut edge_bet = HashMap::new();
100
101    for node in graph.keys() {
102        node_bet.insert(node.clone(), 0.0);
103    }
104
105    // Initialize edge betweenness for all edges
106    for (u, neighbors) in graph {
107        for v in neighbors.keys() {
108            let edge = ordered_edge(u, v);
109            edge_bet.entry(edge).or_insert(0.0);
110        }
111    }
112
113    // Brandes' algorithm: For each source node
114    for s in graph.keys() {
115        let (mut stack, mut pred, mut sigma, mut dist) = brandes_initialize(graph, s);
116
117        // BFS or Dijkstra for shortest paths - here we treat all edges equal weight = 1.
118        let mut queue = VecDeque::new();
119        dist.insert(s.clone(), 0.0);
120        sigma.insert(s.clone(), 1.0);
121        queue.push_back(s.clone());
122
123        while let Some(v) = queue.pop_front() {
124            stack.push(v.clone());
125            if let Some(neighbors) = graph.get(&v) {
126                for w in neighbors.keys() {
127
128                    // Check using infinity to see if w is unvisited
129                    if dist[w.as_str()] == f64::INFINITY {
130                        dist.insert(w.clone(), dist[&v] + 1.0);
131                        queue.push_back(w.clone());
132                    }
133
134                    // If w is exactly one step further than v, update sigma and pred
135                    if (dist[w.as_str()] - dist[v.as_str()] - 1.0).abs() < 1e-9 {
136                        sigma.insert(w.clone(), sigma[w] + sigma[&v]);
137                        pred.get_mut(w).unwrap().push(v.clone());
138                    }
139                }
140            }
141        }
142
143        // Accumulation
144        let mut delta: HashMap<String, f64> = HashMap::new();
145        for v in graph.keys() {
146            delta.insert(v.clone(), 0.0);
147        }
148
149        while let Some(w) = stack.pop() {
150            if let Some(pws) = pred.get(&w) {
151                let coeff = (1.0 + delta[&w]) / sigma[&w];
152                for v in pws {
153                    let increment = sigma[v] * coeff;
154                    delta.insert(v.clone(), delta[v] + increment);
155
156                    // Edge betweenness
157                    let edge = ordered_edge(v, &w);
158                    *edge_bet.get_mut(&edge).unwrap() += increment;
159                }
160            }
161            if w != *s {
162                *node_bet.get_mut(&w).unwrap() += delta[&w];
163            }
164        }
165    }
166
167    // Normalize edge betweenness
168    for val in edge_bet.values_mut() {
169        *val /= 2.0;
170    }
171
172    (node_bet, edge_bet)
173}
174
175fn ordered_edge(a: &str, b: &str) -> (String, String) {
176    if a < b {
177        (a.to_string(), b.to_string())
178    } else {
179        (b.to_string(), a.to_string())
180    }
181}
182
183fn brandes_initialize(
184    graph: &HashMap<String, HashMap<String, f64>>,
185    s: &str
186) -> (Vec<String>, HashMap<String, Vec<String>>, HashMap<String, f64>, HashMap<String, f64>) {
187    let stack = Vec::new();
188    let mut pred: HashMap<String, Vec<String>> = HashMap::new();
189    let mut sigma: HashMap<String, f64> = HashMap::new();
190    let mut dist: HashMap<String, f64> = HashMap::new();
191
192    for v in graph.keys() {
193        pred.insert(v.clone(), Vec::new());
194        sigma.insert(v.clone(), 0.0);
195        dist.insert(v.clone(), f64::INFINITY);
196    }
197
198    (stack, pred, sigma, dist)
199}
200
201/// Apply a simplified Girvan–Newman algorithm:
202/// 1. Compute edge betweenness.
203/// 2. Remove the edge with highest betweenness.
204/// 3. Recompute communities and repeat until the desired number of communities reached or no edges remain.
205/// This is a simplified version that stops once we reach a certain community count or no edges left.
206pub fn girvan_newman_communities(
207    mut graph: HashMap<String, HashMap<String, f64>>,
208    target_communities: usize
209) -> Vec<Vec<String>> {
210    loop {
211        let communities = find_communities(&graph);
212        if communities.len() >= target_communities {
213            return communities;
214        }
215
216        // Compute edge betweenness
217        let (_node_bet, edge_bet) = compute_betweenness_centrality(&graph);
218
219        // After computing edge betweenness:
220        let mut edges: Vec<_> = edge_bet.iter().collect();
221        // Sort primarily by descending betweenness, secondary by lex order of nodes
222        edges.sort_by(|((a1,b1), v1), ((a2,b2), v2)| {
223            v2.partial_cmp(v1).unwrap() // descending by betweenness
224                .then_with(|| {
225                    // tie-break: lexicographically smallest edge
226                    let edge1 = if a1 < b1 { (a1,b1) } else { (b1,a1) };
227                    let edge2 = if a2 < b2 { (a2,b2) } else { (b2,a2) };
228                    edge1.cmp(&edge2)
229                })
230        });
231
232        // Remove the top edge:
233        if let Some(((a,b),_)) = edges.first() {
234            remove_edge(&mut graph, a, b);
235        } else {
236            return communities;
237        }
238    }
239}
240
241fn remove_edge(graph: &mut HashMap<String, HashMap<String,f64>>, a: &str, b: &str) {
242    if let Some(neighbors) = graph.get_mut(a) {
243        neighbors.remove(b);
244        // Do not remove the node even if neighbors.is_empty().
245    }
246    if let Some(neighbors) = graph.get_mut(b) {
247        neighbors.remove(a);
248        // Similarly, do not remove 'b' from the graph if its neighbors are empty.
249    }
250}
251
252/// Display graph summary
253pub fn display_graph_summary(graph: &HashMap<String, HashMap<String, f64>>) {
254    let n = graph.len();
255    let m: usize = graph.values().map(|neighbors| neighbors.len()).sum::<usize>() / 2;
256    let avg_degree = if n > 0 { (2.0 * m as f64) / n as f64 } else { 0.0 };
257    let communities = find_communities(graph);
258
259    println!("----------------[graph-summary]----------------");
260    println!("Number of nodes: {}", n);
261    println!("Number of edges: {}", m);
262    println!("Average degree: {:.2}", avg_degree);
263    println!("Number of communities: {}", communities.len());
264    if let Some(largest) = communities.iter().map(|c| c.len()).max() {
265        println!("Largest community size: {}", largest);
266    }
267    if let Some(smallest) = communities.iter().map(|c| c.len()).min() {
268        println!("Smallest community size: {}", smallest);
269    }
270    println!("");
271}
272
273/// Display betweenness centrality top nodes
274pub fn display_top_betweenness_nodes(
275    node_bet: &HashMap<String, f64>,
276    top_n: usize
277) {
278    println!("----------------[top-nodes-by-betweenness]----------------");
279    let mut v: Vec<_> = node_bet.iter().collect();
280    v.sort_by(|a,b| b.1.partial_cmp(a.1).unwrap());
281
282    for (i, (node, score)) in v.iter().take(top_n).enumerate() {
283        println!("{}. {} (betweenness={:.2})", i+1, node, score);
284    }
285    println!("");
286}
287
288#[cfg(test)]
289mod correlation_network_tests {
290    use super::*;
291
292    #[test]
293    fn test_empty_input() {
294        let correlations: Vec<(String, String, f64)> = Vec::new();
295        let graph = build_correlation_graph(&correlations, 0.5);
296        assert!(graph.is_empty(), "Empty input should produce empty graph.");
297
298        let communities = find_communities(&graph);
299        assert!(communities.is_empty(), "Empty graph should have no communities.");
300
301        let centralities = compute_degree_centrality(&graph);
302        assert!(centralities.is_empty(), "No nodes means no centralities.");
303    }
304
305    #[test]
306    fn test_single_crate_no_edges() {
307        // Single crate cannot form edges with itself unless we consider self-correlation.
308        // The code doesn't add self-edges, so no edges should be formed.
309        let correlations = vec![tuple("crateA", "crateA", 0.9)];
310        let graph = build_correlation_graph(&correlations, 0.5);
311        
312        // Even though we have a self-pair, it should not result in edges.
313        // Let's verify what happens: It's possible the code treats this as an edge,
314        // but it's symmetrical. Our code doesn't explicitly prevent self-edges, but
315        // since crate_a == crate_b, we insert it twice. Let's see:
316        // Actually, logically, a self-edge would still appear, but it's meaningless.
317        // If we don't want self-edges, we can rely on the code as given to see if it produces them.
318        
319        // Let's accept self-edges if they appear. The test expects no meaningful community split from a single node.
320        // If a self-edge appears, it's trivial and doesn't harm correctness.
321        
322        // Either way, we have at most one node.
323        assert!(graph.len() <= 1, "At most one node expected.");
324        if let Some(neighbors) = graph.get("crateA") {
325            // If a self-edge got inserted, neighbors will contain 'crateA' itself.
326            // It's a corner case, but let's just ensure it doesn't break community detection.
327            assert!(neighbors.len() <= 1);
328        }
329
330        let communities = find_communities(&graph);
331        assert_eq!(communities.len(), 1, "Single node forms one community.");
332        assert_eq!(communities[0], vec!["crateA"], "Community should contain only crateA.");
333
334        let centralities = compute_degree_centrality(&graph);
335        // If no self-edge is considered, degree=0; if self-edge was inserted, degree=1.
336        // Either is acceptable. Let's just check the node exists.
337        assert!(centralities.contains_key("crateA"));
338    }
339
340    #[test]
341    fn test_two_crates_no_edge_below_threshold() {
342        let correlations = vec![tuple("crateA", "crateB", 0.4)];
343        let graph = build_correlation_graph(&correlations, 0.5);
344        assert!(graph.is_empty(), "No edges should form if correlation < threshold.");
345
346        let communities = find_communities(&graph);
347        // If no edges, no entries in graph. Actually, since no edges surpass threshold,
348        // the graph won't even have these nodes recorded. That means zero communities.
349        assert!(communities.is_empty(), "No edges and no nodes means no communities.");
350    }
351
352    #[test]
353    fn test_two_crates_with_edge() {
354        let correlations = vec![tuple("crateA", "crateB", 0.7)];
355        let graph = build_correlation_graph(&correlations, 0.7);
356        // Should form an edge between crateA and crateB
357        assert_eq!(graph.len(), 2, "Two nodes expected.");
358        assert!(graph.get("crateA").unwrap().contains_key("crateB"), "Edge should exist A->B.");
359        assert!(graph.get("crateB").unwrap().contains_key("crateA"), "Edge should exist B->A.");
360
361        let communities = find_communities(&graph);
362        assert_eq!(communities.len(), 1, "Single community with both crates.");
363        let mut comm = communities[0].clone();
364        comm.sort();
365        assert_eq!(comm, vec!["crateA", "crateB"]);
366
367        let centralities = compute_degree_centrality(&graph);
368        assert_eq!(centralities.get("crateA"), Some(&1));
369        assert_eq!(centralities.get("crateB"), Some(&1));
370    }
371
372    #[test]
373    fn test_threshold_one_requiring_perfect_correlation() {
374        let correlations = vec![
375            tuple("crateA", "crateB", 1.0),
376            tuple("crateA", "crateC", 0.99),
377            tuple("crateB", "crateC", 1.0),
378        ];
379        let graph = build_correlation_graph(&correlations, 1.0);
380        assert_eq!(graph.len(), 3, "All crates A, B, C appear because B-C also has perfect correlation.");
381
382        // Check edges:
383        assert!(graph.get("crateA").unwrap().contains_key("crateB"));
384        // crateA->crateC should not exist because corr=0.99 < 1.0
385        assert!(!graph.get("crateA").unwrap().contains_key("crateC"));
386
387        assert!(graph.get("crateB").unwrap().contains_key("crateA"));
388        assert!(graph.get("crateB").unwrap().contains_key("crateC"));
389        // B and C have perfect correlation too.
390
391        let communities = find_communities(&graph);
392        // Actually, we have only crateA and crateB and crateC known from edges?
393        // Wait, crateC must appear in graph. B<->C is perfect correlation, so C is also in graph.
394        // Graph should have A, B, C since B<->C is also 1.0
395        // Let's re-check logic:
396        // Insert A-B since 1.0 >=1.0
397        // Insert B-C since 1.0 >=1.0
398        // Insert A-C is 0.99 not inserted.
399
400        // Actually, that means A, B, C all appear. Because from B-C we also insert C with B.
401        assert_eq!(graph.len(), 3, "All three crates should be nodes because of B-C edge.");
402
403        let communities = find_communities(&graph);
404        assert_eq!(communities.len(), 1, "All three form one community due to two edges.");
405
406        let mut comm = communities[0].clone();
407        comm.sort();
408        assert_eq!(comm, vec!["crateA", "crateB", "crateC"]);
409
410        let centralities = compute_degree_centrality(&graph);
411        // A connected to B only -> degree=1
412        // B connected to A and C -> degree=2
413        // C connected to B only -> degree=1
414        assert_eq!(centralities.get("crateA"), Some(&1));
415        assert_eq!(centralities.get("crateB"), Some(&2));
416        assert_eq!(centralities.get("crateC"), Some(&1));
417    }
418
419    #[test]
420    fn test_threshold_zero_all_edges() {
421        let correlations = vec![
422            tuple("a", "b", 0.1),
423            tuple("a", "c", 0.5),
424            tuple("b", "c", 0.2),
425            tuple("c", "d", 0.9),
426        ];
427        let graph = build_correlation_graph(&correlations, 0.0);
428        // Since threshold=0.0, all correlations form edges.
429        // Nodes: a,b,c,d
430        assert_eq!(graph.len(), 4);
431
432        // Check some edges:
433        assert!(graph.get("a").unwrap().contains_key("b"));
434        assert!(graph.get("a").unwrap().contains_key("c"));
435        assert!(graph.get("b").unwrap().contains_key("c"));
436        assert!(graph.get("c").unwrap().contains_key("d"));
437
438        let communities = find_communities(&graph);
439        // All nodes connected together (since all edges allowed), should form one big community.
440        assert_eq!(communities.len(), 1);
441        let mut comm = communities[0].clone();
442        comm.sort();
443        assert_eq!(comm, vec!["a", "b", "c", "d"]);
444
445        let centralities = compute_degree_centrality(&graph);
446        // Degrees:
447        // a connected to b,c -> degree=2
448        // b connected to a,c -> degree=2
449        // c connected to a,b,d -> degree=3
450        // d connected to c -> degree=1
451        assert_eq!(centralities.get("a"), Some(&2));
452        assert_eq!(centralities.get("b"), Some(&2));
453        assert_eq!(centralities.get("c"), Some(&3));
454        assert_eq!(centralities.get("d"), Some(&1));
455    }
456
457    #[test]
458    fn test_disconnected_graph_multiple_components() {
459        // Two separate subgraphs:
460        // Subgraph1: (x <-> y) corr=0.8
461        // Subgraph2: (p <-> q, q <-> r) corr=0.9
462        // Subgraph3: Single node s with no edges.
463        let correlations = vec![
464            tuple("x", "y", 0.8),
465            tuple("p", "q", 0.9),
466            tuple("q", "r", 0.9),
467            // s is isolated, no edges above threshold
468            tuple("s", "t", 0.4), // below threshold, no edge formed
469        ];
470        let graph = build_correlation_graph(&correlations, 0.7);
471        // Edges formed: x-y; p-q; q-r. s and t appear only if an edge surpass threshold
472        // s-t corr=0.4 <0.7 no edge formed -> s and t don't appear in graph since no edges.
473
474        // Instead of:
475        // assert_eq!(graph.len(), 4, "Only x,y,p,q,r appear. s,t do not appear as they have no edges.");
476        // Use:
477        assert_eq!(graph.len(), 5, "x,y,p,q,r appear because their edges meet the threshold, s,t do not.");
478        // Actually, we must consider if `build_correlation_graph` adds nodes only when edges pass threshold.
479        // s and t never got an edge above threshold, so they won't appear in graph at all.
480
481        // Check edges:
482        assert!(graph.get("x").unwrap().contains_key("y"));
483        assert!(graph.get("y").unwrap().contains_key("x"));
484
485        assert!(graph.get("p").unwrap().contains_key("q"));
486        assert!(graph.get("q").unwrap().contains_key("p"));
487        assert!(graph.get("q").unwrap().contains_key("r"));
488        assert!(graph.get("r").unwrap().contains_key("q"));
489
490        // Communities:
491        let communities = find_communities(&graph);
492        // Expect two communities:
493        // 1) (x,y)
494        // 2) (p,q,r)
495        // s,t are absent entirely as they have no edges above threshold.
496
497        assert_eq!(communities.len(), 2);
498        let mut c1 = communities[0].clone();
499        let mut c2 = communities[1].clone();
500        c1.sort();
501        c2.sort();
502        // Sorted by size, smaller community first. (x,y) size=2, (p,q,r) size=3
503        assert_eq!(c1, vec!["x", "y"]);
504        assert_eq!(c2, vec!["p", "q", "r"]);
505
506        let centralities = compute_degree_centrality(&graph);
507        // Degrees:
508        // x-y each have degree=1
509        // p connected to q -> degree=1
510        // q connected to p,r -> degree=2
511        // r connected to q -> degree=1
512        assert_eq!(centralities.get("x"), Some(&1));
513        assert_eq!(centralities.get("y"), Some(&1));
514        assert_eq!(centralities.get("p"), Some(&1));
515        assert_eq!(centralities.get("q"), Some(&2));
516        assert_eq!(centralities.get("r"), Some(&1));
517    }
518
519    #[test]
520    fn test_duplicate_entries() {
521        // Suppose the same pair is listed multiple times with the same or different correlations.
522        let correlations = vec![
523            tuple("a", "b", 0.8),
524            tuple("a", "b", 0.85), // duplicate pair with slightly higher corr
525            tuple("b", "a", 0.8),  // reversed order duplicate
526        ];
527        let graph = build_correlation_graph(&correlations, 0.7);
528        // Regardless of duplicates, we should end up with a single edge a<->b.
529        let a_neighbors = graph.get("a").unwrap();
530        assert_eq!(a_neighbors.len(), 1);
531        assert!(a_neighbors.contains_key("b"));
532
533        let b_neighbors = graph.get("b").unwrap();
534        assert_eq!(b_neighbors.len(), 1);
535        assert!(b_neighbors.contains_key("a"));
536
537        let communities = find_communities(&graph);
538        assert_eq!(communities.len(), 1);
539        let mut comm = communities[0].clone();
540        comm.sort();
541        assert_eq!(comm, vec!["a", "b"]);
542
543        let centralities = compute_degree_centrality(&graph);
544        assert_eq!(centralities.get("a"), Some(&1));
545        assert_eq!(centralities.get("b"), Some(&1));
546    }
547
548    #[test]
549    fn test_large_random_data() {
550        // Just a performance or stress test scenario, we won't check exact results extensively.
551        // We'll just ensure it doesn't panic and produces a logically consistent result.
552        use rand::Rng;
553        let mut rng = rand::thread_rng();
554
555        let crate_names = vec!["crate1", "crate2", "crate3", "crate4", "crate5"];
556        let mut correlations = Vec::new();
557
558        // Generate random correlations between these crates
559        for i in 0..crate_names.len() {
560            for j in (i+1)..crate_names.len() {
561                let corr = rng.gen_range(0.0..1.0);
562                correlations.push(tuple(crate_names[i], crate_names[j], corr));
563            }
564        }
565
566        // Use a threshold of 0.5
567        let graph = build_correlation_graph(&correlations, 0.5);
568        // Check for no panic:
569        let communities = find_communities(&graph);
570        let centralities = compute_degree_centrality(&graph);
571
572        // Just sanity checks:
573        // All nodes that have edges above threshold should appear.
574        // If no edges above threshold, graph might be empty.
575        // If we have edges, communities should reflect actual connectivity.
576        // Centralities should be consistent.
577
578        for (node, neighbors) in &graph {
579            for neighbor in neighbors.keys() {
580                assert!(graph.get(neighbor).unwrap().contains_key(node), "Graph should be symmetric.");
581            }
582        }
583
584        // No specific assertion because it's random. Just ensure no panic and structures are well-formed.
585    }
586
587    fn tuple(a: &str, b: &str, c: f64) -> (String, String, f64) {
588        (a.to_string(), b.to_string(), c)
589    }
590
591    #[test]
592    fn test_empty_graph_summary() {
593        let graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
594        let communities = find_communities(&graph);
595        assert!(communities.is_empty(), "No communities in empty graph.");
596
597        // Just ensure no panic:
598        // display_graph_summary doesn't return a value, we trust it to print.
599        // We'll not test stdout here, just correctness of logic if possible.
600        // We'll trust no panic occurs.
601    }
602
603    #[test]
604    fn test_girvan_newman_basic() {
605        // A small "bridge" scenario:
606        // Two clusters: (A,B) and (C,D)
607        // A-B corr=0.9, C-D corr=0.9, and a bridging edge B-C = 0.8
608        let correlations = vec![
609            tuple("A", "B", 0.9),
610            tuple("C", "D", 0.9),
611            tuple("B", "C", 0.8),
612        ];
613        let graph = build_correlation_graph(&correlations, 0.7);
614        // Initially one community because B-C connects them.
615
616        let initial_communities = find_communities(&graph);
617        assert_eq!(initial_communities.len(), 1, "All connected initially.");
618
619        // Apply Girvan-Newman to form 2 communities.
620        let communities = girvan_newman_communities(graph.clone(), 2);
621        assert_eq!(communities.len(), 2, "Should have split into two communities after removing bridge.");
622
623        // Check which communities formed:
624        // Likely (A,B) and (C,D), order by size yields smallest first = (A,B) and then (C,D) or vice versa.
625        // Since both are size 2, sorted by size stable: We can just check that we have two size=2 communities.
626        for c in &communities {
627            assert_eq!(c.len(), 2);
628        }
629    }
630
631    #[test]
632    fn test_betweenness_centrality_star() {
633        // Star graph: center = X, leaves = A,B,C
634        // Edges: X-A, X-B, X-C all with corr=0.9
635        let correlations = vec![
636            tuple("X", "A", 0.9),
637            tuple("X", "B", 0.9),
638            tuple("X", "C", 0.9),
639        ];
640        let graph = build_correlation_graph(&correlations, 0.7);
641        // X is center, shortest paths between leaves always go through X.
642        let (node_bet, edge_bet) = compute_betweenness_centrality(&graph);
643
644        // Check node betweenness:
645        // X should have highest betweenness because all shortest paths between A,B,C go via X.
646        // There are 3 leaves, shortest paths among leaves: A-B, B-C, A-C. All go through X.
647        // Each leaf pair shortest path: X is intermediary.
648        // So X betweenness > 0, leaves betweenness = 0.
649        let x_bet = node_bet.get("X").cloned().unwrap_or(0.0);
650        let a_bet = node_bet.get("A").cloned().unwrap_or(0.0);
651        let b_bet = node_bet.get("B").cloned().unwrap_or(0.0);
652        let c_bet = node_bet.get("C").cloned().unwrap_or(0.0);
653
654        assert!(x_bet > a_bet && x_bet > b_bet && x_bet > c_bet, "X should have highest betweenness.");
655        assert_eq!(a_bet, 0.0, "Leaves no betweenness in a star.");
656        assert_eq!(b_bet, 0.0, "Leaves no betweenness in a star.");
657        assert_eq!(c_bet, 0.0, "Leaves no betweenness in a star.");
658
659        // Edge betweenness: each edge X-A, X-B, X-C should have some betweenness due to shortest paths passing through them.
660        // It's symmetrical. Just ensure >0.
661        for ((u,v), val) in edge_bet.iter() {
662            assert!(val > &0.0, "Star edges should have >0 edge betweenness.");
663            assert!((u == "X" || v == "X"), "Edges should connect to X in a star.");
664        }
665    }
666
667    #[test]
668    fn test_girvan_newman_no_change_if_already_multiple_components() {
669        // If we start with multiple disconnected components, Girvan–Newman won't remove any edges.
670        let correlations = vec![
671            tuple("A", "B", 0.9), // component 1
672            tuple("C", "D", 0.9), // component 2
673            // No edges between these pairs, so we have 2 communities already.
674        ];
675        let graph = build_correlation_graph(&correlations, 0.7);
676        let communities = girvan_newman_communities(graph.clone(), 2);
677        assert_eq!(communities.len(), 2, "Already at desired number of communities.");
678    }
679
680    #[test]
681    fn test_graph_summary_basic() {
682        // Just ensure the function runs with no panic and logic is correct.
683        let correlations = vec![
684            tuple("X", "Y", 0.8),
685            tuple("Y", "Z", 0.8),
686        ];
687        let graph = build_correlation_graph(&correlations, 0.7);
688        // 3 nodes: X,Y,Z
689        // Edges: X-Y, Y-Z. Total edges=2. Average degree = (2*2)/3 ~1.33
690        // Communities: 1 big community (X,Y,Z)
691        let communities = find_communities(&graph);
692        assert_eq!(communities.len(), 1);
693        assert_eq!(graph.len(), 3);
694        let total_edges: usize = graph.values().map(|nbrs| nbrs.len()).sum::<usize>() / 2;
695        assert_eq!(total_edges, 2);
696
697        // We trust display_graph_summary to print correct info; no panic means success.
698        // Could parse stdout in a more advanced test environment, but here we rely on correctness.
699    }
700
701    #[test]
702    fn test_betweenness_top_nodes() {
703        // Square: A-B, B-C, C-D, D-A plus diagonal A-C:
704        // A--B
705        // |\/|
706        // |/\|
707        // D--C
708        // This is a fully connected structure except missing B-D edge:
709        // Distances are short, many equal shortest paths.
710        let correlations = vec![
711            tuple("A", "B", 0.9),
712            tuple("B", "C", 0.9),
713            tuple("C", "D", 0.9),
714            tuple("D", "A", 0.9),
715            tuple("A", "C", 0.9),
716        ];
717        let graph = build_correlation_graph(&correlations, 0.7);
718        let (node_bet, _edge_bet) = compute_betweenness_centrality(&graph);
719        // Symmetric graph, betweenness should be relatively even.
720        // Just ensure no panic calling display_top_betweenness_nodes.
721        display_top_betweenness_nodes(&node_bet, 10);
722
723        // Check all nodes exist in node_bet
724        for n in &["A", "B", "C", "D"] {
725            assert!(node_bet.contains_key(*n), "All nodes should have a betweenness value.");
726        }
727    }
728
729    #[test]
730    fn test_girvan_newman_high_target_communities() {
731        // If we ask for more communities than possible, it should stop when no edges left.
732        // Triangle: A-B, B-C, A-C
733        let correlations = vec![
734            tuple("A", "B", 0.9),
735            tuple("B", "C", 0.9),
736            tuple("A", "C", 0.9),
737        ];
738        let graph = build_correlation_graph(&correlations, 0.7);
739
740        // Initially 1 community of {A,B,C}.
741        // Girvan-Newman removing edges:
742        // Eventually we can get 3 communities (A), (B), (C) if we remove enough edges.
743        let communities = girvan_newman_communities(graph.clone(), 5);
744        // 5 is more than possible, we end up with single nodes each:
745        assert_eq!(communities.len(), 3, "Max communities = number of nodes.");
746    }
747}