1crate::ix!();
2
3pub fn build_correlation_graph(
7 correlations: &[(String, String, f64)],
8 threshold: f64,
9) -> HashMap<String, HashMap<String, f64>> {
10 let mut graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
11
12 for (crate_a, crate_b, corr) in correlations {
13 if *corr >= threshold {
14 graph.entry(crate_a.clone()).or_default().insert(crate_b.clone(), *corr);
15 graph.entry(crate_b.clone()).or_default().insert(crate_a.clone(), *corr);
16 }
17 }
18
19 graph
20}
21
22pub fn find_communities(graph: &HashMap<String, HashMap<String, f64>>) -> Vec<Vec<String>> {
25 let mut visited = HashSet::new();
26 let mut communities = Vec::new();
27
28 for node in graph.keys() {
29 if !visited.contains(node) {
30 let mut stack = vec![node.clone()];
32 let mut component = Vec::new();
33
34 while let Some(current) = stack.pop() {
35 if visited.insert(current.clone()) {
36 component.push(current.clone());
37 if let Some(neighbors) = graph.get(¤t) {
38 for neighbor in neighbors.keys() {
39 if !visited.contains(neighbor) {
40 stack.push(neighbor.clone());
41 }
42 }
43 }
44 }
45 }
46
47 component.sort();
48 communities.push(component);
49 }
50 }
51
52 communities.sort_by_key(|c| c.len());
53 communities
54}
55
56pub fn compute_degree_centrality(
58 graph: &HashMap<String, HashMap<String, f64>>
59) -> HashMap<String, usize> {
60 let mut centralities = HashMap::new();
61 for (node, neighbors) in graph {
62 centralities.insert(node.clone(), neighbors.len());
63 }
64 centralities
65}
66
67pub fn display_network_communities(communities: &[Vec<String>]) {
69 println!("----------------[correlation-network-communities]----------------");
70 for (i, community) in communities.iter().enumerate() {
71 println!("Community {} (size={}):", i + 1, community.len());
72 for crate_name in community {
73 println!(" - {}", crate_name);
74 }
75 println!("");
76 }
77}
78
79pub fn display_top_central_nodes(centralities: &HashMap<String, usize>, top_n: usize) {
81 println!("----------------[top-central-nodes]----------------");
82 let mut centrals: Vec<_> = centralities.iter().collect();
83 centrals.sort_by(|a, b| b.1.cmp(a.1));
84
85 for (i, (crate_name, degree)) in centrals.iter().take(top_n).enumerate() {
86 println!("{}. {} (degree={})", i + 1, crate_name, degree);
87 }
88}
89
90pub fn compute_betweenness_centrality(
96 graph: &HashMap<String, HashMap<String, f64>>
97) -> (HashMap<String, f64>, HashMap<(String, String), f64>) {
98 let mut node_bet = HashMap::new();
99 let mut edge_bet = HashMap::new();
100
101 for node in graph.keys() {
102 node_bet.insert(node.clone(), 0.0);
103 }
104
105 for (u, neighbors) in graph {
107 for v in neighbors.keys() {
108 let edge = ordered_edge(u, v);
109 edge_bet.entry(edge).or_insert(0.0);
110 }
111 }
112
113 for s in graph.keys() {
115 let (mut stack, mut pred, mut sigma, mut dist) = brandes_initialize(graph, s);
116
117 let mut queue = VecDeque::new();
119 dist.insert(s.clone(), 0.0);
120 sigma.insert(s.clone(), 1.0);
121 queue.push_back(s.clone());
122
123 while let Some(v) = queue.pop_front() {
124 stack.push(v.clone());
125 if let Some(neighbors) = graph.get(&v) {
126 for w in neighbors.keys() {
127
128 if dist[w.as_str()] == f64::INFINITY {
130 dist.insert(w.clone(), dist[&v] + 1.0);
131 queue.push_back(w.clone());
132 }
133
134 if (dist[w.as_str()] - dist[v.as_str()] - 1.0).abs() < 1e-9 {
136 sigma.insert(w.clone(), sigma[w] + sigma[&v]);
137 pred.get_mut(w).unwrap().push(v.clone());
138 }
139 }
140 }
141 }
142
143 let mut delta: HashMap<String, f64> = HashMap::new();
145 for v in graph.keys() {
146 delta.insert(v.clone(), 0.0);
147 }
148
149 while let Some(w) = stack.pop() {
150 if let Some(pws) = pred.get(&w) {
151 let coeff = (1.0 + delta[&w]) / sigma[&w];
152 for v in pws {
153 let increment = sigma[v] * coeff;
154 delta.insert(v.clone(), delta[v] + increment);
155
156 let edge = ordered_edge(v, &w);
158 *edge_bet.get_mut(&edge).unwrap() += increment;
159 }
160 }
161 if w != *s {
162 *node_bet.get_mut(&w).unwrap() += delta[&w];
163 }
164 }
165 }
166
167 for val in edge_bet.values_mut() {
169 *val /= 2.0;
170 }
171
172 (node_bet, edge_bet)
173}
174
175fn ordered_edge(a: &str, b: &str) -> (String, String) {
176 if a < b {
177 (a.to_string(), b.to_string())
178 } else {
179 (b.to_string(), a.to_string())
180 }
181}
182
183fn brandes_initialize(
184 graph: &HashMap<String, HashMap<String, f64>>,
185 s: &str
186) -> (Vec<String>, HashMap<String, Vec<String>>, HashMap<String, f64>, HashMap<String, f64>) {
187 let stack = Vec::new();
188 let mut pred: HashMap<String, Vec<String>> = HashMap::new();
189 let mut sigma: HashMap<String, f64> = HashMap::new();
190 let mut dist: HashMap<String, f64> = HashMap::new();
191
192 for v in graph.keys() {
193 pred.insert(v.clone(), Vec::new());
194 sigma.insert(v.clone(), 0.0);
195 dist.insert(v.clone(), f64::INFINITY);
196 }
197
198 (stack, pred, sigma, dist)
199}
200
201pub fn girvan_newman_communities(
207 mut graph: HashMap<String, HashMap<String, f64>>,
208 target_communities: usize
209) -> Vec<Vec<String>> {
210 loop {
211 let communities = find_communities(&graph);
212 if communities.len() >= target_communities {
213 return communities;
214 }
215
216 let (_node_bet, edge_bet) = compute_betweenness_centrality(&graph);
218
219 let mut edges: Vec<_> = edge_bet.iter().collect();
221 edges.sort_by(|((a1,b1), v1), ((a2,b2), v2)| {
223 v2.partial_cmp(v1).unwrap() .then_with(|| {
225 let edge1 = if a1 < b1 { (a1,b1) } else { (b1,a1) };
227 let edge2 = if a2 < b2 { (a2,b2) } else { (b2,a2) };
228 edge1.cmp(&edge2)
229 })
230 });
231
232 if let Some(((a,b),_)) = edges.first() {
234 remove_edge(&mut graph, a, b);
235 } else {
236 return communities;
237 }
238 }
239}
240
241fn remove_edge(graph: &mut HashMap<String, HashMap<String,f64>>, a: &str, b: &str) {
242 if let Some(neighbors) = graph.get_mut(a) {
243 neighbors.remove(b);
244 }
246 if let Some(neighbors) = graph.get_mut(b) {
247 neighbors.remove(a);
248 }
250}
251
252pub fn display_graph_summary(graph: &HashMap<String, HashMap<String, f64>>) {
254 let n = graph.len();
255 let m: usize = graph.values().map(|neighbors| neighbors.len()).sum::<usize>() / 2;
256 let avg_degree = if n > 0 { (2.0 * m as f64) / n as f64 } else { 0.0 };
257 let communities = find_communities(graph);
258
259 println!("----------------[graph-summary]----------------");
260 println!("Number of nodes: {}", n);
261 println!("Number of edges: {}", m);
262 println!("Average degree: {:.2}", avg_degree);
263 println!("Number of communities: {}", communities.len());
264 if let Some(largest) = communities.iter().map(|c| c.len()).max() {
265 println!("Largest community size: {}", largest);
266 }
267 if let Some(smallest) = communities.iter().map(|c| c.len()).min() {
268 println!("Smallest community size: {}", smallest);
269 }
270 println!("");
271}
272
273pub fn display_top_betweenness_nodes(
275 node_bet: &HashMap<String, f64>,
276 top_n: usize
277) {
278 println!("----------------[top-nodes-by-betweenness]----------------");
279 let mut v: Vec<_> = node_bet.iter().collect();
280 v.sort_by(|a,b| b.1.partial_cmp(a.1).unwrap());
281
282 for (i, (node, score)) in v.iter().take(top_n).enumerate() {
283 println!("{}. {} (betweenness={:.2})", i+1, node, score);
284 }
285 println!("");
286}
287
288#[cfg(test)]
289mod correlation_network_tests {
290 use super::*;
291
292 #[test]
293 fn test_empty_input() {
294 let correlations: Vec<(String, String, f64)> = Vec::new();
295 let graph = build_correlation_graph(&correlations, 0.5);
296 assert!(graph.is_empty(), "Empty input should produce empty graph.");
297
298 let communities = find_communities(&graph);
299 assert!(communities.is_empty(), "Empty graph should have no communities.");
300
301 let centralities = compute_degree_centrality(&graph);
302 assert!(centralities.is_empty(), "No nodes means no centralities.");
303 }
304
305 #[test]
306 fn test_single_crate_no_edges() {
307 let correlations = vec![tuple("crateA", "crateA", 0.9)];
310 let graph = build_correlation_graph(&correlations, 0.5);
311
312 assert!(graph.len() <= 1, "At most one node expected.");
324 if let Some(neighbors) = graph.get("crateA") {
325 assert!(neighbors.len() <= 1);
328 }
329
330 let communities = find_communities(&graph);
331 assert_eq!(communities.len(), 1, "Single node forms one community.");
332 assert_eq!(communities[0], vec!["crateA"], "Community should contain only crateA.");
333
334 let centralities = compute_degree_centrality(&graph);
335 assert!(centralities.contains_key("crateA"));
338 }
339
340 #[test]
341 fn test_two_crates_no_edge_below_threshold() {
342 let correlations = vec![tuple("crateA", "crateB", 0.4)];
343 let graph = build_correlation_graph(&correlations, 0.5);
344 assert!(graph.is_empty(), "No edges should form if correlation < threshold.");
345
346 let communities = find_communities(&graph);
347 assert!(communities.is_empty(), "No edges and no nodes means no communities.");
350 }
351
352 #[test]
353 fn test_two_crates_with_edge() {
354 let correlations = vec![tuple("crateA", "crateB", 0.7)];
355 let graph = build_correlation_graph(&correlations, 0.7);
356 assert_eq!(graph.len(), 2, "Two nodes expected.");
358 assert!(graph.get("crateA").unwrap().contains_key("crateB"), "Edge should exist A->B.");
359 assert!(graph.get("crateB").unwrap().contains_key("crateA"), "Edge should exist B->A.");
360
361 let communities = find_communities(&graph);
362 assert_eq!(communities.len(), 1, "Single community with both crates.");
363 let mut comm = communities[0].clone();
364 comm.sort();
365 assert_eq!(comm, vec!["crateA", "crateB"]);
366
367 let centralities = compute_degree_centrality(&graph);
368 assert_eq!(centralities.get("crateA"), Some(&1));
369 assert_eq!(centralities.get("crateB"), Some(&1));
370 }
371
372 #[test]
373 fn test_threshold_one_requiring_perfect_correlation() {
374 let correlations = vec![
375 tuple("crateA", "crateB", 1.0),
376 tuple("crateA", "crateC", 0.99),
377 tuple("crateB", "crateC", 1.0),
378 ];
379 let graph = build_correlation_graph(&correlations, 1.0);
380 assert_eq!(graph.len(), 3, "All crates A, B, C appear because B-C also has perfect correlation.");
381
382 assert!(graph.get("crateA").unwrap().contains_key("crateB"));
384 assert!(!graph.get("crateA").unwrap().contains_key("crateC"));
386
387 assert!(graph.get("crateB").unwrap().contains_key("crateA"));
388 assert!(graph.get("crateB").unwrap().contains_key("crateC"));
389 let communities = find_communities(&graph);
392 assert_eq!(graph.len(), 3, "All three crates should be nodes because of B-C edge.");
402
403 let communities = find_communities(&graph);
404 assert_eq!(communities.len(), 1, "All three form one community due to two edges.");
405
406 let mut comm = communities[0].clone();
407 comm.sort();
408 assert_eq!(comm, vec!["crateA", "crateB", "crateC"]);
409
410 let centralities = compute_degree_centrality(&graph);
411 assert_eq!(centralities.get("crateA"), Some(&1));
415 assert_eq!(centralities.get("crateB"), Some(&2));
416 assert_eq!(centralities.get("crateC"), Some(&1));
417 }
418
419 #[test]
420 fn test_threshold_zero_all_edges() {
421 let correlations = vec![
422 tuple("a", "b", 0.1),
423 tuple("a", "c", 0.5),
424 tuple("b", "c", 0.2),
425 tuple("c", "d", 0.9),
426 ];
427 let graph = build_correlation_graph(&correlations, 0.0);
428 assert_eq!(graph.len(), 4);
431
432 assert!(graph.get("a").unwrap().contains_key("b"));
434 assert!(graph.get("a").unwrap().contains_key("c"));
435 assert!(graph.get("b").unwrap().contains_key("c"));
436 assert!(graph.get("c").unwrap().contains_key("d"));
437
438 let communities = find_communities(&graph);
439 assert_eq!(communities.len(), 1);
441 let mut comm = communities[0].clone();
442 comm.sort();
443 assert_eq!(comm, vec!["a", "b", "c", "d"]);
444
445 let centralities = compute_degree_centrality(&graph);
446 assert_eq!(centralities.get("a"), Some(&2));
452 assert_eq!(centralities.get("b"), Some(&2));
453 assert_eq!(centralities.get("c"), Some(&3));
454 assert_eq!(centralities.get("d"), Some(&1));
455 }
456
457 #[test]
458 fn test_disconnected_graph_multiple_components() {
459 let correlations = vec![
464 tuple("x", "y", 0.8),
465 tuple("p", "q", 0.9),
466 tuple("q", "r", 0.9),
467 tuple("s", "t", 0.4), ];
470 let graph = build_correlation_graph(&correlations, 0.7);
471 assert_eq!(graph.len(), 5, "x,y,p,q,r appear because their edges meet the threshold, s,t do not.");
478 assert!(graph.get("x").unwrap().contains_key("y"));
483 assert!(graph.get("y").unwrap().contains_key("x"));
484
485 assert!(graph.get("p").unwrap().contains_key("q"));
486 assert!(graph.get("q").unwrap().contains_key("p"));
487 assert!(graph.get("q").unwrap().contains_key("r"));
488 assert!(graph.get("r").unwrap().contains_key("q"));
489
490 let communities = find_communities(&graph);
492 assert_eq!(communities.len(), 2);
498 let mut c1 = communities[0].clone();
499 let mut c2 = communities[1].clone();
500 c1.sort();
501 c2.sort();
502 assert_eq!(c1, vec!["x", "y"]);
504 assert_eq!(c2, vec!["p", "q", "r"]);
505
506 let centralities = compute_degree_centrality(&graph);
507 assert_eq!(centralities.get("x"), Some(&1));
513 assert_eq!(centralities.get("y"), Some(&1));
514 assert_eq!(centralities.get("p"), Some(&1));
515 assert_eq!(centralities.get("q"), Some(&2));
516 assert_eq!(centralities.get("r"), Some(&1));
517 }
518
519 #[test]
520 fn test_duplicate_entries() {
521 let correlations = vec![
523 tuple("a", "b", 0.8),
524 tuple("a", "b", 0.85), tuple("b", "a", 0.8), ];
527 let graph = build_correlation_graph(&correlations, 0.7);
528 let a_neighbors = graph.get("a").unwrap();
530 assert_eq!(a_neighbors.len(), 1);
531 assert!(a_neighbors.contains_key("b"));
532
533 let b_neighbors = graph.get("b").unwrap();
534 assert_eq!(b_neighbors.len(), 1);
535 assert!(b_neighbors.contains_key("a"));
536
537 let communities = find_communities(&graph);
538 assert_eq!(communities.len(), 1);
539 let mut comm = communities[0].clone();
540 comm.sort();
541 assert_eq!(comm, vec!["a", "b"]);
542
543 let centralities = compute_degree_centrality(&graph);
544 assert_eq!(centralities.get("a"), Some(&1));
545 assert_eq!(centralities.get("b"), Some(&1));
546 }
547
548 #[test]
549 fn test_large_random_data() {
550 use rand::Rng;
553 let mut rng = rand::thread_rng();
554
555 let crate_names = vec!["crate1", "crate2", "crate3", "crate4", "crate5"];
556 let mut correlations = Vec::new();
557
558 for i in 0..crate_names.len() {
560 for j in (i+1)..crate_names.len() {
561 let corr = rng.gen_range(0.0..1.0);
562 correlations.push(tuple(crate_names[i], crate_names[j], corr));
563 }
564 }
565
566 let graph = build_correlation_graph(&correlations, 0.5);
568 let communities = find_communities(&graph);
570 let centralities = compute_degree_centrality(&graph);
571
572 for (node, neighbors) in &graph {
579 for neighbor in neighbors.keys() {
580 assert!(graph.get(neighbor).unwrap().contains_key(node), "Graph should be symmetric.");
581 }
582 }
583
584 }
586
587 fn tuple(a: &str, b: &str, c: f64) -> (String, String, f64) {
588 (a.to_string(), b.to_string(), c)
589 }
590
591 #[test]
592 fn test_empty_graph_summary() {
593 let graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
594 let communities = find_communities(&graph);
595 assert!(communities.is_empty(), "No communities in empty graph.");
596
597 }
602
603 #[test]
604 fn test_girvan_newman_basic() {
605 let correlations = vec![
609 tuple("A", "B", 0.9),
610 tuple("C", "D", 0.9),
611 tuple("B", "C", 0.8),
612 ];
613 let graph = build_correlation_graph(&correlations, 0.7);
614 let initial_communities = find_communities(&graph);
617 assert_eq!(initial_communities.len(), 1, "All connected initially.");
618
619 let communities = girvan_newman_communities(graph.clone(), 2);
621 assert_eq!(communities.len(), 2, "Should have split into two communities after removing bridge.");
622
623 for c in &communities {
627 assert_eq!(c.len(), 2);
628 }
629 }
630
631 #[test]
632 fn test_betweenness_centrality_star() {
633 let correlations = vec![
636 tuple("X", "A", 0.9),
637 tuple("X", "B", 0.9),
638 tuple("X", "C", 0.9),
639 ];
640 let graph = build_correlation_graph(&correlations, 0.7);
641 let (node_bet, edge_bet) = compute_betweenness_centrality(&graph);
643
644 let x_bet = node_bet.get("X").cloned().unwrap_or(0.0);
650 let a_bet = node_bet.get("A").cloned().unwrap_or(0.0);
651 let b_bet = node_bet.get("B").cloned().unwrap_or(0.0);
652 let c_bet = node_bet.get("C").cloned().unwrap_or(0.0);
653
654 assert!(x_bet > a_bet && x_bet > b_bet && x_bet > c_bet, "X should have highest betweenness.");
655 assert_eq!(a_bet, 0.0, "Leaves no betweenness in a star.");
656 assert_eq!(b_bet, 0.0, "Leaves no betweenness in a star.");
657 assert_eq!(c_bet, 0.0, "Leaves no betweenness in a star.");
658
659 for ((u,v), val) in edge_bet.iter() {
662 assert!(val > &0.0, "Star edges should have >0 edge betweenness.");
663 assert!((u == "X" || v == "X"), "Edges should connect to X in a star.");
664 }
665 }
666
667 #[test]
668 fn test_girvan_newman_no_change_if_already_multiple_components() {
669 let correlations = vec![
671 tuple("A", "B", 0.9), tuple("C", "D", 0.9), ];
675 let graph = build_correlation_graph(&correlations, 0.7);
676 let communities = girvan_newman_communities(graph.clone(), 2);
677 assert_eq!(communities.len(), 2, "Already at desired number of communities.");
678 }
679
680 #[test]
681 fn test_graph_summary_basic() {
682 let correlations = vec![
684 tuple("X", "Y", 0.8),
685 tuple("Y", "Z", 0.8),
686 ];
687 let graph = build_correlation_graph(&correlations, 0.7);
688 let communities = find_communities(&graph);
692 assert_eq!(communities.len(), 1);
693 assert_eq!(graph.len(), 3);
694 let total_edges: usize = graph.values().map(|nbrs| nbrs.len()).sum::<usize>() / 2;
695 assert_eq!(total_edges, 2);
696
697 }
700
701 #[test]
702 fn test_betweenness_top_nodes() {
703 let correlations = vec![
711 tuple("A", "B", 0.9),
712 tuple("B", "C", 0.9),
713 tuple("C", "D", 0.9),
714 tuple("D", "A", 0.9),
715 tuple("A", "C", 0.9),
716 ];
717 let graph = build_correlation_graph(&correlations, 0.7);
718 let (node_bet, _edge_bet) = compute_betweenness_centrality(&graph);
719 display_top_betweenness_nodes(&node_bet, 10);
722
723 for n in &["A", "B", "C", "D"] {
725 assert!(node_bet.contains_key(*n), "All nodes should have a betweenness value.");
726 }
727 }
728
729 #[test]
730 fn test_girvan_newman_high_target_communities() {
731 let correlations = vec![
734 tuple("A", "B", 0.9),
735 tuple("B", "C", 0.9),
736 tuple("A", "C", 0.9),
737 ];
738 let graph = build_correlation_graph(&correlations, 0.7);
739
740 let communities = girvan_newman_communities(graph.clone(), 5);
744 assert_eq!(communities.len(), 3, "Max communities = number of nodes.");
746 }
747}