Skip to main content

the_code_graph_domain/analysis/
community.rs

1// Leiden community detection algorithm
2
3use crate::model::{
4    Community, CommunityAnalysis, CommunityConfig, CommunityStats, Edge, EdgeKind, SymbolNode,
5};
6use rand::rngs::StdRng;
7use rand::seq::SliceRandom;
8use std::collections::HashMap;
9
10fn is_high_confidence(kind: &EdgeKind) -> bool {
11    matches!(
12        kind,
13        EdgeKind::Calls | EdgeKind::Extends | EdgeKind::Implements | EdgeKind::Embeds
14    )
15}
16
17#[allow(dead_code)]
18pub(crate) struct LeidenGraph {
19    pub n: usize,
20    pub neighbors: Vec<Vec<(usize, f64)>>,
21    pub degree: Vec<f64>,
22    pub total_weight: f64,
23    pub node_to_index: HashMap<String, usize>,
24    pub index_to_node: Vec<String>,
25}
26
27impl LeidenGraph {
28    pub fn from_symbols_and_edges(symbols: &[SymbolNode], edges: &[Edge]) -> Self {
29        let mut node_to_index = HashMap::new();
30        let mut index_to_node = Vec::new();
31        for s in symbols {
32            let idx = index_to_node.len();
33            node_to_index.insert(s.qualified_name.clone(), idx);
34            index_to_node.push(s.qualified_name.clone());
35        }
36        let n = index_to_node.len();
37        let mut neighbors: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
38        let mut edge_weights: HashMap<(usize, usize), f64> = HashMap::new();
39
40        for e in edges {
41            if !is_high_confidence(&e.kind) {
42                continue;
43            }
44            let Some(&si) = node_to_index.get(&e.source) else {
45                continue;
46            };
47            let Some(&ti) = node_to_index.get(&e.target) else {
48                continue;
49            };
50            if si == ti {
51                continue;
52            }
53            let (lo, hi) = if si < ti { (si, ti) } else { (ti, si) };
54            *edge_weights.entry((lo, hi)).or_default() += 1.0;
55        }
56
57        let mut degree = vec![0.0; n];
58        let mut total_weight = 0.0;
59        for (&(u, v), &w) in &edge_weights {
60            neighbors[u].push((v, w));
61            neighbors[v].push((u, w));
62            degree[u] += w;
63            degree[v] += w;
64            total_weight += w;
65        }
66
67        Self {
68            n,
69            neighbors,
70            degree,
71            total_weight,
72            node_to_index,
73            index_to_node,
74        }
75    }
76}
77
78pub(crate) struct Partition {
79    pub community: Vec<usize>,
80    pub community_weight: Vec<f64>,
81}
82
83impl Partition {
84    pub fn singleton(n: usize) -> Self {
85        Self {
86            community: (0..n).collect(),
87            community_weight: vec![0.0; n],
88        }
89    }
90
91    pub fn singleton_with_graph(graph: &LeidenGraph) -> Self {
92        Self {
93            community: (0..graph.n).collect(),
94            community_weight: graph.degree.clone(),
95        }
96    }
97
98    pub fn move_node(&mut self, node: usize, target: usize, graph: &LeidenGraph) {
99        let old = self.community[node];
100        if old == target {
101            return;
102        }
103        self.community_weight[old] -= graph.degree[node];
104        self.community_weight[target] += graph.degree[node];
105        self.community[node] = target;
106    }
107
108    pub fn distinct_communities(&self) -> std::collections::HashSet<usize> {
109        self.community.iter().copied().collect()
110    }
111}
112
113fn compute_modularity(graph: &LeidenGraph, partition: &Partition, gamma: f64) -> f64 {
114    if graph.total_weight == 0.0 {
115        return 0.0;
116    }
117    let m = graph.total_weight;
118    let m2 = 2.0 * m;
119
120    // Per-community: L_c (internal edge weight) and K_c (total degree)
121    let max_c = partition.community.iter().copied().max().unwrap_or(0) + 1;
122    let mut internal_weight = vec![0.0; max_c];
123    let mut comm_degree = vec![0.0; max_c];
124
125    for u in 0..graph.n {
126        let cu = partition.community[u];
127        comm_degree[cu] += graph.degree[u];
128        for &(v, w) in &graph.neighbors[u] {
129            if partition.community[v] == cu && u < v {
130                internal_weight[cu] += w;
131            }
132        }
133    }
134
135    // Q = Σ_c [ L_c/m - γ (K_c / 2m)² ]
136    let mut q = 0.0;
137    for c in 0..max_c {
138        q += internal_weight[c] / m - gamma * (comm_degree[c] / m2).powi(2);
139    }
140    q
141}
142
143fn local_moving(
144    graph: &LeidenGraph,
145    partition: &mut Partition,
146    gamma: f64,
147    rng: &mut StdRng,
148) -> bool {
149    if graph.n == 0 {
150        return false;
151    }
152    let m2 = 2.0 * graph.total_weight;
153    if m2 == 0.0 {
154        return false;
155    }
156    let mut any_moved = false;
157    let mut order: Vec<usize> = (0..graph.n).collect();
158    order.shuffle(rng);
159
160    let mut improved = true;
161    while improved {
162        improved = false;
163        for &node in &order {
164            let old_comm = partition.community[node];
165            let ki = graph.degree[node];
166            if ki == 0.0 {
167                continue;
168            }
169
170            // Compute edge weight to each neighboring community
171            let mut comm_edge_weight: HashMap<usize, f64> = HashMap::new();
172            for &(neighbor, w) in &graph.neighbors[node] {
173                *comm_edge_weight
174                    .entry(partition.community[neighbor])
175                    .or_default() += w;
176            }
177
178            // Compute delta for removing node from current community
179            let w_old = comm_edge_weight.get(&old_comm).copied().unwrap_or(0.0);
180            let sigma_old = partition.community_weight[old_comm] - ki;
181            let remove_gain =
182                -w_old / graph.total_weight + gamma * ki * sigma_old / (m2 * graph.total_weight);
183
184            let mut best_comm = old_comm;
185            let mut best_gain = 0.0;
186
187            // Sort candidates for deterministic tie-breaking
188            let mut candidates: Vec<(usize, f64)> = comm_edge_weight
189                .iter()
190                .filter(|(&c, _)| c != old_comm)
191                .map(|(&c, &w)| (c, w))
192                .collect();
193            candidates.sort_by_key(|&(c, _)| c);
194
195            for (target_comm, w_target) in candidates {
196                let sigma_target = partition.community_weight[target_comm];
197                let insert_gain = w_target / graph.total_weight
198                    - gamma * ki * sigma_target / (m2 * graph.total_weight);
199                let total_gain = remove_gain + insert_gain;
200                if total_gain > best_gain {
201                    best_gain = total_gain;
202                    best_comm = target_comm;
203                }
204            }
205
206            if best_comm != old_comm {
207                partition.move_node(node, best_comm, graph);
208                improved = true;
209                any_moved = true;
210            }
211        }
212    }
213    any_moved
214}
215
216fn refinement(
217    graph: &LeidenGraph,
218    partition: &Partition,
219    gamma: f64,
220    rng: &mut StdRng,
221) -> Partition {
222    // Start from singletons — each node is its own sub-community
223    let mut refined = Partition::singleton_with_graph(graph);
224
225    let m2 = 2.0 * graph.total_weight;
226    if m2 == 0.0 {
227        return refined;
228    }
229
230    // Process each Phase 1 community separately
231    let communities = partition.distinct_communities();
232    for phase1_comm in communities {
233        let members: Vec<usize> = (0..graph.n)
234            .filter(|&i| partition.community[i] == phase1_comm)
235            .collect();
236        if members.len() <= 1 {
237            continue;
238        }
239
240        // Randomize visit order within this community
241        let mut order = members.clone();
242        order.shuffle(rng);
243
244        for &node in &order {
245            let cur_sub = refined.community[node];
246            let ki = graph.degree[node];
247            if ki == 0.0 {
248                continue;
249            }
250
251            // Find adjacent sub-communities within the same Phase 1 community
252            let mut sub_edge_weight: HashMap<usize, f64> = HashMap::new();
253            for &(neighbor, w) in &graph.neighbors[node] {
254                if partition.community[neighbor] == phase1_comm {
255                    let sub = refined.community[neighbor];
256                    if sub != cur_sub {
257                        *sub_edge_weight.entry(sub).or_default() += w;
258                    }
259                }
260            }
261
262            let w_old = {
263                let mut w = 0.0;
264                for &(neighbor, weight) in &graph.neighbors[node] {
265                    if refined.community[neighbor] == cur_sub && neighbor != node {
266                        w += weight;
267                    }
268                }
269                w
270            };
271            let sigma_old = refined.community_weight[cur_sub] - ki;
272            let remove_gain =
273                -w_old / graph.total_weight + gamma * ki * sigma_old / (m2 * graph.total_weight);
274
275            let mut best_sub = cur_sub;
276            let mut best_gain = 0.0;
277
278            let mut candidates: Vec<(usize, f64)> =
279                sub_edge_weight.iter().map(|(&c, &w)| (c, w)).collect();
280            candidates.sort_by_key(|&(c, _)| c);
281
282            for (target_sub, w_target) in candidates {
283                let sigma_target = refined.community_weight[target_sub];
284                let insert_gain = w_target / graph.total_weight
285                    - gamma * ki * sigma_target / (m2 * graph.total_weight);
286                let total_gain = remove_gain + insert_gain;
287                if total_gain > best_gain {
288                    best_gain = total_gain;
289                    best_sub = target_sub;
290                }
291            }
292
293            if best_sub != cur_sub {
294                refined.move_node(node, best_sub, graph);
295            }
296        }
297    }
298    refined
299}
300
301/// Collapse communities into super-nodes, producing an aggregated graph.
302/// Returns None if every node is already its own community (no aggregation possible).
303fn aggregate(graph: &LeidenGraph, partition: &Partition) -> Option<(LeidenGraph, Vec<usize>)> {
304    let communities = partition.distinct_communities();
305    if communities.len() == graph.n {
306        return None;
307    }
308
309    // Map old community IDs to dense indices
310    let mut comm_ids: Vec<usize> = communities.into_iter().collect();
311    comm_ids.sort();
312    let comm_to_new: HashMap<usize, usize> = comm_ids
313        .iter()
314        .enumerate()
315        .map(|(new_idx, &old_id)| (old_id, new_idx))
316        .collect();
317    let n_new = comm_ids.len();
318
319    // mapping[original_node] = new super-node index
320    let mapping: Vec<usize> = partition.community.iter().map(|c| comm_to_new[c]).collect();
321
322    // Build aggregated inter-community edge weights
323    let mut agg_edges: HashMap<(usize, usize), f64> = HashMap::new();
324    for u in 0..graph.n {
325        let cu = mapping[u];
326        for &(v, w) in &graph.neighbors[u] {
327            let cv = mapping[v];
328            if cu < cv {
329                *agg_edges.entry((cu, cv)).or_default() += w;
330            }
331        }
332    }
333
334    let mut neighbors: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n_new];
335    // Degree of each super-node = sum of degrees of constituent nodes
336    let mut degree = vec![0.0; n_new];
337    for (i, &d) in graph.degree.iter().enumerate() {
338        degree[mapping[i]] += d;
339    }
340    // total_weight is preserved (internal edges become self-loops conceptually)
341    let total_weight = graph.total_weight;
342
343    for (&(u, v), &w) in &agg_edges {
344        neighbors[u].push((v, w));
345        neighbors[v].push((u, w));
346    }
347
348    let mut index_to_node = vec![String::new(); n_new];
349    let mut node_to_index = HashMap::new();
350    for (new_idx, &old_comm) in comm_ids.iter().enumerate() {
351        let name = format!("super_{old_comm}");
352        index_to_node[new_idx] = name.clone();
353        node_to_index.insert(name, new_idx);
354    }
355
356    let agg_graph = LeidenGraph {
357        n: n_new,
358        neighbors,
359        degree,
360        total_weight,
361        node_to_index,
362        index_to_node,
363    };
364
365    Some((agg_graph, mapping))
366}
367
368pub(crate) fn leiden(graph: &LeidenGraph, gamma: f64, seed: Option<u64>) -> (Partition, f64) {
369    use rand::SeedableRng;
370
371    if graph.n == 0 {
372        return (Partition::singleton(0), 0.0);
373    }
374    if graph.total_weight == 0.0 {
375        return (Partition::singleton(graph.n), 0.0);
376    }
377
378    let mut rng = match seed {
379        Some(s) => StdRng::seed_from_u64(s),
380        None => StdRng::from_os_rng(),
381    };
382
383    let mut current_graph = None::<LeidenGraph>;
384    // Track how original nodes map through aggregation levels
385    let mut global_mapping: Vec<usize> = (0..graph.n).collect();
386
387    let max_iterations = 20;
388    for _ in 0..max_iterations {
389        let g = current_graph.as_ref().unwrap_or(graph);
390        let mut partition = Partition::singleton_with_graph(g);
391
392        let moved = local_moving(g, &mut partition, gamma, &mut rng);
393        if !moved {
394            break;
395        }
396
397        let refined = refinement(g, &partition, gamma, &mut rng);
398
399        // Update global mapping with refined partition
400        for gm in global_mapping.iter_mut() {
401            *gm = refined.community[*gm];
402        }
403
404        match aggregate(g, &refined) {
405            Some((agg_graph, mapping)) => {
406                // Update global mapping through aggregation
407                for gm in global_mapping.iter_mut() {
408                    *gm = mapping[*gm];
409                }
410                current_graph = Some(agg_graph);
411            }
412            None => break,
413        }
414    }
415
416    // Build final partition from global mapping
417    let mut final_partition = Partition {
418        community: global_mapping,
419        community_weight: vec![0.0; graph.n],
420    };
421    // Renumber communities to be 0..k-1
422    let mut remap: HashMap<usize, usize> = HashMap::new();
423    let mut next_id = 0;
424    for c in final_partition.community.iter_mut() {
425        let new_id = *remap.entry(*c).or_insert_with(|| {
426            let id = next_id;
427            next_id += 1;
428            id
429        });
430        *c = new_id;
431    }
432    // Recompute community weights
433    final_partition.community_weight = vec![0.0; next_id];
434    for (i, &c) in final_partition.community.iter().enumerate() {
435        final_partition.community_weight[c] += graph.degree[i];
436    }
437
438    let q = compute_modularity(graph, &final_partition, gamma);
439    (final_partition, q)
440}
441
442fn derive_community_name(members: &[String], community_id: usize) -> String {
443    let generic = ["mod", "lib", "index", "main", "utils", "helpers"];
444
445    let mut file_counts: HashMap<&str, usize> = HashMap::new();
446    for m in members {
447        if let Some(file_part) = m.split("::").next() {
448            let stem = std::path::Path::new(file_part)
449                .file_stem()
450                .and_then(|s| s.to_str())
451                .unwrap_or("");
452            if !stem.is_empty() {
453                *file_counts.entry(stem).or_default() += 1;
454            }
455        }
456    }
457
458    let mut best: Vec<(&str, usize)> = file_counts.into_iter().collect();
459    best.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(b.0)));
460
461    if let Some((name, _)) = best.first() {
462        if !generic.contains(name) {
463            return name.to_string();
464        }
465    }
466    format!("community_{community_id}")
467}
468
469pub fn detect_communities(
470    symbols: &[SymbolNode],
471    edges: &[Edge],
472    config: &CommunityConfig,
473) -> CommunityAnalysis {
474    let graph = LeidenGraph::from_symbols_and_edges(symbols, edges);
475    if graph.n == 0 {
476        return CommunityAnalysis {
477            communities: vec![],
478            modularity: 0.0,
479            stats: CommunityStats {
480                count: 0,
481                avg_size: 0.0,
482                largest_size: 0,
483                isolated_nodes: 0,
484            },
485        };
486    }
487
488    let (partition, modularity) = leiden(&graph, config.resolution, config.seed);
489
490    // Count isolated nodes (degree-0)
491    let isolated_nodes = graph.degree.iter().filter(|&&d| d == 0.0).count();
492
493    // Group nodes by community
494    let mut community_members: HashMap<usize, Vec<usize>> = HashMap::new();
495    for (i, &c) in partition.community.iter().enumerate() {
496        community_members.entry(c).or_default().push(i);
497    }
498
499    let mut communities: Vec<Community> = Vec::new();
500    for (&comm_id, members) in &community_members {
501        if members.len() < config.min_community_size {
502            continue;
503        }
504
505        let member_names: Vec<String> = members
506            .iter()
507            .map(|&i| graph.index_to_node[i].clone())
508            .collect();
509
510        // Compute internal and boundary edges
511        let member_set: std::collections::HashSet<usize> = members.iter().copied().collect();
512        let mut internal_edges = 0usize;
513        let mut boundary_edges = 0usize;
514        for &node in members {
515            for &(neighbor, _) in &graph.neighbors[node] {
516                if member_set.contains(&neighbor) {
517                    if node < neighbor {
518                        internal_edges += 1;
519                    }
520                } else {
521                    boundary_edges += 1;
522                }
523            }
524        }
525
526        // Modularity contribution for this community
527        let m = graph.total_weight;
528        let m2 = 2.0 * m;
529        let kc: f64 = members.iter().map(|&i| graph.degree[i]).sum();
530        let modularity_contribution = if m > 0.0 {
531            (internal_edges as f64) / m - config.resolution * (kc / m2).powi(2)
532        } else {
533            0.0
534        };
535
536        let name = derive_community_name(&member_names, comm_id);
537
538        communities.push(Community {
539            id: comm_id,
540            name,
541            members: member_names,
542            modularity_contribution,
543            internal_edges,
544            boundary_edges,
545        });
546    }
547
548    // Sort by size descending
549    communities.sort_by(|a, b| b.members.len().cmp(&a.members.len()));
550
551    // Re-number IDs after sorting
552    for (i, c) in communities.iter_mut().enumerate() {
553        c.id = i;
554    }
555
556    let count = communities.len();
557    let total_members: usize = communities.iter().map(|c| c.members.len()).sum();
558    let avg_size = if count > 0 {
559        total_members as f64 / count as f64
560    } else {
561        0.0
562    };
563    let largest_size = communities.first().map(|c| c.members.len()).unwrap_or(0);
564
565    CommunityAnalysis {
566        communities,
567        modularity,
568        stats: CommunityStats {
569            count,
570            avg_size,
571            largest_size,
572            isolated_nodes,
573        },
574    }
575}
576
577#[cfg(test)]
578mod tests {
579    use super::*;
580    use crate::model::{Location, SymbolKind};
581    use rand::rngs::StdRng;
582    use rand::SeedableRng;
583    use std::collections::HashSet;
584
585    fn make_symbol(name: &str, qn: &str, kind: SymbolKind) -> SymbolNode {
586        SymbolNode {
587            name: name.to_string(),
588            qualified_name: qn.to_string(),
589            kind,
590            location: Location {
591                file: "src/lib.rs".into(),
592                line_start: 1,
593                line_end: 10,
594                col_start: 0,
595                col_end: 0,
596            },
597            visibility: crate::model::Visibility::Public,
598            is_exported: true,
599            is_async: false,
600            is_test: false,
601            decorators: vec![],
602            signature: None,
603        }
604    }
605
606    fn make_edge(kind: EdgeKind, source: &str, target: &str) -> Edge {
607        Edge {
608            kind,
609            source: source.to_string(),
610            target: target.to_string(),
611            metadata: None,
612        }
613    }
614
615    #[test]
616    fn graph_from_symbols_and_edges() {
617        let symbols = vec![
618            make_symbol("a", "m::a", SymbolKind::Function),
619            make_symbol("b", "m::b", SymbolKind::Function),
620            make_symbol("c", "m::c", SymbolKind::Function),
621        ];
622        let edges = vec![
623            make_edge(EdgeKind::Calls, "m::a", "m::b"),
624            make_edge(EdgeKind::Calls, "m::b", "m::c"),
625        ];
626        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
627        assert_eq!(graph.n, 3);
628        assert!((graph.total_weight - 2.0).abs() < f64::EPSILON);
629        assert!((graph.degree[graph.node_to_index["m::a"]] - 1.0).abs() < f64::EPSILON);
630        assert!((graph.degree[graph.node_to_index["m::b"]] - 2.0).abs() < f64::EPSILON);
631    }
632
633    #[test]
634    fn graph_filters_non_high_confidence_edges() {
635        let symbols = vec![
636            make_symbol("a", "m::a", SymbolKind::Function),
637            make_symbol("b", "m::b", SymbolKind::Function),
638        ];
639        let edges = vec![make_edge(EdgeKind::Contains, "m::a", "m::b")];
640        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
641        assert_eq!(graph.n, 2);
642        assert!((graph.total_weight - 0.0).abs() < f64::EPSILON);
643    }
644
645    #[test]
646    fn graph_deduplicates_bidirectional_edges() {
647        let symbols = vec![
648            make_symbol("a", "m::a", SymbolKind::Function),
649            make_symbol("b", "m::b", SymbolKind::Function),
650        ];
651        let edges = vec![
652            make_edge(EdgeKind::Calls, "m::a", "m::b"),
653            make_edge(EdgeKind::Calls, "m::b", "m::a"),
654        ];
655        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
656        assert!((graph.total_weight - 2.0).abs() < f64::EPSILON);
657        assert!((graph.degree[graph.node_to_index["m::a"]] - 2.0).abs() < f64::EPSILON);
658    }
659
660    #[test]
661    fn modularity_singleton_partition_is_zero() {
662        let symbols = vec![
663            make_symbol("a", "m::a", SymbolKind::Function),
664            make_symbol("b", "m::b", SymbolKind::Function),
665        ];
666        let edges = vec![make_edge(EdgeKind::Calls, "m::a", "m::b")];
667        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
668        let partition = Partition::singleton(graph.n);
669        let q = compute_modularity(&graph, &partition, 1.0);
670        assert!(q <= 0.0 + f64::EPSILON);
671    }
672
673    #[test]
674    fn modularity_all_in_one_community() {
675        let symbols = vec![
676            make_symbol("a", "m::a", SymbolKind::Function),
677            make_symbol("b", "m::b", SymbolKind::Function),
678        ];
679        let edges = vec![make_edge(EdgeKind::Calls, "m::a", "m::b")];
680        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
681        let mut partition = Partition::singleton(graph.n);
682        partition.move_node(1, 0, &graph);
683        let q = compute_modularity(&graph, &partition, 1.0);
684        assert!(q.abs() < f64::EPSILON);
685    }
686
687    #[test]
688    fn empty_graph_modularity_is_zero() {
689        let graph = LeidenGraph::from_symbols_and_edges(&[], &[]);
690        let partition = Partition::singleton(0);
691        let q = compute_modularity(&graph, &partition, 1.0);
692        assert!((q - 0.0).abs() < f64::EPSILON);
693    }
694
695    #[test]
696    fn isolated_nodes_have_zero_degree() {
697        let symbols = vec![
698            make_symbol("a", "m::a", SymbolKind::Function),
699            make_symbol("b", "m::b", SymbolKind::Function),
700            make_symbol("c", "m::c", SymbolKind::Function),
701        ];
702        let edges = vec![make_edge(EdgeKind::Calls, "m::a", "m::b")];
703        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
704        assert!((graph.degree[graph.node_to_index["m::c"]] - 0.0).abs() < f64::EPSILON);
705    }
706
707    // ---- T03: Local Moving tests ----
708
709    /// Helper: Two K4 cliques connected by a single bridge edge
710    fn build_two_cliques_bridge() -> (Vec<SymbolNode>, Vec<Edge>) {
711        let mut symbols = Vec::new();
712        let mut edges = Vec::new();
713        for i in 0..4 {
714            symbols.push(make_symbol(
715                &format!("a{i}"),
716                &format!("a::a{i}"),
717                SymbolKind::Function,
718            ));
719            symbols.push(make_symbol(
720                &format!("b{i}"),
721                &format!("b::b{i}"),
722                SymbolKind::Function,
723            ));
724        }
725        // Clique A: all pairs
726        for i in 0..4 {
727            for j in (i + 1)..4 {
728                edges.push(make_edge(
729                    EdgeKind::Calls,
730                    &format!("a::a{i}"),
731                    &format!("a::a{j}"),
732                ));
733            }
734        }
735        // Clique B: all pairs
736        for i in 0..4 {
737            for j in (i + 1)..4 {
738                edges.push(make_edge(
739                    EdgeKind::Calls,
740                    &format!("b::b{i}"),
741                    &format!("b::b{j}"),
742                ));
743            }
744        }
745        // Bridge
746        edges.push(make_edge(EdgeKind::Calls, "a::a0", "b::b0"));
747        (symbols, edges)
748    }
749
750    #[test]
751    fn local_moving_merges_triangle() {
752        let symbols = vec![
753            make_symbol("a", "m::a", SymbolKind::Function),
754            make_symbol("b", "m::b", SymbolKind::Function),
755            make_symbol("c", "m::c", SymbolKind::Function),
756        ];
757        let edges = vec![
758            make_edge(EdgeKind::Calls, "m::a", "m::b"),
759            make_edge(EdgeKind::Calls, "m::b", "m::c"),
760            make_edge(EdgeKind::Calls, "m::a", "m::c"),
761        ];
762        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
763        let mut partition = Partition::singleton_with_graph(&graph);
764        let mut rng = StdRng::seed_from_u64(42);
765        let moved = local_moving(&graph, &mut partition, 1.0, &mut rng);
766        assert!(moved);
767        assert_eq!(partition.community[0], partition.community[1]);
768        assert_eq!(partition.community[1], partition.community[2]);
769    }
770
771    #[test]
772    fn local_moving_separates_two_cliques() {
773        let (symbols, edges) = build_two_cliques_bridge();
774        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
775        let mut partition = Partition::singleton_with_graph(&graph);
776        let mut rng = StdRng::seed_from_u64(42);
777        local_moving(&graph, &mut partition, 1.0, &mut rng);
778        let distinct: std::collections::HashSet<usize> =
779            partition.community.iter().copied().collect();
780        assert!(
781            distinct.len() >= 2,
782            "expected at least 2 communities, got {}",
783            distinct.len()
784        );
785    }
786
787    #[test]
788    fn local_moving_no_edges_no_moves() {
789        let symbols = vec![
790            make_symbol("a", "m::a", SymbolKind::Function),
791            make_symbol("b", "m::b", SymbolKind::Function),
792        ];
793        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &[]);
794        let mut partition = Partition::singleton_with_graph(&graph);
795        let mut rng = StdRng::seed_from_u64(42);
796        let moved = local_moving(&graph, &mut partition, 1.0, &mut rng);
797        assert!(!moved);
798    }
799
800    #[test]
801    fn local_moving_deterministic_with_same_seed() {
802        let (symbols, edges) = build_two_cliques_bridge();
803        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
804
805        let mut p1 = Partition::singleton_with_graph(&graph);
806        let mut rng1 = StdRng::seed_from_u64(42);
807        local_moving(&graph, &mut p1, 1.0, &mut rng1);
808
809        let mut p2 = Partition::singleton_with_graph(&graph);
810        let mut rng2 = StdRng::seed_from_u64(42);
811        local_moving(&graph, &mut p2, 1.0, &mut rng2);
812
813        assert_eq!(p1.community, p2.community);
814    }
815
816    // ---- T04: Refinement tests ----
817
818    /// BFS connectivity check for a subset of nodes in the graph
819    fn is_connected(graph: &LeidenGraph, members: &[usize]) -> bool {
820        use std::collections::{HashSet, VecDeque};
821        if members.is_empty() {
822            return true;
823        }
824        let member_set: HashSet<usize> = members.iter().copied().collect();
825        let mut visited = HashSet::new();
826        let mut queue = VecDeque::new();
827        visited.insert(members[0]);
828        queue.push_back(members[0]);
829        while let Some(node) = queue.pop_front() {
830            for &(neighbor, _) in &graph.neighbors[node] {
831                if member_set.contains(&neighbor) && visited.insert(neighbor) {
832                    queue.push_back(neighbor);
833                }
834            }
835        }
836        visited.len() == members.len()
837    }
838
839    #[test]
840    fn refinement_preserves_connectivity() {
841        let (symbols, edges) = build_two_cliques_bridge();
842        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
843
844        // Simulate Phase 1 having merged everything into one community
845        let mut partition = Partition::singleton_with_graph(&graph);
846        for i in 1..graph.n {
847            partition.move_node(i, 0, &graph);
848        }
849
850        let mut rng = StdRng::seed_from_u64(42);
851        let refined = refinement(&graph, &partition, 1.0, &mut rng);
852
853        for c in refined.distinct_communities() {
854            let members: Vec<usize> = (0..graph.n)
855                .filter(|&i| refined.community[i] == c)
856                .collect();
857            if members.len() > 1 {
858                assert!(
859                    is_connected(&graph, &members),
860                    "Community {} with {} members is not connected",
861                    c,
862                    members.len()
863                );
864            }
865        }
866    }
867
868    #[test]
869    fn refinement_singletons_remain_singletons() {
870        let symbols = vec![
871            make_symbol("a", "m::a", SymbolKind::Function),
872            make_symbol("b", "m::b", SymbolKind::Function),
873        ];
874        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &[]);
875        let partition = Partition::singleton_with_graph(&graph);
876        let mut rng = StdRng::seed_from_u64(42);
877        let refined = refinement(&graph, &partition, 1.0, &mut rng);
878        assert_ne!(refined.community[0], refined.community[1]);
879    }
880
881    // ---- T05: Aggregation + Leiden loop tests ----
882
883    /// 4 complete subgraphs K5 connected by single bridge edges
884    fn build_multiscale_graph() -> (Vec<SymbolNode>, Vec<Edge>) {
885        let mut symbols = Vec::new();
886        let mut edges = Vec::new();
887        for clique in 0..4 {
888            for i in 0..5 {
889                symbols.push(make_symbol(
890                    &format!("c{clique}_n{i}"),
891                    &format!("src/mod{clique}.rs::c{clique}_n{i}"),
892                    SymbolKind::Function,
893                ));
894            }
895            for i in 0..5 {
896                for j in (i + 1)..5 {
897                    edges.push(make_edge(
898                        EdgeKind::Calls,
899                        &format!("src/mod{clique}.rs::c{clique}_n{i}"),
900                        &format!("src/mod{clique}.rs::c{clique}_n{j}"),
901                    ));
902                }
903            }
904        }
905        for clique in 0..3 {
906            edges.push(make_edge(
907                EdgeKind::Calls,
908                &format!("src/mod{clique}.rs::c{clique}_n0"),
909                &format!("src/mod{}.rs::c{}_n0", clique + 1, clique + 1),
910            ));
911        }
912        (symbols, edges)
913    }
914
915    #[test]
916    fn leiden_two_cliques_finds_two_communities() {
917        let (symbols, edges) = build_two_cliques_bridge();
918        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
919        let (partition, _) = leiden(&graph, 1.0, Some(42));
920        let distinct: HashSet<usize> = partition.community.iter().copied().collect();
921        assert_eq!(distinct.len(), 2);
922    }
923
924    #[test]
925    fn leiden_triangle_finds_one_community() {
926        let symbols = vec![
927            make_symbol("a", "m::a", SymbolKind::Function),
928            make_symbol("b", "m::b", SymbolKind::Function),
929            make_symbol("c", "m::c", SymbolKind::Function),
930        ];
931        let edges = vec![
932            make_edge(EdgeKind::Calls, "m::a", "m::b"),
933            make_edge(EdgeKind::Calls, "m::b", "m::c"),
934            make_edge(EdgeKind::Calls, "m::a", "m::c"),
935        ];
936        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
937        let (partition, _) = leiden(&graph, 1.0, Some(42));
938        let distinct: HashSet<usize> = partition.community.iter().copied().collect();
939        assert_eq!(distinct.len(), 1);
940    }
941
942    #[test]
943    fn leiden_empty_graph() {
944        let graph = LeidenGraph::from_symbols_and_edges(&[], &[]);
945        let (partition, modularity) = leiden(&graph, 1.0, Some(42));
946        assert!(partition.community.is_empty());
947        assert!((modularity - 0.0).abs() < f64::EPSILON);
948    }
949
950    #[test]
951    fn leiden_deterministic_with_seed() {
952        let (symbols, edges) = build_multiscale_graph();
953        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
954        let (p1, q1) = leiden(&graph, 1.0, Some(42));
955        let (p2, q2) = leiden(&graph, 1.0, Some(42));
956        assert_eq!(p1.community, p2.community);
957        assert!((q1 - q2).abs() < f64::EPSILON);
958    }
959
960    #[test]
961    fn leiden_higher_resolution_more_communities() {
962        let (symbols, edges) = build_multiscale_graph();
963        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
964        let (p_low, _) = leiden(&graph, 0.1, Some(42));
965        let (p_high, _) = leiden(&graph, 5.0, Some(42));
966        let n_low: HashSet<usize> = p_low.community.iter().copied().collect();
967        let n_high: HashSet<usize> = p_high.community.iter().copied().collect();
968        assert!(
969            n_high.len() > n_low.len(),
970            "gamma=5.0 should produce more communities than gamma=0.1: {} vs {}",
971            n_high.len(),
972            n_low.len()
973        );
974    }
975
976    #[test]
977    fn leiden_all_communities_connected() {
978        let (symbols, edges) = build_multiscale_graph();
979        let graph = LeidenGraph::from_symbols_and_edges(&symbols, &edges);
980        let (partition, _) = leiden(&graph, 1.0, Some(42));
981        for c in partition.distinct_communities() {
982            let members: Vec<usize> = (0..graph.n)
983                .filter(|&i| partition.community[i] == c)
984                .collect();
985            if members.len() > 1 {
986                assert!(
987                    is_connected(&graph, &members),
988                    "Community {} with {} members is not connected",
989                    c,
990                    members.len()
991                );
992            }
993        }
994    }
995
996    // ---- T06: Community analysis assembly + naming tests ----
997
998    #[test]
999    fn detect_communities_returns_sorted_by_size() {
1000        let (symbols, edges) = build_two_cliques_bridge();
1001        let config = CommunityConfig::default();
1002        let analysis = detect_communities(&symbols, &edges, &config);
1003        for i in 1..analysis.communities.len() {
1004            assert!(
1005                analysis.communities[i - 1].members.len() >= analysis.communities[i].members.len()
1006            );
1007        }
1008    }
1009
1010    #[test]
1011    fn detect_communities_min_size_filters() {
1012        let (symbols, edges) = build_two_cliques_bridge();
1013        let mut config = CommunityConfig::default();
1014        config.min_community_size = 100;
1015        let analysis = detect_communities(&symbols, &edges, &config);
1016        assert!(analysis.communities.is_empty());
1017    }
1018
1019    #[test]
1020    fn detect_communities_counts_isolated_nodes() {
1021        let symbols = vec![
1022            make_symbol("a", "m::a", SymbolKind::Function),
1023            make_symbol("b", "m::b", SymbolKind::Function),
1024            make_symbol("c", "m::c", SymbolKind::Function),
1025        ];
1026        let edges = vec![make_edge(EdgeKind::Calls, "m::a", "m::b")];
1027        let config = CommunityConfig {
1028            min_community_size: 1,
1029            ..CommunityConfig::default()
1030        };
1031        let analysis = detect_communities(&symbols, &edges, &config);
1032        assert_eq!(analysis.stats.isolated_nodes, 1);
1033    }
1034
1035    #[test]
1036    fn derive_name_uses_most_common_file_stem() {
1037        let members = vec![
1038            "src/auth.rs::login".to_string(),
1039            "src/auth.rs::logout".to_string(),
1040            "src/auth.rs::verify".to_string(),
1041            "src/session.rs::create".to_string(),
1042        ];
1043        assert_eq!(derive_community_name(&members, 0), "auth");
1044    }
1045
1046    #[test]
1047    fn derive_name_falls_back_for_generic_stems() {
1048        let members = vec!["src/mod.rs::foo".to_string(), "src/mod.rs::bar".to_string()];
1049        assert_eq!(derive_community_name(&members, 7), "community_7");
1050    }
1051
1052    #[test]
1053    fn detect_communities_modularity_positive_for_multi_community() {
1054        let (symbols, edges) = build_two_cliques_bridge();
1055        let config = CommunityConfig::default();
1056        let analysis = detect_communities(&symbols, &edges, &config);
1057        assert!(analysis.communities.len() >= 2);
1058        assert!(analysis.modularity > 0.0);
1059    }
1060}