use std::collections::HashMap;
use petgraph::graph::{DiGraph, NodeIndex};
use petgraph::visit::EdgeRef;
use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use serde::{Deserialize, Serialize};
use crate::core::entity::EdgeKind;
use crate::core::symbol_graph::{SymbolGraph, SymbolNode};
const LOUVAIN_SEED: u64 = 42;
const MAX_PASSES: usize = 100;
const MAX_SWEEPS_PER_PASS: usize = 50;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CommunityRecord {
pub id: usize,
pub members: Vec<String>,
pub member_count: usize,
pub modularity_contribution: f64,
pub centroid_symbol: String,
pub dominant_files: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LouvainCommunities {
pub assignments: HashMap<String, usize>,
pub community_count: usize,
pub modularity: f64,
}
#[allow(clippy::type_complexity)]
fn project_undirected(
graph: &DiGraph<SymbolNode, EdgeKind>,
by_symbol: &HashMap<String, NodeIndex>,
) -> (HashMap<String, usize>, Vec<Vec<(usize, f64)>>) {
let mut symbols: Vec<&String> = by_symbol.keys().collect();
symbols.sort();
let mut symbol_to_id: HashMap<String, usize> = HashMap::with_capacity(symbols.len());
for (i, s) in symbols.iter().enumerate() {
symbol_to_id.insert((*s).clone(), i);
}
let mut paired: HashMap<(usize, usize), f64> = HashMap::new();
for edge in graph.edge_references() {
let src_sym = match graph.node_weight(edge.source()) {
Some(n) => &n.symbol,
None => continue,
};
let tgt_sym = match graph.node_weight(edge.target()) {
Some(n) => &n.symbol,
None => continue,
};
let (a, b) = match (symbol_to_id.get(src_sym), symbol_to_id.get(tgt_sym)) {
(Some(&a), Some(&b)) => (a, b),
_ => continue,
};
if a == b {
continue;
}
let key = if a < b { (a, b) } else { (b, a) };
let w = edge.weight().score_multiplier() as f64;
*paired.entry(key).or_insert(0.0) += w;
}
let mut adjacency: Vec<Vec<(usize, f64)>> = vec![Vec::new(); symbols.len()];
for ((a, b), w) in paired {
adjacency[a].push((b, w));
adjacency[b].push((a, w));
}
(symbol_to_id, adjacency)
}
type SuperGraph = Vec<HashMap<usize, f64>>;
fn weighted_degrees(adj: &[Vec<(usize, f64)>]) -> Vec<f64> {
adj.iter()
.map(|row| row.iter().map(|(_, w)| w).sum::<f64>())
.collect()
}
fn total_weight(adj: &[Vec<(usize, f64)>]) -> f64 {
let twice: f64 = adj.iter().flat_map(|row| row.iter().map(|(_, w)| *w)).sum();
twice / 2.0
}
fn one_louvain_level(adj: &[Vec<(usize, f64)>], rng: &mut StdRng) -> Vec<usize> {
let n = adj.len();
if n == 0 {
return Vec::new();
}
let degrees = weighted_degrees(adj);
let m = total_weight(adj);
if m <= 0.0 {
return (0..n).collect();
}
let two_m = 2.0 * m;
let mut community: Vec<usize> = (0..n).collect();
let mut sigma_tot: Vec<f64> = degrees.clone();
let mut order: Vec<usize> = (0..n).collect();
for _ in 0..MAX_SWEEPS_PER_PASS {
order.shuffle(rng);
let mut moved = false;
for &node in order.iter() {
let current = community[node];
let k_i = degrees[node];
let (k_i_in_current, weights_to_comm) = neighbor_weights(node, &community, adj);
sigma_tot[current] -= k_i;
let mut best_comm = current;
let mut best_gain: f64 = 0.0;
for (&cand, &k_i_in_cand) in weights_to_comm.iter() {
let gain = k_i_in_cand / m - sigma_tot[cand] * k_i / (two_m * m);
if gain > best_gain {
best_gain = gain;
best_comm = cand;
}
}
let stay_gain = k_i_in_current / m - sigma_tot[current] * k_i / (two_m * m);
if stay_gain > best_gain {
best_comm = current;
}
sigma_tot[best_comm] += k_i;
if best_comm != current {
community[node] = best_comm;
moved = true;
}
}
if !moved {
break;
}
}
relabel_contiguous(&community)
}
fn neighbor_weights(
node: usize,
community: &[usize],
adj: &[Vec<(usize, f64)>],
) -> (f64, HashMap<usize, f64>) {
let own = community[node];
let mut k_i_in_own = 0.0;
let mut weights: HashMap<usize, f64> = HashMap::new();
for &(nb, w) in &adj[node] {
if nb == node {
continue;
}
let comm = community[nb];
if comm == own {
k_i_in_own += w;
}
*weights.entry(comm).or_insert(0.0) += w;
}
(k_i_in_own, weights)
}
fn relabel_contiguous(community: &[usize]) -> Vec<usize> {
let mut remap: HashMap<usize, usize> = HashMap::new();
let mut next = 0usize;
community
.iter()
.map(|&c| {
*remap.entry(c).or_insert_with(|| {
let id = next;
next += 1;
id
})
})
.collect()
}
fn aggregate(adj: &[Vec<(usize, f64)>], community: &[usize]) -> SuperGraph {
let k = community.iter().copied().max().map_or(0, |m| m + 1);
let mut out: SuperGraph = vec![HashMap::new(); k];
for (i, row) in adj.iter().enumerate() {
let ci = community[i];
for &(j, w) in row {
let cj = community[j];
*out[ci].entry(cj).or_insert(0.0) += w;
}
}
out
}
fn super_to_adj(super_g: &SuperGraph) -> Vec<Vec<(usize, f64)>> {
super_g
.iter()
.map(|m| m.iter().map(|(&j, &w)| (j, w)).collect())
.collect()
}
fn modularity(adj: &[Vec<(usize, f64)>], community: &[usize]) -> f64 {
let m = total_weight(adj);
if m <= 0.0 {
return 0.0;
}
let two_m = 2.0 * m;
let mut l_c: HashMap<usize, f64> = HashMap::new();
let mut d_c: HashMap<usize, f64> = HashMap::new();
for (i, row) in adj.iter().enumerate() {
let ci = community[i];
for &(j, w) in row {
let cj = community[j];
*d_c.entry(ci).or_insert(0.0) += w; if ci == cj {
*l_c.entry(ci).or_insert(0.0) += w / 2.0; }
}
}
let mut q = 0.0;
for (c, l) in &l_c {
let d = d_c.get(c).copied().unwrap_or(0.0);
q += l / m - (d / two_m).powi(2);
}
for (c, d) in &d_c {
if !l_c.contains_key(c) {
q -= (d / two_m).powi(2);
}
}
q
}
#[allow(clippy::ptr_arg)]
fn prune_singletons(adj: &[Vec<(usize, f64)>], community: &mut Vec<usize>) {
let mut sizes: HashMap<usize, usize> = HashMap::new();
for &c in community.iter() {
*sizes.entry(c).or_insert(0) += 1;
}
for i in 0..community.len() {
if sizes.get(&community[i]).copied().unwrap_or(0) != 1 {
continue;
}
let mut best: Option<(usize, f64)> = None;
for &(nb, w) in &adj[i] {
let cnb = community[nb];
if cnb == community[i] {
continue;
}
match best {
Some((_, bw)) if w <= bw => {}
_ => best = Some((cnb, w)),
}
}
if let Some((new_c, _)) = best {
let old = community[i];
community[i] = new_c;
*sizes.entry(old).or_insert(0) -= 1;
*sizes.entry(new_c).or_insert(0) += 1;
}
}
}
fn relabel_by_size(community: &[usize]) -> Vec<usize> {
let mut counts: HashMap<usize, usize> = HashMap::new();
for &c in community {
*counts.entry(c).or_insert(0) += 1;
}
let mut pairs: Vec<(usize, usize)> = counts.into_iter().collect();
pairs.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
let mut remap: HashMap<usize, usize> = HashMap::new();
for (new_id, (old_id, _)) in pairs.into_iter().enumerate() {
remap.insert(old_id, new_id);
}
community.iter().map(|c| remap[c]).collect()
}
impl LouvainCommunities {
pub fn detect(graph: &SymbolGraph) -> Self {
let nodes = graph.all_nodes();
let edges = graph.all_edges();
if nodes.is_empty() {
return Self {
assignments: HashMap::new(),
community_count: 0,
modularity: 0.0,
};
}
let mut symbols: Vec<String> = nodes.iter().map(|(s, _, _)| s.clone()).collect();
symbols.sort();
symbols.dedup();
let mut symbol_to_id: HashMap<String, usize> = HashMap::with_capacity(symbols.len());
for (i, s) in symbols.iter().enumerate() {
symbol_to_id.insert(s.clone(), i);
}
let id_to_symbol: Vec<String> = symbols;
let mut paired: HashMap<(usize, usize), f64> = HashMap::new();
for (src, tgt, kind) in &edges {
let (a, b) = match (symbol_to_id.get(src), symbol_to_id.get(tgt)) {
(Some(&a), Some(&b)) => (a, b),
_ => continue,
};
if a == b {
continue;
}
let key = if a < b { (a, b) } else { (b, a) };
*paired.entry(key).or_insert(0.0) += kind.score_multiplier() as f64;
}
let mut adjacency: Vec<Vec<(usize, f64)>> = vec![Vec::new(); id_to_symbol.len()];
for ((a, b), w) in paired {
adjacency[a].push((b, w));
adjacency[b].push((a, w));
}
let mut rng = StdRng::seed_from_u64(LOUVAIN_SEED);
let mut node_to_community: Vec<usize> = (0..adjacency.len()).collect();
let mut current_adj = adjacency.clone();
let mut current_assignment: Vec<usize> = (0..adjacency.len()).collect();
let mut last_q = modularity(&adjacency, &node_to_community);
for _pass in 0..MAX_PASSES {
let level = one_louvain_level(¤t_adj, &mut rng);
for i in 0..node_to_community.len() {
let cur = current_assignment[node_to_community[i]];
let _ = cur; }
for nc in node_to_community.iter_mut() {
let super_id = current_assignment[*nc];
*nc = level[super_id];
}
current_assignment = (0..(level.iter().copied().max().map_or(0, |m| m + 1))).collect();
let super_g = aggregate(¤t_adj, &level);
current_adj = super_to_adj(&super_g);
let q = modularity(&adjacency, &node_to_community);
if (q - last_q).abs() < 1e-9 || current_adj.len() <= 1 {
break;
}
last_q = q;
}
prune_singletons(&adjacency, &mut node_to_community);
let final_assignment = relabel_by_size(&node_to_community);
let final_q = modularity(&adjacency, &final_assignment);
let mut assignments: HashMap<String, usize> = HashMap::with_capacity(id_to_symbol.len());
for (i, sym) in id_to_symbol.iter().enumerate() {
assignments.insert(sym.clone(), final_assignment[i]);
}
let community_count = final_assignment.iter().copied().max().map_or(0, |m| m + 1);
Self {
assignments,
community_count,
modularity: final_q,
}
}
}
#[allow(dead_code, clippy::type_complexity)]
fn _keep_project_in_scope(
g: &DiGraph<SymbolNode, EdgeKind>,
m: &HashMap<String, NodeIndex>,
) -> (HashMap<String, usize>, Vec<Vec<(usize, f64)>>) {
project_undirected(g, m)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::chunker::ChunkType;
use crate::core::symbol_graph::ChunkTuple;
fn chunk(id: &str, file: &str, name: &str, calls: &[&str]) -> ChunkTuple {
(
id.to_string(),
file.to_string(),
Some(name.to_string()),
calls.iter().map(|s| s.to_string()).collect(),
Vec::new(),
ChunkType::Function,
)
}
#[test]
fn two_cliques_yield_two_communities() {
let chunks = vec![
chunk("a:1", "a.rs", "a1", &["a2", "a3", "a4"]),
chunk("a:2", "a.rs", "a2", &["a1", "a3", "a4"]),
chunk("a:3", "a.rs", "a3", &["a1", "a2", "a4"]),
chunk("a:4", "a.rs", "a4", &["a1", "a2", "a3", "b1"]),
chunk("b:1", "b.rs", "b1", &["b2", "b3", "b4"]),
chunk("b:2", "b.rs", "b2", &["b1", "b3", "b4"]),
chunk("b:3", "b.rs", "b3", &["b1", "b2", "b4"]),
chunk("b:4", "b.rs", "b4", &["b1", "b2", "b3"]),
];
let g = SymbolGraph::build_from_chunks(&chunks);
let c = LouvainCommunities::detect(&g);
assert_eq!(
c.community_count, 2,
"expected 2 communities, got {} (assignments={:?})",
c.community_count, c.assignments
);
let a1 = c.assignments["a1"];
let a2 = c.assignments["a2"];
let b1 = c.assignments["b1"];
let b2 = c.assignments["b2"];
assert_eq!(a1, a2, "a-clique should share a community");
assert_eq!(b1, b2, "b-clique should share a community");
assert_ne!(a1, b1, "the two cliques must be in different communities");
assert!(c.modularity > 0.0, "Q must be positive: {}", c.modularity);
}
#[test]
fn path_graph_modularity_is_positive() {
let chunks = vec![
chunk("a:1", "x.rs", "a", &["b"]),
chunk("b:1", "x.rs", "b", &["c"]),
chunk("c:1", "x.rs", "c", &["d"]),
chunk("d:1", "x.rs", "d", &[]),
];
let g = SymbolGraph::build_from_chunks(&chunks);
let c = LouvainCommunities::detect(&g);
assert!(c.community_count >= 1);
assert!(c.modularity >= 0.0, "Q must be ≥ 0: {}", c.modularity);
}
#[test]
fn empty_graph_returns_zero_communities() {
let g = SymbolGraph::new();
let c = LouvainCommunities::detect(&g);
assert_eq!(c.community_count, 0);
assert!(c.assignments.is_empty());
assert_eq!(c.modularity, 0.0);
}
#[test]
fn deterministic_across_runs() {
let chunks = vec![
chunk("a:1", "a.rs", "a1", &["a2", "a3"]),
chunk("a:2", "a.rs", "a2", &["a1", "a3"]),
chunk("a:3", "a.rs", "a3", &["a1", "a2"]),
chunk("b:1", "b.rs", "b1", &["b2"]),
chunk("b:2", "b.rs", "b2", &["b1"]),
];
let g = SymbolGraph::build_from_chunks(&chunks);
let c1 = LouvainCommunities::detect(&g);
let c2 = LouvainCommunities::detect(&g);
assert_eq!(c1.assignments, c2.assignments);
assert_eq!(c1.community_count, c2.community_count);
}
#[test]
fn singleton_pruned_into_neighbour() {
let chunks = vec![
chunk("a:1", "x.rs", "a1", &["a2"]),
chunk("a:2", "x.rs", "a2", &["a1"]),
chunk("b:1", "x.rs", "bridge", &["a1"]),
chunk("c:1", "y.rs", "c1", &["c2"]),
chunk("c:2", "y.rs", "c2", &["c1"]),
];
let g = SymbolGraph::build_from_chunks(&chunks);
let c = LouvainCommunities::detect(&g);
let bridge = c.assignments["bridge"];
let a1 = c.assignments["a1"];
assert_eq!(bridge, a1, "bridge singleton should merge into a-cluster");
}
}