use std::collections::BTreeMap;
use mnem_core::id::{CODEC_RAW, Cid, HASH_BLAKE3_256, Multihash, NodeId};
use mnem_core::index::{AdjacencyIndex, EdgeProvenance};
pub type CommunityId = u32;
#[derive(Clone, Debug)]
pub struct CommunityAssignment {
pub map: BTreeMap<NodeId, CommunityId>,
pub members: BTreeMap<CommunityId, Vec<NodeId>>,
pub modularity: f32,
pub seed: u64,
}
impl CommunityAssignment {
#[must_use]
pub fn community_of(&self, node: NodeId) -> Option<CommunityId> {
self.map.get(&node).copied()
}
#[must_use]
pub fn members_of(&self, community: CommunityId) -> &[NodeId] {
self.members
.get(&community)
.map(Vec::as_slice)
.unwrap_or(&[])
}
#[must_use]
pub fn community_count(&self) -> usize {
let mut max: i64 = -1;
for &c in self.map.values() {
if i64::from(c) > max {
max = i64::from(c);
}
}
usize::try_from(max + 1).unwrap_or(0)
}
#[must_use]
pub fn content_cid(&self) -> Cid {
let mut buf: Vec<u8> = Vec::with_capacity(16 + 8 + self.map.len() * (16 + 4));
buf.extend_from_slice(b"mnem/community/v1");
buf.extend_from_slice(&self.seed.to_be_bytes());
for (nid, cid) in &self.map {
buf.extend_from_slice(nid.as_bytes());
buf.extend_from_slice(&cid.to_be_bytes());
}
let digest = blake3::hash(&buf);
let mh = Multihash::wrap(HASH_BLAKE3_256, digest.as_bytes())
.expect("blake3 32-byte digest fits multihash");
Cid::new(CODEC_RAW, mh)
}
}
#[must_use]
pub fn compute_communities(adj: &dyn AdjacencyIndex, seed: u64) -> CommunityAssignment {
let (nodes, adj_list, m2) = build_undirected_graph(adj);
if nodes.is_empty() {
return CommunityAssignment {
map: BTreeMap::new(),
members: BTreeMap::new(),
modularity: 0.0,
seed,
};
}
let n = nodes.len();
let mut part: Vec<usize> = (0..n).collect();
let degrees: Vec<f64> = (0..n)
.map(|i| adj_list[i].iter().map(|(_, w)| *w).sum())
.collect();
let mut prev_q = f64::NEG_INFINITY;
for _ in 0..8 {
local_move(&adj_list, °rees, &mut part, m2);
let q_post_move = modularity(&adj_list, °rees, &part, m2);
let mut refined: Vec<usize> = part.clone();
refine_partition(&adj_list, °rees, &mut refined, m2);
local_move(&adj_list, °rees, &mut refined, m2);
let q_post_refine = modularity(&adj_list, °rees, &refined, m2);
if q_post_refine > q_post_move + 1e-9 {
part.copy_from_slice(&refined);
}
let q = modularity(&adj_list, °rees, &part, m2);
if q <= prev_q + 1e-9 {
break;
}
prev_q = q;
}
let canonical = canonicalise_communities(&part);
let mut map = BTreeMap::new();
for (i, &nid) in nodes.iter().enumerate() {
map.insert(nid, canonical[i]);
}
let mut members: BTreeMap<CommunityId, Vec<NodeId>> = BTreeMap::new();
for (&nid, &cid) in &map {
members.entry(cid).or_default().push(nid);
}
let q = modularity(&adj_list, °rees, &part, m2) as f32;
CommunityAssignment {
map,
members,
modularity: q,
seed,
}
}
fn build_undirected_graph(adj: &dyn AdjacencyIndex) -> (Vec<NodeId>, Vec<Vec<(usize, f64)>>, f64) {
let mut node_set: std::collections::BTreeSet<NodeId> = std::collections::BTreeSet::new();
let mut edges: BTreeMap<(NodeId, NodeId), (f64, bool)> = BTreeMap::new();
for e in adj.iter_edges() {
node_set.insert(e.src);
node_set.insert(e.dst);
if e.src == e.dst {
continue;
}
let key = if e.src < e.dst {
(e.src, e.dst)
} else {
(e.dst, e.src)
};
let authored = matches!(e.provenance, EdgeProvenance::Authored);
let w = f64::from(e.weight).max(0.0);
edges
.entry(key)
.and_modify(|(cur_w, cur_authored)| {
if w > *cur_w {
*cur_w = w;
}
if authored {
*cur_authored = true;
}
})
.or_insert((w, authored));
}
let nodes: Vec<NodeId> = node_set.into_iter().collect();
let mut index_of: BTreeMap<NodeId, usize> = BTreeMap::new();
for (i, nid) in nodes.iter().enumerate() {
index_of.insert(*nid, i);
}
let mut adj_list: Vec<Vec<(usize, f64)>> = vec![Vec::new(); nodes.len()];
let mut m2: f64 = 0.0;
for (&(a, b), &(w, _)) in &edges {
if w <= 0.0 {
continue;
}
let ia = index_of[&a];
let ib = index_of[&b];
adj_list[ia].push((ib, w));
adj_list[ib].push((ia, w));
m2 += 2.0 * w;
}
for nb in &mut adj_list {
nb.sort_by_key(|x| x.0);
}
(nodes, adj_list, m2)
}
fn local_move(adj_list: &[Vec<(usize, f64)>], degrees: &[f64], part: &mut [usize], m2: f64) {
if m2 <= 0.0 {
return;
}
let n = adj_list.len();
let mut com_deg: BTreeMap<usize, f64> = BTreeMap::new();
for (i, &c) in part.iter().enumerate() {
*com_deg.entry(c).or_insert(0.0) += degrees[i];
}
loop {
let mut moved = false;
for v in 0..n {
let k_v = degrees[v];
if k_v <= 0.0 {
continue;
}
let c_old = part[v];
let mut k_vc: BTreeMap<usize, f64> = BTreeMap::new();
for &(u, w) in &adj_list[v] {
if u == v {
continue;
}
*k_vc.entry(part[u]).or_insert(0.0) += w;
}
let self_loop: f64 = adj_list[v]
.iter()
.filter_map(|&(u, w)| if u == v { Some(w) } else { None })
.sum();
let k_v_old = k_vc.get(&c_old).copied().unwrap_or(0.0);
let sum_old = com_deg.get(&c_old).copied().unwrap_or(0.0);
let mut best_c = c_old;
let mut best_dq: f64 = 0.0;
for (&c_new, &k_v_new) in &k_vc {
if c_new == c_old {
continue;
}
let sum_new = com_deg.get(&c_new).copied().unwrap_or(0.0);
let dq = (k_v_new - k_v_old) / (m2 / 2.0)
+ (k_v * (sum_old - sum_new - k_v + 2.0 * self_loop)) / (m2 * m2 / 2.0);
if dq > best_dq + 1e-12 || (dq > best_dq - 1e-12 && c_new < best_c && dq > 1e-12) {
best_dq = dq;
best_c = c_new;
}
}
if best_c != c_old && best_dq > 1e-12 {
*com_deg.entry(c_old).or_insert(0.0) -= k_v;
*com_deg.entry(best_c).or_insert(0.0) += k_v;
part[v] = best_c;
moved = true;
}
}
if !moved {
break;
}
}
}
fn refine_partition(adj_list: &[Vec<(usize, f64)>], degrees: &[f64], part: &mut [usize], m2: f64) {
if m2 <= 0.0 {
return;
}
let n = adj_list.len();
let mut by_com: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
for (i, &c) in part.iter().enumerate() {
by_com.entry(c).or_default().push(i);
}
let mut refined: Vec<usize> = (0..n).collect();
let outer = part.to_vec();
let mut sub_deg: BTreeMap<usize, f64> = BTreeMap::new();
for (i, &c) in refined.iter().enumerate() {
*sub_deg.entry(c).or_insert(0.0) += degrees[i];
}
for (_outer_c, members) in by_com {
let total_c: f64 = members.iter().map(|&i| degrees[i]).sum();
let gamma_thresh = total_c / m2;
for &v in &members {
let k_v = degrees[v];
if k_v <= 0.0 {
continue;
}
let mut k_vc: BTreeMap<usize, f64> = BTreeMap::new();
for &(u, w) in &adj_list[v] {
if u == v {
continue;
}
if outer[u] != outer[v] {
continue;
}
*k_vc.entry(refined[u]).or_insert(0.0) += w;
}
let c_old = refined[v];
let sum_old = sub_deg.get(&c_old).copied().unwrap_or(0.0);
let k_v_old = k_vc.get(&c_old).copied().unwrap_or(0.0);
let mut best_c = c_old;
let mut best_dq: f64 = 0.0;
for (&c_new, &k_v_new) in &k_vc {
if c_new == c_old {
continue;
}
let sum_new = sub_deg.get(&c_new).copied().unwrap_or(0.0);
if k_v_new < gamma_thresh * k_v {
continue;
}
let dq = (k_v_new - k_v_old) / (m2 / 2.0)
+ (k_v * (sum_old - sum_new - k_v)) / (m2 * m2 / 2.0);
if dq > best_dq + 1e-12 || (dq > best_dq - 1e-12 && c_new < best_c && dq > 1e-12) {
best_dq = dq;
best_c = c_new;
}
}
if best_c != c_old && best_dq > 1e-12 {
*sub_deg.entry(c_old).or_insert(0.0) -= k_v;
*sub_deg.entry(best_c).or_insert(0.0) += k_v;
refined[v] = best_c;
}
}
}
part[..n].copy_from_slice(&refined[..n]);
}
fn canonicalise_communities(part: &[usize]) -> Vec<CommunityId> {
let mut map: BTreeMap<usize, CommunityId> = BTreeMap::new();
let mut next: CommunityId = 0;
let mut out = Vec::with_capacity(part.len());
for &c in part {
let canonical = *map.entry(c).or_insert_with(|| {
let id = next;
next += 1;
id
});
out.push(canonical);
}
out
}
fn modularity(adj_list: &[Vec<(usize, f64)>], degrees: &[f64], part: &[usize], m2: f64) -> f64 {
if m2 <= 0.0 {
return 0.0;
}
let mut e_c: BTreeMap<usize, f64> = BTreeMap::new();
let mut a_c: BTreeMap<usize, f64> = BTreeMap::new();
for (u, neighbours) in adj_list.iter().enumerate() {
let cu = part[u];
*a_c.entry(cu).or_insert(0.0) += degrees[u];
for &(v, w) in neighbours {
if part[v] == cu {
*e_c.entry(cu).or_insert(0.0) += w;
}
}
}
let mut q: f64 = 0.0;
for (c, &e) in &e_c {
let a = a_c.get(c).copied().unwrap_or(0.0);
q += e / m2 - (a / m2).powi(2);
}
q
}