use super::adapter::{NodeId, RdfGraphAdapter};
use std::collections::HashMap;
pub struct LouvainCommunities {
pub resolution: f64,
pub max_iter: usize,
}
impl Default for LouvainCommunities {
fn default() -> Self {
Self::new()
}
}
impl LouvainCommunities {
pub fn new() -> Self {
Self {
resolution: 1.0,
max_iter: 10,
}
}
pub fn detect(&self, graph: &RdfGraphAdapter) -> HashMap<NodeId, usize> {
let n = graph.node_count();
if n == 0 {
return HashMap::new();
}
let mut community: Vec<usize> = (0..n).collect();
let m2: f64 = (0..n)
.flat_map(|u| graph.adjacency[u].iter().map(|(_, w)| *w))
.sum::<f64>()
* 2.0;
let m2 = if m2 == 0.0 { 1.0 } else { m2 };
let degree: Vec<f64> = (0..n)
.map(|u| {
graph.adjacency[u].iter().map(|(_, w)| *w).sum::<f64>()
+ graph.reverse_adjacency[u]
.iter()
.map(|(_, w)| *w)
.sum::<f64>()
})
.collect();
for _pass in 0..self.max_iter {
let mut improved = false;
for u in 0..n {
let current_c = community[u];
let mut neighbor_weight: HashMap<usize, f64> = HashMap::new();
for &(v, w) in &graph.adjacency[u] {
*neighbor_weight.entry(community[v]).or_insert(0.0) += w;
}
for &(v, w) in &graph.reverse_adjacency[u] {
*neighbor_weight.entry(community[v]).or_insert(0.0) += w;
}
let sigma_c: f64 = (0..n)
.filter(|&v| community[v] == current_c && v != u)
.map(|v| degree[v])
.sum();
let k_u = degree[u];
let k_u_c = neighbor_weight.get(¤t_c).copied().unwrap_or(0.0);
let remove_gain = k_u_c - self.resolution * sigma_c * k_u / m2;
let mut best_c = current_c;
let mut best_gain = 0.0;
for (&c, &k_u_nc) in &neighbor_weight {
if c == current_c {
continue;
}
let sigma_nc: f64 = (0..n)
.filter(|&v| community[v] == c)
.map(|v| degree[v])
.sum();
let gain = k_u_nc - remove_gain - self.resolution * sigma_nc * k_u / m2;
if gain > best_gain {
best_gain = gain;
best_c = c;
}
}
if best_c != current_c {
community[u] = best_c;
improved = true;
}
}
if !improved {
break;
}
}
let mut label_map: HashMap<usize, usize> = HashMap::new();
let mut next_label = 0usize;
for &c in &community {
if let std::collections::hash_map::Entry::Vacant(e) = label_map.entry(c) {
e.insert(next_label);
next_label += 1;
}
}
(0..n)
.map(|u| (u, *label_map.get(&community[u]).unwrap_or(&0)))
.collect()
}
pub fn communities(&self, graph: &RdfGraphAdapter) -> Vec<Vec<NodeId>> {
let partition = self.detect(graph);
let num_communities = partition
.values()
.copied()
.max()
.map(|m| m + 1)
.unwrap_or(0);
let mut result: Vec<Vec<NodeId>> = vec![Vec::new(); num_communities];
for (node, comm) in &partition {
if let Some(bucket) = result.get_mut(*comm) {
bucket.push(*node);
}
}
result
}
pub fn modularity(&self, graph: &RdfGraphAdapter, partition: &HashMap<NodeId, usize>) -> f64 {
let n = graph.node_count();
let m2: f64 = (0..n)
.flat_map(|u| graph.adjacency[u].iter().map(|(_, w)| *w))
.sum::<f64>()
* 2.0;
if m2 == 0.0 {
return 0.0;
}
let degree: Vec<f64> = (0..n)
.map(|u| {
graph.adjacency[u].iter().map(|(_, w)| *w).sum::<f64>()
+ graph.reverse_adjacency[u]
.iter()
.map(|(_, w)| *w)
.sum::<f64>()
})
.collect();
let mut q = 0.0f64;
for u in 0..n {
for &(v, w) in &graph.adjacency[u] {
if partition.get(&u) == partition.get(&v) {
q += w - self.resolution * degree[u] * degree[v] / m2;
}
}
}
q / m2 * 2.0
}
}
#[cfg(test)]
mod tests {
use super::*;
fn build_two_clusters() -> RdfGraphAdapter {
let edges = [
("ex:A", "ex:B"),
("ex:B", "ex:A"),
("ex:B", "ex:C"),
("ex:C", "ex:B"),
("ex:A", "ex:C"),
("ex:C", "ex:A"),
("ex:D", "ex:E"),
("ex:E", "ex:D"),
("ex:E", "ex:F"),
("ex:F", "ex:E"),
("ex:D", "ex:F"),
("ex:F", "ex:D"),
("ex:C", "ex:D"), ];
let triples: Vec<(String, String, String)> = edges
.iter()
.map(|(s, o)| (s.to_string(), "ex:rel".to_string(), o.to_string()))
.collect();
RdfGraphAdapter::from_triples(&triples)
}
#[test]
fn test_louvain_two_clusters() {
let g = build_two_clusters();
let partition = LouvainCommunities::new().detect(&g);
assert_eq!(partition.len(), g.node_count());
let a_id = g.get_node_id("ex:A").unwrap();
let b_id = g.get_node_id("ex:B").unwrap();
let c_id = g.get_node_id("ex:C").unwrap();
assert_eq!(
partition[&a_id], partition[&b_id],
"A and B should share a community"
);
assert_eq!(
partition[&b_id], partition[&c_id],
"B and C should share a community"
);
let d_id = g.get_node_id("ex:D").unwrap();
let e_id = g.get_node_id("ex:E").unwrap();
assert_eq!(partition[&d_id], partition[&e_id]);
}
#[test]
fn test_louvain_all_nodes_assigned() {
let g = build_two_clusters();
let partition = LouvainCommunities::new().detect(&g);
for i in 0..g.node_count() {
assert!(partition.contains_key(&i));
}
}
#[test]
fn test_louvain_empty_graph() {
let g = RdfGraphAdapter::from_triples(&[]);
let partition = LouvainCommunities::new().detect(&g);
assert!(partition.is_empty());
}
#[test]
fn test_louvain_single_node() {
let triples = vec![(
"ex:A".to_string(),
"ex:rel".to_string(),
"\"lit\"".to_string(),
)];
let g = RdfGraphAdapter::from_triples(&triples);
let partition = LouvainCommunities::new().detect(&g);
assert_eq!(partition.len(), 1);
}
#[test]
fn test_communities_structure() {
let g = build_two_clusters();
let comms = LouvainCommunities::new().communities(&g);
for c in &comms {
assert!(!c.is_empty());
}
let total: usize = comms.iter().map(|c| c.len()).sum();
assert_eq!(total, g.node_count());
}
#[test]
fn test_modularity_single_community() {
let g = build_two_clusters();
let partition: HashMap<NodeId, usize> = (0..g.node_count()).map(|i| (i, 0)).collect();
let q = LouvainCommunities::new().modularity(&g, &partition);
assert!(q.is_finite());
}
#[test]
fn test_modularity_two_perfect_clusters() {
let g = build_two_clusters();
let a_id = g.get_node_id("ex:A").unwrap();
let b_id = g.get_node_id("ex:B").unwrap();
let c_id = g.get_node_id("ex:C").unwrap();
let d_id = g.get_node_id("ex:D").unwrap();
let e_id = g.get_node_id("ex:E").unwrap();
let f_id = g.get_node_id("ex:F").unwrap();
let partition: HashMap<NodeId, usize> = [
(a_id, 0),
(b_id, 0),
(c_id, 0),
(d_id, 1),
(e_id, 1),
(f_id, 1),
]
.into_iter()
.collect();
let q = LouvainCommunities::new().modularity(&g, &partition);
assert!(
q > 0.0,
"modularity of good partition should be positive, got {q}"
);
}
#[test]
fn test_modularity_empty_graph() {
let g = RdfGraphAdapter::from_triples(&[]);
let partition: HashMap<NodeId, usize> = HashMap::new();
let q = LouvainCommunities::new().modularity(&g, &partition);
assert_eq!(q, 0.0);
}
#[test]
fn test_louvain_resolution_parameter() {
let g = build_two_clusters();
let p_high = LouvainCommunities {
resolution: 2.0,
max_iter: 20,
}
.detect(&g);
let p_low = LouvainCommunities {
resolution: 0.1,
max_iter: 20,
}
.detect(&g);
let n_high = p_high
.values()
.copied()
.collect::<std::collections::HashSet<_>>()
.len();
let n_low = p_low
.values()
.copied()
.collect::<std::collections::HashSet<_>>()
.len();
assert!(n_high >= n_low || n_high >= 1);
let _ = n_low; }
}