use crate::{Error, ErrorKind, Graph};
use std::hash::Hash;
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
fmt::Display,
};
type CommunityId = usize;
type NodeId = usize;
struct Modularity<T>
where
T: Hash + Eq + Clone + Ord + Display + Send + Sync,
{
m: f32, resolution: f32,
seed: Option<u64>, m_half: f32, original_nodes: Vec<T>, origin_nodes_community: Vec<CommunityId>,
nodes: Vec<CNode>,
communities: Vec<Community>,
edges: Vec<Vec<WEdge>>,
}
#[derive(Clone)]
struct Community {
next_id: CommunityId,
nodes: Vec<NodeId>,
total_degree: f32,
}
impl Community {
fn init(node_id: NodeId) -> Self {
Self {
nodes: vec![node_id],
next_id: 0,
total_degree: 0.0,
}
}
fn add_node(&mut self, node_id: NodeId, degree: f32) {
self.nodes.push(node_id);
self.total_degree += degree;
}
fn remove_node(&mut self, node_id: NodeId, degree: f32) {
if let Some(pos) = self.nodes.iter().position(|&x| x == node_id) {
self.nodes.swap_remove(pos);
self.total_degree -= degree;
} else {
panic!("Node {} not found in community {:?}", node_id, self.nodes);
}
}
}
struct CNode {
community_id: CommunityId,
degree: f32,
self_reference_weight: f32,
communities: HashMap<CommunityId, f32>, }
impl CNode {
fn init(node_id: NodeId) -> Self {
Self {
community_id: node_id,
degree: 0.0,
self_reference_weight: 0.0,
communities: HashMap::new(),
}
}
fn init_cache(&mut self, edges: &[WEdge], node_component: &[CommunityId]) {
let mut sum_weights = self.self_reference_weight;
for edge in edges.iter() {
sum_weights += edge.weight;
let neighbor = edge.to;
let neighbor_community = node_component[neighbor];
*self.communities.entry(neighbor_community).or_insert(0.0) += edge.weight;
}
self.degree = sum_weights;
}
}
#[derive(PartialEq, Copy, Clone, Debug)]
struct WEdge {
from: NodeId,
to: NodeId,
weight: f32,
}
pub fn louvain_communities<T, A>(
graph: &Graph<T, A>,
weighted: bool,
resolution: Option<f64>,
threshold: Option<f64>,
seed: Option<u64>,
) -> Result<Vec<HashSet<T>>, Error>
where
T: Hash + Eq + Clone + Ord + Display + Send + Sync,
A: Clone + Send + Sync,
{
let mut partitions = louvain_partitions(graph, weighted, resolution, threshold, seed)?;
match partitions.is_empty() {
false => Ok(partitions.pop().unwrap()),
true => Err(Error {
kind: ErrorKind::NoPartitions,
message: "No partitions were found.".to_string(),
}),
}
}
pub fn louvain_partitions<T, A>(
graph: &Graph<T, A>,
weighted: bool,
resolution: Option<f64>,
_threshold: Option<f64>,
seed: Option<u64>,
) -> Result<Vec<Vec<HashSet<T>>>, Error>
where
T: Hash + Eq + Clone + Ord + Display + Send + Sync,
A: Clone + Send + Sync,
{
let mut modularity = Modularity::new(graph, weighted, seed)?;
modularity.resolution = resolution.unwrap_or(1.0) as f32;
let _result = modularity.run_louvain()?;
let mut all_partitions = Vec::new();
let final_communities = modularity.get_final_communities();
all_partitions.push(final_communities);
Ok(all_partitions)
}
impl<T> Modularity<T>
where
T: Hash + Eq + Clone + Ord + Display + Send + Sync,
{
fn get_community_keys_iter(
&self,
communities_map: &HashMap<CommunityId, f32>,
) -> Vec<CommunityId> {
if self.seed.is_some() {
let mut keys: Vec<CommunityId> = Vec::with_capacity(communities_map.len());
keys.extend(communities_map.keys().copied());
keys.sort_unstable(); keys
} else {
communities_map.keys().copied().collect()
}
}
fn new<A>(graph: &Graph<T, A>, weighted: bool, seed: Option<u64>) -> Result<Self, Error>
where
A: Clone + Send + Sync,
{
let nodes_len = graph.number_of_nodes();
let resolution = 1.0;
let original_nodes: Vec<T> = graph
.get_all_nodes()
.iter()
.map(|n| n.name.clone())
.collect();
let origin_nodes_community = (0..nodes_len).collect();
let nodes = (0..nodes_len).map(CNode::init).collect();
let communities = (0..nodes_len).map(Community::init).collect();
let mut wedges: Vec<Vec<WEdge>> = vec![Vec::new(); nodes_len];
let mut m = 0.0;
let all_edges = graph.get_all_edges();
let estimated_degree = if nodes_len > 0 {
(all_edges.len() * 2) / nodes_len + 1
} else {
0
};
for wedge_vec in wedges.iter_mut() {
wedge_vec.reserve(estimated_degree);
}
for edge in all_edges {
let from = graph.get_node_index(&edge.u).unwrap();
let to = graph.get_node_index(&edge.v).unwrap();
let weight = if weighted { edge.weight as f32 } else { 1.0 };
wedges[from].push(WEdge { from, to, weight });
wedges[to].push(WEdge {
from: to,
to: from,
weight,
});
m += 2.0 * weight;
}
Ok(Self {
m,
resolution,
seed,
m_half: m * 0.5,
original_nodes,
origin_nodes_community,
nodes,
communities,
edges: wedges,
})
}
fn run_louvain(&mut self) -> Result<Vec<Vec<HashSet<T>>>, Error> {
self.init_caches();
if self.m == 0.0 {
let final_communities = self.get_communities();
return Ok(vec![final_communities]);
}
let mut some_change = true;
let mut all_partitions = Vec::new();
let mut iteration_count = 0;
let max_iterations = 1000;
while some_change && iteration_count < max_iterations {
some_change = false;
let mut local_change = true;
iteration_count += 1;
let mut inner_iteration_count = 0;
let max_inner_iterations = 1000;
while local_change && inner_iteration_count < max_inner_iterations {
local_change = false;
inner_iteration_count += 1;
let mut step = 0;
let mut node_index = 0;
let num_communities = self.communities.len();
if num_communities == 0 {
break;
}
while step < num_communities {
let best_community = self.find_best_community(node_index);
let node = &self.nodes[node_index];
if let Some(best_community) = best_community {
if best_community != node.community_id {
self.move_node_to(node_index, best_community);
local_change = true;
}
}
step += 1;
node_index = (node_index + 1) % num_communities;
}
some_change = local_change || some_change;
}
if some_change {
let current_communities = self.get_communities();
all_partitions.push(current_communities);
self.merge_nodes();
}
}
let final_communities = self.get_communities();
all_partitions.push(final_communities);
Ok(all_partitions)
}
fn init_caches(&mut self) {
let node_component: Vec<CommunityId> = self.nodes.iter().map(|n| n.community_id).collect();
for (node_index, node) in self.nodes.iter_mut().enumerate() {
node.init_cache(&self.edges[node_index], &node_component);
}
for community in self.communities.iter_mut() {
community.total_degree = Self::community_total_degree_compute(community, &self.nodes);
}
}
fn community_total_degree_compute(community: &Community, nodes: &[CNode]) -> f32 {
let mut sum = 0.0;
for node_index in community.nodes.iter() {
let node = &nodes[*node_index];
sum += node.degree;
}
sum
}
fn find_best_community(&self, node_id: NodeId) -> Option<CommunityId> {
let mut best: f32 = 0.0;
let mut best_community = None;
let node = &self.nodes[node_id];
if node.communities.is_empty() {
return None;
}
if self.seed.is_none() {
for (&community_index, &shared_degree) in &node.communities {
if shared_degree > 0.0 {
let q_value = self.q(node_id, community_index, shared_degree);
if q_value > best {
best = q_value;
best_community = Some(community_index);
}
}
}
} else {
let community_keys = self.get_community_keys_iter(&node.communities);
for community_index in community_keys {
if let Some(shared_degree) = node.communities.get(&community_index) {
if *shared_degree > 0.0 {
let q_value = self.q(node_id, community_index, *shared_degree);
if q_value > best {
best = q_value;
best_community = Some(community_index);
}
}
}
}
}
best_community
}
fn q(&self, node_id: NodeId, community_id: CommunityId, shared_degree: f32) -> f32 {
let node = &self.nodes[node_id];
let current_community = node.community_id;
let community = &self.communities[community_id];
if current_community == community_id {
if community.nodes.len() == 1 {
0.0
} else {
let d_i = node.degree;
let d_j = community.total_degree - d_i;
let d_ij = shared_degree * 2.0;
(d_ij - self.resolution * (d_i * d_j) / (self.m_half)) / self.m
}
} else {
let d_i = node.degree;
let d_j = community.total_degree;
let d_ij = shared_degree * 2.0;
(d_ij - self.resolution * (d_i * d_j) / (self.m_half)) / self.m
}
}
fn move_node_to(&mut self, node_id: NodeId, community_id: CommunityId) {
let old_community_id = self.nodes[node_id].community_id;
let node_degree = self.nodes[node_id].degree;
let old_community = &mut self.communities[old_community_id];
old_community.remove_node(node_id, node_degree);
let new_community = &mut self.communities[community_id];
new_community.add_node(node_id, node_degree);
for wedge in self.edges[node_id].iter() {
let neighbor = wedge.to;
let neighbor_node = &mut self.nodes[neighbor];
if let Entry::Occupied(mut entry) = neighbor_node.communities.entry(old_community_id) {
let new_val = *entry.get() - wedge.weight;
if new_val == 0.0 {
entry.remove();
} else {
*entry.get_mut() = new_val;
}
}
*neighbor_node.communities.entry(community_id).or_insert(0.0) += wedge.weight;
}
self.nodes[node_id].community_id = community_id;
}
fn get_communities(&self) -> Vec<HashSet<T>> {
let active_communities = self
.communities
.iter()
.filter(|c| !c.nodes.is_empty())
.count();
let mut communities_map: HashMap<CommunityId, HashSet<T>> =
HashMap::with_capacity(active_communities);
for (node_index, &community_id) in self.origin_nodes_community.iter().enumerate() {
let actual_community = self.nodes[community_id].community_id;
communities_map
.entry(actual_community)
.or_insert_with(HashSet::new)
.insert(self.original_nodes[node_index].clone());
}
if self.seed.is_some() {
let mut sorted_communities: Vec<(CommunityId, HashSet<T>)> =
communities_map.into_iter().collect();
sorted_communities.sort_by_key(|(community_id, _)| *community_id);
sorted_communities
.into_iter()
.map(|(_, community)| community)
.collect()
} else {
communities_map.into_values().collect()
}
}
fn get_final_communities(&self) -> Vec<HashSet<T>> {
self.get_communities()
}
fn merge_nodes(&mut self) {
let mut new_community_count = 0;
for c in self.communities.iter_mut() {
if !c.nodes.is_empty() {
c.next_id = new_community_count;
new_community_count += 1;
}
}
let mut new_communities: Vec<Community> = Vec::with_capacity(new_community_count);
let mut new_edges: Vec<Vec<WEdge>> = vec![Vec::new(); new_community_count];
let mut new_nodes: Vec<CNode> = Vec::with_capacity(new_community_count);
let mut m = 0.0;
for community in self.communities.iter() {
if community.nodes.is_empty() {
continue;
}
let new_community_id = community.next_id;
new_communities.push(Community::init(new_community_id));
let mut new_node = CNode::init(new_community_id);
let mut edges_for_community: HashMap<CommunityId, f32> = HashMap::new();
let mut self_reference = 0.0;
for &node_id in community.nodes.iter() {
for wedge in self.edges[node_id].iter() {
let neighbor_community = self.nodes[wedge.to].community_id;
let neighbor_community_new = self.communities[neighbor_community].next_id;
*edges_for_community
.entry(neighbor_community_new)
.or_insert(0.0) += wedge.weight;
}
self_reference += self.nodes[node_id].self_reference_weight;
}
if self.seed.is_some() {
let mut sorted_edges: Vec<(&CommunityId, &f32)> =
Vec::with_capacity(edges_for_community.len());
sorted_edges.extend(edges_for_community.iter());
sorted_edges.sort_unstable_by_key(|(community_id, _)| *community_id);
for (neighbor_community, weight) in sorted_edges {
m += weight;
if *neighbor_community == new_community_id {
self_reference += weight;
} else {
new_edges[new_community_id].push(WEdge {
from: new_community_id,
to: *neighbor_community,
weight: *weight,
});
}
}
} else {
for (neighbor_community, weight) in &edges_for_community {
m += weight;
if *neighbor_community == new_community_id {
self_reference += weight;
} else {
new_edges[new_community_id].push(WEdge {
from: new_community_id,
to: *neighbor_community,
weight: *weight,
});
}
}
}
new_node.self_reference_weight = self_reference;
new_nodes.push(new_node);
}
for i in 0..self.origin_nodes_community.len() {
let new_community_old_id = self.nodes[self.origin_nodes_community[i]].community_id;
let new_community_id = self.communities[new_community_old_id].next_id;
self.origin_nodes_community[i] = new_community_id;
}
self.communities = new_communities;
self.nodes = new_nodes;
self.edges = new_edges;
self.m = m;
self.m_half = m * 0.5;
self.init_caches();
}
}
pub fn compute_modularity<T, A>(
graph: &Graph<T, A>,
communities: &[HashSet<T>],
weighted: bool,
) -> f32
where
T: Hash + Eq + Clone + Ord + Display + Send + Sync,
A: Clone + Send + Sync,
{
let m = graph.size(weighted) as f32;
let mut q = 0.0f32;
let mut node_to_community: HashMap<&T, usize> = HashMap::new();
for (comm_id, community) in communities.iter().enumerate() {
for node in community {
node_to_community.insert(node, comm_id);
}
}
for community in communities {
let mut internal_edges = 0.0f32;
let mut total_degree = 0.0f32;
for node in community {
let degree = if weighted {
graph.get_node_weighted_degree(node.clone()).unwrap_or(0.0) as f32
} else {
graph.get_node_degree(node.clone()).unwrap_or(0) as f32
};
total_degree += degree;
if let Ok(neighbors) = graph.get_successor_nodes(node.clone()) {
for neighbor in neighbors {
if let Some(&neighbor_comm) = node_to_community.get(&neighbor.name) {
if neighbor_comm == *node_to_community.get(node).unwrap() {
let edge_weight = if weighted {
if let Ok(edge) =
graph.get_edge(node.clone(), neighbor.name.clone())
{
edge.weight as f32
} else {
1.0f32
}
} else {
1.0f32
};
internal_edges += edge_weight;
}
}
}
}
}
internal_edges /= 2.0; q += internal_edges / m - (total_degree / (2.0 * m)).powi(2);
}
q
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{Edge, Graph, GraphSpecs};
#[test]
fn test_louvain_basic() {
let mut graph = Graph::<usize, ()>::new(GraphSpecs::undirected_create_missing());
graph
.add_edges(vec![
Edge::new(0, 1),
Edge::new(0, 2),
Edge::new(1, 2),
Edge::new(2, 3), Edge::new(3, 4),
Edge::new(3, 5),
Edge::new(4, 5),
])
.expect("couldn't add edges");
let communities = louvain_communities(&graph, false, None, None, None).unwrap();
assert_eq!(communities.len(), 2);
let mut community_sizes: Vec<usize> = communities.iter().map(|c| c.len()).collect();
community_sizes.sort();
assert_eq!(community_sizes, vec![3, 3]);
}
#[test]
fn test_modularity_calculation() {
let mut graph = Graph::<usize, ()>::new(GraphSpecs::undirected_create_missing());
graph
.add_edges(vec![
Edge::new(0, 1),
Edge::new(1, 2),
Edge::new(3, 4),
Edge::new(4, 5),
])
.expect("couldn't add edges");
let mut community_0 = HashSet::new();
community_0.insert(0usize);
community_0.insert(1usize);
community_0.insert(2usize);
let mut community_1 = HashSet::new();
community_1.insert(3usize);
community_1.insert(4usize);
community_1.insert(5usize);
let communities_structured = vec![community_0, community_1];
let mod_structured = compute_modularity(&graph, &communities_structured, false);
let mut community_all = HashSet::new();
community_all.insert(0usize);
community_all.insert(1usize);
community_all.insert(2usize);
community_all.insert(3usize);
community_all.insert(4usize);
community_all.insert(5usize);
let communities_all = vec![community_all];
let mod_all = compute_modularity(&graph, &communities_all, false);
assert!(mod_structured > mod_all);
}
#[test]
fn test_weighted_louvain() {
let mut graph = Graph::<&str, ()>::new(GraphSpecs::undirected_create_missing());
graph
.add_edges(vec![
Edge::with_weight("a", "b", 5.0),
Edge::with_weight("b", "c", 5.0),
Edge::with_weight("c", "d", 1.0), Edge::with_weight("d", "e", 5.0),
])
.expect("couldn't add edges");
let communities = louvain_communities(&graph, true, None, None, None).unwrap();
assert!(communities.len() >= 2);
}
#[test]
fn test_resolution_parameter_behavior() {
let mut graph = Graph::<usize, ()>::new(GraphSpecs::undirected_create_missing());
graph
.add_edges(vec![
Edge::new(0, 1),
Edge::new(0, 2),
Edge::new(0, 3),
Edge::new(1, 2),
Edge::new(1, 3),
Edge::new(2, 3),
Edge::new(3, 4),
Edge::new(4, 5),
Edge::new(4, 6),
Edge::new(4, 7),
Edge::new(5, 6),
Edge::new(5, 7),
Edge::new(6, 7),
])
.expect("couldn't add edges");
let communities_low_res =
louvain_communities(&graph, false, Some(0.5), None, Some(42)).unwrap();
let communities_high_res =
louvain_communities(&graph, false, Some(2.0), None, Some(42)).unwrap();
assert!(communities_low_res.len() <= communities_high_res.len());
assert!(communities_low_res.len() >= 1);
assert!(communities_high_res.len() >= 1);
let mut all_nodes_low: HashSet<usize> = HashSet::new();
for community in &communities_low_res {
for &node in community {
assert!(
!all_nodes_low.contains(&node),
"Node {} appears in multiple communities",
node
);
all_nodes_low.insert(node);
}
}
assert_eq!(
all_nodes_low.len(),
8,
"Not all nodes are assigned to communities"
);
let mut all_nodes_high: HashSet<usize> = HashSet::new();
for community in &communities_high_res {
for &node in community {
assert!(
!all_nodes_high.contains(&node),
"Node {} appears in multiple communities",
node
);
all_nodes_high.insert(node);
}
}
assert_eq!(
all_nodes_high.len(),
8,
"Not all nodes are assigned to communities"
);
}
}