use std::collections::HashMap;
use std::path::Path;
use serde::{Deserialize, Serialize};
pub mod summary;
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum CommunityError {
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CommunityId(pub u64);
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CommunityState {
pub communities: HashMap<CommunityId, Vec<String>>,
edges: Vec<(String, String)>,
}
impl CommunityState {
#[must_use]
pub fn empty() -> Self {
Self {
communities: HashMap::new(),
edges: Vec::new(),
}
}
#[must_use]
pub fn into_parts(self) -> Vec<(u64, Vec<String>)> {
let mut parts: Vec<(u64, Vec<String>)> = self
.communities
.into_iter()
.map(|(CommunityId(id), members)| (id, members))
.collect();
parts.sort_by_key(|(id, _)| *id);
parts
}
#[must_use]
pub fn from_parts(parts: Vec<(u64, Vec<String>)>) -> Self {
let communities = parts
.into_iter()
.map(|(id, members)| (CommunityId(id), members))
.collect();
Self {
communities,
edges: Vec::new(),
}
}
pub fn save(&self, path: &Path) -> Result<(), CommunityError> {
let dir = path.join("communities");
std::fs::create_dir_all(&dir)?;
let file_path = dir.join("state.json");
let parts: Vec<(u64, Vec<String>)> = self
.communities
.iter()
.map(|(CommunityId(id), members)| (*id, members.clone()))
.collect();
let json = serde_json::to_string_pretty(&parts)?;
std::fs::write(file_path, json)?;
Ok(())
}
pub fn load(path: &Path) -> Result<Self, CommunityError> {
let file_path = path.join("communities").join("state.json");
let json = std::fs::read_to_string(file_path)?;
let parts: Vec<(u64, Vec<String>)> = serde_json::from_str(&json)?;
Ok(Self::from_parts(parts))
}
pub fn update(&mut self, new_edges: &[(String, String)]) -> &mut Self {
for edge in new_edges {
if !self.edges.contains(edge) {
self.edges.push(edge.clone());
}
}
let mut affected: std::collections::HashSet<String> = std::collections::HashSet::new();
let adj = build_adjacency(&self.edges);
for (a, b) in new_edges {
let _ = affected.insert(a.clone());
let _ = affected.insert(b.clone());
for nbr in adj.get(a).into_iter().flatten() {
let _ = affected.insert(nbr.clone());
}
for nbr in adj.get(b).into_iter().flatten() {
let _ = affected.insert(nbr.clone());
}
let one_hop: Vec<String> = adj
.get(a)
.into_iter()
.flatten()
.chain(adj.get(b).into_iter().flatten())
.cloned()
.collect();
for node in one_hop {
for nbr in adj.get(&node).into_iter().flatten() {
let _ = affected.insert(nbr.clone());
}
}
}
let mut labels: HashMap<String, u64> = HashMap::new();
for (CommunityId(id), members) in &self.communities {
for member in members {
let _ = labels.insert(member.clone(), *id);
}
}
let all_nodes = collect_nodes(&self.edges);
let mut next_id = labels.values().copied().max().unwrap_or(0) + 1;
for node in &all_nodes {
let _ = labels.entry(node.clone()).or_insert_with(|| {
let id = next_id;
next_id += 1;
id
});
}
let affected_labels: std::collections::HashSet<u64> = affected
.iter()
.filter_map(|n| labels.get(n))
.copied()
.collect();
if affected_labels.len() > 1 {
let all_nodes = collect_nodes(&self.edges);
let mut new_labels = connected_component_labels(&all_nodes, &adj);
propagate_labels(&adj, &mut new_labels, 50);
labels = new_labels;
} else {
propagate_labels_restricted(&adj, &mut labels, &affected, 50);
}
self.communities = invert_labels(labels);
self
}
}
#[must_use]
pub fn detect_communities(edges: &[(String, String)]) -> CommunityState {
let nodes = collect_nodes(edges);
if nodes.is_empty() {
return CommunityState {
communities: HashMap::new(),
edges: edges.to_vec(),
};
}
let adj = build_adjacency(edges);
let labels = connected_component_labels(&nodes, &adj);
let mut labels = labels;
propagate_labels(&adj, &mut labels, 50);
CommunityState {
communities: invert_labels(labels),
edges: edges.to_vec(),
}
}
fn connected_component_labels(
nodes: &[String],
adj: &HashMap<String, Vec<String>>,
) -> HashMap<String, u64> {
let mut labels: HashMap<String, u64> = HashMap::new();
let node_index: HashMap<&str, u64> = nodes
.iter()
.enumerate()
.map(|(i, n)| (n.as_str(), i as u64))
.collect();
for start in nodes {
if labels.contains_key(start.as_str()) {
continue;
}
let component_label = *node_index.get(start.as_str()).unwrap_or(&0);
let mut queue = std::collections::VecDeque::new();
queue.push_back(start.clone());
while let Some(node) = queue.pop_front() {
if labels.contains_key(node.as_str()) {
continue;
}
let _ = labels.insert(node.clone(), component_label);
for nbr in adj.get(&node).into_iter().flatten() {
if !labels.contains_key(nbr.as_str()) {
queue.push_back(nbr.clone());
}
}
}
}
labels
}
fn collect_nodes(edges: &[(String, String)]) -> Vec<String> {
let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
let mut nodes: Vec<String> = Vec::new();
for (a, b) in edges {
if seen.insert(a.as_str()) {
nodes.push(a.clone());
}
if seen.insert(b.as_str()) {
nodes.push(b.clone());
}
}
nodes
}
fn build_adjacency(edges: &[(String, String)]) -> HashMap<String, Vec<String>> {
let mut adj: HashMap<String, Vec<String>> = HashMap::new();
for (a, b) in edges {
adj.entry(a.clone()).or_default().push(b.clone());
adj.entry(b.clone()).or_default().push(a.clone());
}
adj
}
fn propagate_labels(
adj: &HashMap<String, Vec<String>>,
labels: &mut HashMap<String, u64>,
max_iters: usize,
) {
let nodes: Vec<String> = labels.keys().cloned().collect();
for _ in 0..max_iters {
let prev = labels.clone();
for node in &nodes {
if let Some(neighbours) = adj.get(node) {
let neighbour_labels: Vec<u64> = neighbours
.iter()
.filter_map(|n| labels.get(n))
.copied()
.collect();
if let Some(most_common) = most_frequent(&neighbour_labels) {
let _ = labels.insert(node.clone(), most_common);
}
}
}
if *labels == prev {
break;
}
}
}
fn propagate_labels_restricted(
adj: &HashMap<String, Vec<String>>,
labels: &mut HashMap<String, u64>,
restricted: &std::collections::HashSet<String>,
max_iters: usize,
) {
let nodes: Vec<String> = restricted.iter().cloned().collect();
for _ in 0..max_iters {
let prev = labels.clone();
for node in &nodes {
if let Some(neighbours) = adj.get(node) {
let neighbour_labels: Vec<u64> = neighbours
.iter()
.filter_map(|n| labels.get(n))
.copied()
.collect();
if let Some(most_common) = most_frequent(&neighbour_labels) {
let _ = labels.insert(node.clone(), most_common);
}
}
}
if *labels == prev {
break;
}
}
}
fn most_frequent(values: &[u64]) -> Option<u64> {
if values.is_empty() {
return None;
}
let mut counts: HashMap<u64, usize> = HashMap::new();
for &v in values {
*counts.entry(v).or_insert(0) += 1;
}
counts
.into_iter()
.max_by(|(v1, c1), (v2, c2)| c1.cmp(c2).then(v2.cmp(v1)))
.map(|(v, _)| v)
}
fn invert_labels(labels: HashMap<String, u64>) -> HashMap<CommunityId, Vec<String>> {
let mut communities: HashMap<CommunityId, Vec<String>> = HashMap::new();
for (node, label) in labels {
communities
.entry(CommunityId(label))
.or_default()
.push(node);
}
for members in communities.values_mut() {
members.sort_unstable();
}
communities
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
use super::*;
#[test]
fn strongly_connected_graph_forms_one_community() {
let edges = vec![
("a".to_owned(), "b".to_owned()),
("b".to_owned(), "c".to_owned()),
("a".to_owned(), "c".to_owned()),
];
let state = detect_communities(&edges);
assert_eq!(
state.communities.len(),
1,
"triangle must form one community"
);
let members = state.communities.values().next().unwrap();
assert_eq!(members.len(), 3);
}
#[test]
fn disconnected_graphs_form_separate_communities() {
let edges = vec![
("a".to_owned(), "b".to_owned()),
("b".to_owned(), "c".to_owned()),
("a".to_owned(), "c".to_owned()),
("x".to_owned(), "y".to_owned()),
("y".to_owned(), "z".to_owned()),
("x".to_owned(), "z".to_owned()),
];
let state = detect_communities(&edges);
assert_eq!(
state.communities.len(),
2,
"two cliques must form two communities"
);
for members in state.communities.values() {
assert_eq!(members.len(), 3);
}
}
#[test]
fn state_roundtrips_via_into_from_parts() {
let edges = vec![
("a".to_owned(), "b".to_owned()),
("b".to_owned(), "c".to_owned()),
];
let state = detect_communities(&edges);
let original_len = state.communities.len();
let original_members: std::collections::HashSet<String> =
state.communities.values().flatten().cloned().collect();
let parts = state.into_parts();
let restored = CommunityState::from_parts(parts);
assert_eq!(restored.communities.len(), original_len);
let restored_members: std::collections::HashSet<String> =
restored.communities.values().flatten().cloned().collect();
assert_eq!(restored_members, original_members);
}
#[test]
fn incremental_update_merges_edges() {
let initial_edges = vec![
("a".to_owned(), "b".to_owned()),
("b".to_owned(), "c".to_owned()),
("a".to_owned(), "c".to_owned()),
("x".to_owned(), "y".to_owned()),
("y".to_owned(), "z".to_owned()),
("x".to_owned(), "z".to_owned()),
];
let mut state = detect_communities(&initial_edges);
assert_eq!(
state.communities.len(),
2,
"should start as two communities"
);
let bridge = vec![("c".to_owned(), "x".to_owned())];
let _ = state.update(&bridge);
assert_eq!(
state.communities.len(),
1,
"bridge edge should merge into one community; got: {:?}",
state.communities
);
}
}