use crate::KnowledgeGraph;
use petgraph::algo::tarjan_scc;
use petgraph::visit::{EdgeRef, IntoNodeIdentifiers, NodeIndexable};
use std::cmp::Ordering;
use std::collections::HashMap;
fn uf_find(parent: &mut [usize], i: usize) -> usize {
if parent[i] != i {
parent[i] = uf_find(parent, parent[i]); }
parent[i]
}
fn uf_union(parent: &mut [usize], rank: &mut [usize], x: usize, y: usize) {
let px = uf_find(parent, x);
let py = uf_find(parent, y);
if px == py {
return;
}
match rank[px].cmp(&rank[py]) {
Ordering::Less => parent[px] = py,
Ordering::Greater => parent[py] = px,
Ordering::Equal => {
parent[py] = px;
rank[px] += 1;
}
}
}
#[must_use]
pub fn strongly_connected_components(kg: &KnowledgeGraph) -> Vec<Vec<String>> {
let graph = kg.as_petgraph();
let sccs = tarjan_scc(graph);
sccs.into_iter()
.map(|component| {
component
.into_iter()
.map(|idx| graph[idx].id.as_str().to_owned())
.collect()
})
.collect()
}
#[must_use]
pub fn weakly_connected_components(kg: &KnowledgeGraph) -> Vec<Vec<String>> {
let graph = kg.as_petgraph();
let n = graph.node_count();
if n == 0 {
return vec![];
}
let mut parent: Vec<usize> = (0..n).collect();
let mut rank: Vec<usize> = vec![0; n];
for edge in graph.edge_references() {
let src = graph.to_index(edge.source());
let dst = graph.to_index(edge.target());
uf_union(&mut parent, &mut rank, src, dst);
}
let mut components: HashMap<usize, Vec<String>> = HashMap::new();
for node in graph.node_identifiers() {
let idx = graph.to_index(node);
let root = uf_find(&mut parent, idx);
components
.entry(root)
.or_default()
.push(graph[node].id.as_str().to_owned());
}
components.into_values().collect()
}
#[must_use]
pub fn connected_components(kg: &KnowledgeGraph) -> Vec<Vec<String>> {
weakly_connected_components(kg)
}
#[derive(Debug, Clone)]
pub struct ComponentStats {
pub num_components: usize,
pub max_component_size: usize,
pub min_component_size: usize,
pub avg_component_size: f64,
pub largest_component_fraction: f64,
}
#[must_use]
#[allow(clippy::cast_precision_loss)]
pub fn component_stats(components: &[Vec<String>]) -> ComponentStats {
if components.is_empty() {
return ComponentStats {
num_components: 0,
max_component_size: 0,
min_component_size: 0,
avg_component_size: 0.0,
largest_component_fraction: 0.0,
};
}
let sizes: Vec<usize> = components.iter().map(Vec::len).collect();
let total: usize = sizes.iter().sum();
let max_size = sizes.iter().copied().max().unwrap_or(0);
let min_size = sizes.iter().copied().min().unwrap_or(0);
ComponentStats {
num_components: components.len(),
max_component_size: max_size,
min_component_size: min_size,
avg_component_size: total as f64 / components.len() as f64,
largest_component_fraction: if total > 0 {
max_size as f64 / total as f64
} else {
0.0
},
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Triple;
#[test]
fn test_wcc_chain() {
let mut kg = KnowledgeGraph::new();
kg.add_triple(Triple::new("A", "rel", "B"));
kg.add_triple(Triple::new("B", "rel", "C"));
let wcc = weakly_connected_components(&kg);
assert_eq!(wcc.len(), 1, "Chain should be 1 WCC");
assert_eq!(wcc[0].len(), 3);
}
#[test]
fn test_scc_chain() {
let mut kg = KnowledgeGraph::new();
kg.add_triple(Triple::new("A", "rel", "B"));
kg.add_triple(Triple::new("B", "rel", "C"));
let scc = strongly_connected_components(&kg);
assert_eq!(scc.len(), 3, "Chain should be 3 SCCs (one per node)");
}
#[test]
fn test_scc_cycle() {
let mut kg = KnowledgeGraph::new();
kg.add_triple(Triple::new("A", "rel", "B"));
kg.add_triple(Triple::new("B", "rel", "C"));
kg.add_triple(Triple::new("C", "rel", "A"));
let scc = strongly_connected_components(&kg);
assert_eq!(scc.len(), 1, "Cycle should be 1 SCC");
assert_eq!(scc[0].len(), 3);
}
#[test]
fn test_disconnected_wcc() {
let mut kg = KnowledgeGraph::new();
kg.add_triple(Triple::new("A", "rel", "B"));
kg.add_triple(Triple::new("X", "rel", "Y"));
let wcc = weakly_connected_components(&kg);
assert_eq!(wcc.len(), 2, "Should have 2 WCCs");
}
#[test]
fn test_component_stats() {
let components = vec![vec!["A".into(), "B".into(), "C".into()], vec!["X".into()]];
let stats = component_stats(&components);
assert_eq!(stats.num_components, 2);
assert_eq!(stats.max_component_size, 3);
assert_eq!(stats.min_component_size, 1);
assert!((stats.avg_component_size - 2.0).abs() < 1e-6);
assert!((stats.largest_component_fraction - 0.75).abs() < 1e-6);
}
}