use super::adapter::{NodeId, RdfGraphAdapter};
use std::collections::VecDeque;
pub struct ConnectedComponents;
impl ConnectedComponents {
pub fn weakly_connected(graph: &RdfGraphAdapter) -> Vec<Vec<NodeId>> {
let n = graph.node_count();
let mut visited = vec![false; n];
let mut components: Vec<Vec<NodeId>> = Vec::new();
for start in 0..n {
if visited[start] {
continue;
}
let mut component: Vec<NodeId> = Vec::new();
let mut queue: VecDeque<NodeId> = VecDeque::new();
queue.push_back(start);
visited[start] = true;
while let Some(u) = queue.pop_front() {
component.push(u);
for &(v, _) in &graph.adjacency[u] {
if !visited[v] {
visited[v] = true;
queue.push_back(v);
}
}
for &(v, _) in &graph.reverse_adjacency[u] {
if !visited[v] {
visited[v] = true;
queue.push_back(v);
}
}
}
components.push(component);
}
components
}
pub fn strongly_connected(graph: &RdfGraphAdapter) -> Vec<Vec<NodeId>> {
let n = graph.node_count();
let mut index_counter = 0usize;
let mut stack: Vec<NodeId> = Vec::new();
let mut lowlink = vec![usize::MAX; n];
let mut index = vec![usize::MAX; n];
let mut on_stack = vec![false; n];
let mut sccs: Vec<Vec<NodeId>> = Vec::new();
for v in 0..n {
if index[v] == usize::MAX {
Self::tarjan_dfs(
v,
graph,
&mut index_counter,
&mut stack,
&mut lowlink,
&mut index,
&mut on_stack,
&mut sccs,
);
}
}
sccs
}
#[allow(clippy::too_many_arguments)]
fn tarjan_dfs(
root: NodeId,
graph: &RdfGraphAdapter,
index_counter: &mut usize,
stack: &mut Vec<NodeId>,
lowlink: &mut [usize],
index: &mut [usize],
on_stack: &mut [bool],
sccs: &mut Vec<Vec<NodeId>>,
) {
let mut call_stack: Vec<(NodeId, usize)> = Vec::new();
index[root] = *index_counter;
lowlink[root] = *index_counter;
*index_counter += 1;
stack.push(root);
on_stack[root] = true;
call_stack.push((root, 0));
while let Some((v, edge_idx)) = call_stack.last_mut() {
let v = *v;
let adj = &graph.adjacency[v];
if *edge_idx < adj.len() {
let (w, _) = adj[*edge_idx];
*edge_idx += 1;
if index[w] == usize::MAX {
index[w] = *index_counter;
lowlink[w] = *index_counter;
*index_counter += 1;
stack.push(w);
on_stack[w] = true;
call_stack.push((w, 0));
} else if on_stack[w] {
let lv = lowlink[v];
lowlink[v] = lv.min(index[w]);
}
} else {
call_stack.pop();
if let Some(&(parent, _)) = call_stack.last() {
let lv = lowlink[parent];
lowlink[parent] = lv.min(lowlink[v]);
}
if lowlink[v] == index[v] {
let mut scc: Vec<NodeId> = Vec::new();
loop {
let w = stack.pop().unwrap_or(v);
on_stack[w] = false;
scc.push(w);
if w == v {
break;
}
}
sccs.push(scc);
}
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn build_graph(edges: &[(&str, &str)]) -> RdfGraphAdapter {
let triples: Vec<(String, String, String)> = edges
.iter()
.map(|(s, o)| (s.to_string(), "ex:rel".to_string(), o.to_string()))
.collect();
RdfGraphAdapter::from_triples(&triples)
}
#[test]
fn test_wcc_single_component() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
let comps = ConnectedComponents::weakly_connected(&g);
assert_eq!(comps.len(), 1);
assert_eq!(comps[0].len(), 3);
}
#[test]
fn test_wcc_two_disconnected() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:D")]);
let comps = ConnectedComponents::weakly_connected(&g);
assert_eq!(comps.len(), 2);
}
#[test]
fn test_wcc_empty_graph() {
let g = RdfGraphAdapter::from_triples(&[]);
let comps = ConnectedComponents::weakly_connected(&g);
assert!(comps.is_empty());
}
#[test]
fn test_wcc_all_nodes_covered() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:D"), ("ex:E", "ex:F")]);
let comps = ConnectedComponents::weakly_connected(&g);
let total: usize = comps.iter().map(|c| c.len()).sum();
assert_eq!(total, g.node_count());
}
#[test]
fn test_wcc_directed_treated_undirected() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:B")]);
let comps = ConnectedComponents::weakly_connected(&g);
assert_eq!(comps.len(), 1);
}
#[test]
fn test_scc_simple_cycle() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C"), ("ex:C", "ex:A")]);
let sccs = ConnectedComponents::strongly_connected(&g);
let big: Vec<&Vec<NodeId>> = sccs.iter().filter(|c| c.len() == 3).collect();
assert_eq!(big.len(), 1);
}
#[test]
fn test_scc_dag_all_trivial() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
let sccs = ConnectedComponents::strongly_connected(&g);
assert_eq!(sccs.len(), 3);
for scc in &sccs {
assert_eq!(scc.len(), 1);
}
}
#[test]
fn test_scc_two_cycles() {
let g = build_graph(&[
("ex:A", "ex:B"),
("ex:B", "ex:A"),
("ex:C", "ex:D"),
("ex:D", "ex:C"),
]);
let sccs = ConnectedComponents::strongly_connected(&g);
let size2: Vec<&Vec<NodeId>> = sccs.iter().filter(|c| c.len() == 2).collect();
assert_eq!(size2.len(), 2);
}
#[test]
fn test_scc_empty_graph() {
let g = RdfGraphAdapter::from_triples(&[]);
let sccs = ConnectedComponents::strongly_connected(&g);
assert!(sccs.is_empty());
}
#[test]
fn test_scc_all_nodes_covered() {
let g = build_graph(&[
("ex:A", "ex:B"),
("ex:B", "ex:C"),
("ex:C", "ex:A"),
("ex:D", "ex:E"),
]);
let sccs = ConnectedComponents::strongly_connected(&g);
let total: usize = sccs.iter().map(|c| c.len()).sum();
assert_eq!(total, g.node_count());
}
#[test]
fn test_scc_cycle_with_tail() {
let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C"), ("ex:C", "ex:B")]);
let sccs = ConnectedComponents::strongly_connected(&g);
let large: Vec<&Vec<NodeId>> = sccs.iter().filter(|c| c.len() == 2).collect();
assert_eq!(large.len(), 1);
}
}