use std::collections::HashMap;
use crate::graph::Graph;
use crate::types::{ulid_encode, DbError, Value};
use super::{opt_usize, Direction, GraphSnapshot, Row};
pub fn run_wcc(graph: &Graph, params: &HashMap<String, Value>) -> Result<Vec<Row>, DbError> {
let min_size = opt_usize(params, "minSize", 1)?;
if min_size == 0 {
return Err(DbError::Query("parameter 'minSize' must be at least 1".into()));
}
let snap = GraphSnapshot::build(graph, None);
let n = snap.n;
if n == 0 {
return Ok(vec![]);
}
let mut parent: Vec<usize> = (0..n).collect();
let mut rank: Vec<usize> = vec![0; n];
fn find(parent: &mut Vec<usize>, x: usize) -> usize {
if parent[x] != x {
parent[x] = find(parent, parent[x]); }
parent[x]
}
fn union(parent: &mut Vec<usize>, rank: &mut Vec<usize>, x: usize, y: usize) {
let rx = find(parent, x);
let ry = find(parent, y);
if rx == ry {
return; }
match rank[rx].cmp(&rank[ry]) {
std::cmp::Ordering::Less => parent[rx] = ry,
std::cmp::Ordering::Greater => parent[ry] = rx,
std::cmp::Ordering::Equal => {
parent[ry] = rx;
rank[rx] += 1; }
}
}
for i in 0..n {
for (j, _) in snap.neighbors(i, Direction::Any) {
union(&mut parent, &mut rank, i, j);
}
}
for i in 0..n {
let _ = find(&mut parent, i);
}
build_component_rows(&snap, &parent, min_size)
}
pub fn run_scc(graph: &Graph, params: &HashMap<String, Value>) -> Result<Vec<Row>, DbError> {
let min_size = opt_usize(params, "minSize", 1)?;
if min_size == 0 {
return Err(DbError::Query("parameter 'minSize' must be at least 1".into()));
}
let snap = GraphSnapshot::build(graph, None);
let n = snap.n;
if n == 0 {
return Ok(vec![]);
}
let mut visited = vec![false; n];
let mut finish_order: Vec<usize> = Vec::with_capacity(n);
for start in 0..n {
if !visited[start] {
dfs_finish(&snap, start, &mut visited, &mut finish_order, Direction::Out);
}
}
let mut component: Vec<usize> = vec![usize::MAX; n];
let mut comp_id = 0;
for &start in finish_order.iter().rev() {
if component[start] == usize::MAX {
assign_component(&snap, start, comp_id, &mut component, Direction::In);
comp_id += 1;
}
}
build_component_rows(&snap, &component, min_size)
}
fn dfs_finish(
snap: &GraphSnapshot,
start: usize,
visited: &mut Vec<bool>,
finish: &mut Vec<usize>,
dir: Direction,
) {
let mut stack: Vec<(usize, usize)> = vec![(start, 0)];
visited[start] = true;
let mut nbrs_cache: Vec<Vec<usize>> = vec![vec![]; snap.n];
nbrs_cache[start] = snap.unique_neighbor_indices(start, dir);
while let Some((node, cursor)) = stack.last_mut() {
let node = *node;
if *cursor < nbrs_cache[node].len() {
let nbr = nbrs_cache[node][*cursor];
*cursor += 1;
if !visited[nbr] {
visited[nbr] = true;
nbrs_cache[nbr] = snap.unique_neighbor_indices(nbr, dir);
stack.push((nbr, 0));
}
} else {
finish.push(node);
stack.pop();
}
}
}
fn assign_component(
snap: &GraphSnapshot,
start: usize,
comp_id: usize,
component: &mut Vec<usize>,
dir: Direction,
) {
let mut stack = vec![start];
component[start] = comp_id;
while let Some(node) = stack.pop() {
for nbr in snap.unique_neighbor_indices(node, dir) {
if component[nbr] == usize::MAX {
component[nbr] = comp_id;
stack.push(nbr);
}
}
}
}
fn build_component_rows(
snap: &GraphSnapshot,
component: &[usize],
min_size: usize,
) -> Result<Vec<Row>, DbError> {
let n = snap.n;
let mut size_map: HashMap<usize, usize> = HashMap::new();
for &c in component.iter() {
*size_map.entry(c).or_insert(0) += 1;
}
let mut rep_map: HashMap<usize, usize> = HashMap::new();
for (i, &c) in component.iter().enumerate() {
rep_map.entry(c).or_insert(i);
}
let mut rows = Vec::new();
for i in 0..n {
let c = component[i];
let sz = size_map[&c];
if sz < min_size {
continue;
}
let rep_idx = rep_map[&c];
let mut row = HashMap::new();
row.insert(
"node".to_string(),
Value::String(ulid_encode(snap.node_ids[i].0)),
);
row.insert(
"component".to_string(),
Value::String(ulid_encode(snap.node_ids[rep_idx].0)),
);
row.insert("size".to_string(), Value::Int(sz as i64));
rows.push(row);
}
Ok(rows)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::Graph;
use crate::types::{Edge, Node, NodeId, Value};
fn make_node(g: &mut Graph) -> NodeId {
let id = g.alloc_node_id();
g.apply_insert_node(Node {
id,
labels: vec!["N".into()],
properties: Default::default(),
});
id
}
fn make_directed_edge(g: &mut Graph, from: NodeId, to: NodeId) {
let id = g.alloc_edge_id();
g.apply_insert_edge(Edge {
id,
from_node: from,
to_node: to,
label: "E".into(),
properties: Default::default(),
directed: true,
});
}
#[test]
fn wcc_single_component() {
let mut g = Graph::new();
let a = make_node(&mut g);
let b = make_node(&mut g);
let c = make_node(&mut g);
make_directed_edge(&mut g, a, b);
make_directed_edge(&mut g, b, c);
let params = HashMap::new();
let rows = run_wcc(&g, ¶ms).unwrap();
assert_eq!(rows.len(), 3);
let comps: Vec<&Value> = rows.iter().map(|r| &r["component"]).collect();
assert!(comps.iter().all(|c| *c == comps[0]));
}
#[test]
fn wcc_two_disconnected_components() {
let mut g = Graph::new();
let a = make_node(&mut g);
let b = make_node(&mut g);
let c = make_node(&mut g);
let d = make_node(&mut g);
make_directed_edge(&mut g, a, b); make_directed_edge(&mut g, c, d);
let params = HashMap::new();
let rows = run_wcc(&g, ¶ms).unwrap();
let comp_set: std::collections::HashSet<String> = rows
.iter()
.map(|r| match &r["component"] {
Value::String(s) => s.clone(),
other => panic!("{other:?}"),
})
.collect();
assert_eq!(comp_set.len(), 2);
}
#[test]
fn wcc_min_size_filters_small_components() {
let mut g = Graph::new();
let a = make_node(&mut g);
let b = make_node(&mut g);
let lone = make_node(&mut g); make_directed_edge(&mut g, a, b);
let params: HashMap<String, Value> =
[("minSize".to_string(), Value::Int(2))].into_iter().collect();
let rows = run_wcc(&g, ¶ms).unwrap();
assert_eq!(rows.len(), 2);
}
#[test]
fn wcc_empty_graph() {
let g = Graph::new();
let rows = run_wcc(&g, &HashMap::new()).unwrap();
assert!(rows.is_empty());
}
#[test]
fn scc_cycle_is_single_component() {
let mut g = Graph::new();
let a = make_node(&mut g);
let b = make_node(&mut g);
let c = make_node(&mut g);
make_directed_edge(&mut g, a, b);
make_directed_edge(&mut g, b, c);
make_directed_edge(&mut g, c, a);
let params = HashMap::new();
let rows = run_scc(&g, ¶ms).unwrap();
assert_eq!(rows.len(), 3);
let comps: Vec<_> = rows.iter().map(|r| r["component"].clone()).collect();
assert!(comps.iter().all(|c| *c == comps[0]));
}
#[test]
fn scc_dag_all_singletons() {
let mut g = Graph::new();
let a = make_node(&mut g);
let b = make_node(&mut g);
let c = make_node(&mut g);
make_directed_edge(&mut g, a, b);
make_directed_edge(&mut g, b, c);
let params = HashMap::new();
let rows = run_scc(&g, ¶ms).unwrap();
assert_eq!(rows.len(), 3);
let comp_set: std::collections::HashSet<String> = rows
.iter()
.map(|r| match &r["component"] {
Value::String(s) => s.clone(),
other => panic!("{other:?}"),
})
.collect();
assert_eq!(comp_set.len(), 3); }
#[test]
fn scc_min_size_two_filters_singletons() {
let mut g = Graph::new();
let a = make_node(&mut g);
let b = make_node(&mut g);
let c = make_node(&mut g);
let _d = make_node(&mut g);
make_directed_edge(&mut g, a, b);
make_directed_edge(&mut g, b, c);
make_directed_edge(&mut g, c, a);
let params: HashMap<String, Value> =
[("minSize".to_string(), Value::Int(2))].into_iter().collect();
let rows = run_scc(&g, ¶ms).unwrap();
assert_eq!(rows.len(), 3); }
#[test]
fn wcc_min_size_zero_errors() {
let g = Graph::new();
let params: HashMap<String, Value> =
[("minSize".to_string(), Value::Int(0))].into_iter().collect();
assert!(run_wcc(&g, ¶ms).is_err());
}
}