Skip to main content

oxirs_arq/analytics/
components.rs

1//! Connected-components analysis: weakly and strongly connected components
2
3use super::adapter::{NodeId, RdfGraphAdapter};
4use std::collections::VecDeque;
5
6/// Connected-component algorithms for RDF graphs.
7pub struct ConnectedComponents;
8
9impl ConnectedComponents {
10    /// Weakly connected components (edge direction ignored).
11    ///
12    /// Uses BFS on the undirected version of the graph.
13    pub fn weakly_connected(graph: &RdfGraphAdapter) -> Vec<Vec<NodeId>> {
14        let n = graph.node_count();
15        let mut visited = vec![false; n];
16        let mut components: Vec<Vec<NodeId>> = Vec::new();
17
18        for start in 0..n {
19            if visited[start] {
20                continue;
21            }
22            let mut component: Vec<NodeId> = Vec::new();
23            let mut queue: VecDeque<NodeId> = VecDeque::new();
24            queue.push_back(start);
25            visited[start] = true;
26
27            while let Some(u) = queue.pop_front() {
28                component.push(u);
29                // Follow both outgoing and incoming edges
30                for &(v, _) in &graph.adjacency[u] {
31                    if !visited[v] {
32                        visited[v] = true;
33                        queue.push_back(v);
34                    }
35                }
36                for &(v, _) in &graph.reverse_adjacency[u] {
37                    if !visited[v] {
38                        visited[v] = true;
39                        queue.push_back(v);
40                    }
41                }
42            }
43            components.push(component);
44        }
45        components
46    }
47
48    /// Strongly connected components via Tarjan's DFS algorithm.
49    ///
50    /// Returns a list of SCCs in reverse topological order.
51    pub fn strongly_connected(graph: &RdfGraphAdapter) -> Vec<Vec<NodeId>> {
52        let n = graph.node_count();
53        let mut index_counter = 0usize;
54        let mut stack: Vec<NodeId> = Vec::new();
55        let mut lowlink = vec![usize::MAX; n];
56        let mut index = vec![usize::MAX; n];
57        let mut on_stack = vec![false; n];
58        let mut sccs: Vec<Vec<NodeId>> = Vec::new();
59
60        for v in 0..n {
61            if index[v] == usize::MAX {
62                Self::tarjan_dfs(
63                    v,
64                    graph,
65                    &mut index_counter,
66                    &mut stack,
67                    &mut lowlink,
68                    &mut index,
69                    &mut on_stack,
70                    &mut sccs,
71                );
72            }
73        }
74        sccs
75    }
76
77    /// Iterative Tarjan DFS (avoids stack overflow on large graphs).
78    #[allow(clippy::too_many_arguments)]
79    fn tarjan_dfs(
80        root: NodeId,
81        graph: &RdfGraphAdapter,
82        index_counter: &mut usize,
83        stack: &mut Vec<NodeId>,
84        lowlink: &mut [usize],
85        index: &mut [usize],
86        on_stack: &mut [bool],
87        sccs: &mut Vec<Vec<NodeId>>,
88    ) {
89        // Iterative version using an explicit call stack.
90        // Each frame stores: (node, iterator position into adjacency list)
91        let mut call_stack: Vec<(NodeId, usize)> = Vec::new();
92
93        // Initialise root
94        index[root] = *index_counter;
95        lowlink[root] = *index_counter;
96        *index_counter += 1;
97        stack.push(root);
98        on_stack[root] = true;
99        call_stack.push((root, 0));
100
101        while let Some((v, edge_idx)) = call_stack.last_mut() {
102            let v = *v;
103            let adj = &graph.adjacency[v];
104
105            if *edge_idx < adj.len() {
106                let (w, _) = adj[*edge_idx];
107                *edge_idx += 1;
108
109                if index[w] == usize::MAX {
110                    // Tree edge – recurse
111                    index[w] = *index_counter;
112                    lowlink[w] = *index_counter;
113                    *index_counter += 1;
114                    stack.push(w);
115                    on_stack[w] = true;
116                    call_stack.push((w, 0));
117                } else if on_stack[w] {
118                    // Back edge
119                    let lv = lowlink[v];
120                    lowlink[v] = lv.min(index[w]);
121                }
122            } else {
123                // All neighbours processed – pop frame
124                call_stack.pop();
125
126                if let Some(&(parent, _)) = call_stack.last() {
127                    let lv = lowlink[parent];
128                    lowlink[parent] = lv.min(lowlink[v]);
129                }
130
131                // Check if v is a root of an SCC
132                if lowlink[v] == index[v] {
133                    let mut scc: Vec<NodeId> = Vec::new();
134                    loop {
135                        let w = stack.pop().unwrap_or(v);
136                        on_stack[w] = false;
137                        scc.push(w);
138                        if w == v {
139                            break;
140                        }
141                    }
142                    sccs.push(scc);
143                }
144            }
145        }
146    }
147}
148
149#[cfg(test)]
150mod tests {
151    use super::*;
152
153    fn build_graph(edges: &[(&str, &str)]) -> RdfGraphAdapter {
154        let triples: Vec<(String, String, String)> = edges
155            .iter()
156            .map(|(s, o)| (s.to_string(), "ex:rel".to_string(), o.to_string()))
157            .collect();
158        RdfGraphAdapter::from_triples(&triples)
159    }
160
161    // ── Weakly connected ──────────────────────────────────────────────────
162
163    #[test]
164    fn test_wcc_single_component() {
165        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
166        let comps = ConnectedComponents::weakly_connected(&g);
167        assert_eq!(comps.len(), 1);
168        assert_eq!(comps[0].len(), 3);
169    }
170
171    #[test]
172    fn test_wcc_two_disconnected() {
173        let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:D")]);
174        let comps = ConnectedComponents::weakly_connected(&g);
175        assert_eq!(comps.len(), 2);
176    }
177
178    #[test]
179    fn test_wcc_empty_graph() {
180        let g = RdfGraphAdapter::from_triples(&[]);
181        let comps = ConnectedComponents::weakly_connected(&g);
182        assert!(comps.is_empty());
183    }
184
185    #[test]
186    fn test_wcc_all_nodes_covered() {
187        let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:D"), ("ex:E", "ex:F")]);
188        let comps = ConnectedComponents::weakly_connected(&g);
189        let total: usize = comps.iter().map(|c| c.len()).sum();
190        assert_eq!(total, g.node_count());
191    }
192
193    #[test]
194    fn test_wcc_directed_treated_undirected() {
195        // A → B  and  C → B  are weakly connected via B
196        let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:B")]);
197        let comps = ConnectedComponents::weakly_connected(&g);
198        assert_eq!(comps.len(), 1);
199    }
200
201    // ── Strongly connected ────────────────────────────────────────────────
202
203    #[test]
204    fn test_scc_simple_cycle() {
205        // A → B → C → A  forms one SCC
206        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C"), ("ex:C", "ex:A")]);
207        let sccs = ConnectedComponents::strongly_connected(&g);
208        // One SCC of size 3
209        let big: Vec<&Vec<NodeId>> = sccs.iter().filter(|c| c.len() == 3).collect();
210        assert_eq!(big.len(), 1);
211    }
212
213    #[test]
214    fn test_scc_dag_all_trivial() {
215        // In a DAG every SCC is a single node
216        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
217        let sccs = ConnectedComponents::strongly_connected(&g);
218        assert_eq!(sccs.len(), 3);
219        for scc in &sccs {
220            assert_eq!(scc.len(), 1);
221        }
222    }
223
224    #[test]
225    fn test_scc_two_cycles() {
226        // Cycle1: A→B→A   Cycle2: C→D→C
227        let g = build_graph(&[
228            ("ex:A", "ex:B"),
229            ("ex:B", "ex:A"),
230            ("ex:C", "ex:D"),
231            ("ex:D", "ex:C"),
232        ]);
233        let sccs = ConnectedComponents::strongly_connected(&g);
234        let size2: Vec<&Vec<NodeId>> = sccs.iter().filter(|c| c.len() == 2).collect();
235        assert_eq!(size2.len(), 2);
236    }
237
238    #[test]
239    fn test_scc_empty_graph() {
240        let g = RdfGraphAdapter::from_triples(&[]);
241        let sccs = ConnectedComponents::strongly_connected(&g);
242        assert!(sccs.is_empty());
243    }
244
245    #[test]
246    fn test_scc_all_nodes_covered() {
247        let g = build_graph(&[
248            ("ex:A", "ex:B"),
249            ("ex:B", "ex:C"),
250            ("ex:C", "ex:A"),
251            ("ex:D", "ex:E"),
252        ]);
253        let sccs = ConnectedComponents::strongly_connected(&g);
254        let total: usize = sccs.iter().map(|c| c.len()).sum();
255        assert_eq!(total, g.node_count());
256    }
257
258    #[test]
259    fn test_scc_cycle_with_tail() {
260        // A → B → C → B  (B–C cycle, A is a tail)
261        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C"), ("ex:C", "ex:B")]);
262        let sccs = ConnectedComponents::strongly_connected(&g);
263        // B and C form one SCC of size 2; A is trivial
264        let large: Vec<&Vec<NodeId>> = sccs.iter().filter(|c| c.len() == 2).collect();
265        assert_eq!(large.len(), 1);
266    }
267}