Skip to main content

graphos_adapters/plugins/algorithms/
components.rs

1//! Graph component algorithms.
2//!
3//! This module provides algorithms for finding connected components,
4//! strongly connected components, and topological ordering.
5
6use graphos_common::types::{NodeId, Value};
7use graphos_common::utils::error::Result;
8use graphos_common::utils::hash::{FxHashMap, FxHashSet};
9use graphos_core::graph::Direction;
10use graphos_core::graph::lpg::LpgStore;
11
12use super::super::{AlgorithmResult, ParameterDef, Parameters};
13use super::traits::{ComponentResultBuilder, GraphAlgorithm};
14
15// ============================================================================
16// Union-Find Data Structure
17// ============================================================================
18
19/// Union-Find (Disjoint Set Union) with path compression and union by rank.
20///
21/// This is the optimal data structure for incremental connectivity queries,
22/// providing nearly O(1) amortized operations.
23pub struct UnionFind {
24    /// Parent pointers (self-loop means root)
25    parent: Vec<usize>,
26    /// Rank for union by rank
27    rank: Vec<usize>,
28}
29
30impl UnionFind {
31    /// Creates a new Union-Find with `n` elements, each in its own set.
32    pub fn new(n: usize) -> Self {
33        Self {
34            parent: (0..n).collect(),
35            rank: vec![0; n],
36        }
37    }
38
39    /// Finds the representative (root) of the set containing `x`.
40    ///
41    /// Uses path compression for amortized O(α(n)) complexity.
42    pub fn find(&mut self, x: usize) -> usize {
43        if self.parent[x] != x {
44            self.parent[x] = self.find(self.parent[x]); // Path compression
45        }
46        self.parent[x]
47    }
48
49    /// Unions the sets containing `x` and `y`.
50    ///
51    /// Returns `true` if they were in different sets (union performed).
52    pub fn union(&mut self, x: usize, y: usize) -> bool {
53        let root_x = self.find(x);
54        let root_y = self.find(y);
55
56        if root_x == root_y {
57            return false;
58        }
59
60        // Union by rank
61        match self.rank[root_x].cmp(&self.rank[root_y]) {
62            std::cmp::Ordering::Less => {
63                self.parent[root_x] = root_y;
64            }
65            std::cmp::Ordering::Greater => {
66                self.parent[root_y] = root_x;
67            }
68            std::cmp::Ordering::Equal => {
69                self.parent[root_y] = root_x;
70                self.rank[root_x] += 1;
71            }
72        }
73
74        true
75    }
76
77    /// Returns `true` if `x` and `y` are in the same set.
78    pub fn connected(&mut self, x: usize, y: usize) -> bool {
79        self.find(x) == self.find(y)
80    }
81}
82
83// ============================================================================
84// Connected Components (Undirected/Weakly Connected)
85// ============================================================================
86
87/// Finds connected components in an undirected graph (or weakly connected
88/// components in a directed graph).
89///
90/// Uses Union-Find for optimal performance.
91///
92/// # Returns
93///
94/// A map from node ID to component ID.
95pub fn connected_components(store: &LpgStore) -> FxHashMap<NodeId, u64> {
96    let node_ids = store.node_ids();
97    let n = node_ids.len();
98
99    if n == 0 {
100        return FxHashMap::default();
101    }
102
103    // Map NodeId -> index
104    let mut node_to_idx: FxHashMap<NodeId, usize> = FxHashMap::default();
105    for (idx, &node) in node_ids.iter().enumerate() {
106        node_to_idx.insert(node, idx);
107    }
108
109    let mut uf = UnionFind::new(n);
110
111    // Process all edges (treating graph as undirected)
112    for &node in &node_ids {
113        let idx = node_to_idx[&node];
114
115        // Outgoing edges
116        for (neighbor, _) in store.edges_from(node, Direction::Outgoing) {
117            if let Some(&neighbor_idx) = node_to_idx.get(&neighbor) {
118                uf.union(idx, neighbor_idx);
119            }
120        }
121
122        // Incoming edges (for weakly connected)
123        for (neighbor, _) in store.edges_from(node, Direction::Incoming) {
124            if let Some(&neighbor_idx) = node_to_idx.get(&neighbor) {
125                uf.union(idx, neighbor_idx);
126            }
127        }
128    }
129
130    // Build result: map each node to its component
131    let mut root_to_component: FxHashMap<usize, u64> = FxHashMap::default();
132    let mut next_component = 0u64;
133
134    let mut result: FxHashMap<NodeId, u64> = FxHashMap::default();
135    for (idx, &node) in node_ids.iter().enumerate() {
136        let root = uf.find(idx);
137        let component_id = *root_to_component.entry(root).or_insert_with(|| {
138            let id = next_component;
139            next_component += 1;
140            id
141        });
142        result.insert(node, component_id);
143    }
144
145    result
146}
147
148/// Returns the number of connected components.
149pub fn connected_component_count(store: &LpgStore) -> usize {
150    let components = connected_components(store);
151    let unique: FxHashSet<u64> = components.values().copied().collect();
152    unique.len()
153}
154
155// ============================================================================
156// Strongly Connected Components (Tarjan's Algorithm)
157// ============================================================================
158
159/// Tarjan's algorithm state for a single node.
160struct TarjanState {
161    index: usize,
162    low_link: usize,
163    on_stack: bool,
164}
165
166/// Finds strongly connected components in a directed graph using Tarjan's algorithm.
167///
168/// # Returns
169///
170/// A map from node ID to SCC ID.
171pub fn strongly_connected_components(store: &LpgStore) -> FxHashMap<NodeId, u64> {
172    let node_ids = store.node_ids();
173
174    if node_ids.is_empty() {
175        return FxHashMap::default();
176    }
177
178    // State for Tarjan's algorithm
179    let mut state: FxHashMap<NodeId, TarjanState> = FxHashMap::default();
180    let mut stack: Vec<NodeId> = Vec::new();
181    let mut index = 0usize;
182    let mut scc_id = 0u64;
183    let mut result: FxHashMap<NodeId, u64> = FxHashMap::default();
184
185    // We need to visit all nodes, starting DFS from unvisited ones
186    for &start in &node_ids {
187        if state.contains_key(&start) {
188            continue;
189        }
190
191        // Iterative Tarjan's using explicit stack
192        // Each entry: (node, neighbor_iter_state, is_first_visit)
193        let mut dfs_stack: Vec<(NodeId, Vec<NodeId>, usize, bool)> = Vec::new();
194
195        // Initialize start node
196        state.insert(
197            start,
198            TarjanState {
199                index,
200                low_link: index,
201                on_stack: true,
202            },
203        );
204        index += 1;
205        stack.push(start);
206
207        let neighbors: Vec<NodeId> = store
208            .edges_from(start, Direction::Outgoing)
209            .map(|(n, _)| n)
210            .collect();
211        dfs_stack.push((start, neighbors, 0, true));
212
213        while let Some((node, neighbors, neighbor_idx, _first_visit)) = dfs_stack.last_mut() {
214            let node = *node;
215
216            if *neighbor_idx >= neighbors.len() {
217                // Done with all neighbors
218                dfs_stack.pop();
219
220                // Check if this is an SCC root
221                let node_state = state.get(&node).unwrap();
222                if node_state.low_link == node_state.index {
223                    // Pop the SCC from stack
224                    loop {
225                        let w = stack.pop().unwrap();
226                        state.get_mut(&w).unwrap().on_stack = false;
227                        result.insert(w, scc_id);
228                        if w == node {
229                            break;
230                        }
231                    }
232                    scc_id += 1;
233                }
234
235                // Update parent's low_link
236                if let Some((parent, _, _, _)) = dfs_stack.last() {
237                    let node_low = state.get(&node).unwrap().low_link;
238                    let parent_state = state.get_mut(parent).unwrap();
239                    if node_low < parent_state.low_link {
240                        parent_state.low_link = node_low;
241                    }
242                }
243
244                continue;
245            }
246
247            let neighbor = neighbors[*neighbor_idx];
248            *neighbor_idx += 1;
249
250            if let Some(neighbor_state) = state.get(&neighbor) {
251                // Already visited
252                if neighbor_state.on_stack {
253                    // Back edge: update low_link
254                    // Extract index before mutable borrow
255                    let neighbor_index = neighbor_state.index;
256                    let node_state = state.get_mut(&node).unwrap();
257                    if neighbor_index < node_state.low_link {
258                        node_state.low_link = neighbor_index;
259                    }
260                }
261            } else {
262                // Unvisited: recurse
263                state.insert(
264                    neighbor,
265                    TarjanState {
266                        index,
267                        low_link: index,
268                        on_stack: true,
269                    },
270                );
271                index += 1;
272                stack.push(neighbor);
273
274                let neighbor_neighbors: Vec<NodeId> = store
275                    .edges_from(neighbor, Direction::Outgoing)
276                    .map(|(n, _)| n)
277                    .collect();
278                dfs_stack.push((neighbor, neighbor_neighbors, 0, true));
279            }
280        }
281    }
282
283    result
284}
285
286/// Returns the number of strongly connected components.
287pub fn strongly_connected_component_count(store: &LpgStore) -> usize {
288    let components = strongly_connected_components(store);
289    let unique: FxHashSet<u64> = components.values().copied().collect();
290    unique.len()
291}
292
293// ============================================================================
294// Topological Sort (Kahn's Algorithm)
295// ============================================================================
296
297/// Performs topological sort on a directed acyclic graph using Kahn's algorithm.
298///
299/// # Returns
300///
301/// `Some(order)` if the graph is a DAG, `None` if there's a cycle.
302pub fn topological_sort(store: &LpgStore) -> Option<Vec<NodeId>> {
303    let node_ids = store.node_ids();
304
305    if node_ids.is_empty() {
306        return Some(Vec::new());
307    }
308
309    // Compute in-degrees
310    let mut in_degree: FxHashMap<NodeId, usize> = FxHashMap::default();
311    for &node in &node_ids {
312        in_degree.entry(node).or_insert(0);
313    }
314
315    for &node in &node_ids {
316        for (neighbor, _) in store.edges_from(node, Direction::Outgoing) {
317            *in_degree.entry(neighbor).or_default() += 1;
318        }
319    }
320
321    // Initialize queue with nodes having in-degree 0
322    let mut queue: Vec<NodeId> = in_degree
323        .iter()
324        .filter(|&(_, deg)| *deg == 0)
325        .map(|(&node, _)| node)
326        .collect();
327
328    let mut result = Vec::with_capacity(node_ids.len());
329
330    while let Some(node) = queue.pop() {
331        result.push(node);
332
333        for (neighbor, _) in store.edges_from(node, Direction::Outgoing) {
334            if let Some(deg) = in_degree.get_mut(&neighbor) {
335                *deg -= 1;
336                if *deg == 0 {
337                    queue.push(neighbor);
338                }
339            }
340        }
341    }
342
343    // Check for cycle
344    if result.len() == node_ids.len() {
345        Some(result)
346    } else {
347        None // Cycle detected
348    }
349}
350
351/// Checks if the graph is a DAG (Directed Acyclic Graph).
352pub fn is_dag(store: &LpgStore) -> bool {
353    topological_sort(store).is_some()
354}
355
356// ============================================================================
357// Algorithm Wrappers for Plugin Registry
358// ============================================================================
359
360/// Connected components algorithm wrapper.
361pub struct ConnectedComponentsAlgorithm;
362
363impl GraphAlgorithm for ConnectedComponentsAlgorithm {
364    fn name(&self) -> &str {
365        "connected_components"
366    }
367
368    fn description(&self) -> &str {
369        "Find connected components (undirected) or weakly connected components (directed)"
370    }
371
372    fn parameters(&self) -> &[ParameterDef] {
373        &[]
374    }
375
376    fn execute(&self, store: &LpgStore, _params: &Parameters) -> Result<AlgorithmResult> {
377        let components = connected_components(store);
378
379        let mut builder = ComponentResultBuilder::with_capacity(components.len());
380        for (node, component) in components {
381            builder.push(node, component);
382        }
383
384        Ok(builder.build())
385    }
386}
387
388/// Strongly connected components algorithm wrapper.
389pub struct StronglyConnectedComponentsAlgorithm;
390
391impl GraphAlgorithm for StronglyConnectedComponentsAlgorithm {
392    fn name(&self) -> &str {
393        "strongly_connected_components"
394    }
395
396    fn description(&self) -> &str {
397        "Find strongly connected components using Tarjan's algorithm"
398    }
399
400    fn parameters(&self) -> &[ParameterDef] {
401        &[]
402    }
403
404    fn execute(&self, store: &LpgStore, _params: &Parameters) -> Result<AlgorithmResult> {
405        let components = strongly_connected_components(store);
406
407        let mut builder = ComponentResultBuilder::with_capacity(components.len());
408        for (node, component) in components {
409            builder.push(node, component);
410        }
411
412        Ok(builder.build())
413    }
414}
415
416/// Topological sort algorithm wrapper.
417pub struct TopologicalSortAlgorithm;
418
419impl GraphAlgorithm for TopologicalSortAlgorithm {
420    fn name(&self) -> &str {
421        "topological_sort"
422    }
423
424    fn description(&self) -> &str {
425        "Topological ordering of a DAG using Kahn's algorithm"
426    }
427
428    fn parameters(&self) -> &[ParameterDef] {
429        &[]
430    }
431
432    fn execute(&self, store: &LpgStore, _params: &Parameters) -> Result<AlgorithmResult> {
433        match topological_sort(store) {
434            Some(order) => {
435                let mut result =
436                    AlgorithmResult::new(vec!["node_id".to_string(), "order".to_string()]);
437                for (idx, node) in order.iter().enumerate() {
438                    result.add_row(vec![Value::Int64(node.0 as i64), Value::Int64(idx as i64)]);
439                }
440                Ok(result)
441            }
442            None => {
443                // Return empty result with error indication for cycles
444                let mut result = AlgorithmResult::new(vec!["error".to_string()]);
445                result.add_row(vec![Value::String("Graph contains a cycle".into())]);
446                Ok(result)
447            }
448        }
449    }
450}
451
452#[cfg(test)]
453mod tests {
454    use super::*;
455
456    fn create_dag() -> LpgStore {
457        let store = LpgStore::new();
458
459        // Create a DAG:
460        //   0 -> 1 -> 3
461        //   |    |
462        //   v    v
463        //   2 -> 4
464        let n0 = store.create_node(&["Node"]);
465        let n1 = store.create_node(&["Node"]);
466        let n2 = store.create_node(&["Node"]);
467        let n3 = store.create_node(&["Node"]);
468        let n4 = store.create_node(&["Node"]);
469
470        store.create_edge(n0, n1, "EDGE");
471        store.create_edge(n0, n2, "EDGE");
472        store.create_edge(n1, n3, "EDGE");
473        store.create_edge(n1, n4, "EDGE");
474        store.create_edge(n2, n4, "EDGE");
475
476        store
477    }
478
479    fn create_cyclic_graph() -> LpgStore {
480        let store = LpgStore::new();
481
482        // Create a cycle: 0 -> 1 -> 2 -> 0
483        let n0 = store.create_node(&["Node"]);
484        let n1 = store.create_node(&["Node"]);
485        let n2 = store.create_node(&["Node"]);
486
487        store.create_edge(n0, n1, "EDGE");
488        store.create_edge(n1, n2, "EDGE");
489        store.create_edge(n2, n0, "EDGE");
490
491        store
492    }
493
494    fn create_disconnected_graph() -> LpgStore {
495        let store = LpgStore::new();
496
497        // Two disconnected components: {0, 1} and {2, 3}
498        let n0 = store.create_node(&["Node"]);
499        let n1 = store.create_node(&["Node"]);
500        let n2 = store.create_node(&["Node"]);
501        let n3 = store.create_node(&["Node"]);
502
503        store.create_edge(n0, n1, "EDGE");
504        store.create_edge(n2, n3, "EDGE");
505
506        store
507    }
508
509    #[test]
510    fn test_union_find() {
511        let mut uf = UnionFind::new(5);
512
513        assert!(!uf.connected(0, 1));
514        uf.union(0, 1);
515        assert!(uf.connected(0, 1));
516
517        uf.union(2, 3);
518        assert!(uf.connected(2, 3));
519        assert!(!uf.connected(0, 2));
520
521        uf.union(1, 2);
522        assert!(uf.connected(0, 3));
523    }
524
525    #[test]
526    fn test_connected_components_single() {
527        let store = create_dag();
528        let count = connected_component_count(&store);
529        assert_eq!(count, 1);
530    }
531
532    #[test]
533    fn test_connected_components_disconnected() {
534        let store = create_disconnected_graph();
535        let count = connected_component_count(&store);
536        assert_eq!(count, 2);
537    }
538
539    #[test]
540    fn test_scc_dag() {
541        let store = create_dag();
542        // In a DAG, each node is its own SCC
543        let count = strongly_connected_component_count(&store);
544        assert_eq!(count, 5);
545    }
546
547    #[test]
548    fn test_scc_cycle() {
549        let store = create_cyclic_graph();
550        // All nodes in a single cycle form one SCC
551        let count = strongly_connected_component_count(&store);
552        assert_eq!(count, 1);
553    }
554
555    #[test]
556    fn test_topological_sort_dag() {
557        let store = create_dag();
558        let order = topological_sort(&store);
559        assert!(order.is_some());
560
561        let order = order.unwrap();
562        assert_eq!(order.len(), 5);
563
564        // Verify topological property: if there's an edge u -> v, u comes before v
565        let position: FxHashMap<NodeId, usize> =
566            order.iter().enumerate().map(|(i, &n)| (n, i)).collect();
567
568        for &node in &order {
569            for (neighbor, _) in store.edges_from(node, Direction::Outgoing) {
570                assert!(position[&node] < position[&neighbor]);
571            }
572        }
573    }
574
575    #[test]
576    fn test_topological_sort_cycle() {
577        let store = create_cyclic_graph();
578        let order = topological_sort(&store);
579        assert!(order.is_none()); // Cycle detected
580    }
581
582    #[test]
583    fn test_is_dag() {
584        let dag = create_dag();
585        assert!(is_dag(&dag));
586
587        let cyclic = create_cyclic_graph();
588        assert!(!is_dag(&cyclic));
589    }
590}