webgraph_algo/sccs/
tarjan.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::Sccs;
9use dsi_progress_logger::ProgressLog;
10use std::ops::ControlFlow::{Break, Continue};
11use sux::bits::BitVec;
12use sux::traits::BitVecOpsMut;
13use webgraph::traits::RandomAccessGraph;
14use webgraph::visits::{Sequential, StoppedWhenDone, depth_first::*};
15
16/// Tarjan's algorithm for strongly connected components.
17pub fn tarjan(graph: impl RandomAccessGraph, pl: &mut impl ProgressLog) -> Sccs {
18    let num_nodes = graph.num_nodes();
19    pl.item_name("node");
20    pl.expected_updates(Some(num_nodes));
21    pl.start("Computing strongly connected components...");
22
23    let mut visit = SeqPred::new(&graph);
24    let mut lead = BitVec::with_capacity(128);
25    // Sentinel value guaranteeing that this stack is never empty
26    lead.push(true);
27    let mut component_stack = Vec::with_capacity(16);
28    let mut high_link = vec![0; num_nodes].into_boxed_slice();
29    // Node timestamps will start at num_nodes and will decrease with time,
30    // that is, they will go in opposite order with respect to the classical
31    // implementation. We keep track of the highest index seen, instead
32    // of the lowest index seen, and we number components starting from
33    // zero. We also raise index by the number of elements of each emitted
34    // component. In this way unvisited nodes and emitted nodes have always
35    // a lower value than index. This strategy is analogous to that
36    // described in https://www.timl.id.au/scc, but in that case using
37    // increasing timestamps results in components not being labeled
38    // starting from zero, which is the case here instead.
39    let mut index = num_nodes;
40    let mut root_low_link = 0;
41    let mut number_of_components = 0;
42
43    if visit
44        .visit(0..num_nodes, |event| {
45            match event {
46                EventPred::Init { .. } => {
47                    root_low_link = index;
48                }
49                EventPred::Previsit { node: curr, .. } => {
50                    pl.light_update();
51                    high_link[curr] = index; // >= num_nodes, <= usize::MAX
52                    index -= 1;
53                    lead.push(true);
54                }
55                EventPred::Revisit { node, pred, .. } => {
56                    // curr has not been emitted yet but it has a higher link
57                    if high_link[pred] < high_link[node] {
58                        // Safe as the stack is never empty
59                        lead.set(lead.len() - 1, false);
60                        high_link[pred] = high_link[node];
61                        if high_link[pred] == root_low_link && index == 0 {
62                            // All nodes have been discovered, and we
63                            // found a high link identical to that of the
64                            // root: thus, all nodes on the visit path
65                            // and all nodes in the component stack
66                            // belong to the same component.
67
68                            // pred is the last node on the visit path,
69                            // so it won't be returned by the stack method
70                            high_link[pred] = number_of_components;
71                            for &node in component_stack.iter() {
72                                high_link[node] = number_of_components;
73                            }
74                            // Nodes on the visit path will be assigned
75                            // to the same component later
76                            return Break(StoppedWhenDone {});
77                        }
78                    }
79                }
80                EventPred::Postvisit {
81                    node, parent: pred, ..
82                } => {
83                    // Safe as the stack is never empty
84                    if lead.pop().unwrap() {
85                        // Set the component index of nodes in the component
86                        // stack with higher link than the current node
87                        while let Some(comp_node) = component_stack.pop() {
88                            // TODO: ugly
89                            if high_link[node] < high_link[comp_node] {
90                                component_stack.push(comp_node);
91                                break;
92                            }
93                            index += 1;
94                            high_link[comp_node] = number_of_components;
95                        }
96                        // Set the component index of the current node
97                        high_link[node] = number_of_components;
98                        index += 1;
99                        number_of_components += 1;
100                    } else {
101                        component_stack.push(node);
102                        // Propagate knowledge to the parent
103                        if high_link[pred] < high_link[node] {
104                            // Safe as the stack is never empty
105                            lead.set(lead.len() - 1, false);
106                            high_link[pred] = high_link[node];
107                        }
108                    }
109                }
110                _ => {}
111            }
112            Continue(())
113        })
114        .is_break()
115    {
116        // In case we exited early, complete the assignment
117        for node in visit.stack() {
118            high_link[node] = number_of_components;
119        }
120        number_of_components += 1;
121    }
122    pl.done();
123    Sccs::new(number_of_components, high_link)
124}