webgraph_algo/sccs/
tarjan.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::Sccs;
9use crate::visits::{depth_first::*, Sequential, StoppedWhenDone};
10use dsi_progress_logger::ProgressLog;
11use std::ops::ControlFlow::{Break, Continue};
12use sux::bits::BitVec;
13use webgraph::traits::RandomAccessGraph;
14
15/// Tarjan's algorithm for strongly connected components.
16pub fn tarjan(graph: impl RandomAccessGraph, pl: &mut impl ProgressLog) -> Sccs {
17    let num_nodes = graph.num_nodes();
18    pl.item_name("node");
19    pl.expected_updates(Some(num_nodes));
20    pl.start("Computing strongly connected components...");
21
22    let mut visit = SeqPred::new(&graph);
23    let mut lead = BitVec::with_capacity(128);
24    // Sentinel value guaranteeing that this stack is never empty
25    lead.push(true);
26    let mut component_stack = Vec::with_capacity(16);
27    let mut high_link = vec![0; num_nodes].into_boxed_slice();
28    // Node timestamps will start at num_nodes and will decrease with time,
29    // that is, they will go in opposite order with respect to the classical
30    // implementation. We keep track of the highest index seen, instead
31    // of the lowest index seen, and we number components starting from
32    // zero. We also raise index by the number of elements of each emitted
33    // component. In this way unvisited nodes and emitted nodes have always
34    // a lower value than index. This strategy is analogous to that
35    // described in https://www.timl.id.au/scc, but in that case using
36    // increasing timestamps results in components not being labelled
37    // starting from zero, which is the case here instead.
38    let mut index = num_nodes;
39    let mut root_low_link = 0;
40    let mut number_of_components = 0;
41
42    if visit
43        .visit(0..num_nodes, |event| {
44            match event {
45                EventPred::Init { .. } => {
46                    root_low_link = index;
47                }
48                EventPred::Previsit { node: curr, .. } => {
49                    pl.light_update();
50                    high_link[curr] = index; // >= num_nodes, <= usize::MAX
51                    index -= 1;
52                    lead.push(true);
53                }
54                EventPred::Revisit { node, pred, .. } => {
55                    // curr has not been emitted yet but it has a higher link
56                    if high_link[pred] < high_link[node] {
57                        // Safe as the stack is never empty
58                        lead.set(lead.len() - 1, false);
59                        high_link[pred] = high_link[node];
60                        if high_link[pred] == root_low_link && index == 0 {
61                            // All nodes have been discovered, and we
62                            // found a high link identical to that of the
63                            // root: thus, all nodes on the visit path
64                            // and all nodes in the component stack
65                            // belong to the same component.
66
67                            // pred is the last node on the visit path,
68                            // so it won't be returned by the stack method
69                            high_link[pred] = number_of_components;
70                            for &node in component_stack.iter() {
71                                high_link[node] = number_of_components;
72                            }
73                            // Nodes on the visit path will be assigned
74                            // to the same component later
75                            return Break(StoppedWhenDone {});
76                        }
77                    }
78                }
79                EventPred::Postvisit { node, pred, .. } => {
80                    // Safe as the stack is never empty
81                    if lead.pop().unwrap() {
82                        // Set the component index of nodes in the component
83                        // stack with higher link than the current node
84                        while let Some(comp_node) = component_stack.pop() {
85                            // TODO: ugly
86                            if high_link[node] < high_link[comp_node] {
87                                component_stack.push(comp_node);
88                                break;
89                            }
90                            index += 1;
91                            high_link[comp_node] = number_of_components;
92                        }
93                        // Set the component index of the current node
94                        high_link[node] = number_of_components;
95                        index += 1;
96                        number_of_components += 1;
97                    } else {
98                        component_stack.push(node);
99                        // Propagate knowledge to the parent
100                        if high_link[pred] < high_link[node] {
101                            // Safe as the stack is never empty
102                            lead.set(lead.len() - 1, false);
103                            high_link[pred] = high_link[node];
104                        }
105                    }
106                }
107                _ => {}
108            }
109            Continue(())
110        })
111        .is_break()
112    {
113        // In case we exited early, complete the assignment
114        for node in visit.stack() {
115            high_link[node] = number_of_components;
116        }
117        number_of_components += 1;
118    }
119    pl.done();
120    Sccs::new(number_of_components, high_link)
121}