webgraph_algo/sccs/tarjan.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::Sccs;
9use dsi_progress_logger::ProgressLog;
10use std::ops::ControlFlow::{Break, Continue};
11use sux::bits::BitVec;
12use sux::traits::BitVecOpsMut;
13use webgraph::traits::RandomAccessGraph;
14use webgraph::visits::{Sequential, StoppedWhenDone, depth_first::*};
15
16/// Tarjan's algorithm for strongly connected components.
17pub fn tarjan(graph: impl RandomAccessGraph, pl: &mut impl ProgressLog) -> Sccs {
18 let num_nodes = graph.num_nodes();
19 pl.item_name("node");
20 pl.expected_updates(Some(num_nodes));
21 pl.start("Computing strongly connected components...");
22
23 let mut visit = SeqPred::new(&graph);
24 let mut lead = BitVec::with_capacity(128);
25 // Sentinel value guaranteeing that this stack is never empty
26 lead.push(true);
27 let mut component_stack = Vec::with_capacity(16);
28 let mut high_link = vec![0; num_nodes].into_boxed_slice();
29 // Node timestamps will start at num_nodes and will decrease with time,
30 // that is, they will go in opposite order with respect to the classical
31 // implementation. We keep track of the highest index seen, instead
32 // of the lowest index seen, and we number components starting from
33 // zero. We also raise index by the number of elements of each emitted
34 // component. In this way unvisited nodes and emitted nodes have always
35 // a lower value than index. This strategy is analogous to that
36 // described in https://www.timl.id.au/scc, but in that case using
37 // increasing timestamps results in components not being labeled
38 // starting from zero, which is the case here instead.
39 let mut index = num_nodes;
40 let mut root_low_link = 0;
41 let mut number_of_components = 0;
42
43 if visit
44 .visit(0..num_nodes, |event| {
45 match event {
46 EventPred::Init { .. } => {
47 root_low_link = index;
48 }
49 EventPred::Previsit { node: curr, .. } => {
50 pl.light_update();
51 high_link[curr] = index; // >= num_nodes, <= usize::MAX
52 index -= 1;
53 lead.push(true);
54 }
55 EventPred::Revisit { node, pred, .. } => {
56 // curr has not been emitted yet but it has a higher link
57 if high_link[pred] < high_link[node] {
58 // Safe as the stack is never empty
59 lead.set(lead.len() - 1, false);
60 high_link[pred] = high_link[node];
61 if high_link[pred] == root_low_link && index == 0 {
62 // All nodes have been discovered, and we
63 // found a high link identical to that of the
64 // root: thus, all nodes on the visit path
65 // and all nodes in the component stack
66 // belong to the same component.
67
68 // pred is the last node on the visit path,
69 // so it won't be returned by the stack method
70 high_link[pred] = number_of_components;
71 for &node in component_stack.iter() {
72 high_link[node] = number_of_components;
73 }
74 // Nodes on the visit path will be assigned
75 // to the same component later
76 return Break(StoppedWhenDone {});
77 }
78 }
79 }
80 EventPred::Postvisit {
81 node, parent: pred, ..
82 } => {
83 // Safe as the stack is never empty
84 if lead.pop().unwrap() {
85 // Set the component index of nodes in the component
86 // stack with higher link than the current node
87 while let Some(comp_node) = component_stack.pop() {
88 // TODO: ugly
89 if high_link[node] < high_link[comp_node] {
90 component_stack.push(comp_node);
91 break;
92 }
93 index += 1;
94 high_link[comp_node] = number_of_components;
95 }
96 // Set the component index of the current node
97 high_link[node] = number_of_components;
98 index += 1;
99 number_of_components += 1;
100 } else {
101 component_stack.push(node);
102 // Propagate knowledge to the parent
103 if high_link[pred] < high_link[node] {
104 // Safe as the stack is never empty
105 lead.set(lead.len() - 1, false);
106 high_link[pred] = high_link[node];
107 }
108 }
109 }
110 _ => {}
111 }
112 Continue(())
113 })
114 .is_break()
115 {
116 // In case we exited early, complete the assignment
117 for node in visit.stack() {
118 high_link[node] = number_of_components;
119 }
120 number_of_components += 1;
121 }
122 pl.done();
123 Sccs::new(number_of_components, high_link)
124}