webgraph_algo/sccs/tarjan.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::Sccs;
9use crate::visits::{depth_first::*, Sequential, StoppedWhenDone};
10use dsi_progress_logger::ProgressLog;
11use std::ops::ControlFlow::{Break, Continue};
12use sux::bits::BitVec;
13use webgraph::traits::RandomAccessGraph;
14
15/// Tarjan's algorithm for strongly connected components.
16pub fn tarjan(graph: impl RandomAccessGraph, pl: &mut impl ProgressLog) -> Sccs {
17 let num_nodes = graph.num_nodes();
18 pl.item_name("node");
19 pl.expected_updates(Some(num_nodes));
20 pl.start("Computing strongly connected components...");
21
22 let mut visit = SeqPred::new(&graph);
23 let mut lead = BitVec::with_capacity(128);
24 // Sentinel value guaranteeing that this stack is never empty
25 lead.push(true);
26 let mut component_stack = Vec::with_capacity(16);
27 let mut high_link = vec![0; num_nodes].into_boxed_slice();
28 // Node timestamps will start at num_nodes and will decrease with time,
29 // that is, they will go in opposite order with respect to the classical
30 // implementation. We keep track of the highest index seen, instead
31 // of the lowest index seen, and we number components starting from
32 // zero. We also raise index by the number of elements of each emitted
33 // component. In this way unvisited nodes and emitted nodes have always
34 // a lower value than index. This strategy is analogous to that
35 // described in https://www.timl.id.au/scc, but in that case using
36 // increasing timestamps results in components not being labelled
37 // starting from zero, which is the case here instead.
38 let mut index = num_nodes;
39 let mut root_low_link = 0;
40 let mut number_of_components = 0;
41
42 if visit
43 .visit(0..num_nodes, |event| {
44 match event {
45 EventPred::Init { .. } => {
46 root_low_link = index;
47 }
48 EventPred::Previsit { node: curr, .. } => {
49 pl.light_update();
50 high_link[curr] = index; // >= num_nodes, <= usize::MAX
51 index -= 1;
52 lead.push(true);
53 }
54 EventPred::Revisit { node, pred, .. } => {
55 // curr has not been emitted yet but it has a higher link
56 if high_link[pred] < high_link[node] {
57 // Safe as the stack is never empty
58 lead.set(lead.len() - 1, false);
59 high_link[pred] = high_link[node];
60 if high_link[pred] == root_low_link && index == 0 {
61 // All nodes have been discovered, and we
62 // found a high link identical to that of the
63 // root: thus, all nodes on the visit path
64 // and all nodes in the component stack
65 // belong to the same component.
66
67 // pred is the last node on the visit path,
68 // so it won't be returned by the stack method
69 high_link[pred] = number_of_components;
70 for &node in component_stack.iter() {
71 high_link[node] = number_of_components;
72 }
73 // Nodes on the visit path will be assigned
74 // to the same component later
75 return Break(StoppedWhenDone {});
76 }
77 }
78 }
79 EventPred::Postvisit { node, pred, .. } => {
80 // Safe as the stack is never empty
81 if lead.pop().unwrap() {
82 // Set the component index of nodes in the component
83 // stack with higher link than the current node
84 while let Some(comp_node) = component_stack.pop() {
85 // TODO: ugly
86 if high_link[node] < high_link[comp_node] {
87 component_stack.push(comp_node);
88 break;
89 }
90 index += 1;
91 high_link[comp_node] = number_of_components;
92 }
93 // Set the component index of the current node
94 high_link[node] = number_of_components;
95 index += 1;
96 number_of_components += 1;
97 } else {
98 component_stack.push(node);
99 // Propagate knowledge to the parent
100 if high_link[pred] < high_link[node] {
101 // Safe as the stack is never empty
102 lead.set(lead.len() - 1, false);
103 high_link[pred] = high_link[node];
104 }
105 }
106 }
107 _ => {}
108 }
109 Continue(())
110 })
111 .is_break()
112 {
113 // In case we exited early, complete the assignment
114 for node in visit.stack() {
115 high_link[node] = number_of_components;
116 }
117 number_of_components += 1;
118 }
119 pl.done();
120 Sccs::new(number_of_components, high_link)
121}