webgraph_algo/sccs/mod.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Algorithms used to compute and work with strongly connected components.
9//!
10//! There are two implementations for directed graph: [Tarjan's
11//! algorithm](tarjan()) and [Kosaraju's algorithm](kosaraju()). The former is to be
12//! preferred in almost all cases: Kosaraju's algorithm is slower and requires
13//! the transpose of the graph—it is mainly useful for testing and debugging.
14//!
15//! For symmetric (i.e., undirected) graphs there is a [sequential](symm_seq())
16//! and a [parallel](symm_par()) implementation that computes connected
17//! components.
18//!
19//! # Examples
20//! ```
21//! use dsi_progress_logger::no_logging;
22//! use webgraph::{graphs::vec_graph::VecGraph};
23//! use webgraph_algo::sccs::*;
24//!
25//! let graph = VecGraph::from_arcs([(0, 1), (1, 2), (2, 0), (1, 3)]);
26//!
27//! // Let's build the graph SCCS with Tarjan's algorithm
28//! let mut scc = tarjan(graph, no_logging![]);
29//!
30//! // Let's sort the SCC by size
31//! let sizes = scc.sort_by_size();
32//!
33//! assert_eq!(sizes, vec![3, 1].into_boxed_slice());
34//! assert_eq!(scc.components(), &vec![0, 0, 0, 1]);
35//! ```
36
37use crate::llp;
38use epserde::prelude::*;
39use rayon::{
40 iter::{IntoParallelRefMutIterator, ParallelIterator},
41 slice::ParallelSliceMut,
42};
43
44mod tarjan;
45pub use tarjan::*;
46
47mod kosaraju;
48pub use kosaraju::*;
49mod symm_seq;
50pub use symm_seq::*;
51
52mod symm_par;
53pub use symm_par::*;
54
55/// Strongly connected components.
56///
57/// An instance of this structure stores the [index of the
58/// component](Sccs::components) of each node. Components are numbered from 0 to
59/// [`num_components`](Sccs::num_components).
60///
61/// Moreover, this structure makes it possible to [sort the components by
62/// size](Sccs::sort_by_size), possibly using [parallel
63/// methods](Sccs::par_sort_by_size).
64#[derive(Epserde, Clone, Copy, Debug)]
65pub struct Sccs<C: AsRef<[usize]> = Box<[usize]>> {
66 num_components: usize,
67 components: C,
68}
69
70impl<C: AsRef<[usize]>> Sccs<C> {
71 pub fn new(num_components: usize, components: C) -> Self {
72 Sccs {
73 num_components,
74 components,
75 }
76 }
77
78 /// Returns the number of strongly connected components.
79 pub fn num_components(&self) -> usize {
80 self.num_components
81 }
82
83 /// Returns a slice containing, for each node, the index of the component
84 /// it belongs to.
85 #[inline(always)]
86 pub fn components(&self) -> &[usize] {
87 self.components.as_ref()
88 }
89
90 /// Returns the sizes of all components.
91 pub fn compute_sizes(&self) -> Box<[usize]> {
92 let mut sizes = vec![0; self.num_components()];
93 for &node_component in self.components() {
94 sizes[node_component] += 1;
95 }
96 sizes.into_boxed_slice()
97 }
98}
99
100impl<C: AsMut<[usize]> + AsRef<[usize]>> Sccs<C> {
101 /// Renumbers the components by decreasing size.
102 ///
103 /// After a call to this method, the sizes of strongly connected components
104 /// will decreasing in the component index. The method returns the sizes of
105 /// the components after the renumbering.
106 pub fn sort_by_size(&mut self) -> Box<[usize]> {
107 let mut sizes = self.compute_sizes();
108 assert!(sizes.len() == self.num_components());
109 let mut sort_perm = Vec::from_iter(0..sizes.len());
110 sort_perm.sort_unstable_by(|&x, &y| sizes[y].cmp(&sizes[x]));
111 let mut inv_perm = vec![0; sizes.len()];
112 sort_perm
113 .iter()
114 .enumerate()
115 .for_each(|(i, &x)| inv_perm[x] = i);
116
117 self.components
118 .as_mut()
119 .iter_mut()
120 .for_each(|node_component| *node_component = inv_perm[*node_component]);
121 sizes.sort_by(|&x, &y| y.cmp(&x));
122 sizes
123 }
124
125 /// Renumbers the components by decreasing size using parallel methods.
126 ///
127 /// After a call to this method, the sizes of strongly connected components
128 /// will decreasing in the component index. The method returns the sizes of
129 /// the components after the renumbering.
130 pub fn par_sort_by_size(&mut self) -> Box<[usize]> {
131 let mut sizes = self.compute_sizes();
132 assert!(sizes.len() == self.num_components());
133 let mut sort_perm = Vec::from_iter(0..sizes.len());
134 sort_perm.par_sort_unstable_by(|&x, &y| sizes[y].cmp(&sizes[x]));
135 let mut inv_perm = vec![0; sizes.len()];
136 llp::invert_permutation(&sort_perm, &mut inv_perm);
137 self.components
138 .as_mut()
139 .par_iter_mut()
140 .for_each(|node_component| *node_component = inv_perm[*node_component]);
141 sizes.sort_by(|&x, &y| y.cmp(&x));
142 sizes
143 }
144}