webgraph_cli/
sccs.rs

1/*
2 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7use crate::{build_info, num_threads_parser, pretty_print_elapsed, IntVectorFormat};
8use anyhow::Result;
9use clap::Parser;
10use dsi_bitstream::prelude::factory::CodesReaderFactoryHelper;
11use dsi_bitstream::prelude::*;
12use dsi_progress_logger::{progress_logger, ProgressLog};
13use std::path::PathBuf;
14use webgraph::graphs::bvgraph::get_endianness;
15use webgraph::prelude::*;
16
17use super::GlobalArgs;
18
19#[derive(Parser, Debug)]
20#[command(name = "webgraph-sccs", version=build_info::version_string())]
21/// Computes the strongly connected components of a graph of given basename.
22#[doc = include_str!("common_ps.txt")]
23#[doc = include_str!("common_env.txt")]
24pub struct Cli {
25    #[clap(flatten)]
26    global_args: GlobalArgs,
27    #[clap(flatten)]
28    args: CliArgs,
29}
30
31#[derive(Parser, Debug)]
32pub struct CliArgs {
33    /// The basename of the graph.
34    pub basename: PathBuf,
35
36    /// The path where to save the strongly connected components.
37    pub sccs: PathBuf,
38
39    #[arg(short = 's', long)]
40    /// Compute the size of the strongly connected components and store them
41    /// at the given path.
42    pub sizes: Option<PathBuf>,
43
44    #[arg(short, long)]
45    /// Renumber components in decreasing-size order (implicitly, compute sizes).
46    pub renumber: bool,
47
48    #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1), value_parser = num_threads_parser)]
49    /// The number of threads to use to compute the sizes of the components.
50    pub num_threads: usize,
51
52    #[arg(long, value_enum, default_value_t = IntVectorFormat::Ascii)]
53    /// The storage format for components and component sizes.
54    pub fmt: IntVectorFormat,
55}
56
57pub fn cli_main<I, T>(args: I) -> Result<()>
58where
59    I: IntoIterator<Item = T>,
60    T: Into<std::ffi::OsString> + Clone,
61{
62    let start = std::time::Instant::now();
63    let cli = Cli::parse_from(args);
64    main(cli.global_args, cli.args)?;
65
66    log::info!(
67        "The command took {}",
68        pretty_print_elapsed(start.elapsed().as_secs_f64())
69    );
70
71    Ok(())
72}
73
74pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
75    match get_endianness(&args.basename)?.as_str() {
76        #[cfg(feature = "be_bins")]
77        BE::NAME => sccs::<BE>(global_args, args),
78        #[cfg(feature = "le_bins")]
79        LE::NAME => sccs::<LE>(global_args, args),
80        e => panic!("Unknown endianness: {}", e),
81    }
82}
83
84pub fn sccs<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
85where
86    MemoryFactory<E, MmapHelper<u32>>: CodesReaderFactoryHelper<E>,
87    for<'a> LoadModeCodesReader<'a, E, LoadMmap>: BitSeek,
88{
89    log::info!("Loading the graph from {}", args.basename.display());
90    let graph = BvGraph::with_basename(&args.basename)
91        .mode::<LoadMmap>()
92        .flags(MemoryFlags::TRANSPARENT_HUGE_PAGES | MemoryFlags::RANDOM_ACCESS)
93        .endianness::<E>()
94        .load()?;
95
96    let mut pl = progress_logger![];
97    if let Some(log_interval) = global_args.log_interval {
98        pl.log_interval(log_interval);
99    }
100
101    let mut sccs = webgraph_algo::sccs::tarjan(graph, &mut pl);
102    log::info!(
103        "Found {} strongly connected components",
104        sccs.num_components()
105    );
106
107    if args.renumber {
108        log::info!("Renumbering components by decreasing size");
109        let component_sizes = if args.num_threads == 1 {
110            log::debug!("Using sequential algorithm");
111            sccs.sort_by_size()
112        } else {
113            log::debug!("Using parallel algorithm with {} threads", args.num_threads);
114            sccs.par_sort_by_size()
115        };
116        let max = component_sizes.first().copied();
117        args.fmt.store_usizes(&args.sccs, &component_sizes, max)?;
118    } else if let Some(sizes_path) = args.sizes {
119        log::info!("Computing the sizes of the components");
120        let sizes = sccs.compute_sizes();
121        args.fmt.store_usizes(sizes_path, &sizes, None)?;
122    };
123
124    args.fmt
125        .store_usizes(&args.sccs, sccs.components(), Some(sccs.num_components()))?;
126
127    Ok(())
128}