webgraph_cli/to/
arcs.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::Result;
10use clap::Parser;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13use dsi_progress_logger::prelude::*;
14use lender::*;
15use std::io::Write;
16use std::path::PathBuf;
17use webgraph::graphs::bvgraph::get_endianness;
18use webgraph::traits::SequentialLabeling;
19use webgraph::utils::MmapHelper;
20
21#[derive(Parser, Debug)]
22#[command(name = "arcs", about = "Writes to standard out a graph as a list of arcs to stdout. Each arc comprises a pair of nodes separated by a TAB (but the format is customizable). By default, the command will write nodes as numerical identifiers, but you can use --labels to pass a file containing the identifier of each node. The first string will be the label of node 0, the second for node 1, and so on. The `.nodes` file created by the `from arcs` command is compatible with `--labels`.", long_about = None)]
23pub struct CliArgs {
24    /// The basename of the graph.
25    pub src: PathBuf,
26
27    #[arg(long, default_value_t = '\t')]
28    /// The separator between source and target nodes.
29    pub separator: char,
30
31    #[arg(long)]
32    /// The label of each node. The file is expected to be one string per line,
33    /// the first line will be the label of node 0.
34    /// You can pass here the `.nodes` file generated by the `from arcs` command.
35    pub labels: Option<PathBuf>,
36}
37
38pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
39    match get_endianness(&args.src)?.as_str() {
40        #[cfg(feature = "be_bins")]
41        BE::NAME => to_csv::<BE>(global_args, args),
42        #[cfg(feature = "le_bins")]
43        LE::NAME => to_csv::<LE>(global_args, args),
44        e => panic!("Unknown endianness: {}", e),
45    }
46}
47
48pub fn to_csv<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
49where
50    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
51{
52    let graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(args.src)
53        .endianness::<E>()
54        .load()?;
55    let num_nodes = graph.num_nodes();
56
57    let labels = if let Some(labels) = args.labels {
58        Some(
59            std::fs::read_to_string(labels)?
60                .lines()
61                .map(|l| l.to_string())
62                .collect::<Vec<_>>(),
63        )
64    } else {
65        None
66    };
67
68    // read the csv and put it inside the sort pairs
69    let mut stdout = std::io::BufWriter::new(std::io::stdout().lock());
70    let mut pl = ProgressLogger::default();
71    pl.display_memory(true)
72        .item_name("nodes")
73        .expected_updates(Some(num_nodes));
74
75    if let Some(duration) = global_args.log_interval {
76        pl.log_interval(duration);
77    }
78
79    pl.start("Reading BvGraph");
80
81    if let Some(labels) = labels {
82        for_! ( (src, succ) in graph.iter() {
83            for dst in succ {
84                writeln!(stdout, "{}{}{}", labels[src], args.separator, labels[dst])?;
85            }
86            pl.light_update();
87        });
88    } else {
89        for_! ( (src, succ) in graph.iter() {
90            for dst in succ {
91                writeln!(stdout, "{}{}{}", src, args.separator, dst)?;
92            }
93            pl.light_update();
94        });
95    }
96
97    pl.done();
98    Ok(())
99}