Skip to main content

webgraph_cli/to/
arcs.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::Result;
10use clap::Parser;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13use dsi_progress_logger::prelude::*;
14use lender::*;
15use std::io::Write;
16use std::path::PathBuf;
17use webgraph::graphs::bvgraph::get_endianness;
18use webgraph::traits::SequentialLabeling;
19use webgraph::utils::MmapHelper;
20
21#[derive(Parser, Debug)]
22#[command(name = "arcs", about = "Writes to standard out a graph as a list of arcs to stdout. Each arc comprises a pair of nodes separated by a TAB (but the format is customizable). By default, the command will write nodes as numerical identifiers, but you can use --labels to pass a file containing the identifier of each node. The first string will be the label of node 0, the second for node 1, and so on. The \".nodes\" file created by the \"from arcs\" command is compatible with \"--labels\".", long_about = None)]
23pub struct CliArgs {
24    /// The basename of the graph.
25    pub basename: PathBuf,
26
27    #[arg(long, default_value_t = '\t')]
28    /// The separator between source and target nodes.
29    pub separator: char,
30
31    #[arg(long)]
32    /// The label of each node. The file is expected to be one string per line,
33    /// the first line will be the label of node 0.
34    /// You can pass here the ".nodes" file generated by the "from arcs" command.
35    pub labels: Option<PathBuf>,
36}
37
38pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
39    match get_endianness(&args.basename)?.as_str() {
40        #[cfg(feature = "be_bins")]
41        BE::NAME => to_csv::<BE>(global_args, args),
42        #[cfg(feature = "le_bins")]
43        LE::NAME => to_csv::<LE>(global_args, args),
44        e => panic!("Unknown endianness: {}", e),
45    }
46}
47
48pub fn to_csv<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
49where
50    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
51{
52    let graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(args.basename)
53        .endianness::<E>()
54        .load()?;
55    let num_nodes = graph.num_nodes();
56
57    let labels = if let Some(labels) = args.labels {
58        Some(
59            std::fs::read_to_string(labels)?
60                .lines()
61                .map(|l| l.to_string())
62                .collect::<Vec<_>>(),
63        )
64    } else {
65        None
66    };
67
68    // read the csv and put it inside the sort pairs
69    let mut stdout = std::io::BufWriter::new(std::io::stdout().lock());
70    let mut pl = ProgressLogger::default();
71    pl.display_memory(true)
72        .item_name("nodes")
73        .expected_updates(Some(num_nodes));
74
75    if let Some(duration) = global_args.log_interval {
76        pl.log_interval(duration);
77    }
78
79    pl.start("Reading BvGraph");
80
81    if let Some(labels) = labels {
82        anyhow::ensure!(
83            labels.len() >= num_nodes,
84            "Labels file has {} lines but the graph has {} nodes",
85            labels.len(),
86            num_nodes
87        );
88        for_! ( (src, succ) in graph.iter() {
89            for dst in succ {
90                writeln!(stdout, "{}{}{}", labels[src], args.separator, labels[dst])?;
91            }
92            pl.light_update();
93        });
94    } else {
95        for_! ( (src, succ) in graph.iter() {
96            for dst in succ {
97                writeln!(stdout, "{}{}{}", src, args.separator, dst)?;
98            }
99            pl.light_update();
100        });
101    }
102
103    pl.done();
104    Ok(())
105}