webgraph_cli/build/
offsets.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
12use dsi_progress_logger::prelude::*;
13use std::{io::BufWriter, path::PathBuf};
14use webgraph::prelude::*;
15
16#[derive(Parser, Debug)]
17#[command(name = "offsets", about = "Builds the offsets file of a graph.", long_about = None)]
18pub struct CliArgs {
19    /// The basename of the graph.
20    pub src: PathBuf,
21}
22
23pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
24    match get_endianness(&args.src)?.as_str() {
25        #[cfg(feature = "be_bins")]
26        BE::NAME => build_offsets::<BE>(global_args, args),
27        #[cfg(feature = "le_bins")]
28        LE::NAME => build_offsets::<LE>(global_args, args),
29        e => panic!("Unknown endianness: {}", e),
30    }
31}
32
33pub fn build_offsets<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
34where
35    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
36    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
37{
38    // Creates the sequential iterator over the graph
39    let seq_graph = BvGraphSeq::with_basename(&args.src)
40        .endianness::<E>()
41        .load()?;
42    let offsets = args.src.with_extension(OFFSETS_EXTENSION);
43    let file = std::fs::File::create(&offsets)
44        .with_context(|| format!("Could not create {}", offsets.display()))?;
45    // create a bit writer on the file
46    let mut writer = <BufBitWriter<BE, _>>::new(<WordAdapter<u64, _>>::new(
47        BufWriter::with_capacity(1 << 20, file),
48    ));
49    // progress bar
50    let mut pl = ProgressLogger::default();
51    pl.display_memory(true)
52        .item_name("offset")
53        .expected_updates(Some(seq_graph.num_nodes()));
54    if let Some(duration) = global_args.log_interval {
55        pl.log_interval(duration);
56    }
57    pl.start("Computing offsets...");
58    // read the graph a write the offsets
59    let mut offset = 0;
60    let mut degs_iter = seq_graph.offset_deg_iter();
61    for (new_offset, _degree) in &mut degs_iter {
62        // write where
63        writer
64            .write_gamma((new_offset - offset) as _)
65            .context("Could not write gamma")?;
66        offset = new_offset;
67        // decode the next nodes so we know where the next node_id starts
68        pl.light_update();
69    }
70    // write the last offset, this is done to avoid decoding the last node
71    writer
72        .write_gamma((degs_iter.get_pos() - offset) as _)
73        .context("Could not write final gamma")?;
74    pl.light_update();
75    pl.done();
76    Ok(())
77}