Skip to main content

webgraph_cli/build/
offsets.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
12use dsi_progress_logger::prelude::*;
13use std::path::PathBuf;
14use webgraph::prelude::*;
15
16#[derive(Parser, Debug)]
17#[command(name = "offsets", about = "Builds the offsets file of a graph.", long_about = None)]
18pub struct CliArgs {
19    /// The basename of the graph.
20    pub basename: PathBuf,
21}
22
23pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
24    match get_endianness(&args.basename)?.as_str() {
25        #[cfg(feature = "be_bins")]
26        BE::NAME => build_offsets::<BE>(global_args, args),
27        #[cfg(feature = "le_bins")]
28        LE::NAME => build_offsets::<LE>(global_args, args),
29        e => panic!("Unknown endianness: {}", e),
30    }
31}
32
33pub fn build_offsets<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
34where
35    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
36    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
37{
38    // Creates the sequential iterator over the graph
39    let seq_graph = BvGraphSeq::with_basename(&args.basename)
40        .endianness::<E>()
41        .load()?;
42    let offsets = args.basename.with_extension(OFFSETS_EXTENSION);
43    // create a bit writer on the file
44    let mut writer = buf_bit_writer::from_path::<BE, usize>(&offsets)
45        .with_context(|| format!("Could not create {}", offsets.display()))?;
46    // progress bar
47    let mut pl = ProgressLogger::default();
48    pl.display_memory(true)
49        .item_name("offset")
50        .expected_updates(Some(seq_graph.num_nodes()));
51    if let Some(duration) = global_args.log_interval {
52        pl.log_interval(duration);
53    }
54    pl.start("Computing offsets...");
55    // read the graph a write the offsets
56    let mut offset = 0;
57    let mut degs_iter = seq_graph.offset_deg_iter();
58    for (new_offset, _degree) in &mut degs_iter {
59        // write where
60        writer
61            .write_gamma((new_offset - offset) as _)
62            .context("Could not write gamma")?;
63        offset = new_offset;
64        // decode the next nodes so we know where the next node_id starts
65        pl.light_update();
66    }
67    // write the last offset, this is done to avoid decoding the last node
68    writer
69        .write_gamma((degs_iter.get_pos() - offset) as _)
70        .context("Could not write final gamma")?;
71    pl.light_update();
72    pl.done();
73    Ok(())
74}