webgraph_cli/build/
dcf.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13use dsi_progress_logger::prelude::*;
14use epserde::prelude::*;
15use log::info;
16use std::fs::File;
17use std::io::{BufReader, BufWriter};
18use std::path::PathBuf;
19use sux::prelude::*;
20use webgraph::prelude::*;
21
22#[derive(Parser, Debug)]
23#[command(name = "dcf", about = "Builds the Elias–Fano representation of the degree cumulative function of a graph.", long_about = None)]
24pub struct CliArgs {
25    /// The basename of the graph.
26    pub src: PathBuf,
27}
28
29pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
30    match get_endianness(&args.src)?.as_str() {
31        #[cfg(feature = "be_bins")]
32        BE::NAME => build_dcf::<BE>(global_args, args),
33        #[cfg(feature = "le_bins")]
34        LE::NAME => build_dcf::<LE>(global_args, args),
35        e => panic!("Unknown endianness: {}", e),
36    }
37}
38
39pub fn build_dcf<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
40where
41    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
42    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
43{
44    let basename = args.src;
45    let properties_path = basename.with_extension(PROPERTIES_EXTENSION);
46    let f = File::open(&properties_path).with_context(|| {
47        format!(
48            "Could not open properties file: {}",
49            properties_path.display()
50        )
51    })?;
52    let map = java_properties::read(BufReader::new(f))?;
53    let num_nodes = map.get("nodes").unwrap().parse::<usize>()?;
54    let num_arcs = map.get("arcs").unwrap().parse::<usize>()?;
55
56    let mut efb = EliasFanoBuilder::new(num_nodes + 1, num_arcs);
57
58    let ef_path = basename.with_extension(DEG_CUMUL_EXTENSION);
59    let mut ef_file = BufWriter::new(
60        File::create(&ef_path)
61            .with_context(|| format!("Could not create {}", ef_path.display()))?,
62    );
63
64    let mut pl = ProgressLogger::default();
65    pl.display_memory(true)
66        .item_name("offset")
67        .expected_updates(Some(num_nodes));
68    if let Some(duration) = global_args.log_interval {
69        pl.log_interval(duration);
70    }
71    let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&basename)
72        .endianness::<E>()
73        .load()
74        .with_context(|| format!("Could not load graph at {}", basename.display()))?;
75    // otherwise directly read the graph
76    // progress bar
77    pl.start("Building the degree cumulative function...");
78    // read the graph a write the offsets
79    let iter = seq_graph.offset_deg_iter();
80    let mut cumul_deg = 0;
81
82    efb.push(0);
83    for (_new_offset, degree) in iter {
84        cumul_deg += degree;
85        // write where
86        efb.push(cumul_deg as _);
87        // decode the next nodes so we know where the next node_id starts
88        pl.light_update();
89    }
90    pl.done();
91
92    let ef = efb.build();
93    let ef: DCF = unsafe {
94        ef.map_high_bits(|bits| {
95            SelectZeroAdaptConst::<_, _, 12, 4>::new(SelectAdaptConst::<_, _, 12, 4>::new(bits))
96        })
97    };
98
99    info!("Writing to disk...");
100
101    unsafe {
102        ef.serialize(&mut ef_file).with_context(|| {
103            format!(
104                "Could not serialize degree cumulative list to {}",
105                ef_path.display()
106            )
107        })
108    }?;
109
110    info!("Completed.");
111
112    Ok(())
113}