// webgraph_cli/check/ef.rs

/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */
7
8use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::prelude::*;
12use dsi_progress_logger::prelude::*;
13use epserde::prelude::*;
14use log::info;
15use std::fs::File;
16use std::io::BufReader;
17use std::path::PathBuf;
18use sux::prelude::*;
19use webgraph::graphs::bvgraph::{EF, EF_EXTENSION, OFFSETS_EXTENSION, PROPERTIES_EXTENSION};
20
21#[derive(Parser, Debug)]
22#[command(name = "ef", about = "Checks that the '.ef' file (and `.offsets` if present) is consistent with the graph.", long_about = None)]
23pub struct CliArgs {
24    /// The basename of the graph.
25    pub src: PathBuf,
26}
27
28pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
29    let properties_path = args.src.with_extension(PROPERTIES_EXTENSION);
30    let f = File::open(&properties_path).with_context(|| {
31        format!(
32            "Could not load properties file: {}",
33            properties_path.display()
34        )
35    })?;
36    let map = java_properties::read(BufReader::new(f))?;
37    let num_nodes = map.get("nodes").unwrap().parse::<usize>()?;
38
39    // Creates the offsets file
40    let of_file_path = args.src.with_extension(OFFSETS_EXTENSION);
41
42    let ef = EF::mmap(args.src.with_extension(EF_EXTENSION), Flags::default())?;
43
44    let mut pl = ProgressLogger::default();
45    pl.display_memory(true)
46        .item_name("offset")
47        .expected_updates(Some(num_nodes));
48    if let Some(duration) = &global_args.log_interval {
49        pl.log_interval(*duration);
50    }
51
52    // if the offset files exists, read it to build elias-fano
53    if of_file_path.exists() {
54        let of_file = BufReader::with_capacity(1 << 20, File::open(of_file_path)?);
55        // create a bit reader on the file
56        let mut reader = BufBitReader::<BE, _>::new(<WordAdapter<u32, _>>::new(of_file));
57        // progress bar
58        pl.start("Checking offsets file against Elias-Fano...");
59        // read the graph a write the offsets
60        let mut offset = 0;
61        for node_id in 0..num_nodes + 1 {
62            // write where
63            offset += reader.read_gamma()?;
64            // read ef
65            let ef_res = ef.get(node_id as _);
66            assert_eq!(offset, ef_res as u64, "node_id: {}", node_id);
67            // decode the next nodes so we know where the next node_id starts
68            pl.light_update();
69        }
70    } else {
71        info!("No offsets file, checking against graph file only");
72    }
73
74    let mut pl = ProgressLogger::default();
75    pl.display_memory(true)
76        .item_name("offset")
77        .expected_updates(Some(num_nodes));
78    if let Some(duration) = global_args.log_interval {
79        pl.log_interval(duration);
80    }
81
82    let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
83        .endianness::<BE>()
84        .load()?;
85    // otherwise directly read the graph
86    // progress bar
87    pl.start("Checking graph against Elias-Fano...");
88    // read the graph a write the offsets
89    for (node, (new_offset, _degree)) in seq_graph.offset_deg_iter().enumerate() {
90        // decode the next nodes so we know where the next node_id starts
91        // read ef
92        let ef_res = ef.get(node as _);
93        assert_eq!(new_offset, ef_res as u64, "node_id: {}", node);
94        pl.light_update();
95    }
96    pl.done();
97    Ok(())
98}