webgraph_cli/check/
ef.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::prelude::*;
12use dsi_progress_logger::prelude::*;
13use epserde::prelude::*;
14use log::info;
15use std::fs::File;
16use std::io::BufReader;
17use std::path::PathBuf;
18use sux::traits::IndexedSeq;
19use webgraph::graphs::bvgraph::{EF, EF_EXTENSION, OFFSETS_EXTENSION, PROPERTIES_EXTENSION};
20
21#[derive(Parser, Debug)]
22#[command(name = "ef", about = "Checks that the '.ef' file (and `.offsets` if present) is consistent with the graph.", long_about = None)]
23pub struct CliArgs {
24    /// The basename of the graph.
25    pub src: PathBuf,
26}
27
28pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
29    let properties_path = args.src.with_extension(PROPERTIES_EXTENSION);
30    let f = File::open(&properties_path).with_context(|| {
31        format!(
32            "Could not load properties file: {}",
33            properties_path.display()
34        )
35    })?;
36    let map = java_properties::read(BufReader::new(f))?;
37    let num_nodes = map.get("nodes").unwrap().parse::<usize>()?;
38
39    // Creates the offsets file
40    let of_file_path = args.src.with_extension(OFFSETS_EXTENSION);
41
42    let ef = unsafe { EF::mmap(args.src.with_extension(EF_EXTENSION), Flags::default()) }?;
43    let ef = ef.uncase();
44
45    let mut pl = ProgressLogger::default();
46    pl.display_memory(true)
47        .item_name("offset")
48        .expected_updates(Some(num_nodes));
49    if let Some(duration) = &global_args.log_interval {
50        pl.log_interval(*duration);
51    }
52
53    // if the offset files exists, read it to build elias-fano
54    if of_file_path.exists() {
55        let of_file = BufReader::with_capacity(1 << 20, File::open(of_file_path)?);
56        // create a bit reader on the file
57        let mut reader = BufBitReader::<BE, _>::new(<WordAdapter<u32, _>>::new(of_file));
58        // progress bar
59        pl.start("Checking offsets file against Elias-Fano...");
60        // read the graph a write the offsets
61        let mut offset = 0;
62        for node_id in 0..num_nodes + 1 {
63            // write where
64            offset += reader.read_gamma()?;
65            // read ef
66            let ef_res = ef.get(node_id as _);
67            assert_eq!(offset, ef_res as u64, "node_id: {}", node_id);
68            // decode the next nodes so we know where the next node_id starts
69            pl.light_update();
70        }
71    } else {
72        info!("No offsets file, checking against graph file only");
73    }
74
75    let mut pl = ProgressLogger::default();
76    pl.display_memory(true)
77        .item_name("offset")
78        .expected_updates(Some(num_nodes));
79    if let Some(duration) = global_args.log_interval {
80        pl.log_interval(duration);
81    }
82
83    let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
84        .endianness::<BE>()
85        .load()?;
86    // otherwise directly read the graph
87    // progress bar
88    pl.start("Checking graph against Elias-Fano...");
89    // read the graph a write the offsets
90    for (node, (new_offset, _degree)) in seq_graph.offset_deg_iter().enumerate() {
91        // decode the next nodes so we know where the next node_id starts
92        // read ef
93        let ef_res = ef.get(node as _);
94        assert_eq!(new_offset, ef_res as u64, "node_id: {}", node);
95        pl.light_update();
96    }
97    pl.done();
98    Ok(())
99}