Skip to main content

webgraph_cli/check/
ef.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13use dsi_progress_logger::prelude::*;
14use epserde::prelude::*;
15use log::info;
16use std::fs::File;
17use std::io::BufReader;
18use std::path::PathBuf;
19use sux::traits::IndexedSeq;
20use webgraph::graphs::bvgraph::get_endianness;
21use webgraph::graphs::bvgraph::{EF, EF_EXTENSION, OFFSETS_EXTENSION, PROPERTIES_EXTENSION};
22use webgraph::prelude::*;
23
24#[derive(Parser, Debug)]
25#[command(name = "ef", about = "Checks that the \".ef\" file (and \".offsets\" if present) is consistent with the graph.", long_about = None)]
26pub struct CliArgs {
27    /// The basename of the graph.
28    pub basename: PathBuf,
29}
30
31pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
32    match get_endianness(&args.basename)?.as_str() {
33        #[cfg(feature = "be_bins")]
34        BE::NAME => check_ef::<BE>(global_args, args),
35        #[cfg(feature = "le_bins")]
36        LE::NAME => check_ef::<LE>(global_args, args),
37        e => panic!("Unknown endianness: {}", e),
38    }
39}
40
41pub fn check_ef<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
42where
43    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
44    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
45{
46    let properties_path = args.basename.with_extension(PROPERTIES_EXTENSION);
47    let f = File::open(&properties_path).with_context(|| {
48        format!(
49            "Could not load properties file: {}",
50            properties_path.display()
51        )
52    })?;
53    let map = java_properties::read(BufReader::new(f))?;
54    let num_nodes = map.get("nodes").unwrap().parse::<usize>()?;
55
56    // Creates the offsets file
57    let of_file_path = args.basename.with_extension(OFFSETS_EXTENSION);
58
59    let ef = unsafe { EF::mmap(args.basename.with_extension(EF_EXTENSION), Flags::default()) }?;
60    let ef = ef.uncase();
61
62    let mut pl = ProgressLogger::default();
63    pl.display_memory(true)
64        .item_name("offset")
65        .expected_updates(Some(num_nodes));
66    if let Some(duration) = &global_args.log_interval {
67        pl.log_interval(*duration);
68    }
69
70    // if the offset files exists, read it to build elias-fano
71    if of_file_path.exists() {
72        // create a bit reader on the file
73        let mut reader = buf_bit_reader::from_path::<BE, u32>(of_file_path)?;
74        // progress bar
75        pl.start("Checking offsets file against Elias-Fano...");
76        // read the graph a write the offsets
77        let mut offset = 0;
78        for node_id in 0..num_nodes + 1 {
79            // write where
80            offset += reader.read_gamma()?;
81            // read ef
82            let ef_res = ef.get(node_id as _);
83            assert_eq!(offset, ef_res as u64, "node_id: {}", node_id);
84            // decode the next nodes so we know where the next node_id starts
85            pl.light_update();
86        }
87    } else {
88        info!("No offsets file, checking against graph file only");
89    }
90
91    let mut pl = ProgressLogger::default();
92    pl.display_memory(true)
93        .item_name("offset")
94        .expected_updates(Some(num_nodes));
95    if let Some(duration) = global_args.log_interval {
96        pl.log_interval(duration);
97    }
98
99    let seq_graph =
100        webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.basename)
101            .endianness::<E>()
102            .load()?;
103    // otherwise directly read the graph
104    // progress bar
105    pl.start("Checking graph against Elias-Fano...");
106    // read the graph a write the offsets
107    for (node, (new_offset, _degree)) in seq_graph.offset_deg_iter().enumerate() {
108        // decode the next nodes so we know where the next node_id starts
109        // read ef
110        let ef_res = ef.get(node as _);
111        assert_eq!(new_offset, ef_res as u64, "node_id: {}", node);
112        pl.light_update();
113    }
114    pl.done();
115    Ok(())
116}