use crate::GlobalArgs;
use anyhow::{Context, Result};
use clap::Parser;
use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
use dsi_bitstream::prelude::*;
use dsi_progress_logger::prelude::*;
use epserde::prelude::*;
use log::info;
use std::fs::File;
use std::io::BufReader;
use std::path::PathBuf;
use sux::traits::IndexedSeq;
use webgraph::graphs::bvgraph::get_endianness;
use webgraph::graphs::bvgraph::{EF, EF_EXTENSION, OFFSETS_EXTENSION, PROPERTIES_EXTENSION};
use webgraph::prelude::*;
// Command-line arguments of the `ef` check subcommand.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macro can
// pick up `///` doc comments as help text, so adding them could alter the
// `--help` output of the command.
#[derive(Parser, Debug)]
#[command(
    name = "ef",
    about = "Checks that the \".ef\" file (and \".offsets\" if present) is consistent with the graph.",
    long_about = None
)]
pub struct CliArgs {
    // Path from which the graph's companion files are derived by swapping the
    // extension (properties, offsets, Elias-Fano); see `check_ef`.
    pub basename: PathBuf,
}
pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
match get_endianness(&args.basename)?.as_str() {
#[cfg(feature = "be_bins")]
BE::NAME => check_ef::<BE>(global_args, args),
#[cfg(feature = "le_bins")]
LE::NAME => check_ef::<LE>(global_args, args),
e => panic!("Unknown endianness: {}", e),
}
}
/// Checks that the Elias-Fano file of a graph is consistent with the graph.
///
/// The number of nodes is read from the `nodes` entry of the properties file.
/// Two checks are then performed against the memory-mapped Elias-Fano
/// structure:
///
/// 1. If an offsets file exists, its gamma-coded gaps are accumulated and each
///    running total is compared with the corresponding Elias-Fano entry, for
///    indices `0..=num_nodes`.
/// 2. The graph is loaded sequentially and every offset yielded by
///    `offset_deg_iter()` is compared with the corresponding Elias-Fano entry.
///
/// # Errors
///
/// Returns an error if the properties file cannot be opened or parsed, if it
/// lacks a valid `nodes` entry, if the Elias-Fano file cannot be mapped, if
/// the offsets file cannot be read, or if the graph cannot be loaded.
///
/// # Panics
///
/// Panics (via `assert_eq!`) on the first offset that differs from the value
/// stored in the Elias-Fano structure; the message reports the node id.
pub fn check_ef<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
where
    MmapHelper<u32>: CodesReaderFactoryHelper<E>,
    for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
{
    let properties_path = args.basename.with_extension(PROPERTIES_EXTENSION);
    let f = File::open(&properties_path).with_context(|| {
        format!(
            "Could not load properties file: {}",
            properties_path.display()
        )
    })?;
    let map = java_properties::read(BufReader::new(f))?;
    // A missing or malformed `nodes` entry is a problem with the input file,
    // so it is reported as an error rather than a panic.
    let num_nodes = map
        .get("nodes")
        .with_context(|| {
            format!(
                "Missing \"nodes\" property in {}",
                properties_path.display()
            )
        })?
        .parse::<usize>()
        .with_context(|| {
            format!(
                "Invalid \"nodes\" property in {}",
                properties_path.display()
            )
        })?;

    let of_file_path = args.basename.with_extension(OFFSETS_EXTENSION);

    // SAFETY (NOTE(review)): presumably sound as long as the `.ef` file is a
    // valid serialization of `EF` and is not modified while mapped — confirm
    // against the epserde documentation for `mmap`.
    let ef = unsafe { EF::mmap(args.basename.with_extension(EF_EXTENSION), Flags::default()) }?;
    let ef = ef.uncase();

    if of_file_path.exists() {
        let mut pl = ProgressLogger::default();
        pl.display_memory(true)
            .item_name("offset")
            .expected_updates(Some(num_nodes));
        if let Some(duration) = &global_args.log_interval {
            pl.log_interval(*duration);
        }

        // The offsets file is always read as big-endian, independently of `E`.
        let mut reader = buf_bit_reader::from_path::<BE, u32>(of_file_path)?;
        pl.start("Checking offsets file against Elias-Fano...");
        // The file stores gamma-coded gaps; the running sum is the offset.
        let mut offset = 0;
        // `num_nodes + 1` entries are checked (indices 0 through `num_nodes`).
        for node_id in 0..=num_nodes {
            offset += reader.read_gamma()?;
            let ef_res = ef.get(node_id as _);
            assert_eq!(offset, ef_res as u64, "node_id: {}", node_id);
            pl.light_update();
        }
        // Close this logger before the second phase starts its own.
        pl.done();
    } else {
        info!("No offsets file, checking against graph file only");
    }

    let mut pl = ProgressLogger::default();
    pl.display_memory(true)
        .item_name("offset")
        .expected_updates(Some(num_nodes));
    if let Some(duration) = global_args.log_interval {
        pl.log_interval(duration);
    }

    let seq_graph =
        webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.basename)
            .endianness::<E>()
            .load()?;

    pl.start("Checking graph against Elias-Fano...");
    // Each item is an (offset, degree) pair; its position in the iteration is
    // used as the index into the Elias-Fano structure.
    for (node, (new_offset, _degree)) in seq_graph.offset_deg_iter().enumerate() {
        let ef_res = ef.get(node as _);
        assert_eq!(new_offset, ef_res as u64, "node_id: {}", node);
        pl.light_update();
    }
    pl.done();

    Ok(())
}