1use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13use dsi_progress_logger::prelude::*;
14use epserde::prelude::*;
15use log::info;
16use std::fs::File;
17use std::io::BufReader;
18use std::path::PathBuf;
19use sux::traits::IndexedSeq;
20use webgraph::graphs::bvgraph::get_endianness;
21use webgraph::graphs::bvgraph::{EF, EF_EXTENSION, OFFSETS_EXTENSION, PROPERTIES_EXTENSION};
22use webgraph::prelude::*;
23
24#[derive(Parser, Debug)]
25#[command(name = "ef", about = "Checks that the \".ef\" file (and \".offsets\" if present) is consistent with the graph.", long_about = None)]
26pub struct CliArgs {
27 pub basename: PathBuf,
29}
30
31pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
32 match get_endianness(&args.basename)?.as_str() {
33 #[cfg(feature = "be_bins")]
34 BE::NAME => check_ef::<BE>(global_args, args),
35 #[cfg(feature = "le_bins")]
36 LE::NAME => check_ef::<LE>(global_args, args),
37 e => panic!("Unknown endianness: {}", e),
38 }
39}
40
41pub fn check_ef<E: Endianness + 'static>(global_args: GlobalArgs, args: CliArgs) -> Result<()>
42where
43 MmapHelper<u32>: CodesReaderFactoryHelper<E>,
44 for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
45{
46 let properties_path = args.basename.with_extension(PROPERTIES_EXTENSION);
47 let f = File::open(&properties_path).with_context(|| {
48 format!(
49 "Could not load properties file: {}",
50 properties_path.display()
51 )
52 })?;
53 let map = java_properties::read(BufReader::new(f))?;
54 let num_nodes = map.get("nodes").unwrap().parse::<usize>()?;
55
56 let of_file_path = args.basename.with_extension(OFFSETS_EXTENSION);
58
59 let ef = unsafe { EF::mmap(args.basename.with_extension(EF_EXTENSION), Flags::default()) }?;
60 let ef = ef.uncase();
61
62 let mut pl = ProgressLogger::default();
63 pl.display_memory(true)
64 .item_name("offset")
65 .expected_updates(Some(num_nodes));
66 if let Some(duration) = &global_args.log_interval {
67 pl.log_interval(*duration);
68 }
69
70 if of_file_path.exists() {
72 let mut reader = buf_bit_reader::from_path::<BE, u32>(of_file_path)?;
74 pl.start("Checking offsets file against Elias-Fano...");
76 let mut offset = 0;
78 for node_id in 0..num_nodes + 1 {
79 offset += reader.read_gamma()?;
81 let ef_res = ef.get(node_id as _);
83 assert_eq!(offset, ef_res as u64, "node_id: {}", node_id);
84 pl.light_update();
86 }
87 } else {
88 info!("No offsets file, checking against graph file only");
89 }
90
91 let mut pl = ProgressLogger::default();
92 pl.display_memory(true)
93 .item_name("offset")
94 .expected_updates(Some(num_nodes));
95 if let Some(duration) = global_args.log_interval {
96 pl.log_interval(duration);
97 }
98
99 let seq_graph =
100 webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.basename)
101 .endianness::<E>()
102 .load()?;
103 pl.start("Checking graph against Elias-Fano...");
106 for (node, (new_offset, _degree)) in seq_graph.offset_deg_iter().enumerate() {
108 let ef_res = ef.get(node as _);
111 assert_eq!(new_offset, ef_res as u64, "node_id: {}", node);
112 pl.light_update();
113 }
114 pl.done();
115 Ok(())
116}