1use crate::GlobalArgs;
9use anyhow::{Context, Result};
10use clap::Parser;
11use dsi_bitstream::dispatch::factory::CodesReaderFactoryHelper;
12use dsi_bitstream::prelude::*;
13use dsi_progress_logger::prelude::*;
14use epserde::prelude::*;
15use log::info;
16use mmap_rs::MmapFlags;
17use std::fs::File;
18use std::io::{BufReader, BufWriter, Seek};
19use std::path::PathBuf;
20use sux::prelude::*;
21use webgraph::prelude::*;
22
23#[derive(Parser, Debug, Clone)]
24#[command(name = "ef", about = "Builds the Elias-Fano representation of the offsets of a graph.", long_about = None)]
25pub struct CliArgs {
26 pub src: PathBuf,
28 pub number_of_nodes: Option<usize>,
34}
35
36pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
37 let ef_path = args.src.with_extension(EF_EXTENSION);
38 if ef_path.exists() && ef_path.metadata()?.permissions().readonly() {
41 return Err(anyhow::anyhow!(
42 "The file is not writable: {}",
43 ef_path.display()
44 ));
45 }
46
47 match get_endianness(&args.src)?.as_str() {
48 #[cfg(feature = "be_bins")]
49 BE::NAME => build_eliasfano::<BE>(global_args, args),
50 #[cfg(feature = "le_bins")]
51 LE::NAME => build_eliasfano::<LE>(global_args, args),
52 e => panic!("Unknown endianness: {}", e),
53 }
54}
55
56pub fn build_eliasfano<E: Endianness + 'static>(
57 global_args: GlobalArgs,
58 args: CliArgs,
59) -> Result<()>
60where
61 for<'a> BufBitReader<E, MemWordReader<u32, &'a [u32]>>: CodesRead<E> + BitSeek,
62{
63 let mut pl = ProgressLogger::default();
64 pl.display_memory(true).item_name("offset");
65 if let Some(duration) = &global_args.log_interval {
66 pl.log_interval(*duration);
67 }
68
69 let basename = args.src.clone();
70 if let Some(num_nodes) = args.number_of_nodes {
71 pl.expected_updates(Some(num_nodes));
72 let of_file_path = basename.with_extension(LABELOFFSETS_EXTENSION);
74 if of_file_path.exists() {
75 let labels_path = basename.with_extension(LABELS_EXTENSION);
76 let mut file = File::open(&labels_path)
77 .with_context(|| format!("Could not open {}", labels_path.display()))?;
78 let file_len = 8 * file
79 .seek(std::io::SeekFrom::End(0))
80 .with_context(|| format!("Could not seek to end of {}", labels_path.display()))?;
81
82 let mut efb = EliasFanoBuilder::new(num_nodes + 1, file_len as usize);
83
84 info!("The offsets file exists, reading it to build Elias-Fano");
85
86 let of = <MmapHelper<u32>>::mmap(of_file_path, MmapFlags::SEQUENTIAL)?;
87 build_eliasfano_from_offsets(
88 &global_args,
89 &args,
90 num_nodes,
91 of.new_reader(),
92 &mut pl,
93 &mut efb,
94 )?;
95 return serialize_eliasfano(&global_args, &args, efb, &mut pl);
96 }
97 }
98
99 let of_file_path = basename.with_extension(OFFSETS_EXTENSION);
101
102 let graph_path = basename.with_extension(GRAPH_EXTENSION);
103 info!("Getting size of graph at '{}'", graph_path.display());
104 let mut file = File::open(&graph_path)
105 .with_context(|| format!("Could not open {}", graph_path.display()))?;
106 let file_len = 8 * file
107 .seek(std::io::SeekFrom::End(0))
108 .with_context(|| format!("Could not seek in {}", graph_path.display()))?;
109 info!("Graph file size: {} bits", file_len);
110
111 let num_nodes = args
116 .number_of_nodes
117 .map(Ok::<_, anyhow::Error>)
118 .unwrap_or_else(|| {
119 let properties_path = basename.with_extension(PROPERTIES_EXTENSION);
120 info!(
121 "Reading num_of_nodes from properties file at '{}'",
122 properties_path.display()
123 );
124 let f = File::open(&properties_path).with_context(|| {
125 format!(
126 "Could not open properties file: {}",
127 properties_path.display()
128 )
129 })?;
130 let map = java_properties::read(BufReader::new(f))?;
131 Ok(map.get("nodes").unwrap().parse::<usize>()?)
132 })?;
133 pl.expected_updates(Some(num_nodes));
134
135 let mut efb = EliasFanoBuilder::new(num_nodes + 1, file_len as usize);
136
137 info!("Checking if offsets exists at '{}'", of_file_path.display());
138 if of_file_path.exists() {
140 info!("The offsets file exists, reading it to build Elias-Fano");
141 let of = <MmapHelper<u32>>::mmap(of_file_path, MmapFlags::SEQUENTIAL)?;
142 build_eliasfano_from_offsets(
143 &global_args,
144 &args,
145 num_nodes,
146 of.new_reader(),
147 &mut pl,
148 &mut efb,
149 )?;
150 } else {
151 build_eliasfano_from_graph(&args, &mut pl, &mut efb)?;
152 }
153
154 serialize_eliasfano(&global_args, &args, efb, &mut pl)
155}
156
157pub fn build_eliasfano_from_graph(
158 args: &CliArgs,
159 pl: &mut impl ProgressLog,
160 efb: &mut EliasFanoBuilder,
161) -> Result<()> {
162 info!("The offsets file does not exists, reading the graph to build Elias-Fano");
163 match get_endianness(&args.src)?.as_str() {
164 #[cfg(feature = "be_bins")]
165 BE::NAME => build_eliasfano_from_graph_with_endianness::<BE>(args, pl, efb),
166 #[cfg(feature = "le_bins")]
167 LE::NAME => build_eliasfano_from_graph_with_endianness::<LE>(args, pl, efb),
168 e => panic!("Unknown endianness: {}", e),
169 }
170}
171
172pub fn build_eliasfano_from_offsets<E: Endianness>(
173 _global_args: &GlobalArgs,
174 _args: &CliArgs,
175 num_nodes: usize,
176 mut reader: impl GammaRead<E>,
177 pl: &mut impl ProgressLog,
178 efb: &mut EliasFanoBuilder,
179) -> Result<()> {
180 info!("Building Elias-Fano from offsets...");
181
182 pl.start("Translating offsets to EliasFano...");
184 let mut offset = 0;
186 for _node_id in 0..num_nodes + 1 {
187 offset += reader.read_gamma().context("Could not read gamma")?;
189 efb.push(offset as _);
190 pl.light_update();
192 }
193 pl.done();
194 Ok(())
195}
196
197pub fn build_eliasfano_from_graph_with_endianness<E: Endianness>(
198 args: &CliArgs,
199 pl: &mut impl ProgressLog,
200 efb: &mut EliasFanoBuilder,
201) -> Result<()>
202where
203 MmapHelper<u32>: CodesReaderFactoryHelper<E>,
204 for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek,
205{
206 let seq_graph = webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
207 .endianness::<E>()
208 .load()
209 .with_context(|| format!("Could not load graph at {}", args.src.display()))?;
210 pl.start("Building EliasFano...");
213 let mut iter = seq_graph.offset_deg_iter();
215 for (new_offset, _degree) in iter.by_ref() {
216 efb.push(new_offset as _);
218 pl.light_update();
220 }
221 efb.push(iter.get_pos() as _);
222 Ok(())
223}
224
225pub fn serialize_eliasfano(
226 global_args: &GlobalArgs,
227 args: &CliArgs,
228 efb: EliasFanoBuilder,
229 pl: &mut impl ProgressLog,
230) -> Result<()> {
231 let ef = efb.build();
232 pl.done();
233
234 let mut pl = ProgressLogger::default();
235 pl.display_memory(true);
236 if let Some(duration) = &global_args.log_interval {
237 pl.log_interval(*duration);
238 }
239 pl.start("Building the Index over the ones in the high-bits...");
240 let ef: EF = unsafe { ef.map_high_bits(SelectAdaptConst::<_, _, 12, 4>::new) };
241 pl.done();
242
243 let mut pl = ProgressLogger::default();
244 pl.display_memory(true);
245 if let Some(duration) = &global_args.log_interval {
246 pl.log_interval(*duration);
247 }
248 pl.start("Writing to disk...");
249
250 let ef_path = args.src.with_extension(EF_EXTENSION);
251 info!("Creating Elias-Fano at '{}'", ef_path.display());
252 let mut ef_file = BufWriter::new(
253 File::create(&ef_path)
254 .with_context(|| format!("Could not create {}", ef_path.display()))?,
255 );
256
257 unsafe {
259 ef.serialize(&mut ef_file)
260 .with_context(|| format!("Could not serialize EliasFano to {}", ef_path.display()))
261 }?;
262
263 pl.done();
264 Ok(())
265}