Skip to main content

webgraph/graphs/bvgraph/
load.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::*;
9use crate::prelude::*;
10use anyhow::{Context, Result};
11use dsi_bitstream::prelude::*;
12use dsi_bitstream::{dispatch::code_consts, dispatch::factory::CodesReaderFactoryHelper};
13use epserde::deser::Owned;
14use epserde::prelude::*;
15use sealed::sealed;
16use std::{
17    io::BufReader,
18    path::{Path, PathBuf},
19};
20
21/// Sequential or random access.
22#[doc(hidden)]
23#[sealed]
24pub trait Access: 'static {}
25
26#[derive(Debug, Clone)]
27pub struct Sequential {}
28#[sealed]
29impl Access for Sequential {}
30
31#[derive(Debug, Clone)]
32pub struct Random {}
33#[sealed]
34impl Access for Random {}
35
36/// [`Static`] or [`Dynamic`] dispatch.
37#[sealed]
38pub trait Dispatch: 'static {}
39
40/// Static dispatch.
41///
42/// You have to specify all codes used of the graph. The defaults
43/// are the same as the default parameters of the Java version.
44#[derive(Debug, Clone)]
45pub struct Static<
46    const OUTDEGREES: usize = { code_consts::GAMMA },
47    const REFERENCES: usize = { code_consts::UNARY },
48    const BLOCKS: usize = { code_consts::GAMMA },
49    const INTERVALS: usize = { code_consts::GAMMA },
50    const RESIDUALS: usize = { code_consts::ZETA3 },
51> {}
52
53#[sealed]
54impl<
55    const OUTDEGREES: usize,
56    const REFERENCES: usize,
57    const BLOCKS: usize,
58    const INTERVALS: usize,
59    const RESIDUALS: usize,
60> Dispatch for Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>
61{
62}
63
64/// Dynamic dispatch.
65///
66/// Parameters are retrieved from the graph properties.
67#[derive(Debug, Clone)]
68pub struct Dynamic {}
69
70#[sealed]
71impl Dispatch for Dynamic {}
72
73/// Load mode.
74///
75/// The load mode is the way the graph data is accessed. Each load mode has
76/// a corresponding strategy to access the graph and the offsets.
77///
78/// You can set both modes with [`LoadConfig::mode`], or set them separately with
79/// [`LoadConfig::graph_mode`] and [`LoadConfig::offsets_mode`].
80#[sealed]
81pub trait LoadMode: 'static {
82    type Factory<E: Endianness>;
83
84    fn new_factory<E: Endianness, P: AsRef<Path>>(
85        graph: P,
86        flags: codecs::MemoryFlags,
87    ) -> Result<Self::Factory<E>>;
88
89    type Offsets: Offsets;
90
91    fn load_offsets<P: AsRef<Path>>(
92        offsets: P,
93        flags: MemoryFlags,
94    ) -> Result<MemCase<Self::Offsets>>;
95}
96
97/// A type alias for a buffered reader that reads from a memory buffer a `u32` at a time.
98pub type MemBufReader<'a, E> = BufBitReader<E, MemWordReader<u32, &'a [u32]>>;
99/// A type alias for a buffered reader that reads from a file buffer a `u32` at a time.
100pub type FileBufReader<E> = BufBitReader<E, WordAdapter<u32, BufReader<std::fs::File>>>;
101/// A type alias for the [`CodesReaderFactory`] associated with a [`LoadMode`].
102///
103/// This type can be used in client methods that abstract over endianness to
104/// impose the necessary trait bounds on the factory associated with the load
105/// mode: one has just to write, for example, for the [`Mmap`] load mode:
106/// ```ignore
107/// LoadModeFactory<E, Mmap>: CodesReaderFactoryHelper<E>
108/// ```
109///
110/// Additional trait bounds on the [`CodesRead`] associated with the factory
111/// can be imposed by using the [`LoadModeCodesReader`] type alias.
112pub type LoadModeFactory<E, LM> = <LM as LoadMode>::Factory<E>;
113/// A type alias for the code reader returned by the [`CodesReaderFactory`]
114/// associated with a [`LoadMode`].
115///
116/// This type can be used in client methods that abstract over endianness to
117/// impose bounds on the code reader associated to the factory associated with
118/// the load mode, usually in conjunction with [`LoadModeFactory`]. For example,
119/// for the [`Mmap`] load mode:
120/// ```ignore
121/// LoadModeFactory<E, Mmap>: CodesReaderFactoryHelper<E>
122/// LoadModeCodesReader<'a, E, Mmap>: BitSeek
123/// ```
124pub type LoadModeCodesReader<'a, E, LM> =
125    <LoadModeFactory<E, LM> as CodesReaderFactory<E>>::CodesReader<'a>;
126
127/// The graph is read from a file; offsets are fully deserialized in memory.
128///
129/// Note that you must guarantee that the graph file is padded with enough
130/// zeroes so that it can be read one `u32` at a time.
131#[derive(Debug, Clone)]
132pub struct File {}
133#[sealed]
134impl LoadMode for File {
135    type Factory<E: Endianness> = FileFactory<E>;
136    type Offsets = Owned<EF>;
137
138    fn new_factory<E: Endianness, P: AsRef<Path>>(
139        graph: P,
140        _flags: MemoryFlags,
141    ) -> Result<Self::Factory<E>> {
142        FileFactory::<E>::new(graph)
143    }
144
145    fn load_offsets<P: AsRef<Path>>(
146        offsets: P,
147        _flags: MemoryFlags,
148    ) -> Result<MemCase<Self::Offsets>> {
149        let path = offsets.as_ref();
150        unsafe {
151            EF::load_full(path)
152                .with_context(|| format!("Cannot load Elias-Fano pointer list {}", path.display()))
153                .map(Into::into)
154        }
155    }
156}
157
158/// The graph and offsets are memory mapped.
159///
160/// This is the default mode. You can [set memory-mapping flags](LoadConfig::flags).
161#[derive(Debug, Clone)]
162pub struct Mmap {}
163#[sealed]
164impl LoadMode for Mmap {
165    type Factory<E: Endianness> = MmapHelper<u32>;
166    type Offsets = EF;
167
168    fn new_factory<E: Endianness, P: AsRef<Path>>(
169        graph: P,
170        flags: MemoryFlags,
171    ) -> Result<Self::Factory<E>> {
172        MmapHelper::mmap(graph, flags.into())
173    }
174
175    fn load_offsets<P: AsRef<Path>>(
176        offsets: P,
177        flags: MemoryFlags,
178    ) -> Result<MemCase<Self::Offsets>> {
179        let path = offsets.as_ref();
180        unsafe {
181            EF::mmap(path, flags.into())
182                .with_context(|| format!("Cannot map Elias-Fano pointer list {}", path.display()))
183        }
184    }
185}
186
187/// The graph and offsets are loaded into allocated memory.
188#[derive(Debug, Clone)]
189pub struct LoadMem {}
190#[sealed]
191impl LoadMode for LoadMem {
192    type Factory<E: Endianness> = MemoryFactory<E, Box<[u32]>>;
193    type Offsets = EF;
194
195    fn new_factory<E: Endianness, P: AsRef<Path>>(
196        graph: P,
197        _flags: MemoryFlags,
198    ) -> Result<Self::Factory<E>> {
199        MemoryFactory::<E, _>::new_mem(graph)
200    }
201
202    fn load_offsets<P: AsRef<Path>>(
203        offsets: P,
204        _flags: MemoryFlags,
205    ) -> Result<MemCase<Self::Offsets>> {
206        let path = offsets.as_ref();
207        unsafe {
208            EF::load_mem(path)
209                .with_context(|| format!("Cannot load Elias-Fano pointer list {}", path.display()))
210        }
211    }
212}
213
214/// The graph and offsets are loaded into memory obtained via `mmap()`.
215///
216/// You can [set memory-mapping flags](LoadConfig::flags).
217#[derive(Debug, Clone)]
218pub struct LoadMmap {}
219#[sealed]
220impl LoadMode for LoadMmap {
221    type Factory<E: Endianness> = MemoryFactory<E, MmapHelper<u32>>;
222    type Offsets = EF;
223
224    fn new_factory<E: Endianness, P: AsRef<Path>>(
225        graph: P,
226        flags: MemoryFlags,
227    ) -> Result<Self::Factory<E>> {
228        MemoryFactory::<E, _>::new_mmap(graph, flags)
229    }
230
231    fn load_offsets<P: AsRef<Path>>(
232        offsets: P,
233        flags: MemoryFlags,
234    ) -> Result<MemCase<Self::Offsets>> {
235        let path = offsets.as_ref();
236        unsafe {
237            EF::load_mmap(path, flags.into())
238                .with_context(|| format!("Cannot load Elias-Fano pointer list {}", path.display()))
239        }
240    }
241}
242
243/// A load configuration for a [`BvGraph`]/[`BvGraphSeq`].
244///
245/// A basic configuration is returned by
246/// [`BvGraph::with_basename`]/[`BvGraphSeq::with_basename`]. The configuration
247/// can then be customized using the setter methods of this struct, chained in
248/// builder style, and finalized by calling [`load`](LoadConfig::load).
249///
250/// # Defaults
251///
252/// The default configuration returned by `with_basename` uses:
253/// - big endianness ([`BE`]);
254/// - [dynamic dispatch](`Dynamic`);
255/// - [memory mapping](`Mmap`) for both the graph and the offsets.
256///
257/// # Configuration Axes
258///
259/// ## Access Mode
260///
261/// - [`BvGraph::with_basename`] returns a configuration for **random access**,
262///   which requires the Elias–Fano offsets file (`.ef`). The resulting graph
263///   supports both random access and sequential iteration.
264/// - [`BvGraphSeq::with_basename`] returns a configuration for **sequential
265///   access**, which only needs the graph file (`.graph`). The resulting graph
266///   supports only sequential iteration.
267///
268/// ## Endianness
269///
270/// - [`endianness`](LoadConfig::endianness): sets the endianness of the graph
271///   file. Use `endianness::<BE>()` for big-endian (the default and the Java
272///   convention) or `endianness::<LE>()` for little-endian.
273///
274/// ## Code Dispatch
275///
276/// - [`dispatch`](LoadConfig::dispatch): chooses between:
277///   - [`Dynamic`] (default): reads the codes from the properties file;
278///     slightly slower due to indirect dispatch, but works with any graph.
279///   - [`Static`]: the codes are fixed at compile time via const generics,
280///     enabling more aggressive optimization. The defaults match the Java
281///     defaults (γ for outdegrees, unary for references, γ for blocks, γ for
282///     intervals, ζ₃ for residuals). If your graph uses non-default codes,
283///     you must specify them explicitly.
284///
285/// ## Load Mode
286///
287/// Controls how the graph bitstream and the offsets are accessed.
288///
289/// - [`mode`](LoadConfig::mode): sets the load mode for **both** the graph
290///   and the offsets. You can also set them independently:
291///   - [`graph_mode`](LoadConfig::graph_mode): sets the mode for the graph
292///     only;
293///   - [`offsets_mode`](LoadConfig::offsets_mode): sets the mode for the
294///     offsets only (random access only).
295///
296/// The available modes are:
297///
298/// - [`Mmap`] (default): memory maps the file. This is the most
299///   memory-efficient mode, as the OS manages paging. It is the recommended
300///   mode for large graphs.
301/// - [`LoadMem`]: reads the file into allocated memory.
302/// - [`LoadMmap`]: reads the file into memory obtained via `mmap`, rather than
303///   the standard allocator.
304/// - [`File`]: reads the graph from a file stream. The offsets are fully
305///   deserialized in memory using [ε-serde]'s
306///   [`load_full`](epserde::deser::Deserialize::load_full). Note that the
307///   graph file must be padded correctly for this mode.
308///
309/// ## Memory flags
310///
311/// When using [`Mmap`] or [`LoadMmap`], you can set [`MemoryFlags`] to
312/// request transparent huge pages, etc.:
313///
314/// - [`flags`](LoadConfig::flags): sets flags for both the graph and offsets.
315/// - [`graph_flags`](LoadConfig::graph_flags): sets flags for the graph only.
316/// - [`offsets_flags`](LoadConfig::offsets_flags): sets flags for the offsets
317///   only (random access only).
318///
319/// # Examples
320///
321/// Load with all defaults (big-endian, dynamic dispatch, memory-mapped):
322/// ```ignore
323/// let graph = BvGraph::with_basename("BASENAME").load()?;
324/// ```
325///
326/// Load a little-endian graph:
327/// ```ignore
328/// let graph = BvGraph::with_basename("BASENAME")
329///     .endianness::<LE>()
330///     .load()?;
331/// ```
332///
333/// Load with static dispatch (using default codes):
334/// ```ignore
335/// let graph = BvGraph::with_basename("BASENAME")
336///     .dispatch::<Static>()
337///     .load()?;
338/// ```
339///
340/// Load into memory rather than memory-mapping:
341/// ```ignore
342/// let graph = BvGraph::with_basename("BASENAME")
343///     .mode::<LoadMem>()
344///     .load()?;
345/// ```
346///
347/// Load a sequential-access graph (no `.ef` file needed):
348/// ```ignore
349/// let graph = BvGraphSeq::with_basename("BASENAME").load()?;
350/// ```
351///
352/// Combine options:
353/// ```ignore
354/// let graph = BvGraph::with_basename("BASENAME")
355///     .endianness::<LE>()
356///     .dispatch::<Static>()
357///     .mode::<LoadMem>()
358///     .load()?;
359/// ```
360///
361/// [ε-serde]: <https://docs.rs/epserde/latest/epserde/>
362#[derive(Debug, Clone)]
363pub struct LoadConfig<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode> {
364    pub(crate) basename: PathBuf,
365    pub(crate) graph_load_flags: MemoryFlags,
366    pub(crate) offsets_load_flags: MemoryFlags,
367    pub(crate) _marker: std::marker::PhantomData<(E, A, D, GLM, OLM)>,
368}
369
370impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
371    LoadConfig<E, A, D, GLM, OLM>
372{
373    /// Set the endianness of the graph and offsets file.
374    pub fn endianness<E2: Endianness>(self) -> LoadConfig<E2, A, D, GLM, OLM>
375    where
376        GLM: LoadMode,
377        OLM: LoadMode,
378    {
379        LoadConfig {
380            basename: self.basename,
381            graph_load_flags: self.graph_load_flags,
382            offsets_load_flags: self.offsets_load_flags,
383            _marker: std::marker::PhantomData,
384        }
385    }
386}
387
388impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
389    LoadConfig<E, A, D, GLM, OLM>
390{
391    /// Choose between [`Static`] and [`Dynamic`] dispatch.
392    pub fn dispatch<D2: Dispatch>(self) -> LoadConfig<E, A, D2, GLM, OLM> {
393        LoadConfig {
394            basename: self.basename,
395            graph_load_flags: self.graph_load_flags,
396            offsets_load_flags: self.offsets_load_flags,
397            _marker: std::marker::PhantomData,
398        }
399    }
400}
401
402impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
403    LoadConfig<E, A, D, GLM, OLM>
404{
405    /// Choose the [`LoadMode`] for the graph and offsets.
406    pub fn mode<LM: LoadMode>(self) -> LoadConfig<E, A, D, LM, LM> {
407        LoadConfig {
408            basename: self.basename,
409            graph_load_flags: self.graph_load_flags,
410            offsets_load_flags: self.offsets_load_flags,
411            _marker: std::marker::PhantomData,
412        }
413    }
414}
415
416impl<E: Endianness, A: Access, D: Dispatch> LoadConfig<E, A, D, Mmap, Mmap> {
417    /// Set flags for memory-mapping (both graph and offsets).
418    pub fn flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, Mmap, Mmap> {
419        LoadConfig {
420            basename: self.basename,
421            graph_load_flags: flags,
422            offsets_load_flags: flags,
423            _marker: std::marker::PhantomData,
424        }
425    }
426}
427
428impl<E: Endianness, A: Access, D: Dispatch> LoadConfig<E, A, D, LoadMmap, LoadMmap> {
429    /// Set flags for memory obtained from `mmap()` (both graph and offsets).
430    pub fn flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, LoadMmap, LoadMmap> {
431        LoadConfig {
432            basename: self.basename,
433            graph_load_flags: flags,
434            offsets_load_flags: flags,
435            _marker: std::marker::PhantomData,
436        }
437    }
438}
439
440impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
441    LoadConfig<E, A, D, GLM, OLM>
442{
443    /// Choose the [`LoadMode`] for the graph only.
444    pub fn graph_mode<NGLM: LoadMode>(self) -> LoadConfig<E, A, D, NGLM, OLM> {
445        LoadConfig {
446            basename: self.basename,
447            graph_load_flags: self.graph_load_flags,
448            offsets_load_flags: self.offsets_load_flags,
449            _marker: std::marker::PhantomData,
450        }
451    }
452}
453
454impl<E: Endianness, A: Access, D: Dispatch, OLM: LoadMode> LoadConfig<E, A, D, Mmap, OLM> {
455    /// Set flags for memory-mapping the graph.
456    pub fn graph_flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, Mmap, OLM> {
457        LoadConfig {
458            basename: self.basename,
459            graph_load_flags: flags,
460            offsets_load_flags: self.offsets_load_flags,
461            _marker: std::marker::PhantomData,
462        }
463    }
464}
465
466impl<E: Endianness, A: Access, D: Dispatch, OLM: LoadMode> LoadConfig<E, A, D, LoadMmap, OLM> {
467    /// Set flags for memory obtained from `mmap()` for the graph.
468    pub fn graph_flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, LoadMmap, OLM> {
469        LoadConfig {
470            basename: self.basename,
471            graph_load_flags: flags,
472            offsets_load_flags: self.offsets_load_flags,
473            _marker: std::marker::PhantomData,
474        }
475    }
476}
477
478impl<E: Endianness, D: Dispatch, GLM: LoadMode, OLM: LoadMode> LoadConfig<E, Random, D, GLM, OLM> {
479    /// Choose the [`LoadMode`] for the offsets only.
480    pub fn offsets_mode<NOLM: LoadMode>(self) -> LoadConfig<E, Random, D, GLM, NOLM> {
481        LoadConfig {
482            basename: self.basename,
483            graph_load_flags: self.graph_load_flags,
484            offsets_load_flags: self.offsets_load_flags,
485            _marker: std::marker::PhantomData,
486        }
487    }
488}
489
490impl<E: Endianness, D: Dispatch, GLM: LoadMode> LoadConfig<E, Random, D, GLM, Mmap> {
491    /// Set flags for memory-mapping the offsets.
492    pub fn offsets_flags(self, flags: MemoryFlags) -> LoadConfig<E, Random, D, GLM, Mmap> {
493        LoadConfig {
494            basename: self.basename,
495            graph_load_flags: self.graph_load_flags,
496            offsets_load_flags: flags,
497            _marker: std::marker::PhantomData,
498        }
499    }
500}
501
502impl<E: Endianness, D: Dispatch, GLM: LoadMode> LoadConfig<E, Random, D, GLM, LoadMmap> {
503    /// Set flags for memory obtained from `mmap()` for the graph.
504    pub fn offsets_flags(self, flags: MemoryFlags) -> LoadConfig<E, Random, D, GLM, LoadMmap> {
505        LoadConfig {
506            basename: self.basename,
507            graph_load_flags: self.graph_load_flags,
508            offsets_load_flags: flags,
509            _marker: std::marker::PhantomData,
510        }
511    }
512}
513
514impl<E: Endianness, GLM: LoadMode, OLM: LoadMode> LoadConfig<E, Random, Dynamic, GLM, OLM> {
515    /// Load a random-access graph with dynamic dispatch.
516    pub fn load(
517        mut self,
518    ) -> anyhow::Result<BvGraph<DynCodesDecoderFactory<E, GLM::Factory<E>, OLM::Offsets>>>
519    where
520        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
521        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E> + BitSeek,
522    {
523        warn_if_ef_stale(&self.basename);
524        self.basename.set_extension(PROPERTIES_EXTENSION);
525        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)
526            .with_context(|| {
527                format!("Could not load properties file {}", self.basename.display())
528            })?;
529        self.basename.set_extension(GRAPH_EXTENSION);
530        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)
531            .with_context(|| format!("Could not load graph file {}", self.basename.display()))?;
532        self.basename.set_extension(EF_EXTENSION);
533        let offsets = OLM::load_offsets(&self.basename, self.offsets_load_flags)
534            .with_context(|| format!("Could not load offsets file {}", self.basename.display()))?;
535
536        Ok(BvGraph::new(
537            DynCodesDecoderFactory::new(factory, offsets, comp_flags)?,
538            num_nodes,
539            num_arcs,
540            comp_flags.compression_window,
541            comp_flags.min_interval_length,
542        ))
543    }
544}
545
546impl<E: Endianness, GLM: LoadMode, OLM: LoadMode> LoadConfig<E, Sequential, Dynamic, GLM, OLM> {
547    /// Load a sequential graph with dynamic dispatch.
548    pub fn load(
549        mut self,
550    ) -> anyhow::Result<
551        BvGraphSeq<DynCodesDecoderFactory<E, GLM::Factory<E>, Owned<EmptyDict<usize, usize>>>>,
552    >
553    where
554        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
555        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E>,
556    {
557        self.basename.set_extension(PROPERTIES_EXTENSION);
558        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)?;
559        self.basename.set_extension(GRAPH_EXTENSION);
560        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)?;
561
562        Ok(BvGraphSeq::new(
563            DynCodesDecoderFactory::new(factory, EmptyDict::default().into(), comp_flags)?,
564            num_nodes,
565            Some(num_arcs),
566            comp_flags.compression_window,
567            comp_flags.min_interval_length,
568        ))
569    }
570}
571
572impl<
573    E: Endianness,
574    GLM: LoadMode,
575    OLM: LoadMode,
576    const OUTDEGREES: usize,
577    const REFERENCES: usize,
578    const BLOCKS: usize,
579    const INTERVALS: usize,
580    const RESIDUALS: usize,
581> LoadConfig<E, Random, Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>, GLM, OLM>
582{
583    /// Load a random-access graph with static dispatch.
584    pub fn load(
585        mut self,
586    ) -> anyhow::Result<
587        BvGraph<
588            ConstCodesDecoderFactory<
589                E,
590                GLM::Factory<E>,
591                OLM::Offsets,
592                OUTDEGREES,
593                REFERENCES,
594                BLOCKS,
595                INTERVALS,
596                RESIDUALS,
597            >,
598        >,
599    >
600    where
601        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
602        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E> + BitSeek,
603    {
604        warn_if_ef_stale(&self.basename);
605        self.basename.set_extension(PROPERTIES_EXTENSION);
606        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)?;
607        self.basename.set_extension(GRAPH_EXTENSION);
608        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)?;
609        self.basename.set_extension(EF_EXTENSION);
610        let offsets = OLM::load_offsets(&self.basename, self.offsets_load_flags)?;
611
612        Ok(BvGraph::new(
613            ConstCodesDecoderFactory::new(factory, offsets, comp_flags)?,
614            num_nodes,
615            num_arcs,
616            comp_flags.compression_window,
617            comp_flags.min_interval_length,
618        ))
619    }
620}
621
622impl<
623    E: Endianness,
624    GLM: LoadMode,
625    OLM: LoadMode,
626    const OUTDEGREES: usize,
627    const REFERENCES: usize,
628    const BLOCKS: usize,
629    const INTERVALS: usize,
630    const RESIDUALS: usize,
631>
632    LoadConfig<
633        E,
634        Sequential,
635        Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>,
636        GLM,
637        OLM,
638    >
639{
640    /// Load a sequential graph with static dispatch.
641    pub fn load(
642        mut self,
643    ) -> anyhow::Result<
644        BvGraphSeq<
645            ConstCodesDecoderFactory<
646                E,
647                GLM::Factory<E>,
648                Owned<EmptyDict<usize, usize>>,
649                OUTDEGREES,
650                REFERENCES,
651                BLOCKS,
652                INTERVALS,
653                RESIDUALS,
654            >,
655        >,
656    >
657    where
658        <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
659        for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E>,
660    {
661        self.basename.set_extension(PROPERTIES_EXTENSION);
662        let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)?;
663        self.basename.set_extension(GRAPH_EXTENSION);
664        let factory = GLM::new_factory(&self.basename, self.graph_load_flags)?;
665
666        Ok(BvGraphSeq::new(
667            ConstCodesDecoderFactory::new(factory, EmptyDict::default().into(), comp_flags)?,
668            num_nodes,
669            Some(num_arcs),
670            comp_flags.compression_window,
671            comp_flags.min_interval_length,
672        ))
673    }
674}
675
676/// Checks if the `.ef` file is older than the .graph file and log a warning if so.
677///
678/// This is important because if the graph has been recompressed, the `.ef` file
679/// will be stale and needs to be rebuilt. This is a very common scenario, in
680/// particular when testing compression techniques.
681fn warn_if_ef_stale(basename: &Path) {
682    if std::env::var_os("DO_NOT_CHECK_MOD_TIMES").is_some() {
683        return;
684    }
685    let graph_path = basename.with_extension(GRAPH_EXTENSION);
686    let ef_path = basename.with_extension(EF_EXTENSION);
687
688    let graph_modified = match std::fs::metadata(&graph_path).and_then(|m| m.modified()) {
689        Ok(t) => t,
690        Err(_) => return, // Can't check, skip warning
691    };
692
693    let ef_modified = match std::fs::metadata(&ef_path).and_then(|m| m.modified()) {
694        Ok(t) => t,
695        Err(_) => return, // Can't check, skip warning
696    };
697
698    if ef_modified < graph_modified {
699        log::warn!(
700            "The Elias-Fano file {} is older than the graph file {}; \
701             this may indicate that the graph has been modified and the .ef file is stale. \
702             Consider rebuilding it with \"webgraph build ef {}\", just touch it if this warning is spurious, \
703             or set the environment variable DO_NOT_CHECK_MOD_TIMES to disable this check.",
704            ef_path.display(),
705            graph_path.display(),
706            basename.display()
707        );
708    }
709}
710
711/// Read the .properties file and return the endianness
712pub fn get_endianness<P: AsRef<Path>>(basename: P) -> Result<String> {
713    let path = basename.as_ref().with_extension(PROPERTIES_EXTENSION);
714    let f = std::fs::File::open(&path)
715        .with_context(|| format!("Cannot open property file {}", path.display()))?;
716    let map = java_properties::read(BufReader::new(f))
717        .with_context(|| format!("cannot parse {} as a java properties file", path.display()))?;
718
719    let endianness = map
720        .get("endianness")
721        .map(|x| x.to_string())
722        .unwrap_or_else(|| BigEndian::NAME.to_string());
723
724    Ok(endianness)
725}
726
727/// Read the .properties file and return the number of nodes, number of arcs and compression flags
728/// for the graph. The endianness is checked against the expected one.
729pub fn parse_properties<E: Endianness>(path: impl AsRef<Path>) -> Result<(usize, u64, CompFlags)> {
730    let name = path.as_ref().display();
731    let f =
732        std::fs::File::open(&path).with_context(|| format!("Cannot open property file {name}"))?;
733    let map = java_properties::read(BufReader::new(f))
734        .with_context(|| format!("cannot parse {name} as a java properties file"))?;
735
736    let num_nodes = map
737        .get("nodes")
738        .with_context(|| format!("Missing 'nodes' property in {name}"))?
739        .parse::<usize>()
740        .with_context(|| format!("Cannot parse 'nodes' as usize in {name}"))?;
741    let num_arcs = map
742        .get("arcs")
743        .with_context(|| format!("Missing 'arcs' property in {name}"))?
744        .parse::<u64>()
745        .with_context(|| format!("Cannot parse arcs as usize in {name}"))?;
746
747    let comp_flags = CompFlags::from_properties::<E>(&map)
748        .with_context(|| format!("Cannot parse compression flags from {name}"))?;
749    Ok((num_nodes, num_arcs, comp_flags))
750}