1use super::*;
9use crate::prelude::*;
10use anyhow::{Context, Result};
11use dsi_bitstream::prelude::*;
12use dsi_bitstream::{dispatch::code_consts, dispatch::factory::CodesReaderFactoryHelper};
13use epserde::deser::Owned;
14use epserde::prelude::*;
15use sealed::sealed;
16use std::{
17 io::BufReader,
18 path::{Path, PathBuf},
19};
20
21#[doc(hidden)]
23#[sealed]
24pub trait Access: 'static {}
25
26#[derive(Debug, Clone)]
27pub struct Sequential {}
28#[sealed]
29impl Access for Sequential {}
30
31#[derive(Debug, Clone)]
32pub struct Random {}
33#[sealed]
34impl Access for Random {}
35
36#[sealed]
38pub trait Dispatch: 'static {}
39
40#[derive(Debug, Clone)]
45pub struct Static<
46 const OUTDEGREES: usize = { code_consts::GAMMA },
47 const REFERENCES: usize = { code_consts::UNARY },
48 const BLOCKS: usize = { code_consts::GAMMA },
49 const INTERVALS: usize = { code_consts::GAMMA },
50 const RESIDUALS: usize = { code_consts::ZETA3 },
51> {}
52
53#[sealed]
54impl<
55 const OUTDEGREES: usize,
56 const REFERENCES: usize,
57 const BLOCKS: usize,
58 const INTERVALS: usize,
59 const RESIDUALS: usize,
60> Dispatch for Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>
61{
62}
63
64#[derive(Debug, Clone)]
68pub struct Dynamic {}
69
70#[sealed]
71impl Dispatch for Dynamic {}
72
73#[sealed]
81pub trait LoadMode: 'static {
82 type Factory<E: Endianness>;
83
84 fn new_factory<E: Endianness, P: AsRef<Path>>(
85 graph: P,
86 flags: codecs::MemoryFlags,
87 ) -> Result<Self::Factory<E>>;
88
89 type Offsets: Offsets;
90
91 fn load_offsets<P: AsRef<Path>>(
92 offsets: P,
93 flags: MemoryFlags,
94 ) -> Result<MemCase<Self::Offsets>>;
95}
96
97pub type MemBufReader<'a, E> = BufBitReader<E, MemWordReader<u32, &'a [u32]>>;
99pub type FileBufReader<E> = BufBitReader<E, WordAdapter<u32, BufReader<std::fs::File>>>;
101pub type LoadModeFactory<E, LM> = <LM as LoadMode>::Factory<E>;
113pub type LoadModeCodesReader<'a, E, LM> =
125 <LoadModeFactory<E, LM> as CodesReaderFactory<E>>::CodesReader<'a>;
126
127#[derive(Debug, Clone)]
132pub struct File {}
133#[sealed]
134impl LoadMode for File {
135 type Factory<E: Endianness> = FileFactory<E>;
136 type Offsets = Owned<EF>;
137
138 fn new_factory<E: Endianness, P: AsRef<Path>>(
139 graph: P,
140 _flags: MemoryFlags,
141 ) -> Result<Self::Factory<E>> {
142 FileFactory::<E>::new(graph)
143 }
144
145 fn load_offsets<P: AsRef<Path>>(
146 offsets: P,
147 _flags: MemoryFlags,
148 ) -> Result<MemCase<Self::Offsets>> {
149 let path = offsets.as_ref();
150 unsafe {
151 EF::load_full(path)
152 .with_context(|| format!("Cannot load Elias-Fano pointer list {}", path.display()))
153 .map(Into::into)
154 }
155 }
156}
157
158#[derive(Debug, Clone)]
162pub struct Mmap {}
163#[sealed]
164impl LoadMode for Mmap {
165 type Factory<E: Endianness> = MmapHelper<u32>;
166 type Offsets = EF;
167
168 fn new_factory<E: Endianness, P: AsRef<Path>>(
169 graph: P,
170 flags: MemoryFlags,
171 ) -> Result<Self::Factory<E>> {
172 MmapHelper::mmap(graph, flags.into())
173 }
174
175 fn load_offsets<P: AsRef<Path>>(
176 offsets: P,
177 flags: MemoryFlags,
178 ) -> Result<MemCase<Self::Offsets>> {
179 let path = offsets.as_ref();
180 unsafe {
181 EF::mmap(path, flags.into())
182 .with_context(|| format!("Cannot map Elias-Fano pointer list {}", path.display()))
183 }
184 }
185}
186
187#[derive(Debug, Clone)]
189pub struct LoadMem {}
190#[sealed]
191impl LoadMode for LoadMem {
192 type Factory<E: Endianness> = MemoryFactory<E, Box<[u32]>>;
193 type Offsets = EF;
194
195 fn new_factory<E: Endianness, P: AsRef<Path>>(
196 graph: P,
197 _flags: MemoryFlags,
198 ) -> Result<Self::Factory<E>> {
199 MemoryFactory::<E, _>::new_mem(graph)
200 }
201
202 fn load_offsets<P: AsRef<Path>>(
203 offsets: P,
204 _flags: MemoryFlags,
205 ) -> Result<MemCase<Self::Offsets>> {
206 let path = offsets.as_ref();
207 unsafe {
208 EF::load_mem(path)
209 .with_context(|| format!("Cannot load Elias-Fano pointer list {}", path.display()))
210 }
211 }
212}
213
214#[derive(Debug, Clone)]
218pub struct LoadMmap {}
219#[sealed]
220impl LoadMode for LoadMmap {
221 type Factory<E: Endianness> = MemoryFactory<E, MmapHelper<u32>>;
222 type Offsets = EF;
223
224 fn new_factory<E: Endianness, P: AsRef<Path>>(
225 graph: P,
226 flags: MemoryFlags,
227 ) -> Result<Self::Factory<E>> {
228 MemoryFactory::<E, _>::new_mmap(graph, flags)
229 }
230
231 fn load_offsets<P: AsRef<Path>>(
232 offsets: P,
233 flags: MemoryFlags,
234 ) -> Result<MemCase<Self::Offsets>> {
235 let path = offsets.as_ref();
236 unsafe {
237 EF::load_mmap(path, flags.into())
238 .with_context(|| format!("Cannot load Elias-Fano pointer list {}", path.display()))
239 }
240 }
241}
242
243#[derive(Debug, Clone)]
363pub struct LoadConfig<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode> {
364 pub(crate) basename: PathBuf,
365 pub(crate) graph_load_flags: MemoryFlags,
366 pub(crate) offsets_load_flags: MemoryFlags,
367 pub(crate) _marker: std::marker::PhantomData<(E, A, D, GLM, OLM)>,
368}
369
370impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
371 LoadConfig<E, A, D, GLM, OLM>
372{
373 pub fn endianness<E2: Endianness>(self) -> LoadConfig<E2, A, D, GLM, OLM>
375 where
376 GLM: LoadMode,
377 OLM: LoadMode,
378 {
379 LoadConfig {
380 basename: self.basename,
381 graph_load_flags: self.graph_load_flags,
382 offsets_load_flags: self.offsets_load_flags,
383 _marker: std::marker::PhantomData,
384 }
385 }
386}
387
388impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
389 LoadConfig<E, A, D, GLM, OLM>
390{
391 pub fn dispatch<D2: Dispatch>(self) -> LoadConfig<E, A, D2, GLM, OLM> {
393 LoadConfig {
394 basename: self.basename,
395 graph_load_flags: self.graph_load_flags,
396 offsets_load_flags: self.offsets_load_flags,
397 _marker: std::marker::PhantomData,
398 }
399 }
400}
401
402impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
403 LoadConfig<E, A, D, GLM, OLM>
404{
405 pub fn mode<LM: LoadMode>(self) -> LoadConfig<E, A, D, LM, LM> {
407 LoadConfig {
408 basename: self.basename,
409 graph_load_flags: self.graph_load_flags,
410 offsets_load_flags: self.offsets_load_flags,
411 _marker: std::marker::PhantomData,
412 }
413 }
414}
415
416impl<E: Endianness, A: Access, D: Dispatch> LoadConfig<E, A, D, Mmap, Mmap> {
417 pub fn flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, Mmap, Mmap> {
419 LoadConfig {
420 basename: self.basename,
421 graph_load_flags: flags,
422 offsets_load_flags: flags,
423 _marker: std::marker::PhantomData,
424 }
425 }
426}
427
428impl<E: Endianness, A: Access, D: Dispatch> LoadConfig<E, A, D, LoadMmap, LoadMmap> {
429 pub fn flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, LoadMmap, LoadMmap> {
431 LoadConfig {
432 basename: self.basename,
433 graph_load_flags: flags,
434 offsets_load_flags: flags,
435 _marker: std::marker::PhantomData,
436 }
437 }
438}
439
440impl<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode>
441 LoadConfig<E, A, D, GLM, OLM>
442{
443 pub fn graph_mode<NGLM: LoadMode>(self) -> LoadConfig<E, A, D, NGLM, OLM> {
445 LoadConfig {
446 basename: self.basename,
447 graph_load_flags: self.graph_load_flags,
448 offsets_load_flags: self.offsets_load_flags,
449 _marker: std::marker::PhantomData,
450 }
451 }
452}
453
454impl<E: Endianness, A: Access, D: Dispatch, OLM: LoadMode> LoadConfig<E, A, D, Mmap, OLM> {
455 pub fn graph_flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, Mmap, OLM> {
457 LoadConfig {
458 basename: self.basename,
459 graph_load_flags: flags,
460 offsets_load_flags: self.offsets_load_flags,
461 _marker: std::marker::PhantomData,
462 }
463 }
464}
465
466impl<E: Endianness, A: Access, D: Dispatch, OLM: LoadMode> LoadConfig<E, A, D, LoadMmap, OLM> {
467 pub fn graph_flags(self, flags: MemoryFlags) -> LoadConfig<E, A, D, LoadMmap, OLM> {
469 LoadConfig {
470 basename: self.basename,
471 graph_load_flags: flags,
472 offsets_load_flags: self.offsets_load_flags,
473 _marker: std::marker::PhantomData,
474 }
475 }
476}
477
478impl<E: Endianness, D: Dispatch, GLM: LoadMode, OLM: LoadMode> LoadConfig<E, Random, D, GLM, OLM> {
479 pub fn offsets_mode<NOLM: LoadMode>(self) -> LoadConfig<E, Random, D, GLM, NOLM> {
481 LoadConfig {
482 basename: self.basename,
483 graph_load_flags: self.graph_load_flags,
484 offsets_load_flags: self.offsets_load_flags,
485 _marker: std::marker::PhantomData,
486 }
487 }
488}
489
490impl<E: Endianness, D: Dispatch, GLM: LoadMode> LoadConfig<E, Random, D, GLM, Mmap> {
491 pub fn offsets_flags(self, flags: MemoryFlags) -> LoadConfig<E, Random, D, GLM, Mmap> {
493 LoadConfig {
494 basename: self.basename,
495 graph_load_flags: self.graph_load_flags,
496 offsets_load_flags: flags,
497 _marker: std::marker::PhantomData,
498 }
499 }
500}
501
502impl<E: Endianness, D: Dispatch, GLM: LoadMode> LoadConfig<E, Random, D, GLM, LoadMmap> {
503 pub fn offsets_flags(self, flags: MemoryFlags) -> LoadConfig<E, Random, D, GLM, LoadMmap> {
505 LoadConfig {
506 basename: self.basename,
507 graph_load_flags: self.graph_load_flags,
508 offsets_load_flags: flags,
509 _marker: std::marker::PhantomData,
510 }
511 }
512}
513
514impl<E: Endianness, GLM: LoadMode, OLM: LoadMode> LoadConfig<E, Random, Dynamic, GLM, OLM> {
515 pub fn load(
517 mut self,
518 ) -> anyhow::Result<BvGraph<DynCodesDecoderFactory<E, GLM::Factory<E>, OLM::Offsets>>>
519 where
520 <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
521 for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E> + BitSeek,
522 {
523 warn_if_ef_stale(&self.basename);
524 self.basename.set_extension(PROPERTIES_EXTENSION);
525 let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)
526 .with_context(|| {
527 format!("Could not load properties file {}", self.basename.display())
528 })?;
529 self.basename.set_extension(GRAPH_EXTENSION);
530 let factory = GLM::new_factory(&self.basename, self.graph_load_flags)
531 .with_context(|| format!("Could not load graph file {}", self.basename.display()))?;
532 self.basename.set_extension(EF_EXTENSION);
533 let offsets = OLM::load_offsets(&self.basename, self.offsets_load_flags)
534 .with_context(|| format!("Could not load offsets file {}", self.basename.display()))?;
535
536 Ok(BvGraph::new(
537 DynCodesDecoderFactory::new(factory, offsets, comp_flags)?,
538 num_nodes,
539 num_arcs,
540 comp_flags.compression_window,
541 comp_flags.min_interval_length,
542 ))
543 }
544}
545
546impl<E: Endianness, GLM: LoadMode, OLM: LoadMode> LoadConfig<E, Sequential, Dynamic, GLM, OLM> {
547 pub fn load(
549 mut self,
550 ) -> anyhow::Result<
551 BvGraphSeq<DynCodesDecoderFactory<E, GLM::Factory<E>, Owned<EmptyDict<usize, usize>>>>,
552 >
553 where
554 <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
555 for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E>,
556 {
557 self.basename.set_extension(PROPERTIES_EXTENSION);
558 let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)?;
559 self.basename.set_extension(GRAPH_EXTENSION);
560 let factory = GLM::new_factory(&self.basename, self.graph_load_flags)?;
561
562 Ok(BvGraphSeq::new(
563 DynCodesDecoderFactory::new(factory, EmptyDict::default().into(), comp_flags)?,
564 num_nodes,
565 Some(num_arcs),
566 comp_flags.compression_window,
567 comp_flags.min_interval_length,
568 ))
569 }
570}
571
572impl<
573 E: Endianness,
574 GLM: LoadMode,
575 OLM: LoadMode,
576 const OUTDEGREES: usize,
577 const REFERENCES: usize,
578 const BLOCKS: usize,
579 const INTERVALS: usize,
580 const RESIDUALS: usize,
581> LoadConfig<E, Random, Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>, GLM, OLM>
582{
583 pub fn load(
585 mut self,
586 ) -> anyhow::Result<
587 BvGraph<
588 ConstCodesDecoderFactory<
589 E,
590 GLM::Factory<E>,
591 OLM::Offsets,
592 OUTDEGREES,
593 REFERENCES,
594 BLOCKS,
595 INTERVALS,
596 RESIDUALS,
597 >,
598 >,
599 >
600 where
601 <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
602 for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E> + BitSeek,
603 {
604 warn_if_ef_stale(&self.basename);
605 self.basename.set_extension(PROPERTIES_EXTENSION);
606 let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)?;
607 self.basename.set_extension(GRAPH_EXTENSION);
608 let factory = GLM::new_factory(&self.basename, self.graph_load_flags)?;
609 self.basename.set_extension(EF_EXTENSION);
610 let offsets = OLM::load_offsets(&self.basename, self.offsets_load_flags)?;
611
612 Ok(BvGraph::new(
613 ConstCodesDecoderFactory::new(factory, offsets, comp_flags)?,
614 num_nodes,
615 num_arcs,
616 comp_flags.compression_window,
617 comp_flags.min_interval_length,
618 ))
619 }
620}
621
622impl<
623 E: Endianness,
624 GLM: LoadMode,
625 OLM: LoadMode,
626 const OUTDEGREES: usize,
627 const REFERENCES: usize,
628 const BLOCKS: usize,
629 const INTERVALS: usize,
630 const RESIDUALS: usize,
631>
632 LoadConfig<
633 E,
634 Sequential,
635 Static<OUTDEGREES, REFERENCES, BLOCKS, INTERVALS, RESIDUALS>,
636 GLM,
637 OLM,
638 >
639{
640 pub fn load(
642 mut self,
643 ) -> anyhow::Result<
644 BvGraphSeq<
645 ConstCodesDecoderFactory<
646 E,
647 GLM::Factory<E>,
648 Owned<EmptyDict<usize, usize>>,
649 OUTDEGREES,
650 REFERENCES,
651 BLOCKS,
652 INTERVALS,
653 RESIDUALS,
654 >,
655 >,
656 >
657 where
658 <GLM as LoadMode>::Factory<E>: CodesReaderFactoryHelper<E>,
659 for<'a> LoadModeCodesReader<'a, E, GLM>: CodesRead<E>,
660 {
661 self.basename.set_extension(PROPERTIES_EXTENSION);
662 let (num_nodes, num_arcs, comp_flags) = parse_properties::<E>(&self.basename)?;
663 self.basename.set_extension(GRAPH_EXTENSION);
664 let factory = GLM::new_factory(&self.basename, self.graph_load_flags)?;
665
666 Ok(BvGraphSeq::new(
667 ConstCodesDecoderFactory::new(factory, EmptyDict::default().into(), comp_flags)?,
668 num_nodes,
669 Some(num_arcs),
670 comp_flags.compression_window,
671 comp_flags.min_interval_length,
672 ))
673 }
674}
675
676fn warn_if_ef_stale(basename: &Path) {
682 if std::env::var_os("DO_NOT_CHECK_MOD_TIMES").is_some() {
683 return;
684 }
685 let graph_path = basename.with_extension(GRAPH_EXTENSION);
686 let ef_path = basename.with_extension(EF_EXTENSION);
687
688 let graph_modified = match std::fs::metadata(&graph_path).and_then(|m| m.modified()) {
689 Ok(t) => t,
690 Err(_) => return, };
692
693 let ef_modified = match std::fs::metadata(&ef_path).and_then(|m| m.modified()) {
694 Ok(t) => t,
695 Err(_) => return, };
697
698 if ef_modified < graph_modified {
699 log::warn!(
700 "The Elias-Fano file {} is older than the graph file {}; \
701 this may indicate that the graph has been modified and the .ef file is stale. \
702 Consider rebuilding it with \"webgraph build ef {}\", just touch it if this warning is spurious, \
703 or set the environment variable DO_NOT_CHECK_MOD_TIMES to disable this check.",
704 ef_path.display(),
705 graph_path.display(),
706 basename.display()
707 );
708 }
709}
710
711pub fn get_endianness<P: AsRef<Path>>(basename: P) -> Result<String> {
713 let path = basename.as_ref().with_extension(PROPERTIES_EXTENSION);
714 let f = std::fs::File::open(&path)
715 .with_context(|| format!("Cannot open property file {}", path.display()))?;
716 let map = java_properties::read(BufReader::new(f))
717 .with_context(|| format!("cannot parse {} as a java properties file", path.display()))?;
718
719 let endianness = map
720 .get("endianness")
721 .map(|x| x.to_string())
722 .unwrap_or_else(|| BigEndian::NAME.to_string());
723
724 Ok(endianness)
725}
726
727pub fn parse_properties<E: Endianness>(path: impl AsRef<Path>) -> Result<(usize, u64, CompFlags)> {
730 let name = path.as_ref().display();
731 let f =
732 std::fs::File::open(&path).with_context(|| format!("Cannot open property file {name}"))?;
733 let map = java_properties::read(BufReader::new(f))
734 .with_context(|| format!("cannot parse {name} as a java properties file"))?;
735
736 let num_nodes = map
737 .get("nodes")
738 .with_context(|| format!("Missing 'nodes' property in {name}"))?
739 .parse::<usize>()
740 .with_context(|| format!("Cannot parse 'nodes' as usize in {name}"))?;
741 let num_arcs = map
742 .get("arcs")
743 .with_context(|| format!("Missing 'arcs' property in {name}"))?
744 .parse::<u64>()
745 .with_context(|| format!("Cannot parse arcs as usize in {name}"))?;
746
747 let comp_flags = CompFlags::from_properties::<E>(&map)
748 .with_context(|| format!("Cannot parse compression flags from {name}"))?;
749 Ok((num_nodes, num_arcs, comp_flags))
750}