swh_graph/
labeling.rs

1// Copyright (C) 2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use anyhow::{Context, Result};
7use bitstream::Supply;
8use dsi_bitstream::codes::GammaRead;
9use dsi_bitstream::impls::{BufBitReader, MemWordReader};
10use dsi_bitstream::traits::{BitRead, BitSeek, Endianness, BE};
11use epserde::deser::{DeserType, Deserialize, Flags, MemCase};
12use mmap_rs::MmapFlags;
13use std::path::Path;
14use webgraph::prelude::bitstream::BitStreamLabeling;
15use webgraph::prelude::*;
16
17/// A [`BitDeserializer`] for the labels stored in the bitstream.
18///
19/// Labels are deserialized as a sequence of `u64` values, each of which is
20/// `width` bits wide. The length of the sequence is read using a [γ
21/// code](GammaRead), and then each value is obtained by reading `width` bits.
22pub struct SwhDeserializer {
23    width: usize,
24}
25
26impl SwhDeserializer {
27    /// Creates a new [`SwhDeserializer`] with the given width.
28    pub(crate) fn new(width: usize) -> Self {
29        Self { width }
30    }
31}
32
33impl<BR: BitRead<BE> + BitSeek + GammaRead<BE>> BitDeserializer<BE, BR> for SwhDeserializer {
34    type DeserType = Vec<u64>;
35
36    fn deserialize(
37        &self,
38        bitstream: &mut BR,
39    ) -> std::result::Result<Self::DeserType, <BR as BitRead<BE>>::Error> {
40        let num_labels = bitstream.read_gamma().unwrap() as usize;
41        let mut labels = Vec::with_capacity(num_labels);
42        for _ in 0..num_labels {
43            labels.push(bitstream.read_bits(self.width)?);
44        }
45        Ok(labels)
46    }
47}
48
49pub struct MmapReaderSupplier<E: Endianness> {
50    backend: MmapHelper<u32>,
51    _marker: std::marker::PhantomData<E>,
52}
53
54impl Supply for MmapReaderSupplier<BE> {
55    type Item<'a>
56        = BufBitReader<BE, MemWordReader<u32, &'a [u32]>>
57    where
58        Self: 'a;
59
60    fn request(&self) -> Self::Item<'_> {
61        BufBitReader::<BE, _>::new(MemWordReader::new(self.backend.as_ref()))
62    }
63}
64
65pub type SwhLabelingInner =
66    BitStreamLabeling<BE, MmapReaderSupplier<BE>, SwhDeserializer, MemCase<DeserType<'static, EF>>>;
67
68pub struct SwhLabeling(pub SwhLabelingInner);
69
70impl SequentialLabeling for SwhLabeling {
71    type Label = <SwhLabelingInner as SequentialLabeling>::Label;
72    type Lender<'node>
73        = <SwhLabelingInner as SequentialLabeling>::Lender<'node>
74    where
75        Self: 'node;
76
77    // Required methods
78    fn num_nodes(&self) -> usize {
79        self.0.num_nodes()
80    }
81    fn iter_from(&self, from: usize) -> Self::Lender<'_> {
82        self.0.iter_from(from)
83    }
84}
85
86impl RandomAccessLabeling for SwhLabeling {
87    type Labels<'succ>
88        = <SwhLabelingInner as RandomAccessLabeling>::Labels<'succ>
89    where
90        Self: 'succ;
91
92    fn num_arcs(&self) -> u64 {
93        <SwhLabelingInner as RandomAccessLabeling>::num_arcs(&self.0)
94    }
95
96    fn labels(&self, node_id: usize) -> <Self as RandomAccessLabeling>::Labels<'_> {
97        <SwhLabelingInner as RandomAccessLabeling>::labels(&self.0, node_id)
98    }
99
100    fn outdegree(&self, node_id: usize) -> usize {
101        <SwhLabelingInner as RandomAccessLabeling>::outdegree(&self.0, node_id)
102    }
103}
104
105pub(crate) fn mmap(path: impl AsRef<Path>, bit_deser: SwhDeserializer) -> Result<SwhLabeling> {
106    let path = path.as_ref();
107    let labels_path = path.with_extension("labels");
108    let ef_path = path.with_extension("ef");
109    let ef = EF::mmap(&ef_path, Flags::empty())
110        .with_context(|| format!("Could not parse {}", ef_path.display()))?;
111    Ok(SwhLabeling(BitStreamLabeling::new(
112        MmapReaderSupplier {
113            backend: MmapHelper::<u32>::mmap(&labels_path, MmapFlags::empty())
114                .with_context(|| format!("Could not mmap {}", labels_path.display()))?,
115            _marker: std::marker::PhantomData,
116        },
117        bit_deser,
118        ef,
119    )))
120}