swh_graph/utils/
mmap.rs

1// Copyright (C) 2023  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::fs::File;
7use std::marker::PhantomData;
8use std::path::Path;
9
10use anyhow::{bail, Context, Result};
11use byteorder::ByteOrder;
12use mmap_rs::{Mmap, MmapFlags};
13
14/// Newtype for [`Mmap`] used to store arrays of any integers
15///
16/// instead of slices of u8
17pub struct NumberMmap<E: ByteOrder, N: common_traits::AsBytes, B> {
18    data: B,
19    len: usize,
20    offset: usize,
21    _number: PhantomData<N>,
22    _endianness: PhantomData<E>,
23}
24
25impl<E: ByteOrder, N: common_traits::AsBytes> NumberMmap<E, N, Mmap> {
26    pub fn new<P: AsRef<Path>>(path: P, len: usize) -> Result<NumberMmap<E, N, Mmap>> {
27        let path = path.as_ref();
28        let file_len = path
29            .metadata()
30            .with_context(|| format!("Could not stat {}", path.display()))?
31            .len();
32        if file_len < (len * N::BYTES) as u64 {
33            // We have to allow length > num_nodes because graphs compressed
34            // with the Java implementation used zero padding at the end
35            bail!(
36                "{} is too short: expected at least {} bytes ({} items), got {}",
37                path.display(),
38                len * N::BYTES,
39                len,
40                file_len,
41            );
42        }
43        let file =
44            File::open(path).with_context(|| format!("Could not open {}", path.display()))?;
45        Self::with_file_and_offset(path, len, file, 0)
46    }
47
48    pub fn with_file_and_offset<P: AsRef<Path>>(
49        path: P,
50        len: usize,
51        file: File,
52        offset: usize,
53    ) -> Result<NumberMmap<E, N, Mmap>> {
54        let path = path.as_ref();
55        let file_len = len * N::BYTES;
56        let data = unsafe {
57            mmap_rs::MmapOptions::new(file_len as _)
58                .with_context(|| format!("Could not initialize mmap of size {file_len}"))?
59                .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
60                .with_file(&file, 0)
61                .map()
62                .with_context(|| format!("Could not mmap {}", path.display()))?
63        };
64
65        if data.len() % N::BYTES != 0 {
66            bail!(
67                "Cannot interpret mmap of size {} as array of {}",
68                data.len(),
69                std::any::type_name::<N>()
70            );
71        }
72        Ok(NumberMmap {
73            data,
74            len,
75            offset,
76            _number: PhantomData,
77            _endianness: PhantomData,
78        })
79    }
80
81    #[allow(clippy::len_without_is_empty)]
82    pub fn len(&self) -> usize {
83        self.len
84    }
85}
86
87impl<E: ByteOrder, N: common_traits::AsBytes> NumberMmap<E, N, Mmap> {
88    fn get_slice(&self, index: usize) -> Option<&[u8]> {
89        let start = (index * N::BYTES) + self.offset;
90        self.data.get(start..(start + N::BYTES))
91    }
92
93    unsafe fn get_slice_unchecked(&self, index: usize) -> &[u8] {
94        let start = (index * N::BYTES) + self.offset;
95        self.data.get_unchecked(start..(start + N::BYTES))
96    }
97}
98
99macro_rules! impl_number_mmap {
100    ($ty:ty, $fn:ident) => {
101        impl<E: ByteOrder> crate::utils::GetIndex for &NumberMmap<E, $ty, Mmap> {
102            type Output = $ty;
103
104            fn len(&self) -> usize {
105                NumberMmap::len(self)
106            }
107
108            /// Returns an item
109            fn get(&self, index: usize) -> Option<$ty> {
110                self.get_slice(index).map(E::$fn)
111            }
112
113            /// Returns an item
114            ///
115            /// # Safety
116            ///
117            /// Undefined behavior if `index >= len()`
118            unsafe fn get_unchecked(&self, index: usize) -> $ty {
119                E::$fn(self.get_slice_unchecked(index))
120            }
121        }
122    };
123}
124
125impl_number_mmap!(i16, read_i16);
126impl_number_mmap!(u32, read_u32);
127impl_number_mmap!(i64, read_i64);
128impl_number_mmap!(u64, read_u64);