webgraph/graphs/bvgraph/codecs/
factories.rs1use crate::{
27 prelude::{FileBufReader, MemBufReader},
28 utils::MmapHelper,
29};
30use anyhow::{Context, ensure};
31use bitflags::bitflags;
32use common_traits::UnsignedInt;
33use dsi_bitstream::{
34 impls::{BufBitReader, MemWordReader, WordAdapter, buf_bit_reader},
35 prelude::{CodesRead, CodesReaderFactory},
36 traits::{BitRead, Endianness},
37};
38use epserde::Epserde;
39use std::{
40 fs::File,
41 io::{BufReader, Read},
42 marker::PhantomData,
43 mem::MaybeUninit,
44 path::Path,
45};
46use sux::traits::{IndexedSeq, Types};
47
48#[derive(Debug, Clone)]
49pub struct FileFactory<E: Endianness> {
50 path: Box<Path>,
51 _marker: core::marker::PhantomData<E>,
52}
53
54impl<E: Endianness> FileFactory<E> {
55 pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
56 let path: Box<Path> = path.as_ref().into();
57 let metadata = std::fs::metadata(&path)
58 .with_context(|| format!("Could not stat {}", path.display()))?;
59 ensure!(metadata.is_file(), "File {} is not a file", path.display());
60
61 Ok(Self {
62 path,
63 _marker: core::marker::PhantomData,
64 })
65 }
66}
67
68impl<E: Endianness> CodesReaderFactory<E> for FileFactory<E>
69where
70 FileBufReader<E>: BitRead<E> + CodesRead<E>,
71{
72 type CodesReader<'a>
73 = BufBitReader<E, WordAdapter<u32, BufReader<File>>>
74 where
75 Self: 'a;
76
77 fn new_reader(&self) -> Self::CodesReader<'_> {
78 buf_bit_reader::from_path::<E, u32>(&self.path).unwrap()
79 }
80}
81
82bitflags! {
83 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
85 pub struct MemoryFlags: u32 {
86 const TRANSPARENT_HUGE_PAGES = 1 << 0;
94 const SEQUENTIAL = 1 << 1;
99 const RANDOM_ACCESS = 1 << 2;
104 }
105}
106
107impl core::default::Default for MemoryFlags {
109 fn default() -> Self {
110 MemoryFlags::empty()
111 }
112}
113
114impl From<MemoryFlags> for mmap_rs::MmapFlags {
115 fn from(flags: MemoryFlags) -> Self {
116 let mut mmap_flags = mmap_rs::MmapFlags::empty();
117 if flags.contains(MemoryFlags::SEQUENTIAL) {
118 mmap_flags |= mmap_rs::MmapFlags::SEQUENTIAL;
119 }
120 if flags.contains(MemoryFlags::RANDOM_ACCESS) {
121 mmap_flags |= mmap_rs::MmapFlags::RANDOM_ACCESS;
122 }
123 if flags.contains(MemoryFlags::TRANSPARENT_HUGE_PAGES) {
124 mmap_flags |= mmap_rs::MmapFlags::TRANSPARENT_HUGE_PAGES;
125 }
126
127 mmap_flags
128 }
129}
130
131impl From<MemoryFlags> for epserde::deser::Flags {
132 fn from(flags: MemoryFlags) -> Self {
133 let mut deser_flags = epserde::deser::Flags::empty();
134 if flags.contains(MemoryFlags::SEQUENTIAL) {
135 deser_flags |= epserde::deser::Flags::SEQUENTIAL;
136 }
137 if flags.contains(MemoryFlags::RANDOM_ACCESS) {
138 deser_flags |= epserde::deser::Flags::RANDOM_ACCESS;
139 }
140 if flags.contains(MemoryFlags::TRANSPARENT_HUGE_PAGES) {
141 deser_flags |= epserde::deser::Flags::TRANSPARENT_HUGE_PAGES;
142 }
143
144 deser_flags
145 }
146}
147
148#[derive(Debug, Clone)]
149pub struct MemoryFactory<E: Endianness, M: AsRef<[u32]>> {
150 data: M,
151 _marker: core::marker::PhantomData<E>,
152}
153
154impl<E: Endianness, T: AsRef<[u32]>> MemoryFactory<E, T> {
155 pub fn from_data(data: T) -> Self {
156 Self {
157 data,
158 _marker: core::marker::PhantomData,
159 }
160 }
161}
162
163impl<E: Endianness> MemoryFactory<E, Box<[u32]>> {
164 pub fn new_mem(path: impl AsRef<Path>) -> anyhow::Result<Self> {
165 let path = path.as_ref();
166 let file_len = path
167 .metadata()
168 .with_context(|| format!("Could not stat {}", path.display()))?
169 .len() as usize;
170 let mut file = std::fs::File::open(path)
171 .with_context(|| format!("Could not open {}", path.display()))?;
172 let vec_len = file_len.div_ceil(usize::try_from(u32::BITS / 8).unwrap());
173
174 let mut data = Vec::<u32>::with_capacity(vec_len);
175 *data.spare_capacity_mut().last_mut().unwrap() = MaybeUninit::zeroed();
176
177 let bytes =
180 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, file_len) };
181
182 file.read_exact(bytes)
183 .with_context(|| format!("Could not read {}", path.display()))?;
184
185 unsafe { data.set_len(vec_len) };
188
189 Ok(Self {
190 data: data.into_boxed_slice(),
191 _marker: core::marker::PhantomData,
192 })
193 }
194}
195
196impl<E: Endianness> MemoryFactory<E, MmapHelper<u32>> {
197 pub fn new_mmap(path: impl AsRef<Path>, flags: MemoryFlags) -> anyhow::Result<Self> {
198 let path = path.as_ref();
199 let file_len = path
200 .metadata()
201 .with_context(|| format!("Could not stat {}", path.display()))?
202 .len() as usize;
203 let mut file = std::fs::File::open(path)
204 .with_context(|| format!("Could not open {}", path.display()))?;
205 let capacity = file_len.align_to(16);
206
207 let mut mmap = mmap_rs::MmapOptions::new(capacity)?
208 .with_flags(flags.into())
209 .map_mut()
210 .context("Could not create anonymous mmap")?;
211 file.read_exact(&mut mmap[..file_len])
212 .with_context(|| format!("Could not read {}", path.display()))?;
213 mmap[file_len..].fill(0);
216
217 Ok(Self {
218 data: MmapHelper::try_from(
220 mmap.make_read_only()
221 .map_err(|(_, err)| err)
222 .context("Could not make memory read-only")?,
223 )
224 .context("Could not create mmap backend")?,
225 _marker: core::marker::PhantomData,
226 })
227 }
228}
229
230impl<E: Endianness, M: AsRef<[u32]>> CodesReaderFactory<E> for MemoryFactory<E, M>
231where
232 for<'a> MemBufReader<'a, E>: CodesRead<E>,
233{
234 type CodesReader<'a>
235 = MemBufReader<'a, E>
236 where
237 Self: 'a;
238
239 fn new_reader(&self) -> Self::CodesReader<'_> {
240 BufBitReader::<E, _>::new(MemWordReader::new(self.data.as_ref()))
241 }
242}
243
244#[derive(Epserde, Debug, Clone)]
245pub struct EmptyDict<I, O> {
246 _marker: core::marker::PhantomData<(I, O)>,
247}
248
249impl<I, O> Types for EmptyDict<I, O> {
250 type Input = usize;
251 type Output<'a> = usize;
252}
253
254impl<I, O> IndexedSeq for EmptyDict<I, O> {
255 fn get(&self, _key: Self::Input) -> Self::Output<'_> {
256 panic!();
257 }
258
259 unsafe fn get_unchecked(&self, _index: usize) -> Self::Output<'_> {
260 panic!();
261 }
262
263 fn len(&self) -> usize {
264 0
265 }
266}
267
268impl<I, O> Default for EmptyDict<I, O> {
269 fn default() -> Self {
270 Self {
271 _marker: PhantomData,
272 }
273 }
274}
275
276impl<E: Endianness> CodesReaderFactory<E> for MmapHelper<u32>
277where
278 for<'a> MemBufReader<'a, E>: CodesRead<E>,
279{
280 type CodesReader<'a>
281 = MemBufReader<'a, E>
282 where
283 Self: 'a;
284
285 fn new_reader(&self) -> Self::CodesReader<'_> {
286 BufBitReader::<E, _>::new(MemWordReader::new(self.as_ref()))
287 }
288}