swh_graph/front_coded_list/
read.rs1use std::fs::File;
7use std::io::BufReader;
8use std::path::Path;
9
10use anyhow::{bail, Context, Result};
11use mmap_rs::{Mmap, MmapFlags};
12
13use crate::utils::{suffix_path, GetIndex};
14
15#[derive(Debug, Clone)]
16pub struct FrontCodedList<D: AsRef<[u8]>, P: AsRef<[u8]>> {
28 k: usize,
31 len: usize,
33 data: D,
35 pointers: P,
37}
38
39impl FrontCodedList<Mmap, Mmap> {
40 pub fn load<P: AsRef<Path>>(base_path: P) -> Result<Self> {
41 let properties_path = suffix_path(&base_path, ".properties");
42 let bytearray_path = suffix_path(&base_path, ".bytearray");
43 let pointers_path = suffix_path(&base_path, ".pointers");
44
45 let properties_file = File::open(&properties_path)
47 .with_context(|| format!("Could not open {}", properties_path.display()))?;
48 let map = java_properties::read(BufReader::new(properties_file)).with_context(|| {
49 format!(
50 "Could not parse properties from {}",
51 properties_path.display()
52 )
53 })?;
54 let len =
55 map.get("n").unwrap().parse::<usize>().with_context(|| {
56 format!("Could not parse 'n' from {}", properties_path.display())
57 })?;
58 let k = map
59 .get("ratio")
60 .unwrap()
61 .parse::<usize>()
62 .with_context(|| {
63 format!("Could not parse 'ratio' from {}", properties_path.display())
64 })?;
65
66 let bytearray_len = bytearray_path
68 .metadata()
69 .with_context(|| format!("Could not read {} stats", bytearray_path.display()))?
70 .len();
71 let bytearray_file = std::fs::File::open(&bytearray_path)
72 .with_context(|| format!("Could not open {}", bytearray_path.display()))?;
73 let data = unsafe {
74 mmap_rs::MmapOptions::new(bytearray_len as _)
75 .context("Could not initialize mmap")?
76 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
77 .with_file(&bytearray_file, 0)
78 .map()
79 .with_context(|| format!("Could not mmap {}", bytearray_path.display()))?
80 };
81
82 let pointers_len = pointers_path
84 .metadata()
85 .with_context(|| format!("Could not read {} stats", pointers_path.display()))?
86 .len();
87 let expected_pointers_len = ((len.div_ceil(k)) * 8) as u64;
88 if pointers_len != expected_pointers_len {
89 bail!(
90 "FCL at {} has length {} and ratio {} so {} should have length {}, but it has length {}",
91 base_path.as_ref().display(),
92 len,
93 k,
94 pointers_path.display(),
95 expected_pointers_len,
96 pointers_len
97 );
98 }
99 let pointers_file = std::fs::File::open(&pointers_path)
100 .with_context(|| format!("Could not open {}", pointers_path.display()))?;
101 let pointers = unsafe {
102 mmap_rs::MmapOptions::new(pointers_len as _)
103 .context("Could not initialize mmap")?
104 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
105 .with_file(&pointers_file, 0)
106 .map()
107 .with_context(|| format!("Could not mmap {}", pointers_path.display()))?
108 };
109
110 Ok(FrontCodedList {
111 k,
112 len,
113 data,
114 pointers,
115 })
116 }
117}
118
119impl<D: AsRef<[u8]>, P: AsRef<[u8]>> FrontCodedList<D, P> {
121 #[inline(always)]
124 pub fn get_inplace(&self, index: usize, result: &mut Vec<u8>) {
125 result.clear();
126 let block = index / self.k;
127 let offset = index % self.k;
128
129 let start = u64::from_be_bytes(
130 self.pointers.as_ref()[block * 8..(block + 1) * 8]
131 .try_into()
132 .unwrap(),
133 )
134 .try_into()
135 .expect("FCL pointer overflowed usize");
136 let data = &self.data.as_ref()[start..];
137
138 let (len, mut data) = decode_int(data);
140 result.extend(&data[..len as usize]);
141 data = &data[len as usize..];
142
143 for _ in 0..offset {
144 let (new_suffix_len, tmp) = decode_int(data);
145 let (reused_prefix_len, tmp) = decode_int(tmp);
146
147 result.resize(reused_prefix_len as usize, 0);
148
149 result.extend(&tmp[..new_suffix_len as usize]);
150
151 data = &tmp[new_suffix_len as usize..];
152 }
153 }
154}
155
156impl<D: AsRef<[u8]>, P: AsRef<[u8]>> GetIndex for &FrontCodedList<D, P> {
157 type Output = Vec<u8>;
158
159 fn len(&self) -> usize {
160 self.len
161 }
162
163 fn get(&self, index: usize) -> Option<Self::Output> {
165 if index >= self.len {
166 None
167 } else {
168 let mut result = Vec::with_capacity(128);
169 self.get_inplace(index, &mut result);
170 Some(result)
171 }
172 }
173
174 unsafe fn get_unchecked(&self, index: usize) -> Self::Output {
180 let mut result = Vec::with_capacity(128);
181 self.get_inplace(index, &mut result);
182 result
183 }
184}
185
186#[inline(always)]
187pub(crate) fn decode_int(data: &[u8]) -> (u32, &[u8]) {
192 let high_bit_mask = 0b1000_0000u8;
193 let invert = |n: u8| (!n) as u32;
194
195 if data[0] & high_bit_mask == 0 {
196 (data[0] as u32, &data[1..])
197 } else if data[1] & high_bit_mask == 0 {
198 ((invert(data[0]) << 7) | (data[1] as u32), &data[2..])
199 } else if data[2] & high_bit_mask == 0 {
200 (
201 ((invert(data[0])) << 14) | (invert(data[1]) << 7) | (data[2] as u32),
202 &data[3..],
203 )
204 } else if data[3] & high_bit_mask == 0 {
205 (
206 (invert(data[0]) << 21)
207 | (invert(data[1]) << 14)
208 | (invert(data[2]) << 7)
209 | (data[3] as u32),
210 &data[4..],
211 )
212 } else {
213 (
214 ((invert(data[0])) << 28)
215 | (invert(data[1]) << 21)
216 | (invert(data[2]) << 14)
217 | (invert(data[3]) << 7)
218 | (data[4] as u32),
219 &data[5..],
220 )
221 }
222}