swh_graph/front_coded_list/
read.rs1use std::fs::File;
7use std::io::BufReader;
8use std::path::Path;
9
10use anyhow::{bail, Context, Result};
11use mmap_rs::{Mmap, MmapFlags};
12use value_traits::slices::SliceByValue;
13
14use crate::utils::suffix_path;
15
16#[derive(Debug, Clone)]
17pub struct FrontCodedList<D: AsRef<[u8]>, P: AsRef<[u8]>> {
29 k: usize,
32 len: usize,
34 data: D,
36 pointers: P,
38}
39
40impl FrontCodedList<Mmap, Mmap> {
41 pub fn load<P: AsRef<Path>>(base_path: P) -> Result<Self> {
42 let properties_path = suffix_path(&base_path, ".properties");
43 let bytearray_path = suffix_path(&base_path, ".bytearray");
44 let pointers_path = suffix_path(&base_path, ".pointers");
45
46 let properties_file = File::open(&properties_path)
48 .with_context(|| format!("Could not open {}", properties_path.display()))?;
49 let map = java_properties::read(BufReader::new(properties_file)).with_context(|| {
50 format!(
51 "Could not parse properties from {}",
52 properties_path.display()
53 )
54 })?;
55 let len =
56 map.get("n").unwrap().parse::<usize>().with_context(|| {
57 format!("Could not parse 'n' from {}", properties_path.display())
58 })?;
59 let k = map
60 .get("ratio")
61 .unwrap()
62 .parse::<usize>()
63 .with_context(|| {
64 format!("Could not parse 'ratio' from {}", properties_path.display())
65 })?;
66
67 let bytearray_len = bytearray_path
69 .metadata()
70 .with_context(|| format!("Could not read {} stats", bytearray_path.display()))?
71 .len();
72 let bytearray_file = std::fs::File::open(&bytearray_path)
73 .with_context(|| format!("Could not open {}", bytearray_path.display()))?;
74 let data = unsafe {
75 mmap_rs::MmapOptions::new(bytearray_len as _)
76 .context("Could not initialize mmap")?
77 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
78 .with_file(&bytearray_file, 0)
79 .map()
80 .with_context(|| format!("Could not mmap {}", bytearray_path.display()))?
81 };
82
83 let pointers_len = pointers_path
85 .metadata()
86 .with_context(|| format!("Could not read {} stats", pointers_path.display()))?
87 .len();
88 let expected_pointers_len = ((len.div_ceil(k)) * 8) as u64;
89 if pointers_len != expected_pointers_len {
90 bail!(
91 "FCL at {} has length {} and ratio {} so {} should have length {}, but it has length {}",
92 base_path.as_ref().display(),
93 len,
94 k,
95 pointers_path.display(),
96 expected_pointers_len,
97 pointers_len
98 );
99 }
100 let pointers_file = std::fs::File::open(&pointers_path)
101 .with_context(|| format!("Could not open {}", pointers_path.display()))?;
102 let pointers = unsafe {
103 mmap_rs::MmapOptions::new(pointers_len as _)
104 .context("Could not initialize mmap")?
105 .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES | MmapFlags::RANDOM_ACCESS)
106 .with_file(&pointers_file, 0)
107 .map()
108 .with_context(|| format!("Could not mmap {}", pointers_path.display()))?
109 };
110
111 Ok(FrontCodedList {
112 k,
113 len,
114 data,
115 pointers,
116 })
117 }
118}
119
120impl<D: AsRef<[u8]>, P: AsRef<[u8]>> FrontCodedList<D, P> {
122 #[inline(always)]
125 pub fn get_inplace(&self, index: usize, result: &mut Vec<u8>) {
126 result.clear();
127 let block = index / self.k;
128 let offset = index % self.k;
129
130 let start = u64::from_be_bytes(
131 self.pointers.as_ref()[block * 8..(block + 1) * 8]
132 .try_into()
133 .unwrap(),
134 )
135 .try_into()
136 .expect("FCL pointer overflowed usize");
137 let data = &self.data.as_ref()[start..];
138
139 let (len, mut data) = decode_int(data);
141 result.extend(&data[..len as usize]);
142 data = &data[len as usize..];
143
144 for _ in 0..offset {
145 let (new_suffix_len, tmp) = decode_int(data);
146 let (reused_prefix_len, tmp) = decode_int(tmp);
147
148 result.resize(reused_prefix_len as usize, 0);
149
150 result.extend(&tmp[..new_suffix_len as usize]);
151
152 data = &tmp[new_suffix_len as usize..];
153 }
154 }
155}
156
157impl<D: AsRef<[u8]>, P: AsRef<[u8]>> SliceByValue for &FrontCodedList<D, P> {
158 type Value = Vec<u8>;
159
160 fn len(&self) -> usize {
161 self.len
162 }
163
164 fn get_value(&self, index: usize) -> Option<Self::Value> {
166 if index >= self.len {
167 None
168 } else {
169 let mut result = Vec::with_capacity(128);
170 self.get_inplace(index, &mut result);
171 Some(result)
172 }
173 }
174
175 unsafe fn get_value_unchecked(&self, index: usize) -> Self::Value {
181 let mut result = Vec::with_capacity(128);
182 self.get_inplace(index, &mut result);
183 result
184 }
185}
186
187#[inline(always)]
188pub(crate) fn decode_int(data: &[u8]) -> (u32, &[u8]) {
193 let high_bit_mask = 0b1000_0000u8;
194 let invert = |n: u8| (!n) as u32;
195
196 if data[0] & high_bit_mask == 0 {
197 (data[0] as u32, &data[1..])
198 } else if data[1] & high_bit_mask == 0 {
199 ((invert(data[0]) << 7) | (data[1] as u32), &data[2..])
200 } else if data[2] & high_bit_mask == 0 {
201 (
202 ((invert(data[0])) << 14) | (invert(data[1]) << 7) | (data[2] as u32),
203 &data[3..],
204 )
205 } else if data[3] & high_bit_mask == 0 {
206 (
207 (invert(data[0]) << 21)
208 | (invert(data[1]) << 14)
209 | (invert(data[2]) << 7)
210 | (data[3] as u32),
211 &data[4..],
212 )
213 } else {
214 (
215 ((invert(data[0])) << 28)
216 | (invert(data[1]) << 21)
217 | (invert(data[2]) << 14)
218 | (invert(data[3]) << 7)
219 | (data[4] as u32),
220 &data[5..],
221 )
222 }
223}