swh_graph/properties/
label_names.rs1use anyhow::{Context, Result};
7use mmap_rs::Mmap;
8use rayon::prelude::*;
9use thiserror::Error;
10
11use super::suffixes::*;
12use super::*;
13use crate::front_coded_list::FrontCodedList;
14use crate::labels::LabelNameId;
15use crate::utils::suffix_path;
16
17pub trait MaybeLabelNames {}
20
21pub struct MappedLabelNames {
22 label_names: FrontCodedList<Mmap, Mmap>,
23}
24impl<L: LabelNames> MaybeLabelNames for L {}
25
26#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
28pub struct NoLabelNames;
29impl MaybeLabelNames for NoLabelNames {}
30
31#[diagnostic::on_unimplemented(
33 label = "does not have label names loaded",
34 note = "Use `let graph = graph.load_properties(|props| props.load_label_names()).unwrap()` to load them",
35 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
36)]
37pub trait LabelNames {
38 type LabelNames<'a>: GetIndex<Output = Vec<u8>>
39 where
40 Self: 'a;
41
42 fn label_names(&self) -> Self::LabelNames<'_>;
43}
44
45impl LabelNames for MappedLabelNames {
46 type LabelNames<'a>
47 = &'a FrontCodedList<Mmap, Mmap>
48 where
49 Self: 'a;
50
51 #[inline(always)]
52 fn label_names(&self) -> Self::LabelNames<'_> {
53 &self.label_names
54 }
55}
56
57#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
58pub struct VecLabelNames {
59 label_names: Vec<Vec<u8>>,
60}
61impl VecLabelNames {
62 pub fn new<S: AsRef<[u8]>>(label_names: Vec<S>) -> Result<Self> {
64 let base64 = base64_simd::STANDARD;
65 Ok(VecLabelNames {
66 label_names: label_names
67 .into_iter()
68 .map(|s| base64.encode_to_string(s).into_bytes())
69 .collect(),
70 })
71 }
72}
73
74impl LabelNames for VecLabelNames {
75 type LabelNames<'a>
76 = &'a [Vec<u8>]
77 where
78 Self: 'a;
79
80 #[inline(always)]
81 fn label_names(&self) -> Self::LabelNames<'_> {
82 self.label_names.as_slice()
83 }
84}
85
86impl<
87 MAPS: MaybeMaps,
88 TIMESTAMPS: MaybeTimestamps,
89 PERSONS: MaybePersons,
90 CONTENTS: MaybeContents,
91 STRINGS: MaybeStrings,
92 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, NoLabelNames>
93{
94 pub fn load_label_names(
100 self,
101 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, MappedLabelNames>>
102 {
103 let label_names = MappedLabelNames {
104 label_names: FrontCodedList::load(suffix_path(&self.path, LABEL_NAME))
105 .context("Could not load label names")?,
106 };
107 self.with_label_names(label_names)
108 }
109
110 pub fn with_label_names<LABELNAMES: MaybeLabelNames>(
113 self,
114 label_names: LABELNAMES,
115 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
116 Ok(SwhGraphProperties {
117 maps: self.maps,
118 timestamps: self.timestamps,
119 persons: self.persons,
120 contents: self.contents,
121 strings: self.strings,
122 label_names,
123 path: self.path,
124 num_nodes: self.num_nodes,
125 label_names_are_in_base64_order: Default::default(), })
127 }
128}
129
130impl<
135 MAPS: MaybeMaps,
136 TIMESTAMPS: MaybeTimestamps,
137 PERSONS: MaybePersons,
138 CONTENTS: MaybeContents,
139 STRINGS: MaybeStrings,
140 LABELNAMES: LabelNames,
141 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
142{
143 pub fn num_label_names(&self) -> u64 {
145 self.label_names
146 .label_names()
147 .len()
148 .try_into()
149 .expect("label_names.len() overflowed u64")
150 }
151
152 pub fn iter_label_name_ids(&self) -> impl Iterator<Item = LabelNameId> {
154 (0..self.num_label_names()).map(LabelNameId)
155 }
156
157 pub fn par_iter_label_name_ids(&self) -> impl ParallelIterator<Item = LabelNameId> {
159 (0..self.num_label_names()).into_par_iter().map(LabelNameId)
160 }
161
162 #[inline]
171 pub fn label_name_base64(&self, label_name_id: LabelNameId) -> Vec<u8> {
172 self.try_label_name_base64(label_name_id)
173 .unwrap_or_else(|e| panic!("Cannot get label name: {e}"))
174 }
175
176 #[inline]
181 pub fn try_label_name_base64(
182 &self,
183 label_name_id: LabelNameId,
184 ) -> Result<Vec<u8>, OutOfBoundError> {
185 let index = label_name_id
186 .0
187 .try_into()
188 .expect("label_name_id overflowed usize");
189 self.label_names
190 .label_names()
191 .get(index)
192 .ok_or(OutOfBoundError {
193 index,
194 len: self.label_names.label_names().len(),
195 })
196 }
197
198 #[inline]
207 pub fn label_name(&self, label_name_id: LabelNameId) -> Vec<u8> {
208 self.try_label_name(label_name_id)
209 .unwrap_or_else(|e| panic!("Cannot get label name: {e}"))
210 }
211
212 #[inline]
217 pub fn try_label_name(&self, label_name_id: LabelNameId) -> Result<Vec<u8>, OutOfBoundError> {
218 let base64 = base64_simd::STANDARD;
219 self.try_label_name_base64(label_name_id).map(|name| {
220 base64.decode_to_vec(name).unwrap_or_else(|name| {
221 panic!(
222 "Could not decode label_name of id {}: {:?}",
223 label_name_id.0, name
224 )
225 })
226 })
227 }
228
229 #[inline]
237 pub fn label_name_id(
238 &self,
239 name: impl AsRef<[u8]>,
240 ) -> Result<LabelNameId, LabelIdFromNameError> {
241 use std::cmp::Ordering::*;
242
243 let base64 = base64_simd::STANDARD;
244 let name = name.as_ref();
245 let name_base64 = base64.encode_to_string(name.as_ref()).into_bytes();
246
247 macro_rules! bisect {
248 ($compare_pivot:expr) => {{
249 let compare_pivot = $compare_pivot;
250 let res: Result<LabelNameId, LabelIdFromNameError> = {
251 let mut min = 0;
253 let mut max = self
254 .label_names
255 .label_names()
256 .len()
257 .saturating_sub(1)
258 .try_into()
259 .expect("number of labels overflowed u64");
260
261 while min <= max {
262 let pivot = (min + max) / 2;
263 let pivot_id = LabelNameId(pivot);
264 let pivot_name = self.label_name_base64(pivot_id);
265 if min == max {
266 if pivot_name.as_slice() == name_base64 {
267 return Ok(pivot_id);
268 } else {
269 break;
270 }
271 } else {
272 match compare_pivot(pivot_id) {
273 Less => min = pivot.saturating_add(1),
274 Equal => return Ok(pivot_id),
275 Greater => max = pivot.saturating_sub(1),
276 }
277 }
278 }
279
280 Err(LabelIdFromNameError(name_base64.to_vec()))
281 };
282 res
283 }};
284 }
285
286 let bisect_base64 = || {
287 bisect!(|pivot_id| self
288 .label_name_base64(pivot_id)
289 .as_slice()
290 .cmp(&name_base64))
291 };
292 let bisect_nonbase64 =
293 || bisect!(|pivot_id| self.label_name(pivot_id).as_slice().cmp(name));
294
295 match self.label_names_are_in_base64_order.get() {
296 Some(true) => {
297 bisect_base64()
300 }
301 Some(false) => {
302 bisect_nonbase64()
305 }
306 None => {
307 match bisect_nonbase64() {
310 Ok(label_name_id) => {
311 if let Err(LabelIdFromNameError(_)) = bisect_base64() {
316 log::debug!("Labels are not in base64 order");
319 let _ = self.label_names_are_in_base64_order.set(false);
320 }
321 Ok(label_name_id)
322 }
323 Err(LabelIdFromNameError(_)) => {
324 match bisect_base64() {
326 Ok(label_name_id) => {
327 log::debug!("Labels are in base64 order");
330 let _ = self.label_names_are_in_base64_order.set(true);
331 Ok(label_name_id)
332 }
333 Err(LabelIdFromNameError(e)) => {
334 Err(LabelIdFromNameError(e))
336 }
337 }
338 }
339 }
340 }
341 }
342 }
343}
344
345#[derive(Clone, Debug, Eq, PartialEq, Error)]
346#[error("Unknown label name: {}", String::from_utf8_lossy(.0))]
347pub struct LabelIdFromNameError(pub Vec<u8>);