1use mmap_rs::Mmap;
7
8use super::suffixes::*;
9use super::*;
10use crate::graph::NodeId;
11
/// Marker trait implemented by both [`NoStrings`] and every implementor of
/// [`OptStrings`], so that a type parameter can stand for "string properties
/// that may or may not be loaded".
pub trait MaybeStrings {}
// Blanket impl: any string-properties backend is automatically a `MaybeStrings`.
impl<S: OptStrings> MaybeStrings for S {}
16
/// Placeholder used as the string-properties type parameter when string
/// properties are not loaded.
///
/// It implements [`MaybeStrings`] but not [`OptStrings`].
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NoStrings;
impl MaybeStrings for NoStrings {}
21
22#[diagnostic::on_unimplemented(
23 label = "does not have String properties loaded",
24 note = "Use `let graph = graph.load_properties(|props| props.load_string()).unwrap()` to load them",
25 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
26)]
27pub trait OptStrings: MaybeStrings + PropertiesBackend {
29 fn message(&self) -> PropertiesResult<'_, &[u8], Self>;
31 fn message_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<u64>, Self>;
34 fn tag_name(&self) -> PropertiesResult<'_, &[u8], Self>;
36 fn tag_name_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<u64>, Self>;
39}
40
41#[diagnostic::on_unimplemented(
42 label = "does not have String properties loaded",
43 note = "Use `let graph = graph.load_properties(|props| props.load_string()).unwrap()` to load them",
44 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
45)]
46pub trait Strings: OptStrings<DataFilesAvailability = GuaranteedDataFiles> {}
48impl<S: OptStrings<DataFilesAvailability = GuaranteedDataFiles>> Strings for S {}
49
/// String properties backend in which each of the four underlying data
/// sources is individually either loaded or recorded as unavailable.
pub struct OptMappedStrings {
    /// Buffer of messages, or the reason it is unavailable
    message: Result<Mmap, UnavailableProperty>,
    /// Per-node offsets into `message`, or the reason they are unavailable
    message_offset: Result<NumberMmap<BigEndian, u64, Mmap>, UnavailableProperty>,
    /// Buffer of tag names, or the reason it is unavailable
    tag_name: Result<Mmap, UnavailableProperty>,
    /// Per-node offsets into `tag_name`, or the reason they are unavailable
    tag_name_offset: Result<NumberMmap<BigEndian, u64, Mmap>, UnavailableProperty>,
}
impl PropertiesBackend for OptMappedStrings {
    // Accessors of this backend may report a missing data file.
    type DataFilesAvailability = OptionalDataFiles;
}
61impl OptStrings for OptMappedStrings {
62 #[inline(always)]
63 fn message(&self) -> PropertiesResult<'_, &[u8], Self> {
64 self.message.as_deref()
65 }
66 #[inline(always)]
67 fn message_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<u64>, Self> {
68 self.message_offset
69 .as_ref()
70 .map(|message_offsets| message_offsets.get(node))
71 }
72 #[inline(always)]
73 fn tag_name(&self) -> PropertiesResult<'_, &[u8], Self> {
74 self.tag_name.as_deref()
75 }
76 #[inline(always)]
77 fn tag_name_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<u64>, Self> {
78 self.tag_name_offset
79 .as_ref()
80 .map(|tag_name_offsets| tag_name_offsets.get(node))
81 }
82}
83
/// String properties backend in which all four underlying data sources are
/// loaded.
pub struct MappedStrings {
    /// Buffer of messages
    message: Mmap,
    /// Per-node offsets into `message`
    message_offset: NumberMmap<BigEndian, u64, Mmap>,
    /// Buffer of tag names
    tag_name: Mmap,
    /// Per-node offsets into `tag_name`
    tag_name_offset: NumberMmap<BigEndian, u64, Mmap>,
}
impl PropertiesBackend for MappedStrings {
    // Accessors of this backend return their value directly.
    type DataFilesAvailability = GuaranteedDataFiles;
}
94impl OptStrings for MappedStrings {
95 #[inline(always)]
96 fn message(&self) -> &[u8] {
97 &self.message
98 }
99 #[inline(always)]
100 fn message_offset(&self, node: NodeId) -> Option<u64> {
101 (&self.message_offset).get(node)
102 }
103 #[inline(always)]
104 fn tag_name(&self) -> &[u8] {
105 &self.tag_name
106 }
107 #[inline(always)]
108 fn tag_name_offset(&self, node: NodeId) -> Option<u64> {
109 (&self.tag_name_offset).get(node)
110 }
111}
112
/// String properties backend held entirely in `Vec`s instead of mapped files.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VecStrings {
    /// Concatenated base64-encoded messages, each followed by `b'\n'`
    message: Vec<u8>,
    /// Per-node offsets into `message`; `u64::MAX` marks a node without message
    message_offset: Vec<u64>,
    /// Concatenated base64-encoded tag names, each followed by `b'\n'`
    tag_name: Vec<u8>,
    /// Per-node offsets into `tag_name`; `u64::MAX` marks a node without tag name
    tag_name_offset: Vec<u64>,
}
120
121impl VecStrings {
122 pub fn new<Msg: AsRef<[u8]>, TagName: AsRef<[u8]>>(
124 data: Vec<(Option<Msg>, Option<TagName>)>,
125 ) -> Result<Self> {
126 let base64 = base64_simd::STANDARD;
127
128 let mut message = Vec::new();
129 let mut message_offset = Vec::new();
130 let mut tag_name = Vec::new();
131 let mut tag_name_offset = Vec::new();
132
133 for (msg, tag) in data.into_iter() {
134 match msg {
135 Some(msg) => {
136 let msg = base64.encode_to_string(msg);
137 message_offset.push(
138 message
139 .len()
140 .try_into()
141 .context("total message size overflowed usize")?,
142 );
143 message.extend(msg.as_bytes());
144 message.push(b'\n');
145 }
146 None => message_offset.push(u64::MAX),
147 }
148 match tag {
149 Some(tag) => {
150 let tag = base64.encode_to_string(tag);
151 tag_name_offset.push(
152 tag_name
153 .len()
154 .try_into()
155 .context("total tag_name size overflowed usize")?,
156 );
157 tag_name.extend(tag.as_bytes());
158 tag_name.push(b'\n');
159 }
160 None => tag_name_offset.push(u64::MAX),
161 }
162 }
163
164 Ok(VecStrings {
165 message,
166 message_offset,
167 tag_name,
168 tag_name_offset,
169 })
170 }
171}
172
impl PropertiesBackend for VecStrings {
    // Accessors of this backend return their value directly.
    type DataFilesAvailability = GuaranteedDataFiles;
}
176impl OptStrings for VecStrings {
177 #[inline(always)]
178 fn message(&self) -> &[u8] {
179 self.message.as_slice()
180 }
181 #[inline(always)]
182 fn message_offset(&self, node: NodeId) -> Option<u64> {
183 self.message_offset.get(node)
184 }
185 #[inline(always)]
186 fn tag_name(&self) -> &[u8] {
187 self.tag_name.as_slice()
188 }
189 #[inline(always)]
190 fn tag_name_offset(&self, node: NodeId) -> Option<u64> {
191 self.tag_name_offset.get(node)
192 }
193}
194
195impl<
196 MAPS: MaybeMaps,
197 TIMESTAMPS: MaybeTimestamps,
198 PERSONS: MaybePersons,
199 CONTENTS: MaybeContents,
200 LABELNAMES: MaybeLabelNames,
201 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, NoStrings, LABELNAMES>
202{
203 pub fn load_strings(
211 self,
212 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, MappedStrings, LABELNAMES>>
213 {
214 let OptMappedStrings {
215 message,
216 message_offset,
217 tag_name,
218 tag_name_offset,
219 } = self.get_strings()?;
220 let strings = MappedStrings {
221 message: message?,
222 message_offset: message_offset?,
223 tag_name: tag_name?,
224 tag_name_offset: tag_name_offset?,
225 };
226 self.with_strings(strings)
227 }
228 pub fn opt_load_strings(
230 self,
231 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, OptMappedStrings, LABELNAMES>>
232 {
233 let strings = self.get_strings()?;
234 self.with_strings(strings)
235 }
236
237 fn get_strings(&self) -> Result<OptMappedStrings> {
238 Ok(OptMappedStrings {
239 message: load_if_exists(&self.path, MESSAGE, |path| mmap(path))
240 .context("Could not load message")?,
241 message_offset: load_if_exists(&self.path, MESSAGE_OFFSET, |path| {
242 NumberMmap::new(path, self.num_nodes)
243 })
244 .context("Could not load message_offset")?,
245 tag_name: load_if_exists(&self.path, TAG_NAME, |path| mmap(path))
246 .context("Could not load tag_name")?,
247 tag_name_offset: load_if_exists(&self.path, TAG_NAME_OFFSET, |path| {
248 NumberMmap::new(path, self.num_nodes)
249 })
250 .context("Could not load tag_name_offset")?,
251 })
252 }
253
254 pub fn with_strings<STRINGS: MaybeStrings>(
257 self,
258 strings: STRINGS,
259 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
260 Ok(SwhGraphProperties {
261 maps: self.maps,
262 timestamps: self.timestamps,
263 persons: self.persons,
264 contents: self.contents,
265 strings,
266 label_names: self.label_names,
267 path: self.path,
268 num_nodes: self.num_nodes,
269 label_names_are_in_base64_order: self.label_names_are_in_base64_order,
270 })
271 }
272}
273
274impl<
279 MAPS: MaybeMaps,
280 TIMESTAMPS: MaybeTimestamps,
281 PERSONS: MaybePersons,
282 CONTENTS: MaybeContents,
283 STRINGS: OptStrings,
284 LABELNAMES: MaybeLabelNames,
285 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
286{
287 #[inline(always)]
288 fn message_or_tag_name_base64<'a>(
289 &self,
290 what: &'static str,
291 data: &'a [u8],
292 offset: Option<u64>,
293 node_id: NodeId,
294 ) -> Result<Option<&'a [u8]>, OutOfBoundError> {
295 match offset {
296 None => Err(OutOfBoundError {
297 index: node_id,
299 len: self.num_nodes,
300 }),
301 Some(u64::MAX) => Ok(None), Some(offset) => {
303 let offset = offset as usize;
304 let slice: &[u8] = data.get(offset..).unwrap_or_else(|| {
305 panic!("Missing {what} for node {node_id} at offset {offset}")
306 });
307 Ok(slice
308 .iter()
309 .position(|&c| c == b'\n')
310 .map(|end| &slice[..end]))
311 }
312 }
313 }
314
315 #[inline]
322 pub fn message_base64(&self, node_id: NodeId) -> PropertiesResult<'_, Option<&[u8]>, STRINGS> {
323 STRINGS::map_if_available(
324 self.try_message_base64(node_id),
325 |message: Result<_, OutOfBoundError>| {
326 message.unwrap_or_else(|e| panic!("Cannot get node message: {e}"))
327 },
328 )
329 }
330
331 #[inline]
336 pub fn try_message_base64(
337 &self,
338 node_id: NodeId,
339 ) -> PropertiesResult<'_, Result<Option<&[u8]>, OutOfBoundError>, STRINGS> {
340 STRINGS::map_if_available(
341 STRINGS::zip_if_available(self.strings.message(), self.strings.message_offset(node_id)),
342 |(messages, message_offset)| {
343 self.message_or_tag_name_base64("message", messages, message_offset, node_id)
344 },
345 )
346 }
347 #[inline]
354 pub fn message(&self, node_id: NodeId) -> PropertiesResult<'_, Option<Vec<u8>>, STRINGS> {
355 STRINGS::map_if_available(self.try_message(node_id), |message| {
356 message.unwrap_or_else(|e| panic!("Cannot get node message: {e}"))
357 })
358 }
359
360 #[inline]
366 pub fn try_message(
367 &self,
368 node_id: NodeId,
369 ) -> PropertiesResult<'_, Result<Option<Vec<u8>>, OutOfBoundError>, STRINGS> {
370 let base64 = base64_simd::STANDARD;
371 STRINGS::map_if_available(self.try_message_base64(node_id), |message_opt_res| {
372 message_opt_res.map(|message_opt| {
373 message_opt.map(|message| {
374 base64
375 .decode_to_vec(message)
376 .unwrap_or_else(|e| panic!("Could not decode node message: {e}"))
377 })
378 })
379 })
380 }
381
382 #[inline]
388 pub fn tag_name_base64(&self, node_id: NodeId) -> PropertiesResult<'_, Option<&[u8]>, STRINGS> {
389 STRINGS::map_if_available(self.try_tag_name_base64(node_id), |tag_name| {
390 tag_name.unwrap_or_else(|e| panic!("Cannot get node tag: {e}"))
391 })
392 }
393
394 #[inline]
399 pub fn try_tag_name_base64(
400 &self,
401 node_id: NodeId,
402 ) -> PropertiesResult<'_, Result<Option<&[u8]>, OutOfBoundError>, STRINGS> {
403 STRINGS::map_if_available(
404 STRINGS::zip_if_available(
405 self.strings.tag_name(),
406 self.strings.tag_name_offset(node_id),
407 ),
408 |(tag_names, tag_name_offset)| {
409 self.message_or_tag_name_base64("tag_name", tag_names, tag_name_offset, node_id)
410 },
411 )
412 }
413
414 #[inline]
420 pub fn tag_name(&self, node_id: NodeId) -> PropertiesResult<'_, Option<Vec<u8>>, STRINGS> {
421 STRINGS::map_if_available(self.try_tag_name(node_id), |tag_name| {
422 tag_name.unwrap_or_else(|e| panic!("Cannot get node tag name: {e}"))
423 })
424 }
425
426 #[inline]
431 pub fn try_tag_name(
432 &self,
433 node_id: NodeId,
434 ) -> PropertiesResult<'_, Result<Option<Vec<u8>>, OutOfBoundError>, STRINGS> {
435 let base64 = base64_simd::STANDARD;
436 STRINGS::map_if_available(self.try_tag_name_base64(node_id), |tag_name_opt_res| {
437 tag_name_opt_res.map(|tag_name_opt| {
438 tag_name_opt.map(|tag_name| {
439 base64.decode_to_vec(tag_name).unwrap_or_else(|_| {
440 panic!("Could not decode tag_name of node {node_id}: {tag_name:?}")
441 })
442 })
443 })
444 })
445 }
446}