1use mmap_rs::Mmap;
7
8use super::suffixes::*;
9use super::*;
10use crate::graph::NodeId;
11
12pub trait MaybeStrings {}
15impl<S: OptStrings> MaybeStrings for S {}
16
17#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
19pub struct NoStrings;
20impl MaybeStrings for NoStrings {}
21
22#[diagnostic::on_unimplemented(
23 label = "does not have String properties loaded",
24 note = "Use `let graph = graph.load_properties(|props| props.load_string()).unwrap()` to load them",
25 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
26)]
27pub trait OptStrings: MaybeStrings + PropertiesBackend {
29 type Offsets<'a>: GetIndex<Output = u64> + 'a
30 where
31 Self: 'a;
32
33 fn message(&self) -> PropertiesResult<&[u8], Self>;
34 fn message_offset(&self) -> PropertiesResult<Self::Offsets<'_>, Self>;
35 fn tag_name(&self) -> PropertiesResult<&[u8], Self>;
36 fn tag_name_offset(&self) -> PropertiesResult<Self::Offsets<'_>, Self>;
37}
38
39#[diagnostic::on_unimplemented(
40 label = "does not have String properties loaded",
41 note = "Use `let graph = graph.load_properties(|props| props.load_string()).unwrap()` to load them",
42 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
43)]
44pub trait Strings: OptStrings<DataFilesAvailability = GuaranteedDataFiles> {}
46impl<S: OptStrings<DataFilesAvailability = GuaranteedDataFiles>> Strings for S {}
47
48pub struct OptMappedStrings {
51 message: Result<Mmap, UnavailableProperty>,
52 message_offset: Result<NumberMmap<BigEndian, u64, Mmap>, UnavailableProperty>,
53 tag_name: Result<Mmap, UnavailableProperty>,
54 tag_name_offset: Result<NumberMmap<BigEndian, u64, Mmap>, UnavailableProperty>,
55}
56impl PropertiesBackend for OptMappedStrings {
57 type DataFilesAvailability = OptionalDataFiles;
58}
59impl OptStrings for OptMappedStrings {
60 type Offsets<'a>
61 = &'a NumberMmap<BigEndian, u64, Mmap>
62 where
63 Self: 'a;
64
65 #[inline(always)]
66 fn message(&self) -> PropertiesResult<'_, &[u8], Self> {
67 self.message.as_deref()
68 }
69 #[inline(always)]
70 fn message_offset(&self) -> PropertiesResult<'_, Self::Offsets<'_>, Self> {
71 self.message_offset.as_ref()
72 }
73 #[inline(always)]
74 fn tag_name(&self) -> PropertiesResult<'_, &[u8], Self> {
75 self.tag_name.as_deref()
76 }
77 #[inline(always)]
78 fn tag_name_offset(&self) -> PropertiesResult<'_, Self::Offsets<'_>, Self> {
79 self.tag_name_offset.as_ref()
80 }
81}
82
83pub struct MappedStrings {
85 message: Mmap,
86 message_offset: NumberMmap<BigEndian, u64, Mmap>,
87 tag_name: Mmap,
88 tag_name_offset: NumberMmap<BigEndian, u64, Mmap>,
89}
90impl PropertiesBackend for MappedStrings {
91 type DataFilesAvailability = GuaranteedDataFiles;
92}
93impl OptStrings for MappedStrings {
94 type Offsets<'a>
95 = &'a NumberMmap<BigEndian, u64, Mmap>
96 where
97 Self: 'a;
98
99 #[inline(always)]
100 fn message(&self) -> &[u8] {
101 &self.message
102 }
103 #[inline(always)]
104 fn message_offset(&self) -> Self::Offsets<'_> {
105 &self.message_offset
106 }
107 #[inline(always)]
108 fn tag_name(&self) -> &[u8] {
109 &self.tag_name
110 }
111 #[inline(always)]
112 fn tag_name_offset(&self) -> Self::Offsets<'_> {
113 &self.tag_name_offset
114 }
115}
116
/// String properties held in vectors instead of memory maps.
///
/// See [`VecStrings::new`] for the way the vectors are filled.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VecStrings {
    /// Base64-encoded messages, each followed by a `\n`
    message: Vec<u8>,
    /// For each input pair, the offset of its message in `message`, or
    /// `u64::MAX` when it has none
    message_offset: Vec<u64>,
    /// Base64-encoded tag names, each followed by a `\n`
    tag_name: Vec<u8>,
    /// For each input pair, the offset of its tag name in `tag_name`, or
    /// `u64::MAX` when it has none
    tag_name_offset: Vec<u64>,
}
124
125impl VecStrings {
126 pub fn new<Msg: AsRef<[u8]>, TagName: AsRef<[u8]>>(
128 data: Vec<(Option<Msg>, Option<TagName>)>,
129 ) -> Result<Self> {
130 let base64 = base64_simd::STANDARD;
131
132 let mut message = Vec::new();
133 let mut message_offset = Vec::new();
134 let mut tag_name = Vec::new();
135 let mut tag_name_offset = Vec::new();
136
137 for (msg, tag) in data.into_iter() {
138 match msg {
139 Some(msg) => {
140 let msg = base64.encode_to_string(msg);
141 message_offset.push(
142 message
143 .len()
144 .try_into()
145 .context("total message size overflowed usize")?,
146 );
147 message.extend(msg.as_bytes());
148 message.push(b'\n');
149 }
150 None => message_offset.push(u64::MAX),
151 }
152 match tag {
153 Some(tag) => {
154 let tag = base64.encode_to_string(tag);
155 tag_name_offset.push(
156 tag_name
157 .len()
158 .try_into()
159 .context("total tag_name size overflowed usize")?,
160 );
161 tag_name.extend(tag.as_bytes());
162 tag_name.push(b'\n');
163 }
164 None => tag_name_offset.push(u64::MAX),
165 }
166 }
167
168 Ok(VecStrings {
169 message,
170 message_offset,
171 tag_name,
172 tag_name_offset,
173 })
174 }
175}
176
177impl PropertiesBackend for VecStrings {
178 type DataFilesAvailability = GuaranteedDataFiles;
179}
180impl OptStrings for VecStrings {
181 type Offsets<'a>
182 = &'a [u64]
183 where
184 Self: 'a;
185
186 #[inline(always)]
187 fn message(&self) -> &[u8] {
188 self.message.as_slice()
189 }
190 #[inline(always)]
191 fn message_offset(&self) -> Self::Offsets<'_> {
192 self.message_offset.as_slice()
193 }
194 #[inline(always)]
195 fn tag_name(&self) -> &[u8] {
196 self.tag_name.as_slice()
197 }
198 #[inline(always)]
199 fn tag_name_offset(&self) -> Self::Offsets<'_> {
200 self.tag_name_offset.as_slice()
201 }
202}
203
204impl<
205 MAPS: MaybeMaps,
206 TIMESTAMPS: MaybeTimestamps,
207 PERSONS: MaybePersons,
208 CONTENTS: MaybeContents,
209 LABELNAMES: MaybeLabelNames,
210 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, NoStrings, LABELNAMES>
211{
212 pub fn load_strings(
220 self,
221 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, MappedStrings, LABELNAMES>>
222 {
223 let OptMappedStrings {
224 message,
225 message_offset,
226 tag_name,
227 tag_name_offset,
228 } = self.get_strings()?;
229 let strings = MappedStrings {
230 message: message?,
231 message_offset: message_offset?,
232 tag_name: tag_name?,
233 tag_name_offset: tag_name_offset?,
234 };
235 self.with_strings(strings)
236 }
237 pub fn opt_load_strings(
239 self,
240 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, OptMappedStrings, LABELNAMES>>
241 {
242 let strings = self.get_strings()?;
243 self.with_strings(strings)
244 }
245
246 fn get_strings(&self) -> Result<OptMappedStrings> {
247 Ok(OptMappedStrings {
248 message: load_if_exists(&self.path, MESSAGE, |path| mmap(path))
249 .context("Could not load message")?,
250 message_offset: load_if_exists(&self.path, MESSAGE_OFFSET, |path| {
251 NumberMmap::new(path, self.num_nodes)
252 })
253 .context("Could not load message_offset")?,
254 tag_name: load_if_exists(&self.path, TAG_NAME, |path| mmap(path))
255 .context("Could not load tag_name")?,
256 tag_name_offset: load_if_exists(&self.path, TAG_NAME_OFFSET, |path| {
257 NumberMmap::new(path, self.num_nodes)
258 })
259 .context("Could not load tag_name_offset")?,
260 })
261 }
262
263 pub fn with_strings<STRINGS: MaybeStrings>(
266 self,
267 strings: STRINGS,
268 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
269 Ok(SwhGraphProperties {
270 maps: self.maps,
271 timestamps: self.timestamps,
272 persons: self.persons,
273 contents: self.contents,
274 strings,
275 label_names: self.label_names,
276 path: self.path,
277 num_nodes: self.num_nodes,
278 label_names_are_in_base64_order: self.label_names_are_in_base64_order,
279 })
280 }
281}
282
283impl<
288 MAPS: MaybeMaps,
289 TIMESTAMPS: MaybeTimestamps,
290 PERSONS: MaybePersons,
291 CONTENTS: MaybeContents,
292 STRINGS: OptStrings,
293 LABELNAMES: MaybeLabelNames,
294 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
295{
296 #[inline(always)]
297 fn message_or_tag_name_base64<'a>(
298 what: &'static str,
299 data: &'a [u8],
300 offsets: impl GetIndex<Output = u64>,
301 node_id: NodeId,
302 ) -> Result<Option<&'a [u8]>, OutOfBoundError> {
303 match offsets.get(node_id) {
304 None => Err(OutOfBoundError {
305 index: node_id,
307 len: offsets.len(),
308 }),
309 Some(u64::MAX) => Ok(None), Some(offset) => {
311 let offset = offset as usize;
312 let slice: &[u8] = data.get(offset..).unwrap_or_else(|| {
313 panic!("Missing {what} for node {node_id} at offset {offset}")
314 });
315 Ok(slice
316 .iter()
317 .position(|&c| c == b'\n')
318 .map(|end| &slice[..end]))
319 }
320 }
321 }
322
323 #[inline]
330 pub fn message_base64(&self, node_id: NodeId) -> PropertiesResult<Option<&[u8]>, STRINGS> {
331 STRINGS::map_if_available(
332 self.try_message_base64(node_id),
333 |message: Result<_, OutOfBoundError>| {
334 message.unwrap_or_else(|e| panic!("Cannot get node message: {e}"))
335 },
336 )
337 }
338
339 #[inline]
344 pub fn try_message_base64(
345 &self,
346 node_id: NodeId,
347 ) -> PropertiesResult<Result<Option<&[u8]>, OutOfBoundError>, STRINGS> {
348 STRINGS::map_if_available(
349 STRINGS::zip_if_available(self.strings.message(), self.strings.message_offset()),
350 |(messages, message_offsets)| {
351 Self::message_or_tag_name_base64("message", messages, message_offsets, node_id)
352 },
353 )
354 }
355 #[inline]
362 pub fn message(&self, node_id: NodeId) -> PropertiesResult<Option<Vec<u8>>, STRINGS> {
363 STRINGS::map_if_available(self.try_message(node_id), |message| {
364 message.unwrap_or_else(|e| panic!("Cannot get node message: {e}"))
365 })
366 }
367
368 #[inline]
374 pub fn try_message(
375 &self,
376 node_id: NodeId,
377 ) -> PropertiesResult<Result<Option<Vec<u8>>, OutOfBoundError>, STRINGS> {
378 let base64 = base64_simd::STANDARD;
379 STRINGS::map_if_available(self.try_message_base64(node_id), |message_opt_res| {
380 message_opt_res.map(|message_opt| {
381 message_opt.map(|message| {
382 base64
383 .decode_to_vec(message)
384 .unwrap_or_else(|e| panic!("Could not decode node message: {e}"))
385 })
386 })
387 })
388 }
389
390 #[inline]
396 pub fn tag_name_base64(&self, node_id: NodeId) -> PropertiesResult<Option<&[u8]>, STRINGS> {
397 STRINGS::map_if_available(self.try_tag_name_base64(node_id), |tag_name| {
398 tag_name.unwrap_or_else(|e| panic!("Cannot get node tag: {e}"))
399 })
400 }
401
402 #[inline]
407 pub fn try_tag_name_base64(
408 &self,
409 node_id: NodeId,
410 ) -> PropertiesResult<Result<Option<&[u8]>, OutOfBoundError>, STRINGS> {
411 STRINGS::map_if_available(
412 STRINGS::zip_if_available(self.strings.tag_name(), self.strings.tag_name_offset()),
413 |(tag_names, tag_name_offsets)| {
414 Self::message_or_tag_name_base64("tag_name", tag_names, tag_name_offsets, node_id)
415 },
416 )
417 }
418
419 #[inline]
425 pub fn tag_name(&self, node_id: NodeId) -> PropertiesResult<Option<Vec<u8>>, STRINGS> {
426 STRINGS::map_if_available(self.try_tag_name(node_id), |tag_name| {
427 tag_name.unwrap_or_else(|e| panic!("Cannot get node tag name: {e}"))
428 })
429 }
430
431 #[inline]
436 pub fn try_tag_name(
437 &self,
438 node_id: NodeId,
439 ) -> PropertiesResult<Result<Option<Vec<u8>>, OutOfBoundError>, STRINGS> {
440 let base64 = base64_simd::STANDARD;
441 STRINGS::map_if_available(self.try_tag_name_base64(node_id), |tag_name_opt_res| {
442 tag_name_opt_res.map(|tag_name_opt| {
443 tag_name_opt.map(|tag_name| {
444 base64.decode_to_vec(tag_name).unwrap_or_else(|_| {
445 panic!("Could not decode tag_name of node {node_id}: {tag_name:?}")
446 })
447 })
448 })
449 })
450 }
451}