1use anyhow::{ensure, Context, Result};
7use mmap_rs::Mmap;
8
9use super::suffixes::*;
10use super::*;
11use crate::graph::NodeId;
12
13pub trait MaybeContents {}
16impl<C: OptContents> MaybeContents for C {}
17
18#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
20pub struct NoContents;
21impl MaybeContents for NoContents {}
22
23unsafe fn get_bit(
29 array: impl GetIndex<Output = u64>,
30 num_bits: usize,
31 bit_position: usize,
32) -> Option<bool> {
33 if bit_position >= num_bits {
34 None
35 } else {
36 let cell_id = bit_position / (u64::BITS as usize);
37 let mask = 1 << (bit_position % (u64::BITS as usize));
38
39 let cell = unsafe { array.get_unchecked(cell_id) };
41
42 Some((cell & mask) != 0)
43 }
44}
45
46#[diagnostic::on_unimplemented(
47 label = "does not have Content properties loaded",
48 note = "Use `let graph = graph.load_properties(|props| props.load_contents()).unwrap()` to load them",
49 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
50)]
51pub trait OptContents: MaybeContents + PropertiesBackend {
53 fn is_skipped_content(&self, node: NodeId) -> PropertiesResult<'_, Option<bool>, Self>;
55 fn content_length(&self, node: NodeId) -> PropertiesResult<'_, Option<u64>, Self>;
58}
59
60#[diagnostic::on_unimplemented(
61 label = "does not have Content properties loaded",
62 note = "Use `let graph = graph.load_properties(|props| props.load_contents()).unwrap()` to load them",
63 note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
64)]
65pub trait Contents: OptContents<DataFilesAvailability = GuaranteedDataFiles> {}
67impl<S: OptContents<DataFilesAvailability = GuaranteedDataFiles>> Contents for S {}
68
69pub struct OptMappedContents {
72 num_nodes: usize,
73 is_skipped_content: Result<NumberMmap<BigEndian, u64, Mmap>, UnavailableProperty>,
74 content_length: Result<NumberMmap<BigEndian, u64, Mmap>, UnavailableProperty>,
75}
76impl PropertiesBackend for OptMappedContents {
77 type DataFilesAvailability = OptionalDataFiles;
78}
79impl OptContents for OptMappedContents {
80 #[inline(always)]
81 fn is_skipped_content(&self, node: NodeId) -> PropertiesResult<'_, Option<bool>, Self> {
82 self.is_skipped_content
84 .as_ref()
85 .map(|is_skipped_content| unsafe { get_bit(is_skipped_content, self.num_nodes, node) })
86 }
87 #[inline(always)]
88 fn content_length(&self, node: NodeId) -> PropertiesResult<'_, Option<u64>, Self> {
89 self.content_length
90 .as_ref()
91 .map(|content_lengths| content_lengths.get(node))
92 }
93}
94
95pub struct MappedContents {
96 num_nodes: usize,
97 is_skipped_content: NumberMmap<BigEndian, u64, Mmap>,
98 content_length: NumberMmap<BigEndian, u64, Mmap>,
99}
100impl PropertiesBackend for MappedContents {
101 type DataFilesAvailability = GuaranteedDataFiles;
102}
103impl OptContents for MappedContents {
104 #[inline(always)]
105 fn is_skipped_content(&self, node: NodeId) -> Option<bool> {
106 unsafe { get_bit(&self.is_skipped_content, self.num_nodes, node) }
108 }
109 #[inline(always)]
110 fn content_length(&self, node: NodeId) -> Option<u64> {
111 (&self.content_length).get(node)
112 }
113}
114
/// Content properties held in plain vectors; build it with [`VecContents::new`].
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VecContents {
    /// Number of nodes described by the two vectors.
    num_nodes: usize,
    /// Bit array packed into `u64` cells: bit `i % 64` of cell `i / 64` is set when
    /// node `i` is a skipped content.
    is_skipped_content: Vec<u64>,
    /// One entry per node; `u64::MAX` stands for "length unknown".
    content_length: Vec<u64>,
}
121
122impl VecContents {
123 pub fn new(data: Vec<(bool, Option<u64>)>) -> Result<Self> {
124 let num_nodes = data.len();
125 let bit_vec_len = num_nodes.div_ceil(64);
126 let mut is_skipped_content = vec![0; bit_vec_len];
127 let mut content_length = Vec::with_capacity(num_nodes);
128 for (node_id, (is_skipped, length)) in data.into_iter().enumerate() {
129 ensure!(
130 length != Some(u64::MAX),
131 "content length may not be {}",
132 u64::MAX
133 );
134 content_length.push(length.unwrap_or(u64::MAX));
135 if is_skipped {
136 let cell_id = node_id / (u64::BITS as usize);
137 let mask = 1 << (node_id % (u64::BITS as usize));
138 is_skipped_content[cell_id] |= mask;
139 }
140 }
141 Ok(VecContents {
142 num_nodes,
143 is_skipped_content,
144 content_length,
145 })
146 }
147}
148
149impl PropertiesBackend for VecContents {
150 type DataFilesAvailability = GuaranteedDataFiles;
151}
152impl OptContents for VecContents {
153 #[inline(always)]
154 fn is_skipped_content(&self, node: NodeId) -> Option<bool> {
155 unsafe { get_bit(self.is_skipped_content.as_slice(), self.num_nodes, node) }
157 }
158 #[inline(always)]
159 fn content_length(&self, node: NodeId) -> Option<u64> {
160 self.content_length.get(node)
161 }
162}
163
164impl<
165 MAPS: MaybeMaps,
166 TIMESTAMPS: MaybeTimestamps,
167 PERSONS: MaybePersons,
168 STRINGS: MaybeStrings,
169 LABELNAMES: MaybeLabelNames,
170 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, NoContents, STRINGS, LABELNAMES>
171{
172 pub fn load_contents(
178 self,
179 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, MappedContents, STRINGS, LABELNAMES>>
180 {
181 let OptMappedContents {
182 is_skipped_content,
183 content_length,
184 num_nodes,
185 } = self.get_contents()?;
186 let contents = MappedContents {
187 is_skipped_content: is_skipped_content?,
188 content_length: content_length?,
189 num_nodes,
190 };
191 self.with_contents(contents)
192 }
193
194 pub fn opt_load_contents(
196 self,
197 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, OptMappedContents, STRINGS, LABELNAMES>>
198 {
199 let contents = self.get_contents()?;
200 self.with_contents(contents)
201 }
202
203 fn get_contents(&self) -> Result<OptMappedContents> {
204 Ok(OptMappedContents {
205 num_nodes: self.num_nodes,
206 is_skipped_content: load_if_exists(&self.path, CONTENT_IS_SKIPPED, |path| {
207 let num_bytes = self.num_nodes.div_ceil(u64::BITS.try_into().unwrap());
208 NumberMmap::new(path, num_bytes).context("Could not load is_skipped_content")
209 })?,
210 content_length: load_if_exists(&self.path, CONTENT_LENGTH, |path| {
211 NumberMmap::new(path, self.num_nodes).context("Could not load content_length")
212 })?,
213 })
214 }
215
216 pub fn with_contents<CONTENTS: MaybeContents>(
219 self,
220 contents: CONTENTS,
221 ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
222 Ok(SwhGraphProperties {
223 maps: self.maps,
224 timestamps: self.timestamps,
225 persons: self.persons,
226 contents,
227 strings: self.strings,
228 label_names: self.label_names,
229 path: self.path,
230 num_nodes: self.num_nodes,
231 label_names_are_in_base64_order: self.label_names_are_in_base64_order,
232 })
233 }
234}
235
236impl<
241 MAPS: MaybeMaps,
242 TIMESTAMPS: MaybeTimestamps,
243 PERSONS: MaybePersons,
244 CONTENTS: OptContents,
245 STRINGS: MaybeStrings,
246 LABELNAMES: MaybeLabelNames,
247 > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
248{
249 #[inline]
257 pub fn is_skipped_content(&self, node_id: NodeId) -> PropertiesResult<'_, bool, CONTENTS> {
258 CONTENTS::map_if_available(self.try_is_skipped_content(node_id), |is_skipped_content| {
259 is_skipped_content
260 .unwrap_or_else(|e| panic!("Cannot get is_skipped_content bit of node: {e}"))
261 })
262 }
263
264 #[inline]
268 pub fn try_is_skipped_content(
269 &self,
270 node_id: NodeId,
271 ) -> PropertiesResult<'_, Result<bool, OutOfBoundError>, CONTENTS> {
272 CONTENTS::map_if_available(
273 self.contents.is_skipped_content(node_id),
274 |is_skipped_content| match is_skipped_content {
275 None => Err(OutOfBoundError {
276 index: node_id,
277 len: self.num_nodes,
278 }),
279 Some(is_skipped_content) => Ok(is_skipped_content),
280 },
281 )
282 }
283
284 #[inline]
292 pub fn content_length(&self, node_id: NodeId) -> PropertiesResult<'_, Option<u64>, CONTENTS> {
293 CONTENTS::map_if_available(self.try_content_length(node_id), |content_length| {
294 content_length.unwrap_or_else(|e| panic!("Cannot get content length: {e}"))
295 })
296 }
297
298 #[inline]
302 pub fn try_content_length(
303 &self,
304 node_id: NodeId,
305 ) -> PropertiesResult<'_, Result<Option<u64>, OutOfBoundError>, CONTENTS> {
306 CONTENTS::map_if_available(self.contents.content_length(node_id), |content_length| {
307 match content_length {
308 None => Err(OutOfBoundError {
309 index: node_id,
311 len: self.num_nodes,
312 }),
313 Some(u64::MAX) => Ok(None), Some(length) => Ok(Some(length)),
315 }
316 })
317 }
318}