swh_graph/properties/
timestamps.rs

1// Copyright (C) 2023-2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use anyhow::{ensure, Context, Result};
7use mmap_rs::Mmap;
8
9use super::suffixes::*;
10use super::*;
11use crate::graph::NodeId;
12
13/// Trait implemented by both [`NoTimestamps`] and all implementors of [`Timestamps`],
14/// to allow loading timestamp properties only if needed.
15pub trait MaybeTimestamps {}
16impl<T: OptTimestamps> MaybeTimestamps for T {}
17
/// Placeholder for when timestamp properties are not loaded
///
/// It carries no data. It implements [`MaybeTimestamps`] directly (see the explicit impl
/// below) rather than through the blanket impl covering [`OptTimestamps`] implementors.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NoTimestamps;
// Explicit marker impl: lets `NoTimestamps` be used wherever `MaybeTimestamps` is required.
impl MaybeTimestamps for NoTimestamps {}
22
#[diagnostic::on_unimplemented(
    label = "does not have Timestamp properties loaded",
    note = "Use `let graph = graph.load_properties(|props| props.load_timestamps()).unwrap()` to load them",
    note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
)]
/// Trait for backend storage of timestamp properties (either in-memory or memory-mapped)
///
/// Every accessor returns a [`PropertiesResult`] whose concrete shape depends on the
/// backend's `DataFilesAvailability`: in this file, [`MappedTimestamps`] and
/// [`VecTimestamps`] (`GuaranteedDataFiles`) return the `Option` directly, while
/// [`OptMappedTimestamps`] (`OptionalDataFiles`) wraps it in a `Result`.
///
/// Missing values are encoded with a sentinel (`i64::MIN` for timestamps, `i16::MIN` for
/// offsets); the public accessors on [`SwhGraphProperties`] translate that sentinel
/// to `None`.
pub trait OptTimestamps: MaybeTimestamps + PropertiesBackend {
    /// Returns `None` if out of bound, `Some(i64::MIN)` if the node has no author timestamp
    fn author_timestamp(&self, node: NodeId) -> PropertiesResult<'_, Option<i64>, Self>;
    /// Returns `None` if out of bound, `Some(i16::MIN)` if the node has no author timestamp offset
    fn author_timestamp_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<i16>, Self>;
    /// Returns `None` if out of bound, `Some(i64::MIN)` if the node has no committer timestamp
    fn committer_timestamp(&self, node: NodeId) -> PropertiesResult<'_, Option<i64>, Self>;
    /// Returns `None` if out of bound, `Some(i16::MIN)` if the node has no committer timestamp offset
    fn committer_timestamp_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<i16>, Self>;
}
39
40#[diagnostic::on_unimplemented(
41    label = "does not have Timestamp properties loaded",
42    note = "Use `let graph = graph.load_properties(|props| props.load_timestamps()).unwrap()` to load them",
43    note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
44)]
45/// Trait for backend storage of timestamp properties (either in-memory or memory-mapped)
46pub trait Timestamps: OptTimestamps<DataFilesAvailability = GuaranteedDataFiles> {}
47impl<T: OptTimestamps<DataFilesAvailability = GuaranteedDataFiles>> Timestamps for T {}
48
/// Memory-mapped timestamp properties where each of the four data files may be unavailable.
///
/// Every field is either the mapped big-endian array for that property, or the
/// [`UnavailableProperty`] recorded when loading it.
pub struct OptMappedTimestamps {
    // Author timestamps (i64 per node), if available
    author_timestamp: Result<NumberMmap<BigEndian, i64, Mmap>, UnavailableProperty>,
    // Author timestamp offsets (i16 per node), if available
    author_timestamp_offset: Result<NumberMmap<BigEndian, i16, Mmap>, UnavailableProperty>,
    // Committer timestamps (i64 per node), if available
    committer_timestamp: Result<NumberMmap<BigEndian, i64, Mmap>, UnavailableProperty>,
    // Committer timestamp offsets (i16 per node), if available
    committer_timestamp_offset: Result<NumberMmap<BigEndian, i16, Mmap>, UnavailableProperty>,
}
55impl PropertiesBackend for OptMappedTimestamps {
56    type DataFilesAvailability = OptionalDataFiles;
57}
58impl OptTimestamps for OptMappedTimestamps {
59    #[inline(always)]
60    fn author_timestamp(&self, node: NodeId) -> PropertiesResult<'_, Option<i64>, Self> {
61        self.author_timestamp
62            .as_ref()
63            .map(|author_timestamps| author_timestamps.get(node))
64    }
65    #[inline(always)]
66    fn author_timestamp_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<i16>, Self> {
67        self.author_timestamp_offset
68            .as_ref()
69            .map(|author_timestamp_offsets| author_timestamp_offsets.get(node))
70    }
71    #[inline(always)]
72    fn committer_timestamp(&self, node: NodeId) -> PropertiesResult<'_, Option<i64>, Self> {
73        self.committer_timestamp
74            .as_ref()
75            .map(|committer_timestamps| committer_timestamps.get(node))
76    }
77    #[inline(always)]
78    fn committer_timestamp_offset(&self, node: NodeId) -> PropertiesResult<'_, Option<i16>, Self> {
79        self.committer_timestamp_offset
80            .as_ref()
81            .map(|committer_timestamp_offsets| committer_timestamp_offsets.get(node))
82    }
83}
84
/// Memory-mapped timestamp properties with all four data files present.
///
/// Built by `load_timestamps` from an [`OptMappedTimestamps`] once every field has been
/// checked to be available.
pub struct MappedTimestamps {
    // Author timestamps, stored as big-endian i64
    author_timestamp: NumberMmap<BigEndian, i64, Mmap>,
    // Author timestamp offsets, stored as big-endian i16
    author_timestamp_offset: NumberMmap<BigEndian, i16, Mmap>,
    // Committer timestamps, stored as big-endian i64
    committer_timestamp: NumberMmap<BigEndian, i64, Mmap>,
    // Committer timestamp offsets, stored as big-endian i16
    committer_timestamp_offset: NumberMmap<BigEndian, i16, Mmap>,
}
91impl PropertiesBackend for MappedTimestamps {
92    type DataFilesAvailability = GuaranteedDataFiles;
93}
94
95impl OptTimestamps for MappedTimestamps {
96    #[inline(always)]
97    fn author_timestamp(&self, node: NodeId) -> Option<i64> {
98        (&self.author_timestamp).get(node)
99    }
100    #[inline(always)]
101    fn author_timestamp_offset(&self, node: NodeId) -> Option<i16> {
102        (&self.author_timestamp_offset).get(node)
103    }
104    #[inline(always)]
105    fn committer_timestamp(&self, node: NodeId) -> Option<i64> {
106        (&self.committer_timestamp).get(node)
107    }
108    #[inline(always)]
109    fn committer_timestamp_offset(&self, node: NodeId) -> Option<i16> {
110        (&self.committer_timestamp_offset).get(node)
111    }
112}
113
/// In-memory timestamp properties, stored as one vector per property.
///
/// Missing values are stored as `i64::MIN` (timestamps) or `i16::MIN` (offsets), which is
/// why [`VecTimestamps::new`] rejects those values as actual inputs.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VecTimestamps {
    // One entry per input tuple, in input order; `i64::MIN` means "no author timestamp"
    author_timestamp: Vec<i64>,
    // One entry per input tuple; `i16::MIN` means "no author timestamp offset"
    author_timestamp_offset: Vec<i16>,
    // One entry per input tuple; `i64::MIN` means "no committer timestamp"
    committer_timestamp: Vec<i64>,
    // One entry per input tuple; `i16::MIN` means "no committer timestamp offset"
    committer_timestamp_offset: Vec<i16>,
}
121
122impl VecTimestamps {
123    /// Builds [`VecTimestamps`] from 4-tuples of `(author_timestamp, author_timestamp_offset,
124    /// committer_timestamp, committer_timestamp_offset)`
125    #[allow(clippy::type_complexity)]
126    pub fn new(
127        timestamps: Vec<(Option<i64>, Option<i16>, Option<i64>, Option<i16>)>,
128    ) -> Result<Self> {
129        let mut author_timestamp = Vec::with_capacity(timestamps.len());
130        let mut author_timestamp_offset = Vec::with_capacity(timestamps.len());
131        let mut committer_timestamp = Vec::with_capacity(timestamps.len());
132        let mut committer_timestamp_offset = Vec::with_capacity(timestamps.len());
133        for (a_ts, a_ts_o, c_ts, c_ts_o) in timestamps {
134            ensure!(
135                a_ts != Some(i64::MIN),
136                "author timestamp may not be {}",
137                i64::MIN
138            );
139            ensure!(
140                a_ts_o != Some(i16::MIN),
141                "author timestamp offset may not be {}",
142                i16::MIN
143            );
144            ensure!(
145                c_ts != Some(i64::MIN),
146                "committer timestamp may not be {}",
147                i64::MIN
148            );
149            ensure!(
150                c_ts_o != Some(i16::MIN),
151                "committer timestamp offset may not be {}",
152                i16::MIN
153            );
154            author_timestamp.push(a_ts.unwrap_or(i64::MIN));
155            author_timestamp_offset.push(a_ts_o.unwrap_or(i16::MIN));
156            committer_timestamp.push(c_ts.unwrap_or(i64::MIN));
157            committer_timestamp_offset.push(c_ts_o.unwrap_or(i16::MIN));
158        }
159        Ok(VecTimestamps {
160            author_timestamp,
161            author_timestamp_offset,
162            committer_timestamp,
163            committer_timestamp_offset,
164        })
165    }
166}
167
168impl PropertiesBackend for VecTimestamps {
169    type DataFilesAvailability = GuaranteedDataFiles;
170}
171impl OptTimestamps for VecTimestamps {
172    #[inline(always)]
173    fn author_timestamp(&self, node: NodeId) -> Option<i64> {
174        self.author_timestamp.get(node)
175    }
176    #[inline(always)]
177    fn author_timestamp_offset(&self, node: NodeId) -> Option<i16> {
178        self.author_timestamp_offset.get(node)
179    }
180    #[inline(always)]
181    fn committer_timestamp(&self, node: NodeId) -> Option<i64> {
182        self.committer_timestamp.get(node)
183    }
184    #[inline(always)]
185    fn committer_timestamp_offset(&self, node: NodeId) -> Option<i16> {
186        self.committer_timestamp_offset.get(node)
187    }
188}
189
190impl<
191        MAPS: MaybeMaps,
192        PERSONS: MaybePersons,
193        CONTENTS: MaybeContents,
194        STRINGS: MaybeStrings,
195        LABELNAMES: MaybeLabelNames,
196    > SwhGraphProperties<MAPS, NoTimestamps, PERSONS, CONTENTS, STRINGS, LABELNAMES>
197{
198    /// Consumes a [`SwhGraphProperties`] and returns a new one with these methods
199    /// available:
200    ///
201    /// * [`SwhGraphProperties::author_timestamp`]
202    /// * [`SwhGraphProperties::author_timestamp_offset`]
203    /// * [`SwhGraphProperties::committer_timestamp`]
204    /// * [`SwhGraphProperties::committer_timestamp_offset`]
205    pub fn load_timestamps(
206        self,
207    ) -> Result<SwhGraphProperties<MAPS, MappedTimestamps, PERSONS, CONTENTS, STRINGS, LABELNAMES>>
208    {
209        let OptMappedTimestamps {
210            author_timestamp,
211            author_timestamp_offset,
212            committer_timestamp,
213            committer_timestamp_offset,
214        } = self.get_timestamps()?;
215        let timestamps = MappedTimestamps {
216            author_timestamp: author_timestamp?,
217            author_timestamp_offset: author_timestamp_offset?,
218            committer_timestamp: committer_timestamp?,
219            committer_timestamp_offset: committer_timestamp_offset?,
220        };
221        self.with_timestamps(timestamps)
222    }
223
224    /// Equivalent to [`Self::load_timestamps`] that does not require all files to be present
225    pub fn opt_load_timestamps(
226        self,
227    ) -> Result<SwhGraphProperties<MAPS, OptMappedTimestamps, PERSONS, CONTENTS, STRINGS, LABELNAMES>>
228    {
229        let timestamps = self.get_timestamps()?;
230        self.with_timestamps(timestamps)
231    }
232
233    fn get_timestamps(&self) -> Result<OptMappedTimestamps> {
234        Ok(OptMappedTimestamps {
235            author_timestamp: load_if_exists(&self.path, AUTHOR_TIMESTAMP, |path| {
236                NumberMmap::new(path, self.num_nodes).context("Could not load author_timestamp")
237            })?,
238            author_timestamp_offset: load_if_exists(&self.path, AUTHOR_TIMESTAMP_OFFSET, |path| {
239                NumberMmap::new(path, self.num_nodes)
240                    .context("Could not load author_timestamp_offset")
241            })?,
242            committer_timestamp: load_if_exists(&self.path, COMMITTER_TIMESTAMP, |path| {
243                NumberMmap::new(path, self.num_nodes).context("Could not load committer_timestamp")
244            })?,
245            committer_timestamp_offset: load_if_exists(
246                &self.path,
247                COMMITTER_TIMESTAMP_OFFSET,
248                |path| {
249                    NumberMmap::new(path, self.num_nodes)
250                        .context("Could not load committer_timestamp_offset")
251                },
252            )?,
253        })
254    }
255
256    /// Alternative to [`load_timestamps`](Self::load_timestamps) that allows using arbitrary
257    /// timestamps implementations
258    pub fn with_timestamps<TIMESTAMPS: MaybeTimestamps>(
259        self,
260        timestamps: TIMESTAMPS,
261    ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
262        Ok(SwhGraphProperties {
263            maps: self.maps,
264            timestamps,
265            persons: self.persons,
266            contents: self.contents,
267            strings: self.strings,
268            label_names: self.label_names,
269            path: self.path,
270            num_nodes: self.num_nodes,
271            label_names_are_in_base64_order: self.label_names_are_in_base64_order,
272        })
273    }
274}
275
276/// Functions to access timestamps of `revision` and `release` nodes
277///
278/// Only available after calling [`load_timestamps`](SwhGraphProperties::load_timestamps)
279/// or [`load_all_properties`](crate::graph::SwhBidirectionalGraph::load_all_properties)
280impl<
281        MAPS: MaybeMaps,
282        TIMESTAMPS: OptTimestamps,
283        PERSONS: MaybePersons,
284        CONTENTS: MaybeContents,
285        STRINGS: MaybeStrings,
286        LABELNAMES: MaybeLabelNames,
287    > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
288{
289    /// Returns the number of seconds since Epoch that a release or revision was
290    /// authored at
291    ///
292    /// # Panics
293    ///
294    /// If the node id does not exist
295    #[inline]
296    pub fn author_timestamp(
297        &self,
298        node_id: NodeId,
299    ) -> PropertiesResult<'_, Option<i64>, TIMESTAMPS> {
300        TIMESTAMPS::map_if_available(self.try_author_timestamp(node_id), |author_timestamp| {
301            author_timestamp.unwrap_or_else(|e| panic!("Cannot get author timestamp: {e}"))
302        })
303    }
304
305    /// Returns the number of seconds since Epoch that a release or revision was
306    /// authored at
307    ///
308    /// Returns `Err` if the node id is unknown, and `Ok(None)` if the node has
309    /// no author timestamp
310    #[inline]
311    pub fn try_author_timestamp(
312        &self,
313        node_id: NodeId,
314    ) -> PropertiesResult<'_, Result<Option<i64>, OutOfBoundError>, TIMESTAMPS> {
315        TIMESTAMPS::map_if_available(
316            self.timestamps.author_timestamp(node_id),
317            |author_timestamp| match author_timestamp {
318                None => Err(OutOfBoundError {
319                    index: node_id,
320                    len: self.num_nodes,
321                }),
322                Some(i64::MIN) => Ok(None),
323                Some(ts) => Ok(Some(ts)),
324            },
325        )
326    }
327
328    /// Returns the UTC offset in minutes of a release or revision's authorship date
329    ///
330    /// # Panics
331    ///
332    /// If the node id does not exist
333    #[inline]
334    pub fn author_timestamp_offset(
335        &self,
336        node_id: NodeId,
337    ) -> PropertiesResult<'_, Option<i16>, TIMESTAMPS> {
338        TIMESTAMPS::map_if_available(
339            self.try_author_timestamp_offset(node_id),
340            |author_timestamp_offset| {
341                author_timestamp_offset
342                    .unwrap_or_else(|e| panic!("Cannot get author timestamp offset: {e}"))
343            },
344        )
345    }
346
347    /// Returns the UTC offset in minutes of a release or revision's authorship date
348    ///
349    /// Returns `Err` if the node id is unknown, and `Ok(None)` if the node has
350    /// no author timestamp
351    #[inline]
352    pub fn try_author_timestamp_offset(
353        &self,
354        node_id: NodeId,
355    ) -> PropertiesResult<'_, Result<Option<i16>, OutOfBoundError>, TIMESTAMPS> {
356        TIMESTAMPS::map_if_available(
357            self.timestamps.author_timestamp_offset(node_id),
358            |author_timestamp_offset| match author_timestamp_offset {
359                None => Err(OutOfBoundError {
360                    index: node_id,
361                    len: self.num_nodes,
362                }),
363                Some(i16::MIN) => Ok(None),
364                Some(offset) => Ok(Some(offset)),
365            },
366        )
367    }
368
369    /// Returns the number of seconds since Epoch that a revision was committed at
370    ///
371    /// # Panics
372    ///
373    /// If the node id does not exist
374    #[inline]
375    pub fn committer_timestamp(
376        &self,
377        node_id: NodeId,
378    ) -> PropertiesResult<'_, Option<i64>, TIMESTAMPS> {
379        TIMESTAMPS::map_if_available(
380            self.try_committer_timestamp(node_id),
381            |committer_timestamp| {
382                committer_timestamp
383                    .unwrap_or_else(|e| panic!("Cannot get committer timestamp: {e}"))
384            },
385        )
386    }
387
388    /// Returns the number of seconds since Epoch that a revision was committed at
389    ///
390    /// Returns `Err` if the node id is unknown, and `Ok(None)` if the node has
391    /// no committer timestamp
392    #[inline]
393    pub fn try_committer_timestamp(
394        &self,
395        node_id: NodeId,
396    ) -> PropertiesResult<'_, Result<Option<i64>, OutOfBoundError>, TIMESTAMPS> {
397        TIMESTAMPS::map_if_available(
398            self.timestamps.committer_timestamp(node_id),
399            |committer_timestamp| match committer_timestamp {
400                None => Err(OutOfBoundError {
401                    index: node_id,
402                    len: self.num_nodes,
403                }),
404                Some(i64::MIN) => Ok(None),
405                Some(ts) => Ok(Some(ts)),
406            },
407        )
408    }
409
410    /// Returns the UTC offset in minutes of a revision's committer date
411    ///
412    /// # Panics
413    ///
414    /// If the node id does not exist
415    #[inline]
416    pub fn committer_timestamp_offset(
417        &self,
418        node_id: NodeId,
419    ) -> PropertiesResult<'_, Option<i16>, TIMESTAMPS> {
420        TIMESTAMPS::map_if_available(
421            self.try_committer_timestamp_offset(node_id),
422            |committer_timestamp_offset| {
423                committer_timestamp_offset
424                    .unwrap_or_else(|e| panic!("Cannot get committer timestamp: {e}"))
425            },
426        )
427    }
428
429    /// Returns the UTC offset in minutes of a revision's committer date
430    ///
431    /// Returns `Err` if the node id is unknown, and `Ok(None)` if the node has
432    /// no committer timestamp
433    #[inline]
434    pub fn try_committer_timestamp_offset(
435        &self,
436        node_id: NodeId,
437    ) -> PropertiesResult<'_, Result<Option<i16>, OutOfBoundError>, TIMESTAMPS> {
438        TIMESTAMPS::map_if_available(
439            self.timestamps.committer_timestamp_offset(node_id),
440            |committer_timestamp_offset| match committer_timestamp_offset {
441                None => Err(OutOfBoundError {
442                    index: node_id,
443                    len: self.num_nodes,
444                }),
445                Some(i16::MIN) => Ok(None),
446                Some(offset) => Ok(Some(offset)),
447            },
448        )
449    }
450}