swh_graph/properties/
mod.rs

1// Copyright (C) 2023-2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Node labels
7//!
//! [`SwhGraphProperties`] is populated by the `load_properties` and `load_all_properties`
//! methods of [`SwhUnidirectionalGraph`](crate::graph::SwhUnidirectionalGraph) and
//! [`SwhBidirectionalGraph`](crate::graph::SwhBidirectionalGraph), and is returned by
//! their `properties` method.
12//!
13//! ```no_run
14//! # use std::path::PathBuf;
15//! use swh_graph::graph::SwhGraphWithProperties;
16//! use swh_graph::mph::DynMphf;
17//! use swh_graph::SwhGraphProperties;
18//!
19//! let properties: &SwhGraphProperties<_, _, _, _, _, _> =
20//!     swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
21//!     .expect("Could not load graph")
22//!     .load_all_properties::<DynMphf>()
23//!     .expect("Could not load properties")
24//!     .properties();
25//! ```
26
27use std::path::{Path, PathBuf};
28
29use anyhow::{Context, Result};
30use byteorder::BigEndian;
31use mmap_rs::Mmap;
32
33use crate::mph::LoadableSwhidMphf;
34use crate::utils::mmap::NumberMmap;
35use crate::utils::GetIndex;
36use crate::OutOfBoundError;
37
/// File-name suffixes of the property data files, one constant per file.
pub(crate) mod suffixes {
    // --- Node maps ---

    /// Suffix of the node id -> SWHID file
    pub const NODE2SWHID: &str = ".node2swhid.bin";
    /// Suffix of the node id -> node type file
    pub const NODE2TYPE: &str = ".node2type.bin";

    // --- Timestamps ---

    /// Suffix of the author timestamp file
    pub const AUTHOR_TIMESTAMP: &str = ".property.author_timestamp.bin";
    /// Suffix of the author timestamp offset file
    pub const AUTHOR_TIMESTAMP_OFFSET: &str = ".property.author_timestamp_offset.bin";
    /// Suffix of the committer timestamp file
    pub const COMMITTER_TIMESTAMP: &str = ".property.committer_timestamp.bin";
    /// Suffix of the committer timestamp offset file
    pub const COMMITTER_TIMESTAMP_OFFSET: &str = ".property.committer_timestamp_offset.bin";

    // --- Persons ---

    /// Suffix of the author id file
    pub const AUTHOR_ID: &str = ".property.author_id.bin";
    /// Suffix of the committer id file
    pub const COMMITTER_ID: &str = ".property.committer_id.bin";

    // --- Contents ---

    /// Suffix of the "content is skipped" bit file
    pub const CONTENT_IS_SKIPPED: &str = ".property.content.is_skipped.bits";
    /// Suffix of the content length file
    pub const CONTENT_LENGTH: &str = ".property.content.length.bin";

    // --- Strings ---

    /// Suffix of the message file
    pub const MESSAGE: &str = ".property.message.bin";
    /// Suffix of the message offset file
    pub const MESSAGE_OFFSET: &str = ".property.message.offset.bin";
    /// Suffix of the tag name file
    pub const TAG_NAME: &str = ".property.tag_name.bin";
    /// Suffix of the tag name offset file
    pub const TAG_NAME_OFFSET: &str = ".property.tag_name.offset.bin";

    // --- Label names ---

    /// Suffix of the label names file
    pub const LABEL_NAME: &str = ".labels.fcl";
}
55
56#[derive(thiserror::Error, Debug)]
57#[error("{path} cannot be loaded: {source}")]
58pub struct UnavailableProperty {
59    path: PathBuf,
60    #[source]
61    source: std::io::Error,
62}
63
/// Wrapper for the return type of [`SwhGraphProperties`] methods.
///
/// When `B::DataFilesAvailability` is [`GuaranteedDataFiles`] (the most common case),
/// `PropertiesResult<'err, T, B>` is exactly the same type as `T`.
///
/// When `B::DataFilesAvailability` is [`OptionalDataFiles`] (which is the case when using
/// `opt_load_*` instead of `load_*` or [`load_all`](SwhGraphProperties::load_all) for example),
/// then `PropertiesResult<'err, T, B>` is exactly the same type as `Result<T, &'err UnavailableProperty>`.
pub type PropertiesResult<'err, T, B> =
    <<B as PropertiesBackend>::DataFilesAvailability as DataFilesAvailability>::Result<'err, T>;
74
/// Common trait for type parameters of [`SwhGraphProperties`]
pub trait PropertiesBackend {
    /// Marker type (see [`GuaranteedDataFiles`] and [`OptionalDataFiles`]) selecting whether
    /// values are returned bare or wrapped in a `Result`, as described in [`PropertiesResult`]
    type DataFilesAvailability: DataFilesAvailability;

    /// Applies the given function `f` to the value `v` if the value is available
    ///
    /// This is an alias for `<Self::DataFilesAvailability as DataFilesAvailability>::map(v, f)`,
    /// meaning that:
    ///
    /// 1. if `Self::DataFilesAvailability` is `GuaranteedDataFiles`, then `map_if_available(v, f)`
    ///    is equivalent to `f(v)` and has type `U`
    /// 2. if `Self::DataFilesAvailability` is `OptionalDataFiles`, then `map_if_available(v, f)`
    ///    is equivalent to `v.map(f)` and has type `Result<U, &'err UnavailableProperty>`
    fn map_if_available<T, U>(
        v: <Self::DataFilesAvailability as DataFilesAvailability>::Result<'_, T>,
        f: impl FnOnce(T) -> U,
    ) -> <Self::DataFilesAvailability as DataFilesAvailability>::Result<'_, U> {
        <Self::DataFilesAvailability as DataFilesAvailability>::map(v, f)
    }

    /// Returns `(v1, v2)` if both are available, or an error otherwise
    ///
    /// This is an alias for `<Self::DataFilesAvailability as DataFilesAvailability>::zip(v1, v2)`,
    /// meaning that:
    ///
    /// 1. if `Self::DataFilesAvailability` is `GuaranteedDataFiles`, then `zip_if_available(v1, v2)`
    ///    is equivalent to `(v1, v2)` and has type `(T1, T2)`
    /// 2. if `Self::DataFilesAvailability` is `OptionalDataFiles`, then `zip_if_available(v1, v2)`
    ///    is equivalent to `v1.and_then(|v1| v2.map(|v2| (v1, v2)))` and has type
    ///    `Result<(T1, T2), &'err UnavailableProperty>`
    fn zip_if_available<'err, T1, T2>(
        v1: <Self::DataFilesAvailability as DataFilesAvailability>::Result<'err, T1>,
        v2: <Self::DataFilesAvailability as DataFilesAvailability>::Result<'err, T2>,
    ) -> <Self::DataFilesAvailability as DataFilesAvailability>::Result<'err, (T1, T2)> {
        <Self::DataFilesAvailability as DataFilesAvailability>::zip(v1, v2)
    }
}
112
/// Helper trait to work with [`PropertiesResult`]
///
/// It is implemented by:
/// * [`GuaranteedDataFiles`]: the common case, where data files are guaranteed to exist
///   once a graph is loaded, in which case `Self::Result<'err, T>` is the same type as `T`
/// * [`OptionalDataFiles`]: when they are not, in which case `Self::Result<'err, T>`
///   is the same type as `Result<T, &'err UnavailableProperty>`.
pub trait DataFilesAvailability {
    /// How a value of type `T` is returned: either `T` itself, or
    /// `Result<T, &'err UnavailableProperty>`, depending on the implementor
    type Result<'err, T>;

    /// Applies `f` to the value carried by `v`, if any
    fn map<T, U>(v: Self::Result<'_, T>, f: impl FnOnce(T) -> U) -> Self::Result<'_, U>;

    /// Combines `v1` and `v2` into a single pair, if both carry a value
    fn zip<'err, T1, T2>(
        v1: Self::Result<'err, T1>,
        v2: Self::Result<'err, T2>,
    ) -> Self::Result<'err, (T1, T2)>;

    /// Converts `value` to a plain `Result`, whichever the implementor is
    fn make_result<T>(value: Self::Result<'_, T>) -> Result<T, &UnavailableProperty>;
}
130
/// Helper type that implements [`DataFilesAvailability`] to signal underlying data files
/// may be missing at runtime
///
/// With this marker, [`DataFilesAvailability::Result`] is
/// `Result<T, &'err UnavailableProperty>`.
pub struct OptionalDataFiles {
    _marker: (), // Prevents users from instantiating
}
136
/// Helper type that implements [`DataFilesAvailability`] to signal underlying data files
/// are guaranteed to be available once the graph is loaded
///
/// With this marker, [`DataFilesAvailability::Result`] is the bare value type `T`.
pub struct GuaranteedDataFiles {
    _marker: (), // Prevents users from instantiating
}
142
143impl DataFilesAvailability for OptionalDataFiles {
144    type Result<'err, T> = Result<T, &'err UnavailableProperty>;
145
146    fn map<T, U>(v: Self::Result<'_, T>, f: impl FnOnce(T) -> U) -> Self::Result<'_, U> {
147        v.map(f)
148    }
149
150    fn zip<'err, T1, T2>(
151        v1: Self::Result<'err, T1>,
152        v2: Self::Result<'err, T2>,
153    ) -> Self::Result<'err, (T1, T2)> {
154        v1.and_then(|v1| v2.map(|v2| (v1, v2)))
155    }
156
157    fn make_result<T>(value: Self::Result<'_, T>) -> Result<T, &UnavailableProperty> {
158        value
159    }
160}
161
impl DataFilesAvailability for GuaranteedDataFiles {
    /// Values are returned bare, without any `Result` wrapper
    type Result<'err, T> = T;

    /// Simply calls `f` on `v`
    fn map<T, U>(v: Self::Result<'_, T>, f: impl FnOnce(T) -> U) -> Self::Result<'_, U> {
        f(v)
    }

    /// Simply builds the pair `(v1, v2)`
    fn zip<'err, T1, T2>(
        v1: Self::Result<'err, T1>,
        v2: Self::Result<'err, T2>,
    ) -> Self::Result<'err, (T1, T2)> {
        (v1, v2)
    }

    /// Always returns `Ok(value)`
    fn make_result<T>(value: Self::Result<'_, T>) -> Result<T, &UnavailableProperty> {
        Ok(value)
    }
}
180
/// Properties on graph nodes
///
/// This structure has many type parameters, to allow loading only some properties,
/// and checking at compile time that only loaded properties are accessed.
///
/// Extra properties can be loaded, following the builder pattern on the owning graph.
/// For example, this does not compile:
///
/// ```compile_fail
/// # use std::path::PathBuf;
/// use swh_graph::graph::SwhGraphWithProperties;
/// use swh_graph::mph::DynMphf;
/// use swh_graph::SwhGraphProperties;
///
/// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
///     .expect("Could not load graph")
///     .init_properties()
///     .properties()
///     .author_timestamp(42);
/// ```
///
/// but this does:
///
/// ```no_run
/// # use std::path::PathBuf;
/// use swh_graph::graph::SwhGraphWithProperties;
/// use swh_graph::mph::DynMphf;
/// use swh_graph::SwhGraphProperties;
///
/// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
///     .expect("Could not load graph")
///     .init_properties()
///     .load_properties(SwhGraphProperties::load_timestamps)
///     .expect("Could not load timestamp properties")
///     .properties()
///     .author_timestamp(42);
/// ```
pub struct SwhGraphProperties<
    MAPS: MaybeMaps,
    TIMESTAMPS: MaybeTimestamps,
    PERSONS: MaybePersons,
    CONTENTS: MaybeContents,
    STRINGS: MaybeStrings,
    LABELNAMES: MaybeLabelNames,
> {
    /// Path prefix the properties are loaded from, as given to [`SwhGraphProperties::new`]
    path: PathBuf,
    /// Number of nodes, as given to [`SwhGraphProperties::new`]
    num_nodes: usize,
    pub(crate) maps: MAPS,
    pub(crate) timestamps: TIMESTAMPS,
    pub(crate) persons: PERSONS,
    pub(crate) contents: CONTENTS,
    pub(crate) strings: STRINGS,
    pub(crate) label_names: LABELNAMES,
    /// Hack: `Some(false)` if the graph was compressed with Rust (2023-09-06 and newer),
    /// `Some(true)` if the graph was compressed with Java (2022-12-07 and older),
    /// `None` if we don't know yet (as we compute this lazily)
    pub(crate) label_names_are_in_base64_order: once_cell::race::OnceBool,
}
239
/// [`SwhGraphProperties`] with every group of properties set to its `Mapped*` backend
///
/// This is the type returned by [`SwhGraphProperties::load_all`].
pub type AllSwhGraphProperties<MPHF> = SwhGraphProperties<
    MappedMaps<MPHF>,
    MappedTimestamps,
    MappedPersons,
    MappedContents,
    MappedStrings,
    MappedLabelNames,
>;
248
/// [`SwhGraphProperties`] where timestamps, persons, contents and strings use their
/// `OptMapped*` backend, while maps and label names use their `Mapped*` backend
///
/// NOTE(review): presumably the `OptMapped*` backends are the ones whose data files
/// may be missing at runtime (see [`OptionalDataFiles`]) -- confirm in the submodules.
pub type AllSwhGraphDynProperties<MPHF> = SwhGraphProperties<
    MappedMaps<MPHF>,
    OptMappedTimestamps,
    OptMappedPersons,
    OptMappedContents,
    OptMappedStrings,
    MappedLabelNames,
>;
257
258impl SwhGraphProperties<NoMaps, NoTimestamps, NoPersons, NoContents, NoStrings, NoLabelNames> {
259    /// Creates an empty [`SwhGraphProperties`] instance, which will load properties
260    /// from the given path prefix.
261    pub fn new(path: impl AsRef<Path>, num_nodes: usize) -> Self {
262        SwhGraphProperties {
263            path: path.as_ref().to_owned(),
264            num_nodes,
265            maps: NoMaps,
266            timestamps: NoTimestamps,
267            persons: NoPersons,
268            contents: NoContents,
269            strings: NoStrings,
270            label_names: NoLabelNames,
271            label_names_are_in_base64_order: Default::default(),
272        }
273    }
274
275    /// Consumes an empty [`SwhGraphProperties`] instance and returns a new one
276    /// with all properties loaded and all methods available.
277    ///
278    /// ```no_run
279    /// # use std::path::PathBuf;
280    ///  use swh_graph::graph::SwhGraphWithProperties;
281    /// use swh_graph::mph::DynMphf;
282    /// use swh_graph::SwhGraphProperties;
283    ///
284    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
285    ///     .load_all::<DynMphf>()
286    ///     .expect("Could not load properties");
287    /// ```
288    ///
289    /// is equivalent to:
290    ///
291    /// ```no_run
292    /// # use std::path::PathBuf;
293    /// use swh_graph::mph::DynMphf;
294    /// use swh_graph::SwhGraphProperties;
295    ///
296    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
297    ///     .load_maps::<DynMphf>()
298    ///     .expect("Could not load node2swhid/swhid2node")
299    ///     .load_timestamps()
300    ///     .expect("Could not load timestamp properties")
301    ///     .load_persons()
302    ///     .expect("Could not load person properties")
303    ///     .load_contents()
304    ///     .expect("Could not load content properties")
305    ///     .load_strings()
306    ///     .expect("Could not load string properties");
307    /// ```
308    pub fn load_all<MPHF: LoadableSwhidMphf>(self) -> Result<AllSwhGraphProperties<MPHF>> {
309        self.load_maps()?
310            .load_timestamps()?
311            .load_persons()?
312            .load_contents()?
313            .load_strings()?
314            .load_label_names()
315    }
316}
317
318mod maps;
319pub use maps::{MappedMaps, Maps, MaybeMaps, NoMaps, NodeIdFromSwhidError, VecMaps};
320
321mod timestamps;
322pub use timestamps::{
323    MappedTimestamps, MaybeTimestamps, NoTimestamps, OptMappedTimestamps, OptTimestamps,
324    Timestamps, VecTimestamps,
325};
326
327mod persons;
328pub use persons::{
329    MappedPersons, MaybePersons, NoPersons, OptMappedPersons, OptPersons, Persons, VecPersons,
330};
331
332mod contents;
333pub use contents::{
334    Contents, MappedContents, MaybeContents, NoContents, OptContents, OptMappedContents,
335    VecContents,
336};
337
338mod strings;
339pub use strings::{
340    MappedStrings, MaybeStrings, NoStrings, OptMappedStrings, OptStrings, Strings, VecStrings,
341};
342
343mod label_names;
344pub use label_names::{
345    LabelIdFromNameError, LabelNames, MappedLabelNames, MaybeLabelNames, NoLabelNames,
346    VecLabelNames,
347};
348
349mod utils;
350use utils::*;