Skip to main content

swh_graph/properties/
mod.rs

1// Copyright (C) 2023-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
//! Node labels
//!
//! [`SwhGraphProperties`] is populated by the `load_properties` and `load_all_properties`
//! methods of [`SwhUnidirectionalGraph`](crate::graph::SwhUnidirectionalGraph) and
//! [`SwhBidirectionalGraph`](crate::graph::SwhBidirectionalGraph), and is returned by
//! their `properties` method.
//!
//! ```no_run
//! # use std::path::PathBuf;
//! use swh_graph::graph::SwhGraphWithProperties;
//! use swh_graph::mph::DynMphf;
//! use swh_graph::SwhGraphProperties;
//!
//! let properties: &SwhGraphProperties<_, _, _, _, _, _> =
//!     swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
//!     .expect("Could not load graph")
//!     .load_all_properties::<DynMphf>()
//!     .expect("Could not load properties")
//!     .properties();
//! ```
26
27use std::path::{Path, PathBuf};
28
29use anyhow::{Context, Result};
30use byteorder::BigEndian;
31use mmap_rs::Mmap;
32
33use crate::mph::LoadableSwhidMphf;
34use crate::utils::mmap::NumberMmap;
35use crate::OutOfBoundError;
36use value_traits::slices::SliceByValue;
37
/// File-name suffixes of the on-disk files holding each property.
///
/// NOTE(review): presumably each suffix is appended to the path prefix given to
/// `SwhGraphProperties::new` in order to locate the corresponding file; confirm
/// against the loaders in the submodules.
pub(crate) mod suffixes {
    // Node maps
    pub const NODE2SWHID: &str = ".node2swhid.bin";
    pub const NODE2TYPE: &str = ".node2type.bin";

    // Timestamps
    pub const AUTHOR_TIMESTAMP: &str = ".property.author_timestamp.bin";
    pub const AUTHOR_TIMESTAMP_OFFSET: &str = ".property.author_timestamp_offset.bin";
    pub const COMMITTER_TIMESTAMP: &str = ".property.committer_timestamp.bin";
    pub const COMMITTER_TIMESTAMP_OFFSET: &str = ".property.committer_timestamp_offset.bin";

    // Persons
    pub const AUTHOR_ID: &str = ".property.author_id.bin";
    pub const COMMITTER_ID: &str = ".property.committer_id.bin";

    // Contents
    pub const CONTENT_IS_SKIPPED: &str = ".property.content.is_skipped.bits";
    pub const CONTENT_LENGTH: &str = ".property.content.length.bin";

    // Strings
    pub const MESSAGE: &str = ".property.message.bin";
    pub const MESSAGE_OFFSET: &str = ".property.message.offset.bin";
    pub const TAG_NAME: &str = ".property.tag_name.bin";
    pub const TAG_NAME_OFFSET: &str = ".property.tag_name.offset.bin";

    // Label names
    pub const LABEL_NAME: &str = ".labels.fcl";

    // Miscellaneous
    pub const PERSONS_COUNT: &str = ".persons.count.txt";
}
56
57#[derive(thiserror::Error, Debug)]
58#[error("{path} cannot be loaded: {source}")]
59pub struct UnavailableProperty {
60    path: PathBuf,
61    #[source]
62    source: std::io::Error,
63}
64
65/// Wrapper for the return type of [`SwhGraphProperties`] methods.
66///
67/// When `B` implements `GuaranteedDataFiles` (the most common case), `PropertiesResult<'err, T, B>`
68/// is exactly the same type as `T`.
69///
70/// aWhen `B` implements `OptionalDataFiles` (which is the case when using
71/// `opt_load_*` instead of `load_*` or [`load_all`](SwhGraphProperties::load_all) for example),
72/// then `PropertiesResult<'err, T, B>` is exactly the same type as `Result<T, &'err UnavailableProperty>`.
73pub type PropertiesResult<'err, T, B> =
74    <<B as PropertiesBackend>::DataFilesAvailability as DataFilesAvailability>::Result<'err, T>;
75
76/// Common trait for type parameters of [`SwhGraphProperties`]
77pub trait PropertiesBackend {
78    type DataFilesAvailability: DataFilesAvailability;
79
80    /// Applies the given function `f` to the value `v` if the value is available
81    ///
82    /// This is an alias for `<Self::DataFilesAvailability as DataFilesAvailability>::map(v, f)`,
83    /// meaning that:
84    ///
85    /// 1. if `Self::DataFilesAvailability` is `GuaranteedDataFiles`, then `map_if_available(v, f)`
86    ///    is equivalent to `f(v)` and has type `U`
87    /// 2. if `Self::DataFilesAvailability` is `OptionalDataFiles`, then `map_if_available(v, f)`
88    ///    is equivalent to `v.map(f)` and has type `Result<U, &'err UnavailableProperty>`
89    fn map_if_available<T, U>(
90        v: <Self::DataFilesAvailability as DataFilesAvailability>::Result<'_, T>,
91        f: impl FnOnce(T) -> U,
92    ) -> <Self::DataFilesAvailability as DataFilesAvailability>::Result<'_, U> {
93        <Self::DataFilesAvailability as DataFilesAvailability>::map(v, f)
94    }
95
96    /// Returns `(v1, v2)` if both are available, or an error otherwise
97    ///
98    /// This is an alias for `<Self::DataFilesAvailability as DataFilesAvailability>::zip(v, f)`,
99    /// meaning that:
100    ///
101    /// 1. if `Self::DataFilesAvailability` is `GuaranteedDataFiles`, then `zip_if_available(v1, v2)`
102    ///    is equivalent to `(v1, v2)` and has type `(T1, T2)`
103    /// 2. if `Self::DataFilesAvailability` is `OptionalDataFiles`, then `zip_if_available(v1, v2)`
104    ///    is equivalent to `v1.and_then(|v1| v2.map(|v2| (v1, v2)))` and has type
105    ///    `Result<(T1, T2), &'err UnavailableProperty>`
106    fn zip_if_available<'err, T1, T2>(
107        v1: <Self::DataFilesAvailability as DataFilesAvailability>::Result<'err, T1>,
108        v2: <Self::DataFilesAvailability as DataFilesAvailability>::Result<'err, T2>,
109    ) -> <Self::DataFilesAvailability as DataFilesAvailability>::Result<'err, (T1, T2)> {
110        <Self::DataFilesAvailability as DataFilesAvailability>::zip(v1, v2)
111    }
112}
113
114/// Helper trait to work with [`PropertiesResult`]
115///
116/// It is implemented by:
117/// * [`GuaranteedDataFiles`]: the common case, where data files are guaranteed to exist
118///   once a graph is loaded, in which case `Self::Result<'err, T>` is the same type as `T`
119/// * [`OptionalDataFiles`]: when they are not, in which case `Self::Result<T>`
120///   is the same type as `Result<T, &'err UnavailableProperty>`.
121pub trait DataFilesAvailability {
122    type Result<'err, T>;
123
124    fn map<T, U>(v: Self::Result<'_, T>, f: impl FnOnce(T) -> U) -> Self::Result<'_, U>;
125    fn zip<'err, T1, T2>(
126        v1: Self::Result<'err, T1>,
127        v2: Self::Result<'err, T2>,
128    ) -> Self::Result<'err, (T1, T2)>;
129    fn make_result<T>(value: Self::Result<'_, T>) -> Result<T, &UnavailableProperty>;
130}
131
/// Marker type implementing [`DataFilesAvailability`] for the case where the
/// underlying data files may be missing at runtime
///
/// It is only meant to be used as a type parameter, never as a value.
pub struct OptionalDataFiles {
    // Private field, so that users cannot build an instance
    _marker: (),
}
137
/// Marker type implementing [`DataFilesAvailability`] for the case where the
/// underlying data files are guaranteed to be available once the graph is loaded
///
/// It is only meant to be used as a type parameter, never as a value.
pub struct GuaranteedDataFiles {
    // Private field, so that users cannot build an instance
    _marker: (),
}
143
144impl DataFilesAvailability for OptionalDataFiles {
145    type Result<'err, T> = Result<T, &'err UnavailableProperty>;
146
147    #[inline(always)]
148    fn map<T, U>(v: Self::Result<'_, T>, f: impl FnOnce(T) -> U) -> Self::Result<'_, U> {
149        v.map(f)
150    }
151
152    #[inline(always)]
153    fn zip<'err, T1, T2>(
154        v1: Self::Result<'err, T1>,
155        v2: Self::Result<'err, T2>,
156    ) -> Self::Result<'err, (T1, T2)> {
157        v1.and_then(|v1| v2.map(|v2| (v1, v2)))
158    }
159
160    #[inline(always)]
161    fn make_result<T>(value: Self::Result<'_, T>) -> Result<T, &UnavailableProperty> {
162        value
163    }
164}
165
166impl DataFilesAvailability for GuaranteedDataFiles {
167    type Result<'err, T> = T;
168
169    #[inline(always)]
170    fn map<T, U>(v: Self::Result<'_, T>, f: impl FnOnce(T) -> U) -> Self::Result<'_, U> {
171        f(v)
172    }
173
174    #[inline(always)]
175    fn zip<'err, T1, T2>(
176        v1: Self::Result<'err, T1>,
177        v2: Self::Result<'err, T2>,
178    ) -> Self::Result<'err, (T1, T2)> {
179        (v1, v2)
180    }
181
182    #[inline(always)]
183    fn make_result<T>(value: Self::Result<'_, T>) -> Result<T, &UnavailableProperty> {
184        Ok(value)
185    }
186}
187
188/// Properties on graph nodes
189///
190/// This structures has many type parameters, to allow loading only some properties,
191/// and checking at compile time that only loaded properties are accessed.
192///
193/// Extra properties can be loaded, following the builder pattern on the owning graph.
194/// For example, this does not compile:
195///
196/// ```compile_fail
197/// # use std::path::PathBuf;
198/// use swh_graph::graph::SwhGraphWithProperties;
199/// use swh_graph::mph::DynMphf;
200/// use swh_graph::SwhGraphProperties;
201///
202/// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
203///     .expect("Could not load graph")
204///     .init_properties()
205///     .properties()
206///     .author_timestamp(42);
207/// ```
208///
209/// but this does:
210///
211/// ```no_run
212/// # use std::path::PathBuf;
213/// use swh_graph::graph::SwhGraphWithProperties;
214/// use swh_graph::mph::DynMphf;
215/// use swh_graph::SwhGraphProperties;
216///
217/// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
218///     .expect("Could not load graph")
219///     .init_properties()
220///     .load_properties(SwhGraphProperties::load_timestamps)
221///     .expect("Could not load timestamp properties")
222///     .properties()
223///     .author_timestamp(42);
224/// ```
225#[derive(Debug)]
226pub struct SwhGraphProperties<
227    MAPS: MaybeMaps,
228    TIMESTAMPS: MaybeTimestamps,
229    PERSONS: MaybePersons,
230    CONTENTS: MaybeContents,
231    STRINGS: MaybeStrings,
232    LABELNAMES: MaybeLabelNames,
233> {
234    pub(crate) path: PathBuf,
235    pub(crate) num_nodes: usize,
236    pub(crate) maps: MAPS,
237    pub(crate) timestamps: TIMESTAMPS,
238    pub(crate) persons: PERSONS,
239    pub(crate) contents: CONTENTS,
240    pub(crate) strings: STRINGS,
241    pub(crate) label_names: LABELNAMES,
242    /// Hack: `Some(false)` if the graph was compressed with Rust (2023-09-06 and newer),
243    /// `Some(true)` if the graph was compressed with Java (2022-12-07 and older),
244    /// `None` if we don't know yet (as we compute this lazily)
245    pub(crate) label_names_are_in_base64_order: once_cell::race::OnceBool,
246}
247
248pub type AllSwhGraphProperties<MPHF> = SwhGraphProperties<
249    MappedMaps<MPHF>,
250    MappedTimestamps,
251    MappedPersons,
252    MappedContents,
253    MappedStrings,
254    MappedLabelNames,
255>;
256
257pub type AllSwhGraphDynProperties<MPHF> = SwhGraphProperties<
258    MappedMaps<MPHF>,
259    OptMappedTimestamps,
260    OptMappedPersons,
261    OptMappedContents,
262    OptMappedStrings,
263    MappedLabelNames,
264>;
265
266impl SwhGraphProperties<NoMaps, NoTimestamps, NoPersons, NoContents, NoStrings, NoLabelNames> {
267    /// Creates an empty [`SwhGraphProperties`] instance, which will load properties
268    /// from the given path prefix.
269    pub fn new(path: impl AsRef<Path>, num_nodes: usize) -> Self {
270        SwhGraphProperties {
271            path: path.as_ref().to_owned(),
272            num_nodes,
273            maps: NoMaps,
274            timestamps: NoTimestamps,
275            persons: NoPersons,
276            contents: NoContents,
277            strings: NoStrings,
278            label_names: NoLabelNames,
279            label_names_are_in_base64_order: Default::default(),
280        }
281    }
282
283    /// Consumes an empty [`SwhGraphProperties`] instance and returns a new one
284    /// with all properties loaded and all methods available.
285    ///
286    /// ```no_run
287    /// # use std::path::PathBuf;
288    ///  use swh_graph::graph::SwhGraphWithProperties;
289    /// use swh_graph::mph::DynMphf;
290    /// use swh_graph::SwhGraphProperties;
291    ///
292    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
293    ///     .load_all::<DynMphf>()
294    ///     .expect("Could not load properties");
295    /// ```
296    ///
297    /// is equivalent to:
298    ///
299    /// ```no_run
300    /// # use std::path::PathBuf;
301    /// use swh_graph::mph::DynMphf;
302    /// use swh_graph::SwhGraphProperties;
303    ///
304    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
305    ///     .load_maps::<DynMphf>()
306    ///     .expect("Could not load node2swhid/swhid2node")
307    ///     .load_timestamps()
308    ///     .expect("Could not load timestamp properties")
309    ///     .load_persons()
310    ///     .expect("Could not load person properties")
311    ///     .load_contents()
312    ///     .expect("Could not load content properties")
313    ///     .load_strings()
314    ///     .expect("Could not load string properties");
315    /// ```
316    pub fn load_all<MPHF: LoadableSwhidMphf>(self) -> Result<AllSwhGraphProperties<MPHF>> {
317        self.load_maps()?
318            .load_timestamps()?
319            .load_persons()?
320            .load_contents()?
321            .load_strings()?
322            .load_label_names()
323    }
324}
325
326mod maps;
327pub use maps::{MappedMaps, Maps, MaybeMaps, NoMaps, NodeIdFromSwhidError, VecMaps};
328
329mod timestamps;
330pub use timestamps::{
331    MappedTimestamps, MaybeTimestamps, NoTimestamps, OptMappedTimestamps, OptTimestamps,
332    Timestamps, VecTimestamps,
333};
334
335mod persons;
336pub use persons::{
337    MappedPersons, MaybePersons, NoPersons, OptMappedPersons, OptPersons, Persons, VecPersons,
338};
339
340mod contents;
341pub use contents::{
342    Contents, MappedContents, MaybeContents, NoContents, OptContents, OptMappedContents,
343    VecContents,
344};
345
346mod strings;
347pub use strings::{
348    MappedStrings, MaybeStrings, NoStrings, OptMappedStrings, OptStrings, Strings, VecStrings,
349};
350
351mod label_names;
352pub use label_names::{
353    LabelIdFromNameError, LabelNames, MappedLabelNames, MaybeLabelNames, NoLabelNames,
354    VecLabelNames,
355};
356
357mod utils;
358use utils::*;