swh_graph/properties/
persons.rs

// Copyright (C) 2023-2025  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information
5
6use anyhow::{ensure, Context, Result};
7use mmap_rs::Mmap;
8
9use super::suffixes::*;
10use super::*;
11use crate::graph::NodeId;
12
13/// Trait implemented by both [`NoPersons`] and all implementors of [`Persons`],
14/// to allow loading person ids only if needed.
15pub trait MaybePersons {}
16impl<P: OptPersons> MaybePersons for P {}
17
18/// Placeholder for when person ids are not loaded
19#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
20pub struct NoPersons;
21impl MaybePersons for NoPersons {}
22
23#[diagnostic::on_unimplemented(
24    label = "does not have Person properties loaded",
25    note = "Use `let graph = graph.load_properties(|props| props.load_persons()).unwrap()` to load them",
26    note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
27)]
28/// Trait for backend storage of person properties (either in-memory or memory-mapped)
29pub trait OptPersons: MaybePersons + PropertiesBackend {
30    /// Returns `None` if out of bounds, `Some(u32::MAX)` if the node has no author
31    fn author_id(&self, node: NodeId) -> PropertiesResult<'_, Option<u32>, Self>;
32    /// Returns `None` if out of bounds, `Some(u32::MAX)` if the node has no committer
33    fn committer_id(&self, node: NodeId) -> PropertiesResult<'_, Option<u32>, Self>;
34}
35
36#[diagnostic::on_unimplemented(
37    label = "does not have Person properties loaded",
38    note = "Use `let graph = graph.load_properties(|props| props.load_persons()).unwrap()` to load them",
39    note = "Or replace `graph.init_properties()` with `graph.load_all_properties::<DynMphf>().unwrap()` to load all properties"
40)]
41/// Trait for backend storage of person properties (either in-memory or memory-mapped)
42pub trait Persons: OptPersons<DataFilesAvailability = GuaranteedDataFiles> {}
43impl<P: OptPersons<DataFilesAvailability = GuaranteedDataFiles>> Persons for P {}
44
45pub struct OptMappedPersons {
46    author_id: Result<NumberMmap<BigEndian, u32, Mmap>, UnavailableProperty>,
47    committer_id: Result<NumberMmap<BigEndian, u32, Mmap>, UnavailableProperty>,
48}
49impl PropertiesBackend for OptMappedPersons {
50    type DataFilesAvailability = OptionalDataFiles;
51}
52impl OptPersons for OptMappedPersons {
53    #[inline(always)]
54    fn author_id(&self, node: NodeId) -> PropertiesResult<'_, Option<u32>, Self> {
55        self.author_id
56            .as_ref()
57            .map(|author_ids| author_ids.get(node))
58    }
59    #[inline(always)]
60    fn committer_id(&self, node: NodeId) -> PropertiesResult<'_, Option<u32>, Self> {
61        self.committer_id
62            .as_ref()
63            .map(|committer_ids| committer_ids.get(node))
64    }
65}
66
67pub struct MappedPersons {
68    author_id: NumberMmap<BigEndian, u32, Mmap>,
69    committer_id: NumberMmap<BigEndian, u32, Mmap>,
70}
71impl PropertiesBackend for MappedPersons {
72    type DataFilesAvailability = GuaranteedDataFiles;
73}
74impl OptPersons for MappedPersons {
75    /// Returns `None` if author ids are not loaded, `Some(u32::MAX)` if they are
76    /// loaded and the node has no author, or `Some(Some(_))` if they are loaded
77    /// and the node has an author
78    #[inline(always)]
79    fn author_id(&self, node: NodeId) -> Option<u32> {
80        (&self.author_id).get(node)
81    }
82    /// See [`Self::author_id`]
83    #[inline(always)]
84    fn committer_id(&self, node: NodeId) -> Option<u32> {
85        (&self.committer_id).get(node)
86    }
87}
88
89#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
90pub struct VecPersons {
91    author_id: Vec<u32>,
92    committer_id: Vec<u32>,
93}
94
95impl VecPersons {
96    /// Returns a [`VecPersons`] from pairs of `(author_id, committer_id)`
97    pub fn new(data: Vec<(Option<u32>, Option<u32>)>) -> Result<Self> {
98        let mut author_id = Vec::with_capacity(data.len());
99        let mut committer_id = Vec::with_capacity(data.len());
100        for (a, c) in data.into_iter() {
101            ensure!(a != Some(u32::MAX), "author_id may not be {}", u32::MAX);
102            ensure!(c != Some(u32::MAX), "author_id may not be {}", u32::MAX);
103            author_id.push(a.unwrap_or(u32::MAX));
104            committer_id.push(c.unwrap_or(u32::MAX));
105        }
106        Ok(VecPersons {
107            author_id,
108            committer_id,
109        })
110    }
111}
112
// `VecPersons` always holds both vectors, so it is declared as a backend with
// guaranteed data; this is also what makes the plain `Option<u32>` return
// types below acceptable for the `OptPersons` methods.
impl PropertiesBackend for VecPersons {
    type DataFilesAvailability = GuaranteedDataFiles;
}
/// Lookups of raw person ids in the in-memory vectors, indexed by node id
impl OptPersons for VecPersons {
    /// Returns the raw author id stored at index `node`
    ///
    /// Per the [`OptPersons::author_id`] contract this is `None` if `node` is
    /// out of bounds; `Some(u32::MAX)` is the value [`VecPersons::new`] stores
    /// for a missing author.
    #[inline(always)]
    fn author_id(&self, node: NodeId) -> Option<u32> {
        // NOTE(review): this must resolve to a `get` returning `Option<u32>`
        // by value (not `<[u32]>::get`, which returns `Option<&u32>`) —
        // presumably a project trait brought in by `use super::*`; confirm.
        self.author_id.get(node)
    }
    /// Returns the raw committer id stored at index `node`
    ///
    /// Per the [`OptPersons::committer_id`] contract this is `None` if `node`
    /// is out of bounds; `Some(u32::MAX)` is the value [`VecPersons::new`]
    /// stores for a missing committer.
    #[inline(always)]
    fn committer_id(&self, node: NodeId) -> Option<u32> {
        self.committer_id.get(node)
    }
}
126
127impl<
128        MAPS: MaybeMaps,
129        TIMESTAMPS: MaybeTimestamps,
130        CONTENTS: MaybeContents,
131        STRINGS: MaybeStrings,
132        LABELNAMES: MaybeLabelNames,
133    > SwhGraphProperties<MAPS, TIMESTAMPS, NoPersons, CONTENTS, STRINGS, LABELNAMES>
134{
135    /// Consumes a [`SwhGraphProperties`] and returns a new one with these methods
136    /// available:
137    ///
138    /// * [`SwhGraphProperties::author_id`]
139    /// * [`SwhGraphProperties::committer_id`]
140    pub fn load_persons(
141        self,
142    ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, MappedPersons, CONTENTS, STRINGS, LABELNAMES>>
143    {
144        let OptMappedPersons {
145            author_id,
146            committer_id,
147        } = self.get_persons()?;
148        let persons = MappedPersons {
149            author_id: author_id?,
150            committer_id: committer_id?,
151        };
152        self.with_persons(persons)
153    }
154
155    /// Equivalent to [`Self::load_persons`] that does not require all files to be present
156    pub fn opt_load_persons(
157        self,
158    ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, OptMappedPersons, CONTENTS, STRINGS, LABELNAMES>>
159    {
160        let persons = self.get_persons()?;
161        self.with_persons(persons)
162    }
163
164    fn get_persons(&self) -> Result<OptMappedPersons> {
165        Ok(OptMappedPersons {
166            author_id: load_if_exists(&self.path, AUTHOR_ID, |path| {
167                NumberMmap::new(path, self.num_nodes).context("Could not load author_id")
168            })?,
169            committer_id: load_if_exists(&self.path, COMMITTER_ID, |path| {
170                NumberMmap::new(path, self.num_nodes).context("Could not load committer_id")
171            })?,
172        })
173    }
174
175    /// Alternative to [`load_persons`](Self::load_persons) that allows using arbitrary
176    /// persons implementations
177    pub fn with_persons<PERSONS: MaybePersons>(
178        self,
179        persons: PERSONS,
180    ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
181        Ok(SwhGraphProperties {
182            maps: self.maps,
183            timestamps: self.timestamps,
184            persons,
185            contents: self.contents,
186            strings: self.strings,
187            label_names: self.label_names,
188            path: self.path,
189            num_nodes: self.num_nodes,
190            label_names_are_in_base64_order: self.label_names_are_in_base64_order,
191        })
192    }
193}
194
195/// Functions to access the id of the author or committer of `revision`/`release` nodes.
196///
197/// Only available after calling [`load_persons`](SwhGraphProperties::load_persons)
198/// or [`load_all_properties`](crate::graph::SwhBidirectionalGraph::load_all_properties)
199impl<
200        MAPS: MaybeMaps,
201        TIMESTAMPS: MaybeTimestamps,
202        PERSONS: OptPersons,
203        CONTENTS: MaybeContents,
204        STRINGS: MaybeStrings,
205        LABELNAMES: MaybeLabelNames,
206    > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
207{
208    /// Returns the id of the author of a revision or release, if any
209    ///
210    /// # Panics
211    ///
212    /// If the node id does not exist
213    #[inline]
214    pub fn author_id(&self, node_id: NodeId) -> PropertiesResult<'_, Option<u32>, PERSONS> {
215        PERSONS::map_if_available(self.try_author_id(node_id), |author_id| {
216            author_id.unwrap_or_else(|e| panic!("Cannot get node author: {e}"))
217        })
218    }
219
220    /// Returns the id of the author of a revision or release, if any
221    ///
222    /// Returns `Err` if the node id does not exist, and `Ok(None)` if the node
223    /// has no author
224    #[inline]
225    pub fn try_author_id(
226        &self,
227        node_id: NodeId,
228    ) -> PropertiesResult<'_, Result<Option<u32>, OutOfBoundError>, PERSONS> {
229        PERSONS::map_if_available(self.persons.author_id(node_id), |author_id| {
230            match author_id {
231                None => Err(OutOfBoundError {
232                    // Invalid node id
233                    index: node_id,
234                    len: self.num_nodes,
235                }),
236                Some(u32::MAX) => Ok(None), // No author
237                Some(id) => Ok(Some(id)),
238            }
239        })
240    }
241
242    /// Returns the id of the committer of a revision, if any
243    ///
244    /// # Panics
245    ///
246    /// If the node id does not exist
247    #[inline]
248    pub fn committer_id(&self, node_id: NodeId) -> PropertiesResult<'_, Option<u32>, PERSONS> {
249        PERSONS::map_if_available(self.try_committer_id(node_id), |committer_id| {
250            committer_id.unwrap_or_else(|e| panic!("Cannot get node committer: {e}"))
251        })
252    }
253
254    /// Returns the id of the committer of a revision, if any
255    ///
256    /// Returns `None` if the node id does not exist, and `Ok(None)` if the node
257    /// has no author
258    #[inline]
259    pub fn try_committer_id(
260        &self,
261        node_id: NodeId,
262    ) -> PropertiesResult<'_, Result<Option<u32>, OutOfBoundError>, PERSONS> {
263        PERSONS::map_if_available(self.persons.committer_id(node_id), |committer_id| {
264            match committer_id {
265                None => Err(OutOfBoundError {
266                    // Invalid node id
267                    index: node_id,
268                    len: self.num_nodes,
269                }),
270                Some(u32::MAX) => Ok(None), // No committer
271                Some(id) => Ok(Some(id)),
272            }
273        })
274    }
275}