swh_graph/
swhtype.rs

1// Copyright (C) 2023-2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::str::FromStr;
7
/// Object type of an SWHID
///
/// The explicit discriminants are the integer codes returned by
/// [`NodeType::to_u8`] and accepted by the `TryFrom<u8>` implementation.
///
/// # Reference
/// - <https://docs.softwareheritage.org/devel/swh-model/data-model.html>
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeType {
    /// AKA “blobs”
    ///
    /// the raw content of (source code) files as a sequence of bytes, without
    /// file names or any other metadata.
    Content = 0,
    /// a list of named directory entries, each of which pointing to other
    /// artifacts, usually file contents or sub-directories. Directory entries
    /// are also associated to some metadata stored as permission bits.
    Directory = 1,
    /// code “hosting places” as previously described are usually large
    /// platforms that host several unrelated software projects. For software
    /// provenance purposes it is important to be more specific than that.
    ///
    /// Software origins are fine grained references to where source code
    /// artifacts archived by Software Heritage have been retrieved from. They
    /// take the form of `(type, url)` pairs, where url is a canonical URL
    /// (e.g., the address at which one can `git clone` a repository or download
    /// a source tarball) and `type` the kind of software origin (e.g., git,
    /// svn, or dsc for Debian source packages).
    Origin = 2,
    /// AKA “tags”
    ///
    /// some revisions are more equal than others and get selected by
    /// developers as denoting important project milestones known as “releases”.
    /// Each release points to the last commit in project history corresponding
    /// to the release and carries metadata: release name and version, release
    /// message, cryptographic signatures, etc.
    Release = 3,
    /// AKA commits
    ///
    /// Software development within a specific project is
    /// essentially a time-indexed series of copies of a single “root” directory
    /// that contains the entire project source code. Software evolves when a
    /// developer modifies the content of one or more files in that directory
    /// and records their changes.
    ///
    /// Each recorded copy of the root directory is known as a “revision”. It
    /// points to a fully-determined directory and is equipped with arbitrary
    /// metadata. Some of those are added manually by the developer
    /// (e.g., commit message), others are automatically synthesized
    /// (timestamps, preceding commit(s), etc).
    Revision = 4,
    /// any kind of software origin offers multiple pointers to the “current”
    /// state of a development project. In the case of VCS this is reflected by
    /// branches (e.g., master, development, but also so called feature branches
    /// dedicated to extending the software in a specific direction); in the
    /// case of package distributions by notions such as suites that correspond
    /// to different maturity levels of individual packages (e.g., stable,
    /// development, etc.).
    ///
    /// A “snapshot” of a given software origin records all entry points found
    /// there and where each of them was pointing at the time. For example, a
    /// snapshot object might track the commit where the master branch was
    /// pointing to at any given time, as well as the most recent release of a
    /// given package in the stable suite of a FOSS distribution.
    Snapshot = 5,
}
68
69impl<'a> TryFrom<&'a [u8]> for NodeType {
70    type Error = &'a [u8];
71    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
72        Ok(match value {
73            b"cnt" => Self::Content,
74            b"dir" => Self::Directory,
75            b"ori" => Self::Origin,
76            b"rel" => Self::Release,
77            b"rev" => Self::Revision,
78            b"snp" => Self::Snapshot,
79            _ => return Err(value),
80        })
81    }
82}
83
84impl FromStr for NodeType {
85    type Err = String;
86
87    /// # Examples
88    ///
89    /// ```
90    /// # use swh_graph::NodeType;
91    ///
92    /// assert_eq!("dir".parse::<NodeType>(), Ok(NodeType::Directory));
93    /// assert!(matches!("xyz".parse::<NodeType>(), Err(_)));
94    /// ```
95    fn from_str(s: &str) -> Result<Self, Self::Err> {
96        Ok(match s {
97            "cnt" => Self::Content,
98            "dir" => Self::Directory,
99            "ori" => Self::Origin,
100            "rel" => Self::Release,
101            "rev" => Self::Revision,
102            "snp" => Self::Snapshot,
103            _ => return Err(s.to_owned()),
104        })
105    }
106}
107
108impl TryFrom<u8> for NodeType {
109    type Error = u8;
110    fn try_from(value: u8) -> Result<Self, Self::Error> {
111        Ok(match value {
112            0 => Self::Content,
113            1 => Self::Directory,
114            2 => Self::Origin,
115            3 => Self::Release,
116            4 => Self::Revision,
117            5 => Self::Snapshot,
118            _ => return Err(value),
119        })
120    }
121}
122
123impl NodeType {
124    /// Get the number of possible types.
125    ///
126    /// To avoid having to update this when adding a new type
127    /// we can use the unstable function `std::mem::variant_count`
128    /// or the `variant_count` crate.
129    /// But for now we just hardcode it while we decide how to
130    /// deal with this.
131    pub const NUMBER_OF_TYPES: usize = 6;
132
133    /// The number of bits needed to store the node type as integers
134    /// This is `ceil(log2(NUMBER_OF_TYPES))`  which can be arithmetized into
135    /// `floor(log2(NUMBER_OF_TYPES))` plus one if it's not a power of two.
136    pub const BITWIDTH: usize = Self::NUMBER_OF_TYPES.ilog2() as usize
137        + (!Self::NUMBER_OF_TYPES.is_power_of_two()) as usize;
138
139    /// Convert a type to the str used in the SWHID
140    pub fn to_str(&self) -> &'static str {
141        match self {
142            Self::Content => "cnt",
143            Self::Directory => "dir",
144            Self::Origin => "ori",
145            Self::Release => "rel",
146            Self::Revision => "rev",
147            Self::Snapshot => "snp",
148        }
149    }
150
151    /// Convert a type to its enum discriminant value.
152    ///
153    /// In all cases using this method is both safer and more concise than
154    /// `(node_type as isize).try_into().unwrap()`.
155    pub fn to_u8(&self) -> u8 {
156        match self {
157            Self::Content => 0,
158            Self::Directory => 1,
159            Self::Origin => 2,
160            Self::Release => 3,
161            Self::Revision => 4,
162            Self::Snapshot => 5,
163        }
164    }
165
166    /// Returns a vector containing all possible `NodeType` values.
167    // TODO make this return an HashSet instead, as the order does not matter
168    pub fn all() -> Vec<Self> {
169        vec![
170            NodeType::Content,
171            NodeType::Directory,
172            NodeType::Origin,
173            NodeType::Release,
174            NodeType::Revision,
175            NodeType::Snapshot,
176        ]
177    }
178}
179
180impl core::fmt::Display for NodeType {
181    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182        f.write_str(self.to_str())
183    }
184}
185
/// Compact representation of a set of [NodeType]-s, as a bit array.
///
/// Bit `i` (counting from the least significant bit) is set when the node
/// type whose discriminant is `i` belongs to the set.
type NodeTypeSet = u64;

/// Constraint on allowed node types, as a set of node types.
///
/// The wrapped value is a bit array: a node type is allowed when the bit at
/// the position given by its discriminant is set (see
/// [`NodeConstraint::matches`]).
///
/// Equality is plain integer equality on the bit array, so the type is `Eq`
/// and `Hash` and can be used as a key in hashed collections.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct NodeConstraint(pub NodeTypeSet);
192
193impl Default for NodeConstraint {
194    fn default() -> Self {
195        Self(0b111111)
196    }
197}
198
199impl NodeConstraint {
200    /// # Examples
201    ///
202    /// ```
203    /// # use std::collections::HashSet;
204    /// # use swh_graph::{NodeConstraint, NodeType};
205    ///
206    /// let only_dirs: NodeConstraint = "dir".parse().unwrap();
207    /// let history_nodes: NodeConstraint = "rel,rev".parse().unwrap();
208    /// let all_nodes: NodeConstraint = "*".parse().unwrap();
209    ///
210    /// assert!(only_dirs.matches(NodeType::Directory));
211    /// assert!(!only_dirs.matches(NodeType::Content));
212    /// assert!(history_nodes.matches(NodeType::Release));
213    /// assert!(history_nodes.matches(NodeType::Revision));
214    /// assert!(!history_nodes.matches(NodeType::Origin));
215    /// for node_type in NodeType::all() {
216    ///     assert!(all_nodes.matches(node_type));
217    /// }
218    /// ```
219    pub fn matches(&self, node_type: NodeType) -> bool {
220        self.0 & (1 << node_type.to_u8()) != 0
221    }
222
223    pub fn to_vec(&self) -> Vec<NodeType> {
224        (0..NodeType::NUMBER_OF_TYPES as u8)
225            .filter(|type_idx| self.0 & (1 << type_idx) != 0)
226            .map(|type_idx| type_idx.try_into().unwrap())
227            .collect()
228    }
229}
230
231impl FromStr for NodeConstraint {
232    type Err = String;
233
234    /// # Examples
235    ///
236    /// ```
237    /// # use std::collections::HashSet;
238    /// # use swh_graph::{NodeConstraint, NodeType};
239    ///
240    /// assert_eq!("*".parse::<NodeConstraint>(), Ok(NodeConstraint(0b111111)));
241    /// assert_eq!("rel".parse::<NodeConstraint>(), Ok(NodeConstraint(0b001000)));
242    /// assert_eq!("dir,cnt".parse::<NodeConstraint>(), Ok(NodeConstraint(0b000011)));
243    /// assert!(matches!("xyz".parse::<NodeConstraint>(), Err(_)));
244    /// ```
245    fn from_str(s: &str) -> Result<Self, Self::Err> {
246        if s == "*" {
247            Ok(NodeConstraint::default())
248        } else {
249            let mut node_types = 0;
250            for s in s.split(',') {
251                node_types |= 1 << s.parse::<NodeType>()?.to_u8();
252            }
253            Ok(NodeConstraint(node_types))
254        }
255    }
256}
257
258impl core::fmt::Display for NodeConstraint {
259    /// ```
260    /// # use std::collections::HashSet;
261    /// # use swh_graph::{NodeConstraint, NodeType};
262    ///
263    /// assert_eq!(format!("{}", NodeConstraint::default()), "*");
264    /// assert_eq!(
265    ///     format!("{}", NodeConstraint(0b000011)),
266    ///     "cnt,dir"
267    /// );
268    /// assert_eq!(
269    ///     format!("{}", NodeConstraint(0b111100)),
270    ///     "ori,rel,rev,snp"
271    /// );
272    /// ```
273    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274        if *self == Self::default() {
275            write!(f, "*")?;
276        } else {
277            let mut type_strings: Vec<&str> = self.to_vec().iter().map(|t| t.to_str()).collect();
278            type_strings.sort();
279            write!(f, "{}", type_strings.join(","))?;
280        }
281        Ok(())
282    }
283}
284
285/// Type of an arc between two nodes in the Software Heritage graph, as a pair
286/// of type constraints on the source and destination arc. When one of the two
287/// is None, it means "any node type accepted".
288// TODO remove Options from ArcType and create a (more  expressive, similar to
289// NodeConstraint) type called ArcConstraint
290pub struct ArcType {
291    pub src: Option<NodeType>,
292    pub dst: Option<NodeType>,
293}