swh_graph/swhtype.rs
1// Copyright (C) 2023-2024 The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::str::FromStr;
7
/// Object type of an SWHID.
///
/// The explicit discriminants are the `u8` values returned by
/// `NodeType::to_u8` and accepted by the `TryFrom<u8>` conversion.
///
/// # Reference
/// - <https://docs.softwareheritage.org/devel/swh-model/data-model.html>
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum NodeType {
    /// The raw content of a (source code) file, AKA a "blob". See the data
    /// model reference above for the full definition.
    Content = 0,
    /// A list of named directory entries, each of which points to another
    /// artifact, usually a file content or a sub-directory. Directory entries
    /// also carry some metadata, stored as permission bits.
    Directory = 1,
    /// Code "hosting places" are usually large platforms that host several
    /// unrelated software projects; for software provenance purposes it is
    /// important to be more specific than that.
    ///
    /// Software origins are fine-grained references to where the source code
    /// artifacts archived by Software Heritage have been retrieved from. They
    /// take the form of `(type, url)` pairs, where `url` is a canonical URL
    /// (e.g., the address at which one can `git clone` a repository or
    /// download a source tarball) and `type` is the kind of software origin
    /// (e.g., git, svn, or dsc for Debian source packages).
    Origin = 2,
    /// AKA "tags".
    ///
    /// Some revisions are more equal than others and get selected by
    /// developers as denoting important project milestones known as
    /// "releases". Each release points to the last commit in project history
    /// corresponding to the release and carries metadata: release name and
    /// version, release message, cryptographic signatures, etc.
    Release = 3,
    /// AKA "commits".
    ///
    /// Software development within a specific project is essentially a
    /// time-indexed series of copies of a single "root" directory that
    /// contains the entire project source code. Software evolves when a
    /// developer modifies the content of one or more files in that directory
    /// and records their changes.
    ///
    /// Each recorded copy of the root directory is known as a "revision". It
    /// points to a fully-determined directory and is equipped with arbitrary
    /// metadata. Some of it is added manually by the developer (e.g., the
    /// commit message), the rest is synthesized automatically (timestamps,
    /// preceding commit(s), etc.).
    Revision = 4,
    /// Any kind of software origin offers multiple pointers to the "current"
    /// state of a development project. In the case of a VCS this is reflected
    /// by branches (e.g., master, development, but also so-called feature
    /// branches dedicated to extending the software in a specific direction);
    /// in the case of package distributions, by notions such as suites that
    /// correspond to different maturity levels of individual packages (e.g.,
    /// stable, development, etc.).
    ///
    /// A "snapshot" of a given software origin records all entry points found
    /// there and where each of them was pointing at the time. For example, a
    /// snapshot object might track the commit the master branch was pointing
    /// to at any given time, as well as the most recent release of a given
    /// package in the stable suite of a FOSS distribution.
    Snapshot = 5,
}
68
69impl<'a> TryFrom<&'a [u8]> for NodeType {
70 type Error = &'a [u8];
71 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
72 Ok(match value {
73 b"cnt" => Self::Content,
74 b"dir" => Self::Directory,
75 b"ori" => Self::Origin,
76 b"rel" => Self::Release,
77 b"rev" => Self::Revision,
78 b"snp" => Self::Snapshot,
79 _ => return Err(value),
80 })
81 }
82}
83
84impl FromStr for NodeType {
85 type Err = String;
86
87 /// # Examples
88 ///
89 /// ```
90 /// # use swh_graph::NodeType;
91 ///
92 /// assert_eq!("dir".parse::<NodeType>(), Ok(NodeType::Directory));
93 /// assert!(matches!("xyz".parse::<NodeType>(), Err(_)));
94 /// ```
95 fn from_str(s: &str) -> Result<Self, Self::Err> {
96 Ok(match s {
97 "cnt" => Self::Content,
98 "dir" => Self::Directory,
99 "ori" => Self::Origin,
100 "rel" => Self::Release,
101 "rev" => Self::Revision,
102 "snp" => Self::Snapshot,
103 _ => return Err(s.to_owned()),
104 })
105 }
106}
107
108impl TryFrom<u8> for NodeType {
109 type Error = u8;
110 fn try_from(value: u8) -> Result<Self, Self::Error> {
111 Ok(match value {
112 0 => Self::Content,
113 1 => Self::Directory,
114 2 => Self::Origin,
115 3 => Self::Release,
116 4 => Self::Revision,
117 5 => Self::Snapshot,
118 _ => return Err(value),
119 })
120 }
121}
122
123impl NodeType {
124 /// Get the number of possible types.
125 ///
126 /// To avoid having to update this when adding a new type
127 /// we can use the unstable function `std::mem::variant_count`
128 /// or the `variant_count` crate.
129 /// But for now we just hardcode it while we decide how to
130 /// deal with this.
131 pub const NUMBER_OF_TYPES: usize = 6;
132
133 /// The number of bits needed to store the node type as integers
134 /// This is `ceil(log2(NUMBER_OF_TYPES))` which can be arithmetized into
135 /// `floor(log2(NUMBER_OF_TYPES))` plus one if it's not a power of two.
136 pub const BITWIDTH: usize = Self::NUMBER_OF_TYPES.ilog2() as usize
137 + (!Self::NUMBER_OF_TYPES.is_power_of_two()) as usize;
138
139 /// Convert a type to the str used in the SWHID
140 pub fn to_str(&self) -> &'static str {
141 match self {
142 Self::Content => "cnt",
143 Self::Directory => "dir",
144 Self::Origin => "ori",
145 Self::Release => "rel",
146 Self::Revision => "rev",
147 Self::Snapshot => "snp",
148 }
149 }
150
151 /// Convert a type to its enum discriminant value.
152 ///
153 /// In all cases using this method is both safer and more concise than
154 /// `(node_type as isize).try_into().unwrap()`.
155 pub fn to_u8(&self) -> u8 {
156 match self {
157 Self::Content => 0,
158 Self::Directory => 1,
159 Self::Origin => 2,
160 Self::Release => 3,
161 Self::Revision => 4,
162 Self::Snapshot => 5,
163 }
164 }
165
166 /// Returns a vector containing all possible `NodeType` values.
167 // TODO make this return an HashSet instead, as the order does not matter
168 pub fn all() -> Vec<Self> {
169 vec![
170 NodeType::Content,
171 NodeType::Directory,
172 NodeType::Origin,
173 NodeType::Release,
174 NodeType::Revision,
175 NodeType::Snapshot,
176 ]
177 }
178}
179
180impl core::fmt::Display for NodeType {
181 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182 f.write_str(self.to_str())
183 }
184}
185
/// Compact representation of a set of [NodeType]-s, as a bit array: bit `i`
/// is set when the node type whose discriminant is `i` belongs to the set.
type NodeTypeSet = u64;

/// Constraint on allowed node types, as a set of node types.
///
/// Being a plain integer bitset, it is cheap to copy and can be compared,
/// hashed and used as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeConstraint(pub NodeTypeSet);
192
193impl Default for NodeConstraint {
194 fn default() -> Self {
195 Self(0b111111)
196 }
197}
198
199impl NodeConstraint {
200 /// # Examples
201 ///
202 /// ```
203 /// # use std::collections::HashSet;
204 /// # use swh_graph::{NodeConstraint, NodeType};
205 ///
206 /// let only_dirs: NodeConstraint = "dir".parse().unwrap();
207 /// let history_nodes: NodeConstraint = "rel,rev".parse().unwrap();
208 /// let all_nodes: NodeConstraint = "*".parse().unwrap();
209 ///
210 /// assert!(only_dirs.matches(NodeType::Directory));
211 /// assert!(!only_dirs.matches(NodeType::Content));
212 /// assert!(history_nodes.matches(NodeType::Release));
213 /// assert!(history_nodes.matches(NodeType::Revision));
214 /// assert!(!history_nodes.matches(NodeType::Origin));
215 /// for node_type in NodeType::all() {
216 /// assert!(all_nodes.matches(node_type));
217 /// }
218 /// ```
219 pub fn matches(&self, node_type: NodeType) -> bool {
220 self.0 & (1 << node_type.to_u8()) != 0
221 }
222
223 pub fn to_vec(&self) -> Vec<NodeType> {
224 (0..NodeType::NUMBER_OF_TYPES as u8)
225 .filter(|type_idx| self.0 & (1 << type_idx) != 0)
226 .map(|type_idx| type_idx.try_into().unwrap())
227 .collect()
228 }
229}
230
231impl FromStr for NodeConstraint {
232 type Err = String;
233
234 /// # Examples
235 ///
236 /// ```
237 /// # use std::collections::HashSet;
238 /// # use swh_graph::{NodeConstraint, NodeType};
239 ///
240 /// assert_eq!("*".parse::<NodeConstraint>(), Ok(NodeConstraint(0b111111)));
241 /// assert_eq!("rel".parse::<NodeConstraint>(), Ok(NodeConstraint(0b001000)));
242 /// assert_eq!("dir,cnt".parse::<NodeConstraint>(), Ok(NodeConstraint(0b000011)));
243 /// assert!(matches!("xyz".parse::<NodeConstraint>(), Err(_)));
244 /// ```
245 fn from_str(s: &str) -> Result<Self, Self::Err> {
246 if s == "*" {
247 Ok(NodeConstraint::default())
248 } else {
249 let mut node_types = 0;
250 for s in s.split(',') {
251 node_types |= 1 << s.parse::<NodeType>()?.to_u8();
252 }
253 Ok(NodeConstraint(node_types))
254 }
255 }
256}
257
258impl core::fmt::Display for NodeConstraint {
259 /// ```
260 /// # use std::collections::HashSet;
261 /// # use swh_graph::{NodeConstraint, NodeType};
262 ///
263 /// assert_eq!(format!("{}", NodeConstraint::default()), "*");
264 /// assert_eq!(
265 /// format!("{}", NodeConstraint(0b000011)),
266 /// "cnt,dir"
267 /// );
268 /// assert_eq!(
269 /// format!("{}", NodeConstraint(0b111100)),
270 /// "ori,rel,rev,snp"
271 /// );
272 /// ```
273 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274 if *self == Self::default() {
275 write!(f, "*")?;
276 } else {
277 let mut type_strings: Vec<&str> = self.to_vec().iter().map(|t| t.to_str()).collect();
278 type_strings.sort();
279 write!(f, "{}", type_strings.join(","))?;
280 }
281 Ok(())
282 }
283}
284
285/// Type of an arc between two nodes in the Software Heritage graph, as a pair
286/// of type constraints on the source and destination arc. When one of the two
287/// is None, it means "any node type accepted".
288// TODO remove Options from ArcType and create a (more expressive, similar to
289// NodeConstraint) type called ArcConstraint
290pub struct ArcType {
291 pub src: Option<NodeType>,
292 pub dst: Option<NodeType>,
293}