swh_graph/swhtype.rs
1// Copyright (C) 2023-2024 The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::str::FromStr;
7
8use bitvec::prelude::*;
9
/// Object type of an SWHID
///
/// The explicit discriminants are the integer codes used when a node type is
/// converted to or from a `u8`.
///
/// # Reference
/// - <https://docs.softwareheritage.org/devel/swh-model/data-model.html>
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeType {
    /// A (file) content; this is what directory entries usually point to,
    /// besides sub-directories.
    Content = 0,
    /// A list of named directory entries, each of which points to other
    /// artifacts, usually file contents or sub-directories. Directory entries
    /// are also associated with some metadata stored as permission bits.
    Directory = 1,
    /// Code “hosting places” are usually large platforms that host several
    /// unrelated software projects. For software provenance purposes it is
    /// important to be more specific than that.
    ///
    /// Software origins are fine-grained references to where the source code
    /// artifacts archived by Software Heritage have been retrieved from. They
    /// take the form of `(type, url)` pairs, where `url` is a canonical URL
    /// (e.g., the address at which one can `git clone` a repository or
    /// download a source tarball) and `type` is the kind of software origin
    /// (e.g., git, svn, or dsc for Debian source packages).
    Origin = 2,
    /// AKA “tags”
    ///
    /// Some revisions are more equal than others and get selected by
    /// developers as denoting important project milestones known as
    /// “releases”. Each release points to the last commit in project history
    /// corresponding to the release and carries metadata: release name and
    /// version, release message, cryptographic signatures, etc.
    Release = 3,
    /// AKA commits
    ///
    /// Software development within a specific project is essentially a
    /// time-indexed series of copies of a single “root” directory that
    /// contains the entire project source code. Software evolves when a
    /// developer modifies the content of one or more files in that directory
    /// and records their changes.
    ///
    /// Each recorded copy of the root directory is known as a “revision”. It
    /// points to a fully-determined directory and is equipped with arbitrary
    /// metadata. Some of those are added manually by the developer
    /// (e.g., commit message), others are automatically synthesized
    /// (timestamps, preceding commit(s), etc).
    Revision = 4,
    /// Any kind of software origin offers multiple pointers to the “current”
    /// state of a development project. In the case of VCS this is reflected
    /// by branches (e.g., master, development, but also so-called feature
    /// branches dedicated to extending the software in a specific direction);
    /// in the case of package distributions by notions such as suites that
    /// correspond to different maturity levels of individual packages
    /// (e.g., stable, development, etc.).
    ///
    /// A “snapshot” of a given software origin records all entry points found
    /// there and where each of them was pointing at the time. For example, a
    /// snapshot object might track the commit where the master branch was
    /// pointing to at any given time, as well as the most recent release of a
    /// given package in the stable suite of a FOSS distribution.
    Snapshot = 5,
}
70
71impl<'a> TryFrom<&'a [u8]> for NodeType {
72 type Error = &'a [u8];
73 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
74 Ok(match value {
75 b"cnt" => Self::Content,
76 b"dir" => Self::Directory,
77 b"ori" => Self::Origin,
78 b"rel" => Self::Release,
79 b"rev" => Self::Revision,
80 b"snp" => Self::Snapshot,
81 _ => return Err(value),
82 })
83 }
84}
85
86impl FromStr for NodeType {
87 type Err = String;
88
89 /// # Examples
90 ///
91 /// ```
92 /// # use swh_graph::NodeType;
93 ///
94 /// assert_eq!("dir".parse::<NodeType>(), Ok(NodeType::Directory));
95 /// assert!(matches!("xyz".parse::<NodeType>(), Err(_)));
96 /// ```
97 fn from_str(s: &str) -> Result<Self, Self::Err> {
98 Ok(match s {
99 "cnt" => Self::Content,
100 "dir" => Self::Directory,
101 "ori" => Self::Origin,
102 "rel" => Self::Release,
103 "rev" => Self::Revision,
104 "snp" => Self::Snapshot,
105 _ => return Err(s.to_owned()),
106 })
107 }
108}
109
110impl TryFrom<u8> for NodeType {
111 type Error = u8;
112 fn try_from(value: u8) -> Result<Self, Self::Error> {
113 Ok(match value {
114 0 => Self::Content,
115 1 => Self::Directory,
116 2 => Self::Origin,
117 3 => Self::Release,
118 4 => Self::Revision,
119 5 => Self::Snapshot,
120 _ => return Err(value),
121 })
122 }
123}
124
125impl NodeType {
126 /// Get the number of possible types.
127 ///
128 /// To avoid having to update this when adding a new type
129 /// we can use the unstable function `std::mem::variant_count`
130 /// or the `variant_count` crate.
131 /// But for now we just hardcode it while we decide how to
132 /// deal with this.
133 pub const NUMBER_OF_TYPES: usize = 6;
134
135 /// The number of bits needed to store the node type as integers
136 /// This is `ceil(log2(NUMBER_OF_TYPES))` which can be arithmetized into
137 /// `floor(log2(NUMBER_OF_TYPES))` plus one if it's not a power of two.
138 pub const BITWIDTH: usize = Self::NUMBER_OF_TYPES.ilog2() as usize
139 + (!Self::NUMBER_OF_TYPES.is_power_of_two()) as usize;
140
141 /// Convert a type to the str used in the SWHID
142 pub fn to_str(&self) -> &'static str {
143 match self {
144 Self::Content => "cnt",
145 Self::Directory => "dir",
146 Self::Origin => "ori",
147 Self::Release => "rel",
148 Self::Revision => "rev",
149 Self::Snapshot => "snp",
150 }
151 }
152
153 /// Convert a type to its enum discriminant value.
154 ///
155 /// In all cases using this method is both safer and more concise than
156 /// `(node_type as isize).try_into().unwrap()`.
157 pub fn to_u8(&self) -> u8 {
158 match self {
159 Self::Content => 0,
160 Self::Directory => 1,
161 Self::Origin => 2,
162 Self::Release => 3,
163 Self::Revision => 4,
164 Self::Snapshot => 5,
165 }
166 }
167
168 /// Returns a vector containing all possible `NodeType` values.
169 // TODO make this return an HashSet instead, as the order does not matter
170 pub fn all() -> Vec<Self> {
171 vec![
172 NodeType::Content,
173 NodeType::Directory,
174 NodeType::Origin,
175 NodeType::Release,
176 NodeType::Revision,
177 NodeType::Snapshot,
178 ]
179 }
180}
181
182impl core::fmt::Display for NodeType {
183 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
184 f.write_str(self.to_str())
185 }
186}
187
188/// Compact representation of a set of [NodeType]-s, as a bit array.
189type NodeTypeSet = BitArr!(for NodeType::NUMBER_OF_TYPES, in u8, Msb0);
190
191/// Constraint on allowed node types, as a set of node types.
192#[derive(Debug, PartialEq)]
193pub struct NodeConstraint(pub NodeTypeSet);
194
195impl NodeConstraint {
196 /// # Examples
197 ///
198 /// ```
199 /// # use std::collections::HashSet;
200 /// # use swh_graph::{NodeConstraint, NodeType};
201 ///
202 /// let only_dirs: NodeConstraint = "dir".parse().unwrap();
203 /// let history_nodes: NodeConstraint = "rel,rev".parse().unwrap();
204 /// let all_nodes: NodeConstraint = "*".parse().unwrap();
205 ///
206 /// assert!(only_dirs.matches(NodeType::Directory));
207 /// assert!(!only_dirs.matches(NodeType::Content));
208 /// assert!(history_nodes.matches(NodeType::Release));
209 /// assert!(history_nodes.matches(NodeType::Revision));
210 /// assert!(!history_nodes.matches(NodeType::Origin));
211 /// for node_type in NodeType::all() {
212 /// assert!(all_nodes.matches(node_type));
213 /// }
214 /// ```
215 pub fn matches(&self, node_type: NodeType) -> bool {
216 *self.0.get(node_type.to_u8() as usize).unwrap()
217 }
218
219 pub fn to_vec(&self) -> Vec<NodeType> {
220 self.0
221 .iter_ones() // Note: this iterates on all bits of the u8
222 .filter_map(|type_idx| (type_idx as u8).try_into().ok())
223 .collect()
224 }
225}
226
227impl Default for NodeConstraint {
228 fn default() -> Self {
229 Self(bitarr!(u8, Msb0; 1; NodeType::NUMBER_OF_TYPES))
230 }
231}
232
233impl FromStr for NodeConstraint {
234 type Err = String;
235
236 /// # Examples
237 ///
238 /// ```
239 /// # use std::collections::HashSet;
240 /// # use bitvec::prelude::*;
241 /// # use swh_graph::{NodeConstraint, NodeType};
242 ///
243 /// assert_eq!("*".parse::<NodeConstraint>(), Ok(NodeConstraint(bitarr!(u8, Msb0; 1; 6))));
244 /// assert_eq!("rel".parse::<NodeConstraint>(), Ok(NodeConstraint(bitarr!(u8, Msb0; 0, 0, 0, 1, 0, 0))));
245 /// assert_eq!("dir,cnt".parse::<NodeConstraint>(), Ok(NodeConstraint(bitarr!(u8, Msb0; 1, 1, 0, 0, 0, 0))));
246 /// assert!(matches!("xyz".parse::<NodeConstraint>(), Err(_)));
247 /// ```
248 fn from_str(s: &str) -> Result<Self, Self::Err> {
249 if s == "*" {
250 Ok(NodeConstraint::default())
251 } else {
252 let mut node_types = bitarr!(u8, Msb0; 0; NodeType::NUMBER_OF_TYPES);
253 for s in s.split(',') {
254 node_types.set(s.parse::<NodeType>()?.to_u8() as usize, true);
255 }
256 Ok(NodeConstraint(node_types))
257 }
258 }
259}
260
261impl core::fmt::Display for NodeConstraint {
262 /// ```
263 /// # use std::collections::HashSet;
264 /// # use bitvec::prelude::*;
265 /// # use swh_graph::{NodeConstraint, NodeType};
266 ///
267 /// assert_eq!(format!("{}", NodeConstraint::default()), "*");
268 /// assert_eq!(
269 /// format!("{}", NodeConstraint(bitarr!(u8, Msb0; 1, 1, 0, 0, 0, 0))),
270 /// "cnt,dir"
271 /// );
272 /// assert_eq!(
273 /// format!("{}", NodeConstraint(bitarr!(u8, Msb0; 0, 0, 1, 1, 1, 1))),
274 /// "ori,rel,rev,snp"
275 /// );
276 /// ```
277 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
278 if self.0.all() {
279 write!(f, "*")?;
280 } else {
281 let mut type_strings: Vec<&str> = self.to_vec().iter().map(|t| t.to_str()).collect();
282 type_strings.sort();
283 write!(f, "{}", type_strings.join(","))?;
284 }
285 Ok(())
286 }
287}
288
289/// Type of an arc between two nodes in the Software Heritage graph, as a pair
290/// of type constraints on the source and destination arc. When one of the two
291/// is None, it means "any node type accepted".
292// TODO remove Options from ArcType and create a (more expressive, similar to
293// NodeConstraint) type called ArcConstraint
294pub struct ArcType {
295 pub src: Option<NodeType>,
296 pub dst: Option<NodeType>,
297}