swh_graph/swhtype.rs
1// Copyright (C) 2023-2026 The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::str::FromStr;
7
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
/// Object type of an SWHID
///
/// Every variant carries an explicit `u8` discriminant (0 to 5). These values
/// are part of the type's contract: the integer conversions of this type map
/// the same numbers back to the same variants, so they must not be reordered.
///
/// The variant descriptions below are adapted from the data model
/// documentation referenced here.
///
/// # Reference
/// - <https://docs.softwareheritage.org/devel/swh-model/data-model.html>
pub enum NodeType {
    /// AKA “blobs”
    ///
    /// the raw content of (source code) files as a sequence of bytes, without
    /// file names or any other metadata.
    Content = 0,
    /// a list of named directory entries, each of which pointing to other
    /// artifacts, usually file contents or sub-directories. Directory entries
    /// are also associated to some metadata stored as permission bits.
    Directory = 1,
    /// code “hosting places” as previously described are usually large
    /// platforms that host several unrelated software projects. For software
    /// provenance purposes it is important to be more specific than that.
    ///
    /// Software origins are fine grained references to where source code
    /// artifacts archived by Software Heritage have been retrieved from. They
    /// take the form of `(type, url)` pairs, where url is a canonical URL
    /// (e.g., the address at which one can `git clone` a repository or download
    /// a source tarball) and `type` the kind of software origin (e.g., git,
    /// svn, or dsc for Debian source packages).
    Origin = 2,
    /// AKA “tags”
    ///
    /// some revisions are more equal than others and get selected by
    /// developers as denoting important project milestones known as “releases”.
    /// Each release points to the last commit in project history corresponding
    /// to the release and carries metadata: release name and version, release
    /// message, cryptographic signatures, etc.
    Release = 3,
    /// AKA “commits”
    ///
    /// Software development within a specific project is
    /// essentially a time-indexed series of copies of a single “root” directory
    /// that contains the entire project source code. Software evolves when a
    /// developer modifies the content of one or more files in that directory
    /// and records their changes.
    ///
    /// Each recorded copy of the root directory is known as a “revision”. It
    /// points to a fully-determined directory and is equipped with arbitrary
    /// metadata. Some of those are added manually by the developer
    /// (e.g., commit message), others are automatically synthesized
    /// (timestamps, preceding commit(s), etc).
    Revision = 4,
    /// any kind of software origin offers multiple pointers to the “current”
    /// state of a development project. In the case of VCS this is reflected by
    /// branches (e.g., master, development, but also so called feature branches
    /// dedicated to extending the software in a specific direction); in the
    /// case of package distributions by notions such as suites that correspond
    /// to different maturity levels of individual packages (e.g., stable,
    /// development, etc.).
    ///
    /// A “snapshot” of a given software origin records all entry points found
    /// there and where each of them was pointing at the time. For example, a
    /// snapshot object might track the commit where the master branch was
    /// pointing to at any given time, as well as the most recent release of a
    /// given package in the stable suite of a FOSS distribution.
    Snapshot = 5,
}
68
69impl<'a> TryFrom<&'a [u8]> for NodeType {
70 type Error = &'a [u8];
71 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
72 Ok(match value {
73 b"cnt" => Self::Content,
74 b"dir" => Self::Directory,
75 b"ori" => Self::Origin,
76 b"rel" => Self::Release,
77 b"rev" => Self::Revision,
78 b"snp" => Self::Snapshot,
79 _ => return Err(value),
80 })
81 }
82}
83
84impl FromStr for NodeType {
85 type Err = String;
86
87 /// # Examples
88 ///
89 /// ```
90 /// # use swh_graph::NodeType;
91 ///
92 /// assert_eq!("dir".parse::<NodeType>(), Ok(NodeType::Directory));
93 /// assert!(matches!("xyz".parse::<NodeType>(), Err(_)));
94 /// ```
95 fn from_str(s: &str) -> Result<Self, Self::Err> {
96 Ok(match s {
97 "cnt" => Self::Content,
98 "dir" => Self::Directory,
99 "ori" => Self::Origin,
100 "rel" => Self::Release,
101 "rev" => Self::Revision,
102 "snp" => Self::Snapshot,
103 _ => return Err(s.to_owned()),
104 })
105 }
106}
107
108impl TryFrom<u8> for NodeType {
109 type Error = u8;
110 fn try_from(value: u8) -> Result<Self, Self::Error> {
111 Ok(match value {
112 0 => Self::Content,
113 1 => Self::Directory,
114 2 => Self::Origin,
115 3 => Self::Release,
116 4 => Self::Revision,
117 5 => Self::Snapshot,
118 _ => return Err(value),
119 })
120 }
121}
122
123impl NodeType {
124 /// Get the number of possible types.
125 ///
126 /// To avoid having to update this when adding a new type
127 /// we can use the unstable function `std::mem::variant_count`
128 /// or the `variant_count` crate.
129 /// But for now we just hardcode it while we decide how to
130 /// deal with this.
131 pub const NUMBER_OF_TYPES: usize = 6;
132
133 /// The number of bits needed to store the node type as integers
134 /// This is `ceil(log2(NUMBER_OF_TYPES))` which can be arithmetized into
135 /// `floor(log2(NUMBER_OF_TYPES))` plus one if it's not a power of two.
136 pub const BITWIDTH: usize = Self::NUMBER_OF_TYPES.ilog2() as usize
137 + (!Self::NUMBER_OF_TYPES.is_power_of_two()) as usize;
138
139 /// Convert a type to the str used in the SWHID
140 #[inline(always)]
141 pub fn to_str(&self) -> &'static str {
142 match self {
143 Self::Content => "cnt",
144 Self::Directory => "dir",
145 Self::Origin => "ori",
146 Self::Release => "rel",
147 Self::Revision => "rev",
148 Self::Snapshot => "snp",
149 }
150 }
151
152 /// Convert a type to its enum discriminant value.
153 ///
154 /// In all cases using this method is both safer and more concise than
155 /// `(node_type as isize).try_into().unwrap()`.
156 #[inline(always)]
157 pub fn to_u8(&self) -> u8 {
158 match self {
159 Self::Content => 0,
160 Self::Directory => 1,
161 Self::Origin => 2,
162 Self::Release => 3,
163 Self::Revision => 4,
164 Self::Snapshot => 5,
165 }
166 }
167
168 /// Returns a vector containing all possible `NodeType` values.
169 // TODO make this return an HashSet instead, as the order does not matter
170 pub fn all() -> Vec<Self> {
171 vec![
172 NodeType::Content,
173 NodeType::Directory,
174 NodeType::Origin,
175 NodeType::Release,
176 NodeType::Revision,
177 NodeType::Snapshot,
178 ]
179 }
180}
181
182impl core::fmt::Display for NodeType {
183 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
184 f.write_str(self.to_str())
185 }
186}
187
/// Compact representation of a set of [NodeType]-s, as a bit array.
///
/// Bit `i` (counting from the least significant bit) is set when the node
/// type whose discriminant is `i` belongs to the set. Only the
/// `NodeType::NUMBER_OF_TYPES` lowest bits are ever inspected when testing
/// membership or listing the members.
type NodeTypeSet = u64;
190
191/// Constraint on allowed node types, as a set of node types.
192#[derive(Debug, PartialEq, Clone, Copy)]
193pub struct NodeConstraint(pub NodeTypeSet);
194
195impl Default for NodeConstraint {
196 fn default() -> Self {
197 Self(0b111111)
198 }
199}
200
201impl NodeConstraint {
202 /// Builds a `NodeConstraint` that only allows the given types
203 ///
204 /// # Example
205 ///
206 /// ```
207 /// # use std::collections::HashSet;
208 /// # use swh_graph::{NodeConstraint, NodeType};
209 ///
210 /// let only_revrels = NodeConstraint::from_types([NodeType::Revision, NodeType::Release]);
211 ///
212 /// assert!(!only_revrels.matches(NodeType::Directory));
213 /// assert!(!only_revrels.matches(NodeType::Content));
214 /// assert!(only_revrels.matches(NodeType::Release));
215 /// assert!(only_revrels.matches(NodeType::Revision));
216 /// assert!(!only_revrels.matches(NodeType::Origin));
217 /// ```
218 pub fn from_types(node_types: impl IntoIterator<Item = NodeType>) -> Self {
219 let mut bits = 0;
220 for node_type in node_types.into_iter() {
221 bits |= 1 << node_type.to_u8();
222 }
223 Self(bits)
224 }
225
226 /// # Examples
227 ///
228 /// ```
229 /// # use std::collections::HashSet;
230 /// # use swh_graph::{NodeConstraint, NodeType};
231 ///
232 /// let only_dirs: NodeConstraint = "dir".parse().unwrap();
233 /// let history_nodes: NodeConstraint = "rel,rev".parse().unwrap();
234 /// let all_nodes: NodeConstraint = "*".parse().unwrap();
235 ///
236 /// assert!(only_dirs.matches(NodeType::Directory));
237 /// assert!(!only_dirs.matches(NodeType::Content));
238 /// assert!(history_nodes.matches(NodeType::Release));
239 /// assert!(history_nodes.matches(NodeType::Revision));
240 /// assert!(!history_nodes.matches(NodeType::Origin));
241 /// for node_type in NodeType::all() {
242 /// assert!(all_nodes.matches(node_type));
243 /// }
244 /// ```
245 #[inline(always)]
246 pub fn matches(&self, node_type: NodeType) -> bool {
247 self.0 & (1 << node_type.to_u8()) != 0
248 }
249
250 pub fn to_vec(&self) -> Vec<NodeType> {
251 (0..NodeType::NUMBER_OF_TYPES as u8)
252 .filter(|type_idx| self.0 & (1 << type_idx) != 0)
253 .map(|type_idx| type_idx.try_into().unwrap())
254 .collect()
255 }
256}
257
258impl FromStr for NodeConstraint {
259 type Err = String;
260
261 /// # Examples
262 ///
263 /// ```
264 /// # use std::collections::HashSet;
265 /// # use swh_graph::{NodeConstraint, NodeType};
266 ///
267 /// assert_eq!("*".parse::<NodeConstraint>(), Ok(NodeConstraint(0b111111)));
268 /// assert_eq!("rel".parse::<NodeConstraint>(), Ok(NodeConstraint(0b001000)));
269 /// assert_eq!("dir,cnt".parse::<NodeConstraint>(), Ok(NodeConstraint(0b000011)));
270 /// assert!(matches!("xyz".parse::<NodeConstraint>(), Err(_)));
271 /// ```
272 fn from_str(s: &str) -> Result<Self, Self::Err> {
273 if s == "*" {
274 Ok(NodeConstraint::default())
275 } else {
276 Ok(Self::from_types(
277 s.split(',')
278 .map(|s| s.parse::<NodeType>())
279 .collect::<Result<Vec<_>, _>>()?,
280 ))
281 }
282 }
283}
284
285impl core::fmt::Display for NodeConstraint {
286 /// ```
287 /// # use std::collections::HashSet;
288 /// # use swh_graph::{NodeConstraint, NodeType};
289 ///
290 /// assert_eq!(format!("{}", NodeConstraint::default()), "*");
291 /// assert_eq!(
292 /// format!("{}", NodeConstraint(0b000011)),
293 /// "cnt,dir"
294 /// );
295 /// assert_eq!(
296 /// format!("{}", NodeConstraint(0b111100)),
297 /// "ori,rel,rev,snp"
298 /// );
299 /// ```
300 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
301 if *self == Self::default() {
302 write!(f, "*")?;
303 } else {
304 let mut type_strings: Vec<&str> = self.to_vec().iter().map(|t| t.to_str()).collect();
305 type_strings.sort();
306 write!(f, "{}", type_strings.join(","))?;
307 }
308 Ok(())
309 }
310}
311
#[cfg(feature = "serde")]
impl serde::Serialize for NodeConstraint {
    /// Serializes the constraint as a string, using its `Display` output.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.collect_str(self)
    }
}
321
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for NodeConstraint {
    /// Deserializes a constraint from a string; the parsing itself is done
    /// by `NodeConstraintVisitor`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        deserializer.deserialize_str(NodeConstraintVisitor)
    }
}
330
/// Serde visitor that builds a [NodeConstraint] from its string form.
///
/// Only compiled when the `serde` feature is enabled.
#[cfg(feature = "serde")]
struct NodeConstraintVisitor;
333
#[cfg(feature = "serde")]
impl serde::de::Visitor<'_> for NodeConstraintVisitor {
    type Value = NodeConstraint;

    /// Describes the expected input, for use in serde error messages.
    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("a node type constraint")
    }

    /// Parses the string with the `FromStr` impl of [NodeConstraint] and
    /// turns a parse failure into a custom serde error.
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        value.parse::<NodeConstraint>().map_err(E::custom)
    }
}
349
350/// Type of an arc between two nodes in the Software Heritage graph, as a pair
351/// of type constraints on the source and destination arc. When one of the two
352/// is None, it means "any node type accepted".
353// TODO remove Options from ArcType and create a (more expressive, similar to
354// NodeConstraint) type called ArcConstraint
355pub struct ArcType {
356 pub src: Option<NodeType>,
357 pub dst: Option<NodeType>,
358}