swh_graph/
swhid.rs

1// Copyright (C) 2023-2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::str::FromStr;
7
8use rdst::RadixKey;
9use thiserror::Error;
10
11use crate::NodeType;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
14#[repr(C)]
15/// SoftWare Heritage persistent IDentifiers
16///
17/// A SWHID consists of two separate parts, a mandatory core identifier that
18/// can point to any software artifact (or “object”) available in the Software
19/// Heritage archive, and an optional list of qualifiers that allows to specify
20/// the context where the object is meant to be seen and point to a subpart of
21/// the object itself.
22///
23/// # Reference
24/// - <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>
25/// - Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli. [Identifiers for Digital Objects: the Case of Software Source Code Preservation](https://hal.archives-ouvertes.fr/hal-01865790v4). In Proceedings of iPRES 2018: 15th International Conference on Digital Preservation, Boston, MA, USA, September 2018, 9 pages.
26/// - Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli. [Referencing Source Code Artifacts: a Separate Concern in Software Citation](https://arxiv.org/abs/2001.08647). In Computing in Science and Engineering, volume 22, issue 2, pages 33-43. ISSN 1521-9615, IEEE. March 2020.
27pub struct SWHID {
28    /// Namespace Version
29    pub namespace_version: u8,
30    /// Node type
31    pub node_type: NodeType,
32    /// SHA1 has of the node
33    pub hash: [u8; 20],
34}
35
36impl SWHID {
37    /// The size of the binary representation of a SWHID
38    pub const BYTES_SIZE: usize = 22;
39}
40
41impl core::fmt::Display for SWHID {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(
44            f,
45            "swh:{}:{}:",
46            self.namespace_version,
47            self.node_type.to_str(),
48        )?;
49        for byte in self.hash.iter() {
50            write!(f, "{byte:02x}")?;
51        }
52        Ok(())
53    }
54}
55
56#[derive(Error, Debug)]
57pub enum BinSWHIDDeserializationError {
58    #[error("Unsupported SWHID version: {0}")]
59    Version(u8),
60    #[error("Invalid SWHID type: {0}")]
61    Type(u8),
62}
63
64/// Parse a SWHID from bytes, while the SWHID struct has the exact same layout
65/// and thus it can be read directly from bytes, this function is provided for
66/// completeness and safety because we can check the namespace version is
67/// supported.
68impl TryFrom<[u8; SWHID::BYTES_SIZE]> for SWHID {
69    type Error = BinSWHIDDeserializationError;
70    fn try_from(value: [u8; SWHID::BYTES_SIZE]) -> std::result::Result<Self, Self::Error> {
71        use BinSWHIDDeserializationError::*;
72
73        let namespace_version = value[0];
74        if namespace_version != 1 {
75            return Err(Version(namespace_version));
76        }
77        let node_type = NodeType::try_from(value[1]).map_err(Type)?;
78        let mut hash = [0; 20];
79        hash.copy_from_slice(&value[2..]);
80        Ok(Self {
81            namespace_version,
82            node_type,
83            hash,
84        })
85    }
86}
87
88#[derive(Error, Debug, PartialEq, Eq, Hash)]
89pub enum StrSWHIDDeserializationError {
90    #[error("Invalid syntax: {0}")]
91    Syntax(&'static str),
92    #[error("Unsupported SWHID namespace: {0}")]
93    Namespace(String),
94    #[error("Unsupported SWHID version: {0}")]
95    Version(String),
96    #[error("Expected hash length to be {expected}, got {got}")]
97    HashLength { expected: usize, got: usize },
98    #[error("Invalid SWHID type: {0}")]
99    Type(String),
100    #[error("SWHID hash is not hexadecimal: {0}")]
101    HashAlphabet(String),
102}
103
104/// Parse a SWHID from the string representation
105impl TryFrom<&str> for SWHID {
106    type Error = StrSWHIDDeserializationError;
107    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
108        Self::from_str(value)
109    }
110}
111
112impl FromStr for SWHID {
113    type Err = StrSWHIDDeserializationError;
114
115    fn from_str(value: &str) -> Result<Self, Self::Err> {
116        use StrSWHIDDeserializationError::*;
117
118        let mut tokens = value.splitn(4, ':');
119        let Some(namespace) = tokens.next() else {
120            return Err(Syntax("SWHID is empty"));
121        };
122        if namespace != "swh" {
123            return Err(Namespace(namespace.to_string()));
124        }
125        let Some(namespace_version) = tokens.next() else {
126            return Err(Syntax("SWHID is too short (no namespace version)"));
127        };
128        if namespace_version != "1" {
129            return Err(Version(namespace_version.to_string()));
130        }
131        let Some(node_type) = tokens.next() else {
132            return Err(Syntax("SWHID is too short (no object type)"));
133        };
134        let Some(hex_hash) = tokens.next() else {
135            return Err(Syntax("SWHID is too short (no object hash)"));
136        };
137        if hex_hash.len() != 40 {
138            return Err(HashLength {
139                expected: 40,
140                got: hex_hash.len(),
141            });
142        }
143        let node_type = node_type
144            .parse::<NodeType>()
145            .map_err(|e| Type(e.to_string()))?;
146        let mut hash = [0u8; 20];
147        faster_hex::hex_decode(hex_hash.as_bytes(), &mut hash)
148            .map_err(|_| HashAlphabet(hex_hash.to_string()))?;
149        Ok(Self {
150            namespace_version: 1,
151            node_type,
152            hash,
153        })
154    }
155}
156
157impl From<SWHID> for [u8; SWHID::BYTES_SIZE] {
158    fn from(value: SWHID) -> Self {
159        let mut result = [0; SWHID::BYTES_SIZE];
160        result[0] = value.namespace_version;
161        result[1] = value.node_type as u8;
162        result[2..].copy_from_slice(&value.hash);
163        result
164    }
165}
166
167impl RadixKey for SWHID {
168    const LEVELS: usize = 22;
169
170    #[inline(always)]
171    fn get_level(&self, level: usize) -> u8 {
172        assert!(level < Self::LEVELS);
173        match Self::LEVELS - level - 1 {
174            0 => self.namespace_version,
175            1 => match self.node_type {
176                // must follow alphabetical order of the 3-char abbreviation
177                NodeType::Content => 0,   // cnt
178                NodeType::Directory => 1, // dir
179                NodeType::Origin => 2,    // ori
180                NodeType::Release => 3,   // rel
181                NodeType::Revision => 4,  // rev
182                NodeType::Snapshot => 5,  // rel
183            },
184            n => self.hash[n - 2],
185        }
186    }
187}
188
189impl Ord for SWHID {
190    #[inline(always)]
191    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
192        for level in (0..Self::LEVELS).rev() {
193            let ordering = self.get_level(level).cmp(&other.get_level(level));
194            if ordering != std::cmp::Ordering::Equal {
195                return ordering;
196            }
197        }
198        std::cmp::Ordering::Equal
199    }
200}
201impl PartialOrd for SWHID {
202    #[inline(always)]
203    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
204        Some(self.cmp(other))
205    }
206}
207
#[cfg(feature = "serde")]
impl serde::Serialize for SWHID {
    /// Serializes the SWHID as a string, using its `Display` implementation
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.collect_str(self)
    }
}
217
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for SWHID {
    /// Deserializes a SWHID by asking the deserializer for a string and
    /// handing it to `SwhidVisitor`
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        deserializer.deserialize_str(SwhidVisitor)
    }
}
226
/// Serde visitor that builds a [`SWHID`] from its string representation
#[cfg(feature = "serde")]
struct SwhidVisitor;

#[cfg(feature = "serde")]
impl serde::de::Visitor<'_> for SwhidVisitor {
    type Value = SWHID;

    /// Describes the expected input, for serde's error messages
    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("a SWHID")
    }

    /// Parses the string as a SWHID, turning parse errors into serde errors
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        SWHID::try_from(value).map_err(E::custom)
    }
}
245
/// Helper function for [`swhid!()`]
///
/// Builds a version-1 [`SWHID`] of the given node type from the hexadecimal
/// text of its hash.
///
/// # Panics
///
/// Panics (at compile time when evaluated in a const context) with the
/// message `invalid SWHID hash` if `hash` cannot be decoded into the hash
/// array.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub const fn __parse_swhid(node_type: crate::NodeType, hash: &'static str) -> SWHID {
    let parsed = match const_hex::const_decode_to_array(hash.as_bytes()) {
        Ok(hash) => Ok(SWHID {
            namespace_version: 1,
            node_type,
            hash,
        }),
        Err(_) => Err("invalid SWHID hash"),
    };
    const_panic::unwrap_ok!(parsed)
}
260
/// A SWHID literal checked at compile time
///
/// The literal must be written as `swh:1:<type>:<hash>`, where `<type>` is one
/// of `cnt`, `dir`, `rev`, `rel`, `snp` or `ori`, and `<hash>` is a single
/// literal token holding the 40 hexadecimal digits of the hash.
///
/// The expansion refers to this crate through `$crate`, so the macro keeps
/// working when the crate is imported under another name.
///
/// # Examples
///
/// ```
/// use swh_graph::swhid;
/// assert_eq!(
///     swhid!(swh:1:rev:0000000000000000000000000000000000000004).to_string(),
///     "swh:1:rev:0000000000000000000000000000000000000004".to_string(),
/// );
/// ```
///
/// ```compile_fail
/// use swh_graph::swhid;
/// swhid!(swh:1:rev:ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ);
/// ```
///
/// ```compile_fail
/// use swh_graph::swhid;
/// swhid!(swh:1:rev:00000000000000000000000000000000000004);
/// ```
#[cfg(feature = "macros")]
#[macro_export]
macro_rules! swhid {
    (swh:1:cnt:$hash:literal) => {
        $crate::swhid!(@parse Content, $hash)
    };
    (swh:1:dir:$hash:literal) => {
        $crate::swhid!(@parse Directory, $hash)
    };
    (swh:1:rev:$hash:literal) => {
        $crate::swhid!(@parse Revision, $hash)
    };
    (swh:1:rel:$hash:literal) => {
        $crate::swhid!(@parse Release, $hash)
    };
    (swh:1:snp:$hash:literal) => {
        $crate::swhid!(@parse Snapshot, $hash)
    };
    (swh:1:ori:$hash:literal) => {
        $crate::swhid!(@parse Origin, $hash)
    };
    // Internal rule shared by all node types: evaluating the parser in a
    // `const` item forces an invalid hash to be reported at compile time.
    (@parse $node_type:ident, $hash:literal) => {{
        const PARSED_SWHID: $crate::SWHID =
            $crate::__parse_swhid($crate::NodeType::$node_type, stringify!($hash));
        PARSED_SWHID
    }};
}