Skip to main content

swh_graph/
swhid.rs

1// Copyright (C) 2023-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::str::FromStr;
7
8use rdst::RadixKey;
9use sha1::{Digest, Sha1};
10use thiserror::Error;
11
12use crate::NodeType;
13
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
15#[repr(C)]
16/// SoftWare Heritage persistent IDentifiers
17///
18/// A SWHID consists of two separate parts, a mandatory core identifier that
19/// can point to any software artifact (or “object”) available in the Software
20/// Heritage archive, and an optional list of qualifiers that allows to specify
21/// the context where the object is meant to be seen and point to a subpart of
22/// the object itself.
23///
24/// # Reference
25/// - <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>
26/// - Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli. [Identifiers for Digital Objects: the Case of Software Source Code Preservation](https://hal.archives-ouvertes.fr/hal-01865790v4). In Proceedings of iPRES 2018: 15th International Conference on Digital Preservation, Boston, MA, USA, September 2018, 9 pages.
27/// - Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli. [Referencing Source Code Artifacts: a Separate Concern in Software Citation](https://arxiv.org/abs/2001.08647). In Computing in Science and Engineering, volume 22, issue 2, pages 33-43. ISSN 1521-9615, IEEE. March 2020.
28pub struct SWHID {
29    /// Namespace Version
30    pub namespace_version: u8,
31    /// Node type
32    pub node_type: NodeType,
33    /// SHA1 hash of the node
34    pub hash: [u8; 20],
35}
36
37impl SWHID {
38    /// The size of the binary representation of a SWHID
39    pub const BYTES_SIZE: usize = 22;
40
41    /// Returns the pseudo-SWHID representation for a origin URI
42    /// akin to "swh:1:ori:{}"
43    pub fn from_origin_url(origin: impl AsRef<str>) -> SWHID {
44        let mut hasher = Sha1::new();
45        hasher.update(origin.as_ref());
46
47        SWHID {
48            namespace_version: 1,
49            node_type: NodeType::Origin,
50            hash: hasher.finalize().into(),
51        }
52    }
53}
54
55impl core::fmt::Display for SWHID {
56    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57        let mut hex_hash = [0; 40];
58        let hex_hash =
59            faster_hex::hex_encode(&self.hash, &mut hex_hash).expect("sha1 digest is not 40 bytes");
60        write!(
61            f,
62            "swh:{}:{}:{}",
63            self.namespace_version,
64            self.node_type.to_str(),
65            hex_hash,
66        )?;
67        Ok(())
68    }
69}
70
71#[derive(Error, Debug)]
72pub enum BinSWHIDDeserializationError {
73    #[error("Unsupported SWHID version: {0}")]
74    Version(u8),
75    #[error("Invalid SWHID type: {0}")]
76    Type(u8),
77}
78
79/// Parse a SWHID from bytes, while the SWHID struct has the exact same layout
80/// and thus it can be read directly from bytes, this function is provided for
81/// completeness and safety because we can check the namespace version is
82/// supported.
83impl TryFrom<[u8; SWHID::BYTES_SIZE]> for SWHID {
84    type Error = BinSWHIDDeserializationError;
85    fn try_from(value: [u8; SWHID::BYTES_SIZE]) -> std::result::Result<Self, Self::Error> {
86        use BinSWHIDDeserializationError::*;
87
88        let namespace_version = value[0];
89        if namespace_version != 1 {
90            return Err(Version(namespace_version));
91        }
92        let node_type = NodeType::try_from(value[1]).map_err(Type)?;
93        let mut hash = [0; 20];
94        hash.copy_from_slice(&value[2..]);
95        Ok(Self {
96            namespace_version,
97            node_type,
98            hash,
99        })
100    }
101}
102
103#[derive(Error, Debug, PartialEq, Eq, Hash)]
104pub enum StrSWHIDDeserializationError {
105    #[error("Invalid syntax: {0}")]
106    Syntax(&'static str),
107    #[error("Unsupported SWHID namespace: {0}")]
108    Namespace(String),
109    #[error("Unsupported SWHID version: {0}")]
110    Version(String),
111    #[error("Expected hash length to be {expected}, got {got}")]
112    HashLength { expected: usize, got: usize },
113    #[error("Invalid SWHID type: {0}")]
114    Type(String),
115    #[error("SWHID hash is not hexadecimal: {0}")]
116    HashAlphabet(String),
117}
118
119/// Parse a SWHID from the string representation
120impl TryFrom<&str> for SWHID {
121    type Error = StrSWHIDDeserializationError;
122    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
123        Self::from_str(value)
124    }
125}
126
127impl FromStr for SWHID {
128    type Err = StrSWHIDDeserializationError;
129
130    fn from_str(value: &str) -> Result<Self, Self::Err> {
131        use StrSWHIDDeserializationError::*;
132
133        let mut tokens = value.splitn(4, ':');
134        let Some(namespace) = tokens.next() else {
135            return Err(Syntax("SWHID is empty"));
136        };
137        if namespace != "swh" {
138            return Err(Namespace(namespace.to_string()));
139        }
140        let Some(namespace_version) = tokens.next() else {
141            return Err(Syntax("SWHID is too short (no namespace version)"));
142        };
143        if namespace_version != "1" {
144            return Err(Version(namespace_version.to_string()));
145        }
146        let Some(node_type) = tokens.next() else {
147            return Err(Syntax("SWHID is too short (no object type)"));
148        };
149        let Some(hex_hash) = tokens.next() else {
150            return Err(Syntax("SWHID is too short (no object hash)"));
151        };
152        if hex_hash.len() != 40 {
153            return Err(HashLength {
154                expected: 40,
155                got: hex_hash.len(),
156            });
157        }
158        let node_type = node_type
159            .parse::<NodeType>()
160            .map_err(|e| Type(e.to_string()))?;
161        let mut hash = [0u8; 20];
162
163        // Miri does not support the SIMD feature-probing in faster-hex, so we have
164        // to fall back to a different crate.
165        #[cfg(all(miri, feature = "miri"))]
166        hex::decode_to_slice(hex_hash.as_bytes(), &mut hash)
167            .map_err(|_| HashAlphabet(hex_hash.to_string()))?;
168        #[cfg(all(miri, not(feature = "miri")))]
169        std::compile_error!("'miri' feature is required to compile with miri");
170        #[cfg(not(miri))]
171        faster_hex::hex_decode(hex_hash.as_bytes(), &mut hash)
172            .map_err(|_| HashAlphabet(hex_hash.to_string()))?;
173
174        Ok(Self {
175            namespace_version: 1,
176            node_type,
177            hash,
178        })
179    }
180}
181
182impl From<SWHID> for [u8; SWHID::BYTES_SIZE] {
183    fn from(value: SWHID) -> Self {
184        let mut result = [0; SWHID::BYTES_SIZE];
185        result[0] = value.namespace_version;
186        result[1] = value.node_type as u8;
187        result[2..].copy_from_slice(&value.hash);
188        result
189    }
190}
191
192impl RadixKey for SWHID {
193    const LEVELS: usize = 22;
194
195    #[inline(always)]
196    fn get_level(&self, level: usize) -> u8 {
197        assert!(level < Self::LEVELS);
198        match Self::LEVELS - level - 1 {
199            0 => self.namespace_version,
200            1 => match self.node_type {
201                // must follow alphabetical order of the 3-char abbreviation
202                NodeType::Content => 0,   // cnt
203                NodeType::Directory => 1, // dir
204                NodeType::Origin => 2,    // ori
205                NodeType::Release => 3,   // rel
206                NodeType::Revision => 4,  // rev
207                NodeType::Snapshot => 5,  // rel
208            },
209            n => self.hash[n - 2],
210        }
211    }
212}
213
214impl Ord for SWHID {
215    #[inline(always)]
216    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
217        for level in (0..Self::LEVELS).rev() {
218            let ordering = self.get_level(level).cmp(&other.get_level(level));
219            if ordering != std::cmp::Ordering::Equal {
220                return ordering;
221            }
222        }
223        std::cmp::Ordering::Equal
224    }
225}
226impl PartialOrd for SWHID {
227    #[inline(always)]
228    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
229        Some(self.cmp(other))
230    }
231}
232
/// Serializes a SWHID as a string, using its [`Display`](core::fmt::Display)
/// representation
#[cfg(feature = "serde")]
impl serde::Serialize for SWHID {
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.collect_str(self)
    }
}
242
/// Deserializes a SWHID from a string, delegating the parsing to
/// `SwhidVisitor`
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for SWHID {
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        deserializer.deserialize_str(SwhidVisitor)
    }
}
251
/// Serde visitor that builds a [`SWHID`] from its string representation
#[cfg(feature = "serde")]
struct SwhidVisitor;

#[cfg(feature = "serde")]
impl serde::de::Visitor<'_> for SwhidVisitor {
    type Value = SWHID;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("a SWHID")
    }

    /// Parses `value` as a SWHID; a parse failure is converted into the
    /// deserializer's error type with `E::custom`.
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let parsed: Result<SWHID, StrSWHIDDeserializationError> = value.parse();
        parsed.map_err(E::custom)
    }
}
270
/// Helper function for [`swhid!()`]
///
/// Builds a version-1 [`SWHID`] of the given `node_type` from the hexadecimal
/// string `hash`.
///
/// # Panics
///
/// Panics with "invalid SWHID hash" if `hash` cannot be decoded into the
/// 20-byte hash array. When evaluated in a `const` context, as `swhid!()`
/// does, this becomes a compilation error.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub const fn __parse_swhid(node_type: crate::NodeType, hash: &'static str) -> SWHID {
    let parsed = match const_hex::const_decode_to_array(hash.as_bytes()) {
        Ok(hash) => Ok(SWHID {
            namespace_version: 1,
            node_type,
            hash,
        }),
        Err(_) => Err("invalid SWHID hash"),
    };
    const_panic::unwrap_ok!(parsed)
}
285
#[cfg(feature = "macros")]
/// A SWHID literal checked at compile time
///
/// The accepted syntax is `swh:1:<type>:<hash>`, where `<type>` is one of
/// `cnt`, `dir`, `rev`, `rel`, `snp`, or `ori`, and `<hash>` is 40 hexadecimal
/// characters. An invalid hash is a compilation error.
///
/// # Examples
///
/// ```
/// use swh_graph::swhid;
/// assert_eq!(
///     swhid!(swh:1:rev:0000000000000000000000000000000000000004).to_string(),
///     "swh:1:rev:0000000000000000000000000000000000000004".to_string(),
/// );
/// assert_eq!(
///     swhid!(swh:1:rev:ffffffffffffffffffffffffffff000000000004).to_string(),
///     "swh:1:rev:ffffffffffffffffffffffffffff000000000004".to_string(),
/// );
/// assert_eq!(
///     swhid!(swh:1:rev:FFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000004).to_string(),
///     "swh:1:rev:ffffffffffffffffffffffffffff000000000004".to_string(),
/// );
/// ```
///
/// ```compile_fail
/// use swh_graph::swhid;
/// swhid!(swh:1:rev:ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ);
/// ```
///
/// ```compile_fail
/// use swh_graph::swhid;
/// swhid!(swh:1:rev:00000000000000000000000000000000000004);
/// ```
#[macro_export]
macro_rules! swhid {
    // The hash is a single token: a literal when it starts with a decimal
    // digit, an identifier when it starts with a letter. Each node type
    // therefore needs one arm per token kind; all of them forward to the
    // internal `@parse` rule below.

    // hash starting with a decimal digit
    (swh:1:cnt:$hash:literal) => {
        $crate::swhid!(@parse Content, stringify!($hash))
    };
    (swh:1:dir:$hash:literal) => {
        $crate::swhid!(@parse Directory, stringify!($hash))
    };
    (swh:1:rev:$hash:literal) => {
        $crate::swhid!(@parse Revision, stringify!($hash))
    };
    (swh:1:rel:$hash:literal) => {
        $crate::swhid!(@parse Release, stringify!($hash))
    };
    (swh:1:snp:$hash:literal) => {
        $crate::swhid!(@parse Snapshot, stringify!($hash))
    };
    (swh:1:ori:$hash:literal) => {
        $crate::swhid!(@parse Origin, stringify!($hash))
    };

    // hash starting with a to f
    (swh:1:cnt:$hash:ident) => {
        $crate::swhid!(@parse Content, stringify!($hash))
    };
    (swh:1:dir:$hash:ident) => {
        $crate::swhid!(@parse Directory, stringify!($hash))
    };
    (swh:1:rev:$hash:ident) => {
        $crate::swhid!(@parse Revision, stringify!($hash))
    };
    (swh:1:rel:$hash:ident) => {
        $crate::swhid!(@parse Release, stringify!($hash))
    };
    (swh:1:snp:$hash:ident) => {
        $crate::swhid!(@parse Snapshot, stringify!($hash))
    };
    (swh:1:ori:$hash:ident) => {
        $crate::swhid!(@parse Origin, stringify!($hash))
    };

    // Internal rule, not part of the public syntax: builds the SWHID inside a
    // `const` item so that an invalid hash fails at compile time. `$crate` is
    // used instead of a hard-coded crate name so the expansion still resolves
    // when the crate is renamed or the macro is reached through a re-export.
    (@parse $node_type:ident, $hex_hash:expr) => {{
        const PARSED_SWHID: $crate::SWHID =
            $crate::__parse_swhid($crate::NodeType::$node_type, $hex_hash);
        PARSED_SWHID
    }};
}