Skip to main content

ratproto_did/
lib.rs

1#![forbid(unsafe_op_in_unsafe_fn)]
2//! This library implements a memory-efficient container for atproto DIDs.
3//!
4//! The [`Did`] type is guaranteed to take up exactly 16 bytes.
5//! - `did:plc` is represented with just these 16 bytes
6//! - `did:web` uses up to 255 bytes of heap
7//! - other DID methods (not officially supported by atproto) allocate heap space as needed
8//!
9//! ---
10//!
11//! The most common DID in the Atmosphere (the atproto ecosystem) is `did:plc`.
//! Currently, its identifier is exactly 24 characters of base32. At 5 bits per character,
13//! that is 120 bits, or 15 bytes. This leaves just enough space for
14//! a single-byte discriminator, making this implementation highly-optimized
15//! for `did:plc` while also allowing it to represent other DID methods.
16//! **The PLC identifier is expected to be exactly 24 characters long.**
17//!
18//! Atproto currently only supports host-level `did:web` DIDs, so the identifier is a web domain.
19//! The maximum length of a web domain is 255 bytes, so a [`Did`] representing a `did:web` allocates
20//! the necessary space on the heap. **Parsing a `did:web` with an unsupported format fails**.
21//! _The only exception is `localhost`, which supports an optional port
22//! (as in `localhost%3A12345`)._
23//!
24//! Other DID methods are currently unsupported by atproto, but not strictly disallowed.
25//! DID methods that are not `plc` or `web` have their method and identifier strings
26//! allocated on the heap.
27//!
28//! ---
29//!
30//! [`Did`] supports `serde` using an optional feature.
31//! Values are serialized and deserialized using the standard string representation.
32
33mod plc_codec;
34mod web_domain;
35
36#[cfg(feature = "serde")]
37mod serde;
38
39use std::{
40    cmp::Ordering,
41    fmt::{Debug, Display, Formatter},
42    hash::{Hash, Hasher},
43    mem::ManuallyDrop,
44    str::FromStr,
45};
46
47use thiserror::Error;
48
49use crate::web_domain::DidWebDomain;
50
/// A tightly-packed representation of DIDs for atproto.
///
/// Currently, `did:plc`, the most common kind of DID in the atmosphere,
/// has a base32 identifier with exactly 24 characters.
/// At 5 bits per character, that is 120 bits, or 15 bytes.
/// This type is optimized for `did:plc`, and _is guaranteed_
/// to take up exactly 16 bytes.
///
/// `did:web` has its domain part heap-allocated as a string of up to 255 bytes.
/// Other DID methods (not currently supported by atproto)
/// keep their `method:identifier` text in a heap-allocated string.
pub struct Did(DidInner);
63
64impl Did {
65    /// Which method does this [`Did`] represent?
66    ///
67    /// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]).
68    pub fn kind(&self) -> DidKind {
69        match &self.0 {
70            DidInner::Plc(_) => DidKind::Plc,
71            DidInner::Web { .. } => DidKind::Web,
72            DidInner::Other(_) => DidKind::Other,
73        }
74    }
75
76    /// The DID method string.
77    pub fn method(&self) -> &str {
78        match &self.0 {
79            DidInner::Plc(_) => "plc",
80            DidInner::Web { .. } => "web",
81            DidInner::Other(other) => other.method_ident().0,
82        }
83    }
84
85    /// Constructs a [`Did`] directly from the identifier's bytes.
86    pub fn from_plc_bytes(bytes: [u8; 15]) -> Self {
87        Did(DidInner::Plc(bytes))
88    }
89
90    /// Constructs a [`Did`] from a web domain (the identifier after `did:web:`).
91    pub fn from_web_domain(domain_str: &str) -> Option<Self> {
92        // Try to parse the domain,
93        // then unpack the raw data for a more packed repr
94        // Storing the DidWebDomain directly in the enum makes it too large
95        let web = DidWebDomain::from_str(domain_str).ok()?;
96        let (ptr, len) = web.into_raw_parts();
97        Some(Did(DidInner::Web { len, ptr }))
98    }
99}
100
101/// Validates a DID `method:ident` string
102fn validate_method_ident(method_ident: &str) -> Result<(), ParseDidError> {
103    fn validate_method(method: &str) -> bool {
104        // Method must be lowercase alphanumeric
105        method.chars().all(|c: char| c.is_ascii_lowercase() || c.is_ascii_digit())
106    }
107
108    fn validate_ident(ident: &str) -> bool {
109        // Identifier must be alphanumeric or one of: ._:%-
110        // Identifier cannot end with % or :
111        // Identifier cannot be empty
112        // Max identifier is (currently) 2048
113
114        // Does not currently check percent sign encoding!
115
116        ident.chars().all(|c| c.is_ascii_alphanumeric() || "._:%-".contains(c))
117            && !ident.ends_with('%')
118            && !ident.ends_with(':')
119            && !ident.is_empty()
120            && ident.len() <= 2048
121    }
122
123    // A missing `:` is considered an invalid (nonexistent) identifier
124    let (method, ident) = method_ident.split_once(':').ok_or(DidErrorKind::InvalidIdent)?;
125    validate_method(method).then_some(()).ok_or(DidErrorKind::InvalidMethod)?;
126    validate_ident(ident).then_some(()).ok_or(DidErrorKind::InvalidIdent)?;
127
128    Ok(())
129}
130
/// An error which can be returned when parsing a DID.
///
/// Use [`ParseDidError::kind()`] to get error details.
///
/// This is an opaque wrapper around a [`DidErrorKind`]; the attribute
/// `#[error(transparent)]` delegates the error message to that inner kind.
#[derive(Clone, Debug, Error, Eq, PartialEq)]
#[error(transparent)]
pub struct ParseDidError {
    // `#[from]` is what allows a bare `DidErrorKind` to be turned into this
    // error with `.into()` or `?`
    #[from]
    kind: DidErrorKind,
}
140
impl ParseDidError {
    /// Returns the specific reason why parsing failed.
    pub const fn kind(&self) -> &DidErrorKind {
        &self.kind
    }
}
146
/// Details about why a DID failed to parse.
#[derive(Clone, Debug, Error, Eq, PartialEq)]
#[non_exhaustive]
pub enum DidErrorKind {
    /// The input does not start with the `did:` prefix.
    #[error("missing `did:`")]
    MissingDid,
    /// The method of a DID other than `plc`/`web` failed validation
    /// (it must consist of ASCII lowercase letters and digits).
    #[error("invalid DID method")]
    InvalidMethod,
    /// The identifier is missing (no `:` after the method) or was rejected:
    /// a `did:plc` identifier that could not be decoded, a `did:web` domain
    /// that failed to parse, or an identifier of another method that failed validation.
    #[error("invalid identifier")]
    InvalidIdent,
}
158
159impl FromStr for Did {
160    type Err = ParseDidError;
161    fn from_str(input: &str) -> Result<Self, Self::Err> {
162        // Eagerly try to parse did:plc
163        // This optimizes the code for the happy path
164        if let Some(input_32) = input.as_bytes().as_array::<32>() {
165            let plc_opt = plc_codec::decode_plc(input_32);
166            if let Ok(did_inner) = plc_opt.try_into() {
167                return Ok(Did(did_inner));
168            }
169        }
170
171        let Some(method_ident) = input.strip_prefix("did:") else {
172            return Err(DidErrorKind::MissingDid.into());
173        };
174
175        let Some((method, ident)) = method_ident.split_once(':') else {
176            return Err(DidErrorKind::InvalidIdent.into());
177        };
178
179        match method {
180            "plc" => {
181                // did:plc is parsed eagerly
182                // If we got here, the DID was not 32 bytes long
183                // We already checked for "did:" and the "plc" method,
184                // so the identifier must be wrong
185                Err(DidErrorKind::InvalidIdent.into())
186            }
187            "web" => Self::from_web_domain(ident).ok_or(DidErrorKind::InvalidIdent.into()),
188            _ => validate_method_ident(method_ident)
189                .map(|_| Did(DidInner::Other(Box::new(DidOther(method_ident.to_owned()))))),
190        }
191    }
192}
193
/// Which method does this [`Did`] represent?
///
/// See [`Did::method()`] to get the string representation (especially for [`DidKind::Other`]).
///
/// Atproto currently officially supports `did:plc` and `did:web`,
/// but more methods may be supported in the future.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum DidKind {
    /// A `did:plc` DID.
    Plc,
    /// A `did:web` DID.
    Web,
    /// A DID whose method is neither `plc` nor `web`.
    Other,
}
207
// Internal storage behind `Did`.
// `#[repr(u8)]` gives the enum a one-byte discriminant, which is what leaves
// room for the 15 `did:plc` bytes inside the 16-byte `Did`.
#[repr(u8)]
enum DidInner {
    /// 1-byte discriminant + 15 identifier bytes
    Plc([u8; 15]),
    /// Contains an unpacked [`DidWebDomain`]: the pointer and length obtained
    /// from `DidWebDomain::into_raw_parts`. The domain is rebuilt with
    /// `DidWebDomain::from_raw_parts` whenever it is needed, and for good in `Drop`.
    Web { len: u8, ptr: *const u8 },
    /// A single-word pointer to a heap-allocated string
    Other(Box<DidOther>),
}
217
/// Owned `method:identifier` text of a DID (the DID string minus `did:`).
///
/// Invariant: the text contains at least one `:` separating the method from
/// the identifier. [`DidOther::method_ident`] panics if that does not hold.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
struct DidOther(String);

impl DidOther {
    /// Splits the text at its first `:` and returns `(method, identifier)`.
    ///
    /// Any further `:` characters stay inside the identifier.
    ///
    /// # Panics
    ///
    /// Panics if the text contains no `:`.
    #[inline]
    pub fn method_ident(&self) -> (&str, &str) {
        let text = self.0.as_str();
        let colon = text.find(':').unwrap();
        (&text[..colon], &text[colon + 1..])
    }

    /// Borrows the whole `method:identifier` text (no `did:` prefix).
    #[inline]
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl AsRef<str> for DidOther {
    /// Borrows the whole `method:identifier` text.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
242
243impl Drop for DidInner {
244    fn drop(&mut self) {
245        match self {
246            DidInner::Web { len, ptr } => {
247                // SAFETY: ptr and len come from an unpacked DidWebDomain
248                // did:web needs to run its special drop code
249                // This deallocates the memory at ptr!
250                _ = unsafe { DidWebDomain::from_raw_parts(*ptr, *len) };
251            }
252            DidInner::Plc(_) | DidInner::Other(_) => {
253                // plc and other don't require special drop logic
254            }
255        }
256    }
257}
258
259impl PartialEq for Did {
260    fn eq(&self, other: &Self) -> bool {
261        match (&self.0, &other.0) {
262            (DidInner::Plc(a), DidInner::Plc(b)) => a == b,
263            (
264                DidInner::Web { len: len_a, ptr: ptr_a },
265                DidInner::Web { len: len_b, ptr: ptr_b },
266            ) => {
267                // SAFETY: ptr and len come from an unpacked DidWebDomain
268                // ManuallyDrop prevents dropping the contents
269                let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) };
270                let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) };
271                a.eq(&b)
272            }
273            (DidInner::Other(a), DidInner::Other(b)) => a.eq(b),
274            _ => false,
275        }
276    }
277}
278
// Marker impl on top of the manual `PartialEq for Did`.
impl Eq for Did {}
280
impl PartialOrd for Did {
    /// Always returns `Some`: the result is taken from the `Ord` impl,
    /// so both orderings agree by construction.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
286
287impl Ord for Did {
288    fn cmp(&self, other: &Self) -> Ordering {
289        match (self, other) {
290            // plc & plc -> compare bytes
291            (Did(DidInner::Plc(a)), Did(DidInner::Plc(b))) => a.cmp(b),
292            // web & web -> compare domains
293            (
294                Did(DidInner::Web { len: len_a, ptr: ptr_a }),
295                Did(DidInner::Web { len: len_b, ptr: ptr_b }),
296            ) => {
297                // SAFETY: ptr and len come from an unpacked DidWebDomain
298                // ManuallyDrop prevents dropping the contents
299                let a = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_a, *len_a)) };
300                let b = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr_b, *len_b)) };
301                a.cmp(&b)
302            }
303            // other & other -> compare methods and identifiers
304            (Did(DidInner::Other(a)), Did(DidInner::Other(b))) => a.cmp(b),
305            // different kinds -> just compare the method
306            (a, b) => {
307                debug_assert_ne!(a.kind(), b.kind());
308                a.method().cmp(b.method())
309            }
310        }
311    }
312}
313
314impl Hash for Did {
315    fn hash<H: Hasher>(&self, state: &mut H) {
316        match &self.0 {
317            DidInner::Plc(bytes) => {
318                state.write(b"plc");
319                bytes.hash(state);
320            }
321            DidInner::Web { ptr, len } => {
322                state.write(b"web");
323                let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
324                web.hash(state);
325            }
326            DidInner::Other(other) => {
327                other.hash(state);
328            }
329        }
330    }
331}
332
333impl Clone for Did {
334    fn clone(&self) -> Self {
335        Did(match &self.0 {
336            DidInner::Plc(data) => DidInner::Plc(*data),
337            DidInner::Web { len, ptr } => {
338                // SAFETY: ptr and len come from an unpacked DidWebDomain
339                // ManuallyDrop prevents dropping the contents
340                // The implementation of DidWebDomain::clone is used
341                let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
342                let web = web.clone();
343                // into_raw_parts doesn't drop the data
344                let (ptr, len) = ManuallyDrop::into_inner(web).into_raw_parts();
345                DidInner::Web { len, ptr }
346            }
347            DidInner::Other(other) => DidInner::Other(Box::clone(other)),
348        })
349    }
350}
351
352impl Debug for Did {
353    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
354        match &self.0 {
355            DidInner::Plc(data) => f.debug_tuple("DidInner::Plc").field(data).finish(),
356            DidInner::Web { len, ptr } => {
357                // SAFETY: ptr and len come from an unpacked DidWebDomain
358                // ManuallyDrop prevents dropping the contents
359                let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
360                f.debug_tuple("DidInner::Web").field(&*web).finish()
361            }
362            DidInner::Other(other) => f.debug_tuple("DidInner::Other").field(other).finish(),
363        }
364    }
365}
366
367impl Display for Did {
368    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
369        match &self.0 {
370            DidInner::Plc(data) => {
371                // A custom ToString impl is currently not possible
372                // trying to introduce more unsafe here didn't yield any significant perf+
373                // Adding more unsafe for negligible gains doesn't feel worth it
374                let mut bytes = vec![0u8; 32];
375                plc_codec::encode_plc(DidInner::Plc(*data), bytes.as_mut_array::<32>().unwrap());
376                let string = String::from_utf8(bytes).expect("Encoded value should be UTF-8");
377                write!(f, "{string}")
378            }
379            DidInner::Web { len, ptr } => {
380                // SAFETY: ptr and len come from an unpacked DidWebDomain
381                // ManuallyDrop prevents dropping the contents
382                let web = unsafe { ManuallyDrop::new(DidWebDomain::from_raw_parts(*ptr, *len)) };
383                write!(f, "did:web:{}", web.as_str())
384            }
385            DidInner::Other(other) => {
386                write!(f, "did:{}", other.as_str())
387            }
388        }
389    }
390}
391
// Unit tests for parsing, formatting, comparing and cloning `Did`.
// Most of them run over the lines of two fixture files that sit next to this source file.
#[cfg(test)]
mod tests {
    use super::*;

    // The 16-byte size promised by the documentation
    #[test]
    fn did_type_size() {
        assert_eq!(size_of::<Did>(), 16);
    }

    // Lines of the "valid" fixture; empty lines and lines starting with `#` are skipped
    fn did_syntax_valid_lines() -> impl Iterator<Item = &'static str> {
        include_str!("./did_syntax_valid.txt")
            .lines()
            .filter(|l| !l.is_empty() && !l.starts_with('#'))
    }

    // Lines of the "invalid" fixture; empty lines and lines starting with `#` are skipped
    fn did_syntax_invalid_lines() -> impl Iterator<Item = &'static str> {
        include_str!("./did_syntax_invalid.txt")
            .lines()
            .filter(|l| !l.is_empty() && !l.starts_with('#'))
    }

    // Parsing a did:plc string must yield exactly these 15 identifier bytes
    #[test_case::test_case("did:plc:c6te24qg5hx54qgegqylpqkx" => [0x17, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57]
    )]
    #[test_case::test_case("did:plc:abcdefghijklmnopqrstuvwx" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7]
    )]
    #[test_case::test_case("did:plc:abcdefghabcdefghabcdefgh" => [0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7]
    )]
    #[test_case::test_case("did:plc:234567234567234567234567" => [0xd6, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf]
    )]
    fn plc_str_to_bytes_ok(ident: &str) -> [u8; 15] {
        // Panics if parsing fails or the result is not stored as the Plc variant
        let DidInner::Plc(bytes) = Did::from_str(ident).unwrap().0 else { panic!() };
        bytes
    }

    // The reverse direction: the same byte arrays must format back to the same strings
    #[test_case::test_case(&[0x17u8, 0xa6, 0x4d, 0x72, 0x06, 0xe9, 0xef, 0xde, 0x40, 0xc4, 0x34, 0x30, 0xb7, 0xc1, 0x57] => "did:plc:c6te24qg5hx54qgegqylpqkx"
    )]
    #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7] => "did:plc:abcdefghijklmnopqrstuvwx"
    )]
    #[test_case::test_case(&[0x00u8, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7, 0x00, 0x44, 0x32, 0x14, 0xc7] => "did:plc:abcdefghabcdefghabcdefgh"
    )]
    #[test_case::test_case(&[0xd6u8, 0xf9, 0xdf, 0x7f, 0x5b, 0xe7, 0x7d, 0xfd, 0x6f, 0x9d, 0xf7, 0xf5, 0xbe, 0x77, 0xdf] => "did:plc:234567234567234567234567"
    )]
    fn bytes_to_plc_str_ok(bytes: &[u8; 15]) -> String {
        Did(DidInner::Plc(bytes.to_owned())).to_string()
    }

    // Every line of the "valid" fixture must parse
    #[test]
    fn did_syntax_valid() {
        for l in did_syntax_valid_lines() {
            Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
        }
    }

    // Every line of the "invalid" fixture must be rejected
    #[test]
    fn did_syntax_invalid() {
        for l in did_syntax_invalid_lines() {
            Did::from_str(l).expect_err(&format!("Parsing {l} should have failed"));
        }
    }

    // Parsing the same string twice must give equal values
    #[test]
    fn did_equals() {
        for l in did_syntax_valid_lines() {
            let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
            let b = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
            assert_eq!(a, b);
        }
    }

    // A clone must compare equal to its source
    #[test]
    fn did_clone_equals() {
        for l in did_syntax_valid_lines() {
            let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
            let b = a.clone();
            assert_eq!(a, b, "cloned item should be equal");
        }
    }

    // Dropping either the source or the clone must leave the other one usable
    #[test]
    fn did_clone_drop_safe() {
        for l in did_syntax_valid_lines() {
            let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
            let b = a.clone();
            drop(a);
            assert_eq!(b.to_string(), l, "cloned item roundtrip should work after drop");

            let a = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
            let b = a.clone();
            drop(b);
            assert_eq!(a.to_string(), l, "original item roundtrip should work after drop");
        }
    }

    // Parsing and then formatting must reproduce the input exactly
    #[test]
    fn did_str_roundtrip() {
        for l in did_syntax_valid_lines() {
            let did = Did::from_str(l).unwrap_or_else(|_| panic!("Parsing {l} failed"));
            assert_eq!(did.to_string(), l);
        }
    }
}