atrium_api/types/
string.rs

1//! Lexicon [string formats].
2//!
3//! [string formats]: https://atproto.com/specs/lexicon#string-formats
4
5use chrono::DurationRound;
6use ipld_core::cid;
7use langtag::{LanguageTag, LanguageTagBuf};
8use regex::Regex;
9use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};
10use std::{cmp, ops::Deref, str::FromStr, sync::OnceLock};
11
12use super::LimitedU32;
13
// Reference: https://github.com/bluesky-social/indigo/blob/9e3b84fdbb20ca4ac397a549e1c176b308f7a6e1/repo/tid.go#L11-L19
/// Encodes `i` as a fixed-width, 13-character string over the
/// `234567abcdefghijklmnopqrstuvwxyz` alphabet.
///
/// The value is consumed five bits at a time, starting with the least significant
/// bits. The most significant group comes first in the output; because 13 groups of
/// five bits cover 65 bits, only the top four bits of `i` feed the leading character.
fn s32_encode(mut i: u64) -> String {
    const S32_CHAR: &[u8; 32] = b"234567abcdefghijklmnopqrstuvwxyz";
    const S32_LEN: usize = 13;

    // Fill the buffer back-to-front so the digits land in big-endian order directly,
    // without building a little-endian string and reversing it afterwards.
    let mut digits = [0u8; S32_LEN];
    for slot in digits.iter_mut().rev() {
        *slot = S32_CHAR[(i & 0x1F) as usize];
        i >>= 5;
    }

    digits.iter().map(|&b| char::from(b)).collect()
}
29
/// Implements the conversions shared by every Lexicon string format that is a newtype
/// wrapper around `String`.
///
/// The generated impls call the wrapped type's `new(String)` constructor and its
/// `as_str(&self)` accessor, so parsing and deserialization always go through the
/// type's own validation.
macro_rules! string_newtype {
    ($name:ident) => {
        impl FromStr for $name {
            type Err = &'static str;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                Self::new(String::from(s))
            }
        }

        impl<'de> Deserialize<'de> for $name {
            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
            where
                D: Deserializer<'de>,
            {
                // Read the raw string first, then validate it with the constructor so
                // that malformed values surface as deserialization errors.
                let raw = String::deserialize(deserializer)?;
                Self::new(raw).map_err(D::Error::custom)
            }
        }

        impl From<$name> for String {
            fn from(value: $name) -> Self {
                let $name(inner) = value;
                inner
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                $name::as_str(self)
            }
        }

        impl Deref for $name {
            type Target = str;

            fn deref(&self) -> &Self::Target {
                $name::as_str(self)
            }
        }
    };
}
73
74/// An AT Protocol identifier.
75#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Hash)]
76#[serde(untagged)]
77pub enum AtIdentifier {
78    Did(Did),
79    Handle(Handle),
80}
81
82impl From<Did> for AtIdentifier {
83    fn from(did: Did) -> Self {
84        AtIdentifier::Did(did)
85    }
86}
87
88impl From<Handle> for AtIdentifier {
89    fn from(handle: Handle) -> Self {
90        AtIdentifier::Handle(handle)
91    }
92}
93
94impl FromStr for AtIdentifier {
95    type Err = &'static str;
96
97    fn from_str(s: &str) -> Result<Self, Self::Err> {
98        if let Ok(did) = s.parse() {
99            Ok(AtIdentifier::Did(did))
100        } else {
101            s.parse().map(AtIdentifier::Handle)
102        }
103    }
104}
105
106impl From<AtIdentifier> for String {
107    fn from(value: AtIdentifier) -> Self {
108        match value {
109            AtIdentifier::Did(did) => did.into(),
110            AtIdentifier::Handle(handle) => handle.into(),
111        }
112    }
113}
114
115impl AsRef<str> for AtIdentifier {
116    fn as_ref(&self) -> &str {
117        match self {
118            AtIdentifier::Did(did) => did.as_ref(),
119            AtIdentifier::Handle(handle) => handle.as_ref(),
120        }
121    }
122}
123
124/// A [CID in string format].
125///
126/// [CID in string format]: https://atproto.com/specs/data-model#link-and-cid-formats
127#[derive(Clone, Debug, PartialEq, Eq, Hash)]
128pub struct Cid(cid::Cid);
129
130impl Cid {
131    /// Prepares a CID for use as a Lexicon string.
132    pub fn new(cid: cid::Cid) -> Self {
133        Self(cid)
134    }
135}
136
137impl FromStr for Cid {
138    type Err = cid::Error;
139
140    fn from_str(s: &str) -> Result<Self, Self::Err> {
141        s.parse().map(Self)
142    }
143}
144
145impl<'de> Deserialize<'de> for Cid {
146    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
147    where
148        D: Deserializer<'de>,
149    {
150        let value: String = Deserialize::deserialize(deserializer)?;
151        Self::from_str(&value).map_err(D::Error::custom)
152    }
153}
154
155impl Serialize for Cid {
156    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
157    where
158        S: Serializer,
159    {
160        serializer.serialize_str(&self.0.to_string())
161    }
162}
163
164impl AsRef<cid::Cid> for Cid {
165    fn as_ref(&self) -> &cid::Cid {
166        &self.0
167    }
168}
169
170/// A Lexicon timestamp.
171#[derive(Clone, Debug, Eq)]
172pub struct Datetime {
173    /// Serialized form. Preserved during parsing to ensure round-trip re-serialization.
174    serialized: String,
175    /// Parsed form.
176    dt: chrono::DateTime<chrono::FixedOffset>,
177}
178
179impl PartialEq for Datetime {
180    fn eq(&self, other: &Self) -> bool {
181        self.dt == other.dt
182    }
183}
184
185impl Ord for Datetime {
186    fn cmp(&self, other: &Self) -> cmp::Ordering {
187        self.dt.cmp(&other.dt)
188    }
189}
190
191impl PartialOrd for Datetime {
192    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
193        Some(self.cmp(other))
194    }
195}
196
197impl Datetime {
198    /// Returns a `Datetime` which corresponds to the current date and time in UTC.
199    ///
200    /// The timestamp uses microsecond precision.
201    pub fn now() -> Self {
202        Self::new(chrono::Utc::now().fixed_offset())
203    }
204
205    /// Constructs a new Lexicon timestamp.
206    ///
207    /// The timestamp is rounded to microsecond precision.
208    pub fn new(dt: chrono::DateTime<chrono::FixedOffset>) -> Self {
209        let dt = dt
210            .duration_round(chrono::Duration::microseconds(1))
211            .expect("delta does not exceed limits");
212        // This serialization format is compatible with ISO 8601.
213        let serialized = dt.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
214        Self { serialized, dt }
215    }
216
217    /// Extracts a string slice containing the entire `Datetime`.
218    #[inline]
219    #[must_use]
220    pub fn as_str(&self) -> &str {
221        self.serialized.as_str()
222    }
223}
224
225impl FromStr for Datetime {
226    type Err = chrono::ParseError;
227
228    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
229    fn from_str(s: &str) -> Result<Self, Self::Err> {
230        // The `chrono` crate only supports RFC 3339 parsing, but Lexicon restricts
231        // datetimes to the subset that is also valid under ISO 8601. Apply a regex that
232        // validates enough of the relevant ISO 8601 format that the RFC 3339 parser can
233        // do the rest.
234        static RE_ISO_8601: OnceLock<Regex> = OnceLock::new();
235        if RE_ISO_8601
236            .get_or_init(|| Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|(\+[0-9]{2}|\-[0-9][1-9]):[0-9]{2})$").unwrap())
237            .is_match(s)
238        {
239            let dt = chrono::DateTime::parse_from_rfc3339(s)?;
240            Ok(Self {
241                serialized: s.into(),
242                dt,
243            })
244        } else {
245            // Simulate an invalid `ParseError`.
246            Err(chrono::DateTime::parse_from_rfc3339("invalid").expect_err("invalid"))
247        }
248    }
249}
250
251impl<'de> Deserialize<'de> for Datetime {
252    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
253    where
254        D: Deserializer<'de>,
255    {
256        let value: String = Deserialize::deserialize(deserializer)?;
257        Self::from_str(&value).map_err(D::Error::custom)
258    }
259}
260
261impl Serialize for Datetime {
262    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
263    where
264        S: Serializer,
265    {
266        serializer.serialize_str(&self.serialized)
267    }
268}
269
270impl AsRef<chrono::DateTime<chrono::FixedOffset>> for Datetime {
271    fn as_ref(&self) -> &chrono::DateTime<chrono::FixedOffset> {
272        &self.dt
273    }
274}
275
276/// A generic [DID Identifier].
277///
278/// [DID Identifier]: https://atproto.com/specs/did
279#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
280#[serde(transparent)]
281pub struct Did(String);
282string_newtype!(Did);
283
284impl Did {
285    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
286    /// Parses a `Did` from the given string.
287    pub fn new(did: String) -> Result<Self, &'static str> {
288        static RE_DID: OnceLock<Regex> = OnceLock::new();
289
290        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
291        if did.len() > 2048 {
292            Err("DID too long")
293        } else if !RE_DID
294            .get_or_init(|| Regex::new(r"^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$").unwrap())
295            .is_match(&did)
296        {
297            Err("Invalid DID")
298        } else {
299            Ok(Self(did))
300        }
301    }
302
303    /// Returns the DID method.
304    pub fn method(&self) -> &str {
305        &self.0[..4 + self.0[4..].find(':').unwrap()]
306    }
307
308    /// Returns the DID as a string slice.
309    pub fn as_str(&self) -> &str {
310        self.0.as_str()
311    }
312}
313
314/// A [Handle Identifier].
315///
316/// [Handle Identifier]: https://atproto.com/specs/handle
317#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
318#[serde(transparent)]
319pub struct Handle(String);
320string_newtype!(Handle);
321
322impl Handle {
323    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
324    /// Parses a `Handle` from the given string.
325    pub fn new(handle: String) -> Result<Self, &'static str> {
326        static RE_HANDLE: OnceLock<Regex> = OnceLock::new();
327
328        // https://atproto.com/specs/handle#handle-identifier-syntax
329        if handle.len() > 253 {
330            Err("Handle too long")
331        } else if !RE_HANDLE
332            .get_or_init(|| Regex::new(r"^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$").unwrap())
333            .is_match(&handle)
334        {
335            Err("Invalid handle")
336        } else {
337            Ok(Self(handle))
338        }
339    }
340
341    /// Returns the handle as a string slice.
342    pub fn as_str(&self) -> &str {
343        self.0.as_str()
344    }
345}
346
347/// A [Namespaced Identifier].
348///
349/// [Namespaced Identifier]: https://atproto.com/specs/nsid
350#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
351#[serde(transparent)]
352pub struct Nsid(String);
353string_newtype!(Nsid);
354
355impl Nsid {
356    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
357    /// Parses an NSID from the given string.
358    pub fn new(nsid: String) -> Result<Self, &'static str> {
359        static RE_NSID: OnceLock<Regex> = OnceLock::new();
360
361        // https://atproto.com/specs/handle#handle-identifier-syntax
362        if nsid.len() > 317 {
363            Err("NSID too long")
364        } else if !RE_NSID
365            .get_or_init(|| Regex::new(r"^[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(\.[a-zA-Z][a-zA-Z0-9]{0,62})$").unwrap())
366            .is_match(&nsid)
367        {
368            Err("Invalid NSID")
369        } else {
370            Ok(Self(nsid))
371        }
372    }
373
374    /// Returns the domain authority part of the NSID.
375    pub fn domain_authority(&self) -> &str {
376        let split = self.0.rfind('.').expect("enforced by constructor");
377        &self.0[..split]
378    }
379
380    /// Returns the name segment of the NSID.
381    pub fn name(&self) -> &str {
382        let split = self.0.rfind('.').expect("enforced by constructor");
383        &self.0[split + 1..]
384    }
385
386    /// Returns the NSID as a string slice.
387    pub fn as_str(&self) -> &str {
388        self.0.as_str()
389    }
390}
391
392/// An [IETF Language Tag] string.
393///
394/// [IETF Language Tag]: https://en.wikipedia.org/wiki/IETF_language_tag
395#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Hash)]
396#[serde(transparent)]
397pub struct Language(LanguageTagBuf);
398
399impl Language {
400    /// Creates a new language tag by parsing the given string.
401    pub fn new(s: String) -> Result<Self, langtag::Error> {
402        LanguageTagBuf::new(s.into()).map(Self).map_err(|(e, _)| e)
403    }
404
405    /// Returns a [`LanguageTag`] referencing this tag.
406    #[inline]
407    pub fn as_ref(&self) -> LanguageTag<'_> {
408        self.0.as_ref()
409    }
410}
411
412impl FromStr for Language {
413    type Err = langtag::Error;
414
415    fn from_str(s: &str) -> Result<Self, Self::Err> {
416        Self::new(s.into())
417    }
418}
419
420impl Serialize for Language {
421    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
422    where
423        S: Serializer,
424    {
425        serializer.serialize_str(self.0.as_str())
426    }
427}
428
429/// A [Timestamp Identifier].
430///
431/// [Timestamp Identifier]: https://atproto.com/specs/tid
432#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
433#[serde(transparent)]
434pub struct Tid(String);
435string_newtype!(Tid);
436
437impl Tid {
438    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
439    /// Parses a `TID` from the given string.
440    pub fn new(tid: String) -> Result<Self, &'static str> {
441        static RE_TID: OnceLock<Regex> = OnceLock::new();
442
443        if tid.len() != 13 {
444            Err("TID must be 13 characters")
445        } else if !RE_TID
446            .get_or_init(|| {
447                Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap()
448            })
449            .is_match(&tid)
450        {
451            Err("Invalid TID")
452        } else {
453            Ok(Self(tid))
454        }
455    }
456
457    /// Construct a new timestamp with the specified clock ID.
458    ///
459    /// If you have multiple clock sources, you can use `clkid` to distinguish between them
460    /// and hint to other implementations that the timestamp cannot be compared with other
461    /// timestamps from other sources.
462    /// If you are only using a single clock source, you can just specify `0` for `clkid`.
463    pub fn from_datetime(clkid: LimitedU32<1023>, time: chrono::DateTime<chrono::Utc>) -> Self {
464        let time = time.timestamp_micros() as u64;
465
466        // The TID is laid out as follows:
467        // 0TTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTCCCCCCCCCC
468        let tid = (time << 10) & 0x7FFF_FFFF_FFFF_FC00 | (Into::<u32>::into(clkid) as u64 & 0x3FF);
469        Self(s32_encode(tid))
470    }
471
472    /// Construct a new [Tid] that represents the current time.
473    ///
474    /// If you have multiple clock sources, you can use `clkid` to distinguish between them
475    /// and hint to other implementations that the timestamp cannot be compared with other
476    /// timestamps from other sources.
477    /// If you are only using a single clock source, you can just specify `0` for `clkid`.
478    ///
479    /// _Warning:_ It's possible that this function will return the same time more than once.
480    /// If it's important that these values be unique, you will want to repeatedly call this
481    /// function until a different time is returned.
482    pub fn now(clkid: LimitedU32<1023>) -> Self {
483        Self::from_datetime(clkid, chrono::Utc::now())
484    }
485
486    /// Returns the TID as a string slice.
487    pub fn as_str(&self) -> &str {
488        self.0.as_str()
489    }
490}
491
492/// A record key (`rkey`) used to name and reference an individual record within the same
493/// collection of an atproto repository.
494#[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)]
495pub struct RecordKey(String);
496string_newtype!(RecordKey);
497
498impl RecordKey {
499    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
500    /// Parses a `Record Key` from the given string.
501    pub fn new(s: String) -> Result<Self, &'static str> {
502        static RE_RKEY: OnceLock<Regex> = OnceLock::new();
503
504        if [".", ".."].contains(&s.as_str()) {
505            Err("Disallowed rkey")
506        } else if !RE_RKEY
507            .get_or_init(|| Regex::new(r"^[a-zA-Z0-9.\-_:~]{1,512}$").unwrap())
508            .is_match(&s)
509        {
510            Err("Invalid rkey")
511        } else {
512            Ok(Self(s))
513        }
514    }
515
516    /// Returns the record key as a string slice.
517    pub fn as_str(&self) -> &str {
518        self.0.as_str()
519    }
520}
521
#[cfg(test)]
mod tests {
    use serde_json::{from_str, to_string};

    use super::*;

    /// Wraps `raw` in double quotes so it can be fed to the JSON deserializer as a
    /// string literal. No escaping is applied.
    fn quoted(raw: &str) -> String {
        format!("\"{}\"", raw)
    }

    /// Reports whether `raw`, quoted as a JSON string, deserializes into a `T`.
    fn parses<T>(raw: &str) -> bool
    where
        T: serde::de::DeserializeOwned,
    {
        from_str::<T>(&quoted(raw)).is_ok()
    }

    #[test]
    fn valid_datetime() {
        // From https://atproto.com/specs/lexicon#datetime
        let cases = [
            // preferred
            "1985-04-12T23:20:50.123Z",
            "1985-04-12T23:20:50.123456Z",
            "1985-04-12T23:20:50.120Z",
            "1985-04-12T23:20:50.120000Z",
            // supported
            "1985-04-12T23:20:50.12345678912345Z",
            "1985-04-12T23:20:50Z",
            "1985-04-12T23:20:50.0Z",
            "1985-04-12T23:20:50.123+00:00",
            "1985-04-12T23:20:50.123-07:00",
        ];
        for valid in cases {
            let json_valid = quoted(valid);
            let parsed = from_str::<Datetime>(&json_valid);
            assert!(parsed.is_ok(), "valid Datetime `{}` parsed as invalid", valid);
            // Re-serializing must reproduce the input exactly.
            let reencoded = to_string(&parsed.unwrap()).unwrap();
            assert_eq!(reencoded, json_valid);
        }
    }

    #[test]
    fn invalid_datetime() {
        // From https://atproto.com/specs/lexicon#datetime
        let cases = [
            "1985-04-12",
            "1985-04-12T23:20Z",
            "1985-04-12T23:20:5Z",
            "1985-04-12T23:20:50.123",
            "+001985-04-12T23:20:50.123Z",
            "23:20:50.123Z",
            "-1985-04-12T23:20:50.123Z",
            "1985-4-12T23:20:50.123Z",
            "01985-04-12T23:20:50.123Z",
            "1985-04-12T23:20:50.123+00",
            "1985-04-12T23:20:50.123+0000",
            // ISO-8601 strict capitalization
            "1985-04-12t23:20:50.123Z",
            "1985-04-12T23:20:50.123z",
            // RFC-3339, but not ISO-8601
            "1985-04-12T23:20:50.123-00:00",
            "1985-04-12 23:20:50.123Z",
            // timezone is required
            "1985-04-12T23:20:50.123",
            // syntax looks ok, but datetime is not valid
            "1985-04-12T23:99:50.123Z",
            "1985-00-12T23:20:50.123Z",
        ];
        for invalid in cases {
            assert!(
                !parses::<Datetime>(invalid),
                "invalid Datetime `{}` parsed as valid",
                invalid,
            );
        }
    }

    #[test]
    fn datetime_round_trip() {
        let original = Datetime::now();
        let encoded = to_string(&original).unwrap();
        let decoded = from_str::<Datetime>(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn valid_did() {
        // From https://atproto.com/specs/did#examples
        let cases = [
            "did:plc:z72i7hdynmk6r22z27h6tvur",
            "did:web:blueskyweb.xyz",
            "did:method:val:two",
            "did:m:v",
            "did:method::::val",
            "did:method:-:_:.",
            "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
        ];
        for valid in cases {
            assert!(parses::<Did>(valid), "valid DID `{}` parsed as invalid", valid);
        }
    }

    #[test]
    fn invalid_did() {
        // From https://atproto.com/specs/did#examples
        let cases = [
            "did:METHOD:val",
            "did:m123:val",
            "DID:method:val",
            "did:method:",
            "did:method:val/two",
            "did:method:val?two",
            "did:method:val#two",
        ];
        for invalid in cases {
            assert!(!parses::<Did>(invalid), "invalid DID `{}` parsed as valid", invalid);
        }
    }

    #[test]
    fn did_method() {
        // From https://atproto.com/specs/did#examples
        let cases = [
            ("did:plc", "did:plc:z72i7hdynmk6r22z27h6tvur"),
            ("did:web", "did:web:blueskyweb.xyz"),
            ("did:method", "did:method:val:two"),
            ("did:m", "did:m:v"),
            ("did:method", "did:method::::val"),
            ("did:method", "did:method:-:_:."),
            ("did:key", "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N"),
        ];
        for (method, did) in cases {
            let parsed = Did::new(did.to_string()).unwrap();
            assert_eq!(parsed.method(), method);
        }
    }

    #[test]
    fn valid_handle() {
        // From https://atproto.com/specs/handle#identifier-examples
        let cases = [
            "jay.bsky.social",
            "8.cn",
            "name.t--t", // not a real TLD, but syntax ok
            "XX.LCS.MIT.EDU",
            "a.co",
            "xn--notarealidn.com",
            "xn--fiqa61au8b7zsevnm8ak20mc4a87e.xn--fiqs8s",
            "xn--ls8h.test",
            "example.t", // not a real TLD, but syntax ok
            // Valid syntax, but must always fail resolution due to other restrictions:
            "2gzyxa5ihm7nsggfxnu52rck2vv4rvmdlkiu3zzui5du4xyclen53wid.onion",
            "laptop.local",
            "blah.arpa",
        ];
        for valid in cases {
            assert!(parses::<Handle>(valid), "valid handle `{}` parsed as invalid", valid);
        }
    }

    #[test]
    fn invalid_handle() {
        // From https://atproto.com/specs/handle#identifier-examples
        let cases = [
            "jo@hn.test",
            "💩.test",
            "john..test",
            "xn--bcher-.tld",
            "john.0",
            "cn.8",
            "www.masełkowski.pl.com",
            "org",
            "name.org.",
        ];
        for invalid in cases {
            assert!(!parses::<Handle>(invalid), "invalid handle `{}` parsed as valid", invalid);
        }
    }

    #[test]
    fn valid_nsid() {
        // From https://atproto.com/specs/nsid#examples
        let cases = [
            "com.example.fooBar",
            "net.users.bob.ping",
            "a-0.b-1.c",
            "a.b.c",
            "com.example.fooBarV2",
            "cn.8.lex.stuff",
        ];
        for valid in cases {
            assert!(parses::<Nsid>(valid), "valid NSID `{}` parsed as invalid", valid);
        }
    }

    #[test]
    fn invalid_nsid() {
        // From https://atproto.com/specs/nsid#examples
        let cases = ["com.exa💩ple.thing", "com.example", "com.example.3"];
        for invalid in cases {
            assert!(!parses::<Nsid>(invalid), "invalid NSID `{}` parsed as valid", invalid);
        }
    }

    #[test]
    fn nsid_parts() {
        // From https://atproto.com/specs/nsid#examples
        let cases = [
            ("com.example.fooBar", "com.example", "fooBar"),
            ("net.users.bob.ping", "net.users.bob", "ping"),
            ("a-0.b-1.c", "a-0.b-1", "c"),
            ("a.b.c", "a.b", "c"),
            ("cn.8.lex.stuff", "cn.8.lex", "stuff"),
        ];
        for (raw, domain_authority, name) in cases {
            let nsid = Nsid::new(raw.to_string()).unwrap();
            assert_eq!(nsid.domain_authority(), domain_authority);
            assert_eq!(nsid.name(), name);
        }
    }

    #[test]
    fn valid_language() {
        // From https://www.rfc-editor.org/rfc/rfc5646.html#appendix-A
        let cases = [
            // Simple language subtag:
            "de",         // German
            "fr",         // French
            "ja",         // Japanese
            "i-enochian", // example of a grandfathered tag
            // Language subtag plus Script subtag:
            "zh-Hant", // Chinese written using the Traditional Chinese script
            "zh-Hans", // Chinese written using the Simplified Chinese script
            "sr-Cyrl", // Serbian written using the Cyrillic script
            "sr-Latn", // Serbian written using the Latin script
            // Extended language subtags and their primary language subtag counterparts:
            "zh-cmn-Hans-CN", // Chinese, Mandarin, Simplified script, as used in China
            "cmn-Hans-CN",    // Mandarin Chinese, Simplified script, as used in China
            "zh-yue-HK",      // Chinese, Cantonese, as used in Hong Kong SAR
            "yue-HK",         // Cantonese Chinese, as used in Hong Kong SAR
            // Language-Script-Region:
            "zh-Hans-CN", // Chinese written using the Simplified script as used in mainland China
            "sr-Latn-RS", // Serbian written using the Latin script as used in Serbia
            // Language-Variant:
            "sl-rozaj",       // Resian dialect of Slovenian
            "sl-rozaj-biske", // San Giorgio dialect of Resian dialect of Slovenian
            "sl-nedis",       // Nadiza dialect of Slovenian
            // Language-Region-Variant:
            "de-CH-1901", // German as used in Switzerland using the 1901 variant orthography
            "sl-IT-nedis", // Slovenian as used in Italy, Nadiza dialect
            // Language-Script-Region-Variant:
            "hy-Latn-IT-arevela", // Eastern Armenian written in Latin script, as used in Italy
            // Language-Region:
            "de-DE",  // German for Germany
            "en-US",  // English as used in the United States
            "es-419", // Spanish appropriate for the Latin America and Caribbean region using the UN region code
            // Private use subtags:
            "de-CH-x-phonebk",
            "az-Arab-x-AZE-derbend",
            // Private use registry values:
            "x-whatever",             // private use using the singleton 'x'
            "qaa-Qaaa-QM-x-southern", // all private tags
            "de-Qaaa",                // German, with a private script
            "sr-Latn-QM",             // Serbian, Latin script, private region
            "sr-Qaaa-RS",             // Serbian, private script, for Serbia
            // Tags that use extensions (examples ONLY -- extensions MUST be defined by RFC):
            "en-US-u-islamcal",
            "zh-CN-a-myext-x-private",
            "en-a-myext-b-another",
            // Invalid tags that are well-formed:
            "ar-a-aaa-b-bbb-a-ccc", // two extensions with same single-letter prefix
        ];
        for valid in cases {
            let json_valid = quoted(valid);
            let parsed = from_str::<Language>(&json_valid);
            assert!(parsed.is_ok(), "valid language `{}` parsed as invalid", valid);
            // Re-serializing must reproduce the input exactly.
            let reencoded = to_string(&parsed.unwrap()).unwrap();
            assert_eq!(reencoded, json_valid);
        }
    }

    #[test]
    fn invalid_language() {
        // From https://www.rfc-editor.org/rfc/rfc5646.html#appendix-A
        let cases = [
            "de-419-DE", // two region tags
            // use of a single-character subtag in primary position; note that there are a
            // few grandfathered tags that start with "i-" that are valid
            "a-DE",
        ];
        for invalid in cases {
            assert!(
                !parses::<Language>(invalid),
                "invalid language `{}` parsed as valid",
                invalid,
            );
        }
    }

    #[test]
    fn tid_encode() {
        assert_eq!(s32_encode(0), "2222222222222");
        assert_eq!(s32_encode(1), "2222222222223");
    }

    #[test]
    fn tid_construct() {
        let when = chrono::DateTime::from_timestamp(1738430999, 0).unwrap();
        let tid = Tid::from_datetime(0.try_into().unwrap(), when);
        assert_eq!(tid.as_str(), "3lh5234mwy222");
    }

    #[test]
    fn valid_tid() {
        let cases = ["3jzfcijpj2z2a", "7777777777777", "3zzzzzzzzzzzz"];
        for valid in cases {
            assert!(parses::<Tid>(valid), "valid TID `{}` parsed as invalid", valid);
        }
    }

    #[test]
    fn invalid_tid() {
        let cases = [
            // not base32
            "3jzfcijpj2z21",
            "0000000000000",
            // too long/short
            "3jzfcijpj2z2aa",
            "3jzfcijpj2z2",
            // old dashes syntax not actually supported (TTTT-TTT-TTTT-CC)
            "3jzf-cij-pj2z-2a",
            // high bit can't be high
            "zzzzzzzzzzzzz",
            "kjzfcijpj2z2a",
        ];
        for invalid in cases {
            assert!(!parses::<Tid>(invalid), "invalid TID `{}` parsed as valid", invalid);
        }
    }

    #[test]
    fn valid_rkey() {
        // From https://atproto.com/specs/record-key#examples
        let cases = [
            "3jui7kd54zh2y",
            "self",
            "literal:self",
            "example.com",
            "~1.2-3_",
            "dHJ1ZQ",
            "pre:fix",
            "_",
        ];
        for valid in cases {
            assert!(parses::<RecordKey>(valid), "valid rkey `{}` parsed as invalid", valid);
        }
    }

    #[test]
    fn invalid_rkey() {
        // From https://atproto.com/specs/record-key#examples
        let cases = [
            "alpha/beta",
            ".",
            "..",
            "#extra",
            "@handle",
            "any space",
            "any+space",
            "number[3]",
            "number(3)",
            "\"quote\"",
            "dHJ1ZQ==",
        ];
        for invalid in cases {
            assert!(!parses::<RecordKey>(invalid), "invalid rkey `{}` parsed as valid", invalid);
        }
    }
}