Skip to main content

proto_blue_syntax/
tid.rs

1//! TID (Timestamp Identifier) validation and types.
2//!
3//! TIDs are 13-character base32-sortable timestamp identifiers.
4//! See: <https://atproto.com/specs/record-key#record-key-type-tid>
5
6use regex::Regex;
7use std::fmt;
8use std::str::FromStr;
9
10/// Exact length of a TID.
11const TID_LENGTH: usize = 13;
12
13/// Base32-sortable alphabet used by TIDs.
14const S32_CHARSET: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz";
15
16static TID_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
17    Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap()
18});
19
/// A TID (Timestamp Identifier) string that has passed validation.
///
/// Wraps the 13-character base32-sortable text form. Equality, hashing and
/// ordering are all derived from the wrapped string.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tid(String);
25
26/// Error returned when a TID string is invalid.
27#[derive(Debug, Clone, thiserror::Error)]
28#[error("Invalid TID: {reason}")]
29pub struct InvalidTidError {
30    pub reason: String,
31}
32
33impl Tid {
34    /// Create a new `Tid` from a string, validating the format.
35    pub fn new(s: &str) -> Result<Self, InvalidTidError> {
36        ensure_valid_tid(s)?;
37        Ok(Self(s.to_string()))
38    }
39
40    /// Check whether a string is a valid TID.
41    #[must_use]
42    pub fn is_valid(s: &str) -> bool {
43        ensure_valid_tid(s).is_ok()
44    }
45
46    /// Return the inner string.
47    #[must_use]
48    pub fn as_str(&self) -> &str {
49        &self.0
50    }
51
52    /// Consume and return the inner string.
53    #[must_use]
54    pub fn into_inner(self) -> String {
55        self.0
56    }
57
58    /// Decode the TID to its underlying microsecond timestamp.
59    #[must_use]
60    pub fn timestamp_micros(&self) -> u64 {
61        s32_decode(&self.0[..11])
62    }
63
64    /// Encode a microsecond timestamp and clock ID into a TID.
65    #[must_use]
66    pub fn from_timestamp(timestamp_micros: u64, clock_id: u16) -> Self {
67        // Upper 53 bits: timestamp, lower 10 bits: clock_id
68        let tid_int = (timestamp_micros << 10) | (u64::from(clock_id) & 0x3FF);
69        // Ensure top bit is 0
70        let tid_int = tid_int & 0x7FFF_FFFF_FFFF_FFFF;
71        let encoded = s32_encode(tid_int);
72        Self(encoded)
73    }
74}
75
76/// Encode a u64 to a 13-char base32-sortable string.
77fn s32_encode(mut v: u64) -> String {
78    let mut out = [b'2'; TID_LENGTH];
79    for i in (0..TID_LENGTH).rev() {
80        out[i] = S32_CHARSET[(v & 0x1F) as usize];
81        v >>= 5;
82    }
83    String::from_utf8(out.to_vec()).unwrap()
84}
85
/// Interpret `s` as a big-endian base32-sortable number.
///
/// `'2'..='7'` map to 0-5 and `'a'..='z'` map to 6-31. Any other byte
/// contributes a zero digit rather than causing an error. Bits shifted past
/// the top of the `u64` are dropped.
fn s32_decode(s: &str) -> u64 {
    s.bytes().fold(0_u64, |acc, symbol| {
        let digit = match symbol {
            numeral @ b'2'..=b'7' => numeral - b'2',
            letter @ b'a'..=b'z' => letter - b'a' + 6,
            _ => 0,
        };
        (acc << 5) | u64::from(digit)
    })
}
99
100fn ensure_valid_tid(s: &str) -> Result<(), InvalidTidError> {
101    let err = |reason: &str| InvalidTidError {
102        reason: reason.to_string(),
103    };
104
105    if s.len() != TID_LENGTH {
106        return Err(err(&format!(
107            "TID must be exactly {} characters, got {}",
108            TID_LENGTH,
109            s.len()
110        )));
111    }
112
113    if !TID_REGEX.is_match(s) {
114        return Err(err(
115            "TID must match base32-sortable pattern (first char [234567abcdefghij], rest [234567abcdefghijklmnopqrstuvwxyz])",
116        ));
117    }
118
119    Ok(())
120}
121
122impl fmt::Display for Tid {
123    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124        f.write_str(&self.0)
125    }
126}
127
128impl FromStr for Tid {
129    type Err = InvalidTidError;
130    fn from_str(s: &str) -> Result<Self, Self::Err> {
131        Self::new(s)
132    }
133}
134
135impl AsRef<str> for Tid {
136    fn as_ref(&self) -> &str {
137        &self.0
138    }
139}
140
141impl serde::Serialize for Tid {
142    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
143        self.0.serialize(serializer)
144    }
145}
146
147impl<'de> serde::Deserialize<'de> for Tid {
148    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
149        let s = String::deserialize(deserializer)?;
150        Self::new(&s).map_err(serde::de::Error::custom)
151    }
152}
153
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed TIDs are accepted, including the boundary on the first character.
    #[test]
    fn valid_tids() {
        assert!(Tid::new("3jui7kd54zh2y").is_ok());
        assert!(Tid::new("2222222222222").is_ok());
        assert!(Tid::new("jzzzzzzzzzzzy").is_ok()); // 'j' is the last valid first char
        assert!(Tid::new("kzzzzzzzzzzzy").is_err()); // 'k' is NOT valid as first char
    }

    /// Wrong lengths and out-of-alphabet characters are rejected.
    #[test]
    fn invalid_tids() {
        assert!(Tid::new("").is_err());
        assert!(Tid::new("too_short").is_err());
        assert!(Tid::new("0000000000000").is_err()); // '0' not in charset
        assert!(Tid::new("3jui7kd54zh2yX").is_err()); // too long
    }

    /// Exactly 13 characters are required.
    #[test]
    fn length_check() {
        assert!(Tid::new("abcdefghijklm").is_ok());
        assert!(Tid::new("abcdefghijkl").is_err()); // 12 chars
        assert!(Tid::new("abcdefghijklmn").is_err()); // 14 chars
    }

    /// A TID built from a timestamp is valid and yields the same timestamp
    /// and clock ID when decoded again.
    #[test]
    fn from_timestamp_roundtrip() {
        let ts: u64 = 1_700_000_000_000_000; // microseconds
        let clock_id: u16 = 42;
        let tid = Tid::from_timestamp(ts, clock_id);
        assert_eq!(tid.as_str().len(), 13);
        assert!(Tid::is_valid(tid.as_str()));

        // The actual round trip: the encoded timestamp must decode unchanged.
        assert_eq!(tid.timestamp_micros(), ts);
        // The last two characters hold the 10-bit clock ID.
        assert_eq!(s32_decode(&tid.as_str()[11..]), u64::from(clock_id));

        // Re-parsing the text form yields an equal TID.
        let reparsed: Tid = tid.as_str().parse().unwrap();
        assert_eq!(reparsed, tid);
    }

    /// `s32_decode` inverts `s32_encode`.
    #[test]
    fn s32_encode_decode() {
        let val: u64 = 12345678;
        let encoded = s32_encode(val);
        let decoded = s32_decode(&encoded);
        assert_eq!(decoded, val);
    }
}