Skip to main content

proto_blue_syntax/
tid.rs

1//! TID (Timestamp Identifier) validation and types.
2//!
3//! TIDs are 13-character base32-sortable timestamp identifiers.
4//! See: <https://atproto.com/specs/record-key#record-key-type-tid>
5
6use once_cell::sync::Lazy;
7use regex::Regex;
8use std::fmt;
9use std::str::FromStr;
10
11/// Exact length of a TID.
12const TID_LENGTH: usize = 13;
13
14/// Base32-sortable alphabet used by TIDs.
15const S32_CHARSET: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz";
16
17static TID_REGEX: Lazy<Regex> = Lazy::new(|| {
18    Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap()
19});
20
/// A TID (Timestamp Identifier) held as its 13-character string form.
///
/// Values obtained through [`Tid::new`] have passed validation. Equality,
/// ordering and hashing are all derived from the wrapped string.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Tid(String);
26
27/// Error returned when a TID string is invalid.
28#[derive(Debug, Clone, thiserror::Error)]
29#[error("Invalid TID: {reason}")]
30pub struct InvalidTidError {
31    pub reason: String,
32}
33
34impl Tid {
35    /// Create a new `Tid` from a string, validating the format.
36    pub fn new(s: &str) -> Result<Self, InvalidTidError> {
37        ensure_valid_tid(s)?;
38        Ok(Tid(s.to_string()))
39    }
40
41    /// Check whether a string is a valid TID.
42    pub fn is_valid(s: &str) -> bool {
43        ensure_valid_tid(s).is_ok()
44    }
45
46    /// Return the inner string.
47    pub fn as_str(&self) -> &str {
48        &self.0
49    }
50
51    /// Consume and return the inner string.
52    pub fn into_inner(self) -> String {
53        self.0
54    }
55
56    /// Decode the TID to its underlying microsecond timestamp.
57    pub fn timestamp_micros(&self) -> u64 {
58        s32_decode(&self.0[..11])
59    }
60
61    /// Encode a microsecond timestamp and clock ID into a TID.
62    pub fn from_timestamp(timestamp_micros: u64, clock_id: u16) -> Self {
63        // Upper 53 bits: timestamp, lower 10 bits: clock_id
64        let tid_int = (timestamp_micros << 10) | (clock_id as u64 & 0x3FF);
65        // Ensure top bit is 0
66        let tid_int = tid_int & 0x7FFFFFFFFFFFFFFF;
67        let encoded = s32_encode(tid_int);
68        Tid(encoded)
69    }
70}
71
72/// Encode a u64 to a 13-char base32-sortable string.
73fn s32_encode(mut v: u64) -> String {
74    let mut out = [b'2'; TID_LENGTH];
75    for i in (0..TID_LENGTH).rev() {
76        out[i] = S32_CHARSET[(v & 0x1F) as usize];
77        v >>= 5;
78    }
79    String::from_utf8(out.to_vec()).unwrap()
80}
81
/// Interpret `s` as a big-endian base32-sortable number and return its value.
///
/// `'2'..='7'` map to digits 0-5 and `'a'..='z'` to digits 6-31. Any other
/// byte is treated as digit 0. Bits shifted beyond the 64th are dropped.
fn s32_decode(s: &str) -> u64 {
    s.bytes().fold(0u64, |acc, byte| {
        let digit = match byte {
            b'2'..=b'7' => byte - b'2',
            b'a'..=b'z' => byte - b'a' + 6,
            _ => 0,
        };
        (acc << 5) | u64::from(digit)
    })
}
95
96fn ensure_valid_tid(s: &str) -> Result<(), InvalidTidError> {
97    let err = |reason: &str| InvalidTidError {
98        reason: reason.to_string(),
99    };
100
101    if s.len() != TID_LENGTH {
102        return Err(err(&format!(
103            "TID must be exactly {} characters, got {}",
104            TID_LENGTH,
105            s.len()
106        )));
107    }
108
109    if !TID_REGEX.is_match(s) {
110        return Err(err(
111            "TID must match base32-sortable pattern (first char [234567abcdefghij], rest [234567abcdefghijklmnopqrstuvwxyz])",
112        ));
113    }
114
115    Ok(())
116}
117
118impl fmt::Display for Tid {
119    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
120        f.write_str(&self.0)
121    }
122}
123
124impl FromStr for Tid {
125    type Err = InvalidTidError;
126    fn from_str(s: &str) -> Result<Self, Self::Err> {
127        Tid::new(s)
128    }
129}
130
131impl AsRef<str> for Tid {
132    fn as_ref(&self) -> &str {
133        &self.0
134    }
135}
136
137impl serde::Serialize for Tid {
138    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
139        self.0.serialize(serializer)
140    }
141}
142
143impl<'de> serde::Deserialize<'de> for Tid {
144    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
145        let s = String::deserialize(deserializer)?;
146        Tid::new(&s).map_err(serde::de::Error::custom)
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed TIDs are accepted; the first character is limited to `2`..=`j`.
    #[test]
    fn valid_tids() {
        assert!(Tid::new("3jui7kd54zh2y").is_ok());
        assert!(Tid::new("2222222222222").is_ok());
        assert!(Tid::new("jzzzzzzzzzzzy").is_ok()); // 'j' is the last valid first char
        assert!(Tid::new("kzzzzzzzzzzzy").is_err()); // 'k' is NOT valid as first char
    }

    /// Wrong length or characters outside the alphabet are rejected.
    #[test]
    fn invalid_tids() {
        assert!(Tid::new("").is_err());
        assert!(Tid::new("too_short").is_err());
        assert!(Tid::new("0000000000000").is_err()); // '0' not in charset
        assert!(Tid::new("3jui7kd54zh2yX").is_err()); // too long
    }

    /// Only exactly 13 characters pass the length check.
    #[test]
    fn length_check() {
        assert!(Tid::new("abcdefghijklm").is_ok());
        assert!(Tid::new("abcdefghijkl").is_err()); // 12 chars
        assert!(Tid::new("abcdefghijklmn").is_err()); // 14 chars
    }

    /// A TID built from a timestamp is valid and decodes back to the same values.
    #[test]
    fn from_timestamp_roundtrip() {
        let ts: u64 = 1_700_000_000_000_000; // microseconds
        let clock_id: u16 = 42;
        let tid = Tid::from_timestamp(ts, clock_id);
        assert_eq!(tid.as_str().len(), 13);
        assert!(Tid::is_valid(tid.as_str()));
        // The actual round trip: the timestamp must survive encode + decode.
        assert_eq!(tid.timestamp_micros(), ts);
        // The last two characters hold the 10-bit clock ID.
        assert_eq!(s32_decode(&tid.as_str()[11..]), u64::from(clock_id));
    }

    /// `s32_decode` inverts `s32_encode`, and zero encodes to all-`'2'`.
    #[test]
    fn s32_encode_decode() {
        let val: u64 = 12345678;
        let encoded = s32_encode(val);
        let decoded = s32_decode(&encoded);
        assert_eq!(decoded, val);
        assert_eq!(s32_encode(0), "2222222222222");
    }
}