// jacquard_common/types/tid.rs

1use alloc::borrow::ToOwned;
2use alloc::string::{String, ToString};
3use core::fmt;
4use core::ops::Deref;
5use core::str::FromStr;
6
7use serde::{Deserialize, Deserializer, Serialize, de::Error};
8use smol_str::{SmolStr, SmolStrBuilder};
9
10use super::Lazy;
11
12use crate::CowStr;
13use crate::types::integer::LimitedU32;
14use crate::types::string::{AtStrError, StrParseKind};
15#[cfg(all(not(target_arch = "wasm32"), feature = "std"))]
16use regex::Regex;
17#[cfg(all(not(target_arch = "wasm32"), not(feature = "std")))]
18use regex_automata::meta::Regex;
19#[cfg(target_arch = "wasm32")]
20use regex_lite::Regex;
21
22const S32_CHAR: &str = "234567abcdefghijklmnopqrstuvwxyz";
23
24fn s32_encode(mut i: u64) -> SmolStr {
25    let mut s = SmolStrBuilder::new();
26    for _ in 0..13 {
27        let c = i & 0x1F;
28        s.push(S32_CHAR.chars().nth(c as usize).unwrap());
29
30        i >>= 5;
31    }
32
33    let mut builder = SmolStrBuilder::new();
34    for c in s.finish().chars().rev() {
35        builder.push(c);
36    }
37    builder.finish()
38}
39
/// Regex for TID validation per AT Protocol spec.
///
/// Matches exactly 13 characters: the first is restricted to `2`-`7` / `a`-`j`, the
/// remaining twelve may be any character of the base32-sortable alphabet.
/// Compiled on first use; the `unwrap` can only fail if the hard-coded pattern is invalid.
static TID_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap()
});
44
/// Timestamp Identifier (TID) for record keys and commit revisions
///
/// TIDs are compact, sortable identifiers based on timestamps. They're used as record keys
/// and repository commit revision numbers in AT Protocol.
///
/// Format:
/// - Always 13 ASCII characters
/// - Base32-sortable encoding (`234567abcdefghijklmnopqrstuvwxyz`)
/// - First 53 bits: microseconds since UNIX epoch
/// - Final 10 bits: clock identifier for collision resistance
///
/// TIDs are sortable by timestamp and suitable for use in URLs. Generate new TIDs with
/// [`Tid::now`] (explicit clock ID), [`Tid::now_0`] (clock ID 0), or a [`Ticker`]
/// (random clock ID, strictly increasing timestamps).
///
/// See: <https://atproto.com/specs/tid>
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
#[repr(transparent)]
pub struct Tid(SmolStr);
64
65impl Tid {
66    /// Parses a `TID` from the given string.
67    pub fn new(tid: impl AsRef<str>) -> Result<Self, AtStrError> {
68        let tid = tid.as_ref();
69        if tid.len() != 13 {
70            let kind = if tid.len() > 13 {
71                StrParseKind::TooLong {
72                    max: 13,
73                    actual: tid.len(),
74                }
75            } else {
76                StrParseKind::TooShort {
77                    min: 13,
78                    actual: tid.len(),
79                }
80            };
81            Err(AtStrError::new("tid", tid.to_string(), kind))
82        } else if !TID_REGEX.is_match(&tid.as_ref()) {
83            let kind = StrParseKind::RegexFail {
84                span: None,
85                message: SmolStr::new_static("didn't match schema"),
86            };
87            Err(AtStrError::new("tid", tid.to_string(), kind))
88        } else {
89            Ok(Self(SmolStr::new_inline(&tid)))
90        }
91    }
92
93    /// Infallible constructor for when you *know* the string is a valid TID.
94    /// Will panic on invalid TID. If you're manually decoding atproto records
95    /// or API values you know are valid (rather than using serde), this is the one to use.
96    /// The `From<String>` and `From<CowStr>` impls use the same logic.
97    pub fn raw(tid: impl AsRef<str>) -> Self {
98        let tid = tid.as_ref();
99        if tid.len() != 13 {
100            panic!("TID must be 13 characters")
101        } else if !TID_REGEX.is_match(&tid) {
102            panic!("Invalid TID")
103        } else {
104            Self(SmolStr::new_inline(tid))
105        }
106    }
107
108    /// Infallible constructor for when you *know* the string is a valid TID.
109    /// Marked unsafe because responsibility for upholding the invariant is on the developer.
110    pub unsafe fn unchecked(tid: impl AsRef<str>) -> Self {
111        let tid = tid.as_ref();
112        Self(SmolStr::new_inline(tid))
113    }
114
115    /// Construct a new timestamp with the specified clock ID.
116    ///
117    /// If you have multiple clock sources, you can use `clkid` to distinguish between them
118    /// and hint to other implementations that the timestamp cannot be compared with other
119    /// timestamps from other sources.
120    /// If you are only using a single clock source, you can just specify `0` for `clkid`.
121    pub fn from_datetime(clkid: LimitedU32<1023>, time: chrono::DateTime<chrono::Utc>) -> Self {
122        let time = time.timestamp_micros() as u64;
123
124        // The TID is laid out as follows:
125        // 0TTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTCCCCCCCCCC
126        let tid = (time << 10) & 0x7FFF_FFFF_FFFF_FC00 | (Into::<u32>::into(clkid) as u64 & 0x3FF);
127        Self(s32_encode(tid))
128    }
129
130    /// Construct a TID from a timestamp (in microseconds) and clock ID
131    pub fn from_time(timestamp: u64, clkid: u32) -> Self {
132        // Combine timestamp and clock ID into single u64: 53 bits timestamp + 10 bits clock ID
133        // 0TTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTCCCCCCCCCC
134        let tid = (timestamp << 10) & 0x7FFF_FFFF_FFFF_FC00 | (clkid as u64 & 0x3FF);
135        Self(s32_encode(tid))
136    }
137
138    /// Extract the timestamp component (microseconds since UNIX epoch)
139    pub fn timestamp(&self) -> u64 {
140        s32decode(self.0[0..11].to_owned())
141    }
142
143    /// Compare two TIDs chronologically (newer > older)
144    ///
145    /// Returns 1 if self is newer, -1 if older, 0 if equal
146    pub fn compare_to(&self, other: &Tid) -> i8 {
147        if self.0 > other.0 {
148            return 1;
149        }
150        if self.0 < other.0 {
151            return -1;
152        }
153        0
154    }
155
156    /// Check if this TID is newer than another
157    pub fn newer_than(&self, other: &Tid) -> bool {
158        self.compare_to(other) > 0
159    }
160
161    /// Check if this TID is older than another
162    pub fn older_than(&self, other: &Tid) -> bool {
163        self.compare_to(other) < 0
164    }
165
166    /// Generate the next TID in sequence after the given TID
167    pub fn next_str(prev: Option<Tid>) -> Result<Self, AtStrError> {
168        let prev = match prev {
169            None => None,
170            Some(prev) => Some(Tid::new(prev)?),
171        };
172        Ok(Ticker::new().next(prev))
173    }
174
175    /// Construct a new [Tid] that represents the current time.
176    ///
177    /// If you have multiple clock sources, you can use `clkid` to distinguish between them
178    /// and hint to other implementations that the timestamp cannot be compared with other
179    /// timestamps from other sources.
180    /// If you are only using a single clock source, you can just specify `0` for `clkid`.
181    ///
182    /// TODO: fix to auto-increment if it would return the same value twice
183    pub fn now(clkid: LimitedU32<1023>) -> Self {
184        Self::from_datetime(clkid, chrono::Utc::now())
185    }
186
187    /// Construct a new [Tid] that represents the current time with clkid 0.
188    ///
189    /// TODO: fix to auto-increment if it would return the same value twice
190    pub fn now_0() -> Self {
191        Self::from_datetime(LimitedU32::from_str("0").unwrap(), chrono::Utc::now())
192    }
193
194    /// Returns the TID as a string slice.
195    pub fn as_str(&self) -> &str {
196        {
197            let this = &self.0;
198            this
199        }
200    }
201}
202
203/// Decode a base32-sortable string into a usize
204pub fn s32decode(s: String) -> u64 {
205    let mut i: usize = 0;
206    for c in s.chars() {
207        i = i * 32 + S32_CHAR.chars().position(|x| x == c).unwrap();
208    }
209    i as u64
210}
211
impl FromStr for Tid {
    type Err = AtStrError;

    /// Parses and validates a TID from a string slice by delegating to `Tid::new`.
    ///
    /// The returned `Tid` owns its data and does not borrow from `s`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s)
    }
}
221
222impl<'de> Deserialize<'de> for Tid {
223    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
224    where
225        D: Deserializer<'de>,
226    {
227        let value: &str = Deserialize::deserialize(deserializer)?;
228        Self::new(value).map_err(D::Error::custom)
229    }
230}
231
232impl fmt::Display for Tid {
233    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234        f.write_str(&self.0)
235    }
236}
237
238impl From<Tid> for String {
239    fn from(value: Tid) -> Self {
240        value.0.to_string()
241    }
242}
243
244impl From<Tid> for SmolStr {
245    fn from(value: Tid) -> Self {
246        value.0
247    }
248}
249
250impl crate::IntoStatic for Tid {
251    type Output = Tid;
252
253    fn into_static(self) -> Self::Output {
254        self
255    }
256}
257
258impl From<String> for Tid {
259    fn from(value: String) -> Self {
260        if value.len() != 13 {
261            panic!("TID must be 13 characters")
262        } else if !TID_REGEX.is_match(&value) {
263            panic!("Invalid TID")
264        } else {
265            Self(SmolStr::new_inline(&value))
266        }
267    }
268}
269
270impl<'t> From<CowStr<'t>> for Tid {
271    fn from(value: CowStr<'t>) -> Self {
272        if value.len() != 13 {
273            panic!("TID must be 13 characters")
274        } else if !TID_REGEX.is_match(&value) {
275            panic!("Invalid TID")
276        } else {
277            Self(SmolStr::new_inline(&value))
278        }
279    }
280}
281
282impl AsRef<str> for Tid {
283    fn as_ref(&self) -> &str {
284        self.as_str()
285    }
286}
287
288impl Deref for Tid {
289    type Target = str;
290
291    fn deref(&self) -> &Self::Target {
292        self.as_str()
293    }
294}
295
/// Stateful TID generator.
///
/// Based on adenosine/adenosine/src/identifiers.rs
/// TODO: clean up and normalize stuff between this and the stuff pulled from atrium
pub struct Ticker {
    /// Timestamp (microseconds) used for the most recent tick; `next` either advances it
    /// to the current time or increments it by one.
    last_timestamp: u64,
    /// Clock ID stamped into generated TIDs; `new` draws it at random and masks it to 10 bits.
    clock_id: u32,
}
302
303impl Ticker {
304    /// Create a new TID generator with random clock ID
305    pub fn new() -> Self {
306        let mut ticker = Self {
307            last_timestamp: 0,
308            // mask to 10 bits
309            clock_id: rand::random::<u32>() & 0x03FF,
310        };
311        // prime the pump
312        ticker.next(None);
313        ticker
314    }
315
316    /// Generate the next TID, optionally ensuring it's after the given TID
317    pub fn next(&mut self, prev: Option<Tid>) -> Tid {
318        let now = chrono::Utc::now().timestamp_micros() as u64;
319        // mask to 53 bits
320        let now = now & 0x001FFFFFFFFFFFFF;
321        if now > self.last_timestamp {
322            self.last_timestamp = now;
323        } else {
324            self.last_timestamp += 1;
325        }
326        // 53 bits of millis
327        let micros = self.last_timestamp & 0x001FFFFFFFFFFFFF;
328        // 10 bits of clock ID
329        let clock_id = self.clock_id & 0x03FF;
330
331        let tid = Tid::from_time(micros, clock_id as u32);
332        match prev {
333            Some(ref prev) if tid.newer_than(prev) => tid,
334            Some(prev) => Tid::from_time(prev.timestamp() + 1, clock_id as u32),
335            None => tid,
336        }
337    }
338}
339
340impl Default for Ticker {
341    fn default() -> Self {
342        Self::new()
343    }
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn valid_tids() {
        assert!(Tid::new("3jzfcijpj2z2a").is_ok());
        assert!(Tid::new("2222222222222").is_ok());
        // `j` is the largest first character that keeps the top bit clear, so it is valid.
        assert!(Tid::new("j777777777777").is_ok());
        // 14 characters: rejected for its length, whatever the content.
        assert!(Tid::new("j7777777777777").is_err());
    }

    #[test]
    fn exact_length() {
        assert!(Tid::new("3jzfcijpj2z2a").is_ok());
        assert!(Tid::new("3jzfcijpj2z2").is_err()); // 12 chars
        assert!(Tid::new("3jzfcijpj2z2aa").is_err()); // 14 chars
    }

    #[test]
    fn first_char_constraint() {
        // First char must be 2-7 or a-j (not k-z)
        assert!(Tid::new("2222222222222").is_ok());
        assert!(Tid::new("7777777777777").is_ok());
        assert!(Tid::new("a222222222222").is_ok());
        assert!(Tid::new("j222222222222").is_ok());
        assert!(Tid::new("k222222222222").is_err());
        assert!(Tid::new("z222222222222").is_err());
    }

    #[test]
    fn remaining_chars_constraint() {
        // Remaining 12 chars must be 2-7 or a-z
        assert!(Tid::new("3abcdefghijkl").is_ok());
        assert!(Tid::new("3zzzzzzzzzzzz").is_ok());
        assert!(Tid::new("3222222222222").is_ok());
        assert!(Tid::new("3777777777777").is_ok());
    }

    #[test]
    fn disallowed_characters() {
        assert!(Tid::new("3jzfcijpj2z2A").is_err()); // uppercase
        assert!(Tid::new("3jzfcijpj2z21").is_err()); // 1 not allowed
        assert!(Tid::new("3jzfcijpj2z28").is_err()); // 8 not allowed
        assert!(Tid::new("3jzfcijpj2z2-").is_err()); // special char
    }

    #[test]
    fn from_time_round_trips_timestamp() {
        // Zero timestamp and clock ID encode to the all-`2` (all-zero-digit) TID.
        assert_eq!(Tid::from_time(0, 0).as_str(), "2222222222222");

        let micros: u64 = 1_700_000_000_000_000;
        let tid = Tid::from_time(micros, 5);
        assert_eq!(tid.as_str().len(), 13);
        assert_eq!(tid.timestamp(), micros);
        assert!(Tid::new(tid.as_str()).is_ok());
    }

    #[test]
    fn generation_and_comparison() {
        let tid1 = Tid::now_0();
        std::thread::sleep(std::time::Duration::from_micros(10));
        let tid2 = Tid::now_0();

        assert!(tid1.as_str().len() == 13);
        assert!(tid2.as_str().len() == 13);
        assert!(tid2.newer_than(&tid1));
        assert!(tid1.older_than(&tid2));
    }

    #[test]
    fn ticker_monotonic() {
        let mut ticker = Ticker::new();
        let tid1 = ticker.next(None);
        let tid2 = ticker.next(Some(tid1.clone()));
        let tid3 = ticker.next(Some(tid2.clone()));

        assert!(tid2.newer_than(&tid1));
        assert!(tid3.newer_than(&tid2));
    }
}
414}