Skip to main content

id_forge/
ulid.rs

1//! # ULID generation
2//!
3//! Universally Unique Lexicographically Sortable Identifier per the
4//! [ULID spec]: 128 bits split as a 48-bit big-endian millisecond
5//! timestamp followed by 80 bits of randomness. Encoded as 26
6//! Crockford base32 characters that sort byte-wise in creation order.
7//!
8//! [ULID spec]: https://github.com/ulid/spec
9//!
10//! ```
11//! use id_forge::ulid::Ulid;
12//!
13//! let a = Ulid::new();
14//! let b = Ulid::new();
15//! assert_eq!(a.to_string().len(), 26);
16//! assert!(b > a);                                  // monotonic per process
17//! let parsed = Ulid::parse_str(&a.to_string()).unwrap();
18//! assert_eq!(a, parsed);
19//! ```
20//!
21//! ## Monotonicity
22//!
23//! Within a single process, two ULIDs generated in the same
24//! millisecond are guaranteed to be byte-wise ordered: the second one
25//! is the first one's 80-bit random suffix plus one. Across
26//! milliseconds, fresh randomness is drawn. This matches the
27//! "monotonic factory" guarantee in the spec.
28//!
29//! ## Randomness
30//!
31//! The 80-bit random suffix comes from the shared inline xoshiro256\*\*
32//! generator. It is fast and statistically strong but **not**
33//! cryptographically secure.
34
35use core::fmt;
36use std::cell::RefCell;
37use std::time::{SystemTime, UNIX_EPOCH};
38
39use crate::rng;
40
41/// Crockford base32 alphabet — the 32 characters ULID display uses.
42const ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
43
44const RAND_MASK_80: u128 = (1u128 << 80) - 1;
45
46/// A 128-bit ULID.
47///
48/// Internally stored as 16 big-endian bytes: 6 bytes of millisecond
49/// timestamp followed by 10 bytes of randomness.
50///
51/// # Example
52///
53/// ```
54/// use id_forge::ulid::Ulid;
55///
56/// let id = Ulid::new();
57/// assert_eq!(id.to_string().len(), 26);
58/// ```
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
60pub struct Ulid([u8; 16]);
61
62impl Ulid {
63    /// The Nil ULID: all 128 bits set to zero.
64    ///
65    /// # Example
66    ///
67    /// ```
68    /// use id_forge::ulid::Ulid;
69    ///
70    /// assert_eq!(Ulid::nil().to_string(), "00000000000000000000000000");
71    /// ```
72    pub const fn nil() -> Self {
73        Self([0u8; 16])
74    }
75
76    /// The Max ULID: all 128 bits set to one — the largest value the
77    /// 26-character display can represent (`7ZZZ…` since the spec
78    /// reserves the top two bits of the leading character).
79    ///
80    /// # Example
81    ///
82    /// ```
83    /// use id_forge::ulid::Ulid;
84    ///
85    /// assert_eq!(Ulid::max().to_string(), "7ZZZZZZZZZZZZZZZZZZZZZZZZZ");
86    /// ```
87    pub const fn max() -> Self {
88        Self([0xff; 16])
89    }
90
91    /// Construct a new ULID at the current wall-clock millisecond.
92    ///
93    /// Two ULIDs minted in the same millisecond by the same process
94    /// are strictly ordered: the second one is the first one's random
95    /// suffix plus one. Across milliseconds, the randomness is freshly
96    /// drawn.
97    ///
98    /// # Example
99    ///
100    /// ```
101    /// use id_forge::ulid::Ulid;
102    ///
103    /// let a = Ulid::new();
104    /// let b = Ulid::new();
105    /// assert!(b > a);
106    /// ```
107    pub fn new() -> Self {
108        let ms = SystemTime::now()
109            .duration_since(UNIX_EPOCH)
110            .map(|d| d.as_millis() as u64)
111            .unwrap_or(0)
112            & ((1u64 << 48) - 1);
113        STATE.with(|cell| {
114            let mut st = cell.borrow_mut();
115            let rand = st.next_random(ms);
116            Self::pack(ms, rand)
117        })
118    }
119
120    /// Wrap a 16-byte big-endian representation as-is.
121    ///
122    /// # Example
123    ///
124    /// ```
125    /// use id_forge::ulid::Ulid;
126    ///
127    /// let id = Ulid::new();
128    /// let copy = Ulid::from_bytes(id.as_bytes());
129    /// assert_eq!(id, copy);
130    /// ```
131    pub const fn from_bytes(bytes: &[u8; 16]) -> Self {
132        Self(*bytes)
133    }
134
135    /// Return the raw 16-byte big-endian representation.
136    pub const fn as_bytes(&self) -> &[u8; 16] {
137        &self.0
138    }
139
140    /// Return the 48-bit millisecond timestamp prefix.
141    ///
142    /// # Example
143    ///
144    /// ```
145    /// use id_forge::ulid::Ulid;
146    ///
147    /// let id = Ulid::new();
148    /// assert!(id.timestamp_ms() > 0);
149    /// ```
150    pub const fn timestamp_ms(&self) -> u64 {
151        let b = &self.0;
152        ((b[0] as u64) << 40)
153            | ((b[1] as u64) << 32)
154            | ((b[2] as u64) << 24)
155            | ((b[3] as u64) << 16)
156            | ((b[4] as u64) << 8)
157            | (b[5] as u64)
158    }
159
160    /// Parse a 26-character Crockford base32 ULID, case-insensitive.
161    ///
162    /// Accepts the substitution rules from the spec: `I`, `L` decode
163    /// to `1`; `O` decodes to `0`. `U` is reserved and rejected.
164    /// Returns [`ParseError`] on length, character, or leading-bits
165    /// violations.
166    ///
167    /// # Example
168    ///
169    /// ```
170    /// use id_forge::ulid::Ulid;
171    ///
172    /// let id = Ulid::parse_str("01ARZ3NDEKTSV4RRFFQ69G5FAV").unwrap();
173    /// assert_eq!(id.to_string(), "01ARZ3NDEKTSV4RRFFQ69G5FAV");
174    /// ```
175    pub fn parse_str(input: &str) -> Result<Self, ParseError> {
176        let bytes = input.as_bytes();
177        if bytes.len() != 26 {
178            return Err(ParseError::InvalidLength(bytes.len()));
179        }
180        // Leading character must be 0-7 — only the low 3 bits are
181        // valid for a 128-bit value rendered in 130 bits.
182        let first = decode_char(bytes[0]).ok_or(ParseError::InvalidChar(0))?;
183        if first > 7 {
184            return Err(ParseError::Overflow);
185        }
186        let mut n: u128 = first as u128;
187        for (i, &c) in bytes.iter().enumerate().skip(1) {
188            let v = decode_char(c).ok_or(ParseError::InvalidChar(i))?;
189            n = (n << 5) | (v as u128);
190        }
191        Ok(Self(n.to_be_bytes()))
192    }
193
194    fn pack(ms: u64, rand: u128) -> Self {
195        let mut bytes = [0u8; 16];
196        let ms_bytes = ms.to_be_bytes();
197        bytes[0..6].copy_from_slice(&ms_bytes[2..8]);
198        let rand_bytes = rand.to_be_bytes();
199        bytes[6..16].copy_from_slice(&rand_bytes[6..16]);
200        Self(bytes)
201    }
202}
203
204impl Default for Ulid {
205    fn default() -> Self {
206        Self::nil()
207    }
208}
209
210impl fmt::Display for Ulid {
211    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
212        let n = u128::from_be_bytes(self.0);
213        let mut out = [0u8; 26];
214        for i in (0..26).rev() {
215            out[i] = ALPHABET[((n >> ((25 - i) * 5)) & 0x1f) as usize];
216        }
217        f.write_str(core::str::from_utf8(&out).unwrap_or(""))
218    }
219}
220
221impl core::str::FromStr for Ulid {
222    type Err = ParseError;
223    fn from_str(s: &str) -> Result<Self, Self::Err> {
224        Self::parse_str(s)
225    }
226}
227
228/// Error returned by [`Ulid::parse_str`].
229#[derive(Debug, Clone, Copy, PartialEq, Eq)]
230pub enum ParseError {
231    /// Input was not exactly 26 characters. The value is the actual length.
232    InvalidLength(usize),
233    /// A character at the given byte position is not in the Crockford
234    /// base32 alphabet (after applying the I/L/O/U substitution rules).
235    InvalidChar(usize),
236    /// The leading character encodes a value above 7, which would set
237    /// bits beyond the 128-bit ULID range.
238    Overflow,
239}
240
241impl fmt::Display for ParseError {
242    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
243        match self {
244            Self::InvalidLength(n) => write!(f, "expected 26 characters, got {n}"),
245            Self::InvalidChar(p) => write!(f, "invalid Crockford base32 digit at position {p}"),
246            Self::Overflow => write!(f, "leading character exceeds 128-bit range"),
247        }
248    }
249}
250
251impl std::error::Error for ParseError {}
252
253#[inline]
254const fn decode_char(c: u8) -> Option<u8> {
255    match c {
256        b'0' | b'o' | b'O' => Some(0),
257        b'1' | b'i' | b'I' | b'l' | b'L' => Some(1),
258        b'2'..=b'9' => Some(c - b'0'),
259        b'A'..=b'H' => Some(c - b'A' + 10),
260        b'J' | b'K' => Some(c - b'A' + 10 - 1),
261        b'M' | b'N' => Some(c - b'A' + 10 - 2),
262        b'P'..=b'T' => Some(c - b'A' + 10 - 3),
263        b'V'..=b'Z' => Some(c - b'A' + 10 - 4),
264        b'a'..=b'h' => Some(c - b'a' + 10),
265        b'j' | b'k' => Some(c - b'a' + 10 - 1),
266        b'm' | b'n' => Some(c - b'a' + 10 - 2),
267        b'p'..=b't' => Some(c - b'a' + 10 - 3),
268        b'v'..=b'z' => Some(c - b'a' + 10 - 4),
269        _ => None,
270    }
271}
272
273// -------- Monotonic factory state --------
274
275thread_local! {
276    static STATE: RefCell<MonotonicState> = RefCell::new(MonotonicState::default());
277}
278
279#[derive(Default)]
280struct MonotonicState {
281    last_ms: u64,
282    last_rand: u128,
283}
284
285impl MonotonicState {
286    fn next_random(&mut self, ms: u64) -> u128 {
287        if ms == self.last_ms && self.last_rand != 0 {
288            let next = self.last_rand.wrapping_add(1) & RAND_MASK_80;
289            assert!(
290                next != 0,
291                "ulid: 80-bit monotonic counter overflowed in a single millisecond"
292            );
293            self.last_rand = next;
294        } else {
295            self.last_ms = ms;
296            let hi = rng::next_u64() as u128;
297            let lo = rng::next_u64() as u128;
298            // 80 random bits: 64 from `hi`, top 16 from `lo`.
299            self.last_rand = ((hi & 0xFFFF_FFFF_FFFF_FFFF) << 16) | ((lo >> 48) & 0xFFFF);
300        }
301        self.last_rand
302    }
303}
304
305#[cfg(test)]
306mod tests {
307    use super::*;
308    use std::collections::HashSet;
309
310    #[test]
311    fn display_length_26() {
312        assert_eq!(Ulid::new().to_string().len(), 26);
313    }
314
315    #[test]
316    fn two_calls_differ() {
317        assert_ne!(Ulid::new(), Ulid::new());
318    }
319
320    #[test]
321    fn monotonic_within_ms() {
322        let a = Ulid::new();
323        let b = Ulid::new();
324        assert!(b > a, "ULIDs in same ms must be strictly ordered");
325    }
326
327    #[test]
328    fn time_ordered_across_ms() {
329        let a = Ulid::new();
330        std::thread::sleep(std::time::Duration::from_millis(2));
331        let b = Ulid::new();
332        assert!(b > a);
333        assert!(b.timestamp_ms() > a.timestamp_ms());
334    }
335
336    #[test]
337    fn nil_and_max() {
338        assert_eq!(Ulid::nil().as_bytes(), &[0u8; 16]);
339        assert_eq!(Ulid::max().as_bytes(), &[0xffu8; 16]);
340        assert_eq!(Ulid::nil().to_string(), "00000000000000000000000000");
341        assert_eq!(Ulid::max().to_string(), "7ZZZZZZZZZZZZZZZZZZZZZZZZZ");
342    }
343
344    #[test]
345    fn default_is_nil() {
346        assert_eq!(Ulid::default(), Ulid::nil());
347    }
348
349    #[test]
350    fn parse_round_trip() {
351        // Spec example.
352        let s = "01ARZ3NDEKTSV4RRFFQ69G5FAV";
353        let id = Ulid::parse_str(s).unwrap();
354        assert_eq!(id.to_string(), s);
355    }
356
357    #[test]
358    fn parse_nil() {
359        let id = Ulid::parse_str("00000000000000000000000000").unwrap();
360        assert_eq!(id, Ulid::nil());
361    }
362
363    #[test]
364    fn parse_max() {
365        let id = Ulid::parse_str("7ZZZZZZZZZZZZZZZZZZZZZZZZZ").unwrap();
366        assert_eq!(id, Ulid::max());
367    }
368
369    #[test]
370    fn parse_lowercase() {
371        let id = Ulid::parse_str("01arz3ndektsv4rrffq69g5fav").unwrap();
372        assert_eq!(id.to_string(), "01ARZ3NDEKTSV4RRFFQ69G5FAV");
373    }
374
375    #[test]
376    fn parse_substitutions() {
377        // I, L map to 1; O maps to 0.
378        let a = Ulid::parse_str("0IIIIIIIIIIIIIIIIIIIIIIIII").unwrap();
379        let b = Ulid::parse_str("0LLLLLLLLLLLLLLLLLLLLLLLLL").unwrap();
380        let c = Ulid::parse_str("01111111111111111111111111").unwrap();
381        assert_eq!(a, b);
382        assert_eq!(a, c);
383        let z = Ulid::parse_str("OO000000000000000000000000").unwrap();
384        assert_eq!(z, Ulid::nil());
385    }
386
387    #[test]
388    fn parse_rejects_short() {
389        assert!(matches!(
390            Ulid::parse_str("abc"),
391            Err(ParseError::InvalidLength(3))
392        ));
393    }
394
395    #[test]
396    fn parse_rejects_long() {
397        assert!(matches!(
398            Ulid::parse_str("01ARZ3NDEKTSV4RRFFQ69G5FAVX"),
399            Err(ParseError::InvalidLength(27))
400        ));
401    }
402
403    #[test]
404    fn parse_rejects_u() {
405        // U is reserved.
406        assert!(matches!(
407            Ulid::parse_str("0UARZ3NDEKTSV4RRFFQ69G5FAV"),
408            Err(ParseError::InvalidChar(1))
409        ));
410    }
411
412    #[test]
413    fn parse_rejects_overflow_leading() {
414        // Leading char 8..=Z would set the 129th bit.
415        assert!(matches!(
416            Ulid::parse_str("80000000000000000000000000"),
417            Err(ParseError::Overflow)
418        ));
419        assert!(matches!(
420            Ulid::parse_str("Z0000000000000000000000000"),
421            Err(ParseError::Overflow)
422        ));
423    }
424
425    #[test]
426    fn from_str_works() {
427        let id: Ulid = "01ARZ3NDEKTSV4RRFFQ69G5FAV".parse().unwrap();
428        assert_eq!(id.to_string(), "01ARZ3NDEKTSV4RRFFQ69G5FAV");
429    }
430
431    #[test]
432    fn from_bytes_roundtrip() {
433        let id = Ulid::new();
434        assert_eq!(Ulid::from_bytes(id.as_bytes()), id);
435    }
436
437    #[test]
438    fn timestamp_decodes_known_prefix() {
439        // "01ARZ3NDEK" -> ms 0x01563E3AB5D3 (48 bits) in Crockford base32.
440        let id = Ulid::parse_str("01ARZ3NDEKTSV4RRFFQ69G5FAV").unwrap();
441        assert_eq!(id.timestamp_ms(), 0x0000_0156_3E3A_B5D3);
442    }
443
444    #[test]
445    fn timestamp_from_known_bytes() {
446        // Hand-packed ms = 0x0123_4567_89AB in the first 6 bytes.
447        let mut bytes = [0u8; 16];
448        bytes[0..6].copy_from_slice(&[0x01, 0x23, 0x45, 0x67, 0x89, 0xAB]);
449        let id = Ulid::from_bytes(&bytes);
450        assert_eq!(id.timestamp_ms(), 0x0000_0123_4567_89AB);
451    }
452
453    #[test]
454    fn many_unique() {
455        let mut set = HashSet::new();
456        for _ in 0..10_000 {
457            assert!(set.insert(Ulid::new()));
458        }
459    }
460
461    #[test]
462    fn monotonic_within_ms_burst() {
463        // 1000 in a row, all strictly increasing.
464        let mut prev = Ulid::new();
465        for _ in 0..1000 {
466            let cur = Ulid::new();
467            assert!(cur > prev);
468            prev = cur;
469        }
470    }
471}