Skip to main content

id_forge/
ulid.rs

1//! # ULID generation
2//!
3//! Universally Unique Lexicographically Sortable Identifier per the
4//! [ULID spec]: 128 bits split as a 48-bit big-endian millisecond
5//! timestamp followed by 80 bits of randomness. Encoded as 26
6//! Crockford base32 characters that sort byte-wise in creation order.
7//!
8//! [ULID spec]: https://github.com/ulid/spec
9//!
10//! ```
11//! use id_forge::ulid::Ulid;
12//!
13//! let a = Ulid::new();
14//! let b = Ulid::new();
15//! assert_eq!(a.to_string().len(), 26);
16//! assert!(b > a);                                  // monotonic per process
17//! let parsed = Ulid::parse_str(&a.to_string()).unwrap();
18//! assert_eq!(a, parsed);
19//! ```
20//!
21//! ## Monotonicity
22//!
23//! Within a single process, two ULIDs generated in the same
24//! millisecond are guaranteed to be byte-wise ordered: the second one
25//! is the first one's 80-bit random suffix plus one. Across
26//! milliseconds, fresh randomness is drawn. This matches the
27//! "monotonic factory" guarantee in the spec.
28//!
29//! ## Randomness
30//!
31//! The 80-bit random suffix comes from the shared inline xoshiro256\*\*
32//! generator. It is fast and statistically strong but **not**
33//! cryptographically secure.
34
35use core::fmt;
36use std::cell::RefCell;
37use std::time::{SystemTime, UNIX_EPOCH};
38
39use crate::rng;
40
41/// Crockford base32 alphabet — the 32 characters ULID display uses.
42const ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
43
44const RAND_MASK_80: u128 = (1u128 << 80) - 1;
45
46/// A 128-bit ULID.
47///
48/// Internally stored as 16 big-endian bytes: 6 bytes of millisecond
49/// timestamp followed by 10 bytes of randomness.
50///
51/// # Example
52///
53/// ```
54/// use id_forge::ulid::Ulid;
55///
56/// let id = Ulid::new();
57/// assert_eq!(id.to_string().len(), 26);
58/// ```
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
60pub struct Ulid([u8; 16]);
61
62impl Ulid {
63    /// The Nil ULID: all 128 bits set to zero.
64    ///
65    /// # Example
66    ///
67    /// ```
68    /// use id_forge::ulid::Ulid;
69    ///
70    /// assert_eq!(Ulid::nil().to_string(), "00000000000000000000000000");
71    /// ```
72    pub const fn nil() -> Self {
73        Self([0u8; 16])
74    }
75
76    /// The Max ULID: all 128 bits set to one — the largest value the
77    /// 26-character display can represent (`7ZZZ…` since the spec
78    /// reserves the top two bits of the leading character).
79    ///
80    /// # Example
81    ///
82    /// ```
83    /// use id_forge::ulid::Ulid;
84    ///
85    /// assert_eq!(Ulid::max().to_string(), "7ZZZZZZZZZZZZZZZZZZZZZZZZZ");
86    /// ```
87    pub const fn max() -> Self {
88        Self([0xff; 16])
89    }
90
91    /// Construct a new ULID at the current wall-clock millisecond.
92    ///
93    /// Two ULIDs minted in the same millisecond by the same process
94    /// are strictly ordered: the second one is the first one's random
95    /// suffix plus one. Across milliseconds, the randomness is freshly
96    /// drawn.
97    ///
98    /// # Example
99    ///
100    /// ```
101    /// use id_forge::ulid::Ulid;
102    ///
103    /// let a = Ulid::new();
104    /// let b = Ulid::new();
105    /// assert!(b > a);
106    /// ```
107    pub fn new() -> Self {
108        let ms = SystemTime::now()
109            .duration_since(UNIX_EPOCH)
110            .map(|d| d.as_millis() as u64)
111            .unwrap_or(0)
112            & ((1u64 << 48) - 1);
113        STATE.with(|cell| {
114            let mut st = cell.borrow_mut();
115            let rand = st.next_random(ms);
116            Self::pack(ms, rand)
117        })
118    }
119
120    /// Wrap a 16-byte big-endian representation as-is.
121    ///
122    /// # Example
123    ///
124    /// ```
125    /// use id_forge::ulid::Ulid;
126    ///
127    /// let id = Ulid::new();
128    /// let copy = Ulid::from_bytes(id.as_bytes());
129    /// assert_eq!(id, copy);
130    /// ```
131    pub const fn from_bytes(bytes: &[u8; 16]) -> Self {
132        Self(*bytes)
133    }
134
135    /// Return the raw 16-byte big-endian representation.
136    ///
137    /// # Example
138    ///
139    /// ```
140    /// use id_forge::ulid::Ulid;
141    ///
142    /// assert_eq!(Ulid::nil().as_bytes(), &[0u8; 16]);
143    /// ```
144    pub const fn as_bytes(&self) -> &[u8; 16] {
145        &self.0
146    }
147
148    /// Return the 48-bit millisecond timestamp prefix.
149    ///
150    /// # Example
151    ///
152    /// ```
153    /// use id_forge::ulid::Ulid;
154    ///
155    /// let id = Ulid::new();
156    /// assert!(id.timestamp_ms() > 0);
157    /// ```
158    pub const fn timestamp_ms(&self) -> u64 {
159        let b = &self.0;
160        ((b[0] as u64) << 40)
161            | ((b[1] as u64) << 32)
162            | ((b[2] as u64) << 24)
163            | ((b[3] as u64) << 16)
164            | ((b[4] as u64) << 8)
165            | (b[5] as u64)
166    }
167
168    /// Parse a 26-character Crockford base32 ULID, case-insensitive.
169    ///
170    /// Accepts the substitution rules from the spec: `I`, `L` decode
171    /// to `1`; `O` decodes to `0`. `U` is reserved and rejected.
172    /// Returns [`ParseError`] on length, character, or leading-bits
173    /// violations.
174    ///
175    /// # Example
176    ///
177    /// ```
178    /// use id_forge::ulid::Ulid;
179    ///
180    /// let id = Ulid::parse_str("01ARZ3NDEKTSV4RRFFQ69G5FAV").unwrap();
181    /// assert_eq!(id.to_string(), "01ARZ3NDEKTSV4RRFFQ69G5FAV");
182    /// ```
183    pub fn parse_str(input: &str) -> Result<Self, ParseError> {
184        let bytes = input.as_bytes();
185        if bytes.len() != 26 {
186            return Err(ParseError::InvalidLength(bytes.len()));
187        }
188        // Leading character must be 0-7 — only the low 3 bits are
189        // valid for a 128-bit value rendered in 130 bits.
190        let first = decode_char(bytes[0]).ok_or(ParseError::InvalidChar(0))?;
191        if first > 7 {
192            return Err(ParseError::Overflow);
193        }
194        let mut n: u128 = first as u128;
195        for (i, &c) in bytes.iter().enumerate().skip(1) {
196            let v = decode_char(c).ok_or(ParseError::InvalidChar(i))?;
197            n = (n << 5) | (v as u128);
198        }
199        Ok(Self(n.to_be_bytes()))
200    }
201
202    fn pack(ms: u64, rand: u128) -> Self {
203        let mut bytes = [0u8; 16];
204        let ms_bytes = ms.to_be_bytes();
205        bytes[0..6].copy_from_slice(&ms_bytes[2..8]);
206        let rand_bytes = rand.to_be_bytes();
207        bytes[6..16].copy_from_slice(&rand_bytes[6..16]);
208        Self(bytes)
209    }
210}
211
212impl Default for Ulid {
213    fn default() -> Self {
214        Self::nil()
215    }
216}
217
218impl fmt::Display for Ulid {
219    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
220        let n = u128::from_be_bytes(self.0);
221        let mut out = [0u8; 26];
222        for i in (0..26).rev() {
223            out[i] = ALPHABET[((n >> ((25 - i) * 5)) & 0x1f) as usize];
224        }
225        f.write_str(core::str::from_utf8(&out).unwrap_or(""))
226    }
227}
228
229impl core::str::FromStr for Ulid {
230    type Err = ParseError;
231    fn from_str(s: &str) -> Result<Self, Self::Err> {
232        Self::parse_str(s)
233    }
234}
235
236/// Error returned by [`Ulid::parse_str`].
237#[derive(Debug, Clone, Copy, PartialEq, Eq)]
238pub enum ParseError {
239    /// Input was not exactly 26 characters. The value is the actual length.
240    InvalidLength(usize),
241    /// A character at the given byte position is not in the Crockford
242    /// base32 alphabet (after applying the I/L/O/U substitution rules).
243    InvalidChar(usize),
244    /// The leading character encodes a value above 7, which would set
245    /// bits beyond the 128-bit ULID range.
246    Overflow,
247}
248
249impl fmt::Display for ParseError {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        match self {
252            Self::InvalidLength(n) => write!(f, "expected 26 characters, got {n}"),
253            Self::InvalidChar(p) => write!(f, "invalid Crockford base32 digit at position {p}"),
254            Self::Overflow => write!(f, "leading character exceeds 128-bit range"),
255        }
256    }
257}
258
259impl std::error::Error for ParseError {}
260
261#[inline]
262const fn decode_char(c: u8) -> Option<u8> {
263    match c {
264        b'0' | b'o' | b'O' => Some(0),
265        b'1' | b'i' | b'I' | b'l' | b'L' => Some(1),
266        b'2'..=b'9' => Some(c - b'0'),
267        b'A'..=b'H' => Some(c - b'A' + 10),
268        b'J' | b'K' => Some(c - b'A' + 10 - 1),
269        b'M' | b'N' => Some(c - b'A' + 10 - 2),
270        b'P'..=b'T' => Some(c - b'A' + 10 - 3),
271        b'V'..=b'Z' => Some(c - b'A' + 10 - 4),
272        b'a'..=b'h' => Some(c - b'a' + 10),
273        b'j' | b'k' => Some(c - b'a' + 10 - 1),
274        b'm' | b'n' => Some(c - b'a' + 10 - 2),
275        b'p'..=b't' => Some(c - b'a' + 10 - 3),
276        b'v'..=b'z' => Some(c - b'a' + 10 - 4),
277        _ => None,
278    }
279}
280
281// -------- Monotonic factory state --------
282
283thread_local! {
284    static STATE: RefCell<MonotonicState> = RefCell::new(MonotonicState::default());
285}
286
287#[derive(Default)]
288struct MonotonicState {
289    last_ms: u64,
290    last_rand: u128,
291}
292
293impl MonotonicState {
294    fn next_random(&mut self, ms: u64) -> u128 {
295        if ms == self.last_ms && self.last_rand != 0 {
296            let next = self.last_rand.wrapping_add(1) & RAND_MASK_80;
297            assert!(
298                next != 0,
299                "ulid: 80-bit monotonic counter overflowed in a single millisecond"
300            );
301            self.last_rand = next;
302        } else {
303            self.last_ms = ms;
304            let hi = rng::next_u64() as u128;
305            let lo = rng::next_u64() as u128;
306            // 80 random bits: 64 from `hi`, top 16 from `lo`.
307            self.last_rand = ((hi & 0xFFFF_FFFF_FFFF_FFFF) << 16) | ((lo >> 48) & 0xFFFF);
308        }
309        self.last_rand
310    }
311}
312
313#[cfg(test)]
314mod tests {
315    use super::*;
316    use std::collections::HashSet;
317
318    #[test]
319    fn display_length_26() {
320        assert_eq!(Ulid::new().to_string().len(), 26);
321    }
322
323    #[test]
324    fn two_calls_differ() {
325        assert_ne!(Ulid::new(), Ulid::new());
326    }
327
328    #[test]
329    fn monotonic_within_ms() {
330        let a = Ulid::new();
331        let b = Ulid::new();
332        assert!(b > a, "ULIDs in same ms must be strictly ordered");
333    }
334
335    #[test]
336    fn time_ordered_across_ms() {
337        let a = Ulid::new();
338        std::thread::sleep(std::time::Duration::from_millis(2));
339        let b = Ulid::new();
340        assert!(b > a);
341        assert!(b.timestamp_ms() > a.timestamp_ms());
342    }
343
344    #[test]
345    fn nil_and_max() {
346        assert_eq!(Ulid::nil().as_bytes(), &[0u8; 16]);
347        assert_eq!(Ulid::max().as_bytes(), &[0xffu8; 16]);
348        assert_eq!(Ulid::nil().to_string(), "00000000000000000000000000");
349        assert_eq!(Ulid::max().to_string(), "7ZZZZZZZZZZZZZZZZZZZZZZZZZ");
350    }
351
352    #[test]
353    fn default_is_nil() {
354        assert_eq!(Ulid::default(), Ulid::nil());
355    }
356
357    #[test]
358    fn parse_round_trip() {
359        // Spec example.
360        let s = "01ARZ3NDEKTSV4RRFFQ69G5FAV";
361        let id = Ulid::parse_str(s).unwrap();
362        assert_eq!(id.to_string(), s);
363    }
364
365    #[test]
366    fn parse_nil() {
367        let id = Ulid::parse_str("00000000000000000000000000").unwrap();
368        assert_eq!(id, Ulid::nil());
369    }
370
371    #[test]
372    fn parse_max() {
373        let id = Ulid::parse_str("7ZZZZZZZZZZZZZZZZZZZZZZZZZ").unwrap();
374        assert_eq!(id, Ulid::max());
375    }
376
377    #[test]
378    fn parse_lowercase() {
379        let id = Ulid::parse_str("01arz3ndektsv4rrffq69g5fav").unwrap();
380        assert_eq!(id.to_string(), "01ARZ3NDEKTSV4RRFFQ69G5FAV");
381    }
382
383    #[test]
384    fn parse_substitutions() {
385        // I, L map to 1; O maps to 0.
386        let a = Ulid::parse_str("0IIIIIIIIIIIIIIIIIIIIIIIII").unwrap();
387        let b = Ulid::parse_str("0LLLLLLLLLLLLLLLLLLLLLLLLL").unwrap();
388        let c = Ulid::parse_str("01111111111111111111111111").unwrap();
389        assert_eq!(a, b);
390        assert_eq!(a, c);
391        let z = Ulid::parse_str("OO000000000000000000000000").unwrap();
392        assert_eq!(z, Ulid::nil());
393    }
394
395    #[test]
396    fn parse_rejects_short() {
397        assert!(matches!(
398            Ulid::parse_str("abc"),
399            Err(ParseError::InvalidLength(3))
400        ));
401    }
402
403    #[test]
404    fn parse_rejects_long() {
405        assert!(matches!(
406            Ulid::parse_str("01ARZ3NDEKTSV4RRFFQ69G5FAVX"),
407            Err(ParseError::InvalidLength(27))
408        ));
409    }
410
411    #[test]
412    fn parse_rejects_u() {
413        // U is reserved.
414        assert!(matches!(
415            Ulid::parse_str("0UARZ3NDEKTSV4RRFFQ69G5FAV"),
416            Err(ParseError::InvalidChar(1))
417        ));
418    }
419
420    #[test]
421    fn parse_rejects_overflow_leading() {
422        // Leading char 8..=Z would set the 129th bit.
423        assert!(matches!(
424            Ulid::parse_str("80000000000000000000000000"),
425            Err(ParseError::Overflow)
426        ));
427        assert!(matches!(
428            Ulid::parse_str("Z0000000000000000000000000"),
429            Err(ParseError::Overflow)
430        ));
431    }
432
433    #[test]
434    fn from_str_works() {
435        let id: Ulid = "01ARZ3NDEKTSV4RRFFQ69G5FAV".parse().unwrap();
436        assert_eq!(id.to_string(), "01ARZ3NDEKTSV4RRFFQ69G5FAV");
437    }
438
439    #[test]
440    fn from_bytes_roundtrip() {
441        let id = Ulid::new();
442        assert_eq!(Ulid::from_bytes(id.as_bytes()), id);
443    }
444
445    #[test]
446    fn timestamp_decodes_known_prefix() {
447        // "01ARZ3NDEK" -> ms 0x01563E3AB5D3 (48 bits) in Crockford base32.
448        let id = Ulid::parse_str("01ARZ3NDEKTSV4RRFFQ69G5FAV").unwrap();
449        assert_eq!(id.timestamp_ms(), 0x0000_0156_3E3A_B5D3);
450    }
451
452    #[test]
453    fn timestamp_from_known_bytes() {
454        // Hand-packed ms = 0x0123_4567_89AB in the first 6 bytes.
455        let mut bytes = [0u8; 16];
456        bytes[0..6].copy_from_slice(&[0x01, 0x23, 0x45, 0x67, 0x89, 0xAB]);
457        let id = Ulid::from_bytes(&bytes);
458        assert_eq!(id.timestamp_ms(), 0x0000_0123_4567_89AB);
459    }
460
461    #[test]
462    fn many_unique() {
463        let mut set = HashSet::new();
464        for _ in 0..10_000 {
465            assert!(set.insert(Ulid::new()));
466        }
467    }
468
469    #[test]
470    fn monotonic_within_ms_burst() {
471        // 1000 in a row, all strictly increasing.
472        let mut prev = Ulid::new();
473        for _ in 0..1000 {
474            let cur = Ulid::new();
475            assert!(cur > prev);
476            prev = cur;
477        }
478    }
479}