cartulary 0.3.0-alpha.1

The knowledge layer of your project — decisions, issues, docs, all in one place.
Documentation
//! `Ulid` — Universally Unique Lexicographically Sortable Identifier.
//!
//! 128-bit value: 48 bits of milliseconds since the Unix epoch followed by
//! 80 random bits. Encoded canonically as **26 characters in Crockford
//! base32** (uppercase output, case-insensitive input, alphabet excludes
//! `I`/`L`/`O`/`U`). The 26 chars cover 130 bits; the leftmost char's two
//! high bits are always zero, so values stay within `u128` (and a leading
//! char value above 7 is rejected on parse).
//!
//! ## Properties (verified by property tests)
//!
//! - **Round-trip**: `parse(format(u)) == u` for every Ulid.
//! - **Sortability**: lexicographic order of the encoded string matches
//!   chronological order of the underlying timestamps. Same-millisecond
//!   ULIDs sort by their random tail, giving a total order on the type.
//! - **Codec leniency**: input is case-insensitive and tolerates the
//!   ambiguous Crockford pairs `I`→`1`, `L`→`1`, `O`→`0`. Canonical
//!   output never emits these.

use std::fmt;
use std::str::FromStr;

const ENCODED_LEN: usize = 26;

/// 48 bits of milliseconds = 281 474 976 710 655 ms ≈ 10 889 years of range.
const MAX_TS_MS: u128 = (1u128 << 48) - 1;

/// 80 random bits per millisecond — collision-free at any realistic
/// generation rate, including bulk imports.
pub const RANDOM_BITS: u32 = 80;
const RANDOM_MASK: u128 = (1u128 << RANDOM_BITS) - 1;

/// Crockford base32 alphabet, ordered so the byte value of each char (`0..32`)
/// matches its 5-bit payload. Excludes `I`, `L`, `O`, `U`.
const CROCKFORD_ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";

/// A Universally Unique Lexicographically Sortable Identifier. Opaque
/// 128-bit value with a stable canonical 26-character Crockford base32
/// representation.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Ulid(u128);

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UlidParseError {
    /// Input length differs from 26 chars.
    BadLength { actual: usize },
    /// Input contains a character that is not in the Crockford alphabet
    /// (and not one of the lenient mappings `I`/`L`/`O`).
    BadChar { position: usize, ch: char },
    /// Input is 26 chars long but its leftmost character's payload exceeds
    /// 2 bits, which would overflow a `u128`.
    Overflow,
}

impl fmt::Display for UlidParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            UlidParseError::BadLength { actual } => write!(
                f,
                "ULID must be exactly {ENCODED_LEN} characters, got {actual}"
            ),
            UlidParseError::BadChar { position, ch } => write!(
                f,
                "invalid Crockford base32 character {ch:?} at position {position}"
            ),
            UlidParseError::Overflow => write!(
                f,
                "ULID overflows u128: leftmost character's top two bits must be zero"
            ),
        }
    }
}

impl std::error::Error for UlidParseError {}

impl Ulid {
    /// Build a ULID from a raw 128-bit value. Useful for tests.
    pub fn from_raw(value: u128) -> Self {
        Ulid(value)
    }

    /// The raw 128-bit value. Mostly for tests; production code stays at
    /// the canonical string surface.
    pub fn as_u128(self) -> u128 {
        self.0
    }

    /// Build a ULID from a Unix-millis timestamp and an 80-bit random
    /// tail. The pure constructor: clock and RNG sourcing live in the
    /// infra layer.
    ///
    /// Negative timestamps are clamped to 0 (pre-1970-01-01 makes no
    /// sense for a Unix-epoch identifier). Timestamps above 2⁴⁸-1 ms
    /// (≈ year 10889) wrap; in practice no realistic input gets close.
    pub fn from_millis_with_random(unix_ms: i64, random_80_bits: u128) -> Self {
        let ts = (unix_ms.max(0) as u128) & MAX_TS_MS;
        let rand = random_80_bits & RANDOM_MASK;
        Ulid((ts << RANDOM_BITS) | rand)
    }

    /// The 48-bit timestamp portion as Unix milliseconds.
    pub fn timestamp_millis(self) -> i64 {
        (self.0 >> RANDOM_BITS) as i64
    }

    /// The 80-bit random tail.
    pub fn random(self) -> u128 {
        self.0 & RANDOM_MASK
    }

    /// Crockford base32 canonical form (26 uppercase chars).
    pub fn encode(self) -> String {
        let mut buf = [0u8; ENCODED_LEN];
        let mut value = self.0;
        for i in (0..ENCODED_LEN).rev() {
            let bits = (value & 0x1F) as usize;
            buf[i] = CROCKFORD_ALPHABET[bits];
            value >>= 5;
        }
        // SAFETY: every byte written is from CROCKFORD_ALPHABET, ASCII subset.
        String::from_utf8(buf.to_vec()).expect("alphabet is ASCII")
    }

    /// Parse a 26-char Crockford base32 string. Lenient on case and on the
    /// ambiguous pairs `I`/`L`→`1`, `O`→`0`. Rejects anything else.
    pub fn parse(s: &str) -> Result<Self, UlidParseError> {
        if s.len() != ENCODED_LEN {
            return Err(UlidParseError::BadLength { actual: s.len() });
        }
        let mut value: u128 = 0;
        for (position, ch) in s.chars().enumerate() {
            let bits = decode_char(ch).ok_or(UlidParseError::BadChar { position, ch })?;
            if position == 0 && bits >= 8 {
                return Err(UlidParseError::Overflow);
            }
            value = (value << 5) | bits as u128;
        }
        Ok(Ulid(value))
    }
}

impl fmt::Display for Ulid {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.encode())
    }
}

impl FromStr for Ulid {
    type Err = UlidParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::parse(s)
    }
}

impl serde::Serialize for Ulid {
    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        s.serialize_str(&self.encode())
    }
}

impl<'de> serde::Deserialize<'de> for Ulid {
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        let s = String::deserialize(d)?;
        Self::parse(&s).map_err(serde::de::Error::custom)
    }
}

// ── Helpers ──────────────────────────────────────────────────────────────────

/// Decode one Crockford base32 character to its 5-bit payload. Lenient on
/// case and on the ambiguous transcription pairs. Returns `None` for
/// characters outside the accepted set.
fn decode_char(ch: char) -> Option<u8> {
    let upper = ch.to_ascii_uppercase();
    match upper {
        '0' | 'O' => Some(0),
        '1' | 'I' | 'L' => Some(1),
        '2'..='9' => Some(upper as u8 - b'0'),
        'A'..='H' => Some(upper as u8 - b'A' + 10),
        'J' => Some(18),
        'K' => Some(19),
        'M' => Some(20),
        'N' => Some(21),
        'P'..='T' => Some(upper as u8 - b'P' + 22),
        'V'..='Z' => Some(upper as u8 - b'V' + 27),
        _ => None,
    }
}

// ── Proptest strategy ────────────────────────────────────────────────────────

#[cfg(test)]
pub mod strategy {
    use super::Ulid;
    use proptest::prelude::*;

    pub fn any_ulid() -> impl Strategy<Value = Ulid> {
        // u128 generators aren't built in; combine two u64s.
        (any::<u64>(), any::<u64>())
            .prop_map(|(hi, lo)| Ulid::from_raw(((hi as u128) << 64) | lo as u128))
    }
}

// ── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_has_length_26_and_uppercase() {
        let u = Ulid::from_raw(0);
        let s = u.encode();
        assert_eq!(s.len(), ENCODED_LEN);
        assert_eq!(s, "00000000000000000000000000");
        assert!(s
            .chars()
            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit()));
    }

    #[test]
    fn parse_canonical_round_trips() {
        for value in [
            0u128,
            1,
            42,
            MAX_TS_MS,
            (MAX_TS_MS << RANDOM_BITS) | RANDOM_MASK,
            u128::MAX >> 2, // top 2 bits zero — the largest representable ULID
        ] {
            let u = Ulid::from_raw(value);
            assert_eq!(Ulid::parse(&u.encode()), Ok(u), "value = {value:#x}");
        }
    }

    #[test]
    fn parse_is_case_insensitive() {
        let u = Ulid::from_raw(0xABCDEF123456789);
        let canonical = u.encode();
        assert_eq!(Ulid::parse(&canonical.to_lowercase()), Ok(u));
    }

    #[test]
    fn parse_accepts_ambiguous_crockford_chars() {
        let u = Ulid::from_raw(1);
        let mangled = u.encode().replace('1', "L");
        assert_eq!(Ulid::parse(&mangled), Ok(u));
        let mangled = u.encode().replace('1', "I");
        assert_eq!(Ulid::parse(&mangled), Ok(u));
        assert_eq!(
            Ulid::parse("OOOOOOOOOOOOOOOOOOOOOOOOOO"),
            Ok(Ulid::from_raw(0))
        );
    }

    #[test]
    fn parse_rejects_wrong_length() {
        assert!(matches!(
            Ulid::parse("ABCD"),
            Err(UlidParseError::BadLength { actual: 4 })
        ));
        assert!(matches!(
            Ulid::parse("000000000000000000000000000"),
            Err(UlidParseError::BadLength { actual: 27 })
        ));
    }

    #[test]
    fn parse_rejects_invalid_char() {
        // 'U' is not in the Crockford alphabet
        assert!(matches!(
            Ulid::parse("U0000000000000000000000000"),
            Err(UlidParseError::BadChar {
                position: 0,
                ch: 'U'
            })
        ));
    }

    #[test]
    fn parse_rejects_overflow_at_top_char() {
        // A leading '8' means the leading char's top bit is set in the
        // 5-bit group, pushing the total above u128 (top 2 bits of the
        // 130-bit slot are reserved zero).
        assert_eq!(
            Ulid::parse("80000000000000000000000000"),
            Err(UlidParseError::Overflow)
        );
    }

    #[test]
    fn from_millis_uses_unix_epoch() {
        let u = Ulid::from_millis_with_random(0, 0);
        assert_eq!(
            u.as_u128(),
            0,
            "Unix epoch ms with zero random == zero ULID"
        );
        let u = Ulid::from_millis_with_random(1, 0);
        assert_eq!(u.as_u128(), 1u128 << RANDOM_BITS);
    }

    #[test]
    fn from_millis_clamps_negative_input_to_zero() {
        let u = Ulid::from_millis_with_random(-1_000_000, 42);
        assert_eq!(u.as_u128(), 42u128, "negative ms collapses to ts=0");
    }

    #[test]
    fn timestamp_millis_extracts_ts_portion() {
        let u = Ulid::from_millis_with_random(1_700_000_000_000, 0xABCDEF);
        assert_eq!(u.timestamp_millis(), 1_700_000_000_000);
    }

    #[test]
    fn random_extracts_tail() {
        let u = Ulid::from_millis_with_random(1_700_000_000_000, 0xABCDEF);
        assert_eq!(u.random(), 0xABCDEF);
    }

    #[test]
    fn serde_round_trips_via_string() {
        let u = Ulid::from_raw(0xDEAD_BEEF_CAFE_BABE_1234_5678_90AB_CDEF);
        let s = serde_json::to_string(&u).unwrap();
        assert_eq!(s, format!("\"{u}\""));
        let back: Ulid = serde_json::from_str(&s).unwrap();
        assert_eq!(back, u);
    }

    // ── Property tests ──────────────────────────────────────────────────────

    use proptest::prelude::*;

    proptest! {
        #[test]
        fn prop_round_trip(hi: u64, lo: u64) {
            // Mask top 2 bits to stay within the representable range.
            let raw = (((hi as u128) << 64) | lo as u128) >> 2;
            let u = Ulid::from_raw(raw);
            prop_assert_eq!(Ulid::parse(&u.encode()), Ok(u));
        }

        #[test]
        fn prop_lexicographic_sort_matches_value_sort(
            values in proptest::collection::vec((any::<u64>(), any::<u64>()), 0..50)
        ) {
            let raws: Vec<u128> = values
                .into_iter()
                .map(|(hi, lo)| (((hi as u128) << 64) | lo as u128) >> 2)
                .collect();
            let mut ulids: Vec<Ulid> = raws.iter().copied().map(Ulid::from_raw).collect();
            ulids.sort();
            let encoded: Vec<String> = ulids.iter().map(|u| u.encode()).collect();
            let mut sorted_encoded = encoded.clone();
            sorted_encoded.sort();
            prop_assert_eq!(encoded, sorted_encoded);
        }

        #[test]
        fn prop_increasing_timestamp_yields_increasing_ulid(
            ts1 in 0i64..(1i64 << 47),
            delta in 1i64..1_000_000_000_i64,
        ) {
            let a = Ulid::from_millis_with_random(ts1, 0);
            let b = Ulid::from_millis_with_random(ts1 + delta, 0);
            prop_assert!(a < b);
            prop_assert!(a.encode() < b.encode());
        }

        #[test]
        fn prop_timestamp_round_trips(
            ts in 0i64..(1i64 << 47),
            rand_hi: u32,
            rand_lo: u64,
        ) {
            let random = ((rand_hi as u128) << 64) | rand_lo as u128;
            let u = Ulid::from_millis_with_random(ts, random);
            prop_assert_eq!(u.timestamp_millis(), ts);
        }
    }
}