cartulary 0.3.0-alpha.1

The knowledge layer of your project — decisions, issues, docs, all in one place.
Documentation
//! `Tsid` — Time-Sorted Identifier per ADR-0022.
//!
//! 64-bit value: 42 bits of milliseconds since the project epoch
//! (`TSID_EPOCH` = `2026-01-01T00:00:00Z`) followed by 22 random bits.
//!
//! Encoded canonically as **13 characters in Crockford base32** (uppercase
//! output, case-insensitive input, alphabet excludes `I`/`L`/`O`/`U` to
//! prevent transcription errors). The 13 chars cover 65 bits; the leftmost
//! char's high bit is always zero, so values stay within `u64`.
//!
//! ## Properties (verified by property tests)
//!
//! - **Round-trip**: `parse(format(t)) == t` for every TSID.
//! - **Sortability**: lexicographic order of the encoded string matches
//!   chronological order of the underlying timestamps. Same-millisecond TSIDs
//!   sort by their random tiebreaker, giving a total order on the type.
//! - **Codec leniency**: input is case-insensitive and tolerates the
//!   ambiguous Crockford pairs `I`→`1`, `L`→`1`, `O`→`0`. Canonical output
//!   never emits these.

use std::fmt;
use std::str::FromStr;

/// Project epoch (`2026-01-01T00:00:00Z`) in milliseconds since the Unix
/// epoch. Fixed forever — embedded in every TSID's timestamp portion. A
/// 42-bit ms counter starting here gives ~140 years of range (until ~2166).
pub const TSID_EPOCH_MILLIS: i64 = 1_767_225_600_000;

/// 42 bits of milliseconds = 4 398 046 511 103 ms ≈ 139.4 years.
const MAX_TS_MS: u64 = (1u64 << 42) - 1;

/// 22 random bits per millisecond → birthday collision at √(2·2²²) ≈ 2900
/// records created in the same millisecond. Practically impossible.
pub const RANDOM_BITS: u32 = 22;
const RANDOM_MASK: u64 = (1u64 << RANDOM_BITS) - 1;

/// Crockford base32 alphabet, ordered so the byte value of each char (`0..32`)
/// matches its 5-bit payload. Excludes `I`, `L`, `O`, `U`.
const CROCKFORD_ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";

const ENCODED_LEN: usize = 13;

/// A Time-Sorted Identifier. Opaque 64-bit value with a stable canonical
/// 13-character Crockford base32 representation.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Tsid(u64);

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TsidParseError {
    /// Input length differs from 13 chars.
    BadLength { actual: usize },
    /// Input contains a character that is not in the Crockford alphabet
    /// (and not one of the lenient mappings `I`/`L`/`O`).
    BadChar { position: usize, ch: char },
    /// Input is 13 chars long but its leftmost character's payload exceeds
    /// 4 bits, which would overflow a `u64`.
    Overflow,
}

impl fmt::Display for TsidParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            TsidParseError::BadLength { actual } => write!(
                f,
                "TSID must be exactly {ENCODED_LEN} characters, got {actual}"
            ),
            TsidParseError::BadChar { position, ch } => write!(
                f,
                "invalid Crockford base32 character {ch:?} at position {position}"
            ),
            TsidParseError::Overflow => write!(
                f,
                "TSID overflows u64: leftmost character's high bit must be zero"
            ),
        }
    }
}

impl std::error::Error for TsidParseError {}

impl Tsid {
    /// Build a TSID from raw 64-bit value. Useful for tests and for the
    /// migration path that derives a TSID from a known timestamp.
    pub fn from_raw(value: u64) -> Self {
        Tsid(value)
    }

    /// The raw 64-bit value. Mostly for tests; production code stays at the
    /// canonical string surface.
    pub fn as_u64(self) -> u64 {
        self.0
    }

    /// Build a TSID from a Unix-millis timestamp and a 22-bit random
    /// tiebreaker. The pure constructor: clock and RNG sourcing live in
    /// the infra layer (`infra/driven/fs/id_generator`).
    pub fn from_millis_with_random(unix_ms: i64, random_22_bits: u32) -> Self {
        let project_ms = (unix_ms - TSID_EPOCH_MILLIS).max(0) as u64;
        let ts = project_ms & MAX_TS_MS;
        let rand = (random_22_bits as u64) & RANDOM_MASK;
        Tsid((ts << RANDOM_BITS) | rand)
    }

    /// The timestamp portion as Unix milliseconds. Pre-epoch inputs were
    /// clamped to zero at construction, so this returns `TSID_EPOCH_MILLIS`
    /// for such TSIDs — distinguishing them from genuine epoch-boundary
    /// records requires the caller to consult the corpus.
    pub fn timestamp_millis(self) -> i64 {
        let project_ms = (self.0 >> RANDOM_BITS) as i64;
        project_ms + TSID_EPOCH_MILLIS
    }

    /// Crockford base32 canonical form (13 uppercase chars).
    pub fn encode(self) -> String {
        let mut buf = [0u8; ENCODED_LEN];
        let mut value = self.0;
        for i in (0..ENCODED_LEN).rev() {
            let bits = (value & 0x1F) as usize;
            buf[i] = CROCKFORD_ALPHABET[bits];
            value >>= 5;
        }
        // SAFETY: every byte written is from CROCKFORD_ALPHABET, ASCII subset.
        String::from_utf8(buf.to_vec()).expect("alphabet is ASCII")
    }

    /// Parse a 13-char Crockford base32 string. Lenient on case and on the
    /// ambiguous pairs `I`/`L`→`1`, `O`→`0`. Rejects anything else.
    pub fn parse(s: &str) -> Result<Self, TsidParseError> {
        if s.len() != ENCODED_LEN {
            return Err(TsidParseError::BadLength { actual: s.len() });
        }
        let mut value: u64 = 0;
        for (position, ch) in s.chars().enumerate() {
            let bits = decode_char(ch).ok_or(TsidParseError::BadChar { position, ch })?;
            if position == 0 && bits >= 16 {
                return Err(TsidParseError::Overflow);
            }
            value = (value << 5) | bits as u64;
        }
        Ok(Tsid(value))
    }
}

impl fmt::Display for Tsid {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.encode())
    }
}

impl FromStr for Tsid {
    type Err = TsidParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::parse(s)
    }
}

impl serde::Serialize for Tsid {
    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        s.serialize_str(&self.encode())
    }
}

impl<'de> serde::Deserialize<'de> for Tsid {
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        let s = String::deserialize(d)?;
        Self::parse(&s).map_err(serde::de::Error::custom)
    }
}

// ── Helpers ──────────────────────────────────────────────────────────────────

/// Decode one Crockford base32 character to its 5-bit payload. Lenient on
/// case and on the ambiguous transcription pairs. Returns `None` for
/// characters outside the accepted set.
fn decode_char(ch: char) -> Option<u8> {
    let upper = ch.to_ascii_uppercase();
    match upper {
        '0' | 'O' => Some(0),
        '1' | 'I' | 'L' => Some(1),
        '2'..='9' => Some(upper as u8 - b'0'),
        'A'..='H' => Some(upper as u8 - b'A' + 10),
        'J' => Some(18),
        'K' => Some(19),
        'M' => Some(20),
        'N' => Some(21),
        'P'..='T' => Some(upper as u8 - b'P' + 22),
        'V'..='Z' => Some(upper as u8 - b'V' + 27),
        _ => None,
    }
}

// ── Proptest strategy ────────────────────────────────────────────────────────

#[cfg(test)]
pub mod strategy {
    use super::Tsid;
    use proptest::prelude::*;

    pub fn any_tsid() -> impl Strategy<Value = Tsid> {
        any::<u64>().prop_map(Tsid::from_raw)
    }
}

// ── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_has_length_13_and_uppercase() {
        let t = Tsid::from_raw(0);
        let s = t.encode();
        assert_eq!(s.len(), ENCODED_LEN);
        assert_eq!(s, "0000000000000");
        assert!(s
            .chars()
            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit()));
    }

    #[test]
    fn encode_known_value() {
        // 0x1234567890ABCDEF in Crockford base32
        let t = Tsid::from_raw(0x1234_5678_90AB_CDEF);
        assert_eq!(t.encode().len(), ENCODED_LEN);
        assert_eq!(Tsid::parse(&t.encode()), Ok(t));
    }

    #[test]
    fn parse_canonical_round_trips() {
        for value in [0u64, 1, 42, MAX_TS_MS, u64::MAX] {
            let t = Tsid::from_raw(value);
            assert_eq!(Tsid::parse(&t.encode()), Ok(t), "value = {value:#x}");
        }
    }

    #[test]
    fn parse_is_case_insensitive() {
        let t = Tsid::from_raw(0xABCDEF12345);
        let canonical = t.encode();
        assert_eq!(Tsid::parse(&canonical.to_lowercase()), Ok(t));
    }

    #[test]
    fn parse_accepts_ambiguous_crockford_chars() {
        let t = Tsid::from_raw(1);
        let mangled = t.encode().replace('1', "L");
        assert_eq!(Tsid::parse(&mangled), Ok(t));
        let mangled = t.encode().replace('1', "I");
        assert_eq!(Tsid::parse(&mangled), Ok(t));
        let t = Tsid::from_raw(0);
        assert_eq!(Tsid::parse("OOOOOOOOOOOOO"), Ok(t));
    }

    #[test]
    fn parse_rejects_wrong_length() {
        assert!(matches!(
            Tsid::parse("ABCD"),
            Err(TsidParseError::BadLength { actual: 4 })
        ));
        assert!(matches!(
            Tsid::parse("00000000000000"),
            Err(TsidParseError::BadLength { actual: 14 })
        ));
    }

    #[test]
    fn parse_rejects_invalid_char() {
        // 'U' is not in the Crockford alphabet
        assert!(matches!(
            Tsid::parse("U000000000000"),
            Err(TsidParseError::BadChar {
                position: 0,
                ch: 'U'
            })
        ));
    }

    #[test]
    fn parse_rejects_overflow_at_top_char() {
        // A leading 'G' (= 16) means the top bit is set in the first 5 bits,
        // pushing the total above u64.
        assert_eq!(Tsid::parse("G000000000000"), Err(TsidParseError::Overflow));
    }

    #[test]
    fn from_millis_uses_project_epoch() {
        let t = Tsid::from_millis_with_random(TSID_EPOCH_MILLIS, 0);
        assert_eq!(t.as_u64(), 0, "epoch ms with zero random == zero TSID");
        let t = Tsid::from_millis_with_random(TSID_EPOCH_MILLIS + 1, 0);
        assert_eq!(t.as_u64(), 1u64 << RANDOM_BITS);
    }

    #[test]
    fn from_millis_clamps_pre_epoch_input_to_zero() {
        let t = Tsid::from_millis_with_random(TSID_EPOCH_MILLIS - 1_000_000, 42);
        assert_eq!(t.as_u64(), 42u64, "pre-epoch ms collapses to ts=0");
    }

    #[test]
    fn serde_round_trips_via_string() {
        let t = Tsid::from_raw(0xDEAD_BEEF_CAFE_BABE);
        let s = serde_json::to_string(&t).unwrap();
        assert_eq!(s, format!("\"{t}\""));
        let back: Tsid = serde_json::from_str(&s).unwrap();
        assert_eq!(back, t);
    }

    // ── Property tests ──────────────────────────────────────────────────────

    use proptest::prelude::any;

    proptest::proptest! {
        #[test]
        fn prop_round_trip(value: u64) {
            let t = Tsid::from_raw(value);
            proptest::prop_assert_eq!(Tsid::parse(&t.encode()), Ok(t));
        }

        #[test]
        fn prop_lexicographic_sort_matches_value_sort(values in proptest::collection::vec(any::<u64>(), 0..50)) {
            let mut tsids: Vec<Tsid> = values.iter().copied().map(Tsid::from_raw).collect();
            tsids.sort();
            let encoded: Vec<String> = tsids.iter().map(|t| t.encode()).collect();
            let mut sorted_encoded = encoded.clone();
            sorted_encoded.sort();
            proptest::prop_assert_eq!(encoded, sorted_encoded);
        }

        #[test]
        fn prop_increasing_timestamp_yields_increasing_tsid(
            ts1 in 0i64..1_000_000_000_000_i64,
            delta in 1i64..1_000_000_i64,
        ) {
            let unix1 = TSID_EPOCH_MILLIS + ts1;
            let unix2 = unix1 + delta;
            let a = Tsid::from_millis_with_random(unix1, 0);
            let b = Tsid::from_millis_with_random(unix2, 0);
            proptest::prop_assert!(a < b);
            proptest::prop_assert!(a.encode() < b.encode());
        }

        #[test]
        fn prop_same_millisecond_orders_by_random_tiebreaker(
            ts in 0i64..1_000_000_000_000_i64,
            r1 in 0u32..(RANDOM_MASK as u32),
            r2 in 0u32..(RANDOM_MASK as u32),
        ) {
            let unix = TSID_EPOCH_MILLIS + ts;
            let a = Tsid::from_millis_with_random(unix, r1);
            let b = Tsid::from_millis_with_random(unix, r2);
            proptest::prop_assert_eq!(a.cmp(&b), r1.cmp(&r2));
        }
    }
}