cartulary 0.3.0-alpha.1

The knowledge layer of your project — decisions, issues, docs, all in one place.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
use std::fmt;
use std::str::FromStr;

/// A validated, self-describing reference to any entity managed by cartulary.
///
/// An `EntityRef` carries both the kind and the ID of the target as a single
/// prefixed string. Three suffix shapes are accepted (the first two per
/// ADR-0022 phase 3):
///
/// - **Legacy sequential (v3)**: a decimal integer that parses as a non-zero
///   `u32`; the canonical form is zero-padded to four digits
///   (e.g. `ISSUE-0006`, `ADR-0042`).
/// - **TSID (v4)**: the 13-character Crockford base32 encoding of a
///   [`Tsid`] (e.g. `ISSUE-0DCT3MKW5T2K0`).
/// - **ULID (v5)**: a 26-character Crockford base32 encoding of a 128-bit
///   value (e.g. `ISSUE-01J9ZK4T5M8N3QXA7BR2HVPMD0`).
///
/// The prefix encodes the entity kind. The suffix shapes coexist so a
/// workspace can keep historical references resolvable via aliases while new
/// records carry the newer `id:` values.
///
/// ## Format
///
/// ```text
/// <PREFIX>-<SUFFIX>
/// ```
///
/// where `<PREFIX>` is one or more uppercase ASCII letters and `<SUFFIX>` is
/// a numeric run (parsing as a non-zero `u32`), a 13-character Crockford
/// base32 TSID encoding, or a 26-character Crockford base32 ULID encoding.
/// The string is split at its **last** `-`.
///
/// ## Design note
///
/// A text-driven newtype is used rather than a discriminated enum so that new
/// entity kinds (configured at runtime via `cartulary.toml`) are supported without
/// source-level changes.  The prefix is the single source of truth for the
/// entity kind — no separate `RecordKind` parameter is required at the link
/// level.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EntityRef(String);

/// Serializes an [`EntityRef`] as its plain string form (e.g. `"ISSUE-0006"`).
impl serde::Serialize for EntityRef {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}

impl EntityRef {
    /// Create an `EntityRef` from a raw string, accepting any supported
    /// suffix shape.
    ///
    /// Equivalent to [`EntityRef::parse_any`]. Kept for source compatibility
    /// with construction sites that don't yet know which schema version they
    /// live in (test fixtures, synthesised refs, etc.). New call sites should
    /// prefer [`parse_v3`](Self::parse_v3), [`parse_v4`](Self::parse_v4),
    /// [`parse_v5`](Self::parse_v5), or [`parse_any`](Self::parse_any)
    /// explicitly.
    ///
    /// # Errors
    ///
    /// Returns an error when the string is rejected by [`parse_any`](Self::parse_any).
    pub fn new(s: impl Into<String>) -> anyhow::Result<Self> {
        let raw: String = s.into();
        Self::parse_any(&raw)
    }

    /// Strict v3 parser: `<UPPER>+-<NNNN>` where `<NNNN>` is a non-zero
    /// integer of 1+ digits (canonical form is zero-padded to 4).
    ///
    /// Refuses TSID-shaped suffixes — use [`parse_v4`](Self::parse_v4) or
    /// [`parse_any`](Self::parse_any) for those.
    ///
    /// # Errors
    ///
    /// Returns an error when the `-` separator is missing, the prefix is not
    /// uppercase ASCII letters, or the suffix fails the v3 validator.
    pub fn parse_v3(s: &str) -> anyhow::Result<Self> {
        parse_with(s, validate_v3_suffix)
    }

    /// Strict v4 parser: `<UPPER>+-<13-char Crockford base32 TSID>`.
    ///
    /// Refuses legacy numeric suffixes — use [`parse_v3`](Self::parse_v3) or
    /// [`parse_any`](Self::parse_any) for those.
    ///
    /// # Errors
    ///
    /// Returns an error when the `-` separator is missing, the prefix is not
    /// uppercase ASCII letters, or the suffix fails the v4 validator.
    pub fn parse_v4(s: &str) -> anyhow::Result<Self> {
        parse_with(s, validate_v4_suffix)
    }

    /// Strict v5 parser: `<UPPER>+-<26-char Crockford base32 ULID>`.
    ///
    /// Refuses any other suffix shape — use [`parse_any`](Self::parse_any)
    /// when a call site needs to handle legacy TSIDs or sequential IDs too.
    ///
    /// # Errors
    ///
    /// Returns an error when the `-` separator is missing, the prefix is not
    /// uppercase ASCII letters, or the suffix fails the v5 validator.
    pub fn parse_v5(s: &str) -> anyhow::Result<Self> {
        parse_with(s, validate_v5_suffix)
    }

    /// Tolerant parser used at UX surfaces (CLI input, alias forwarding).
    /// Tries v5 first, then v4, then v3 — newest shape wins when the
    /// suffix length is unambiguous (26, 13, or shorter numeric).
    ///
    /// # Errors
    ///
    /// Returns an error when the `-` separator is missing, the prefix is not
    /// uppercase ASCII letters, or no validator accepts the suffix. In the
    /// last case the error reported is the one from the v3 validator, because
    /// it is the final fallback in the chain.
    pub fn parse_any(s: &str) -> anyhow::Result<Self> {
        parse_with(s, |full, suffix| {
            validate_v5_suffix(full, suffix)
                .or_else(|_| validate_v4_suffix(full, suffix))
                .or_else(|_| validate_v3_suffix(full, suffix))
        })
    }

    /// Return the string representation (e.g. `"ISSUE-0006"`).
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Return the prefix part (e.g. `"ISSUE"`, `"ADR"`), i.e. everything
    /// before the last `-`.
    ///
    /// # Panics
    ///
    /// Panics only if the stored string contains no `-`, which the parsers
    /// never allow.
    pub fn prefix(&self) -> &str {
        self.split_at_last_dash().0
    }

    /// Return the suffix part (everything after the last `-`).
    ///
    /// One of: a numeric run, a 13-char Crockford base32 TSID, or a 26-char
    /// Crockford base32 ULID.
    ///
    /// # Panics
    ///
    /// Panics only if the stored string contains no `-`, which the parsers
    /// never allow.
    pub fn suffix(&self) -> &str {
        self.split_at_last_dash().1
    }

    /// Return a `u64` derived from the suffix.
    ///
    /// - 26-character suffix that decodes as Crockford base32 (v5 / ULID):
    ///   the decoded value is 128 bits wide; the **low 64 bits** are returned
    ///   and the high 64 bits are discarded.
    /// - 13-character suffix that decodes as Crockford base32 (v4 / TSID):
    ///   the full decoded `u64`.
    /// - Anything else (legacy numeric suffix): the decimal value widened to
    ///   `u64`.
    ///
    /// The Crockford checks run first, so an all-digit suffix that happens to
    /// be exactly 13 or 26 characters long is decoded as base32, not decimal.
    ///
    /// # Panics
    ///
    /// Panics only if the suffix is neither decodable nor a decimal integer,
    /// which the parsers never allow.
    pub fn numeric_id(&self) -> u64 {
        let suffix = self.suffix();
        match suffix.len() {
            26 => {
                if let Some(wide) = decode_crockford_to_u128(suffix) {
                    // Intentional truncation: keep only the low 64 bits.
                    return wide as u64;
                }
            }
            13 => {
                if let Some(value) = decode_crockford_to_u64(suffix) {
                    return value;
                }
            }
            _ => {}
        }
        suffix
            .parse()
            .expect("EntityRef invariant: a non-Crockford suffix is a validated decimal integer")
    }

    /// Split the stored string at its last `-` into `(prefix, suffix)`.
    fn split_at_last_dash(&self) -> (&str, &str) {
        self.0
            .rsplit_once('-')
            .expect("EntityRef invariant: a validated ref always contains '-'")
    }
}

/// Common parsing pipeline behind every public `EntityRef` parser
/// (`parse_v3`, `parse_v4`, `parse_v5`, `parse_any`).
///
/// Steps, in order: split `s` at its last `-`, check the prefix, run the
/// caller-supplied `suffix_validator` (called as `(full_ref, suffix)`), and
/// finally wrap a copy of `s` in an `EntityRef`. The first failing step's
/// error is returned unchanged.
fn parse_with(
    s: &str,
    suffix_validator: impl FnOnce(&str, &str) -> anyhow::Result<()>,
) -> anyhow::Result<EntityRef> {
    split(s)
        .and_then(|(head, tail)| {
            validate_prefix(s, head)?;
            suffix_validator(s, tail)
        })
        .map(|()| EntityRef(s.to_owned()))
}

/// Break `s` into `(prefix, suffix)` around its **last** `-`; the dash itself
/// belongs to neither half.
///
/// # Errors
///
/// Fails when `s` contains no `-` at all.
fn split(s: &str) -> anyhow::Result<(&str, &str)> {
    match s.rsplit_once('-') {
        Some(halves) => Ok(halves),
        None => Err(anyhow::anyhow!("invalid entity ref '{s}': missing '-'")),
    }
}

/// Check that `prefix` is non-empty and made solely of ASCII uppercase
/// letters (`A`–`Z`).
///
/// `full` is the complete ref string and is used only to build the error
/// message.
fn validate_prefix(full: &str, prefix: &str) -> anyhow::Result<()> {
    let well_formed = !prefix.is_empty() && prefix.chars().all(|letter| letter.is_ascii_uppercase());
    if well_formed {
        return Ok(());
    }
    Err(anyhow::anyhow!(
        "invalid entity ref '{full}': prefix must be one or more uppercase ASCII letters"
    ))
}

/// Strict legacy numeric suffix validator: a run of ASCII digits that parses
/// as a non-zero `u32`.
///
/// `str::parse::<u32>` on its own also accepts a leading `+` sign, so the
/// digits-only requirement is checked explicitly before parsing; without it a
/// ref such as `ISSUE-+42` would be accepted as a valid v3 ref.
///
/// `full` is the complete ref string and is used only to build error
/// messages.
///
/// # Errors
///
/// Fails when the suffix is empty, contains any non-digit character, does
/// not fit in a `u32`, or is zero.
fn validate_v3_suffix(full: &str, suffix: &str) -> anyhow::Result<()> {
    let not_an_integer =
        || anyhow::anyhow!("invalid entity ref '{full}': v3 suffix must be a non-zero integer");

    // Reject signs and any other non-digit before delegating to `parse`.
    // An empty suffix passes this check but is rejected by `parse` below.
    if !suffix.bytes().all(|b| b.is_ascii_digit()) {
        return Err(not_an_integer());
    }
    let n: u32 = suffix.parse().map_err(|_| not_an_integer())?;
    if n == 0 {
        anyhow::bail!("invalid entity ref '{full}': numeric id cannot be zero");
    }
    Ok(())
}

/// Strict v4 (TSID) suffix check: the suffix must be exactly 13 bytes long
/// and decode as Crockford base32 into a `u64`.
///
/// `full` is the complete ref string and is used only to build error
/// messages. The length is checked first so a wrong-length suffix gets the
/// more specific message.
fn validate_v4_suffix(full: &str, suffix: &str) -> anyhow::Result<()> {
    if suffix.len() != 13 {
        return Err(anyhow::anyhow!(
            "invalid entity ref '{full}': v4 suffix must be exactly 13 characters"
        ));
    }
    match decode_crockford_to_u64(suffix) {
        Some(_) => Ok(()),
        None => Err(anyhow::anyhow!(
            "invalid entity ref '{full}': v4 suffix must be a valid Crockford base32 TSID"
        )),
    }
}

/// Strict v5 (ULID) suffix check: the suffix must be exactly 26 bytes long
/// and decode as Crockford base32 into a `u128`.
///
/// `full` is the complete ref string and is used only to build error
/// messages. The length is checked first so a wrong-length suffix gets the
/// more specific message.
fn validate_v5_suffix(full: &str, suffix: &str) -> anyhow::Result<()> {
    if suffix.len() != 26 {
        return Err(anyhow::anyhow!(
            "invalid entity ref '{full}': v5 suffix must be exactly 26 characters"
        ));
    }
    match decode_crockford_to_u128(suffix) {
        Some(_) => Ok(()),
        None => Err(anyhow::anyhow!(
            "invalid entity ref '{full}': v5 suffix must be a valid Crockford base32 ULID"
        )),
    }
}

/// Turn a 13-character Crockford base32 string into the `u64` it encodes.
///
/// Each symbol contributes 5 bits, most significant symbol first. Decoding is
/// case-insensitive and accepts the transcription aliases handled by
/// `decode_crockford_char` (`I`/`L` read as `1`, `O` read as `0`).
///
/// Returns `None` when the input is not exactly 13 bytes long, contains a
/// symbol outside the alphabet, or starts with a symbol whose value is 16 or
/// more (13 × 5 = 65 bits, so the first symbol may only use its low 4 bits).
fn decode_crockford_to_u64(s: &str) -> Option<u64> {
    const ENCODED_LEN: usize = 13;
    const MAX_LEADING_DIGIT: u8 = 0x0F;

    if s.len() != ENCODED_LEN {
        return None;
    }
    s.chars().enumerate().try_fold(0u64, |acc, (index, symbol)| {
        let digit = decode_crockford_char(symbol)?;
        if index == 0 && digit > MAX_LEADING_DIGIT {
            return None;
        }
        Some((acc << 5) | u64::from(digit))
    })
}

/// Turn a 26-character Crockford base32 string into the `u128` it encodes.
///
/// Each symbol contributes 5 bits, most significant symbol first. Decoding is
/// case-insensitive and accepts the transcription aliases handled by
/// `decode_crockford_char` (`I`/`L` read as `1`, `O` read as `0`).
///
/// Returns `None` when the input is not exactly 26 bytes long, contains a
/// symbol outside the alphabet, or starts with a symbol whose value is 8 or
/// more (26 × 5 = 130 bits, so the first symbol may only use its low 3 bits).
fn decode_crockford_to_u128(s: &str) -> Option<u128> {
    const ENCODED_LEN: usize = 26;
    const MAX_LEADING_DIGIT: u8 = 0x07;

    if s.len() != ENCODED_LEN {
        return None;
    }
    s.chars().enumerate().try_fold(0u128, |acc, (index, symbol)| {
        let digit = decode_crockford_char(symbol)?;
        if index == 0 && digit > MAX_LEADING_DIGIT {
            return None;
        }
        Some((acc << 5) | u128::from(digit))
    })
}

/// Map one Crockford base32 symbol to its 5-bit value (`0..=31`).
///
/// Matching is case-insensitive. The look-alike letters `O` (read as `0`) and
/// `I` / `L` (read as `1`) are accepted as aliases. Every other character,
/// including `U`, yields `None`.
fn decode_crockford_char(ch: char) -> Option<u8> {
    // Index in this table == symbol value.
    const ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";

    // Fold case and the transcription aliases onto the canonical symbol.
    let canonical = match ch.to_ascii_uppercase() {
        'O' => '0',
        'I' | 'L' => '1',
        other => other,
    };

    (0u8..)
        .zip(ALPHABET.iter())
        .find(|&(_, &symbol)| char::from(symbol) == canonical)
        .map(|(value, _)| value)
}

/// Displays an [`EntityRef`] as its raw string form (e.g. `ADR-0001`).
/// The text is written verbatim.
impl fmt::Display for EntityRef {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str(self.as_str())
    }
}

/// Enables `"ISSUE-0051".parse::<EntityRef>()`; delegates to
/// [`EntityRef::new`], so every supported suffix shape is accepted.
impl FromStr for EntityRef {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s)
    }
}

// ── KnownRefs ────────────────────────────────────────────────────────────────

/// A set of all entity refs known to exist in the workspace.
///
/// Passed to check functions to enable cross-ref link validation.
/// An empty `KnownRefs` signals that link-target resolution should be skipped.
///
/// Implemented as a newtype over a `HashSet<EntityRef>`, so membership is by
/// exact `EntityRef` equality and duplicates collapse. The derived `Default`
/// yields an empty set.
#[derive(Debug, Default, Clone)]
pub struct KnownRefs(std::collections::HashSet<EntityRef>);

impl KnownRefs {
    /// Create an empty set.
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a ref to the set.
    pub fn insert(&mut self, r: EntityRef) {
        self.0.insert(r);
    }

    /// Return `true` if the set contains no refs.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Return `true` if `r` is in the set.
    pub fn contains(&self, r: &EntityRef) -> bool {
        self.0.contains(r)
    }
}

impl FromIterator<EntityRef> for KnownRefs {
    fn from_iter<I: IntoIterator<Item = EntityRef>>(iter: I) -> Self {
        KnownRefs(iter.into_iter().collect())
    }
}

// ── proptest strategy ────────────────────────────────────────────────────────

#[cfg(test)]
pub mod strategy {
    use super::*;
    use proptest::prelude::*;

    /// Proptest strategy yielding valid legacy-shaped `EntityRef`s.
    ///
    /// The prefix is drawn from a small fixed list and the numeric id from
    /// `1..10_000`, rendered zero-padded to four digits. The ref is built
    /// directly, without going through the parsers.
    pub fn entity_ref() -> impl Strategy<Value = EntityRef> {
        let prefixes = proptest::sample::select(vec!["ISSUE", "ADR", "DDR", "GDDR"]);
        let ids = 1u32..10_000;
        (prefixes, ids).prop_map(|(prefix, n)| {
            let rendered = format!("{prefix}-{n:04}");
            EntityRef(rendered)
        })
    }
}

// Unit and property tests for `EntityRef`: construction via `new`, the
// per-version strict parsers, the accessors, and `numeric_id`.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    // ── `new` with legacy numeric suffixes ──────────────────────────────────

    #[test]
    fn new_accepts_issue_ref() {
        let r = EntityRef::new("ISSUE-0006").unwrap();
        assert_eq!(r.as_str(), "ISSUE-0006");
    }

    #[test]
    fn new_accepts_adr_ref() {
        let r = EntityRef::new("ADR-0001").unwrap();
        assert_eq!(r.as_str(), "ADR-0001");
    }

    #[test]
    fn new_accepts_multi_letter_prefix() {
        let r = EntityRef::new("GDDR-0042").unwrap();
        assert_eq!(r.prefix(), "GDDR");
        assert_eq!(r.numeric_id(), 42);
    }

    #[test]
    fn new_rejects_missing_dash() {
        assert!(EntityRef::new("ISSUE0006").is_err());
    }

    #[test]
    fn new_rejects_lowercase_prefix() {
        assert!(EntityRef::new("adr-0001").is_err());
    }

    #[test]
    fn new_rejects_mixed_case_prefix() {
        assert!(EntityRef::new("Adr-0001").is_err());
    }

    #[test]
    fn new_rejects_zero_id() {
        assert!(EntityRef::new("ADR-0000").is_err());
    }

    #[test]
    fn new_rejects_non_numeric_suffix() {
        assert!(EntityRef::new("ADR-abc").is_err());
    }

    #[test]
    fn new_rejects_empty_prefix() {
        assert!(EntityRef::new("-0001").is_err());
    }

    // ── accessors and trait impls ───────────────────────────────────────────

    #[test]
    fn prefix_extracts_correctly() {
        assert_eq!(EntityRef::new("ISSUE-0006").unwrap().prefix(), "ISSUE");
        assert_eq!(EntityRef::new("ADR-0001").unwrap().prefix(), "ADR");
    }

    #[test]
    fn numeric_id_extracts_correctly() {
        assert_eq!(EntityRef::new("ISSUE-0006").unwrap().numeric_id(), 6);
        assert_eq!(EntityRef::new("ADR-0042").unwrap().numeric_id(), 42);
    }

    #[test]
    fn display_roundtrips() {
        let r = EntityRef::new("ADR-0001").unwrap();
        assert_eq!(r.to_string(), "ADR-0001");
    }

    #[test]
    fn from_str_roundtrips() {
        let r: EntityRef = "ISSUE-0051".parse().unwrap();
        assert_eq!(r.as_str(), "ISSUE-0051");
    }

    #[test]
    fn equality_holds_for_same_value() {
        assert_eq!(
            EntityRef::new("ADR-0001").unwrap(),
            EntityRef::new("ADR-0001").unwrap()
        );
        assert_ne!(
            EntityRef::new("ADR-0001").unwrap(),
            EntityRef::new("ADR-0002").unwrap()
        );
        assert_ne!(
            EntityRef::new("ADR-0001").unwrap(),
            EntityRef::new("ISSUE-0001").unwrap()
        );
    }

    // ── `new` with TSID suffixes ────────────────────────────────────────────

    #[test]
    fn new_accepts_tsid_suffix() {
        // 13 chars, all valid Crockford base32 (no I/L/O/U).
        let r = EntityRef::new("ISSUE-0DCT3MKW5T2K0").unwrap();
        assert_eq!(r.prefix(), "ISSUE");
        assert_eq!(r.suffix(), "0DCT3MKW5T2K0");
    }

    #[test]
    fn new_accepts_tsid_suffix_for_decision_record() {
        let r = EntityRef::new("ADR-0DCT4P9X8N3R7").unwrap();
        assert_eq!(r.prefix(), "ADR");
        assert_eq!(r.suffix(), "0DCT4P9X8N3R7");
    }

    #[test]
    fn new_rejects_thirteen_char_invalid_tsid() {
        // 'U' is not in the Crockford alphabet at any position. The suffix
        // is 13 chars (not 26) and not a number, so the v5, v4 and v3
        // validators all reject it.
        assert!(EntityRef::new("ISSUE-UUUUUUUUUUUUU").is_err());
    }

    #[test]
    fn numeric_id_for_v4_suffix_returns_full_u64() {
        // A 13-char Crockford string has room for 13 × 5 = 65 bits, one more
        // than a u64: the leading char may only use its low 4 bits (top bit
        // must be zero) and the next 12 chars contribute 60 bits. 'F' (= 15)
        // in the leading position yields 15 << 60 = 0xF000_0000_0000_0000,
        // so the full u64 is recovered.
        let r = EntityRef::new("ISSUE-F000000000000").unwrap();
        assert_eq!(r.numeric_id(), 0xF000_0000_0000_0000u64);
    }

    // ── per-version strict parsers ──────────────────────────────────────────

    #[test]
    fn parse_v3_accepts_legacy_numeric_suffix() {
        assert!(EntityRef::parse_v3("ISSUE-0006").is_ok());
        assert!(EntityRef::parse_v3("ADR-0042").is_ok());
    }

    #[test]
    fn parse_v3_rejects_tsid_suffix() {
        let err = EntityRef::parse_v3("ISSUE-0DCT3MKW5T2K0").unwrap_err();
        assert!(
            err.to_string().contains("v3 suffix"),
            "expected v3 suffix error, got: {err}"
        );
    }

    #[test]
    fn parse_v4_accepts_tsid_suffix() {
        assert!(EntityRef::parse_v4("ISSUE-0DCT3MKW5T2K0").is_ok());
    }

    #[test]
    fn parse_v4_rejects_legacy_numeric_suffix() {
        let err = EntityRef::parse_v4("ISSUE-0042").unwrap_err();
        assert!(
            err.to_string().contains("v4 suffix"),
            "expected v4 suffix error, got: {err}"
        );
    }

    #[test]
    fn parse_any_accepts_all_three_shapes() {
        // Legacy numeric (v3), 13-char TSID (v4), 26-char ULID (v5).
        assert!(EntityRef::parse_any("ISSUE-0006").is_ok());
        assert!(EntityRef::parse_any("ISSUE-0DCT3MKW5T2K0").is_ok());
        assert!(EntityRef::parse_any("ISSUE-01J9ZK4T5M8N3QXA7BR2HVPMD0").is_ok());
    }

    #[test]
    fn parse_any_rejects_garbage() {
        // Non-numeric short suffix, then 13 and 26 chars of 'U', which is
        // not a Crockford symbol.
        assert!(EntityRef::parse_any("ISSUE-abc").is_err());
        assert!(EntityRef::parse_any("ISSUE-UUUUUUUUUUUUU").is_err());
        assert!(EntityRef::parse_any("ISSUE-UUUUUUUUUUUUUUUUUUUUUUUUUU").is_err());
    }

    #[test]
    fn parse_v5_accepts_ulid_suffix() {
        let r = EntityRef::parse_v5("ISSUE-01J9ZK4T5M8N3QXA7BR2HVPMD0").unwrap();
        assert_eq!(r.prefix(), "ISSUE");
        assert_eq!(r.suffix(), "01J9ZK4T5M8N3QXA7BR2HVPMD0");
    }

    #[test]
    fn parse_v5_rejects_tsid_suffix() {
        let err = EntityRef::parse_v5("ISSUE-0DCT3MKW5T2K0").unwrap_err();
        assert!(
            err.to_string().contains("v5 suffix"),
            "expected v5 suffix error, got: {err}"
        );
    }

    #[test]
    fn parse_v5_rejects_overflowing_leading_char() {
        // 26 chars × 5 bits = 130 bits, so only the low 3 bits of the leading
        // char fit in a u128. '8' (= 0b01000) sets the fourth bit of that
        // 5-bit group and would therefore push the value past u128.
        let err = EntityRef::parse_v5("ISSUE-80000000000000000000000000").unwrap_err();
        assert!(
            err.to_string().contains("v5 suffix"),
            "expected v5 suffix error, got: {err}"
        );
    }

    #[test]
    fn numeric_id_for_v5_suffix_returns_low_u64() {
        // 'F' followed by twelve '0's puts 0xF << 60 in the low 64 bits of
        // the 128-bit value; numeric_id returns those low 64 bits.
        let r = EntityRef::new("ISSUE-0000000000000F000000000000").unwrap();
        assert_eq!(r.numeric_id(), 0xF000_0000_0000_0000u64);
    }

    // ── property tests (legacy-shaped refs from `strategy::entity_ref`) ─────

    proptest! {
        #[test]
        fn prop_strategy_always_produces_valid_refs(r in strategy::entity_ref()) {
            // If the strategy produces it, EntityRef::new must accept it.
            prop_assert!(EntityRef::new(r.as_str()).is_ok());
        }

        #[test]
        fn prop_display_roundtrips(r in strategy::entity_ref()) {
            let s = r.to_string();
            prop_assert_eq!(s.parse::<EntityRef>().unwrap(), r);
        }

        #[test]
        fn prop_prefix_is_uppercase(r in strategy::entity_ref()) {
            prop_assert!(r.prefix().chars().all(|c| c.is_ascii_uppercase()));
        }

        #[test]
        fn prop_numeric_id_is_positive(r in strategy::entity_ref()) {
            prop_assert!(r.numeric_id() > 0);
        }
    }
}