Skip to main content

obj_core/
id.rs

1//! Per-collection document identifier — `Id(NonZeroU64)`.
2//!
3//! See `docs/format.md` § Id allocation for the on-disk contract.
4//! `Id` is the public identifier type every collection hands out for
5//! its documents. The newtype encoding pins three invariants:
6//!
7//! 1. Zero is reserved as the sentinel "no id" value — `Id::try_new(0)`
8//!    returns `None`. This makes `Option<Id>` the same size as `Id`
9//!    and gives the freelist / catalog primitives an unambiguous
10//!    "missing" marker.
11//! 2. Allocation is per-collection monotonic. Two collections can
12//!    independently hand out `Id(1), Id(2), ...` without interference;
13//!    the catalog row for each collection carries its own `next_id`
14//!    watermark.
15//! 3. The serde representation of `Id` is its inner `NonZeroU64`, so
16//!    an `Id` can appear in user `Document` types verbatim and round-
17//!    trip through postcard.
18//!
19//! # Power-of-ten posture
20//!
21//! - **Rule 5.** `NonZeroU64` is the type-level invariant; the
22//!   `try_new` constructor is the runtime boundary.
23//! - **Rule 7.** Wraparound at `u64::MAX` returns
24//!   [`Error::IdSpaceExhausted`] — never a panic.
25
26#![forbid(unsafe_code)]
27
28use core::num::NonZeroU64;
29
30use postcard::experimental::max_size::MaxSize;
31use serde::{Deserialize, Serialize};
32
33use crate::error::{Error, Result};
34
35/// Per-collection document identifier.
36///
37/// Wraps a [`NonZeroU64`] so the on-disk value `0` is unambiguously
38/// "no id". Allocated by the catalog (M5 issue #38) via
39/// `Catalog::next_id` — see that method for the full transactional
40/// contract.
41///
42/// `Id` implements `serde::Serialize + Deserialize + MaxSize` so it
43/// can appear in user `Document` types directly, including inside a
44/// `Vec<Id>` or a nested struct. The serde encoding is the inner
45/// `NonZeroU64` — postcard varint-encodes it, and the deserializer
46/// rejects the on-the-wire value `0` because `NonZeroU64`'s `serde`
47/// impl already does the validation.
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
49#[serde(transparent)]
50pub struct Id(NonZeroU64);
51
52impl Id {
53    /// Construct an [`Id`] from a raw `u64`. Returns `None` if `raw`
54    /// is `0` (per the sentinel-zero contract).
55    #[must_use]
56    pub const fn try_new(raw: u64) -> Option<Self> {
57        match NonZeroU64::new(raw) {
58            Some(nz) => Some(Self(nz)),
59            None => None,
60        }
61    }
62
63    /// Total-function constructor: builds an [`Id`] from a
64    /// [`NonZeroU64`] that the caller already proved is non-zero.
65    /// Use [`try_new`](Self::try_new) at runtime boundaries.
66    #[must_use]
67    pub const fn from_nonzero(nz: NonZeroU64) -> Self {
68        Self(nz)
69    }
70
71    /// The underlying `u64`. Always non-zero.
72    #[must_use]
73    pub const fn get(self) -> u64 {
74        self.0.get()
75    }
76
77    /// The underlying [`NonZeroU64`].
78    #[must_use]
79    pub const fn as_nonzero(self) -> NonZeroU64 {
80        self.0
81    }
82
83    /// Big-endian byte encoding used as a key in a collection's
84    /// primary B-tree. The big-endian shape makes lexicographic
85    /// byte comparison agree with numeric `<` on the underlying
86    /// `u64` — the documented convention from `docs/format.md`
87    /// § Key and value encoding.
88    #[must_use]
89    pub const fn to_be_bytes(self) -> [u8; 8] {
90        self.0.get().to_be_bytes()
91    }
92
93    /// Decode a big-endian `Id` from `bytes`. Returns `None` if the
94    /// byte slice is the wrong length or names the sentinel zero.
95    #[must_use]
96    pub fn from_be_bytes(bytes: &[u8]) -> Option<Self> {
97        let arr: [u8; 8] = bytes.try_into().ok()?;
98        Self::try_new(u64::from_be_bytes(arr))
99    }
100}
101
102impl MaxSize for Id {
103    // postcard encodes a u64 varint at most 10 bytes (7 data bits per
104    // byte; ceil(64 / 7) = 10). `NonZeroU64` shares the same encoding.
105    const POSTCARD_MAX_SIZE: usize = 10;
106}
107
108/// Allocator shim used by the catalog to mint the next `Id` in a
109/// collection.
110///
111/// The full allocator lives in `Catalog::next_id` (M5 issue #38).
112/// This module provides only the **arithmetic step** — incrementing
113/// a `u64` watermark and rejecting wraparound — so the catalog can
114/// be tested in isolation.
115///
116/// The `collection` argument is a closure that builds the owned
117/// collection name on demand, so the happy path never allocates a
118/// `String`. The closure is only invoked on the wraparound /
119/// zero-watermark error path.
120///
121/// # Errors
122///
123/// Returns [`Error::IdSpaceExhausted`] when the increment would
124/// overflow `u64::MAX`. At 10⁹ inserts/sec this is ~584 years; the
125/// check is defensive (Rule 5) and cheap.
126pub fn bump_next_id<F>(next_id: &mut u64, collection: F) -> Result<Id>
127where
128    F: FnOnce() -> String,
129{
130    if *next_id == 0 {
131        // The catalog initialises every collection's `next_id` to
132        // `1` (since `Id` is non-zero by construction). Reading a
133        // zero is a corruption signal — surface it explicitly rather
134        // than silently bumping to `1`.
135        return Err(Error::IdSpaceExhausted {
136            collection: collection(),
137        });
138    }
139    let issued = *next_id;
140    // After issuing `u64::MAX`, the post-state's watermark cannot
141    // advance — we saturate at `u64::MAX` so that the next call
142    // detects "already-issued" via the equality check above plus
143    // `checked_add`. We rewrite to a sentinel zero so the next
144    // call's check trips immediately; that preserves the contract
145    // "exceeding u64::MAX returns IdSpaceExhausted" while still
146    // letting the current call hand out the last legitimate id.
147    *next_id = next_id.checked_add(1).unwrap_or(0);
148    Id::try_new(issued).ok_or_else(|| Error::IdSpaceExhausted {
149        collection: collection(),
150    })
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls the collection name out of an [`Error::IdSpaceExhausted`],
    /// failing the calling test on any other variant.
    #[track_caller]
    fn exhausted_collection(err: Error) -> String {
        match err {
            Error::IdSpaceExhausted { collection } => collection,
            other => panic!("expected IdSpaceExhausted, got {other:?}"),
        }
    }

    /// `0` is the sentinel and must be refused; both ends of the valid
    /// range must be accepted and read back unchanged.
    #[test]
    fn try_new_rejects_zero() {
        assert_eq!(Id::try_new(0), None);
        assert_eq!(Id::try_new(1).map(Id::get), Some(1));
        assert_eq!(Id::try_new(u64::MAX).map(Id::get), Some(u64::MAX));
    }

    /// The key encoding is big-endian, round-trips, and refuses both
    /// the all-zero key and a slice of the wrong length.
    #[test]
    fn be_bytes_round_trip() {
        let id = Id::try_new(0x0102_0304_0506_0708).expect("non-zero");
        let encoded = id.to_be_bytes();
        assert_eq!(
            encoded,
            [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]
        );
        assert_eq!(Id::from_be_bytes(&encoded), Some(id));

        let all_zero = [0u8; 8];
        let too_short = [0u8; 7];
        assert_eq!(Id::from_be_bytes(&all_zero), None);
        assert_eq!(Id::from_be_bytes(&too_short), None);
    }

    /// An `Id` survives a postcard encode / decode cycle.
    #[test]
    fn serde_round_trip_via_postcard() {
        let original = Id::try_new(42).expect("non-zero");
        let wire = postcard::to_allocvec(&original).expect("encode");
        let decoded: Id = postcard::from_bytes(&wire).expect("decode");
        assert_eq!(decoded, original);
    }

    /// The single byte `0x00` is the varint encoding of `0`. Decoding
    /// it as an `Id` must fail, because `Id` wraps `NonZeroU64` and
    /// that type's serde impl refuses zero.
    #[test]
    fn serde_rejects_zero_on_decode() {
        let result = postcard::from_bytes::<Id>(&[0u8]);
        assert!(result.is_err(), "zero must be rejected; got {result:?}",);
    }

    /// Downstream callers size buffers off this constant, so its exact
    /// value (10 bytes, the longest `u64` varint) is pinned here.
    #[test]
    fn postcard_max_size_constant() {
        assert_eq!(Id::POSTCARD_MAX_SIZE, 10);
    }

    /// Each call returns the current watermark and moves it forward
    /// by one.
    #[test]
    fn bump_allocator_advances() {
        let mut watermark = 1u64;

        let first = bump_next_id(&mut watermark, || "test".to_owned()).expect("bump 1");
        assert_eq!((first.get(), watermark), (1, 2));

        let second = bump_next_id(&mut watermark, || "test".to_owned()).expect("bump 2");
        assert_eq!((second.get(), watermark), (2, 3));
    }

    /// `u64::MAX` is still handed out; the call after that must fail
    /// and name the collection.
    #[test]
    fn bump_allocator_detects_wraparound() {
        let mut watermark = u64::MAX;

        let last = bump_next_id(&mut watermark, || "wrap".to_owned()).expect("last id");
        assert_eq!(last.get(), u64::MAX);

        // The watermark has no successor left, so this call is refused.
        let err = bump_next_id(&mut watermark, || "wrap".to_owned()).expect_err("wraparound");
        assert_eq!(exhausted_collection(err), "wrap");
    }

    /// A watermark that is already `0` is refused outright.
    #[test]
    fn bump_allocator_detects_zero_watermark() {
        let mut watermark = 0u64;
        let err =
            bump_next_id(&mut watermark, || "zerowm".to_owned()).expect_err("zero watermark");
        assert_eq!(exhausted_collection(err), "zerowm");
    }

    /// Regression test for #42: the `collection` field must be an
    /// owned `String` built from the caller's input — the exact text
    /// passed in, with no `Box::leak` / static-storage trick.
    #[test]
    fn bump_allocator_error_field_preserves_user_supplied_name() {
        let user_input = String::from("dynamically built name");
        let mut watermark = 0u64;
        let err = bump_next_id(&mut watermark, || user_input.clone()).expect_err("error");
        assert_eq!(exhausted_collection(err), "dynamically built name");
    }
}