obj_core/id.rs
1//! Per-collection document identifier — `Id(NonZeroU64)`.
2//!
3//! See `docs/format.md` § Id allocation for the on-disk contract.
4//! `Id` is the public identifier type every collection hands out for
5//! its documents. The newtype encoding pins three invariants:
6//!
7//! 1. Zero is reserved as the sentinel "no id" value — `Id::try_new(0)`
8//! returns `None`. This makes `Option<Id>` the same size as `Id`
9//! and gives the freelist / catalog primitives an unambiguous
10//! "missing" marker.
11//! 2. Allocation is per-collection monotonic. Two collections can
12//! independently hand out `Id(1), Id(2), ...` without interference;
13//! the catalog row for each collection carries its own `next_id`
14//! watermark.
15//! 3. The serde representation of `Id` is its inner `NonZeroU64`, so
16//! an `Id` can appear in user `Document` types verbatim and round-
17//! trip through postcard.
18//!
19//! # Power-of-ten posture
20//!
21//! - **Rule 5.** `NonZeroU64` is the type-level invariant; the
22//! `try_new` constructor is the runtime boundary.
23//! - **Rule 7.** Wraparound at `u64::MAX` returns
24//! [`Error::IdSpaceExhausted`] — never a panic.
25
26#![forbid(unsafe_code)]
27
28use core::num::NonZeroU64;
29
30use postcard::experimental::max_size::MaxSize;
31use serde::{Deserialize, Serialize};
32
33use crate::error::{Error, Result};
34
35/// Per-collection document identifier.
36///
37/// Wraps a [`NonZeroU64`] so the on-disk value `0` is unambiguously
38/// "no id". Allocated by the catalog (M5 issue #38) via
39/// `Catalog::next_id` — see that method for the full transactional
40/// contract.
41///
42/// `Id` implements `serde::Serialize + Deserialize + MaxSize` so it
43/// can appear in user `Document` types directly, including inside a
44/// `Vec<Id>` or a nested struct. The serde encoding is the inner
45/// `NonZeroU64` — postcard varint-encodes it, and the deserializer
46/// rejects the on-the-wire value `0` because `NonZeroU64`'s `serde`
47/// impl already does the validation.
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
49#[serde(transparent)]
50pub struct Id(NonZeroU64);
51
52impl Id {
53 /// Construct an [`Id`] from a raw `u64`. Returns `None` if `raw`
54 /// is `0` (per the sentinel-zero contract).
55 #[must_use]
56 pub const fn try_new(raw: u64) -> Option<Self> {
57 match NonZeroU64::new(raw) {
58 Some(nz) => Some(Self(nz)),
59 None => None,
60 }
61 }
62
63 /// Total-function constructor: builds an [`Id`] from a
64 /// [`NonZeroU64`] that the caller already proved is non-zero.
65 /// Use [`try_new`](Self::try_new) at runtime boundaries.
66 #[must_use]
67 pub const fn from_nonzero(nz: NonZeroU64) -> Self {
68 Self(nz)
69 }
70
71 /// The underlying `u64`. Always non-zero.
72 // #87: trivial accessor on the read/maint hot path, used across
73 // the obj-db → obj-core boundary; inline to elide the call.
74 #[inline]
75 #[must_use]
76 pub const fn get(self) -> u64 {
77 self.0.get()
78 }
79
80 /// The underlying [`NonZeroU64`].
81 #[must_use]
82 pub const fn as_nonzero(self) -> NonZeroU64 {
83 self.0
84 }
85
86 /// Big-endian byte encoding used as a key in a collection's
87 /// primary B-tree. The big-endian shape makes lexicographic
88 /// byte comparison agree with numeric `<` on the underlying
89 /// `u64` — the documented convention from `docs/format.md`
90 /// § Key and value encoding.
91 // #87: trivial one-liner on the read hot path, called across the
92 // obj-db → obj-core crate boundary (e.g. `collection.rs`); inline
93 // so the call cannot survive LTO as a real function call.
94 #[inline]
95 #[must_use]
96 pub const fn to_be_bytes(self) -> [u8; 8] {
97 self.0.get().to_be_bytes()
98 }
99
100 /// Decode a big-endian `Id` from `bytes`. Returns `None` if the
101 /// byte slice is the wrong length or names the sentinel zero.
102 // #87: trivial cross-crate decode on the read hot path
103 // (`collection.rs`, `db.rs`); inline to elide the boundary call.
104 #[inline]
105 #[must_use]
106 pub fn from_be_bytes(bytes: &[u8]) -> Option<Self> {
107 let arr: [u8; 8] = bytes.try_into().ok()?;
108 Self::try_new(u64::from_be_bytes(arr))
109 }
110}
111
112impl MaxSize for Id {
113 // postcard encodes a u64 varint at most 10 bytes (7 data bits per
114 // byte; ceil(64 / 7) = 10). `NonZeroU64` shares the same encoding.
115 const POSTCARD_MAX_SIZE: usize = 10;
116}
117
118/// Allocator shim used by the catalog to mint the next `Id` in a
119/// collection.
120///
121/// The full allocator lives in `Catalog::next_id` (M5 issue #38).
122/// This module provides only the **arithmetic step** — incrementing
123/// a `u64` watermark and rejecting wraparound — so the catalog can
124/// be tested in isolation.
125///
126/// The `collection` argument is a closure that builds the owned
127/// collection name on demand, so the happy path never allocates a
128/// `String`. The closure is only invoked on the wraparound /
129/// zero-watermark error path.
130///
131/// # Errors
132///
133/// Returns [`Error::IdSpaceExhausted`] when the increment would
134/// overflow `u64::MAX`. At 10⁹ inserts/sec this is ~584 years; the
135/// check is defensive (Rule 5) and cheap.
136pub fn bump_next_id<F>(next_id: &mut u64, collection: F) -> Result<Id>
137where
138 F: FnOnce() -> String,
139{
140 if *next_id == 0 {
141 // The catalog initialises every collection's `next_id` to
142 // `1` (since `Id` is non-zero by construction). Reading a
143 // zero is a corruption signal — surface it explicitly rather
144 // than silently bumping to `1`.
145 return Err(Error::IdSpaceExhausted {
146 collection: collection(),
147 });
148 }
149 let issued = *next_id;
150 // After issuing `u64::MAX`, the post-state's watermark cannot
151 // advance — we saturate at `u64::MAX` so that the next call
152 // detects "already-issued" via the equality check above plus
153 // `checked_add`. We rewrite to a sentinel zero so the next
154 // call's check trips immediately; that preserves the contract
155 // "exceeding u64::MAX returns IdSpaceExhausted" while still
156 // letting the current call hand out the last legitimate id.
157 *next_id = next_id.checked_add(1).unwrap_or(0);
158 Id::try_new(issued).ok_or_else(|| Error::IdSpaceExhausted {
159 collection: collection(),
160 })
161}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Extracts the collection name carried by an
    /// [`Error::IdSpaceExhausted`]; any other variant fails the test.
    fn exhausted_collection(err: Error) -> String {
        match err {
            Error::IdSpaceExhausted { collection } => collection,
            other => panic!("expected IdSpaceExhausted, got {other:?}"),
        }
    }

    #[test]
    fn try_new_rejects_zero() {
        assert_eq!(Id::try_new(0), None);
        // Both ends of the valid range are accepted unchanged.
        for raw in [1, u64::MAX] {
            assert_eq!(Id::try_new(raw).map(Id::get), Some(raw));
        }
    }

    #[test]
    fn be_bytes_round_trip() {
        let id = Id::try_new(0x0102_0304_0506_0708).expect("non-zero");
        let encoded = id.to_be_bytes();
        assert_eq!(encoded, [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]);
        assert_eq!(Id::from_be_bytes(&encoded), Some(id));
        // The sentinel zero and a wrong-length slice are both refused.
        assert_eq!(Id::from_be_bytes(&[0u8; 8]), None);
        assert_eq!(Id::from_be_bytes(&[0u8; 7]), None);
    }

    #[test]
    fn serde_round_trip_via_postcard() {
        let original = Id::try_new(42).expect("non-zero");
        let wire = postcard::to_allocvec(&original).expect("encode");
        let decoded: Id = postcard::from_bytes(&wire).expect("decode");
        assert_eq!(decoded, original);
    }

    #[test]
    fn serde_rejects_zero_on_decode() {
        // `0x00` is the varint encoding of the u64 value 0. Decoding
        // it as an `Id` must fail, since `Id` wraps `NonZeroU64` and
        // serde refuses zero for that type.
        let result = postcard::from_bytes::<Id>(&[0u8]);
        assert!(result.is_err(), "zero must be rejected; got {result:?}");
    }

    #[test]
    fn postcard_max_size_constant() {
        // Pin the exact value: a varint u64 takes at most 10 bytes,
        // and downstream callers size buffers from this constant.
        assert_eq!(<Id as MaxSize>::POSTCARD_MAX_SIZE, 10);
    }

    #[test]
    fn bump_allocator_advances() {
        let mut watermark = 1u64;
        for (label, expected) in [("bump 1", 1u64), ("bump 2", 2)] {
            let id = bump_next_id(&mut watermark, || "test".to_owned()).expect(label);
            assert_eq!(id.get(), expected);
            assert_eq!(watermark, expected + 1);
        }
    }

    #[test]
    fn bump_allocator_detects_wraparound() {
        let mut watermark = u64::MAX;
        let last = bump_next_id(&mut watermark, || "wrap".to_owned()).expect("last id");
        assert_eq!(last.get(), u64::MAX);
        // The watermark has now overflowed, so the following call
        // must be refused.
        let err = bump_next_id(&mut watermark, || "wrap".to_owned()).expect_err("wraparound");
        assert_eq!(exhausted_collection(err), "wrap");
    }

    #[test]
    fn bump_allocator_detects_zero_watermark() {
        let mut watermark = 0u64;
        let err =
            bump_next_id(&mut watermark, || "zerowm".to_owned()).expect_err("zero watermark");
        assert_eq!(exhausted_collection(err), "zerowm");
    }

    #[test]
    fn bump_allocator_error_field_preserves_user_supplied_name() {
        // Regression test for #42: the `collection` field must hold
        // exactly the name the caller's closure produced, as an owned
        // `String` — no `Box::leak` / static-storage trick.
        let mut watermark = 0u64;
        let user_input = String::from("dynamically built name");
        let err = bump_next_id(&mut watermark, || user_input.clone()).expect_err("error");
        assert_eq!(exhausted_collection(err), "dynamically built name");
    }
}