Skip to main content

mnem_core/id/
stable.rs

1//! Stable identifiers: `NodeId`, `EdgeId`, `ChangeId`, `OperationId`.
2//!
3//! Per SPEC §2.3, every persistent entity carries a 16-byte
4//! (128-bit) stable identifier that survives content edits, rewrites, and
5//! rebases. The identifiers are distinguished at the type level to prevent
6//! mixing a `NodeId` into a field expecting an `EdgeId`.
7//!
8//! All four use `UUIDv7` (RFC 9562, finalized May 2024) as the default
9//! generator. Implementations MAY substitute other time-ordered,
10//! collision-resistant 128-bit values.
11//!
12//! ## Generator entropy
13//!
14//! `UUIDv7` relies on 74 bits of random data to disambiguate IDs produced in
15//! the same millisecond. `uuid`'s `Uuid::now_v7()` uses the thread-local
16//! OS CSPRNG. Applications requiring stronger entropy guarantees
17//! (multi-tenant servers, high-throughput agent fleets) should pass their
18//! own `OsRng`-sourced seeds via [`StableId::from_random_bytes`] when
19//! constructing IDs explicitly.
20
21use core::fmt;
22use core::marker::PhantomData;
23
24use serde::de::{self, Visitor};
25use serde::{Deserialize, Deserializer, Serialize, Serializer};
26use uuid::Uuid;
27
28use crate::error::IdError;
29
// Private home of the sealing supertrait. `Sealed` is `pub` so it can appear
// in `StableIdKind`'s bounds, but the module is private, so code outside this
// module cannot name it and therefore cannot add new `StableIdKind` impls.
mod sealed {
    pub trait Sealed {}
}

/// Marker trait implemented by the role tags that parameterize a stable ID.
///
/// Implementors are pure type-level markers: the trait has no methods and
/// exposes only the [`TAG`](StableIdKind::TAG) constant. The set of
/// implementors is closed via the private `sealed::Sealed` supertrait.
pub trait StableIdKind: 'static + sealed::Sealed {
    /// Short role name; shown in `Debug` output.
    const TAG: &'static str;
}
41
42/// Tag for [`NodeId`].
43#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
44pub struct NodeTag;
45impl sealed::Sealed for NodeTag {}
46impl StableIdKind for NodeTag {
47    const TAG: &'static str = "node";
48}
49
50/// Tag for [`EdgeId`].
51#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
52pub struct EdgeTag;
53impl sealed::Sealed for EdgeTag {}
54impl StableIdKind for EdgeTag {
55    const TAG: &'static str = "edge";
56}
57
58/// Tag for [`ChangeId`].
59#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
60pub struct ChangeTag;
61impl sealed::Sealed for ChangeTag {}
62impl StableIdKind for ChangeTag {
63    const TAG: &'static str = "change";
64}
65
66/// Tag for [`OperationId`].
67#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
68pub struct OperationTag;
69impl sealed::Sealed for OperationTag {}
70impl StableIdKind for OperationTag {
71    const TAG: &'static str = "op";
72}
73
74/// A 16-byte stable identifier, parameterized by role tag.
75///
76/// Construct via [`StableId::new_v7`] for fresh IDs or
77/// [`StableId::from_bytes`] to rehydrate from canonical-encoded form.
78///
79/// Two `StableId` values with different role tags are distinct types and
80/// cannot be compared or converted without an explicit crossing.
81// The tag types are zero-sized and derive all the needed comparison/hash
82// traits, so derives on `StableId<Kind>` propagate cleanly. Only the `bytes`
83// field participates materially in Eq/Ord/Hash; the tag is a compile-time
84// marker with no runtime presence.
85//
86// Serde is implemented manually (not derived) so that serialization uses
87// `serialize_bytes`, which DAG-CBOR encodes as a major-type-2 byte string
88// (17 bytes on the wire: 1 header byte + 16 data bytes). The default
89// `#[derive(Serialize)]` on `[u8; 16]` emits a CBOR array, which would
90// violate SPEC §4.1 ("stable identifiers MUST be encoded as 16-byte byte
91// strings, never as arrays").
92#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
93pub struct StableId<Kind: StableIdKind> {
94    bytes: [u8; 16],
95    _tag: PhantomData<fn() -> Kind>,
96}
97
98impl<Kind: StableIdKind> Serialize for StableId<Kind> {
99    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
100        serializer.serialize_bytes(&self.bytes)
101    }
102}
103
104struct StableIdVisitor<Kind>(PhantomData<fn() -> Kind>);
105
106impl<'de, Kind: StableIdKind> Visitor<'de> for StableIdVisitor<Kind> {
107    type Value = StableId<Kind>;
108
109    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110        f.write_str("a 16-byte stable-id byte string")
111    }
112
113    fn visit_bytes<E: de::Error>(self, v: &[u8]) -> Result<Self::Value, E> {
114        if v.len() != 16 {
115            return Err(E::invalid_length(v.len(), &"16"));
116        }
117        let mut arr = [0u8; 16];
118        arr.copy_from_slice(v);
119        Ok(StableId::from_bytes_raw(arr))
120    }
121
122    fn visit_borrowed_bytes<E: de::Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
123        self.visit_bytes(v)
124    }
125
126    fn visit_byte_buf<E: de::Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
127        self.visit_bytes(&v)
128    }
129}
130
131impl<'de, Kind: StableIdKind> Deserialize<'de> for StableId<Kind> {
132    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
133        deserializer.deserialize_bytes(StableIdVisitor::<Kind>(PhantomData))
134    }
135}
136
137impl<Kind: StableIdKind> StableId<Kind> {
138    /// Generate a fresh `UUIDv7` stable ID.
139    ///
140    /// Uses the `uuid` crate's `Uuid::now_v7()`, which combines the current
141    /// Unix timestamp (48 bits, millisecond resolution) with 74 bits of
142    /// CSPRNG entropy. See RFC 9562.
143    #[must_use]
144    pub fn new_v7() -> Self {
145        Self::from_bytes_raw(*Uuid::now_v7().as_bytes())
146    }
147
148    /// Construct from an explicit 16-byte array. The bytes are not validated
149    /// beyond their length - any 128-bit value is accepted. Callers who
150    /// want UUID shape validation should use [`StableId::parse_uuid`].
151    #[must_use]
152    pub const fn from_bytes_raw(bytes: [u8; 16]) -> Self {
153        Self {
154            bytes,
155            _tag: PhantomData,
156        }
157    }
158
159    /// Construct from a byte slice of length 16, validating the length.
160    ///
161    /// # Errors
162    ///
163    /// Returns [`IdError::StableIdLength`] if `bytes.len() != 16`.
164    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdError> {
165        let arr: [u8; 16] = bytes
166            .try_into()
167            .map_err(|_| IdError::StableIdLength { got: bytes.len() })?;
168        Ok(Self::from_bytes_raw(arr))
169    }
170
171    /// Parse from a canonical UUID string (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`).
172    ///
173    /// # Errors
174    ///
175    /// Returns [`IdError::StableIdParse`] if `s` is not a valid UUID.
176    pub fn parse_uuid(s: &str) -> Result<Self, IdError> {
177        Uuid::parse_str(s)
178            .map(|u| Self::from_bytes_raw(*u.as_bytes()))
179            .map_err(|source| IdError::StableIdParse { source })
180    }
181
182    /// Construct from 16 random bytes without UUID structure. Useful when
183    /// the caller has already generated cryptographically random bytes and
184    /// does not need `UUIDv7`'s time ordering. The value will not validate
185    /// as a `UUIDv7` but will still function as a stable ID in mnem.
186    #[must_use]
187    pub const fn from_random_bytes(bytes: [u8; 16]) -> Self {
188        Self::from_bytes_raw(bytes)
189    }
190
191    /// Borrow as a byte slice.
192    #[must_use]
193    pub const fn as_bytes(&self) -> &[u8; 16] {
194        &self.bytes
195    }
196
197    /// Consume and return the underlying 16 bytes.
198    #[must_use]
199    pub const fn into_bytes(self) -> [u8; 16] {
200        self.bytes
201    }
202
203    /// Render as a lowercase hyphenated UUID string.
204    #[must_use]
205    pub fn to_uuid_string(&self) -> String {
206        Uuid::from_bytes(self.bytes).hyphenated().to_string()
207    }
208}
209
210impl<Kind: StableIdKind> fmt::Debug for StableId<Kind> {
211    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
212        write!(f, "{}({})", Kind::TAG, self.to_uuid_string())
213    }
214}
215
216impl<Kind: StableIdKind> fmt::Display for StableId<Kind> {
217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218        f.write_str(&self.to_uuid_string())
219    }
220}
221
222/// Stable identifier of a Node (SPEC §4.1, §2.3).
223pub type NodeId = StableId<NodeTag>;
224
225/// Stable identifier of an Edge (SPEC §4.2, §2.3).
226pub type EdgeId = StableId<EdgeTag>;
227
228/// Stable identifier of a Commit's logical change (SPEC §4.4, §2.3).
229///
230/// Survives rebase, amend, and squash, unlike the commit's content-addressed
231/// CID which changes on every rewrite.
232pub type ChangeId = StableId<ChangeTag>;
233
234/// Stable identifier of an Operation (SPEC §4.5, §2.3).
235///
236/// Note: by convention the `OperationId` equals the content hash of the
237/// Operation object because Operations are immutable by design. The
238/// `StableId<OperationTag>` wrapper lets the rest of the API treat all four
239/// role-tagged IDs uniformly; construction from a content hash happens in
240/// a later module (`mnem-core::objects::operation`).
241pub type OperationId = StableId<OperationTag>;
242
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn stable_ids_have_distinct_types() {
        // Assigning a NodeId to an EdgeId is a compile error; that is the
        // invariant we care about, but it cannot be expressed as a runtime
        // assertion. Here we only sanity-check that an ID survives a trip
        // through its raw bytes (no serde involved).
        let n = NodeId::new_v7();
        let bytes = *n.as_bytes();
        let n2 = NodeId::from_bytes(&bytes).expect("16 bytes");
        assert_eq!(n, n2);
    }

    #[test]
    fn stable_id_debug_shows_kind_tag() {
        let n = NodeId::new_v7();
        let s = format!("{n:?}");
        assert!(
            s.starts_with("node("),
            "debug repr begins with kind tag: {s}"
        );
    }

    #[test]
    fn display_and_debug_agree_with_uuid_string() {
        // Pins the exact text of both fmt impls against `to_uuid_string`.
        let n = NodeId::new_v7();
        let uuid_text = n.to_uuid_string();
        assert_eq!(n.to_string(), uuid_text);
        assert_eq!(format!("{n:?}"), format!("node({uuid_text})"));
    }

    #[test]
    fn wrong_length_rejected() {
        let err = NodeId::from_bytes(&[0u8; 8]).unwrap_err();
        match err {
            IdError::StableIdLength { got } => assert_eq!(got, 8),
            e => panic!("wrong variant: {e:?}"),
        }
    }

    #[test]
    fn raw_bytes_roundtrip() {
        // Both raw constructors store the bytes verbatim.
        let raw = [0xABu8; 16];
        assert_eq!(NodeId::from_random_bytes(raw).into_bytes(), raw);
        assert_eq!(NodeId::from_bytes_raw(raw), NodeId::from_random_bytes(raw));
    }

    #[test]
    fn uuid_string_roundtrip() {
        let n = NodeId::new_v7();
        let s = n.to_uuid_string();
        let parsed = NodeId::parse_uuid(&s).expect("valid uuid");
        assert_eq!(n, parsed);
    }

    #[test]
    fn uuidv7_is_time_ordered_within_a_ms() {
        // UUIDv7 is time-ordered at 1ms resolution. IDs minted within the
        // same millisecond share a timestamp prefix and may tie-break on
        // their trailing bits, so the full IDs are NOT asserted to be
        // strictly ordered.
        //
        // What must hold is that the leading 6 bytes (the 48-bit big-endian
        // timestamp) never decrease in GENERATION order. The check must run
        // on the unsorted batch: on a sorted batch it would be trivially
        // true for any byte strings. It assumes the wall clock does not step
        // backwards while the batch is generated.
        let ids: Vec<NodeId> = (0..32).map(|_| NodeId::new_v7()).collect();
        for window in ids.windows(2) {
            let a = &window[0].as_bytes()[0..6];
            let b = &window[1].as_bytes()[0..6];
            assert!(
                a <= b,
                "UUIDv7 timestamp prefix non-monotonic: {a:?} > {b:?}"
            );
        }
    }
}