Skip to main content

mnem_core/prolly/
constants.rs

1//! Frozen constants + newtype key for the Prolly tree.
2//!
3//! The values here are part of the mnem format and MUST NOT drift across
4//! implementations. A change to any of them is a wire-format-breaking
5//! change that requires a `mnem/N+1` version bump and a migration story.
6
7use core::fmt;
8
9use serde::de::{self, Visitor};
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11
12use crate::id::{ChangeId, EdgeId, NodeId};
13
14/// Width in bytes of a Prolly tree key.
15///
16/// 16 bytes = 128 bits, matching the stable-ID width used throughout mnem
17/// (`NodeId`, `EdgeId`, `ChangeId`; see SPEC §2.3 and ).
18pub const PROLLY_KEY_BYTES: usize = 16;
19
20/// A 16-byte Prolly tree key - the unit the chunker and tree operate on.
21///
22/// This is a newtype over `[u8; 16]` with custom Serde impls that emit the
23/// value as a CBOR byte string (major type 2) per SPEC §4.3. The default
24/// serde derive would emit a CBOR array of sixteen `u8` integers, which
25/// is incorrect for the mnem canonical form.
26///
27/// All four stable IDs (`NodeId`, `EdgeId`, `ChangeId`, `OperationId`)
28/// convert into `ProllyKey` via `From`. Construct raw keys with
29/// [`ProllyKey::new`] or the tuple literal `ProllyKey([u8; 16])`.
30#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
31pub struct ProllyKey(pub [u8; PROLLY_KEY_BYTES]);
32
33impl ProllyKey {
34    /// Wrap raw bytes.
35    #[must_use]
36    pub const fn new(bytes: [u8; PROLLY_KEY_BYTES]) -> Self {
37        Self(bytes)
38    }
39
40    /// Borrow the underlying bytes.
41    #[must_use]
42    pub const fn as_bytes(&self) -> &[u8; PROLLY_KEY_BYTES] {
43        &self.0
44    }
45
46    /// Extract the underlying bytes.
47    #[must_use]
48    pub const fn into_bytes(self) -> [u8; PROLLY_KEY_BYTES] {
49        self.0
50    }
51}
52
53impl fmt::Debug for ProllyKey {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        write!(f, "ProllyKey(")?;
56        for b in &self.0 {
57            write!(f, "{b:02x}")?;
58        }
59        f.write_str(")")
60    }
61}
62
63// ---------- Stable-ID conversions ----------
64
65impl From<NodeId> for ProllyKey {
66    fn from(v: NodeId) -> Self {
67        Self(v.into_bytes())
68    }
69}
70
71impl From<EdgeId> for ProllyKey {
72    fn from(v: EdgeId) -> Self {
73        Self(v.into_bytes())
74    }
75}
76
77impl From<ChangeId> for ProllyKey {
78    fn from(v: ChangeId) -> Self {
79        Self(v.into_bytes())
80    }
81}
82
83// ---------- Serde (byte-string wire form) ----------
84
85impl Serialize for ProllyKey {
86    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
87        serializer.serialize_bytes(&self.0)
88    }
89}
90
91struct ProllyKeyVisitor;
92
93impl<'de> Visitor<'de> for ProllyKeyVisitor {
94    type Value = ProllyKey;
95
96    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
97        f.write_str("a 16-byte prolly key byte string")
98    }
99
100    fn visit_bytes<E: de::Error>(self, v: &[u8]) -> Result<Self::Value, E> {
101        if v.len() != PROLLY_KEY_BYTES {
102            return Err(E::invalid_length(v.len(), &"16"));
103        }
104        let mut arr = [0u8; PROLLY_KEY_BYTES];
105        arr.copy_from_slice(v);
106        Ok(ProllyKey(arr))
107    }
108
109    fn visit_borrowed_bytes<E: de::Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
110        self.visit_bytes(v)
111    }
112
113    fn visit_byte_buf<E: de::Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
114        self.visit_bytes(&v)
115    }
116}
117
118impl<'de> Deserialize<'de> for ProllyKey {
119    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
120        deserializer.deserialize_bytes(ProllyKeyVisitor)
121    }
122}
123
124// ---------- Rolling-hash / chunk-size constants ----------
125
126/// Size of the rolling-hash window. SPEC §5.2: 64 bytes = four keys.
127pub const ROLLING_WINDOW_BYTES: usize = 64;
128
129/// The 32-byte BLAKE3 keyed-hash key used by the rolling hash.
130///
131/// Fixed for `mnem/0.1`. MUST NOT be changed within this format version.
132/// Bytes are the ASCII literal `"mnem-prolly-rh-1"` (16 bytes) followed
133/// by 16 zero bytes (padding to BLAKE3's 32-byte key size).
134///
135/// See SPEC §5.2.
136pub const ROLLING_KEY: [u8; 32] = [
137    0x6d, 0x6e, 0x65, 0x6d, // 'm', 'n', 'e', 'm'
138    0x2d, 0x70, 0x72, 0x6f, // '-', 'p', 'r', 'o'
139    0x6c, 0x6c, 0x79, 0x2d, // 'l', 'l', 'y', '-'
140    0x72, 0x68, 0x2d, 0x31, // 'r', 'h', '-', '1'
141    0x00, 0x00, 0x00, 0x00, //  zero padding
142    0x00, 0x00, 0x00, 0x00, //
143    0x00, 0x00, 0x00, 0x00, //
144    0x00, 0x00, 0x00, 0x00, //
145];
146
147/// Hard minimum entries per chunk. Below this, a boundary MUST NOT fire.
148pub const MIN_ENTRIES_PER_CHUNK: usize = 16;
149
150/// Target average entries per chunk. ~4 KiB on typical mnem payloads
151/// (16-byte key + ~40-byte link + ~8-byte CBOR framing per entry).
152pub const TARGET_AVG_ENTRIES_PER_CHUNK: usize = 64;
153
154/// Hard maximum entries per chunk. At this count the chunker MUST
155/// emit a boundary regardless of hash.
156pub const MAX_ENTRIES_PER_CHUNK: usize = 512;
157
158/// Boundary threshold for the rolling hash - integer form.
159///
160/// When `entries_in_chunk` is strictly between [`MIN_ENTRIES_PER_CHUNK`]
161/// and [`MAX_ENTRIES_PER_CHUNK`], the chunker emits a boundary whenever
162/// the 64-bit rolling hash value is `< THRESHOLD`.
163///
164/// Chosen so the per-entry boundary probability is `~1/48`, yielding an
165/// expected chunk size of `MIN + 48 = 64` entries which matches
166/// [`TARGET_AVG_ENTRIES_PER_CHUNK`].
167pub const THRESHOLD: u64 = u64::MAX / 48;
168
169#[cfg(test)]
170mod tests {
171    use super::*;
172    use crate::codec::{from_canonical_bytes, to_canonical_bytes};
173
174    #[test]
175    fn rolling_key_is_ascii_prefix_plus_zeros() {
176        assert_eq!(&ROLLING_KEY[..16], b"mnem-prolly-rh-1");
177        assert!(ROLLING_KEY[16..].iter().all(|&b| b == 0));
178    }
179
180    #[test]
181    fn threshold_probability_is_about_one_in_48() {
182        let ratio = THRESHOLD as f64 / u64::MAX as f64;
183        assert!((ratio - 1.0 / 48.0).abs() < 1e-9, "ratio was {ratio}");
184    }
185
186    #[test]
187    fn bounds_are_ordered() {
188        assert!(MIN_ENTRIES_PER_CHUNK < TARGET_AVG_ENTRIES_PER_CHUNK);
189        assert!(TARGET_AVG_ENTRIES_PER_CHUNK < MAX_ENTRIES_PER_CHUNK);
190    }
191
192    #[test]
193    fn prolly_key_cbor_round_trip_as_byte_string() {
194        let original = ProllyKey([0xAB; PROLLY_KEY_BYTES]);
195        let bytes = to_canonical_bytes(&original).expect("encode");
196        // Header byte 0x50 = major-type-2 (byte string), length 16.
197        assert_eq!(bytes[0], 0x50, "expected CBOR byte-string-16 prefix");
198        assert_eq!(bytes.len(), 17);
199        let decoded: ProllyKey = from_canonical_bytes(&bytes).expect("decode");
200        assert_eq!(original, decoded);
201    }
202
203    #[test]
204    fn node_id_converts_to_prolly_key() {
205        let n = NodeId::from_bytes_raw([7u8; 16]);
206        let k: ProllyKey = n.into();
207        assert_eq!(k.as_bytes(), &[7u8; 16]);
208    }
209}