metamorphic_log/leaf/mod.rs
1//! Layer-0: canonical leaf encoding and content hashing.
2//!
3//! A transparency-log leaf is **opaque, app-defined record bytes**. Layer 1
4//! (the Merkle tree, [`crate::merkle`]) treats them as a byte string and never
5//! inspects their structure, so an application's canonical record drops in as a
6//! leaf with *zero reformatting*.
7//!
8//! This module provides:
9//!
10//! 1. [`ContextLabel`] — the versioned `<namespace>/<record-type>/v<N>` domain
11//! separator used by the intra-chain content hash. Each application chooses
12//! its own namespace and record types (e.g. `"acme/user-keys/v1"`,
13//! `"example-app/audit-event/v2"`). The label lives *inside* the content
14//! hash (it never touches Layer 1's tile mechanics), giving cross-protocol /
15//! cross-context separation while keeping the Merkle layer label-agnostic
16//! (#299 / #290).
17//!
18//! 2. [`content_hash`] — the generic intra-chain leaf-content hash,
19//! `sha3_512_with_context(label, content)` from
//!    [`metamorphic_crypto`]. This is the per-identity continuity
21//! linkage; it is **independent** from, and must not be confused with, the
22//! RFC 6962 Merkle leaf hash ([`crate::merkle::hash_leaf`]). The same leaf
23//! bytes feed both linkages without reformatting either.
24//!
25//! 3. [`key_history_v1`] — a worked, byte-exact **example/conformance instance**
26//! of an application record type. It is not privileged by the engine; it is
27//! simply the first real-world consumer's leaf shape (Mosslet's signed
28//! key-history, `assets/js/crypto/key_history.js`, locked by
29//! `test/mosslet/crypto/key_history_test.exs`) and the seed of the
30//! cross-language KAT suite (#315 / #299). Any other application defines its
31//! own record type the same way, against this same fixed byte discipline.
32//!
33//! ## Byte-layout discipline (fixed, audited — version-bump-or-nothing)
34//!
35//! All canonical encodings in this crate use a single, fixed discipline so that
36//! independent witnesses and cross-language SDKs recompute byte-for-byte:
37//!
38//! - integers are **big-endian** (`u32` / `u64`),
39//! - variable-length fields are **`u32`-be length-prefixed** (`lp(x) =
40//! u32_be(len(x)) || x`),
41//! - the layout is never reordered; a change is a new version label, never a
42//! silent reinterpretation.
43
44use crate::error::{Error, Result};
45
46/// A validated, versioned context label of the form
47/// `<namespace>/<record-type>/v<N>`.
48///
49/// Used as the SHA3-512 domain separator for [`content_hash`]. Each consuming
50/// application picks its own namespace and record-type segments; the grammar is
51/// deliberately small and strict so labels are unambiguous across tenants and
52/// versions:
53///
54/// - exactly three `/`-separated, non-empty segments,
55/// - the third segment is `v` followed by one or more ASCII digits (no leading
56/// zero unless the version is literally `0`),
57/// - all characters are printable ASCII excluding `/` within a segment.
58///
59/// ```
60/// use metamorphic_log::leaf::ContextLabel;
61///
62/// // Any application defines its own namespace/record-type/version.
63/// let label = ContextLabel::parse("acme/user-keys/v1").unwrap();
64/// assert_eq!(label.as_str(), "acme/user-keys/v1");
65/// assert_eq!(label.namespace(), "acme");
66/// assert_eq!(label.record_type(), "user-keys");
67/// assert_eq!(label.version(), 1);
68///
69/// assert!(ContextLabel::parse("missing/version").is_err());
70/// assert!(ContextLabel::parse("a/b/v01").is_err()); // no leading zeros
71/// ```
72#[derive(Debug, Clone, PartialEq, Eq, Hash)]
73pub struct ContextLabel {
74 label: String,
75 namespace_len: usize,
76 record_type_len: usize,
77 version: u64,
78}
79
80impl ContextLabel {
81 /// Parse and validate a `<namespace>/<record-type>/v<N>` label.
82 ///
83 /// # Errors
84 /// Returns [`Error::MalformedLeaf`] if the label does not match the grammar.
85 pub fn parse(label: &str) -> Result<Self> {
86 let mut parts = label.split('/');
87 let namespace = parts.next().unwrap_or("");
88 let record_type = parts.next().unwrap_or("");
89 let version_seg = parts.next().unwrap_or("");
90 if parts.next().is_some() {
91 return Err(Error::MalformedLeaf(format!(
92 "context label has too many '/'-segments: {label:?}"
93 )));
94 }
95
96 let valid_segment =
97 |s: &str| !s.is_empty() && s.bytes().all(|b| b.is_ascii_graphic() && b != b'/');
98 if !valid_segment(namespace) || !valid_segment(record_type) {
99 return Err(Error::MalformedLeaf(format!(
100 "context label segments must be non-empty printable ASCII: {label:?}"
101 )));
102 }
103
104 let digits = version_seg.strip_prefix('v').ok_or_else(|| {
105 Error::MalformedLeaf(format!(
106 "context label version must start with 'v': {label:?}"
107 ))
108 })?;
109 if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
110 return Err(Error::MalformedLeaf(format!(
111 "context label version must be 'v' followed by digits: {label:?}"
112 )));
113 }
114 if digits.len() > 1 && digits.starts_with('0') {
115 return Err(Error::MalformedLeaf(format!(
116 "context label version must not have leading zeros: {label:?}"
117 )));
118 }
119 let version: u64 = digits.parse().map_err(|_| {
120 Error::MalformedLeaf(format!("context label version overflow: {label:?}"))
121 })?;
122
123 Ok(Self {
124 label: label.to_string(),
125 namespace_len: namespace.len(),
126 record_type_len: record_type.len(),
127 version,
128 })
129 }
130
131 /// The full label string, e.g. `"acme/user-keys/v1"`.
132 #[must_use]
133 pub fn as_str(&self) -> &str {
134 &self.label
135 }
136
137 /// The namespace segment, e.g. `"acme"`.
138 #[must_use]
139 pub fn namespace(&self) -> &str {
140 &self.label[..self.namespace_len]
141 }
142
143 /// The record-type segment, e.g. `"user-keys"`.
144 #[must_use]
145 pub fn record_type(&self) -> &str {
146 let start = self.namespace_len + 1;
147 &self.label[start..start + self.record_type_len]
148 }
149
150 /// The numeric version `N`, e.g. `1`.
151 #[must_use]
152 pub fn version(&self) -> u64 {
153 self.version
154 }
155}
156
157/// Generic intra-chain leaf-content hash:
158/// `sha3_512_with_context(label, content)` (64 bytes).
159///
160/// This is the per-identity continuity linkage (for example, a key-history
161/// chain's `entry_hash` that the next entry chains to via a `prev_entry_hash`
162/// field). It is computed over whatever leaf *content* a given record type
163/// commits — for an opaque Layer-0 record that is simply the canonical bytes
164/// (the [`key_history_v1`] example hashes its canonical bytes directly).
165///
166/// This hash is deliberately distinct from the RFC 6962 Merkle leaf hash
167/// ([`crate::merkle::hash_leaf`]): one provides per-identity continuity
168/// (SHA3-512, PQ posture), the other provides global append-only ordering
169/// (ecosystem SHA-256, witness compatibility). The two must never be confused.
170#[must_use]
171pub fn content_hash(label: &ContextLabel, content: &[u8]) -> [u8; 64] {
172 metamorphic_crypto::hash::sha3_512_with_context(label.as_str(), content)
173}
174
/// Append `lp(bytes) = u32_be(len(bytes)) || bytes` to `out`.
///
/// The `u32` big-endian length prefix makes every field boundary explicit, so
/// two distinct records can never serialize to the same bytes merely by
/// shifting where one field ends and the next begins.
///
/// # Panics
/// Panics if `bytes` is longer than `u32::MAX` bytes. Such a length cannot be
/// represented in the prefix. Wrapping it, as a plain `as u32` cast would,
/// emits a prefix that disagrees with the payload and silently defeats the
/// boundary guarantee above, so the encoder refuses instead. Fallible callers
/// should check field lengths up front and report an error of their own.
fn push_lp(out: &mut Vec<u8>, bytes: &[u8]) {
    let len = u32::try_from(bytes.len())
        .expect("length-prefixed field must not exceed u32::MAX bytes");
    // One reservation covers the prefix and the payload.
    out.reserve(4 + bytes.len());
    out.extend_from_slice(&len.to_be_bytes());
    out.extend_from_slice(bytes);
}
183
184/// Example record type: the `mosslet/key-history/v1` conformance instance.
185///
186/// This module is an **example** of how an application defines a Layer-0 record
187/// type; the engine does not privilege it. It happens to be the first
188/// real-world leaf shape (and the seed of the cross-language KAT suite), so it
189/// doubles as a conformance fixture: the byte layout, the SHA3-512 `entry_hash`,
190/// and the RFC 6962 leaf hash here are byte-for-byte identical to the shipped
191/// reference implementation (`assets/js/crypto/key_history.js`, locked by
192/// `test/mosslet/crypto/key_history_test.exs`). A real key-history row is a
193/// valid leaf with **zero reformatting**. Other applications define their own
194/// record types against the same fixed byte discipline.
195pub mod key_history_v1 {
196 use super::{ContextLabel, Error, Result, content_hash, push_lp};
197 use crate::merkle::{Hash, hash_leaf};
198
199 /// The canonical context label for this record type.
200 pub const CONTEXT: &str = "mosslet/key-history/v1";
201
202 /// The canonical leaf format version (the `1` in `v1`).
203 pub const VERSION: u32 = 1;
204
205 /// A `mosslet/key-history/v1` entry's public fields (raw, decoded bytes).
206 ///
207 /// Mirrors the canonical-format inputs in `key_history.js`. The encryption
208 /// and signing public keys are the raw (already base64-decoded) key bytes;
209 /// `prev_entry_hash` is the raw 64-byte SHA3-512 digest of the previous
210 /// entry, or `None` for the genesis entry (seq 0).
211 #[derive(Debug, Clone, PartialEq, Eq)]
212 pub struct Entry {
213 /// Monotonic sequence number; genesis is `0`.
214 pub seq: u64,
215 /// Unix epoch milliseconds (UTC) at which the entry was created.
216 pub ts_ms: u64,
217 /// Recipient X25519 encryption public key (raw bytes).
218 pub enc_x25519: Vec<u8>,
219 /// Recipient ML-KEM encryption public key (raw bytes).
220 pub enc_pq: Vec<u8>,
221 /// The hybrid signing public key this entry pins (raw bytes).
222 pub signing_pub: Vec<u8>,
223 /// Raw previous-entry hash (64 bytes), or `None` for genesis.
224 pub prev_entry_hash: Option<Vec<u8>>,
225 }
226
227 impl Entry {
228 /// Build the canonical, byte-reproducible serialization of this entry.
229 ///
230 /// ```text
231 /// canonical(entry) =
232 /// u32_be(VERSION = 1)
233 /// || u64_be(seq)
234 /// || u64_be(ts_ms)
235 /// || lp(enc_x25519)
236 /// || lp(enc_pq)
237 /// || lp(signing_pub)
238 /// || lp(prev_entry_hash) // 0-length for genesis
239 /// ```
240 ///
241 /// # Errors
242 /// Returns [`Error::MalformedLeaf`] if `prev_entry_hash` is present but
243 /// empty (genesis must use `None`, not an empty vector) — this keeps the
244 /// genesis/rotation distinction unambiguous.
245 pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
246 if matches!(self.prev_entry_hash.as_deref(), Some([])) {
247 return Err(Error::MalformedLeaf(
248 "prev_entry_hash present but empty; genesis must use None".into(),
249 ));
250 }
251 let prev: &[u8] = self.prev_entry_hash.as_deref().unwrap_or(&[]);
252 let mut out = Vec::new();
253 out.extend_from_slice(&VERSION.to_be_bytes());
254 out.extend_from_slice(&self.seq.to_be_bytes());
255 out.extend_from_slice(&self.ts_ms.to_be_bytes());
256 push_lp(&mut out, &self.enc_x25519);
257 push_lp(&mut out, &self.enc_pq);
258 push_lp(&mut out, &self.signing_pub);
259 push_lp(&mut out, prev);
260 Ok(out)
261 }
262
263 /// Compute the intra-chain `entry_hash` (64-byte SHA3-512), byte-for-byte
264 /// identical to the shipped `#315` value.
265 ///
266 /// ```text
267 /// entry_hash = sha3_512_with_context(
268 /// "mosslet/key-history/v1",
269 /// canonical_bytes,
270 /// )
271 /// ```
272 ///
273 /// The shipped Mosslet/WASM API passes the canonical bytes across the
274 /// JS↔WASM boundary as base64 and base64-*decodes* them before hashing,
275 /// so the hashed input is the **raw canonical bytes** — exactly the same
276 /// Layer-0 leaf bytes the RFC 6962 leaf hash consumes. The next entry
277 /// chains to this digest via `prev_entry_hash`.
278 ///
279 /// # Errors
280 /// Propagates [`Entry::canonical_bytes`] errors.
281 pub fn entry_hash(&self) -> Result<[u8; 64]> {
282 let canonical = self.canonical_bytes()?;
283 let label = ContextLabel::parse(CONTEXT)?;
284 Ok(content_hash(&label, &canonical))
285 }
286
287 /// Compute the RFC 6962 Merkle leaf hash `SHA-256(0x00 || canonical)`
288 /// over the **raw canonical bytes** (the Layer-0 leaf bytes).
289 ///
290 /// This is the global append-only ordering linkage and is independent of
291 /// [`Entry::entry_hash`].
292 ///
293 /// # Errors
294 /// Propagates [`Entry::canonical_bytes`] errors.
295 pub fn rfc6962_leaf_hash(&self) -> Result<Hash> {
296 Ok(hash_leaf(&self.canonical_bytes()?))
297 }
298 }
299}