Skip to main content

metamorphic_log/leaf/
mod.rs

1//! Layer-0: canonical leaf encoding and content hashing.
2//!
3//! A transparency-log leaf is **opaque, app-defined record bytes**. Layer 1
4//! (the Merkle tree, [`crate::merkle`]) treats them as a byte string and never
5//! inspects their structure, so an application's canonical record drops in as a
6//! leaf with *zero reformatting*.
7//!
8//! This module provides:
9//!
10//! 1. [`ContextLabel`] — the versioned `<namespace>/<record-type>/v<N>` domain
11//!    separator used by the intra-chain content hash. Each application chooses
12//!    its own namespace and record types (e.g. `"acme/user-keys/v1"`,
13//!    `"example-app/audit-event/v2"`). The label lives *inside* the content
14//!    hash (it never touches Layer 1's tile mechanics), giving cross-protocol /
15//!    cross-context separation while keeping the Merkle layer label-agnostic
16//!    (#299 / #290).
17//!
18//! 2. [`content_hash`] — the generic intra-chain leaf-content hash,
19//!    `sha3_512_with_context(label, content)` from
//!    [`metamorphic_crypto`]. This is the per-identity continuity
21//!    linkage; it is **independent** from, and must not be confused with, the
22//!    RFC 6962 Merkle leaf hash ([`crate::merkle::hash_leaf`]). The same leaf
23//!    bytes feed both linkages without reformatting either.
24//!
25//! 3. [`key_history_v1`] — a worked, byte-exact **example/conformance instance**
26//!    of an application record type. It is not privileged by the engine; it is
27//!    simply the first real-world consumer's leaf shape (Mosslet's signed
28//!    key-history, `assets/js/crypto/key_history.js`, locked by
29//!    `test/mosslet/crypto/key_history_test.exs`) and the seed of the
30//!    cross-language KAT suite (#315 / #299). Any other application defines its
31//!    own record type the same way, against this same fixed byte discipline.
32//!
33//! ## Byte-layout discipline (fixed, audited — version-bump-or-nothing)
34//!
35//! All canonical encodings in this crate use a single, fixed discipline so that
36//! independent witnesses and cross-language SDKs recompute byte-for-byte:
37//!
38//! - integers are **big-endian** (`u32` / `u64`),
39//! - variable-length fields are **`u32`-be length-prefixed** (`lp(x) =
40//!   u32_be(len(x)) || x`),
41//! - the layout is never reordered; a change is a new version label, never a
42//!   silent reinterpretation.
43
44use crate::error::{Error, Result};
45
46/// A validated, versioned context label of the form
47/// `<namespace>/<record-type>/v<N>`.
48///
49/// Used as the SHA3-512 domain separator for [`content_hash`]. Each consuming
50/// application picks its own namespace and record-type segments; the grammar is
51/// deliberately small and strict so labels are unambiguous across tenants and
52/// versions:
53///
54/// - exactly three `/`-separated, non-empty segments,
55/// - the third segment is `v` followed by one or more ASCII digits (no leading
56///   zero unless the version is literally `0`),
57/// - all characters are printable ASCII excluding `/` within a segment.
58///
59/// ```
60/// use metamorphic_log::leaf::ContextLabel;
61///
62/// // Any application defines its own namespace/record-type/version.
63/// let label = ContextLabel::parse("acme/user-keys/v1").unwrap();
64/// assert_eq!(label.as_str(), "acme/user-keys/v1");
65/// assert_eq!(label.namespace(), "acme");
66/// assert_eq!(label.record_type(), "user-keys");
67/// assert_eq!(label.version(), 1);
68///
69/// assert!(ContextLabel::parse("missing/version").is_err());
70/// assert!(ContextLabel::parse("a/b/v01").is_err()); // no leading zeros
71/// ```
72#[derive(Debug, Clone, PartialEq, Eq, Hash)]
73pub struct ContextLabel {
74    label: String,
75    namespace_len: usize,
76    record_type_len: usize,
77    version: u64,
78}
79
80impl ContextLabel {
81    /// Parse and validate a `<namespace>/<record-type>/v<N>` label.
82    ///
83    /// # Errors
84    /// Returns [`Error::MalformedLeaf`] if the label does not match the grammar.
85    pub fn parse(label: &str) -> Result<Self> {
86        let mut parts = label.split('/');
87        let namespace = parts.next().unwrap_or("");
88        let record_type = parts.next().unwrap_or("");
89        let version_seg = parts.next().unwrap_or("");
90        if parts.next().is_some() {
91            return Err(Error::MalformedLeaf(format!(
92                "context label has too many '/'-segments: {label:?}"
93            )));
94        }
95
96        let valid_segment =
97            |s: &str| !s.is_empty() && s.bytes().all(|b| b.is_ascii_graphic() && b != b'/');
98        if !valid_segment(namespace) || !valid_segment(record_type) {
99            return Err(Error::MalformedLeaf(format!(
100                "context label segments must be non-empty printable ASCII: {label:?}"
101            )));
102        }
103
104        let digits = version_seg.strip_prefix('v').ok_or_else(|| {
105            Error::MalformedLeaf(format!(
106                "context label version must start with 'v': {label:?}"
107            ))
108        })?;
109        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
110            return Err(Error::MalformedLeaf(format!(
111                "context label version must be 'v' followed by digits: {label:?}"
112            )));
113        }
114        if digits.len() > 1 && digits.starts_with('0') {
115            return Err(Error::MalformedLeaf(format!(
116                "context label version must not have leading zeros: {label:?}"
117            )));
118        }
119        let version: u64 = digits.parse().map_err(|_| {
120            Error::MalformedLeaf(format!("context label version overflow: {label:?}"))
121        })?;
122
123        Ok(Self {
124            label: label.to_string(),
125            namespace_len: namespace.len(),
126            record_type_len: record_type.len(),
127            version,
128        })
129    }
130
131    /// The full label string, e.g. `"acme/user-keys/v1"`.
132    #[must_use]
133    pub fn as_str(&self) -> &str {
134        &self.label
135    }
136
137    /// The namespace segment, e.g. `"acme"`.
138    #[must_use]
139    pub fn namespace(&self) -> &str {
140        &self.label[..self.namespace_len]
141    }
142
143    /// The record-type segment, e.g. `"user-keys"`.
144    #[must_use]
145    pub fn record_type(&self) -> &str {
146        let start = self.namespace_len + 1;
147        &self.label[start..start + self.record_type_len]
148    }
149
150    /// The numeric version `N`, e.g. `1`.
151    #[must_use]
152    pub fn version(&self) -> u64 {
153        self.version
154    }
155}
156
157/// Generic intra-chain leaf-content hash:
158/// `sha3_512_with_context(label, content)` (64 bytes).
159///
160/// This is the per-identity continuity linkage (for example, a key-history
161/// chain's `entry_hash` that the next entry chains to via a `prev_entry_hash`
162/// field). It is computed over whatever leaf *content* a given record type
163/// commits — for an opaque Layer-0 record that is simply the canonical bytes
164/// (the [`key_history_v1`] example hashes its canonical bytes directly).
165///
166/// This hash is deliberately distinct from the RFC 6962 Merkle leaf hash
167/// ([`crate::merkle::hash_leaf`]): one provides per-identity continuity
168/// (SHA3-512, PQ posture), the other provides global append-only ordering
169/// (ecosystem SHA-256, witness compatibility). The two must never be confused.
170#[must_use]
171pub fn content_hash(label: &ContextLabel, content: &[u8]) -> [u8; 64] {
172    metamorphic_crypto::hash::sha3_512_with_context(label.as_str(), content)
173}
174
/// Append `lp(bytes) = u32_be(len(bytes)) || bytes` to `out`.
///
/// The fixed-width big-endian `u32` length prefix makes field boundaries
/// unambiguous, so distinct records cannot collide by boundary confusion.
///
/// # Panics
/// Panics if `bytes.len()` exceeds `u32::MAX`. A wrapped (truncated) prefix
/// would describe a different boundary than the bytes that follow it and
/// silently break the guarantee above, so an oversized field is treated as a
/// caller bug rather than encoded.
fn push_lp(out: &mut Vec<u8>, bytes: &[u8]) {
    // Checked conversion: `as u32` would silently keep only the low 32 bits.
    let len = u32::try_from(bytes.len())
        .expect("length-prefixed field must not exceed u32::MAX bytes");
    out.extend_from_slice(&len.to_be_bytes());
    out.extend_from_slice(bytes);
}
183
184/// Example record type: the `mosslet/key-history/v1` conformance instance.
185///
186/// This module is an **example** of how an application defines a Layer-0 record
187/// type; the engine does not privilege it. It happens to be the first
188/// real-world leaf shape (and the seed of the cross-language KAT suite), so it
189/// doubles as a conformance fixture: the byte layout, the SHA3-512 `entry_hash`,
190/// and the RFC 6962 leaf hash here are byte-for-byte identical to the shipped
191/// reference implementation (`assets/js/crypto/key_history.js`, locked by
192/// `test/mosslet/crypto/key_history_test.exs`). A real key-history row is a
193/// valid leaf with **zero reformatting**. Other applications define their own
194/// record types against the same fixed byte discipline.
195pub mod key_history_v1 {
196    use super::{ContextLabel, Error, Result, content_hash, push_lp};
197    use crate::merkle::{Hash, hash_leaf};
198
199    /// The canonical context label for this record type.
200    pub const CONTEXT: &str = "mosslet/key-history/v1";
201
202    /// The canonical leaf format version (the `1` in `v1`).
203    pub const VERSION: u32 = 1;
204
205    /// A `mosslet/key-history/v1` entry's public fields (raw, decoded bytes).
206    ///
207    /// Mirrors the canonical-format inputs in `key_history.js`. The encryption
208    /// and signing public keys are the raw (already base64-decoded) key bytes;
209    /// `prev_entry_hash` is the raw 64-byte SHA3-512 digest of the previous
210    /// entry, or `None` for the genesis entry (seq 0).
211    #[derive(Debug, Clone, PartialEq, Eq)]
212    pub struct Entry {
213        /// Monotonic sequence number; genesis is `0`.
214        pub seq: u64,
215        /// Unix epoch milliseconds (UTC) at which the entry was created.
216        pub ts_ms: u64,
217        /// Recipient X25519 encryption public key (raw bytes).
218        pub enc_x25519: Vec<u8>,
219        /// Recipient ML-KEM encryption public key (raw bytes).
220        pub enc_pq: Vec<u8>,
221        /// The hybrid signing public key this entry pins (raw bytes).
222        pub signing_pub: Vec<u8>,
223        /// Raw previous-entry hash (64 bytes), or `None` for genesis.
224        pub prev_entry_hash: Option<Vec<u8>>,
225    }
226
227    impl Entry {
228        /// Build the canonical, byte-reproducible serialization of this entry.
229        ///
230        /// ```text
231        /// canonical(entry) =
232        ///     u32_be(VERSION = 1)
233        ///  || u64_be(seq)
234        ///  || u64_be(ts_ms)
235        ///  || lp(enc_x25519)
236        ///  || lp(enc_pq)
237        ///  || lp(signing_pub)
238        ///  || lp(prev_entry_hash)   // 0-length for genesis
239        /// ```
240        ///
241        /// # Errors
242        /// Returns [`Error::MalformedLeaf`] if `prev_entry_hash` is present but
243        /// empty (genesis must use `None`, not an empty vector) — this keeps the
244        /// genesis/rotation distinction unambiguous.
245        pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
246            if matches!(self.prev_entry_hash.as_deref(), Some([])) {
247                return Err(Error::MalformedLeaf(
248                    "prev_entry_hash present but empty; genesis must use None".into(),
249                ));
250            }
251            let prev: &[u8] = self.prev_entry_hash.as_deref().unwrap_or(&[]);
252            let mut out = Vec::new();
253            out.extend_from_slice(&VERSION.to_be_bytes());
254            out.extend_from_slice(&self.seq.to_be_bytes());
255            out.extend_from_slice(&self.ts_ms.to_be_bytes());
256            push_lp(&mut out, &self.enc_x25519);
257            push_lp(&mut out, &self.enc_pq);
258            push_lp(&mut out, &self.signing_pub);
259            push_lp(&mut out, prev);
260            Ok(out)
261        }
262
263        /// Compute the intra-chain `entry_hash` (64-byte SHA3-512), byte-for-byte
264        /// identical to the shipped `#315` value.
265        ///
266        /// ```text
267        /// entry_hash = sha3_512_with_context(
268        ///     "mosslet/key-history/v1",
269        ///     canonical_bytes,
270        /// )
271        /// ```
272        ///
273        /// The shipped Mosslet/WASM API passes the canonical bytes across the
274        /// JS↔WASM boundary as base64 and base64-*decodes* them before hashing,
275        /// so the hashed input is the **raw canonical bytes** — exactly the same
276        /// Layer-0 leaf bytes the RFC 6962 leaf hash consumes. The next entry
277        /// chains to this digest via `prev_entry_hash`.
278        ///
279        /// # Errors
280        /// Propagates [`Entry::canonical_bytes`] errors.
281        pub fn entry_hash(&self) -> Result<[u8; 64]> {
282            let canonical = self.canonical_bytes()?;
283            let label = ContextLabel::parse(CONTEXT)?;
284            Ok(content_hash(&label, &canonical))
285        }
286
287        /// Compute the RFC 6962 Merkle leaf hash `SHA-256(0x00 || canonical)`
288        /// over the **raw canonical bytes** (the Layer-0 leaf bytes).
289        ///
290        /// This is the global append-only ordering linkage and is independent of
291        /// [`Entry::entry_hash`].
292        ///
293        /// # Errors
294        /// Propagates [`Entry::canonical_bytes`] errors.
295        pub fn rfc6962_leaf_hash(&self) -> Result<Hash> {
296            Ok(hash_leaf(&self.canonical_bytes()?))
297        }
298    }
299}