jmt_pq/lib.rs
1// Copyright (c) The Diem Core Contributors SPDX-License-Identifier: Apache-2.0
2#![cfg_attr(not(feature = "std"), no_std)]
3#![forbid(unsafe_code)]
4
5//! This module implements [`JellyfishMerkleTree`] backed by storage module. The tree itself
6//! doesn't persist anything, but realizes the logic of R/W only. The write path will produce
7//! all the intermediate results in a batch for storage layer to commit and the read path will
8//! return results directly. The public APIs are only [`new`], [`put_value_sets`],
9//! [`put_value_set`] and [`get_with_proof`]. After each put with a `value_set` based on a
10//! known version, the tree will return a new root hash with a [`TreeUpdateBatch`] containing
11//! all the new nodes and indices of stale nodes.
12//!
13//! A Jellyfish Merkle Tree itself logically is a 512-bit sparse Merkle tree with an
14//! optimization that any subtree containing 0 or 1 leaf node will be replaced by that leaf
15//! node or a placeholder node with default hash value. With this optimization we can save CPU
16//! by avoiding hashing on many sparse levels in the tree. Physically, the tree is structurally
17//! similar to the modified Patricia Merkle tree of Ethereum but with some modifications. A
18//! standard Jellyfish Merkle tree will look like the following figure:
19//!
20//! ```text
21//! .──────────────────────.
22//! _.─────' `──────.
23//! _.──' `───.
24//! _.─' `──.
25//! _.─' `──.
26//! ,' `.
27//! ,─' '─.
28//! ,' `.
29//! ,' `.
30//! ╱ ╲
31//! ╱ ╲
32//! ╱ ╲
33//! ╱ ╲
34//! ; :
35//! ; :
36//! ; :
37//! │ │
38//! +──────────────────────────────────────────────────────────────────────────────────────────────+
39//! .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''.
40//! / \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \
41//! +----++----++----++----++----++----++----++----++----++----++----++----++----++----++----++----+
42//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
43//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
44//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
45//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
46//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
47//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
48//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
49//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
50//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
51//! ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■
52//!
53//! ■: the [`Value`] type this tree stores.
54//! ```
55//!
56//! A Jellyfish Merkle Tree consists of [`InternalNode`] and [`LeafNode`]. [`InternalNode`] is
57//! like branch node in ethereum patricia merkle with 16 children to represent a 4-level binary
58//! tree and [`LeafNode`] is similar to that in patricia merkle too. In the above figure, each
59//! `bell` in the jellyfish is an [`InternalNode`] while each tentacle is a [`LeafNode`]. It is
60//! noted that Jellyfish merkle doesn't have a counterpart for `extension` node of ethereum
61//! patricia merkle.
62//!
63//! [`JellyfishMerkleTree`]: struct.JellyfishMerkleTree.html
64//! [`new`]: struct.JellyfishMerkleTree.html#method.new
65//! [`put_value_sets`]: struct.JellyfishMerkleTree.html#method.put_value_sets
66//! [`put_value_set`]: struct.JellyfishMerkleTree.html#method.put_value_set
67//! [`get_with_proof`]: struct.JellyfishMerkleTree.html#method.get_with_proof
68//! [`TreeUpdateBatch`]: struct.TreeUpdateBatch.html
69//! [`InternalNode`]: node_type/struct.InternalNode.html
70//! [`LeafNode`]: node_type/struct.LeafNode.html
71
72extern crate alloc;
73
74use core::fmt::Debug;
75
76use digest::{Digest, consts::U64};
77use lencode::prelude::{Decode, Encode};
78use serde::{Deserialize, Serialize};
79#[cfg(feature = "std")]
80use thiserror::Error;
81
82mod bytes32ext;
83mod iterator;
84mod node_type;
85mod reader;
86mod tree;
87mod tree_cache;
88mod types;
89mod writer;
90
91pub(crate) mod hash_bytes_serde {
92 use super::{HASH_SIZE, HashBytes};
93 use alloc::vec::Vec;
94 use core::fmt;
95 use serde::de::{Error as DeError, Expected};
96 use serde::{Deserialize, Deserializer, Serializer};
97
98 struct ExpectedLength;
99
100 impl Expected for ExpectedLength {
101 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102 write!(f, "an array of length {}", HASH_SIZE)
103 }
104 }
105
106 impl fmt::Debug for ExpectedLength {
107 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
108 Expected::fmt(self, f)
109 }
110 }
111
112 pub fn serialize<S>(bytes: &HashBytes, serializer: S) -> Result<S::Ok, S::Error>
113 where
114 S: Serializer,
115 {
116 serializer.serialize_bytes(bytes)
117 }
118
119 pub fn deserialize<'de, D>(deserializer: D) -> Result<HashBytes, D::Error>
120 where
121 D: Deserializer<'de>,
122 {
123 let bytes = Vec::<u8>::deserialize(deserializer)?;
124 if bytes.len() != HASH_SIZE {
125 return Err(DeError::invalid_length(bytes.len(), &ExpectedLength));
126 }
127 let mut arr = [0u8; HASH_SIZE];
128 arr.copy_from_slice(&bytes);
129 Ok(arr)
130 }
131}
132
133#[cfg(any(test, feature = "mocks"))]
134pub mod mock;
135pub mod restore;
136
137use bytes32ext::Bytes32Ext;
138pub use iterator::JellyfishMerkleIterator;
139pub use tree::JellyfishMerkleTree;
140#[cfg(any(test, feature = "sha2"))]
141pub use tree::Sha512Jmt;
142#[cfg(feature = "ics23")]
143pub use tree::ics23_impl::ics23_spec;
144
145pub use types::Version;
146use types::nibble::ROOT_NIBBLE_HEIGHT;
147pub use types::proof;
148
149/// Length in bytes of every hash output used by the Jellyfish Merkle Tree.
150pub const HASH_SIZE: usize = 64;
151/// Convenience alias for hash output bytes.
152pub type HashBytes = [u8; HASH_SIZE];
153
154/// Contains types used to bridge a [`JellyfishMerkleTree`](crate::JellyfishMerkleTree) to the
155/// backing storage recording the tree's internal data.
156pub mod storage {
157 pub use node_type::{LeafNode, Node, NodeKey};
158 pub use reader::HasPreimage;
159 pub use reader::TreeReader;
160 pub use types::nibble::nibble_path::NibblePath;
161 pub use writer::{
162 NodeBatch, NodeStats, StaleNodeIndex, StaleNodeIndexBatch, TreeUpdateBatch, TreeWriter,
163 };
164
165 use super::*;
166}
167
168#[cfg(test)]
169mod tests;
170
171/// An error that occurs when the state root for a requested version is missing (e.g., because
172/// it was pruned).
173#[derive(Debug)]
174#[cfg_attr(feature = "std", derive(Error))]
175#[cfg_attr(
176 feature = "std",
177 error("Missing state root node at version {version}, probably pruned.")
178)]
179pub struct MissingRootError {
180 pub version: Version,
181}
182
183#[cfg(not(feature = "std"))]
184impl core::fmt::Display for MissingRootError {
185 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
186 write!(
187 f,
188 "Missing state root node at version {}, probably pruned.",
189 self.version
190 )
191 }
192}
193
194// TODO: reorg
195
196const SPARSE_MERKLE_PLACEHOLDER_HASH: HashBytes =
197 *b"SPARSE_MERKLE_PLACEHOLDER_HASH__PQ_RESISTANT_PLACEHOLDER_HASH_PQ";
198
199/// An owned value stored in the [`JellyfishMerkleTree`].
200pub type OwnedValue = alloc::vec::Vec<u8>;
201
202#[cfg(test)]
203use proptest_derive::Arbitrary;
204
205/// A root of a [`JellyfishMerkleTree`].
206#[derive(
207 Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Encode, Decode,
208)]
209#[cfg_attr(any(test), derive(Arbitrary))]
210pub struct RootHash(#[serde(with = "crate::hash_bytes_serde")] pub HashBytes);
211
212impl From<RootHash> for HashBytes {
213 fn from(value: RootHash) -> Self {
214 value.0
215 }
216}
217
218impl From<HashBytes> for RootHash {
219 fn from(value: HashBytes) -> Self {
220 Self(value)
221 }
222}
223
224impl AsRef<[u8]> for RootHash {
225 fn as_ref(&self) -> &[u8] {
226 &self.0
227 }
228}
229
230/// A hashed key used to index a [`JellyfishMerkleTree`].
231///
232/// # 🚨 Danger 🚨
233/// ics23 non-existence proofs require that all key preimages are non-empty. If you plan to use
234/// ics23 non-existence proofs, you must ensure that keys are non-empty before creating
235/// `KeyHash`es.
236///
237/// The [`JellyfishMerkleTree`] only stores key hashes, not full keys.
238#[derive(
239 Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Encode, Decode,
240)]
241#[cfg_attr(any(test), derive(Arbitrary))]
242pub struct KeyHash(#[serde(with = "crate::hash_bytes_serde")] pub HashBytes);
243
244#[derive(
245 Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Encode, Decode,
246)]
247#[cfg_attr(any(test), derive(Arbitrary))]
248// This needs to be public for the fuzzing/Arbitrary feature, but we don't really want it to
249// be, so #[doc(hidden)] is the next best thing.
250#[doc(hidden)]
251pub struct ValueHash(#[serde(with = "crate::hash_bytes_serde")] pub HashBytes);
252
253impl ValueHash {
254 pub fn with<H: SimpleHasher>(value: impl AsRef<[u8]>) -> Self {
255 Self(H::hash(value))
256 }
257}
258
259impl KeyHash {
260 /// Hash the provided key with the provided hasher and return a new `KeyHash`.
261 ///
262 /// # 🚨 Danger 🚨
263 /// If you will use ics23 non-existence proofs, you must ensure that the key is non-empty
264 /// before calling this function.
265 pub fn with<H: SimpleHasher>(key: impl AsRef<[u8]>) -> Self {
266 let key_hash = Self(H::hash(key.as_ref()));
267 // Adding a tracing event here allows cross-referencing the key hash with the original
268 // key bytes when looking through logs.
269 tracing::debug!(key = ?EscapedByteSlice(key.as_ref()), ?key_hash, "hashed jmt key");
270 key_hash
271 }
272}
273
274impl core::fmt::Debug for KeyHash {
275 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
276 f.debug_tuple("KeyHash")
277 .field(&hex::encode(self.0))
278 .finish()
279 }
280}
281
282impl core::fmt::Debug for ValueHash {
283 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
284 f.debug_tuple("ValueHash")
285 .field(&hex::encode(self.0))
286 .finish()
287 }
288}
289
290impl core::fmt::Debug for RootHash {
291 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
292 f.debug_tuple("RootHash")
293 .field(&hex::encode(self.0))
294 .finish()
295 }
296}
297
298struct EscapedByteSlice<'a>(&'a [u8]);
299
300impl<'a> core::fmt::Debug for EscapedByteSlice<'a> {
301 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
302 write!(f, "b\"")?;
303 for &b in self.0 {
304 // https://doc.rust-lang.org/reference/tokens.html#byte-escapes
305 #[allow(clippy::manual_range_contains)]
306 if b == b'\n' {
307 write!(f, "\\n")?;
308 } else if b == b'\r' {
309 write!(f, "\\r")?;
310 } else if b == b'\t' {
311 write!(f, "\\t")?;
312 } else if b == b'\\' || b == b'"' {
313 write!(f, "\\{}", b as char)?;
314 } else if b == b'\0' {
315 write!(f, "\\0")?;
316 // ASCII printable
317 } else if b >= 0x20 && b < 0x7f {
318 write!(f, "{}", b as char)?;
319 } else {
320 write!(f, "\\x{:02x}", b)?;
321 }
322 }
323 write!(f, "\"")?;
324 Ok(())
325 }
326}
327
328/// A minimal trait representing a hash function. We implement our own rather than relying on
329/// `Digest` for broader compatibility.
330pub trait SimpleHasher: Sized {
331 /// Creates a new hasher with default state.
332 fn new() -> Self;
333 /// Ingests the provided data, updating the hasher's state.
334 fn update(&mut self, data: &[u8]);
335 /// Consumes the hasher state to produce a digest.
336 fn finalize(self) -> HashBytes;
337 /// Returns the digest of the provided data.
338 fn hash(data: impl AsRef<[u8]>) -> HashBytes {
339 let mut hasher = Self::new();
340 hasher.update(data.as_ref());
341 hasher.finalize()
342 }
343}
344
345impl<T> SimpleHasher for T
346where
347 T: Digest<OutputSize = U64>,
348{
349 fn new() -> Self {
350 T::new()
351 }
352
353 fn update(&mut self, data: &[u8]) {
354 Digest::update(self, data)
355 }
356
357 fn finalize(self) -> HashBytes {
358 let output = Digest::finalize(self);
359 let mut result = [0u8; HASH_SIZE];
360 for (dest, src) in result.iter_mut().zip(output.iter()) {
361 *dest = *src;
362 }
363 result
364 }
365}
366
367/// A trivial implementation of [`SimpleHasher`] that simply returns the first 64 bytes of the
368/// provided data. This is useful to avoid hashing data when testing, and facilitate debugging
369/// specific tree configurations.
370pub struct TransparentHasher {
371 key: HashBytes,
372}
373
374impl SimpleHasher for TransparentHasher {
375 fn new() -> Self {
376 TransparentHasher {
377 key: [0u8; HASH_SIZE],
378 }
379 }
380
381 fn update(&mut self, data: &[u8]) {
382 for (dest, &src) in self.key.iter_mut().zip(data.iter()) {
383 *dest = src;
384 }
385 }
386 fn finalize(self) -> HashBytes {
387 self.key
388 }
389}
390
391#[cfg(feature = "blake3_tests")]
392mod blake3_impl {
393 use super::{HashBytes, SimpleHasher};
394
395 #[derive(Default)]
396 pub struct Blake3Hasher(blake3::Hasher);
397
398 impl SimpleHasher for Blake3Hasher {
399 fn new() -> Self {
400 Self(blake3::Hasher::new())
401 }
402
403 fn update(&mut self, data: &[u8]) {
404 self.0.update(data);
405 }
406
407 fn finalize(self) -> HashBytes {
408 let digest = self.0.finalize();
409 let mut buf = [0u8; super::HASH_SIZE];
410 buf[..digest.as_bytes().len()].copy_from_slice(digest.as_bytes());
411 buf
412 }
413 }
414}