jmt_blake3/
node_type.rs

1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Node types of [`JellyfishMerkleTree`](crate::JellyfishMerkleTree)
5//!
6//! This module defines two types of Jellyfish Merkle tree nodes: [`InternalNode`]
7//! and [`LeafNode`] as building blocks of a 256-bit
8//! [`JellyfishMerkleTree`](crate::JellyfishMerkleTree). [`InternalNode`] represents a 4-level
9//! binary tree to optimize for IOPS: it compresses a tree with 31 nodes into one node with 16
10//! chidren at the lowest level. [`LeafNode`] stores the full key and the value associated.
11
12use std::{
13    collections::hash_map::HashMap,
14    convert::TryFrom,
15    io::{prelude::*, Cursor, Read, SeekFrom, Write},
16    mem::size_of,
17};
18
19use anyhow::{ensure, Context, Result};
20use byteorder::{BigEndian, LittleEndian, ReadBytesExt, WriteBytesExt};
21use num_derive::{FromPrimitive, ToPrimitive};
22use num_traits::cast::FromPrimitive;
23#[cfg(any(test, feature = "fuzzing"))]
24use proptest::{collection::hash_map, prelude::*};
25#[cfg(any(test, feature = "fuzzing"))]
26use proptest_derive::Arbitrary;
27use serde::{Deserialize, Serialize};
28use thiserror::Error;
29
30use crate::{
31    metrics::{DIEM_JELLYFISH_INTERNAL_ENCODED_BYTES, DIEM_JELLYFISH_LEAF_ENCODED_BYTES},
32    types::{
33        nibble::{nibble_path::NibblePath, Nibble, ROOT_NIBBLE_HEIGHT},
34        proof::{SparseMerkleInternalNode, SparseMerkleLeafNode},
35        Version,
36    },
37    KeyHash, ValueHash, SPARSE_MERKLE_PLACEHOLDER_HASH,
38};
39
40/// The unique key of each node.
41#[derive(Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)]
42#[cfg_attr(any(test, feature = "fuzzing"), derive(Arbitrary))]
43pub struct NodeKey {
44    // The version at which the node is created.
45    version: Version,
46    // The nibble path this node represents in the tree.
47    nibble_path: NibblePath,
48}
49
50impl NodeKey {
51    /// Creates a new `NodeKey`.
52    pub(crate) fn new(version: Version, nibble_path: NibblePath) -> Self {
53        Self {
54            version,
55            nibble_path,
56        }
57    }
58
59    /// A shortcut to generate a node key consisting of a version and an empty nibble path.
60    pub(crate) fn new_empty_path(version: Version) -> Self {
61        Self::new(version, NibblePath::new(vec![]))
62    }
63
64    /// Gets the version.
65    pub fn version(&self) -> Version {
66        self.version
67    }
68
69    /// Gets the nibble path.
70    pub(crate) fn nibble_path(&self) -> &NibblePath {
71        &self.nibble_path
72    }
73
74    /// Generates a child node key based on this node key.
75    pub(crate) fn gen_child_node_key(&self, version: Version, n: Nibble) -> Self {
76        let mut node_nibble_path = self.nibble_path().clone();
77        node_nibble_path.push(n);
78        Self::new(version, node_nibble_path)
79    }
80
81    /// Generates parent node key at the same version based on this node key.
82    pub(crate) fn gen_parent_node_key(&self) -> Self {
83        let mut node_nibble_path = self.nibble_path().clone();
84        assert!(
85            node_nibble_path.pop().is_some(),
86            "Current node key is root.",
87        );
88        Self::new(self.version, node_nibble_path)
89    }
90
91    /// Sets the version to the given version.
92    pub(crate) fn set_version(&mut self, version: Version) {
93        self.version = version;
94    }
95
96    /// Serializes to bytes for physical storage enforcing the same order as that in memory.
97    pub fn encode(&self) -> Result<Vec<u8>> {
98        let mut out = vec![];
99        out.write_u64::<BigEndian>(self.version())?;
100        out.write_u8(self.nibble_path().num_nibbles() as u8)?;
101        out.write_all(self.nibble_path().bytes())?;
102        Ok(out)
103    }
104
105    /// Recovers from serialized bytes in physical storage.
106    pub fn decode(val: &[u8]) -> Result<NodeKey> {
107        let mut reader = Cursor::new(val);
108        let version = reader.read_u64::<BigEndian>()?;
109        let num_nibbles = reader.read_u8()? as usize;
110        ensure!(
111            num_nibbles <= ROOT_NIBBLE_HEIGHT,
112            "Invalid number of nibbles: {}",
113            num_nibbles,
114        );
115        let mut nibble_bytes = Vec::with_capacity((num_nibbles + 1) / 2);
116        reader.read_to_end(&mut nibble_bytes)?;
117        ensure!(
118            (num_nibbles + 1) / 2 == nibble_bytes.len(),
119            "encoded num_nibbles {} mismatches nibble path bytes {:?}",
120            num_nibbles,
121            nibble_bytes
122        );
123        let nibble_path = if num_nibbles % 2 == 0 {
124            NibblePath::new(nibble_bytes)
125        } else {
126            let padding = nibble_bytes.last().unwrap() & 0x0f;
127            ensure!(
128                padding == 0,
129                "Padding nibble expected to be 0, got: {}",
130                padding,
131            );
132            NibblePath::new_odd(nibble_bytes)
133        };
134        Ok(NodeKey::new(version, nibble_path))
135    }
136}
137
138#[derive(Clone, Debug, Eq, PartialEq)]
139pub enum NodeType {
140    Leaf,
141    /// A internal node that haven't been finished the leaf count migration, i.e. None or not all
142    /// of the children leaf counts are known.
143    InternalLegacy,
144    Internal {
145        leaf_count: usize,
146    },
147}
148
149#[cfg(any(test, feature = "fuzzing"))]
150impl Arbitrary for NodeType {
151    type Parameters = ();
152    type Strategy = BoxedStrategy<Self>;
153
154    fn arbitrary_with(_args: ()) -> Self::Strategy {
155        prop_oneof![
156            Just(NodeType::Leaf),
157            Just(NodeType::InternalLegacy),
158            (2..100usize).prop_map(|leaf_count| NodeType::Internal { leaf_count })
159        ]
160        .boxed()
161    }
162}
163
164/// Each child of [`InternalNode`] encapsulates a nibble forking at this node.
165#[derive(Clone, Debug, Eq, PartialEq)]
166#[cfg_attr(any(test, feature = "fuzzing"), derive(Arbitrary))]
167pub struct Child {
168    /// The hash value of this child node.
169    pub hash: [u8; 32],
170    /// `version`, the `nibble_path` of the ['NodeKey`] of this [`InternalNode`] the child belongs
171    /// to and the child's index constitute the [`NodeKey`] to uniquely identify this child node
172    /// from the storage. Used by `[`NodeKey::gen_child_node_key`].
173    pub version: Version,
174    /// Indicates if the child is a leaf, or if it's an internal node, the total number of leaves
175    /// under it (though it can be unknown during migration).
176    pub node_type: NodeType,
177}
178
179impl Child {
180    pub fn new(hash: [u8; 32], version: Version, node_type: NodeType) -> Self {
181        Self {
182            hash,
183            version,
184            node_type,
185        }
186    }
187
188    pub fn is_leaf(&self) -> bool {
189        matches!(self.node_type, NodeType::Leaf)
190    }
191
192    pub fn leaf_count(&self) -> Option<usize> {
193        match self.node_type {
194            NodeType::Leaf => Some(1),
195            NodeType::InternalLegacy => None,
196            NodeType::Internal { leaf_count } => Some(leaf_count),
197        }
198    }
199}
200
201/// [`Children`] is just a collection of children belonging to a [`InternalNode`], indexed from 0 to
202/// 15, inclusive.
203pub(crate) type Children = HashMap<Nibble, Child>;
204
205/// Represents a 4-level subtree with 16 children at the bottom level. Theoretically, this reduces
206/// IOPS to query a tree by 4x since we compress 4 levels in a standard Merkle tree into 1 node.
207/// Though we choose the same internal node structure as that of Patricia Merkle tree, the root hash
208/// computation logic is similar to a 4-level sparse Merkle tree except for some customizations. See
209/// the `CryptoHash` trait implementation below for details.
210#[derive(Clone, Debug, Eq, PartialEq)]
211pub struct InternalNode {
212    /// Up to 16 children.
213    children: Children,
214    /// Total number of leaves under this internal node
215    leaf_count: Option<usize>,
216    /// serialize leaf counts
217    leaf_count_migration: bool,
218}
219
220/// Computes the hash of internal node according to [`JellyfishTree`](crate::JellyfishTree)
221/// data structure in the logical view. `start` and `nibble_height` determine a subtree whose
222/// root hash we want to get. For an internal node with 16 children at the bottom level, we compute
223/// the root hash of it as if a full binary Merkle tree with 16 leaves as below:
224///
225/// ```text
226///   4 ->              +------ root hash ------+
227///                     |                       |
228///   3 ->        +---- # ----+           +---- # ----+
229///               |           |           |           |
230///   2 ->        #           #           #           #
231///             /   \       /   \       /   \       /   \
232///   1 ->     #     #     #     #     #     #     #     #
233///           / \   / \   / \   / \   / \   / \   / \   / \
234///   0 ->   0   1 2   3 4   5 6   7 8   9 A   B C   D E   F
235///   ^
236/// height
237/// ```
238///
239/// As illustrated above, at nibble height 0, `0..F` in hex denote 16 chidren hashes.  Each `#`
240/// means the hash of its two direct children, which will be used to generate the hash of its
241/// parent with the hash of its sibling. Finally, we can get the hash of this internal node.
242///
243/// However, if an internal node doesn't have all 16 chidren exist at height 0 but just a few of
244/// them, we have a modified hashing rule on top of what is stated above:
245/// 1. From top to bottom, a node will be replaced by a leaf child if the subtree rooted at this
246/// node has only one child at height 0 and it is a leaf child.
247/// 2. From top to bottom, a node will be replaced by the placeholder node if the subtree rooted at
248/// this node doesn't have any child at height 0. For example, if an internal node has 3 leaf
249/// children at index 0, 3, 8, respectively, and 1 internal node at index C, then the computation
250/// graph will be like:
251///
252/// ```text
253///   4 ->              +------ root hash ------+
254///                     |                       |
255///   3 ->        +---- # ----+           +---- # ----+
256///               |           |           |           |
257///   2 ->        #           @           8           #
258///             /   \                               /   \
259///   1 ->     0     3                             #     @
260///                                               / \
261///   0 ->                                       C   @
262///   ^
263/// height
264/// Note: @ denotes placeholder hash.
265/// ```
266#[cfg(any(test, feature = "fuzzing"))]
267impl Arbitrary for InternalNode {
268    type Parameters = ();
269    type Strategy = BoxedStrategy<Self>;
270
271    fn arbitrary_with(_args: ()) -> Self::Strategy {
272        hash_map(any::<Nibble>(), any::<Child>(), 1..=16)
273            .prop_filter(
274                "InternalNode constructor panics when its only child is a leaf.",
275                |children| {
276                    !(children.len() == 1
277                        && children.values().next().expect("Must exist.").is_leaf())
278                },
279            )
280            .prop_map(InternalNode::new)
281            .boxed()
282    }
283}
284
285impl InternalNode {
286    /// Creates a new Internal node.
287    #[cfg(any(test, feature = "fuzzing"))]
288    pub fn new(children: Children) -> Self {
289        Self::new_migration(children, true /* leaf_count_migration */)
290    }
291
292    pub fn new_migration(children: Children, leaf_count_migration: bool) -> Self {
293        Self::new_impl(children, leaf_count_migration).expect("Input children are logical.")
294    }
295
296    pub fn new_impl(children: Children, leaf_count_migration: bool) -> Result<Self> {
297        // Assert the internal node must have >= 1 children. If it only has one child, it cannot be
298        // a leaf node. Otherwise, the leaf node should be a child of this internal node's parent.
299        ensure!(!children.is_empty(), "Children must not be empty");
300        if children.len() == 1 {
301            ensure!(
302                !children
303                    .values()
304                    .next()
305                    .expect("Must have 1 element")
306                    .is_leaf(),
307                "If there's only one child, it must not be a leaf."
308            );
309        }
310
311        let leaf_count = Self::sum_leaf_count(&children);
312        Ok(Self {
313            children,
314            leaf_count,
315            leaf_count_migration,
316        })
317    }
318
319    fn sum_leaf_count(children: &Children) -> Option<usize> {
320        let mut leaf_count = 0;
321        for child in children.values() {
322            if let Some(n) = child.leaf_count() {
323                leaf_count += n;
324            } else {
325                return None;
326            }
327        }
328        Some(leaf_count)
329    }
330
331    pub fn leaf_count(&self) -> Option<usize> {
332        self.leaf_count
333    }
334
335    pub fn node_type(&self) -> NodeType {
336        match self.leaf_count {
337            Some(leaf_count) => NodeType::Internal { leaf_count },
338            None => NodeType::InternalLegacy,
339        }
340    }
341
342    pub fn hash(&self) -> [u8; 32] {
343        self.merkle_hash(
344            0,  /* start index */
345            16, /* the number of leaves in the subtree of which we want the hash of root */
346            self.generate_bitmaps(),
347        )
348    }
349
350    pub fn children_sorted(&self) -> impl Iterator<Item = (&Nibble, &Child)> {
351        // Previously this used `.sorted_by_key()` directly on the iterator but this does not appear
352        // to be available in itertools (it does not seem to ever have existed???) for unknown
353        // reasons. This satisfies the same behavior. ¯\_(ツ)_/¯
354        let mut sorted: Vec<(&Nibble, &Child)> = self.children.iter().collect();
355        sorted.sort_by_key(|(&nibble, _)| nibble);
356        sorted.into_iter()
357    }
358
359    pub fn serialize(&self, binary: &mut Vec<u8>, persist_leaf_counts: bool) -> Result<()> {
360        let (mut existence_bitmap, leaf_bitmap) = self.generate_bitmaps();
361        binary.write_u16::<LittleEndian>(existence_bitmap)?;
362        binary.write_u16::<LittleEndian>(leaf_bitmap)?;
363        for _ in 0..existence_bitmap.count_ones() {
364            let next_child = existence_bitmap.trailing_zeros() as u8;
365            let child = &self.children[&Nibble::from(next_child)];
366            serialize_u64_varint(child.version, binary);
367            binary.extend(child.hash.to_vec());
368            match child.node_type {
369                NodeType::Leaf => (),
370                NodeType::InternalLegacy => {
371                    if persist_leaf_counts {
372                        // It's impossible that a internal node has 0 leaves, use 0 to indicate
373                        // "known".
374                        // Also n.b., a not-fully-migrated internal is of `NodeType::InternalLegacy`
375                        // in memory, but serialized with `NodeTag::Internal` anyway once the
376                        // migration starts.
377                        serialize_u64_varint(0, binary);
378                    }
379                }
380                NodeType::Internal { leaf_count } => {
381                    if persist_leaf_counts {
382                        serialize_u64_varint(leaf_count as u64, binary);
383                    }
384                }
385            };
386            existence_bitmap &= !(1 << next_child);
387        }
388        Ok(())
389    }
390
391    pub fn deserialize(data: &[u8], read_leaf_counts: bool) -> Result<Self> {
392        let mut reader = Cursor::new(data);
393        let len = data.len();
394
395        // Read and validate existence and leaf bitmaps
396        let mut existence_bitmap = reader.read_u16::<LittleEndian>()?;
397        let leaf_bitmap = reader.read_u16::<LittleEndian>()?;
398        match existence_bitmap {
399            0 => return Err(NodeDecodeError::NoChildren.into()),
400            _ if (existence_bitmap & leaf_bitmap) != leaf_bitmap => {
401                return Err(NodeDecodeError::ExtraLeaves {
402                    existing: existence_bitmap,
403                    leaves: leaf_bitmap,
404                }
405                .into())
406            }
407            _ => (),
408        }
409
410        // Reconstruct children
411        let mut children = HashMap::new();
412        for _ in 0..existence_bitmap.count_ones() {
413            let next_child = existence_bitmap.trailing_zeros() as u8;
414            let version = deserialize_u64_varint(&mut reader)?;
415            let pos = reader.position() as usize;
416            let remaining = len - pos;
417
418            ensure!(
419                remaining >= size_of::<[u8; 32]>(),
420                "not enough bytes left, children: {}, bytes: {}",
421                existence_bitmap.count_ones(),
422                remaining
423            );
424            let hash = <[u8; 32]>::try_from(&reader.get_ref()[pos..pos + size_of::<[u8; 32]>()])?;
425            reader.seek(SeekFrom::Current(size_of::<[u8; 32]>() as i64))?;
426
427            let child_bit = 1 << next_child;
428            let node_type = if (leaf_bitmap & child_bit) != 0 {
429                NodeType::Leaf
430            } else if read_leaf_counts {
431                let leaf_count = deserialize_u64_varint(&mut reader)? as usize;
432                if leaf_count == 0 {
433                    NodeType::InternalLegacy
434                } else {
435                    NodeType::Internal { leaf_count }
436                }
437            } else {
438                NodeType::InternalLegacy
439            };
440
441            children.insert(
442                Nibble::from(next_child),
443                Child::new(hash, version, node_type),
444            );
445            existence_bitmap &= !child_bit;
446        }
447        assert_eq!(existence_bitmap, 0);
448
449        // The "leaf_count_migration" flag doesn't matter here, since a deserialized node should
450        // not be persisted again to the DB.
451        Self::new_impl(children, read_leaf_counts /* leaf_count_migration */)
452    }
453
454    /// Gets the `n`-th child.
455    pub fn child(&self, n: Nibble) -> Option<&Child> {
456        self.children.get(&n)
457    }
458
459    /// Generates `existence_bitmap` and `leaf_bitmap` as a pair of `u16`s: child at index `i`
460    /// exists if `existence_bitmap[i]` is set; child at index `i` is leaf node if
461    /// `leaf_bitmap[i]` is set.
462    pub fn generate_bitmaps(&self) -> (u16, u16) {
463        let mut existence_bitmap = 0;
464        let mut leaf_bitmap = 0;
465        for (nibble, child) in self.children.iter() {
466            let i = u8::from(*nibble);
467            existence_bitmap |= 1u16 << i;
468            if child.is_leaf() {
469                leaf_bitmap |= 1u16 << i;
470            }
471        }
472        // `leaf_bitmap` must be a subset of `existence_bitmap`.
473        assert_eq!(existence_bitmap | leaf_bitmap, existence_bitmap);
474        (existence_bitmap, leaf_bitmap)
475    }
476
477    /// Given a range [start, start + width), returns the sub-bitmap of that range.
478    fn range_bitmaps(start: u8, width: u8, bitmaps: (u16, u16)) -> (u16, u16) {
479        assert!(start < 16 && width.count_ones() == 1 && start % width == 0);
480        assert!(width <= 16 && (start + width) <= 16);
481        // A range with `start == 8` and `width == 4` will generate a mask 0b0000111100000000.
482        // use as converting to smaller integer types when 'width == 16'
483        let mask = (((1u32 << width) - 1) << start) as u16;
484        (bitmaps.0 & mask, bitmaps.1 & mask)
485    }
486
487    fn merkle_hash(
488        &self,
489        start: u8,
490        width: u8,
491        (existence_bitmap, leaf_bitmap): (u16, u16),
492    ) -> [u8; 32] {
493        // Given a bit [start, 1 << nibble_height], return the value of that range.
494        let (range_existence_bitmap, range_leaf_bitmap) =
495            Self::range_bitmaps(start, width, (existence_bitmap, leaf_bitmap));
496        if range_existence_bitmap == 0 {
497            // No child under this subtree
498            SPARSE_MERKLE_PLACEHOLDER_HASH
499        } else if width == 1 || (range_existence_bitmap.count_ones() == 1 && range_leaf_bitmap != 0)
500        {
501            // Only 1 leaf child under this subtree or reach the lowest level
502            let only_child_index = Nibble::from(range_existence_bitmap.trailing_zeros() as u8);
503            self.child(only_child_index)
504                .with_context(|| {
505                    format!(
506                        "Corrupted internal node: existence_bitmap indicates \
507                         the existence of a non-exist child at index {:x}",
508                        only_child_index
509                    )
510                })
511                .unwrap()
512                .hash
513        } else {
514            let left_child = self.merkle_hash(
515                start,
516                width / 2,
517                (range_existence_bitmap, range_leaf_bitmap),
518            );
519            let right_child = self.merkle_hash(
520                start + width / 2,
521                width / 2,
522                (range_existence_bitmap, range_leaf_bitmap),
523            );
524            SparseMerkleInternalNode::new(left_child, right_child).hash()
525        }
526    }
527
528    /// Gets the child without its corresponding siblings (like using
529    /// [`get_child_with_siblings`](InternalNode::get_child_with_siblings) and dropping the
530    /// siblings, but more efficient).
531    pub fn get_child_without_siblings(&self, node_key: &NodeKey, n: Nibble) -> Option<NodeKey> {
532        let (existence_bitmap, leaf_bitmap) = self.generate_bitmaps();
533
534        // Nibble height from 3 to 0.
535        for h in (0..4).rev() {
536            // Get the number of children of the internal node that each subtree at this height
537            // covers.
538            let width = 1 << h;
539            let child_half_start = get_child_half_start(n, h);
540
541            let (range_existence_bitmap, range_leaf_bitmap) =
542                Self::range_bitmaps(child_half_start, width, (existence_bitmap, leaf_bitmap));
543
544            if range_existence_bitmap == 0 {
545                // No child in this range.
546                return None;
547            } else if width == 1
548                || (range_existence_bitmap.count_ones() == 1 && range_leaf_bitmap != 0)
549            {
550                // Return the only 1 leaf child under this subtree or reach the lowest level
551                // Even this leaf child is not the n-th child, it should be returned instead of
552                // `None` because it's existence indirectly proves the n-th child doesn't exist.
553                // Please read proof format for details.
554                let only_child_index = Nibble::from(range_existence_bitmap.trailing_zeros() as u8);
555
556                let only_child_version = self
557                    .child(only_child_index)
558                    // Should be guaranteed by the self invariants, but these are not easy to express at the moment
559                    .with_context(|| {
560                        format!(
561                            "Corrupted internal node: child_bitmap indicates \
562                                     the existence of a non-exist child at index {:x}",
563                            only_child_index
564                        )
565                    })
566                    .unwrap()
567                    .version;
568
569                return Some(node_key.gen_child_node_key(only_child_version, only_child_index));
570            }
571        }
572        unreachable!("Impossible to get here without returning even at the lowest level.")
573    }
574
575    /// Gets the child and its corresponding siblings that are necessary to generate the proof for
576    /// the `n`-th child. If it is an existence proof, the returned child must be the `n`-th
577    /// child; otherwise, the returned child may be another child. See inline explanation for
578    /// details. When calling this function with n = 11 (node `b` in the following graph), the
579    /// range at each level is illustrated as a pair of square brackets:
580    ///
581    /// ```text
582    ///     4      [f   e   d   c   b   a   9   8   7   6   5   4   3   2   1   0] -> root level
583    ///            ---------------------------------------------------------------
584    ///     3      [f   e   d   c   b   a   9   8] [7   6   5   4   3   2   1   0] width = 8
585    ///                                  chs <--┘                        shs <--┘
586    ///     2      [f   e   d   c] [b   a   9   8] [7   6   5   4] [3   2   1   0] width = 4
587    ///                  shs <--┘               └--> chs
588    ///     1      [f   e] [d   c] [b   a] [9   8] [7   6] [5   4] [3   2] [1   0] width = 2
589    ///                          chs <--┘       └--> shs
590    ///     0      [f] [e] [d] [c] [b] [a] [9] [8] [7] [6] [5] [4] [3] [2] [1] [0] width = 1
591    ///     ^                chs <--┘   └--> shs
592    ///     |   MSB|<---------------------- uint 16 ---------------------------->|LSB
593    ///  height    chs: `child_half_start`         shs: `sibling_half_start`
594    /// ```
595    pub fn get_child_with_siblings(
596        &self,
597        node_key: &NodeKey,
598        n: Nibble,
599    ) -> (Option<NodeKey>, Vec<[u8; 32]>) {
600        let mut siblings = vec![];
601        let (existence_bitmap, leaf_bitmap) = self.generate_bitmaps();
602
603        // Nibble height from 3 to 0.
604        for h in (0..4).rev() {
605            // Get the number of children of the internal node that each subtree at this height
606            // covers.
607            let width = 1 << h;
608            let (child_half_start, sibling_half_start) = get_child_and_sibling_half_start(n, h);
609            // Compute the root hash of the subtree rooted at the sibling of `r`.
610            siblings.push(self.merkle_hash(
611                sibling_half_start,
612                width,
613                (existence_bitmap, leaf_bitmap),
614            ));
615
616            let (range_existence_bitmap, range_leaf_bitmap) =
617                Self::range_bitmaps(child_half_start, width, (existence_bitmap, leaf_bitmap));
618
619            if range_existence_bitmap == 0 {
620                // No child in this range.
621                return (None, siblings);
622            } else if width == 1
623                || (range_existence_bitmap.count_ones() == 1 && range_leaf_bitmap != 0)
624            {
625                // Return the only 1 leaf child under this subtree or reach the lowest level
626                // Even this leaf child is not the n-th child, it should be returned instead of
627                // `None` because it's existence indirectly proves the n-th child doesn't exist.
628                // Please read proof format for details.
629                let only_child_index = Nibble::from(range_existence_bitmap.trailing_zeros() as u8);
630                return (
631                    {
632                        let only_child_version = self
633                            .child(only_child_index)
634                            // Should be guaranteed by the self invariants, but these are not easy to express at the moment
635                            .with_context(|| {
636                                format!(
637                                    "Corrupted internal node: child_bitmap indicates \
638                                     the existence of a non-exist child at index {:x}",
639                                    only_child_index
640                                )
641                            })
642                            .unwrap()
643                            .version;
644                        Some(node_key.gen_child_node_key(only_child_version, only_child_index))
645                    },
646                    siblings,
647                );
648            }
649        }
650        unreachable!("Impossible to get here without returning even at the lowest level.")
651    }
652
653    #[cfg(test)]
654    pub(crate) fn into_legacy_internal(self) -> InternalNode {
655        let mut children = self.children;
656        children.iter_mut().for_each(|(_, mut child)| {
657            if matches!(child.node_type, NodeType::Internal { .. }) {
658                child.node_type = NodeType::InternalLegacy
659            }
660        });
661
662        InternalNode::new_migration(children, false /* leaf_count_migration */)
663    }
664
665    #[cfg(test)]
666    pub(crate) fn children(&self) -> &Children {
667        &self.children
668    }
669}
670
671/// Given a nibble, computes the start position of its `child_half_start` and `sibling_half_start`
672/// at `height` level.
673pub(crate) fn get_child_and_sibling_half_start(n: Nibble, height: u8) -> (u8, u8) {
674    // Get the index of the first child belonging to the same subtree whose root, let's say `r` is
675    // at `height` that the n-th child belongs to.
676    // Note: `child_half_start` will be always equal to `n` at height 0.
677    let child_half_start = (0xff << height) & u8::from(n);
678
679    // Get the index of the first child belonging to the subtree whose root is the sibling of `r`
680    // at `height`.
681    let sibling_half_start = child_half_start ^ (1 << height);
682
683    (child_half_start, sibling_half_start)
684}
685
686/// Given a nibble, computes the start position of its `child_half_start` at `height` level.
687pub(crate) fn get_child_half_start(n: Nibble, height: u8) -> u8 {
688    // Get the index of the first child belonging to the same subtree whose root, let's say `r` is
689    // at `height` that the n-th child belongs to.
690    // Note: `child_half_start` will be always equal to `n` at height 0.
691    (0xff << height) & u8::from(n)
692}
693
694/// Represents a key-value pair in the map.
695///
696/// Note: this does not store the key itself.
697#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
698pub struct LeafNode {
699    /// The hash of the key for this entry.
700    key_hash: KeyHash,
701    /// The hash of the value for this entry.
702    value_hash: ValueHash,
703    /// The value associated with the key.
704    value: Vec<u8>,
705}
706
707impl LeafNode {
708    /// Creates a new leaf node.
709    pub fn new(key_hash: KeyHash, value: Vec<u8>) -> Self {
710        let value_hash = value.as_slice().into();
711        Self {
712            key_hash,
713            value_hash,
714            value,
715        }
716    }
717
718    /// Gets the key hash.
719    pub fn key_hash(&self) -> KeyHash {
720        self.key_hash
721    }
722
723    /// Gets the associated value itself.
724    pub fn value(&self) -> &[u8] {
725        self.value.as_ref()
726    }
727
728    /// Gets the associated value hash.
729    pub(crate) fn value_hash(&self) -> ValueHash {
730        self.value_hash
731    }
732
733    pub fn hash(&self) -> [u8; 32] {
734        SparseMerkleLeafNode::new(self.key_hash, self.value_hash).hash()
735    }
736}
737
738impl From<LeafNode> for SparseMerkleLeafNode {
739    fn from(leaf_node: LeafNode) -> Self {
740        Self::new(leaf_node.key_hash, leaf_node.value_hash)
741    }
742}
743
744#[repr(u8)]
745#[derive(FromPrimitive, ToPrimitive)]
746enum NodeTag {
747    Null = 0,
748    InternalLegacy = 1,
749    Leaf = 2,
750    Internal = 3,
751}
752
753/// The concrete node type of [`JellyfishMerkleTree`](crate::JellyfishMerkleTree).
754#[derive(Clone, Debug, Eq, PartialEq)]
755pub enum Node {
756    /// Represents `null`.
757    Null,
758    /// A wrapper of [`InternalNode`].
759    Internal(InternalNode),
760    /// A wrapper of [`LeafNode`].
761    Leaf(LeafNode),
762}
763
764impl From<InternalNode> for Node {
765    fn from(node: InternalNode) -> Self {
766        Node::Internal(node)
767    }
768}
769
770impl From<InternalNode> for Children {
771    fn from(node: InternalNode) -> Self {
772        node.children
773    }
774}
775
776impl From<LeafNode> for Node {
777    fn from(node: LeafNode) -> Self {
778        Node::Leaf(node)
779    }
780}
781
782impl Node {
783    /// Creates the [`Null`](Node::Null) variant.
784    pub(crate) fn new_null() -> Self {
785        Node::Null
786    }
787
788    /// Creates the [`Internal`](Node::Internal) variant.
789    #[cfg(any(test, feature = "fuzzing"))]
790    pub(crate) fn new_internal(children: Children) -> Self {
791        Node::Internal(InternalNode::new(children))
792    }
793
794    /// Creates the [`Leaf`](Node::Leaf) variant.
795    pub(crate) fn new_leaf(key_hash: KeyHash, value: Vec<u8>) -> Self {
796        Node::Leaf(LeafNode::new(key_hash, value))
797    }
798
799    /// Returns `true` if the node is a leaf node.
800    pub(crate) fn is_leaf(&self) -> bool {
801        matches!(self, Node::Leaf(_))
802    }
803
804    /// Returns `NodeType`
805    pub(crate) fn node_type(&self) -> NodeType {
806        match self {
807            // The returning value will be used to construct a `Child` of a internal node, while an
808            // internal node will never have a child of Node::Null.
809            Self::Null => unreachable!(),
810            Self::Leaf(_) => NodeType::Leaf,
811            Self::Internal(n) => n.node_type(),
812        }
813    }
814
815    /// Returns leaf count if known
816    pub(crate) fn leaf_count(&self) -> Option<usize> {
817        match self {
818            Node::Null => Some(0),
819            Node::Leaf(_) => Some(1),
820            Node::Internal(internal_node) => internal_node.leaf_count,
821        }
822    }
823
824    /// Serializes to bytes for physical storage.
825    pub fn encode(&self) -> Result<Vec<u8>> {
826        let mut out = vec![];
827
828        match self {
829            Node::Null => {
830                out.push(NodeTag::Null as u8);
831            }
832            Node::Internal(internal_node) => {
833                let persist_leaf_count = internal_node.leaf_count_migration;
834                let tag = if persist_leaf_count {
835                    NodeTag::Internal
836                } else {
837                    NodeTag::InternalLegacy
838                };
839                out.push(tag as u8);
840                internal_node.serialize(&mut out, persist_leaf_count)?;
841                DIEM_JELLYFISH_INTERNAL_ENCODED_BYTES.inc_by(out.len() as u64);
842            }
843            Node::Leaf(leaf_node) => {
844                out.push(NodeTag::Leaf as u8);
845                out.extend(bcs::to_bytes(&leaf_node)?);
846                DIEM_JELLYFISH_LEAF_ENCODED_BYTES.inc_by(out.len() as u64);
847            }
848        }
849        Ok(out)
850    }
851
852    /// Computes the hash of nodes.
853    pub(crate) fn hash(&self) -> [u8; 32] {
854        match self {
855            Node::Null => SPARSE_MERKLE_PLACEHOLDER_HASH,
856            Node::Internal(internal_node) => internal_node.hash(),
857            Node::Leaf(leaf_node) => leaf_node.hash(),
858        }
859    }
860
861    /// Recovers from serialized bytes in physical storage.
862    pub fn decode(val: &[u8]) -> Result<Node> {
863        if val.is_empty() {
864            return Err(NodeDecodeError::EmptyInput.into());
865        }
866        let tag = val[0];
867        let node_tag = NodeTag::from_u8(tag);
868        match node_tag {
869            Some(NodeTag::Null) => Ok(Node::Null),
870            Some(NodeTag::InternalLegacy) => {
871                Ok(Node::Internal(InternalNode::deserialize(&val[1..], false)?))
872            }
873            Some(NodeTag::Internal) => {
874                Ok(Node::Internal(InternalNode::deserialize(&val[1..], true)?))
875            }
876            Some(NodeTag::Leaf) => Ok(Node::Leaf(bcs::from_bytes(&val[1..])?)),
877            None => Err(NodeDecodeError::UnknownTag { unknown_tag: tag }.into()),
878        }
879    }
880}
881
882/// Error thrown when a [`Node`] fails to be deserialized out of a byte sequence stored in physical
883/// storage, via [`Node::decode`].
884#[derive(Debug, Error, Eq, PartialEq)]
885pub enum NodeDecodeError {
886    /// Input is empty.
887    #[error("Missing tag due to empty input")]
888    EmptyInput,
889
890    /// The first byte of the input is not a known tag representing one of the variants.
891    #[error("lead tag byte is unknown: {}", unknown_tag)]
892    UnknownTag { unknown_tag: u8 },
893
894    /// No children found in internal node
895    #[error("No children found in internal node")]
896    NoChildren,
897
898    /// Extra leaf bits set
899    #[error(
900        "Non-existent leaf bits set, existing: {}, leaves: {}",
901        existing,
902        leaves
903    )]
904    ExtraLeaves { existing: u16, leaves: u16 },
905}
906
907/// Helper function to serialize version in a more efficient encoding.
908/// We use a super simple encoding - the high bit is set if more bytes follow.
909pub(crate) fn serialize_u64_varint(mut num: u64, binary: &mut Vec<u8>) {
910    for _ in 0..8 {
911        let low_bits = num as u8 & 0x7f;
912        num >>= 7;
913        let more = match num {
914            0 => 0u8,
915            _ => 0x80,
916        };
917        binary.push(low_bits | more);
918        if more == 0 {
919            return;
920        }
921    }
922    // Last byte is encoded raw; this means there are no bad encodings.
923    assert_ne!(num, 0);
924    assert!(num <= 0xff);
925    binary.push(num as u8);
926}
927
928/// Helper function to deserialize versions from above encoding.
929pub(crate) fn deserialize_u64_varint<T>(reader: &mut T) -> Result<u64>
930where
931    T: Read,
932{
933    let mut num = 0u64;
934    for i in 0..8 {
935        let byte = reader.read_u8()?;
936        num |= u64::from(byte & 0x7f) << (i * 7);
937        if (byte & 0x80) == 0 {
938            return Ok(num);
939        }
940    }
941    // Last byte is encoded as is.
942    let byte = reader.read_u8()?;
943    num |= u64::from(byte) << 56;
944    Ok(num)
945}