Skip to main content

commonware_storage/merkle/
mod.rs

1//! Shared types for Merkle-family data structures (MMR, MMB).
2//!
3//! This module provides generic `Position<F>` and `Location<F>` types parameterized by a
4//! [`Family`] marker trait. Each Merkle family (e.g. MMR, MMB) implements the trait with
5//! its own constants and conversion formulas, while the shared arithmetic, codec, and comparison
6//! logic lives here.
7
8pub mod batch;
9#[cfg(test)]
10pub(crate) mod conformance;
11pub mod hasher;
12#[cfg(feature = "std")]
13mod persisted;
14#[cfg(feature = "std")]
15pub use persisted::{compact, full};
16mod location;
17pub mod mem;
18pub mod mmb;
19pub mod mmr;
20pub mod path;
21mod position;
22mod proof;
23mod read;
24#[cfg(feature = "std")]
25pub mod storage;
26#[cfg(feature = "std")]
27pub mod verification;
28
29use alloc::vec::Vec;
30use bytes::{Buf, BufMut};
31use commonware_codec::{EncodeSize, Read, Write};
32use commonware_cryptography::Digest;
33use core::fmt::Debug;
34pub use location::{Location, LocationRangeExt};
35pub use position::Position;
36#[cfg(test)]
37pub(crate) use proof::build_range_proof;
38pub use proof::{Proof, MAX_PROOF_DIGESTS_PER_ELEMENT};
39pub use read::Readable;
40use thiserror::Error;
41
42/// Safe upper bound on the number of pinned nodes for any u64-backed Merkle family.
43///
44/// Pinned nodes are produced by [`Family::nodes_to_pin`], whose default
45/// implementation returns the peaks of the sub-structure at the prune
46/// location. A structure with `n` leaves has at most `popcount(n)` peaks, and
47/// since [`Location`] is backed by a `u64`, `n <= u64::MAX` yields at most 64
48/// peaks.
49pub const MAX_PINNED_NODES: usize = 64;
50
51/// Defines the strategy used to fold peaks into the final root digest.
52#[derive(Copy, Clone, Debug, PartialEq, Eq)]
53pub enum Bagging {
54    /// Peaks are bagged forward from oldest to newest, placing the smallest peak at the top.
55    ForwardFold,
56    /// Peaks are bagged backward from newest to oldest, placing the largest peak at the top.
57    BackwardFold,
58}
59
60/// Marker trait for Merkle-family data structures.
61///
62/// Provides the per-family constants and conversion functions that differentiate
63/// MMR from MMB (or other future Merkle structures). Families capture structural topology
64/// only; bagging is owned by the [`hasher::Hasher`] instance and supplied by the consumer.
65pub trait Family: Copy + Clone + Debug + Default + Send + Sync + 'static {
66    /// Maximum valid node count / size.
67    const MAX_NODES: Position<Self>;
68
69    /// Maximum valid leaf count.
70    const MAX_LEAVES: Location<Self>;
71
72    /// Convert a leaf location to its node position, or equivalently, convert a leaf count to the
73    /// corresponding total node count (size).
74    ///
75    /// The public, guaranteed domain is `loc <= MAX_LEAVES`. Some implementations may also accept
76    /// slightly larger temporary values for internal probing (for example, when checking the next
77    /// size boundary), but callers must not rely on that behavior.
78    fn location_to_position(loc: Location<Self>) -> Position<Self>;
79
80    /// Convert a node position to its leaf location, or `None` if the position is not a leaf.
81    /// Equivalently, convert a total node count (size) to the corresponding leaf count, returning
82    /// `None` if the size is not valid.
83    ///
84    /// The caller guarantees `pos <= MAX_NODES`.
85    fn position_to_location(pos: Position<Self>) -> Option<Location<Self>>;
86
87    /// Whether `size` is a valid tree size for this Merkle structure.
88    fn is_valid_size(size: Position<Self>) -> bool;
89
90    /// Returns the largest valid size that is no greater than `size`.
91    fn to_nearest_size(size: Position<Self>) -> Position<Self>;
92
93    /// Return the peaks of a structure with the given `size` as `(position, height)` pairs
94    /// in canonical oldest-to-newest order (suitable for
95    /// [`Hasher::root`](crate::merkle::hasher::Hasher::root)).
96    fn peaks(size: Position<Self>) -> impl Iterator<Item = (Position<Self>, u32)> + Send;
97
98    /// Count the number of oldest peaks that are entirely inactive.
99    ///
100    /// A peak is considered entirely inactive if all of its leaves strictly precede the
101    /// `inactivity_floor`. These peaks can be safely bagged forward into the `grafted_root`.
102    fn inactive_peaks(size: Position<Self>, inactivity_floor: Location<Self>) -> usize {
103        let mut inactive_count = 0;
104        let mut leaf_capacity_sum = 0u64;
105        for (_, height) in Self::peaks(size) {
106            let capacity = 1u64.checked_shl(height).expect("height excessively large");
107            leaf_capacity_sum = leaf_capacity_sum
108                .checked_add(capacity)
109                .expect("capacity overflow");
110            if leaf_capacity_sum <= *inactivity_floor {
111                inactive_count += 1;
112            } else {
113                break;
114            }
115        }
116        inactive_count
117    }
118
119    /// Compute positions of nodes that must be pinned when pruning to `prune_loc`.
120    ///
121    /// Pinned nodes are the minimal set of pruned digests required to continue growing the
122    /// structure and recomputing its root after pruning. The default implementation returns the
123    /// peaks of the sub-structure at `prune_loc`, which is sufficient for both root computation and
124    /// re-merkleization of retained leaves. Implementations may override this if their family
125    /// requires a different canonical pinned-node set for the pruning boundary.
126    ///
127    /// # Panics
128    ///
129    /// Implementations panic if `prune_loc` is invalid (i.e., exceeds
130    /// [`MAX_LEAVES`](Self::MAX_LEAVES)). Callers must validate inputs before calling.
131    fn nodes_to_pin(prune_loc: Location<Self>) -> impl Iterator<Item = Position<Self>> + Send {
132        let prune_pos = Self::location_to_position(prune_loc);
133        Self::peaks(prune_pos)
134            .filter(move |&(pos, _)| pos < prune_pos)
135            .map(|(pos, _)| pos)
136            .collect::<Vec<_>>()
137            .into_iter()
138    }
139
140    /// Return the positions of the left and right children of the node at `pos` with the
141    /// given `height`. The caller guarantees `height > 0` (leaves have no children).
142    fn children(pos: Position<Self>, height: u32) -> (Position<Self>, Position<Self>);
143
144    /// Return the heights of the internal nodes that lie between `size_for(N)` and
145    /// `size_for(N+1)`, where `N` is the given leaf count. These are the nodes created
146    /// when the `N`-th leaf is appended.
147    fn parent_heights(leaves: Location<Self>) -> impl Iterator<Item = u32>;
148
149    /// Return the height of the node at `pos`.
150    ///
151    /// # Panics
152    ///
153    /// Implementations may panic if `pos` does not correspond to a node that exists in some valid
154    /// instance of this family (i.e., it is not a position that would appear in a structure of any
155    /// size).
156    fn pos_to_height(pos: Position<Self>) -> u32;
157}
158
159/// Pending-chunk slot for Merkle families that do not carry a pending chunk (e.g. MMR).
160#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
161pub struct Unused;
162
163impl Write for Unused {
164    #[inline]
165    fn write(&self, _: &mut impl BufMut) {}
166}
167
168impl EncodeSize for Unused {
169    #[inline]
170    fn encode_size(&self) -> usize {
171        0
172    }
173}
174
175impl Read for Unused {
176    type Cfg = ();
177
178    #[inline]
179    fn read_cfg(_: &mut impl Buf, _: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
180        Ok(Self)
181    }
182}
183
184impl<D: Digest> TryFrom<Option<D>> for Unused {
185    type Error = commonware_codec::Error;
186
187    #[inline]
188    fn try_from(value: Option<D>) -> Result<Self, Self::Error> {
189        match value {
190            None => Ok(Self),
191            Some(_) => Err(commonware_codec::Error::Invalid(
192                "PendingChunk",
193                "pending chunk present for family without pending chunks",
194            )),
195        }
196    }
197}
198
199#[cfg(feature = "arbitrary")]
200impl arbitrary::Arbitrary<'_> for Unused {
201    fn arbitrary(_: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
202        Ok(Self)
203    }
204}
205
206/// The pending-chunk slot carried by a [Graftable] family's proof and witness types.
207pub trait PendingChunk<D: Digest>:
208    Clone
209    + Debug
210    + Eq
211    + Write
212    + EncodeSize
213    + Read<Cfg = ()>
214    + Send
215    + Sync
216    + TryFrom<Option<D>, Error: Debug>
217{
218    /// View the slot as an `Option<&D>`.
219    fn as_ref(&self) -> Option<&D> {
220        None
221    }
222}
223
224impl<D: Digest> PendingChunk<D> for Option<D> {
225    #[inline]
226    fn as_ref(&self) -> Option<&D> {
227        Self::as_ref(self)
228    }
229}
230
231impl<D: Digest> PendingChunk<D> for Unused {}
232
233/// Extension of [`Family`] with methods needed for grafting bitmap chunks onto a Merkle structure.
234/// Grafting combines an activity bitmap with an ops Merkle structure by hashing bitmap chunks
235/// together with ops subtree roots. These methods provide the coordinate conversions and
236/// chunk-to-peak mappings required by that process.
237pub trait Graftable: Family {
238    /// The pending-chunk slot type for this family's proofs and witnesses.
239    type PendingChunk<D: Digest>: PendingChunk<D>;
240
241    /// Return the nodes that collectively cover the leaf range of a bitmap chunk in a structure of
242    /// the given `size`.
243    ///
244    /// A chunk at index `chunk_idx` with grafting height `grafting_height` covers leaves
245    /// `[chunk_idx << grafting_height, (chunk_idx + 1) << grafting_height)`. The returned nodes
246    /// partition that range: each node's leaf range is entirely within the chunk, and together they
247    /// cover it exactly.
248    ///
249    /// Results are returned in oldest-to-newest (left-to-right) order.
250    ///
251    /// # Panics
252    ///
253    /// Panics if `size` is not a valid size or if the chunk's leaf range exceeds the structure's
254    /// leaf count.
255    fn chunk_peaks(
256        size: Position<Self>,
257        chunk_idx: u64,
258        grafting_height: u32,
259    ) -> impl Iterator<Item = (Position<Self>, u32)> + Send;
260
261    /// Return the deterministic position of the node at `height` whose leftmost leaf is at
262    /// `leaf_start`.
263    ///
264    /// For some families, this position corresponds to a node that physically exists in any
265    /// structure containing those leaves. For others (e.g. MMB with delayed merging), it may be a
266    /// "virtual" position that no actual node occupies, but is still deterministic and unique for
267    /// the given leaf range and height.
268    ///
269    /// Used by grafting to map grafted-structure positions to ops-structure positions for domain
270    /// separation in hash pre-images.
271    ///
272    /// # Panics
273    ///
274    /// Panics if `height` is excessively large (e.g., `>= 63`), or if the resulting position
275    /// computation overflows the bounds of the underlying numeric types.
276    fn subtree_root_position(leaf_start: Location<Self>, height: u32) -> Position<Self>;
277
278    /// Return the location of the leftmost leaf covered by the node at `pos` with `height`. For a
279    /// leaf (height 0), returns its own location.
280    ///
281    /// # Panics
282    ///
283    /// Panics if `height` is excessively large (e.g., `>= 63`), or if an invalid combination of
284    /// `pos` and `height` results in arithmetic underflow/overflow.
285    fn leftmost_leaf(pos: Position<Self>, height: u32) -> Location<Self>;
286
287    /// Return the minimum leaf count at which the node at `pos` with `height` exists in the
288    /// structure.
289    ///
290    /// For families without delayed merging (e.g. MMR), a node exists as soon as all leaves
291    /// in its span have been appended. For families with delayed merging (e.g. MMB), the
292    /// node is created some number of leaf insertions _after_ its last leaf, so the birth
293    /// size is larger. The MMB override accounts for this delay.
294    ///
295    /// This is used by the grafted-tree pruning logic to determine when a chunk-pair's
296    /// parent has been born in the ops tree, which controls when it is safe to prune the
297    /// pair's individual grafted leaves.
298    ///
299    /// # Panics
300    ///
301    /// Panics if `height` is excessively large (e.g., `>= 63`), or if arithmetic overflows.
302    fn peak_birth_size(pos: Position<Self>, height: u32) -> u64 {
303        let leftmost = *Self::leftmost_leaf(pos, height);
304        let width = 1u64.checked_shl(height).expect("height excessively large");
305        leftmost.checked_add(width).expect("birth size overflow")
306    }
307}
308
309/// Errors that can occur when interacting with a Merkle-family data structure.
310#[derive(Debug, Error)]
311pub enum Error<F: Family> {
312    /// The position does not correspond to a leaf node.
313    #[error("{0} is not a leaf")]
314    NonLeaf(Position<F>),
315
316    /// The position exceeds the valid range.
317    #[error("{0} > MAX_NODES")]
318    PositionOverflow(Position<F>),
319
320    /// The location exceeds the valid range.
321    #[error("{0} > MAX_LEAVES")]
322    LocationOverflow(Location<F>),
323
324    /// The range is empty but must contain at least one element.
325    #[error("range is empty")]
326    Empty,
327
328    /// The end of a range is out of bounds.
329    #[error("range end out of bounds: {0}")]
330    RangeOutOfBounds(Location<F>),
331
332    /// The requested size is invalid.
333    #[error("invalid size: {0}")]
334    InvalidSize(u64),
335
336    /// A requested leaf location exceeds the current leaf count.
337    #[error("leaf location out of bounds: {0}")]
338    LeafOutOfBounds(Location<F>),
339
340    /// A required node was not available (e.g. pruned).
341    #[error("element pruned: {0}")]
342    ElementPruned(Position<F>),
343
344    /// A required digest was compressed into a synthetic accumulator (e.g. a backward-fold
345    /// suffix accumulator) when the source proof was constructed, so it is not individually
346    /// available. Unlike [`Error::ElementPruned`], the digest still exists in the source
347    /// structure; the caller can recover by supplying it through an explicit witness slice.
348    #[error("digest hidden behind synthetic accumulator: {0}")]
349    CompressedDigest(Position<F>),
350
351    /// The provided pinned node list does not match the expected pruning boundary.
352    #[error("invalid pinned nodes")]
353    InvalidPinnedNodes,
354
355    /// Structure has diverged incompatibly from the batch's ancestor chain.
356    #[error("stale batch: base size {expected}, current size {actual}")]
357    StaleBatch {
358        /// The base size when the batch chain was forked.
359        expected: Position<F>,
360        /// The current structure size.
361        actual: Position<F>,
362    },
363
364    /// An ancestor batch was dropped before this batch was applied, causing
365    /// data loss. All ancestors must be kept alive until descendants are applied.
366    #[error("ancestor dropped: expected size {expected}, actual size {actual}")]
367    AncestorDropped {
368        /// The expected size after applying all ancestors + this batch.
369        expected: Position<F>,
370        /// The actual size (less than expected due to missing ancestor data).
371        actual: Position<F>,
372    },
373
374    /// The proof is invalid.
375    #[error("invalid proof")]
376    InvalidProof,
377
378    /// The requested inactive peak count cannot be represented by the current peak list.
379    #[error("inactive peak count {requested} exceeds peak count {peaks}")]
380    InvalidInactivePeaks {
381        /// Requested inactive peak count.
382        requested: usize,
383        /// Number of available peaks.
384        peaks: usize,
385    },
386
387    /// The root does not match the computed root.
388    #[error("root mismatch")]
389    RootMismatch,
390
391    /// A required digest is missing.
392    #[error("missing digest: {0}")]
393    MissingDigest(Position<F>),
394
395    /// A metadata error occurred.
396    #[cfg(feature = "std")]
397    #[error("metadata error: {0}")]
398    Metadata(#[from] crate::metadata::Error),
399
400    /// A journal error occurred.
401    #[cfg(feature = "std")]
402    #[error("journal error: {0}")]
403    Journal(#[from] crate::journal::Error),
404
405    /// A runtime error occurred.
406    #[cfg(feature = "std")]
407    #[error("runtime error: {0}")]
408    Runtime(#[from] commonware_runtime::Error),
409
410    /// A required node is missing.
411    #[error("missing node: {0}")]
412    MissingNode(Position<F>),
413
414    /// Data is corrupted.
415    #[error("data corrupted: {0}")]
416    DataCorrupted(&'static str),
417
418    /// A required grafted leaf digest is missing.
419    #[error("missing grafted leaf digest: {0}")]
420    MissingGraftedLeaf(Position<F>),
421
422    /// Bit offset is out of bounds.
423    #[error("bit offset {0} out of bounds (size: {1})")]
424    BitOutOfBounds(u64, u64),
425
426    /// Rewind was attempted but no prior committed state is available.
427    #[error("rewind beyond history")]
428    RewindBeyondHistory,
429}