commonware_storage/merkle/mod.rs
1//! Shared types for Merkle-family data structures (MMR, MMB).
2//!
3//! This module provides generic `Position<F>` and `Location<F>` types parameterized by a
4//! [`Family`] marker trait. Each Merkle family (e.g. MMR, MMB) implements the trait with
5//! its own constants and conversion formulas, while the shared arithmetic, codec, and comparison
6//! logic lives here.
7
8pub mod batch;
9#[cfg(test)]
10pub(crate) mod conformance;
11pub mod hasher;
12#[cfg(feature = "std")]
13mod persisted;
14#[cfg(feature = "std")]
15pub use persisted::{compact, full};
16mod location;
17pub mod mem;
18pub mod mmb;
19pub mod mmr;
20pub mod path;
21mod position;
22mod proof;
23mod read;
24#[cfg(feature = "std")]
25pub mod storage;
26#[cfg(feature = "std")]
27pub mod verification;
28
29use alloc::vec::Vec;
30use bytes::{Buf, BufMut};
31use commonware_codec::{EncodeSize, Read, Write};
32use commonware_cryptography::Digest;
33use core::fmt::Debug;
34pub use location::{Location, LocationRangeExt};
35pub use position::Position;
36#[cfg(test)]
37pub(crate) use proof::build_range_proof;
38pub use proof::{Proof, MAX_PROOF_DIGESTS_PER_ELEMENT};
39pub use read::Readable;
40use thiserror::Error;
41
42/// Safe upper bound on the number of pinned nodes for any u64-backed Merkle family.
43///
44/// Pinned nodes are produced by [`Family::nodes_to_pin`], whose default
45/// implementation returns the peaks of the sub-structure at the prune
46/// location. A structure with `n` leaves has at most `popcount(n)` peaks, and
47/// since [`Location`] is backed by a `u64`, `n <= u64::MAX` yields at most 64
48/// peaks.
49pub const MAX_PINNED_NODES: usize = 64;
50
51/// Defines the strategy used to fold peaks into the final root digest.
52#[derive(Copy, Clone, Debug, PartialEq, Eq)]
53pub enum Bagging {
54 /// Peaks are bagged forward from oldest to newest, placing the smallest peak at the top.
55 ForwardFold,
56 /// Peaks are bagged backward from newest to oldest, placing the largest peak at the top.
57 BackwardFold,
58}
59
60/// Marker trait for Merkle-family data structures.
61///
62/// Provides the per-family constants and conversion functions that differentiate
63/// MMR from MMB (or other future Merkle structures). Families capture structural topology
64/// only; bagging is owned by the [`hasher::Hasher`] instance and supplied by the consumer.
65pub trait Family: Copy + Clone + Debug + Default + Send + Sync + 'static {
66 /// Maximum valid node count / size.
67 const MAX_NODES: Position<Self>;
68
69 /// Maximum valid leaf count.
70 const MAX_LEAVES: Location<Self>;
71
72 /// Convert a leaf location to its node position, or equivalently, convert a leaf count to the
73 /// corresponding total node count (size).
74 ///
75 /// The public, guaranteed domain is `loc <= MAX_LEAVES`. Some implementations may also accept
76 /// slightly larger temporary values for internal probing (for example, when checking the next
77 /// size boundary), but callers must not rely on that behavior.
78 fn location_to_position(loc: Location<Self>) -> Position<Self>;
79
80 /// Convert a node position to its leaf location, or `None` if the position is not a leaf.
81 /// Equivalently, convert a total node count (size) to the corresponding leaf count, returning
82 /// `None` if the size is not valid.
83 ///
84 /// The caller guarantees `pos <= MAX_NODES`.
85 fn position_to_location(pos: Position<Self>) -> Option<Location<Self>>;
86
87 /// Whether `size` is a valid tree size for this Merkle structure.
88 fn is_valid_size(size: Position<Self>) -> bool;
89
90 /// Returns the largest valid size that is no greater than `size`.
91 fn to_nearest_size(size: Position<Self>) -> Position<Self>;
92
93 /// Return the peaks of a structure with the given `size` as `(position, height)` pairs
94 /// in canonical oldest-to-newest order (suitable for
95 /// [`Hasher::root`](crate::merkle::hasher::Hasher::root)).
96 fn peaks(size: Position<Self>) -> impl Iterator<Item = (Position<Self>, u32)> + Send;
97
98 /// Count the number of oldest peaks that are entirely inactive.
99 ///
100 /// A peak is considered entirely inactive if all of its leaves strictly precede the
101 /// `inactivity_floor`. These peaks can be safely bagged forward into the `grafted_root`.
102 fn inactive_peaks(size: Position<Self>, inactivity_floor: Location<Self>) -> usize {
103 let mut inactive_count = 0;
104 let mut leaf_capacity_sum = 0u64;
105 for (_, height) in Self::peaks(size) {
106 let capacity = 1u64.checked_shl(height).expect("height excessively large");
107 leaf_capacity_sum = leaf_capacity_sum
108 .checked_add(capacity)
109 .expect("capacity overflow");
110 if leaf_capacity_sum <= *inactivity_floor {
111 inactive_count += 1;
112 } else {
113 break;
114 }
115 }
116 inactive_count
117 }
118
119 /// Compute positions of nodes that must be pinned when pruning to `prune_loc`.
120 ///
121 /// Pinned nodes are the minimal set of pruned digests required to continue growing the
122 /// structure and recomputing its root after pruning. The default implementation returns the
123 /// peaks of the sub-structure at `prune_loc`, which is sufficient for both root computation and
124 /// re-merkleization of retained leaves. Implementations may override this if their family
125 /// requires a different canonical pinned-node set for the pruning boundary.
126 ///
127 /// # Panics
128 ///
129 /// Implementations panic if `prune_loc` is invalid (i.e., exceeds
130 /// [`MAX_LEAVES`](Self::MAX_LEAVES)). Callers must validate inputs before calling.
131 fn nodes_to_pin(prune_loc: Location<Self>) -> impl Iterator<Item = Position<Self>> + Send {
132 let prune_pos = Self::location_to_position(prune_loc);
133 Self::peaks(prune_pos)
134 .filter(move |&(pos, _)| pos < prune_pos)
135 .map(|(pos, _)| pos)
136 .collect::<Vec<_>>()
137 .into_iter()
138 }
139
140 /// Return the positions of the left and right children of the node at `pos` with the
141 /// given `height`. The caller guarantees `height > 0` (leaves have no children).
142 fn children(pos: Position<Self>, height: u32) -> (Position<Self>, Position<Self>);
143
144 /// Return the heights of the internal nodes that lie between `size_for(N)` and
145 /// `size_for(N+1)`, where `N` is the given leaf count. These are the nodes created
146 /// when the `N`-th leaf is appended.
147 fn parent_heights(leaves: Location<Self>) -> impl Iterator<Item = u32>;
148
149 /// Return the height of the node at `pos`.
150 ///
151 /// # Panics
152 ///
153 /// Implementations may panic if `pos` does not correspond to a node that exists in some valid
154 /// instance of this family (i.e., it is not a position that would appear in a structure of any
155 /// size).
156 fn pos_to_height(pos: Position<Self>) -> u32;
157}
158
159/// Pending-chunk slot for Merkle families that do not carry a pending chunk (e.g. MMR).
160#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
161pub struct Unused;
162
163impl Write for Unused {
164 #[inline]
165 fn write(&self, _: &mut impl BufMut) {}
166}
167
168impl EncodeSize for Unused {
169 #[inline]
170 fn encode_size(&self) -> usize {
171 0
172 }
173}
174
175impl Read for Unused {
176 type Cfg = ();
177
178 #[inline]
179 fn read_cfg(_: &mut impl Buf, _: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
180 Ok(Self)
181 }
182}
183
184impl<D: Digest> TryFrom<Option<D>> for Unused {
185 type Error = commonware_codec::Error;
186
187 #[inline]
188 fn try_from(value: Option<D>) -> Result<Self, Self::Error> {
189 match value {
190 None => Ok(Self),
191 Some(_) => Err(commonware_codec::Error::Invalid(
192 "PendingChunk",
193 "pending chunk present for family without pending chunks",
194 )),
195 }
196 }
197}
198
199#[cfg(feature = "arbitrary")]
200impl arbitrary::Arbitrary<'_> for Unused {
201 fn arbitrary(_: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
202 Ok(Self)
203 }
204}
205
206/// The pending-chunk slot carried by a [Graftable] family's proof and witness types.
207pub trait PendingChunk<D: Digest>:
208 Clone
209 + Debug
210 + Eq
211 + Write
212 + EncodeSize
213 + Read<Cfg = ()>
214 + Send
215 + Sync
216 + TryFrom<Option<D>, Error: Debug>
217{
218 /// View the slot as an `Option<&D>`.
219 fn as_ref(&self) -> Option<&D> {
220 None
221 }
222}
223
224impl<D: Digest> PendingChunk<D> for Option<D> {
225 #[inline]
226 fn as_ref(&self) -> Option<&D> {
227 Self::as_ref(self)
228 }
229}
230
231impl<D: Digest> PendingChunk<D> for Unused {}
232
233/// Extension of [`Family`] with methods needed for grafting bitmap chunks onto a Merkle structure.
234/// Grafting combines an activity bitmap with an ops Merkle structure by hashing bitmap chunks
235/// together with ops subtree roots. These methods provide the coordinate conversions and
236/// chunk-to-peak mappings required by that process.
237pub trait Graftable: Family {
238 /// The pending-chunk slot type for this family's proofs and witnesses.
239 type PendingChunk<D: Digest>: PendingChunk<D>;
240
241 /// Return the nodes that collectively cover the leaf range of a bitmap chunk in a structure of
242 /// the given `size`.
243 ///
244 /// A chunk at index `chunk_idx` with grafting height `grafting_height` covers leaves
245 /// `[chunk_idx << grafting_height, (chunk_idx + 1) << grafting_height)`. The returned nodes
246 /// partition that range: each node's leaf range is entirely within the chunk, and together they
247 /// cover it exactly.
248 ///
249 /// Results are returned in oldest-to-newest (left-to-right) order.
250 ///
251 /// # Panics
252 ///
253 /// Panics if `size` is not a valid size or if the chunk's leaf range exceeds the structure's
254 /// leaf count.
255 fn chunk_peaks(
256 size: Position<Self>,
257 chunk_idx: u64,
258 grafting_height: u32,
259 ) -> impl Iterator<Item = (Position<Self>, u32)> + Send;
260
261 /// Return the deterministic position of the node at `height` whose leftmost leaf is at
262 /// `leaf_start`.
263 ///
264 /// For some families, this position corresponds to a node that physically exists in any
265 /// structure containing those leaves. For others (e.g. MMB with delayed merging), it may be a
266 /// "virtual" position that no actual node occupies, but is still deterministic and unique for
267 /// the given leaf range and height.
268 ///
269 /// Used by grafting to map grafted-structure positions to ops-structure positions for domain
270 /// separation in hash pre-images.
271 ///
272 /// # Panics
273 ///
274 /// Panics if `height` is excessively large (e.g., `>= 63`), or if the resulting position
275 /// computation overflows the bounds of the underlying numeric types.
276 fn subtree_root_position(leaf_start: Location<Self>, height: u32) -> Position<Self>;
277
278 /// Return the location of the leftmost leaf covered by the node at `pos` with `height`. For a
279 /// leaf (height 0), returns its own location.
280 ///
281 /// # Panics
282 ///
283 /// Panics if `height` is excessively large (e.g., `>= 63`), or if an invalid combination of
284 /// `pos` and `height` results in arithmetic underflow/overflow.
285 fn leftmost_leaf(pos: Position<Self>, height: u32) -> Location<Self>;
286
287 /// Return the minimum leaf count at which the node at `pos` with `height` exists in the
288 /// structure.
289 ///
290 /// For families without delayed merging (e.g. MMR), a node exists as soon as all leaves
291 /// in its span have been appended. For families with delayed merging (e.g. MMB), the
292 /// node is created some number of leaf insertions _after_ its last leaf, so the birth
293 /// size is larger. The MMB override accounts for this delay.
294 ///
295 /// This is used by the grafted-tree pruning logic to determine when a chunk-pair's
296 /// parent has been born in the ops tree, which controls when it is safe to prune the
297 /// pair's individual grafted leaves.
298 ///
299 /// # Panics
300 ///
301 /// Panics if `height` is excessively large (e.g., `>= 63`), or if arithmetic overflows.
302 fn peak_birth_size(pos: Position<Self>, height: u32) -> u64 {
303 let leftmost = *Self::leftmost_leaf(pos, height);
304 let width = 1u64.checked_shl(height).expect("height excessively large");
305 leftmost.checked_add(width).expect("birth size overflow")
306 }
307}
308
309/// Errors that can occur when interacting with a Merkle-family data structure.
310#[derive(Debug, Error)]
311pub enum Error<F: Family> {
312 /// The position does not correspond to a leaf node.
313 #[error("{0} is not a leaf")]
314 NonLeaf(Position<F>),
315
316 /// The position exceeds the valid range.
317 #[error("{0} > MAX_NODES")]
318 PositionOverflow(Position<F>),
319
320 /// The location exceeds the valid range.
321 #[error("{0} > MAX_LEAVES")]
322 LocationOverflow(Location<F>),
323
324 /// The range is empty but must contain at least one element.
325 #[error("range is empty")]
326 Empty,
327
328 /// The end of a range is out of bounds.
329 #[error("range end out of bounds: {0}")]
330 RangeOutOfBounds(Location<F>),
331
332 /// The requested size is invalid.
333 #[error("invalid size: {0}")]
334 InvalidSize(u64),
335
336 /// A requested leaf location exceeds the current leaf count.
337 #[error("leaf location out of bounds: {0}")]
338 LeafOutOfBounds(Location<F>),
339
340 /// A required node was not available (e.g. pruned).
341 #[error("element pruned: {0}")]
342 ElementPruned(Position<F>),
343
344 /// A required digest was compressed into a synthetic accumulator (e.g. a backward-fold
345 /// suffix accumulator) when the source proof was constructed, so it is not individually
346 /// available. Unlike [`Error::ElementPruned`], the digest still exists in the source
347 /// structure; the caller can recover by supplying it through an explicit witness slice.
348 #[error("digest hidden behind synthetic accumulator: {0}")]
349 CompressedDigest(Position<F>),
350
351 /// The provided pinned node list does not match the expected pruning boundary.
352 #[error("invalid pinned nodes")]
353 InvalidPinnedNodes,
354
355 /// Structure has diverged incompatibly from the batch's ancestor chain.
356 #[error("stale batch: base size {expected}, current size {actual}")]
357 StaleBatch {
358 /// The base size when the batch chain was forked.
359 expected: Position<F>,
360 /// The current structure size.
361 actual: Position<F>,
362 },
363
364 /// An ancestor batch was dropped before this batch was applied, causing
365 /// data loss. All ancestors must be kept alive until descendants are applied.
366 #[error("ancestor dropped: expected size {expected}, actual size {actual}")]
367 AncestorDropped {
368 /// The expected size after applying all ancestors + this batch.
369 expected: Position<F>,
370 /// The actual size (less than expected due to missing ancestor data).
371 actual: Position<F>,
372 },
373
374 /// The proof is invalid.
375 #[error("invalid proof")]
376 InvalidProof,
377
378 /// The requested inactive peak count cannot be represented by the current peak list.
379 #[error("inactive peak count {requested} exceeds peak count {peaks}")]
380 InvalidInactivePeaks {
381 /// Requested inactive peak count.
382 requested: usize,
383 /// Number of available peaks.
384 peaks: usize,
385 },
386
387 /// The root does not match the computed root.
388 #[error("root mismatch")]
389 RootMismatch,
390
391 /// A required digest is missing.
392 #[error("missing digest: {0}")]
393 MissingDigest(Position<F>),
394
395 /// A metadata error occurred.
396 #[cfg(feature = "std")]
397 #[error("metadata error: {0}")]
398 Metadata(#[from] crate::metadata::Error),
399
400 /// A journal error occurred.
401 #[cfg(feature = "std")]
402 #[error("journal error: {0}")]
403 Journal(#[from] crate::journal::Error),
404
405 /// A runtime error occurred.
406 #[cfg(feature = "std")]
407 #[error("runtime error: {0}")]
408 Runtime(#[from] commonware_runtime::Error),
409
410 /// A required node is missing.
411 #[error("missing node: {0}")]
412 MissingNode(Position<F>),
413
414 /// Data is corrupted.
415 #[error("data corrupted: {0}")]
416 DataCorrupted(&'static str),
417
418 /// A required grafted leaf digest is missing.
419 #[error("missing grafted leaf digest: {0}")]
420 MissingGraftedLeaf(Position<F>),
421
422 /// Bit offset is out of bounds.
423 #[error("bit offset {0} out of bounds (size: {1})")]
424 BitOutOfBounds(u64, u64),
425
426 /// Rewind was attempted but no prior committed state is available.
427 #[error("rewind beyond history")]
428 RewindBeyondHistory,
429}