Skip to main content

miden_crypto/merkle/smt/partial/serialization/
mod.rs

1//! This module contains a system for producing compact serialized representations of the
2//! `PartialSmt` data structure, intended to reduce data sent over the wire through de-duplication.
3
4pub mod property_tests;
5mod tests;
6
7use alloc::{string::ToString, vec::Vec};
8
9use miden_field::{Felt, FeltFromIntError, Word};
10use miden_serde_utils::{
11    ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
12};
13
14use crate::{
15    Map,
16    merkle::{
17        EmptySubtreeRoots,
18        smt::{SMT_DEPTH, SmtLeaf},
19    },
20};
21
22// UNIQUE NODES
23// ================================================================================================
24
25/// A representation of a partial SMT that contains only the unique nodes in the tree, designed for
26/// better efficiency when sending data across the wire.
27///
28/// It _explicitly_ does not need to contain a fully-realized SMT, and instead may contain some
29/// subset of a full tree. It contains the minimal set of data necessary to reconstruct its input.
30///
31/// # Versioning
32///
33/// Note that this structure is explicitly **not intended to be versioned**. This structure should
34/// be used as part of a broader serialization solution that does include this if necessary.
35///
36/// # Serialization
37///
38/// The serialization and deserialization process does not validate that node or leaf indices are
39/// valid for their level. This is the responsibility of the client of this type.
40#[derive(Clone, Debug, Eq, PartialEq)]
41pub struct UniqueNodes {
42    /// The expected root of the tree after reconstruction.
43    ///
44    /// This primarily exists as a sanity check, taking little extra space but ensuring that we can
45    /// detect more possible cases of corruption.
46    pub root: Word,
47
48    /// The nodes that make up the tree itself.
49    ///
50    /// It maps the node depth to a vector containing all the nodes at that depth, ensuring that no
51    /// data that can be reasonably reconstructed is stored.
52    pub nodes: Map<u8, Vec<(u64, NodeValue)>>,
53
54    /// The leaves of the tree.
55    ///
56    /// It only stores the populated leaves, keyed on their index.
57    pub leaves: Vec<(u64, SmtLeaf)>,
58
59    /// The leaves for which we only have the hash value, and not the actual leaf value.
60    ///
61    /// We keep these separately to the `leaves` as storing them this way is more compact.
62    pub value_only_leaves: Vec<(u64, Word)>,
63}
64
65impl UniqueNodes {
66    /// Creates an empty `UniqueNodes` with no nodes or leaves in it.
67    pub fn empty() -> Self {
68        Self {
69            root: *EmptySubtreeRoots::entry(SMT_DEPTH, 0),
70            nodes: Map::default(),
71            leaves: Vec::default(),
72            value_only_leaves: Vec::default(),
73        }
74    }
75}
76
77impl Default for UniqueNodes {
78    fn default() -> Self {
79        Self::empty()
80    }
81}
82
83impl Serializable for UniqueNodes {
84    fn write_into<W: ByteWriter>(&self, target: &mut W) {
85        // First we write the expected root into the buffer.
86        self.root.write_into(target);
87
88        // We write the length as u64 to ensure portability.
89        let node_count = self.nodes.len() as u64;
90        target.write(node_count);
91
92        // We then write each of the pairs of (u8, Vec<...>) independently.
93        for (depth, nodes) in self.nodes.iter() {
94            target.write(depth);
95            let node_count = nodes.len() as u64;
96            target.write(node_count);
97            target.write_many(nodes.iter());
98        }
99
100        // The leaves themselves come next.
101        let leaf_count = self.leaves.len() as u64;
102        target.write(leaf_count);
103        target.write_many(self.leaves.iter());
104
105        // And the value-only leaves bring up the rear.
106        let value_only_leaf_count = self.value_only_leaves.len() as u64;
107        target.write(value_only_leaf_count);
108        target.write_many(self.value_only_leaves.iter());
109    }
110}
111
112impl Deserializable for UniqueNodes {
113    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
114        // The first item is the 32 bytes containing the expected root of the tree after
115        // reconstruction.
116        let root = Word::read_from(source)?;
117
118        // We first have to read the count of levels.
119        let level_count = source.read_u64()?;
120        let mut nodes = Map::new();
121
122        // Next we have that many levels to read, but each is of a variable size.
123        for _ in 0..level_count {
124            let depth = source.read_u8()?;
125            let node_count = source.read_u64()?;
126            let level_nodes = source
127                .read_many_iter(node_count.try_into().map_err(|_| {
128                    DeserializationError::InvalidValue(format!("Node count {node_count} overflow"))
129                })?)?
130                .collect::<Result<Vec<_>, _>>()?;
131            nodes.insert(depth, level_nodes);
132        }
133
134        // Next we need the number of leaves.
135        let leaf_count = source.read_u64()?;
136        let mut leaves = Vec::new();
137
138        // And then we have to read that many leaves.
139        for _ in 0..leaf_count {
140            leaves.push(source.read()?);
141        }
142
143        // Finally we read the number of value-only leaves...
144        let value_only_leaf_count = source.read_u64()?;
145        let mut value_only_leaves = Vec::new();
146
147        // ... and read that many.
148        for _ in 0..value_only_leaf_count {
149            value_only_leaves.push(source.read()?);
150        }
151
152        Ok(Self { root, nodes, leaves, value_only_leaves })
153    }
154}
155
156// NODE VALUE
157// ================================================================================================
158
159/// The value of a node in the serialized representation.
160///
161/// # Serialization
162///
163/// This enum can be in one of two cases: empty, or containing a Word. The naïve serialization would
164/// use a flag to indicate the variant, costing at least a byte to avoid the need for potentially
165/// expensive unaligned accesses.
166///
167/// [`Word`], however, consists of four `Felt`s, each of which occupies the Goldilocks field. This
168/// provides a niche in each of those `Felt`s that allows us to not require the extra byte when
169/// serializing a true value. As we assume that real values are more common than empty subtree roots
170/// by their very nature, making an empty root take 8 bytes instead of 1 is a smaller cost to pay
171/// than an extra byte for each populated node.
172///
173/// To that end, we encode the type as follows:
174///
175/// - For `Self::EmptySubtreeRoot` we encode the LE bytes for [`u64::MAX`], which exceeds the field
176///   order and hence serves as a sentinel value using the niche.
177/// - For `Self::Present` we simply encode the word as its four component felts. As none of the
178///   felts can take a value exceeding [`Felt::ORDER`], we can immediately disambiguate between this
179///   case and the one above.
180#[derive(Clone, Debug, Eq, PartialEq)]
181pub enum NodeValue {
182    /// The node is the head of an empty subtree at the depth given by the outer map in
183    /// [`UniqueNodes`].
184    EmptySubtreeRoot,
185
186    /// The node's value is the provided hash.
187    Present(Word),
188}
189
190impl Serializable for NodeValue {
191    fn write_into<W: ByteWriter>(&self, target: &mut W) {
192        match self {
193            NodeValue::EmptySubtreeRoot => target.write_u64(u64::MAX),
194            NodeValue::Present(w) => w.write_into(target),
195        }
196    }
197}
198
199impl Deserializable for NodeValue {
200    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
201        let first_value = source.read_u64()?;
202
203        let to_e = |e: FeltFromIntError| DeserializationError::InvalidValue(e.to_string());
204
205        if first_value == u64::MAX {
206            Ok(Self::EmptySubtreeRoot)
207        } else {
208            // We start by reading the rest of the bytes here to make sure that we have enough data
209            // before actually deserializing.
210            let remaining_values: [u64; Word::NUM_ELEMENTS - 1] = source.read()?;
211
212            let felts = [
213                Felt::new(first_value).map_err(to_e)?,
214                Felt::new(remaining_values[0]).map_err(to_e)?,
215                Felt::new(remaining_values[1]).map_err(to_e)?,
216                Felt::new(remaining_values[2]).map_err(to_e)?,
217            ];
218
219            Ok(Self::Present(Word::new(felts)))
220        }
221    }
222}