miden_crypto/merkle/smt/partial/serialization/mod.rs
1//! This module contains a system for producing compact serialized representations of the
2//! `PartialSmt` data structure, intended to reduce data sent over the wire through de-duplication.
3
4pub mod property_tests;
5mod tests;
6
7use alloc::{string::ToString, vec::Vec};
8
9use miden_field::{Felt, FeltFromIntError, Word};
10use miden_serde_utils::{
11 ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
12};
13
14use crate::{
15 Map,
16 merkle::{
17 EmptySubtreeRoots,
18 smt::{SMT_DEPTH, SmtLeaf},
19 },
20};
21
22// UNIQUE NODES
23// ================================================================================================
24
25/// A representation of a partial SMT that contains only the unique nodes in the tree, designed for
26/// better efficiency when sending data across the wire.
27///
28/// It _explicitly_ does not need to contain a fully-realized SMT, and instead may contain some
29/// subset of a full tree. It contains the minimal set of data necessary to reconstruct its input.
30///
31/// # Versioning
32///
33/// Note that this structure is explicitly **not intended to be versioned**. This structure should
34/// be used as part of a broader serialization solution that does include this if necessary.
35///
36/// # Serialization
37///
38/// The serialization and deserialization process does not validate that node or leaf indices are
39/// valid for their level. This is the responsibility of the client of this type.
40#[derive(Clone, Debug, Eq, PartialEq)]
41pub struct UniqueNodes {
42 /// The expected root of the tree after reconstruction.
43 ///
44 /// This primarily exists as a sanity check, taking little extra space but ensuring that we can
45 /// detect more possible cases of corruption.
46 pub root: Word,
47
48 /// The nodes that make up the tree itself.
49 ///
50 /// It maps the node depth to a vector containing all the nodes at that depth, ensuring that no
51 /// data that can be reasonably reconstructed is stored.
52 pub nodes: Map<u8, Vec<(u64, NodeValue)>>,
53
54 /// The leaves of the tree.
55 ///
56 /// It only stores the populated leaves, keyed on their index.
57 pub leaves: Vec<(u64, SmtLeaf)>,
58
59 /// The leaves for which we only have the hash value, and not the actual leaf value.
60 ///
61 /// We keep these separately to the `leaves` as storing them this way is more compact.
62 pub value_only_leaves: Vec<(u64, Word)>,
63}
64
65impl UniqueNodes {
66 /// Creates an empty `UniqueNodes` with no nodes or leaves in it.
67 pub fn empty() -> Self {
68 Self {
69 root: *EmptySubtreeRoots::entry(SMT_DEPTH, 0),
70 nodes: Map::default(),
71 leaves: Vec::default(),
72 value_only_leaves: Vec::default(),
73 }
74 }
75}
76
77impl Default for UniqueNodes {
78 fn default() -> Self {
79 Self::empty()
80 }
81}
82
83impl Serializable for UniqueNodes {
84 fn write_into<W: ByteWriter>(&self, target: &mut W) {
85 // First we write the expected root into the buffer.
86 self.root.write_into(target);
87
88 // We write the length as u64 to ensure portability.
89 let node_count = self.nodes.len() as u64;
90 target.write(node_count);
91
92 // We then write each of the pairs of (u8, Vec<...>) independently.
93 for (depth, nodes) in self.nodes.iter() {
94 target.write(depth);
95 let node_count = nodes.len() as u64;
96 target.write(node_count);
97 target.write_many(nodes.iter());
98 }
99
100 // The leaves themselves come next.
101 let leaf_count = self.leaves.len() as u64;
102 target.write(leaf_count);
103 target.write_many(self.leaves.iter());
104
105 // And the value-only leaves bring up the rear.
106 let value_only_leaf_count = self.value_only_leaves.len() as u64;
107 target.write(value_only_leaf_count);
108 target.write_many(self.value_only_leaves.iter());
109 }
110}
111
112impl Deserializable for UniqueNodes {
113 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
114 // The first item is the 32 bytes containing the expected root of the tree after
115 // reconstruction.
116 let root = Word::read_from(source)?;
117
118 // We first have to read the count of levels.
119 let level_count = source.read_u64()?;
120 let mut nodes = Map::new();
121
122 // Next we have that many levels to read, but each is of a variable size.
123 for _ in 0..level_count {
124 let depth = source.read_u8()?;
125 let node_count = source.read_u64()?;
126 let level_nodes = source
127 .read_many_iter(node_count.try_into().map_err(|_| {
128 DeserializationError::InvalidValue(format!("Node count {node_count} overflow"))
129 })?)?
130 .collect::<Result<Vec<_>, _>>()?;
131 nodes.insert(depth, level_nodes);
132 }
133
134 // Next we need the number of leaves.
135 let leaf_count = source.read_u64()?;
136 let mut leaves = Vec::new();
137
138 // And then we have to read that many leaves.
139 for _ in 0..leaf_count {
140 leaves.push(source.read()?);
141 }
142
143 // Finally we read the number of value-only leaves...
144 let value_only_leaf_count = source.read_u64()?;
145 let mut value_only_leaves = Vec::new();
146
147 // ... and read that many.
148 for _ in 0..value_only_leaf_count {
149 value_only_leaves.push(source.read()?);
150 }
151
152 Ok(Self { root, nodes, leaves, value_only_leaves })
153 }
154}
155
156// NODE VALUE
157// ================================================================================================
158
159/// The value of a node in the serialized representation.
160///
161/// # Serialization
162///
163/// This enum can be in one of two cases: empty, or containing a Word. The naïve serialization would
164/// use a flag to indicate the variant, costing at least a byte to avoid the need for potentially
165/// expensive unaligned accesses.
166///
167/// [`Word`], however, consists of four `Felt`s, each of which occupies the Goldilocks field. This
168/// provides a niche in each of those `Felt`s that allows us to not require the extra byte when
169/// serializing a true value. As we assume that real values are more common than empty subtree roots
170/// by their very nature, making an empty root take 8 bytes instead of 1 is a smaller cost to pay
171/// than an extra byte for each populated node.
172///
173/// To that end, we encode the type as follows:
174///
175/// - For `Self::EmptySubtreeRoot` we encode the LE bytes for [`u64::MAX`], which exceeds the field
176/// order and hence serves as a sentinel value using the niche.
177/// - For `Self::Present` we simply encode the word as its four component felts. As none of the
178/// felts can take a value exceeding [`Felt::ORDER`], we can immediately disambiguate between this
179/// case and the one above.
180#[derive(Clone, Debug, Eq, PartialEq)]
181pub enum NodeValue {
182 /// The node is the head of an empty subtree at the depth given by the outer map in
183 /// [`UniqueNodes`].
184 EmptySubtreeRoot,
185
186 /// The node's value is the provided hash.
187 Present(Word),
188}
189
190impl Serializable for NodeValue {
191 fn write_into<W: ByteWriter>(&self, target: &mut W) {
192 match self {
193 NodeValue::EmptySubtreeRoot => target.write_u64(u64::MAX),
194 NodeValue::Present(w) => w.write_into(target),
195 }
196 }
197}
198
199impl Deserializable for NodeValue {
200 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
201 let first_value = source.read_u64()?;
202
203 let to_e = |e: FeltFromIntError| DeserializationError::InvalidValue(e.to_string());
204
205 if first_value == u64::MAX {
206 Ok(Self::EmptySubtreeRoot)
207 } else {
208 // We start by reading the rest of the bytes here to make sure that we have enough data
209 // before actually deserializing.
210 let remaining_values: [u64; Word::NUM_ELEMENTS - 1] = source.read()?;
211
212 let felts = [
213 Felt::new(first_value).map_err(to_e)?,
214 Felt::new(remaining_values[0]).map_err(to_e)?,
215 Felt::new(remaining_values[1]).map_err(to_e)?,
216 Felt::new(remaining_values[2]).map_err(to_e)?,
217 ];
218
219 Ok(Self::Present(Word::new(felts)))
220 }
221 }
222}