miden_core/mast/serialization/mod.rs
1//! The serialization format of MastForest is as follows:
2//!
3//! (Metadata)
4//! - MAGIC (4 bytes) + FLAGS (1 byte) + VERSION (3 bytes)
5//!
6//! (Counts)
7//! - nodes count (`usize`)
8//! - decorators count (`usize`) - 0 if stripped, reserved for future use in lazy loading (#2504)
9//!
10//! (Procedure roots section)
11//! - procedure roots (`Vec<u32>` as MastNodeId values)
12//!
13//! (Basic block data section)
14//! - basic block data (padded operations + batch metadata)
15//!
16//! (Node info section)
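//! - MAST node infos (`MastNodeInfo` entries written consecutively, one per node; the number of
//!   entries is the nodes count from the Counts section)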
18//!
19//! (Advice map section)
20//! - Advice map (`AdviceMap`)
21//!
22//! (DebugInfo section - omitted if FLAGS bit 0 is set)
23//! - Decorator data (raw bytes for decorator payloads)
24//! - String table (deduplicated strings)
25//! - Decorator infos (`Vec<DecoratorInfo>`)
26//! - Error codes map (`BTreeMap<u64, String>`)
27//! - OpToDecoratorIds CSR (operation-indexed decorators, dense representation)
28//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators, dense representation)
29//! - Procedure names map (`BTreeMap<Word, String>`)
30//!
31//! # Stripped Format
32//!
33//! When serializing with [`MastForest::write_stripped`], the FLAGS byte has bit 0 set
34//! and the entire DebugInfo section is omitted. Deserialization auto-detects the format
35//! and creates an empty `DebugInfo` with valid CSR structures when reading stripped files.
36
37use alloc::vec::Vec;
38
39use super::{MastForest, MastNode, MastNodeId};
40use crate::{
41 advice::AdviceMap,
42 serde::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable},
43};
44
45pub(crate) mod asm_op;
46pub(crate) mod decorator;
47
48mod info;
49use info::MastNodeInfo;
50
51mod basic_blocks;
52use basic_blocks::{BasicBlockDataBuilder, BasicBlockDataDecoder};
53
54pub(crate) mod string_table;
55pub(crate) use string_table::StringTable;
56
57#[cfg(test)]
58mod seed_gen;
59
60#[cfg(test)]
61mod tests;
62
63// TYPE ALIASES
64// ================================================================================================
65
/// Offset into the `node_data` section of an encoded [`MastForest`].
type NodeDataOffset = u32;

/// Offset into the `decorator_data` section of an encoded [`MastForest`].
type DecoratorDataOffset = u32;

/// Offset into the `strings_data` section of an encoded [`MastForest`].
type StringDataOffset = usize;

/// Index into the strings table of an encoded [`MastForest`].
type StringIndex = usize;
77
78// CONSTANTS
79// ================================================================================================
80
/// Magic bytes that identify a binary-encoded MAST file.
///
/// The header starts with these 4 bytes and is immediately followed by a flags byte:
/// - Bytes 0-3: `b"MAST"` - magic identifier
/// - Byte 4: flags byte (see [`FLAG_STRIPPED`] and [`FLAGS_RESERVED_MASK`])
///
/// The flags byte sits where the null terminator of the original `b"MAST\0"` magic used to be,
/// so a header carrying the old magic reads as flags `0x00`, i.e. "debug info present".
const MAGIC: &[u8; 4] = b"MAST";

/// Flags bit 0: the serialized MastForest carries no debug info.
///
/// When set, the DebugInfo section is omitted entirely and the deserializer builds an empty
/// `DebugInfo` with valid CSR structures instead of reading one.
const FLAG_STRIPPED: u8 = 0b0000_0001;

/// Flags bits 1-7, which are reserved for future use and must be zero.
///
/// Deserialization fails if any of these bits is set.
const FLAGS_RESERVED_MASK: u8 = 0b1111_1110;

/// The format version.
///
/// Increment the version by 1 whenever the format is modified. `[255, 255, 255]` is reserved for
/// a future extension of the version field itself and should be considered invalid for now.
///
/// Version history:
/// - [0, 0, 0]: Initial format
/// - [0, 0, 1]: Added batch metadata to basic blocks (operations serialized in padded form with
///   indptr, padding, and group metadata for exact OpBatch reconstruction). Direct decorator
///   serialization in CSR format (eliminates per-node decorator sections and round-trip
///   conversions). Header changed from `MAST\0` to `MAST` + flags byte.
/// - [0, 0, 2]: Removed AssemblyOp from Decorator enum serialization. AssemblyOps are now stored
///   separately in DebugInfo. Removed `should_break` field from AssemblyOp serialization (#2646).
///   Removed `breakpoint` instruction (#2655).
const VERSION: [u8; 3] = [0, 0, 2];
119
120// MAST FOREST SERIALIZATION/DESERIALIZATION
121// ================================================================================================
122
123impl Serializable for MastForest {
124 fn write_into<W: ByteWriter>(&self, target: &mut W) {
125 self.write_into_with_options(target, false);
126 }
127}
128
129impl MastForest {
130 /// Internal serialization with options.
131 ///
132 /// When `stripped` is true, the DebugInfo section is omitted and the FLAGS byte
133 /// has bit 0 set.
134 fn write_into_with_options<W: ByteWriter>(&self, target: &mut W, stripped: bool) {
135 let mut basic_block_data_builder = BasicBlockDataBuilder::new();
136
137 // magic & flags
138 target.write_bytes(MAGIC);
139 target.write_u8(if stripped { FLAG_STRIPPED } else { 0x00 });
140
141 // version
142 target.write_bytes(&VERSION);
143
144 // node & decorator counts
145 target.write_usize(self.nodes.len());
146 target.write_usize(if stripped { 0 } else { self.debug_info.num_decorators() });
147
148 // roots
149 let roots: Vec<u32> = self.roots.iter().copied().map(u32::from).collect();
150 roots.write_into(target);
151
152 // Prepare MAST node infos, but don't store them yet. We store them at the end to make
153 // deserialization more efficient.
154 let mast_node_infos: Vec<MastNodeInfo> = self
155 .nodes
156 .iter()
157 .map(|mast_node| {
158 let ops_offset = if let MastNode::Block(basic_block) = mast_node {
159 basic_block_data_builder.encode_basic_block(basic_block)
160 } else {
161 0
162 };
163
164 MastNodeInfo::new(mast_node, ops_offset)
165 })
166 .collect();
167
168 let basic_block_data = basic_block_data_builder.finalize();
169 basic_block_data.write_into(target);
170
171 // Write node infos
172 for mast_node_info in mast_node_infos {
173 mast_node_info.write_into(target);
174 }
175
176 self.advice_map.write_into(target);
177
178 // Serialize DebugInfo only if not stripped
179 if !stripped {
180 self.debug_info.write_into(target);
181 }
182 }
183}
184
185impl Deserializable for MastForest {
186 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
187 let flags = read_and_validate_header(source)?;
188 let is_stripped = flags & FLAG_STRIPPED != 0;
189
190 // Reading sections metadata
191 let node_count = source.read_usize()?;
192 if node_count > MastForest::MAX_NODES {
193 return Err(DeserializationError::InvalidValue(format!(
194 "node count {} exceeds maximum allowed {}",
195 node_count,
196 MastForest::MAX_NODES
197 )));
198 }
199 let _decorator_count = source.read_usize()?; // Read for wire format compatibility
200
201 // Reading procedure roots
202 let roots: Vec<u32> = Deserializable::read_from(source)?;
203
204 // Reading nodes
205 let basic_block_data: Vec<u8> = Deserializable::read_from(source)?;
206 let mast_node_infos: Vec<MastNodeInfo> = node_infos_iter(source, node_count)
207 .collect::<Result<Vec<MastNodeInfo>, DeserializationError>>()?;
208
209 let advice_map = AdviceMap::read_from(source)?;
210
211 // Deserialize DebugInfo or create empty one if stripped
212 let debug_info = if is_stripped {
213 super::DebugInfo::empty_for_nodes(node_count)
214 } else {
215 super::DebugInfo::read_from(source)?
216 };
217
218 // Constructing MastForest
219 let mast_forest = {
220 let mut mast_forest = MastForest::new();
221
222 // Set the fully deserialized debug_info - it already contains all mappings
223 mast_forest.debug_info = debug_info;
224
225 // Convert node infos to builders
226 let basic_block_data_decoder = BasicBlockDataDecoder::new(&basic_block_data);
227 let mast_builders = mast_node_infos
228 .into_iter()
229 .map(|node_info| {
230 node_info.try_into_mast_node_builder(node_count, &basic_block_data_decoder)
231 })
232 .collect::<Result<Vec<_>, _>>()?;
233
234 // Add all builders to forest using relaxed validation
235 for mast_node_builder in mast_builders {
236 mast_node_builder.add_to_forest_relaxed(&mut mast_forest).map_err(|e| {
237 DeserializationError::InvalidValue(format!(
238 "failed to add node to MAST forest while deserializing: {e}",
239 ))
240 })?;
241 }
242
243 // roots
244 for root in roots {
245 // make sure the root is valid in the context of the MAST forest
246 let root = MastNodeId::from_u32_safe(root, &mast_forest)?;
247 mast_forest.make_root(root);
248 }
249
250 mast_forest.advice_map = advice_map;
251
252 mast_forest
253 };
254
255 // Note: Full validation of deserialized MastForests (e.g., checking that procedure name
256 // digests correspond to procedure roots) is intentionally not performed here.
257 // The serialized format is expected to come from a trusted source (e.g., the assembler
258 // or a verified package). Callers should use MastForest::validate() if validation of
259 // untrusted input is needed.
260
261 Ok(mast_forest)
262 }
263}
264
265/// Reads and validates the MAST header (magic, flags, version).
266///
267/// Returns the flags byte on success.
268fn read_and_validate_header<R: ByteReader>(source: &mut R) -> Result<u8, DeserializationError> {
269 // Read magic
270 let magic: [u8; 4] = source.read_array()?;
271 if magic != *MAGIC {
272 return Err(DeserializationError::InvalidValue(format!(
273 "Invalid magic bytes. Expected '{:?}', got '{:?}'",
274 *MAGIC, magic
275 )));
276 }
277
278 // Read and validate flags
279 let flags: u8 = source.read_u8()?;
280 if flags & FLAGS_RESERVED_MASK != 0 {
281 return Err(DeserializationError::InvalidValue(format!(
282 "Unknown flags set in MAST header: {:#04x}. Reserved bits must be zero.",
283 flags & FLAGS_RESERVED_MASK
284 )));
285 }
286
287 // Read and validate version
288 let version: [u8; 3] = source.read_array()?;
289 if version != VERSION {
290 return Err(DeserializationError::InvalidValue(format!(
291 "Unsupported version. Got '{version:?}', but only '{VERSION:?}' is supported",
292 )));
293 }
294
295 Ok(flags)
296}
297
298fn node_infos_iter<'a, R>(
299 source: &'a mut R,
300 node_count: usize,
301) -> impl Iterator<Item = Result<MastNodeInfo, DeserializationError>> + 'a
302where
303 R: ByteReader + 'a,
304{
305 let mut remaining = node_count;
306 core::iter::from_fn(move || {
307 if remaining == 0 {
308 return None;
309 }
310 remaining -= 1;
311 Some(MastNodeInfo::read_from(source))
312 })
313}
314
315// UNTRUSTED DESERIALIZATION
316// ================================================================================================
317
318impl Deserializable for super::UntrustedMastForest {
319 /// Deserializes an [`super::UntrustedMastForest`] from a byte reader.
320 ///
321 /// Note: This method does not apply budgeting. For untrusted input, prefer using
322 /// [`read_from_bytes`](Self::read_from_bytes) which applies budgeted deserialization.
323 ///
324 /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
325 /// to verify structural integrity and recompute all node hashes before using
326 /// the forest.
327 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
328 let forest = MastForest::read_from(source)?;
329 Ok(super::UntrustedMastForest(forest))
330 }
331
332 /// Deserializes an [`super::UntrustedMastForest`] from bytes using budgeted deserialization.
333 ///
334 /// This method uses a [`crate::serde::BudgetedReader`] with a budget equal to the input size
335 /// to protect against denial-of-service attacks from malicious input.
336 ///
337 /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
338 /// to verify structural integrity and recompute all node hashes before using
339 /// the forest.
340 fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
341 super::UntrustedMastForest::read_from_bytes(bytes)
342 }
343}
344
345// STRIPPED SERIALIZATION
346// ================================================================================================
347
348/// Wrapper for serializing a [`MastForest`] without debug information.
349///
350/// This newtype enables an alternative serialization format that omits the DebugInfo section,
351/// producing smaller output files suitable for production deployment where debug info is not
352/// needed.
353///
354/// The resulting bytes can be deserialized with the standard [`Deserializable`] impl for
355/// [`MastForest`], which auto-detects the format via the flags byte in the header.
356pub(super) struct StrippedMastForest<'a>(pub(super) &'a MastForest);
357
358impl Serializable for StrippedMastForest<'_> {
359 fn write_into<W: ByteWriter>(&self, target: &mut W) {
360 self.0.write_into_with_options(target, true);
361 }
362}