Skip to main content

miden_core/mast/serialization/
mod.rs

1//! The serialization format of MastForest is as follows:
2//!
3//! (Metadata)
4//! - MAGIC (4 bytes) + FLAGS (1 byte) + VERSION (3 bytes)
5//!
6//! (Counts)
7//! - nodes count (`usize`)
8//! - decorators count (`usize`) - 0 if stripped, reserved for future use in lazy loading (#2504)
9//!
10//! (Procedure roots section)
11//! - procedure roots (`Vec<u32>` as MastNodeId values)
12//!
13//! (Basic block data section)
14//! - basic block data (padded operations + batch metadata)
15//!
16//! (Node info section)
17//! - MAST node infos (`Vec<MastNodeInfo>`)
18//!
19//! (Advice map section)
20//! - Advice map (`AdviceMap`)
21//!
22//! (DebugInfo section - omitted if FLAGS bit 0 is set)
23//! - Decorator data (raw bytes for decorator payloads)
24//! - String table (deduplicated strings)
25//! - Decorator infos (`Vec<DecoratorInfo>`)
26//! - Error codes map (`BTreeMap<u64, String>`)
27//! - OpToDecoratorIds CSR (operation-indexed decorators, dense representation)
28//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators, dense representation)
29//! - Procedure names map (`BTreeMap<Word, String>`)
30//!
31//! # Stripped Format
32//!
33//! When serializing with [`MastForest::write_stripped`], the FLAGS byte has bit 0 set
34//! and the entire DebugInfo section is omitted. Deserialization auto-detects the format
35//! and creates an empty `DebugInfo` with valid CSR structures when reading stripped files.
36
37use alloc::vec::Vec;
38
39use super::{MastForest, MastNode, MastNodeId};
40use crate::{
41    advice::AdviceMap,
42    serde::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable},
43};
44
45pub(crate) mod asm_op;
46pub(crate) mod decorator;
47
48mod info;
49use info::MastNodeInfo;
50
51mod basic_blocks;
52use basic_blocks::{BasicBlockDataBuilder, BasicBlockDataDecoder};
53
54pub(crate) mod string_table;
55pub(crate) use string_table::StringTable;
56
57#[cfg(test)]
58mod seed_gen;
59
60#[cfg(test)]
61mod tests;
62
63// TYPE ALIASES
64// ================================================================================================
65
/// Offset into the `node_data` section of an encoded [`MastForest`].
type NodeDataOffset = u32;

/// Offset into the `decorator_data` section of an encoded [`MastForest`].
type DecoratorDataOffset = u32;

/// Offset into the `strings_data` section of an encoded [`MastForest`].
type StringDataOffset = usize;

/// Offset into the strings table of an encoded [`MastForest`].
type StringIndex = usize;

// CONSTANTS
// ================================================================================================

/// Magic bytes identifying a binary-encoded MAST file.
///
/// The header starts with 4 identification bytes, immediately followed by a flags byte:
/// - Bytes 0-3: `b"MAST"` - magic identifier
/// - Byte 4: flags byte (see [`FLAG_STRIPPED`] and [`FLAGS_RESERVED_MASK`])
///
/// The flags byte occupies the position of the original null terminator (`b"MAST\0"`), so the
/// layout stays backward compatible: old files carry flags=0x00 (the null byte), which means
/// "debug info present".
const MAGIC: &[u8; 4] = b"MAST";

/// Flag bit signalling that debug info was stripped from the serialized `MastForest`.
///
/// When this bit is set in the flags byte, the DebugInfo section is omitted entirely and the
/// deserializer creates an empty `DebugInfo` with valid CSR structures instead.
const FLAG_STRIPPED: u8 = 0x01;

/// Mask of the reserved flag bits, all of which must be zero.
///
/// Every bit that is not a defined flag is reserved for future use (currently bits 1-7, i.e.
/// `0xfe`). Deserialization fails if any reserved bit is set. The mask is derived from the
/// defined flags rather than spelled out as a literal so the two cannot drift apart.
const FLAGS_RESERVED_MASK: u8 = !FLAG_STRIPPED;

/// The format version.
///
/// If future modifications are made to this format, the version should be incremented by 1. A
/// version of `[255, 255, 255]` is reserved for future extensions that require extending the
/// version field itself, but should be considered invalid for now.
///
/// Version history:
/// - [0, 0, 0]: Initial format
/// - [0, 0, 1]: Added batch metadata to basic blocks (operations serialized in padded form with
///   indptr, padding, and group metadata for exact OpBatch reconstruction). Direct decorator
///   serialization in CSR format (eliminates per-node decorator sections and round-trip
///   conversions). Header changed from `MAST\0` to `MAST` + flags byte.
/// - [0, 0, 2]: Removed AssemblyOp from Decorator enum serialization. AssemblyOps are now stored
///   separately in DebugInfo. Removed `should_break` field from AssemblyOp serialization (#2646).
///   Removed `breakpoint` instruction (#2655).
const VERSION: [u8; 3] = [0, 0, 2];
119
120// MAST FOREST SERIALIZATION/DESERIALIZATION
121// ================================================================================================
122
123impl Serializable for MastForest {
124    fn write_into<W: ByteWriter>(&self, target: &mut W) {
125        self.write_into_with_options(target, false);
126    }
127}
128
129impl MastForest {
130    /// Internal serialization with options.
131    ///
132    /// When `stripped` is true, the DebugInfo section is omitted and the FLAGS byte
133    /// has bit 0 set.
134    fn write_into_with_options<W: ByteWriter>(&self, target: &mut W, stripped: bool) {
135        let mut basic_block_data_builder = BasicBlockDataBuilder::new();
136
137        // magic & flags
138        target.write_bytes(MAGIC);
139        target.write_u8(if stripped { FLAG_STRIPPED } else { 0x00 });
140
141        // version
142        target.write_bytes(&VERSION);
143
144        // node & decorator counts
145        target.write_usize(self.nodes.len());
146        target.write_usize(if stripped { 0 } else { self.debug_info.num_decorators() });
147
148        // roots
149        let roots: Vec<u32> = self.roots.iter().copied().map(u32::from).collect();
150        roots.write_into(target);
151
152        // Prepare MAST node infos, but don't store them yet. We store them at the end to make
153        // deserialization more efficient.
154        let mast_node_infos: Vec<MastNodeInfo> = self
155            .nodes
156            .iter()
157            .map(|mast_node| {
158                let ops_offset = if let MastNode::Block(basic_block) = mast_node {
159                    basic_block_data_builder.encode_basic_block(basic_block)
160                } else {
161                    0
162                };
163
164                MastNodeInfo::new(mast_node, ops_offset)
165            })
166            .collect();
167
168        let basic_block_data = basic_block_data_builder.finalize();
169        basic_block_data.write_into(target);
170
171        // Write node infos
172        for mast_node_info in mast_node_infos {
173            mast_node_info.write_into(target);
174        }
175
176        self.advice_map.write_into(target);
177
178        // Serialize DebugInfo only if not stripped
179        if !stripped {
180            self.debug_info.write_into(target);
181        }
182    }
183}
184
185impl Deserializable for MastForest {
186    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
187        let flags = read_and_validate_header(source)?;
188        let is_stripped = flags & FLAG_STRIPPED != 0;
189
190        // Reading sections metadata
191        let node_count = source.read_usize()?;
192        if node_count > MastForest::MAX_NODES {
193            return Err(DeserializationError::InvalidValue(format!(
194                "node count {} exceeds maximum allowed {}",
195                node_count,
196                MastForest::MAX_NODES
197            )));
198        }
199        let _decorator_count = source.read_usize()?; // Read for wire format compatibility
200
201        // Reading procedure roots
202        let roots: Vec<u32> = Deserializable::read_from(source)?;
203
204        // Reading nodes
205        let basic_block_data: Vec<u8> = Deserializable::read_from(source)?;
206        let mast_node_infos: Vec<MastNodeInfo> = node_infos_iter(source, node_count)
207            .collect::<Result<Vec<MastNodeInfo>, DeserializationError>>()?;
208
209        let advice_map = AdviceMap::read_from(source)?;
210
211        // Deserialize DebugInfo or create empty one if stripped
212        let debug_info = if is_stripped {
213            super::DebugInfo::empty_for_nodes(node_count)
214        } else {
215            super::DebugInfo::read_from(source)?
216        };
217
218        // Constructing MastForest
219        let mast_forest = {
220            let mut mast_forest = MastForest::new();
221
222            // Set the fully deserialized debug_info - it already contains all mappings
223            mast_forest.debug_info = debug_info;
224
225            // Convert node infos to builders
226            let basic_block_data_decoder = BasicBlockDataDecoder::new(&basic_block_data);
227            let mast_builders = mast_node_infos
228                .into_iter()
229                .map(|node_info| {
230                    node_info.try_into_mast_node_builder(node_count, &basic_block_data_decoder)
231                })
232                .collect::<Result<Vec<_>, _>>()?;
233
234            // Add all builders to forest using relaxed validation
235            for mast_node_builder in mast_builders {
236                mast_node_builder.add_to_forest_relaxed(&mut mast_forest).map_err(|e| {
237                    DeserializationError::InvalidValue(format!(
238                        "failed to add node to MAST forest while deserializing: {e}",
239                    ))
240                })?;
241            }
242
243            // roots
244            for root in roots {
245                // make sure the root is valid in the context of the MAST forest
246                let root = MastNodeId::from_u32_safe(root, &mast_forest)?;
247                mast_forest.make_root(root);
248            }
249
250            mast_forest.advice_map = advice_map;
251
252            mast_forest
253        };
254
255        // Note: Full validation of deserialized MastForests (e.g., checking that procedure name
256        // digests correspond to procedure roots) is intentionally not performed here.
257        // The serialized format is expected to come from a trusted source (e.g., the assembler
258        // or a verified package). Callers should use MastForest::validate() if validation of
259        // untrusted input is needed.
260
261        Ok(mast_forest)
262    }
263}
264
265/// Reads and validates the MAST header (magic, flags, version).
266///
267/// Returns the flags byte on success.
268fn read_and_validate_header<R: ByteReader>(source: &mut R) -> Result<u8, DeserializationError> {
269    // Read magic
270    let magic: [u8; 4] = source.read_array()?;
271    if magic != *MAGIC {
272        return Err(DeserializationError::InvalidValue(format!(
273            "Invalid magic bytes. Expected '{:?}', got '{:?}'",
274            *MAGIC, magic
275        )));
276    }
277
278    // Read and validate flags
279    let flags: u8 = source.read_u8()?;
280    if flags & FLAGS_RESERVED_MASK != 0 {
281        return Err(DeserializationError::InvalidValue(format!(
282            "Unknown flags set in MAST header: {:#04x}. Reserved bits must be zero.",
283            flags & FLAGS_RESERVED_MASK
284        )));
285    }
286
287    // Read and validate version
288    let version: [u8; 3] = source.read_array()?;
289    if version != VERSION {
290        return Err(DeserializationError::InvalidValue(format!(
291            "Unsupported version. Got '{version:?}', but only '{VERSION:?}' is supported",
292        )));
293    }
294
295    Ok(flags)
296}
297
298fn node_infos_iter<'a, R>(
299    source: &'a mut R,
300    node_count: usize,
301) -> impl Iterator<Item = Result<MastNodeInfo, DeserializationError>> + 'a
302where
303    R: ByteReader + 'a,
304{
305    let mut remaining = node_count;
306    core::iter::from_fn(move || {
307        if remaining == 0 {
308            return None;
309        }
310        remaining -= 1;
311        Some(MastNodeInfo::read_from(source))
312    })
313}
314
315// UNTRUSTED DESERIALIZATION
316// ================================================================================================
317
318impl Deserializable for super::UntrustedMastForest {
319    /// Deserializes an [`super::UntrustedMastForest`] from a byte reader.
320    ///
321    /// Note: This method does not apply budgeting. For untrusted input, prefer using
322    /// [`read_from_bytes`](Self::read_from_bytes) which applies budgeted deserialization.
323    ///
324    /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
325    /// to verify structural integrity and recompute all node hashes before using
326    /// the forest.
327    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
328        let forest = MastForest::read_from(source)?;
329        Ok(super::UntrustedMastForest(forest))
330    }
331
332    /// Deserializes an [`super::UntrustedMastForest`] from bytes using budgeted deserialization.
333    ///
334    /// This method uses a [`crate::serde::BudgetedReader`] with a budget equal to the input size
335    /// to protect against denial-of-service attacks from malicious input.
336    ///
337    /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
338    /// to verify structural integrity and recompute all node hashes before using
339    /// the forest.
340    fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
341        super::UntrustedMastForest::read_from_bytes(bytes)
342    }
343}
344
345// STRIPPED SERIALIZATION
346// ================================================================================================
347
348/// Wrapper for serializing a [`MastForest`] without debug information.
349///
350/// This newtype enables an alternative serialization format that omits the DebugInfo section,
351/// producing smaller output files suitable for production deployment where debug info is not
352/// needed.
353///
354/// The resulting bytes can be deserialized with the standard [`Deserializable`] impl for
355/// [`MastForest`], which auto-detects the format via the flags byte in the header.
356pub(super) struct StrippedMastForest<'a>(pub(super) &'a MastForest);
357
358impl Serializable for StrippedMastForest<'_> {
359    fn write_into<W: ByteWriter>(&self, target: &mut W) {
360        self.0.write_into_with_options(target, true);
361    }
362}