Skip to main content

miden_core/mast/serialization/
mod.rs

1//! MAST forest serialization keeps one fixed structural layout for full, stripped, and hashless
2//! payloads.
3//!
4//! The main goal is to keep random access cheap in stripped and hashless modes. Node structure
5//! stays in one fixed-width section. Variable-size data lives in separate sections. Internal node
6//! digests also live in a separate section so hashless payloads can omit them without changing the
7//! structural layout.
8//!
9//! Wire flags describe serializer intent, not reader trust policy. Trusted [`MastForest`] reads
10//! reject hashless payloads. [`crate::mast::UntrustedMastForest`] accepts them and rebuilds
11//! non-external digests before use. If a non-hashless payload is sent down the untrusted path,
12//! validation recomputes those digests and requires them to match the serialized values.
13//! Budgeted untrusted reads always bound wire counts during layout scanning via
14//! [`ByteReader::max_alloc`]. Callers that opt into validation budgeting also get a second check:
15//! - later stripped/hashless helper allocations are charged against an explicit validation budget
16//!   before the corresponding `Vec` or CSR scaffolding is created
17//! - the default convenience path uses a coarse validation budget derived from the input size; this
18//!   is intentionally a simple bound for common callers, not an exact peak-memory formula
19//!
20//! The main layers fit together like this:
21//!
22//! ```text
23//! wire bytes
24//!     |
25//!     +--> ForestLayout -----------> SerializedMastForest --+
26//!     |        absolute offsets         structural view      |
27//!     |                                                     v
28//!     +--> UntrustedMastForest ----validate----> ResolvedSerializedForest ---> MastForest
29//!              bytes + parsed state                digest-backed view            trusted runtime
30//!
31//! MastForestView is the shared random-access API implemented by SerializedMastForest and
32//! MastForest.
33//! ```
34//!
35//! The format is:
36//!
37//! (Metadata)
38//! - MAGIC (4 bytes) + FLAGS (1 byte) + VERSION (3 bytes)
39//!
//! (Counts)
//! - internal (non-external) nodes count (`usize`)
//! - external nodes count (`usize`)
//! - the total nodes count is not written separately; it equals the sum of the two counts above
44//!
45//! (Procedure roots section)
46//! - procedure roots (`Vec<u32>` as MastNodeId values)
47//!
48//! (Basic block data section)
49//! - basic block data (padded operations + batch metadata)
50//!
51//! (Node entries section)
52//! - fixed-width structural node entries (`Vec<MastNodeEntry>`)
53//! - `Block` entries store offsets into the basic-block section above
54//!
55//! (External digest section)
56//! - digests for `External` nodes only (`Vec<Word>`, ordered by node index)
57//! - lookup is dense-by-kind: the Nth external node uses slot N in this section
58//!
59//! (Node hash section - omitted if FLAGS bit 1 is set)
60//! - digests for all non-external nodes (`Vec<Word>`, ordered by node index)
61//! - lookup is also dense-by-kind: the Nth non-external node uses slot N in this section
62//!
63//! (Advice map section)
64//! - Advice map (`AdviceMap`)
65//!
66//! (DebugInfo section - omitted if FLAGS bit 0 is set)
67//! - Decorator data (raw bytes for decorator payloads)
68//! - String table (deduplicated strings)
69//! - Decorator infos (`Vec<DecoratorInfo>`)
70//! - Error codes map (`BTreeMap<u64, String>`)
71//! - OpToDecoratorIds CSR (operation-indexed decorators, dense representation)
72//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators, dense representation)
73//! - Procedure names map (`BTreeMap<Word, String>`)
74//!
75//! In stripped format, the `DebugInfo` section is omitted and readers materialize an empty
76//! `DebugInfo`.
77//!
78//! In hashless format, the internal node-hash section is omitted and `HASHLESS` also implies
79//! `STRIPPED`. External node digests still stay on the wire because they cannot be rebuilt from
80//! local structure. This keeps hashless focused on the untrusted-validation use case: trusted
81//! reads reject `HASHLESS`, and the untrusted path rebuilds the data it actually trusts before
82//! use, so supporting a separate "hashless but with debug info" mode would add another wire mode
83//! without changing the validation semantics.
84//!
85//! Readers recover per-node digest lookup by scanning node entries once and building a compact
86//! "slot by node index" table. This preserves random access without forcing all digests into the
87//! same contiguous array on the wire.
88//!
89//! Public entry points adopt these policies:
90//! - [`MastForest::read_from_bytes`]: trusted full payload, no hashless support.
91//! - [`SerializedMastForest::new`]: structural inspection for local tooling, including hashless
92//!   payloads; not an untrusted-validation entry point.
93//! - [`crate::mast::UntrustedMastForest::read_from_bytes`] /
94//!   [`crate::mast::UntrustedMastForest::read_from_bytes_with_budgets`]: untrusted parsing plus
95//!   later validation before use.
96
97#[cfg(test)]
98use alloc::string::ToString;
99use alloc::{format, vec::Vec};
100
101use miden_utils_sync::OnceLockCompat;
102
103use super::{MastForest, MastNode, MastNodeId};
104use crate::{
105    advice::AdviceMap,
106    mast::node::MastNodeExt,
107    serde::{
108        BudgetedReader, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
109        SliceReader,
110    },
111};
112
113pub(crate) mod asm_op;
114pub(crate) mod decorator;
115
116mod info;
117pub use info::{MastNodeEntry, MastNodeInfo};
118
119mod view;
120pub use view::MastForestView;
121
122mod layout;
123pub(super) use layout::ForestLayout;
124use layout::{OffsetTrackingReader, TrackingReader, WireFlags, read_header_and_scan_layout};
125
126mod resolved;
127use resolved::{ResolvedSerializedForest, basic_block_offset_for_node_index};
128
129mod basic_blocks;
130use basic_blocks::{BasicBlockDataBuilder, basic_block_data_len};
131
132pub(crate) mod string_table;
133pub(crate) use string_table::StringTable;
134
135#[cfg(test)]
136mod seed_gen;
137
138#[cfg(test)]
139mod tests;
140
141// TYPE ALIASES
142// ================================================================================================
143
144/// Specifies an offset into the `node_data` section of an encoded [`MastForest`].
145type NodeDataOffset = u32;
146
147/// Specifies an offset into the `decorator_data` section of an encoded [`MastForest`].
148type DecoratorDataOffset = u32;
149
150/// Specifies an offset into the `strings_data` section of an encoded [`MastForest`].
151type StringDataOffset = usize;
152
153/// Specifies an offset into the strings table of an encoded [`MastForest`].
154type StringIndex = usize;
155
156/// Default multiplier for the untrusted validation allocation budget.
157///
158/// The budgeted byte reader limits wire-driven parsing. Hashless and stripped validation also
159/// needs transient per-node allocations for the slot table, empty debug-info scaffolding, and
160/// rebuilt digest data.
161/// The generic untrusted path also retains a recorded copy of the consumed
162/// serialized payload for deferred validation.
163///
164/// This convenience multiplier is therefore a coarse "wire bytes plus worst-case helper
165/// headroom" bound:
166/// - `* 6` covers the helper-allocation model introduced with explicit validation budgeting
167/// - `+ 1 * bytes_len` covers the retained serialized copy recorded during untrusted reads
168///
169/// It is deliberately conservative and exists to make the default
170/// [`crate::mast::UntrustedMastForest::read_from_bytes`] path usable without forcing callers to
171/// size each helper allocation themselves. Callers with stricter limits should use
172/// [`crate::mast::UntrustedMastForest::read_from_bytes_with_budgets`] and choose explicit parsing
173/// and validation budgets.
174const DEFAULT_UNTRUSTED_ALLOCATION_BUDGET_MULTIPLIER: usize = 7;
175
176/// Byte-read budget multiplier for trusted full deserialization from a byte slice.
177///
178/// The budget is intentionally finite to reject malicious length prefixes, but larger than the
179/// source length because collection deserialization uses conservative per-element size estimates.
180const TRUSTED_BYTE_READ_BUDGET_MULTIPLIER: usize = 64;
181
182// CONSTANTS
183// ================================================================================================
184
185/// Magic bytes for detecting that a file is binary-encoded MAST.
186///
187/// The header is `b"MAST"` + flags byte + version bytes.
188///
189/// This repurposes the old `b"MAST\0"` terminator as the flags byte, so legacy payloads still
190/// decode as "debug info present".
191const MAGIC: &[u8; 4] = b"MAST";
192
193/// Flag indicating that the `DebugInfo` section is omitted from the wire payload.
194///
195/// Readers treat this as serializer intent about the wire layout, not as a trust decision.
196const FLAG_STRIPPED: u8 = 0x01;
197
198/// Flag indicating that the internal node-hash section is omitted from the wire payload.
199///
200/// External digests still remain serialized in their own section because they cannot be rebuilt
201/// from local structure. This flag implies [`FLAG_STRIPPED`] because no supported consumer treats
202/// wire `DebugInfo` as trusted in hashless mode: [`crate::mast::MastForest`] rejects `HASHLESS`,
203/// [`SerializedMastForest::new`] accepts it only for structural inspection, and the untrusted path
204/// rebuilds the data it actually trusts before use.
205pub(super) const FLAG_HASHLESS: u8 = 0x02;
206
207/// Mask for reserved flag bits that must be zero.
208///
209/// Bits 2-7 are reserved for future use. If any are set, deserialization fails.
210const FLAGS_RESERVED_MASK: u8 = 0xfc;
211
212/// The format version.
213///
214/// If future modifications are made to this format, the version should be incremented by 1. A
215/// version of `[255, 255, 255]` is reserved for future extensions that require extending the
216/// version field itself, but should be considered invalid for now.
217///
218/// Version history:
219/// - [0, 0, 0]: Initial format
220/// - [0, 0, 1]: Added batch metadata to basic blocks (operations serialized in padded form with
221///   indptr, padding, and group metadata for exact OpBatch reconstruction). Direct decorator
222///   serialization in CSR format (eliminates per-node decorator sections and round-trip
223///   conversions). Header changed from `MAST\0` to `MAST` + flags byte.
224/// - [0, 0, 2]: Removed AssemblyOp from Decorator enum serialization. AssemblyOps are now stored
225///   separately in DebugInfo. Removed `should_break` field from AssemblyOp serialization (#2646).
226///   Removed `breakpoint` instruction (#2655).
227/// - [0, 0, 3]: Added HASHLESS flag (bit 1). HASHLESS implies STRIPPED. Trusted deserialization
228///   rejects HASHLESS. Split fixed-width node entries from digest storage. External digests moved
229///   to a dedicated section. Hashless serialization omits the general node-hash section entirely.
230///   Dropped the serialized decorator-count field because it was not used by the wire layout or
231///   deserializers. Before any public release on this branch, the same unreleased wire version also
232///   grew explicit internal/external node counts in the header.
233const VERSION: [u8; 3] = [0, 0, 3];
234
235// MAST FOREST SERIALIZATION/DESERIALIZATION
236// ================================================================================================
237
238impl Serializable for MastForest {
239    fn write_into<W: ByteWriter>(&self, target: &mut W) {
240        self.write_into_with_options(target, false, false);
241    }
242}
243
244impl MastForest {
245    /// Internal serialization with options.
246    ///
247    /// When `stripped` is true, the DebugInfo section is omitted and the FLAGS byte
248    /// has bit 0 set.
249    fn write_into_with_options<W: ByteWriter>(
250        &self,
251        target: &mut W,
252        stripped: bool,
253        hashless: bool,
254    ) {
255        let mut basic_block_data_builder = BasicBlockDataBuilder::new();
256
257        // magic & flags
258        target.write_bytes(MAGIC);
259        let flags = if stripped || hashless { FLAG_STRIPPED } else { 0 }
260            | if hashless { FLAG_HASHLESS } else { 0 };
261        target.write_u8(flags);
262
263        // version
264        target.write_bytes(&VERSION);
265
266        // header counts
267        let node_count = self.nodes.len();
268        let external_node_count = self.nodes.iter().filter(|node| node.is_external()).count();
269        let internal_node_count = node_count - external_node_count;
270        target.write_usize(internal_node_count);
271        target.write_usize(external_node_count);
272
273        // roots
274        let roots: Vec<u32> = self.roots.iter().copied().map(u32::from).collect();
275        roots.write_into(target);
276
277        let mut mast_node_entries = Vec::with_capacity(self.nodes.len());
278        let mut external_digests = Vec::new();
279        let mut node_hashes = Vec::new();
280
281        for mast_node in self.nodes.iter() {
282            let ops_offset = if let MastNode::Block(basic_block) = mast_node {
283                basic_block_data_builder.encode_basic_block(basic_block)
284            } else {
285                0
286            };
287
288            mast_node_entries.push(MastNodeEntry::new(mast_node, ops_offset));
289            if mast_node.is_external() {
290                external_digests.push(mast_node.digest());
291            } else if !hashless {
292                node_hashes.push(mast_node.digest());
293            }
294        }
295
296        let basic_block_data = basic_block_data_builder.finalize();
297        basic_block_data.write_into(target);
298
299        for mast_node_entry in mast_node_entries {
300            mast_node_entry.write_into(target);
301        }
302
303        for digest in external_digests {
304            digest.write_into(target);
305        }
306
307        if !hashless {
308            for digest in node_hashes {
309                digest.write_into(target);
310            }
311        }
312
313        self.advice_map.write_into(target);
314
315        // Serialize DebugInfo only if not stripped
316        if !stripped {
317            self.debug_info.write_into(target);
318        }
319    }
320}
321
322pub(super) fn write_stripped_into<W: ByteWriter>(forest: &MastForest, target: &mut W) {
323    forest.write_into_with_options(target, true, false);
324}
325
326pub(super) fn write_hashless_into<W: ByteWriter>(forest: &MastForest, target: &mut W) {
327    forest.write_into_with_options(target, true, true);
328}
329
330pub(super) fn stripped_size_hint(forest: &MastForest) -> usize {
331    serialized_size_hint(forest, true, false)
332}
333
334fn serialized_size_hint(forest: &MastForest, stripped: bool, hashless: bool) -> usize {
335    let node_count = forest.nodes.len();
336    let external_count = forest.nodes.iter().filter(|node| node.is_external()).count();
337    let non_external_count = node_count - external_count;
338
339    let mut size = MAGIC.len() + 1 + VERSION.len();
340    size += non_external_count.get_size_hint();
341    size += external_count.get_size_hint();
342
343    let roots_len = forest.roots.len();
344    size += roots_len.get_size_hint();
345    size += roots_len * size_of::<u32>();
346
347    let mut basic_block_len = 0usize;
348    for node in forest.nodes.iter() {
349        if let MastNode::Block(block) = node {
350            basic_block_len += basic_block_data_len(block);
351        }
352    }
353    size += basic_block_len.get_size_hint() + basic_block_len;
354
355    size += node_count * MastNodeEntry::SERIALIZED_SIZE;
356    size += external_count * crate::Word::min_serialized_size();
357    if !hashless {
358        size += non_external_count * crate::Word::min_serialized_size();
359    }
360    size += forest.advice_map.serialized_size_hint();
361    if !stripped {
362        size += forest.debug_info.get_size_hint();
363    }
364
365    size
366}
367
368/// A zero-copy structural view over serialized MAST forest bytes.
369///
370/// This view accepts full, stripped, and hashless payloads. It validates the header and the
371/// fixed-width structural sections needed for random access, but it does not fully materialize the
372/// forest.
373///
374/// Use this when callers need random access to roots or node metadata without deserializing the
375/// full forest. For strict trusted deserialization, use
376/// [`crate::mast::MastForest::read_from_bytes`].
377///
378/// # Examples
379///
380/// ```
381/// use miden_core::{
382///     mast::{BasicBlockNodeBuilder, MastForest, MastForestContributor, SerializedMastForest},
383///     operations::Operation,
384/// };
385///
386/// let mut forest = MastForest::new();
387/// let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
388///     .add_to_forest(&mut forest)
389///     .unwrap();
390/// forest.make_root(block_id);
391///
392/// let mut bytes = Vec::new();
393/// forest.write_stripped(&mut bytes);
394///
395/// let view = SerializedMastForest::new(&bytes).unwrap();
396/// assert_eq!(view.node_count(), forest.nodes().len());
397/// assert!(view.node_info_at(0).is_ok());
398/// ```
#[derive(Debug)]
pub struct SerializedMastForest<'a> {
    // Borrowed serialized payload; all structural reads index into this slice.
    bytes: &'a [u8],
    // Wire flags returned by the header/layout scan (stripped / hashless).
    flags: WireFlags,
    // Layout returned by the header/layout scan; used for root and node-entry lookups.
    layout: ForestLayout,
    // Digest-backed view, built on the first digest access. The whole `Result` is cached, so a
    // failed resolution is reported again on later calls instead of being retried.
    resolved: OnceLockCompat<Result<ResolvedSerializedForest<'a>, DeserializationError>>,
}
406
407impl<'a> SerializedMastForest<'a> {
408    /// Creates a new view from serialized bytes.
409    ///
410    /// The input may be full, stripped, or hashless format.
411    /// Structural parsing is delegated to the same single-pass scanner used by reader-based
412    /// deserialization paths.
413    ///
414    /// This constructor is layout-oriented: it validates the header and sections needed for
415    /// node/roots/random-access metadata only. It does not validate or fully parse trailing
416    /// `AdviceMap` / `DebugInfo` payloads.
417    ///
418    /// Treat this as a trusted inspection API, not as an untrusted-validation entry point. It is
419    /// appropriate for local tools that need random access over serialized structure, but callers
420    /// handling adversarial bytes should use [`crate::mast::UntrustedMastForest`] instead.
421    ///
422    /// In particular, this constructor does **not** protect callers from untrusted-input concerns
423    /// that are enforced by [`crate::mast::UntrustedMastForest::validate`]. It does not:
424    /// - verify that serialized non-external digests match the structure they describe
425    /// - check topological ordering / forward-reference constraints
426    /// - validate basic-block batch invariants or procedure-name-root consistency
427    /// - fully parse or validate trailing `AdviceMap` / `DebugInfo` payloads
428    /// - provide a bounded-work guarantee for hashless digest-backed inspection
429    ///
430    /// For strict full-payload validation, use
431    /// [`crate::mast::MastForest::read_from_bytes`].
432    ///
433    /// Wire flags describe serializer intent, not trust policy. This constructor accepts
434    /// hashless payloads for inspection even though trusted [`crate::mast::MastForest`]
435    /// deserialization rejects them.
436    ///
437    /// Digest lookup follows the wire layout:
438    /// - If the internal-hash section is present, non-external node digests are read from it.
439    /// - If the internal-hash section is absent, the first digest-backed access rebuilds all
440    ///   non-external node digests from structure and caches them.
441    /// - External node digests are always read from the external-digest section.
442    ///
443    /// # Examples
444    ///
445    /// ```
446    /// use miden_core::{
447    ///     mast::{BasicBlockNodeBuilder, MastForest, MastForestContributor, SerializedMastForest},
448    ///     operations::Operation,
449    /// };
450    ///
451    /// let mut forest = MastForest::new();
452    /// let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
453    ///     .add_to_forest(&mut forest)
454    ///     .unwrap();
455    /// forest.make_root(block_id);
456    ///
457    /// let mut bytes = Vec::new();
458    /// forest.write_stripped(&mut bytes);
459    ///
460    /// let view = SerializedMastForest::new(&bytes).unwrap();
461    /// assert_eq!(view.node_count(), 1);
462    /// ```
463    pub fn new(bytes: &'a [u8]) -> Result<Self, DeserializationError> {
464        let mut reader = SliceReader::new(bytes);
465        let mut scanner = TrackingReader::new(&mut reader);
466        let (flags, layout) = read_header_and_scan_layout(&mut scanner, true)?;
467
468        Ok(Self {
469            bytes,
470            flags,
471            layout,
472            resolved: OnceLockCompat::new(),
473        })
474    }
475
476    /// Returns the number of nodes in the serialized forest.
477    pub fn node_count(&self) -> usize {
478        self.layout.node_count
479    }
480
481    /// Returns `true` when the wire header says that the internal-hash section is omitted.
482    pub fn is_hashless(&self) -> bool {
483        self.flags.is_hashless()
484    }
485
486    /// Returns `true` when the wire header says that the `DebugInfo` section is omitted.
487    pub fn is_stripped(&self) -> bool {
488        self.flags.is_stripped()
489    }
490
491    /// Returns the number of procedure roots in the serialized forest.
492    pub fn procedure_root_count(&self) -> usize {
493        self.layout.roots_count
494    }
495
496    /// Returns the procedure root id at the specified index.
497    ///
498    /// Returns an error if `index >= self.procedure_root_count()`.
499    pub fn procedure_root_at(&self, index: usize) -> Result<MastNodeId, DeserializationError> {
500        self.layout.read_procedure_root_at(self.bytes, index)
501    }
502
503    /// Returns the `MastNodeInfo` at the specified index.
504    ///
505    /// On hashless payloads, this may trigger the first digest-backed access and therefore the
506    /// one-time rebuild of the non-external digest table described in [`Self::node_digest_at`].
507    ///
508    /// Returns an error if `index >= self.node_count()`.
509    ///
510    /// # Examples
511    ///
512    /// ```
513    /// use miden_core::{
514    ///     mast::{BasicBlockNodeBuilder, MastForest, MastForestContributor, SerializedMastForest},
515    ///     operations::Operation,
516    /// };
517    ///
518    /// let mut forest = MastForest::new();
519    /// let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
520    ///     .add_to_forest(&mut forest)
521    ///     .unwrap();
522    /// forest.make_root(block_id);
523    ///
524    /// let mut bytes = Vec::new();
525    /// forest.write_stripped(&mut bytes);
526    ///
527    /// let view = SerializedMastForest::new(&bytes).unwrap();
528    /// assert!(view.node_info_at(0).is_ok());
529    /// ```
530    pub fn node_info_at(&self, index: usize) -> Result<MastNodeInfo, DeserializationError> {
531        Ok(MastNodeInfo::from_entry(
532            self.node_entry_at(index)?,
533            self.node_digest_at(index)?,
534        ))
535    }
536
537    /// Returns the fixed-width structural node entry at the specified index.
538    ///
539    /// Returns an error if `index >= self.node_count()`.
540    pub fn node_entry_at(&self, index: usize) -> Result<MastNodeEntry, DeserializationError> {
541        self.layout.read_node_entry_at(self.bytes, index)
542    }
543
544    /// Returns the digest for the node at the specified index.
545    ///
546    /// This resolves digests lazily. If the internal-hash section is absent, the first
547    /// digest-backed access rebuilds all non-external node digests and caches them.
548    ///
549    /// This means the hashless cost model is:
550    /// - `node_count()`, `node_entry_at()`, and `procedure_root_at()` stay cheap and structural
551    /// - the first `node_digest_at()` / `node_info_at()` call does `O(node_count)` digest rebuild
552    ///   work and allocates the cached digest table
553    /// - later digest lookups reuse that cache
554    ///
555    /// Returns an error if `index >= self.node_count()`.
556    pub fn node_digest_at(&self, index: usize) -> Result<crate::Word, DeserializationError> {
557        self.resolved()?.node_digest_at(index)
558    }
559
560    fn resolved(&self) -> Result<&ResolvedSerializedForest<'a>, DeserializationError> {
561        self.resolved
562            .get_or_init(|| ResolvedSerializedForest::new(self.bytes, self.layout))
563            .as_ref()
564            .map_err(Clone::clone)
565    }
566}
567
// Every trait method forwards to the inherent `SerializedMastForest` method of the same name.
// The calls are written with the type-qualified path so it is explicit that the inherent
// method is the one being invoked rather than this trait method itself.
impl MastForestView for SerializedMastForest<'_> {
    fn node_count(&self) -> usize {
        SerializedMastForest::node_count(self)
    }

    fn node_entry_at(&self, index: usize) -> Result<MastNodeEntry, DeserializationError> {
        SerializedMastForest::node_entry_at(self, index)
    }

    // May trigger the lazy digest resolution performed by the inherent method.
    fn node_digest_at(&self, index: usize) -> Result<crate::Word, DeserializationError> {
        SerializedMastForest::node_digest_at(self, index)
    }

    fn procedure_root_count(&self) -> usize {
        SerializedMastForest::procedure_root_count(self)
    }

    fn procedure_root_at(&self, index: usize) -> Result<MastNodeId, DeserializationError> {
        SerializedMastForest::procedure_root_at(self, index)
    }
}
589
590impl MastForestView for MastForest {
591    fn node_count(&self) -> usize {
592        self.nodes.len()
593    }
594
595    fn node_entry_at(&self, index: usize) -> Result<MastNodeEntry, DeserializationError> {
596        let node = self.nodes.as_slice().get(index).ok_or_else(|| {
597            DeserializationError::InvalidValue(format!("node index {index} out of bounds"))
598        })?;
599        let ops_offset = if matches!(node, MastNode::Block(_)) {
600            basic_block_offset_for_node_index(self.nodes.as_slice(), index)?
601        } else {
602            0
603        };
604
605        Ok(MastNodeEntry::new(node, ops_offset))
606    }
607
608    fn node_digest_at(&self, index: usize) -> Result<crate::Word, DeserializationError> {
609        self.nodes.as_slice().get(index).map(MastNode::digest).ok_or_else(|| {
610            DeserializationError::InvalidValue(format!("node index {index} out of bounds"))
611        })
612    }
613
614    fn procedure_root_count(&self) -> usize {
615        self.roots.len()
616    }
617
618    fn procedure_root_at(&self, index: usize) -> Result<MastNodeId, DeserializationError> {
619        self.roots.get(index).copied().ok_or_else(|| {
620            DeserializationError::InvalidValue(format!(
621                "root index {} out of bounds for {} roots",
622                index,
623                self.roots.len()
624            ))
625        })
626    }
627}
628
629// TEST HELPERS
630// ================================================================================================
631
#[cfg(test)]
impl SerializedMastForest<'_> {
    /// Test-only: forwards to the layout's advice-map offset computation.
    fn advice_map_offset(&self) -> Result<usize, DeserializationError> {
        self.layout.advice_map_offset()
    }

    /// Test-only: offset recorded in the layout for the node-entry section.
    fn node_entry_offset(&self) -> usize {
        self.layout.node_entry_offset
    }

    /// Test-only: offset recorded in the layout for the node-hash section, if any.
    fn node_hash_offset(&self) -> Option<usize> {
        self.layout.node_hash_offset
    }

    /// Test-only: digest slot assigned to the node at `index`.
    ///
    /// Panics if the digest-backed view cannot be resolved.
    fn digest_slot_at(&self, index: usize) -> usize {
        let resolved = self
            .resolved()
            .expect("digest slots should be readable for a valid serialized view");
        resolved.digest_slot_at(index)
    }
}
652
/// Reads one byte at `*offset` and advances the offset past it.
///
/// Fails the same way `read_slice_at` does when the byte is not available.
#[cfg(test)]
fn read_u8_at(bytes: &[u8], offset: &mut usize) -> Result<u8, DeserializationError> {
    let single = read_slice_at(bytes, offset, 1)?;
    Ok(single[0])
}
657
/// Reads exactly `N` bytes at `*offset` into a fixed-size array and advances the offset.
///
/// Fails the same way `read_slice_at` does when fewer than `N` bytes remain.
#[cfg(test)]
fn read_array_at<const N: usize>(
    bytes: &[u8],
    offset: &mut usize,
) -> Result<[u8; N], DeserializationError> {
    read_slice_at(bytes, offset, N).map(|chunk| {
        let mut array = [0u8; N];
        array.copy_from_slice(chunk);
        array
    })
}
668
/// Returns the `len` bytes starting at `*offset` and advances the offset past them.
///
/// Fails with `InvalidValue("offset overflow")` when `*offset + len` overflows `usize`, and with
/// `UnexpectedEOF` when the requested range extends past the end of `bytes`. The offset is left
/// untouched on failure.
#[cfg(test)]
fn read_slice_at<'a>(
    bytes: &'a [u8],
    offset: &mut usize,
    len: usize,
) -> Result<&'a [u8], DeserializationError> {
    let start = *offset;
    let Some(end) = start.checked_add(len) else {
        return Err(DeserializationError::InvalidValue("offset overflow".to_string()));
    };

    // `start <= end` always holds here, so `get` only returns `None` when `end > bytes.len()`.
    match bytes.get(start..end) {
        Some(slice) => {
            *offset = end;
            Ok(slice)
        },
        None => Err(DeserializationError::UnexpectedEOF),
    }
}
685
// NOTE: Mirrors ByteReader::read_usize (vint64) decoding to preserve wire compatibility.
/// Decodes a vint64-encoded `usize` at `*offset` and advances the offset past it.
///
/// The number of trailing zero bits in the first byte, plus one, gives the encoded length. A
/// length of nine means a marker byte followed by the raw 8-byte little-endian value; shorter
/// encodings are read little-endian and shifted right by the length to drop the length tag.
///
/// Fails with `UnexpectedEOF` when the input is exhausted or truncated, and with `InvalidValue`
/// when the decoded value does not fit in `usize`.
#[cfg(test)]
fn read_usize_at(bytes: &[u8], offset: &mut usize) -> Result<usize, DeserializationError> {
    let Some(&first_byte) = bytes.get(*offset) else {
        return Err(DeserializationError::UnexpectedEOF);
    };
    let length = first_byte.trailing_zeros() as usize + 1;

    let result = match length {
        9 => {
            // Skip the all-zero marker byte, then take the raw value.
            read_u8_at(bytes, offset)?;
            u64::from_le_bytes(read_array_at::<8>(bytes, offset)?)
        },
        _ => {
            let payload = read_slice_at(bytes, offset, length)?;
            let mut encoded = [0u8; 8];
            encoded[..length].copy_from_slice(payload);
            u64::from_le_bytes(encoded) >> length
        },
    };

    usize::try_from(result).map_err(|_| {
        DeserializationError::InvalidValue(format!(
            "Encoded value must be less than {}, but {} was provided",
            usize::MAX,
            result
        ))
    })
}
716
717impl Deserializable for MastForest {
718    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
719        let (_flags, forest) = decode_from_reader(source, false)?;
720        forest.into_materialized()
721    }
722
723    fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
724        let budget = bytes.len().saturating_mul(TRUSTED_BYTE_READ_BUDGET_MULTIPLIER);
725        let mut reader = BudgetedReader::new(SliceReader::new(bytes), budget);
726        Self::read_from(&mut reader)
727    }
728}
729
730impl super::UntrustedMastForest {
731    pub(super) fn into_materialized(self) -> Result<MastForest, DeserializationError> {
732        let resolved = if let Some(allocation_budget) = self.remaining_allocation_budget {
733            ResolvedSerializedForest::new_with_allocation_budget(
734                &self.bytes,
735                self.layout,
736                allocation_budget,
737            )?
738        } else {
739            ResolvedSerializedForest::new(&self.bytes, self.layout)?
740        };
741
742        resolved.materialize(self.advice_map, self.debug_info)
743    }
744}
745
746pub(super) fn read_untrusted_with_flags<R: ByteReader>(
747    source: &mut R,
748) -> Result<(super::UntrustedMastForest, u8), DeserializationError> {
749    let (flags, forest) = decode_from_reader(source, true)?;
750    log_untrusted_overspecification(flags);
751    Ok((forest, flags.bits()))
752}
753
754pub(super) fn read_untrusted_with_flags_and_allocation_budget<R: ByteReader>(
755    source: &mut R,
756    allocation_budget: usize,
757) -> Result<(super::UntrustedMastForest, u8), DeserializationError> {
758    let (flags, forest) = decode_from_reader_inner(source, true, Some(allocation_budget))?;
759    log_untrusted_overspecification(flags);
760    Ok((forest, flags.bits()))
761}
762
763fn log_untrusted_overspecification(flags: WireFlags) {
764    if !flags.is_hashless() {
765        log::error!(
766            "UntrustedMastForest expected HASHLESS input; supplied artifact includes wire node hashes, and validation will recompute them and require them to match"
767        );
768    }
769
770    if !flags.is_stripped() {
771        log::error!(
772            "UntrustedMastForest expected STRIPPED input; supplied artifact includes DebugInfo and other optional payloads over the wire"
773        );
774    }
775}
776
777fn decode_from_reader<R: ByteReader>(
778    source: &mut R,
779    allow_hashless: bool,
780) -> Result<(WireFlags, super::UntrustedMastForest), DeserializationError> {
781    decode_from_reader_inner(source, allow_hashless, None)
782}
783
784fn decode_from_reader_inner<R: ByteReader>(
785    source: &mut R,
786    allow_hashless: bool,
787    mut remaining_allocation_budget: Option<usize>,
788) -> Result<(WireFlags, super::UntrustedMastForest), DeserializationError> {
789    let mut recording = TrackingReader::new_recording(source);
790    let (flags, layout) = read_header_and_scan_layout(&mut recording, allow_hashless)?;
791    debug_assert_eq!(recording.offset(), layout.advice_map_offset);
792
793    let advice_map = AdviceMap::read_from(&mut recording)?;
794    let debug_info = if flags.is_stripped() {
795        if let Some(allocation_budget) = &mut remaining_allocation_budget {
796            reserve_allocation::<usize>(
797                allocation_budget,
798                layout.node_count.checked_add(1).ok_or_else(|| {
799                    DeserializationError::InvalidValue("debug-info node count overflow".into())
800                })?,
801                "empty debug-info scaffolding",
802            )?;
803        }
804        super::DebugInfo::empty_for_nodes(layout.node_count)
805    } else {
806        super::DebugInfo::read_from(&mut recording)?
807    };
808
809    Ok((
810        flags,
811        super::UntrustedMastForest {
812            bytes: recording.into_recorded(),
813            layout,
814            advice_map,
815            debug_info,
816            remaining_allocation_budget,
817        },
818    ))
819}
820
821pub(super) fn reserve_allocation<T>(
822    remaining_budget: &mut usize,
823    count: usize,
824    label: &str,
825) -> Result<(), DeserializationError> {
826    let bytes_needed = count
827        .checked_mul(size_of::<T>())
828        .ok_or_else(|| DeserializationError::InvalidValue(format!("{label} size overflow")))?;
829    if bytes_needed > *remaining_budget {
830        return Err(DeserializationError::InvalidValue(format!(
831            "{label} requires {bytes_needed} bytes, exceeding the remaining untrusted allocation budget of {} bytes",
832            *remaining_budget
833        )));
834    }
835
836    *remaining_budget -= bytes_needed;
837    Ok(())
838}
839
840pub(super) fn default_untrusted_allocation_budget(bytes_len: usize) -> usize {
841    bytes_len.saturating_mul(DEFAULT_UNTRUSTED_ALLOCATION_BUDGET_MULTIPLIER)
842}
843
844// UNTRUSTED DESERIALIZATION
845// ================================================================================================
846
847impl Deserializable for super::UntrustedMastForest {
848    /// Deserializes an [`super::UntrustedMastForest`] from a byte reader.
849    ///
850    /// Note: This method does not apply budgeting. For untrusted input, prefer using
851    /// [`read_from_bytes`](Self::read_from_bytes) which applies budgeted deserialization.
852    ///
853    /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
854    /// to verify structural integrity and recompute all node hashes before using
855    /// the forest.
856    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
857        read_untrusted_with_flags(source).map(|(forest, _flags)| forest)
858    }
859
860    /// Deserializes an [`super::UntrustedMastForest`] from bytes using budgeted deserialization.
861    ///
862    /// This method uses the default untrusted wire/validation budget from
863    /// [`super::UntrustedMastForest::read_from_bytes`].
864    ///
865    /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
866    /// to verify structural integrity and recompute all node hashes before using
867    /// the forest.
868    fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
869        super::UntrustedMastForest::read_from_bytes(bytes)
870    }
871}