miden_core/mast/serialization/mod.rs
1//! MAST forest serialization keeps one fixed structural layout for full, stripped, and hashless
2//! payloads.
3//!
4//! The main goal is to keep random access cheap in stripped and hashless modes. Node structure
5//! stays in one fixed-width section. Variable-size data lives in separate sections. Internal node
6//! digests also live in a separate section so hashless payloads can omit them without changing the
7//! structural layout.
8//!
9//! Wire flags describe serializer intent, not reader trust policy. Trusted [`MastForest`] reads
10//! reject hashless payloads. [`crate::mast::UntrustedMastForest`] accepts them and rebuilds
11//! non-external digests before use. If a non-hashless payload is sent down the untrusted path,
12//! validation recomputes those digests and requires them to match the serialized values.
13//! Budgeted untrusted reads always bound wire counts during layout scanning via
14//! [`ByteReader::max_alloc`]. Callers that opt into validation budgeting also get a second check:
15//! - later stripped/hashless helper allocations are charged against an explicit validation budget
16//! before the corresponding `Vec` or CSR scaffolding is created
17//! - the default convenience path uses a coarse validation budget derived from the input size; this
18//! is intentionally a simple bound for common callers, not an exact peak-memory formula
19//!
20//! The main layers fit together like this:
21//!
22//! ```text
//! wire bytes
//!    |
//!    +--> ForestLayout -----------> SerializedMastForest --+
//!    |    absolute offsets          structural view        |
//!    |                                                     v
//!    +--> UntrustedMastForest ----validate----> ResolvedSerializedForest ---> MastForest
//!         bytes + parsed state                  digest-backed view            trusted runtime
30//!
31//! MastForestView is the shared random-access API implemented by SerializedMastForest and
32//! MastForest.
33//! ```
34//!
35//! The format is:
36//!
37//! (Metadata)
38//! - MAGIC (4 bytes) + FLAGS (1 byte) + VERSION (3 bytes)
39//!
40//! (Counts)
//! - internal (non-external) nodes count (`usize`)
//! - external nodes count (`usize`)
//! - the total node count is not written separately; it is the sum of the two counts above
44//!
45//! (Procedure roots section)
46//! - procedure roots (`Vec<u32>` as MastNodeId values)
47//!
48//! (Basic block data section)
49//! - basic block data (padded operations + batch metadata)
50//!
51//! (Node entries section)
52//! - fixed-width structural node entries (`Vec<MastNodeEntry>`)
53//! - `Block` entries store offsets into the basic-block section above
54//!
55//! (External digest section)
56//! - digests for `External` nodes only (`Vec<Word>`, ordered by node index)
57//! - lookup is dense-by-kind: the Nth external node uses slot N in this section
58//!
59//! (Node hash section - omitted if FLAGS bit 1 is set)
60//! - digests for all non-external nodes (`Vec<Word>`, ordered by node index)
61//! - lookup is also dense-by-kind: the Nth non-external node uses slot N in this section
62//!
63//! (Advice map section)
64//! - Advice map (`AdviceMap`)
65//!
66//! (DebugInfo section - omitted if FLAGS bit 0 is set)
67//! - Decorator data (raw bytes for decorator payloads)
68//! - String table (deduplicated strings)
69//! - Decorator infos (`Vec<DecoratorInfo>`)
70//! - Error codes map (`BTreeMap<u64, String>`)
71//! - OpToDecoratorIds CSR (operation-indexed decorators, dense representation)
72//! - NodeToDecoratorIds CSR (before_enter and after_exit decorators, dense representation)
73//! - Procedure names map (`BTreeMap<Word, String>`)
74//!
75//! In stripped format, the `DebugInfo` section is omitted and readers materialize an empty
76//! `DebugInfo`.
77//!
78//! In hashless format, the internal node-hash section is omitted and `HASHLESS` also implies
79//! `STRIPPED`. External node digests still stay on the wire because they cannot be rebuilt from
80//! local structure. This keeps hashless focused on the untrusted-validation use case: trusted
81//! reads reject `HASHLESS`, and the untrusted path rebuilds the data it actually trusts before
82//! use, so supporting a separate "hashless but with debug info" mode would add another wire mode
83//! without changing the validation semantics.
84//!
85//! Readers recover per-node digest lookup by scanning node entries once and building a compact
86//! "slot by node index" table. This preserves random access without forcing all digests into the
87//! same contiguous array on the wire.
88//!
89//! Public entry points adopt these policies:
90//! - [`MastForest::read_from_bytes`]: trusted full payload, no hashless support.
91//! - [`SerializedMastForest::new`]: structural inspection for local tooling, including hashless
92//! payloads; not an untrusted-validation entry point.
93//! - [`crate::mast::UntrustedMastForest::read_from_bytes`] /
94//! [`crate::mast::UntrustedMastForest::read_from_bytes_with_budgets`]: untrusted parsing plus
95//! later validation before use.
96
97#[cfg(test)]
98use alloc::string::ToString;
99use alloc::{format, vec::Vec};
100
101use miden_utils_sync::OnceLockCompat;
102
103use super::{MastForest, MastNode, MastNodeId};
104use crate::{
105 advice::AdviceMap,
106 mast::node::MastNodeExt,
107 serde::{
108 BudgetedReader, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
109 SliceReader,
110 },
111};
112
113pub(crate) mod asm_op;
114pub(crate) mod decorator;
115
116mod info;
117pub use info::{MastNodeEntry, MastNodeInfo};
118
119mod view;
120pub use view::MastForestView;
121
122mod layout;
123pub(super) use layout::ForestLayout;
124use layout::{OffsetTrackingReader, TrackingReader, WireFlags, read_header_and_scan_layout};
125
126mod resolved;
127use resolved::{ResolvedSerializedForest, basic_block_offset_for_node_index};
128
129mod basic_blocks;
130use basic_blocks::{BasicBlockDataBuilder, basic_block_data_len};
131
132pub(crate) mod string_table;
133pub(crate) use string_table::StringTable;
134
135#[cfg(test)]
136mod seed_gen;
137
138#[cfg(test)]
139mod tests;
140
141// TYPE ALIASES
142// ================================================================================================
143
/// Offset into the `node_data` section of an encoded [`MastForest`].
type NodeDataOffset = u32;

/// Offset into the `decorator_data` section of an encoded [`MastForest`].
type DecoratorDataOffset = u32;

/// Offset into the `strings_data` section of an encoded [`MastForest`].
type StringDataOffset = usize;

/// Offset (index) into the strings table of an encoded [`MastForest`].
type StringIndex = usize;

/// Default multiplier used to derive the untrusted validation allocation budget from the input
/// length.
///
/// The budgeted byte reader only limits wire-driven parsing. Hashless and stripped validation
/// additionally needs transient per-node allocations for the slot table, the empty debug-info
/// scaffolding, and the rebuilt digest data. The generic untrusted path also keeps a recorded
/// copy of the consumed serialized payload for deferred validation.
///
/// The multiplier is therefore a coarse "wire bytes plus worst-case helper headroom" bound, made
/// of two terms:
/// - `6` covers the helper-allocation model introduced with explicit validation budgeting
/// - `1` covers the retained serialized copy recorded during untrusted reads
///
/// The bound is deliberately conservative. It exists so that the default
/// [`crate::mast::UntrustedMastForest::read_from_bytes`] path is usable without forcing callers
/// to size each helper allocation themselves. Callers with stricter limits should use
/// [`crate::mast::UntrustedMastForest::read_from_bytes_with_budgets`] and choose explicit parsing
/// and validation budgets.
const DEFAULT_UNTRUSTED_ALLOCATION_BUDGET_MULTIPLIER: usize = 6 + 1;

/// Multiplier applied to the source length to obtain the byte-read budget for trusted full
/// deserialization from a byte slice.
///
/// The budget is finite on purpose, so malicious length prefixes are rejected, but it is larger
/// than the source length because collection deserialization uses conservative per-element size
/// estimates.
const TRUSTED_BYTE_READ_BUDGET_MULTIPLIER: usize = 64;
181
182// CONSTANTS
183// ================================================================================================
184
/// Magic bytes used to detect that a file is binary-encoded MAST.
///
/// The full header is `b"MAST"` followed by the flags byte and the version bytes.
///
/// The flags byte takes the place of the terminator in the old `b"MAST\0"` header, so legacy
/// payloads still decode as "debug info present".
const MAGIC: &[u8; 4] = b"MAST";

/// Flag (bit 0) indicating that the `DebugInfo` section is omitted from the wire payload.
///
/// Readers treat this as serializer intent about the wire layout, not as a trust decision.
const FLAG_STRIPPED: u8 = 1 << 0;
197
198/// Flag indicating that the internal node-hash section is omitted from the wire payload.
199///
200/// External digests still remain serialized in their own section because they cannot be rebuilt
201/// from local structure. This flag implies [`FLAG_STRIPPED`] because no supported consumer treats
202/// wire `DebugInfo` as trusted in hashless mode: [`crate::mast::MastForest`] rejects `HASHLESS`,
203/// [`SerializedMastForest::new`] accepts it only for structural inspection, and the untrusted path
204/// rebuilds the data it actually trusts before use.
205pub(super) const FLAG_HASHLESS: u8 = 0x02;
206
/// Mask selecting the reserved flag bits, all of which must be zero.
///
/// Bits 2-7 are reserved for future use. If any of them is set, deserialization fails.
const FLAGS_RESERVED_MASK: u8 = 0b1111_1100;

/// The format version.
///
/// Whenever this format is modified, the version should be incremented by 1. The version
/// `[255, 255, 255]` is reserved for future extensions that need to extend the version field
/// itself, and should be considered invalid for now.
///
/// Version history:
/// - [0, 0, 0]: Initial format
/// - [0, 0, 1]: Added batch metadata to basic blocks (operations serialized in padded form with
///   indptr, padding, and group metadata for exact OpBatch reconstruction). Direct decorator
///   serialization in CSR format (eliminates per-node decorator sections and round-trip
///   conversions). Header changed from `MAST\0` to `MAST` + flags byte.
/// - [0, 0, 2]: Removed AssemblyOp from Decorator enum serialization. AssemblyOps are now stored
///   separately in DebugInfo. Removed `should_break` field from AssemblyOp serialization (#2646).
///   Removed `breakpoint` instruction (#2655).
/// - [0, 0, 3]: Added HASHLESS flag (bit 1). HASHLESS implies STRIPPED. Trusted deserialization
///   rejects HASHLESS. Split fixed-width node entries from digest storage. External digests moved
///   to a dedicated section. Hashless serialization omits the general node-hash section entirely.
///   Dropped the serialized decorator-count field because it was not used by the wire layout or
///   deserializers. Before any public release on this branch, the same unreleased wire version
///   also grew explicit internal/external node counts in the header.
const VERSION: [u8; 3] = [0, 0, 3];
234
235// MAST FOREST SERIALIZATION/DESERIALIZATION
236// ================================================================================================
237
238impl Serializable for MastForest {
239 fn write_into<W: ByteWriter>(&self, target: &mut W) {
240 self.write_into_with_options(target, false, false);
241 }
242}
243
244impl MastForest {
245 /// Internal serialization with options.
246 ///
247 /// When `stripped` is true, the DebugInfo section is omitted and the FLAGS byte
248 /// has bit 0 set.
249 fn write_into_with_options<W: ByteWriter>(
250 &self,
251 target: &mut W,
252 stripped: bool,
253 hashless: bool,
254 ) {
255 let mut basic_block_data_builder = BasicBlockDataBuilder::new();
256
257 // magic & flags
258 target.write_bytes(MAGIC);
259 let flags = if stripped || hashless { FLAG_STRIPPED } else { 0 }
260 | if hashless { FLAG_HASHLESS } else { 0 };
261 target.write_u8(flags);
262
263 // version
264 target.write_bytes(&VERSION);
265
266 // header counts
267 let node_count = self.nodes.len();
268 let external_node_count = self.nodes.iter().filter(|node| node.is_external()).count();
269 let internal_node_count = node_count - external_node_count;
270 target.write_usize(internal_node_count);
271 target.write_usize(external_node_count);
272
273 // roots
274 let roots: Vec<u32> = self.roots.iter().copied().map(u32::from).collect();
275 roots.write_into(target);
276
277 let mut mast_node_entries = Vec::with_capacity(self.nodes.len());
278 let mut external_digests = Vec::new();
279 let mut node_hashes = Vec::new();
280
281 for mast_node in self.nodes.iter() {
282 let ops_offset = if let MastNode::Block(basic_block) = mast_node {
283 basic_block_data_builder.encode_basic_block(basic_block)
284 } else {
285 0
286 };
287
288 mast_node_entries.push(MastNodeEntry::new(mast_node, ops_offset));
289 if mast_node.is_external() {
290 external_digests.push(mast_node.digest());
291 } else if !hashless {
292 node_hashes.push(mast_node.digest());
293 }
294 }
295
296 let basic_block_data = basic_block_data_builder.finalize();
297 basic_block_data.write_into(target);
298
299 for mast_node_entry in mast_node_entries {
300 mast_node_entry.write_into(target);
301 }
302
303 for digest in external_digests {
304 digest.write_into(target);
305 }
306
307 if !hashless {
308 for digest in node_hashes {
309 digest.write_into(target);
310 }
311 }
312
313 self.advice_map.write_into(target);
314
315 // Serialize DebugInfo only if not stripped
316 if !stripped {
317 self.debug_info.write_into(target);
318 }
319 }
320}
321
322pub(super) fn write_stripped_into<W: ByteWriter>(forest: &MastForest, target: &mut W) {
323 forest.write_into_with_options(target, true, false);
324}
325
326pub(super) fn write_hashless_into<W: ByteWriter>(forest: &MastForest, target: &mut W) {
327 forest.write_into_with_options(target, true, true);
328}
329
330pub(super) fn stripped_size_hint(forest: &MastForest) -> usize {
331 serialized_size_hint(forest, true, false)
332}
333
334fn serialized_size_hint(forest: &MastForest, stripped: bool, hashless: bool) -> usize {
335 let node_count = forest.nodes.len();
336 let external_count = forest.nodes.iter().filter(|node| node.is_external()).count();
337 let non_external_count = node_count - external_count;
338
339 let mut size = MAGIC.len() + 1 + VERSION.len();
340 size += non_external_count.get_size_hint();
341 size += external_count.get_size_hint();
342
343 let roots_len = forest.roots.len();
344 size += roots_len.get_size_hint();
345 size += roots_len * size_of::<u32>();
346
347 let mut basic_block_len = 0usize;
348 for node in forest.nodes.iter() {
349 if let MastNode::Block(block) = node {
350 basic_block_len += basic_block_data_len(block);
351 }
352 }
353 size += basic_block_len.get_size_hint() + basic_block_len;
354
355 size += node_count * MastNodeEntry::SERIALIZED_SIZE;
356 size += external_count * crate::Word::min_serialized_size();
357 if !hashless {
358 size += non_external_count * crate::Word::min_serialized_size();
359 }
360 size += forest.advice_map.serialized_size_hint();
361 if !stripped {
362 size += forest.debug_info.get_size_hint();
363 }
364
365 size
366}
367
368/// A zero-copy structural view over serialized MAST forest bytes.
369///
370/// This view accepts full, stripped, and hashless payloads. It validates the header and the
371/// fixed-width structural sections needed for random access, but it does not fully materialize the
372/// forest.
373///
374/// Use this when callers need random access to roots or node metadata without deserializing the
375/// full forest. For strict trusted deserialization, use
376/// [`crate::mast::MastForest::read_from_bytes`].
377///
378/// # Examples
379///
380/// ```
381/// use miden_core::{
382/// mast::{BasicBlockNodeBuilder, MastForest, MastForestContributor, SerializedMastForest},
383/// operations::Operation,
384/// };
385///
386/// let mut forest = MastForest::new();
387/// let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
388/// .add_to_forest(&mut forest)
389/// .unwrap();
390/// forest.make_root(block_id);
391///
392/// let mut bytes = Vec::new();
393/// forest.write_stripped(&mut bytes);
394///
395/// let view = SerializedMastForest::new(&bytes).unwrap();
396/// assert_eq!(view.node_count(), forest.nodes().len());
397/// assert!(view.node_info_at(0).is_ok());
398/// ```
399#[derive(Debug)]
400pub struct SerializedMastForest<'a> {
401 bytes: &'a [u8],
402 flags: WireFlags,
403 layout: ForestLayout,
404 resolved: OnceLockCompat<Result<ResolvedSerializedForest<'a>, DeserializationError>>,
405}
406
407impl<'a> SerializedMastForest<'a> {
408 /// Creates a new view from serialized bytes.
409 ///
410 /// The input may be full, stripped, or hashless format.
411 /// Structural parsing is delegated to the same single-pass scanner used by reader-based
412 /// deserialization paths.
413 ///
414 /// This constructor is layout-oriented: it validates the header and sections needed for
415 /// node/roots/random-access metadata only. It does not validate or fully parse trailing
416 /// `AdviceMap` / `DebugInfo` payloads.
417 ///
418 /// Treat this as a trusted inspection API, not as an untrusted-validation entry point. It is
419 /// appropriate for local tools that need random access over serialized structure, but callers
420 /// handling adversarial bytes should use [`crate::mast::UntrustedMastForest`] instead.
421 ///
422 /// In particular, this constructor does **not** protect callers from untrusted-input concerns
423 /// that are enforced by [`crate::mast::UntrustedMastForest::validate`]. It does not:
424 /// - verify that serialized non-external digests match the structure they describe
425 /// - check topological ordering / forward-reference constraints
426 /// - validate basic-block batch invariants or procedure-name-root consistency
427 /// - fully parse or validate trailing `AdviceMap` / `DebugInfo` payloads
428 /// - provide a bounded-work guarantee for hashless digest-backed inspection
429 ///
430 /// For strict full-payload validation, use
431 /// [`crate::mast::MastForest::read_from_bytes`].
432 ///
433 /// Wire flags describe serializer intent, not trust policy. This constructor accepts
434 /// hashless payloads for inspection even though trusted [`crate::mast::MastForest`]
435 /// deserialization rejects them.
436 ///
437 /// Digest lookup follows the wire layout:
438 /// - If the internal-hash section is present, non-external node digests are read from it.
439 /// - If the internal-hash section is absent, the first digest-backed access rebuilds all
440 /// non-external node digests from structure and caches them.
441 /// - External node digests are always read from the external-digest section.
442 ///
443 /// # Examples
444 ///
445 /// ```
446 /// use miden_core::{
447 /// mast::{BasicBlockNodeBuilder, MastForest, MastForestContributor, SerializedMastForest},
448 /// operations::Operation,
449 /// };
450 ///
451 /// let mut forest = MastForest::new();
452 /// let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
453 /// .add_to_forest(&mut forest)
454 /// .unwrap();
455 /// forest.make_root(block_id);
456 ///
457 /// let mut bytes = Vec::new();
458 /// forest.write_stripped(&mut bytes);
459 ///
460 /// let view = SerializedMastForest::new(&bytes).unwrap();
461 /// assert_eq!(view.node_count(), 1);
462 /// ```
463 pub fn new(bytes: &'a [u8]) -> Result<Self, DeserializationError> {
464 let mut reader = SliceReader::new(bytes);
465 let mut scanner = TrackingReader::new(&mut reader);
466 let (flags, layout) = read_header_and_scan_layout(&mut scanner, true)?;
467
468 Ok(Self {
469 bytes,
470 flags,
471 layout,
472 resolved: OnceLockCompat::new(),
473 })
474 }
475
476 /// Returns the number of nodes in the serialized forest.
477 pub fn node_count(&self) -> usize {
478 self.layout.node_count
479 }
480
481 /// Returns `true` when the wire header says that the internal-hash section is omitted.
482 pub fn is_hashless(&self) -> bool {
483 self.flags.is_hashless()
484 }
485
486 /// Returns `true` when the wire header says that the `DebugInfo` section is omitted.
487 pub fn is_stripped(&self) -> bool {
488 self.flags.is_stripped()
489 }
490
491 /// Returns the number of procedure roots in the serialized forest.
492 pub fn procedure_root_count(&self) -> usize {
493 self.layout.roots_count
494 }
495
496 /// Returns the procedure root id at the specified index.
497 ///
498 /// Returns an error if `index >= self.procedure_root_count()`.
499 pub fn procedure_root_at(&self, index: usize) -> Result<MastNodeId, DeserializationError> {
500 self.layout.read_procedure_root_at(self.bytes, index)
501 }
502
503 /// Returns the `MastNodeInfo` at the specified index.
504 ///
505 /// On hashless payloads, this may trigger the first digest-backed access and therefore the
506 /// one-time rebuild of the non-external digest table described in [`Self::node_digest_at`].
507 ///
508 /// Returns an error if `index >= self.node_count()`.
509 ///
510 /// # Examples
511 ///
512 /// ```
513 /// use miden_core::{
514 /// mast::{BasicBlockNodeBuilder, MastForest, MastForestContributor, SerializedMastForest},
515 /// operations::Operation,
516 /// };
517 ///
518 /// let mut forest = MastForest::new();
519 /// let block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new())
520 /// .add_to_forest(&mut forest)
521 /// .unwrap();
522 /// forest.make_root(block_id);
523 ///
524 /// let mut bytes = Vec::new();
525 /// forest.write_stripped(&mut bytes);
526 ///
527 /// let view = SerializedMastForest::new(&bytes).unwrap();
528 /// assert!(view.node_info_at(0).is_ok());
529 /// ```
530 pub fn node_info_at(&self, index: usize) -> Result<MastNodeInfo, DeserializationError> {
531 Ok(MastNodeInfo::from_entry(
532 self.node_entry_at(index)?,
533 self.node_digest_at(index)?,
534 ))
535 }
536
537 /// Returns the fixed-width structural node entry at the specified index.
538 ///
539 /// Returns an error if `index >= self.node_count()`.
540 pub fn node_entry_at(&self, index: usize) -> Result<MastNodeEntry, DeserializationError> {
541 self.layout.read_node_entry_at(self.bytes, index)
542 }
543
544 /// Returns the digest for the node at the specified index.
545 ///
546 /// This resolves digests lazily. If the internal-hash section is absent, the first
547 /// digest-backed access rebuilds all non-external node digests and caches them.
548 ///
549 /// This means the hashless cost model is:
550 /// - `node_count()`, `node_entry_at()`, and `procedure_root_at()` stay cheap and structural
551 /// - the first `node_digest_at()` / `node_info_at()` call does `O(node_count)` digest rebuild
552 /// work and allocates the cached digest table
553 /// - later digest lookups reuse that cache
554 ///
555 /// Returns an error if `index >= self.node_count()`.
556 pub fn node_digest_at(&self, index: usize) -> Result<crate::Word, DeserializationError> {
557 self.resolved()?.node_digest_at(index)
558 }
559
560 fn resolved(&self) -> Result<&ResolvedSerializedForest<'a>, DeserializationError> {
561 self.resolved
562 .get_or_init(|| ResolvedSerializedForest::new(self.bytes, self.layout))
563 .as_ref()
564 .map_err(Clone::clone)
565 }
566}
567
568impl MastForestView for SerializedMastForest<'_> {
569 fn node_count(&self) -> usize {
570 SerializedMastForest::node_count(self)
571 }
572
573 fn node_entry_at(&self, index: usize) -> Result<MastNodeEntry, DeserializationError> {
574 SerializedMastForest::node_entry_at(self, index)
575 }
576
577 fn node_digest_at(&self, index: usize) -> Result<crate::Word, DeserializationError> {
578 SerializedMastForest::node_digest_at(self, index)
579 }
580
581 fn procedure_root_count(&self) -> usize {
582 SerializedMastForest::procedure_root_count(self)
583 }
584
585 fn procedure_root_at(&self, index: usize) -> Result<MastNodeId, DeserializationError> {
586 SerializedMastForest::procedure_root_at(self, index)
587 }
588}
589
590impl MastForestView for MastForest {
591 fn node_count(&self) -> usize {
592 self.nodes.len()
593 }
594
595 fn node_entry_at(&self, index: usize) -> Result<MastNodeEntry, DeserializationError> {
596 let node = self.nodes.as_slice().get(index).ok_or_else(|| {
597 DeserializationError::InvalidValue(format!("node index {index} out of bounds"))
598 })?;
599 let ops_offset = if matches!(node, MastNode::Block(_)) {
600 basic_block_offset_for_node_index(self.nodes.as_slice(), index)?
601 } else {
602 0
603 };
604
605 Ok(MastNodeEntry::new(node, ops_offset))
606 }
607
608 fn node_digest_at(&self, index: usize) -> Result<crate::Word, DeserializationError> {
609 self.nodes.as_slice().get(index).map(MastNode::digest).ok_or_else(|| {
610 DeserializationError::InvalidValue(format!("node index {index} out of bounds"))
611 })
612 }
613
614 fn procedure_root_count(&self) -> usize {
615 self.roots.len()
616 }
617
618 fn procedure_root_at(&self, index: usize) -> Result<MastNodeId, DeserializationError> {
619 self.roots.get(index).copied().ok_or_else(|| {
620 DeserializationError::InvalidValue(format!(
621 "root index {} out of bounds for {} roots",
622 index,
623 self.roots.len()
624 ))
625 })
626 }
627}
628
629// TEST HELPERS
630// ================================================================================================
631
/// Test-only accessors that expose layout internals of the serialized view.
#[cfg(test)]
impl SerializedMastForest<'_> {
    /// Offset of the advice map, as reported by the layout.
    fn advice_map_offset(&self) -> Result<usize, DeserializationError> {
        self.layout.advice_map_offset()
    }

    /// Offset of the node-entry section recorded in the layout.
    fn node_entry_offset(&self) -> usize {
        self.layout.node_entry_offset
    }

    /// Offset of the node-hash section recorded in the layout, if any.
    fn node_hash_offset(&self) -> Option<usize> {
        self.layout.node_hash_offset
    }

    /// Digest slot of the node at `index`.
    ///
    /// Panics if the digest-backed view cannot be built.
    fn digest_slot_at(&self, index: usize) -> usize {
        let resolved = self
            .resolved()
            .expect("digest slots should be readable for a valid serialized view");
        resolved.digest_slot_at(index)
    }
}
652
/// Reads one byte at `*offset` and advances `offset` past it.
///
/// Fails if no byte is available at `*offset`.
#[cfg(test)]
fn read_u8_at(bytes: &[u8], offset: &mut usize) -> Result<u8, DeserializationError> {
    let single = read_slice_at(bytes, offset, 1)?;
    Ok(single[0])
}
657
/// Reads `N` bytes at `*offset` into a fixed-size array and advances `offset` past them.
///
/// Fails if fewer than `N` bytes are available.
#[cfg(test)]
fn read_array_at<const N: usize>(
    bytes: &[u8],
    offset: &mut usize,
) -> Result<[u8; N], DeserializationError> {
    // `read_slice_at` returns exactly `N` bytes on success, so every index below is in range.
    let chunk = read_slice_at(bytes, offset, N)?;
    Ok(core::array::from_fn(|i| chunk[i]))
}
668
/// Borrows `len` bytes starting at `*offset` and advances `offset` past them.
///
/// Fails with `InvalidValue` if `*offset + len` overflows, and with `UnexpectedEOF` if the
/// requested range extends past the end of `bytes`. `offset` is left untouched on failure.
#[cfg(test)]
fn read_slice_at<'a>(
    bytes: &'a [u8],
    offset: &mut usize,
    len: usize,
) -> Result<&'a [u8], DeserializationError> {
    let start = *offset;
    let Some(end) = start.checked_add(len) else {
        return Err(DeserializationError::InvalidValue("offset overflow".to_string()));
    };

    // `start <= end` always holds here, so the lookup fails exactly when `end > bytes.len()`.
    match bytes.get(start..end) {
        Some(slice) => {
            *offset = end;
            Ok(slice)
        },
        None => Err(DeserializationError::UnexpectedEOF),
    }
}
685
// NOTE: Mirrors ByteReader::read_usize (vint64) decoding to preserve wire compatibility.
/// Decodes a variable-length `usize` at `*offset` and advances `offset` past it.
///
/// The number of trailing zero bits of the first byte, plus one, gives the encoded length. A
/// length of 9 means a marker byte followed by the 8-byte little-endian value; shorter encodings
/// store the value shifted left by `length` bits.
#[cfg(test)]
fn read_usize_at(bytes: &[u8], offset: &mut usize) -> Result<usize, DeserializationError> {
    // Peek at the first byte without consuming it.
    let Some(&first_byte) = bytes.get(*offset) else {
        return Err(DeserializationError::UnexpectedEOF);
    };
    let length = first_byte.trailing_zeros() as usize + 1;

    let decoded = match length {
        9 => {
            // Consume the marker byte, then read the raw 8-byte value.
            read_u8_at(bytes, offset)?;
            let raw = read_array_at::<8>(bytes, offset)?;
            u64::from_le_bytes(raw)
        },
        _ => {
            let payload = read_slice_at(bytes, offset, length)?;
            let mut padded = [0u8; 8];
            padded[..length].copy_from_slice(payload);
            // Drop the length-marker bits stored in the low end of the encoding.
            u64::from_le_bytes(padded) >> length
        },
    };

    if decoded <= usize::MAX as u64 {
        Ok(decoded as usize)
    } else {
        Err(DeserializationError::InvalidValue(format!(
            "Encoded value must be less than {}, but {} was provided",
            usize::MAX,
            decoded
        )))
    }
}
716
717impl Deserializable for MastForest {
718 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
719 let (_flags, forest) = decode_from_reader(source, false)?;
720 forest.into_materialized()
721 }
722
723 fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
724 let budget = bytes.len().saturating_mul(TRUSTED_BYTE_READ_BUDGET_MULTIPLIER);
725 let mut reader = BudgetedReader::new(SliceReader::new(bytes), budget);
726 Self::read_from(&mut reader)
727 }
728}
729
730impl super::UntrustedMastForest {
731 pub(super) fn into_materialized(self) -> Result<MastForest, DeserializationError> {
732 let resolved = if let Some(allocation_budget) = self.remaining_allocation_budget {
733 ResolvedSerializedForest::new_with_allocation_budget(
734 &self.bytes,
735 self.layout,
736 allocation_budget,
737 )?
738 } else {
739 ResolvedSerializedForest::new(&self.bytes, self.layout)?
740 };
741
742 resolved.materialize(self.advice_map, self.debug_info)
743 }
744}
745
746pub(super) fn read_untrusted_with_flags<R: ByteReader>(
747 source: &mut R,
748) -> Result<(super::UntrustedMastForest, u8), DeserializationError> {
749 let (flags, forest) = decode_from_reader(source, true)?;
750 log_untrusted_overspecification(flags);
751 Ok((forest, flags.bits()))
752}
753
754pub(super) fn read_untrusted_with_flags_and_allocation_budget<R: ByteReader>(
755 source: &mut R,
756 allocation_budget: usize,
757) -> Result<(super::UntrustedMastForest, u8), DeserializationError> {
758 let (flags, forest) = decode_from_reader_inner(source, true, Some(allocation_budget))?;
759 log_untrusted_overspecification(flags);
760 Ok((forest, flags.bits()))
761}
762
763fn log_untrusted_overspecification(flags: WireFlags) {
764 if !flags.is_hashless() {
765 log::error!(
766 "UntrustedMastForest expected HASHLESS input; supplied artifact includes wire node hashes, and validation will recompute them and require them to match"
767 );
768 }
769
770 if !flags.is_stripped() {
771 log::error!(
772 "UntrustedMastForest expected STRIPPED input; supplied artifact includes DebugInfo and other optional payloads over the wire"
773 );
774 }
775}
776
777fn decode_from_reader<R: ByteReader>(
778 source: &mut R,
779 allow_hashless: bool,
780) -> Result<(WireFlags, super::UntrustedMastForest), DeserializationError> {
781 decode_from_reader_inner(source, allow_hashless, None)
782}
783
784fn decode_from_reader_inner<R: ByteReader>(
785 source: &mut R,
786 allow_hashless: bool,
787 mut remaining_allocation_budget: Option<usize>,
788) -> Result<(WireFlags, super::UntrustedMastForest), DeserializationError> {
789 let mut recording = TrackingReader::new_recording(source);
790 let (flags, layout) = read_header_and_scan_layout(&mut recording, allow_hashless)?;
791 debug_assert_eq!(recording.offset(), layout.advice_map_offset);
792
793 let advice_map = AdviceMap::read_from(&mut recording)?;
794 let debug_info = if flags.is_stripped() {
795 if let Some(allocation_budget) = &mut remaining_allocation_budget {
796 reserve_allocation::<usize>(
797 allocation_budget,
798 layout.node_count.checked_add(1).ok_or_else(|| {
799 DeserializationError::InvalidValue("debug-info node count overflow".into())
800 })?,
801 "empty debug-info scaffolding",
802 )?;
803 }
804 super::DebugInfo::empty_for_nodes(layout.node_count)
805 } else {
806 super::DebugInfo::read_from(&mut recording)?
807 };
808
809 Ok((
810 flags,
811 super::UntrustedMastForest {
812 bytes: recording.into_recorded(),
813 layout,
814 advice_map,
815 debug_info,
816 remaining_allocation_budget,
817 },
818 ))
819}
820
821pub(super) fn reserve_allocation<T>(
822 remaining_budget: &mut usize,
823 count: usize,
824 label: &str,
825) -> Result<(), DeserializationError> {
826 let bytes_needed = count
827 .checked_mul(size_of::<T>())
828 .ok_or_else(|| DeserializationError::InvalidValue(format!("{label} size overflow")))?;
829 if bytes_needed > *remaining_budget {
830 return Err(DeserializationError::InvalidValue(format!(
831 "{label} requires {bytes_needed} bytes, exceeding the remaining untrusted allocation budget of {} bytes",
832 *remaining_budget
833 )));
834 }
835
836 *remaining_budget -= bytes_needed;
837 Ok(())
838}
839
840pub(super) fn default_untrusted_allocation_budget(bytes_len: usize) -> usize {
841 bytes_len.saturating_mul(DEFAULT_UNTRUSTED_ALLOCATION_BUDGET_MULTIPLIER)
842}
843
844// UNTRUSTED DESERIALIZATION
845// ================================================================================================
846
847impl Deserializable for super::UntrustedMastForest {
848 /// Deserializes an [`super::UntrustedMastForest`] from a byte reader.
849 ///
850 /// Note: This method does not apply budgeting. For untrusted input, prefer using
851 /// [`read_from_bytes`](Self::read_from_bytes) which applies budgeted deserialization.
852 ///
853 /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
854 /// to verify structural integrity and recompute all node hashes before using
855 /// the forest.
856 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
857 read_untrusted_with_flags(source).map(|(forest, _flags)| forest)
858 }
859
860 /// Deserializes an [`super::UntrustedMastForest`] from bytes using budgeted deserialization.
861 ///
862 /// This method uses the default untrusted wire/validation budget from
863 /// [`super::UntrustedMastForest::read_from_bytes`].
864 ///
865 /// After deserialization, callers should use [`super::UntrustedMastForest::validate()`]
866 /// to verify structural integrity and recompute all node hashes before using
867 /// the forest.
868 fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
869 super::UntrustedMastForest::read_from_bytes(bytes)
870 }
871}