miden_core/mast/mod.rs
1//! MAST forest: a collection of procedures represented as Merkle trees.
2//!
3//! # Deserializing from untrusted sources
4//!
5//! When loading a `MastForest` from bytes you don't fully trust (network, user upload, etc.),
6//! use [`UntrustedMastForest`] instead of calling `MastForest::read_from_bytes` directly:
7//!
8//! ```ignore
9//! use miden_core::mast::UntrustedMastForest;
10//!
11//! let forest = UntrustedMastForest::read_from_bytes(&bytes)?
12//! .validate()?;
13//! ```
14//!
15//! For maximum protection against denial-of-service attacks from malicious input, use
16//! [`UntrustedMastForest::read_from_bytes_with_budget`] which limits memory consumption:
17//!
18//! ```ignore
19//! use miden_core::mast::UntrustedMastForest;
20//!
21//! // Budget limits pre-allocation sizes and total bytes consumed
22//! let forest = UntrustedMastForest::read_from_bytes_with_budget(&bytes, bytes.len())?
23//! .validate()?;
24//! ```
25//!
26//! This recomputes all node hashes and checks structural invariants before returning a usable
27//! `MastForest`. Direct deserialization via `MastForest::read_from_bytes` trusts the serialized
28//! hashes and should only be used for data from trusted sources (e.g. compiled locally).
29
30use alloc::{
31 collections::{BTreeMap, BTreeSet},
32 string::String,
33 sync::Arc,
34 vec::Vec,
35};
36use core::{
37 fmt,
38 ops::{Index, IndexMut},
39};
40
41use miden_utils_sync::OnceLockCompat;
42#[cfg(feature = "serde")]
43use serde::{Deserialize, Serialize};
44
45mod node;
46#[cfg(any(test, feature = "arbitrary"))]
47pub use node::arbitrary;
48pub(crate) use node::collect_immediate_placements;
49pub use node::{
50 BasicBlockNode, BasicBlockNodeBuilder, CallNode, CallNodeBuilder, DecoratedOpLink,
51 DecoratorOpLinkIterator, DecoratorStore, DynNode, DynNodeBuilder, ExternalNode,
52 ExternalNodeBuilder, JoinNode, JoinNodeBuilder, LoopNode, LoopNodeBuilder,
53 MastForestContributor, MastNode, MastNodeBuilder, MastNodeExt, OP_BATCH_SIZE, OP_GROUP_SIZE,
54 OpBatch, OperationOrDecorator, SplitNode, SplitNodeBuilder,
55};
56
57use crate::{
58 Felt, LexicographicWord, Word,
59 advice::AdviceMap,
60 operations::{AssemblyOp, DebugVarInfo, Decorator},
61 serde::{
62 BudgetedReader, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
63 SliceReader,
64 },
65 utils::{Idx, IndexVec, hash_string_to_word},
66};
67
68mod debuginfo;
69pub use debuginfo::{
70 AsmOpIndexError, DebugInfo, DebugVarId, DecoratedLinks, DecoratedLinksIter,
71 DecoratorIndexError, NodeToDecoratorIds, OpToAsmOpId, OpToDebugVarIds, OpToDecoratorIds,
72};
73
74mod serialization;
75
76mod merger;
77pub(crate) use merger::MastForestMerger;
78pub use merger::MastForestRootMap;
79
80mod multi_forest_node_iterator;
81pub(crate) use multi_forest_node_iterator::*;
82
83mod node_fingerprint;
84pub use node_fingerprint::{DecoratorFingerprint, MastNodeFingerprint};
85
86mod node_builder_utils;
87pub use node_builder_utils::build_node_with_remapped_ids;
88
89#[cfg(test)]
90mod tests;
91
92// MAST FOREST
93// ================================================================================================
94
95/// Represents one or more procedures, represented as a collection of [`MastNode`]s.
96///
97/// A [`MastForest`] does not have an entrypoint, and hence is not executable. A
98/// [`crate::program::Program`] can be built from a [`MastForest`] to specify an entrypoint.
99#[derive(Clone, Debug, Default)]
100pub struct MastForest {
101 /// All of the nodes local to the trees comprising the MAST forest.
102 nodes: IndexVec<MastNodeId, MastNode>,
103
104 /// Roots of procedures defined within this MAST forest.
105 roots: Vec<MastNodeId>,
106
107 /// Advice map to be loaded into the VM prior to executing procedures from this MAST forest.
108 advice_map: AdviceMap,
109
110 /// Debug information including decorators and error codes.
111 /// Always present (as per issue #1821), but can be empty for stripped builds.
112 debug_info: DebugInfo,
113
114 /// Cached commitment to this MAST forest (commitment to all roots).
115 /// This is computed lazily on first access and invalidated on any mutation.
116 commitment_cache: OnceLockCompat<Word>,
117}
118
119// ------------------------------------------------------------------------------------------------
120/// Constructors
121impl MastForest {
122 /// Creates a new empty [`MastForest`].
123 pub fn new() -> Self {
124 Self {
125 nodes: IndexVec::new(),
126 roots: Vec::new(),
127 advice_map: AdviceMap::default(),
128 debug_info: DebugInfo::new(),
129 commitment_cache: OnceLockCompat::new(),
130 }
131 }
132}
133
134// ------------------------------------------------------------------------------------------------
135/// Equality implementations
136impl PartialEq for MastForest {
137 fn eq(&self, other: &Self) -> bool {
138 // Compare all fields except commitment_cache, which is derived data
139 self.nodes == other.nodes
140 && self.roots == other.roots
141 && self.advice_map == other.advice_map
142 && self.debug_info == other.debug_info
143 }
144}
145
146impl Eq for MastForest {}
147
148// ------------------------------------------------------------------------------------------------
149/// State mutators
150impl MastForest {
151 /// The maximum number of nodes that can be stored in a single MAST forest.
152 const MAX_NODES: usize = (1 << 30) - 1;
153
154 /// Marks the given [`MastNodeId`] as being the root of a procedure.
155 ///
156 /// If the specified node is already marked as a root, this will have no effect.
157 ///
158 /// # Panics
159 /// - if `new_root_id`'s internal index is larger than the number of nodes in this forest (i.e.
160 /// clearly doesn't belong to this MAST forest).
161 pub fn make_root(&mut self, new_root_id: MastNodeId) {
162 assert!(new_root_id.to_usize() < self.nodes.len());
163
164 if !self.roots.contains(&new_root_id) {
165 self.roots.push(new_root_id);
166 // Invalidate the cached commitment since we modified the roots
167 self.commitment_cache.take();
168 }
169 }
170
171 /// Removes all nodes in the provided set from the MAST forest. The nodes MUST be orphaned (i.e.
172 /// have no parent). Otherwise, this parent's reference is considered "dangling" after the
173 /// removal (i.e. will point to an incorrect node after the removal), and this removal operation
174 /// would result in an invalid [`MastForest`].
175 ///
176 /// It also returns the map from old node IDs to new node IDs. Any [`MastNodeId`] used in
177 /// reference to the old [`MastForest`] should be remapped using this map.
178 pub fn remove_nodes(
179 &mut self,
180 nodes_to_remove: &BTreeSet<MastNodeId>,
181 ) -> BTreeMap<MastNodeId, MastNodeId> {
182 if nodes_to_remove.is_empty() {
183 return BTreeMap::new();
184 }
185
186 let old_nodes = core::mem::replace(&mut self.nodes, IndexVec::new());
187 let old_root_ids = core::mem::take(&mut self.roots);
188 let (retained_nodes, id_remappings) = remove_nodes(old_nodes.into_inner(), nodes_to_remove);
189
190 self.remap_and_add_nodes(retained_nodes, &id_remappings);
191 self.remap_and_add_roots(old_root_ids, &id_remappings);
192
193 // Remap the asm_op_storage to use the new node IDs
194 self.debug_info.remap_asm_op_storage(&id_remappings);
195
196 // Invalidate the cached commitment since we modified the forest structure
197 self.commitment_cache.take();
198
199 id_remappings
200 }
201
202 /// Clears all [`DebugInfo`] from this forest: decorators, error codes, and procedure names.
203 ///
204 /// ```
205 /// # use miden_core::mast::MastForest;
206 /// let mut forest = MastForest::new();
207 /// forest.clear_debug_info();
208 /// assert!(forest.decorators().is_empty());
209 /// ```
210 pub fn clear_debug_info(&mut self) {
211 self.debug_info = DebugInfo::empty_for_nodes(self.nodes.len());
212 }
213
214 /// Compacts the forest by merging duplicate nodes.
215 ///
216 /// This operation performs node deduplication by merging the forest with itself.
217 /// The method assumes that debug info has already been cleared if that is desired.
218 /// This method consumes the forest and returns a new compacted forest.
219 ///
220 /// The process works by:
221 /// 1. Merging the forest with itself to deduplicate identical nodes
222 /// 2. Updating internal node references and remappings
223 /// 3. Returning the compacted forest and root map
224 ///
225 /// # Examples
226 ///
227 /// ```rust
228 /// use miden_core::mast::MastForest;
229 ///
230 /// let mut forest = MastForest::new();
231 /// // Add nodes to the forest
232 ///
233 /// // First clear debug info if needed
234 /// forest.clear_debug_info();
235 ///
236 /// // Then compact the forest (consumes the original)
237 /// let (compacted_forest, root_map) = forest.compact();
238 ///
239 /// // compacted_forest is now compacted with duplicate nodes merged
240 /// ```
241 pub fn compact(self) -> (MastForest, MastForestRootMap) {
242 // Merge with itself to deduplicate nodes
243 // Note: This cannot fail for a self-merge under normal conditions.
244 // The only possible failures (TooManyNodes, TooManyDecorators) would require the
245 // original forest to be at capacity limits, at which point compaction wouldn't help.
246 MastForest::merge([&self])
247 .expect("Failed to compact MastForest: this should never happen during self-merge")
248 }
249
250 /// Merges all `forests` into a new [`MastForest`].
251 ///
252 /// Merging two forests means combining all their constituent parts, i.e. [`MastNode`]s,
253 /// [`Decorator`]s and roots. During this process, any duplicate or
254 /// unreachable nodes are removed. Additionally, [`MastNodeId`]s of nodes as well as
255 /// [`DecoratorId`]s of decorators may change and references to them are remapped to their new
256 /// location.
257 ///
258 /// For example, consider this representation of a forest's nodes with all of these nodes being
259 /// roots:
260 ///
261 /// ```text
262 /// [Block(foo), Block(bar)]
263 /// ```
264 ///
265 /// If we merge another forest into it:
266 ///
267 /// ```text
268 /// [Block(bar), Call(0)]
269 /// ```
270 ///
271 /// then we would expect this forest:
272 ///
273 /// ```text
274 /// [Block(foo), Block(bar), Call(1)]
275 /// ```
276 ///
277 /// - The `Call` to the `bar` block was remapped to its new index (now 1, previously 0).
278 /// - The `Block(bar)` was deduplicated any only exists once in the merged forest.
279 ///
280 /// The function also returns a vector of [`MastForestRootMap`]s, whose length equals the number
281 /// of passed `forests`. The indices in the vector correspond to the ones in `forests`. The map
282 /// of a given forest contains the new locations of its roots in the merged forest. To
283 /// illustrate, the above example would return a vector of two maps:
284 ///
285 /// ```text
286 /// vec![{0 -> 0, 1 -> 1}
287 /// {0 -> 1, 1 -> 2}]
288 /// ```
289 ///
290 /// - The root locations of the original forest are unchanged.
291 /// - For the second forest, the `bar` block has moved from index 0 to index 1 in the merged
292 /// forest, and the `Call` has moved from index 1 to 2.
293 ///
294 /// If any forest being merged contains an `External(qux)` node and another forest contains a
295 /// node whose digest is `qux`, then the external node will be replaced with the `qux` node,
296 /// which is effectively deduplication. Decorators are ignored when it comes to merging
297 /// External nodes. This means that an External node with decorators may be replaced by a node
298 /// without decorators or vice versa.
299 pub fn merge<'forest>(
300 forests: impl IntoIterator<Item = &'forest MastForest>,
301 ) -> Result<(MastForest, MastForestRootMap), MastForestError> {
302 MastForestMerger::merge(forests)
303 }
304}
305
306// ------------------------------------------------------------------------------------------------
307/// Helpers
308impl MastForest {
309 /// Adds all provided nodes to the internal set of nodes, remapping all [`MastNodeId`]
310 /// references in those nodes.
311 ///
312 /// # Panics
313 /// - Panics if the internal set of nodes is not empty.
314 fn remap_and_add_nodes(
315 &mut self,
316 nodes_to_add: Vec<MastNode>,
317 id_remappings: &BTreeMap<MastNodeId, MastNodeId>,
318 ) {
319 assert!(self.nodes.is_empty());
320 // extract decorator information from the nodes by converting them into builders
321 let node_builders =
322 nodes_to_add.into_iter().map(|node| node.to_builder(self)).collect::<Vec<_>>();
323
324 // Clear decorator storage after extracting builders (builders contain decorator data)
325 self.debug_info.clear_mappings();
326
327 // Add each node to the new MAST forest, making sure to rewrite any outdated internal
328 // `MastNodeId`s
329 for live_node_builder in node_builders {
330 live_node_builder.remap_children(id_remappings).add_to_forest(self).unwrap();
331 }
332 }
333
334 /// Remaps and adds all old root ids to the internal set of roots.
335 ///
336 /// # Panics
337 /// - Panics if the internal set of roots is not empty.
338 fn remap_and_add_roots(
339 &mut self,
340 old_root_ids: Vec<MastNodeId>,
341 id_remappings: &BTreeMap<MastNodeId, MastNodeId>,
342 ) {
343 assert!(self.roots.is_empty());
344
345 for old_root_id in old_root_ids {
346 let new_root_id = id_remappings.get(&old_root_id).copied().unwrap_or(old_root_id);
347 self.make_root(new_root_id);
348 }
349 }
350}
351
352/// Returns the set of nodes that are live, as well as the mapping from "old ID" to "new ID" for all
353/// live nodes.
354fn remove_nodes(
355 mast_nodes: Vec<MastNode>,
356 nodes_to_remove: &BTreeSet<MastNodeId>,
357) -> (Vec<MastNode>, BTreeMap<MastNodeId, MastNodeId>) {
358 // Note: this allows us to safely use `usize as u32`, guaranteeing that it won't wrap around.
359 assert!(mast_nodes.len() < u32::MAX as usize);
360
361 let mut retained_nodes = Vec::with_capacity(mast_nodes.len());
362 let mut id_remappings = BTreeMap::new();
363
364 for (old_node_index, old_node) in mast_nodes.into_iter().enumerate() {
365 let old_node_id: MastNodeId = MastNodeId(old_node_index as u32);
366
367 if !nodes_to_remove.contains(&old_node_id) {
368 let new_node_id: MastNodeId = MastNodeId(retained_nodes.len() as u32);
369 id_remappings.insert(old_node_id, new_node_id);
370
371 retained_nodes.push(old_node);
372 }
373 }
374
375 (retained_nodes, id_remappings)
376}
377
378// ------------------------------------------------------------------------------------------------
379/// Public accessors
380impl MastForest {
381 /// Returns the [`MastNode`] associated with the provided [`MastNodeId`] if valid, or else
382 /// `None`.
383 ///
384 /// This is the fallible version of indexing (e.g. `mast_forest[node_id]`).
385 #[inline(always)]
386 pub fn get_node_by_id(&self, node_id: MastNodeId) -> Option<&MastNode> {
387 self.nodes.get(node_id)
388 }
389
390 /// Returns the [`MastNodeId`] of the procedure associated with a given digest, if any.
391 #[inline(always)]
392 pub fn find_procedure_root(&self, digest: Word) -> Option<MastNodeId> {
393 self.roots.iter().find(|&&root_id| self[root_id].digest() == digest).copied()
394 }
395
396 /// Returns true if a node with the specified ID is a root of a procedure in this MAST forest.
397 pub fn is_procedure_root(&self, node_id: MastNodeId) -> bool {
398 self.roots.contains(&node_id)
399 }
400
401 /// Returns an iterator over the digests of all procedures in this MAST forest.
402 pub fn procedure_digests(&self) -> impl Iterator<Item = Word> + '_ {
403 self.roots.iter().map(|&root_id| self[root_id].digest())
404 }
405
406 /// Returns an iterator over the digests of local procedures in this MAST forest.
407 ///
408 /// A local procedure is defined as a procedure which is not a single external node.
409 pub fn local_procedure_digests(&self) -> impl Iterator<Item = Word> + '_ {
410 self.roots.iter().filter_map(|&root_id| {
411 let node = &self[root_id];
412 if node.is_external() { None } else { Some(node.digest()) }
413 })
414 }
415
416 /// Returns an iterator over the IDs of the procedures in this MAST forest.
417 pub fn procedure_roots(&self) -> &[MastNodeId] {
418 &self.roots
419 }
420
421 /// Returns the number of procedures in this MAST forest.
422 pub fn num_procedures(&self) -> u32 {
423 self.roots
424 .len()
425 .try_into()
426 .expect("MAST forest contains more than 2^32 procedures.")
427 }
428
429 /// Returns the [Word] representing the content hash of a subset of [`MastNodeId`]s.
430 ///
431 /// # Panics
432 /// This function panics if any `node_ids` is not a node of this forest.
433 pub fn compute_nodes_commitment<'a>(
434 &self,
435 node_ids: impl IntoIterator<Item = &'a MastNodeId>,
436 ) -> Word {
437 let mut digests: Vec<Word> = node_ids.into_iter().map(|&id| self[id].digest()).collect();
438 digests.sort_unstable_by_key(|word| LexicographicWord::from(*word));
439 miden_crypto::hash::poseidon2::Poseidon2::merge_many(&digests)
440 }
441
442 /// Returns the commitment to this MAST forest.
443 ///
444 /// The commitment is computed as the sequential hash of all procedure roots in the forest.
445 /// This value is cached after the first computation and reused for subsequent calls,
446 /// unless the forest is mutated (in which case the cache is invalidated).
447 ///
448 /// The commitment uniquely identifies the forest's structure, as each root's digest
449 /// transitively includes all of its descendants. Therefore, a commitment to all roots
450 /// is a commitment to the entire forest.
451 pub fn commitment(&self) -> Word {
452 *self.commitment_cache.get_or_init(|| self.compute_nodes_commitment(&self.roots))
453 }
454
455 /// Returns the number of nodes in this MAST forest.
456 pub fn num_nodes(&self) -> u32 {
457 self.nodes.len() as u32
458 }
459
460 /// Returns the underlying nodes in this MAST forest.
461 pub fn nodes(&self) -> &[MastNode] {
462 self.nodes.as_slice()
463 }
464
465 pub fn advice_map(&self) -> &AdviceMap {
466 &self.advice_map
467 }
468
469 pub fn advice_map_mut(&mut self) -> &mut AdviceMap {
470 &mut self.advice_map
471 }
472
473 // SERIALIZATION
474 // --------------------------------------------------------------------------------------------
475
476 /// Serializes this MastForest without debug information.
477 ///
478 /// This produces a smaller output by omitting decorators, error codes, and procedure names.
479 /// The resulting bytes can be deserialized with the standard [`Deserializable`] impl,
480 /// which auto-detects the format and creates an empty [`DebugInfo`].
481 ///
482 /// Use this for production builds where debug info is not needed.
483 ///
484 /// # Example
485 ///
486 /// ```
487 /// use miden_core::{mast::MastForest, serde::Serializable};
488 ///
489 /// let forest = MastForest::new();
490 ///
491 /// // Full serialization (with debug info)
492 /// let full_bytes = forest.to_bytes();
493 ///
494 /// // Stripped serialization (without debug info)
495 /// let mut stripped_bytes = Vec::new();
496 /// forest.write_stripped(&mut stripped_bytes);
497 ///
498 /// // Both can be deserialized the same way
499 /// // let restored = MastForest::read_from_bytes(&stripped_bytes).unwrap();
500 /// ```
501 pub fn write_stripped<W: ByteWriter>(&self, target: &mut W) {
502 use serialization::StrippedMastForest;
503 StrippedMastForest(self).write_into(target);
504 }
505}
506
507// ------------------------------------------------------------------------------------------------
508/// Decorator methods
509impl MastForest {
510 /// Returns a list of all decorators contained in this [MastForest].
511 pub fn decorators(&self) -> &[Decorator] {
512 self.debug_info.decorators()
513 }
514
515 /// Returns the [`Decorator`] associated with the provided [`DecoratorId`] if valid, or else
516 /// `None`.
517 ///
518 /// This is the fallible version of indexing (e.g. `mast_forest[decorator_id]`).
519 #[inline]
520 pub fn decorator_by_id(&self, decorator_id: DecoratorId) -> Option<&Decorator> {
521 self.debug_info.decorator(decorator_id)
522 }
523
524 /// Returns decorator indices for a specific operation within a node.
525 ///
526 /// This is the primary accessor for reading decorators from the centralized storage.
527 /// Returns a slice of decorator IDs for the given operation.
528 #[inline]
529 pub(crate) fn decorator_indices_for_op(
530 &self,
531 node_id: MastNodeId,
532 local_op_idx: usize,
533 ) -> &[DecoratorId] {
534 self.debug_info.decorators_for_operation(node_id, local_op_idx)
535 }
536
537 /// Returns an iterator over decorator references for a specific operation within a node.
538 ///
539 /// This is the preferred method for accessing decorators, as it provides direct
540 /// references to the decorator objects.
541 #[inline]
542 pub fn decorators_for_op<'a>(
543 &'a self,
544 node_id: MastNodeId,
545 local_op_idx: usize,
546 ) -> impl Iterator<Item = &'a Decorator> + 'a {
547 self.decorator_indices_for_op(node_id, local_op_idx)
548 .iter()
549 .map(move |&decorator_id| &self[decorator_id])
550 }
551
552 /// Returns the decorators to be executed before this node is executed.
553 #[inline]
554 pub fn before_enter_decorators(&self, node_id: MastNodeId) -> &[DecoratorId] {
555 self.debug_info.before_enter_decorators(node_id)
556 }
557
558 /// Returns the decorators to be executed after this node is executed.
559 #[inline]
560 pub fn after_exit_decorators(&self, node_id: MastNodeId) -> &[DecoratorId] {
561 self.debug_info.after_exit_decorators(node_id)
562 }
563
564 /// Returns decorator links for a node, including operation indices.
565 ///
566 /// This provides a flattened view of all decorators for a node with their operation indices.
567 #[inline]
568 pub(crate) fn decorator_links_for_node<'a>(
569 &'a self,
570 node_id: MastNodeId,
571 ) -> Result<DecoratedLinks<'a>, DecoratorIndexError> {
572 self.debug_info.decorator_links_for_node(node_id)
573 }
574
575 /// Adds a decorator to the forest, and returns the associated [`DecoratorId`].
576 pub fn add_decorator(&mut self, decorator: Decorator) -> Result<DecoratorId, MastForestError> {
577 self.debug_info.add_decorator(decorator)
578 }
579
580 /// Adds a debug variable to the forest, and returns the associated [`DebugVarId`].
581 pub fn add_debug_var(
582 &mut self,
583 debug_var: DebugVarInfo,
584 ) -> Result<DebugVarId, MastForestError> {
585 self.debug_info.add_debug_var(debug_var)
586 }
587
588 /// Returns debug variable IDs for a specific operation within a node.
589 pub fn debug_vars_for_operation(
590 &self,
591 node_id: MastNodeId,
592 local_op_idx: usize,
593 ) -> &[DebugVarId] {
594 self.debug_info.debug_vars_for_operation(node_id, local_op_idx)
595 }
596
597 /// Returns the debug variable with the given ID, if it exists.
598 pub fn debug_var(&self, debug_var_id: DebugVarId) -> Option<&DebugVarInfo> {
599 self.debug_info.debug_var(debug_var_id)
600 }
601
602 /// Adds decorator IDs for a node to the storage.
603 ///
604 /// Used when building nodes for efficient decorator access during execution.
605 ///
606 /// # Note
607 /// This method does not validate decorator IDs immediately. Validation occurs during
608 /// operations that need to access the actual decorator data (e.g., merging, serialization).
609 #[inline]
610 pub(crate) fn register_node_decorators(
611 &mut self,
612 node_id: MastNodeId,
613 before_enter: &[DecoratorId],
614 after_exit: &[DecoratorId],
615 ) {
616 self.debug_info.register_node_decorators(node_id, before_enter, after_exit);
617 }
618
619 /// Returns the [`AssemblyOp`] associated with a node.
620 ///
621 /// For basic block nodes with a `target_op_idx`, returns the AssemblyOp for that operation.
622 /// For other nodes or when no `target_op_idx` is provided, returns the first AssemblyOp.
623 pub fn get_assembly_op(
624 &self,
625 node_id: MastNodeId,
626 target_op_idx: Option<usize>,
627 ) -> Option<&AssemblyOp> {
628 match target_op_idx {
629 Some(op_idx) => self.debug_info.asm_op_for_operation(node_id, op_idx),
630 None => self.debug_info.first_asm_op_for_node(node_id),
631 }
632 }
633}
634
635// ------------------------------------------------------------------------------------------------
636/// Validation methods
637impl MastForest {
638 /// Validates that all BasicBlockNodes in this forest satisfy the core invariants:
639 /// 1. Power-of-two number of groups in each batch
640 /// 2. No operation group ends with an operation requiring an immediate value
641 /// 3. The last operation group in a batch cannot contain operations requiring immediate values
642 /// 4. OpBatch structural consistency (num_groups <= BATCH_SIZE, group size <= GROUP_SIZE,
643 /// indptr integrity, bounds checking)
644 ///
645 /// This addresses the gap created by PR 2094, where padding NOOPs are now inserted
646 /// at assembly time rather than dynamically during execution, and adds comprehensive
647 /// structural validation to prevent deserialization-time panics.
648 pub fn validate(&self) -> Result<(), MastForestError> {
649 // Validate basic block batch invariants
650 for (node_id_idx, node) in self.nodes.iter().enumerate() {
651 let node_id =
652 MastNodeId::new_unchecked(node_id_idx.try_into().expect("too many nodes"));
653 if let MastNode::Block(basic_block) = node {
654 basic_block.validate_batch_invariants().map_err(|error_msg| {
655 MastForestError::InvalidBatchPadding(node_id, error_msg)
656 })?;
657 }
658 }
659
660 // Validate that all procedure name digests correspond to procedure roots in the forest
661 for (digest, _) in self.debug_info.procedure_names() {
662 if self.find_procedure_root(digest).is_none() {
663 return Err(MastForestError::InvalidProcedureNameDigest(digest));
664 }
665 }
666
667 Ok(())
668 }
669
670 /// Validates topological ordering of nodes and recomputes all node hashes.
671 ///
672 /// This method iterates through all nodes in index order, verifying:
673 /// 1. All child references point to nodes with smaller indices (topological order)
674 /// 2. Each node's recomputed digest matches its stored digest
675 ///
676 /// # Errors
677 ///
678 /// Returns `MastForestError::ForwardReference` if any node references a child that
679 /// appears later in the forest.
680 ///
681 /// Returns `MastForestError::HashMismatch` if any node's recomputed digest doesn't
682 /// match its stored digest.
683 fn validate_node_hashes(&self) -> Result<(), MastForestError> {
684 use crate::chiplets::hasher;
685
686 /// Checks that child_id references a node that appears before node_id in topological order.
687 fn check_no_forward_ref(
688 node_id: MastNodeId,
689 child_id: MastNodeId,
690 ) -> Result<(), MastForestError> {
691 if child_id.0 >= node_id.0 {
692 return Err(MastForestError::ForwardReference(node_id, child_id));
693 }
694 Ok(())
695 }
696
697 for (node_idx, node) in self.nodes.iter().enumerate() {
698 let node_id = MastNodeId::new_unchecked(node_idx as u32);
699
700 // Check topological ordering and compute expected digest
701 let computed_digest = match node {
702 MastNode::Block(block) => {
703 let op_groups: Vec<Felt> =
704 block.op_batches().iter().flat_map(|batch| *batch.groups()).collect();
705 hasher::hash_elements(&op_groups)
706 },
707 MastNode::Join(join) => {
708 let left_id = join.first();
709 let right_id = join.second();
710 check_no_forward_ref(node_id, left_id)?;
711 check_no_forward_ref(node_id, right_id)?;
712
713 let left_digest = self.nodes[left_id].digest();
714 let right_digest = self.nodes[right_id].digest();
715 hasher::merge_in_domain(&[left_digest, right_digest], JoinNode::DOMAIN)
716 },
717 MastNode::Split(split) => {
718 let true_id = split.on_true();
719 let false_id = split.on_false();
720 check_no_forward_ref(node_id, true_id)?;
721 check_no_forward_ref(node_id, false_id)?;
722
723 let true_digest = self.nodes[true_id].digest();
724 let false_digest = self.nodes[false_id].digest();
725 hasher::merge_in_domain(&[true_digest, false_digest], SplitNode::DOMAIN)
726 },
727 MastNode::Loop(loop_node) => {
728 let body_id = loop_node.body();
729 check_no_forward_ref(node_id, body_id)?;
730
731 let body_digest = self.nodes[body_id].digest();
732 hasher::merge_in_domain(&[body_digest, Word::default()], LoopNode::DOMAIN)
733 },
734 MastNode::Call(call) => {
735 let callee_id = call.callee();
736 check_no_forward_ref(node_id, callee_id)?;
737
738 let callee_digest = self.nodes[callee_id].digest();
739 let domain = if call.is_syscall() {
740 CallNode::SYSCALL_DOMAIN
741 } else {
742 CallNode::CALL_DOMAIN
743 };
744 hasher::merge_in_domain(&[callee_digest, Word::default()], domain)
745 },
746 MastNode::Dyn(dyn_node) => {
747 if dyn_node.is_dyncall() {
748 DynNode::DYNCALL_DEFAULT_DIGEST
749 } else {
750 DynNode::DYN_DEFAULT_DIGEST
751 }
752 },
753 MastNode::External(_) => {
754 // External nodes have externally-provided digests that cannot be recomputed
755 continue;
756 },
757 };
758
759 let stored_digest = node.digest();
760 if computed_digest != stored_digest {
761 return Err(MastForestError::HashMismatch {
762 node_id,
763 expected: stored_digest,
764 computed: computed_digest,
765 });
766 }
767 }
768
769 Ok(())
770 }
771}
772
773// ------------------------------------------------------------------------------------------------
774/// Error message methods
775impl MastForest {
776 /// Given an error code as a Felt, resolves it to its corresponding error message.
777 pub fn resolve_error_message(&self, code: Felt) -> Option<Arc<str>> {
778 let key = code.as_canonical_u64();
779 self.debug_info.error_message(key)
780 }
781
782 /// Registers an error message in the MAST Forest and returns the corresponding error code as a
783 /// Felt.
784 pub fn register_error(&mut self, msg: Arc<str>) -> Felt {
785 let code: Felt = error_code_from_msg(&msg);
786 // we use u64 as keys for the map
787 self.debug_info.insert_error_code(code.as_canonical_u64(), msg);
788 code
789 }
790}
791
792// ------------------------------------------------------------------------------------------------
793/// Procedure name methods
794impl MastForest {
795 /// Returns the procedure name for the given MAST root digest, if present.
796 pub fn procedure_name(&self, digest: &Word) -> Option<&str> {
797 self.debug_info.procedure_name(digest)
798 }
799
800 /// Returns an iterator over all (digest, name) pairs of procedure names.
801 pub fn procedure_names(&self) -> impl Iterator<Item = (Word, &Arc<str>)> {
802 self.debug_info.procedure_names()
803 }
804
805 /// Inserts a procedure name for the given MAST root digest.
806 pub fn insert_procedure_name(&mut self, digest: Word, name: Arc<str>) {
807 assert!(
808 self.find_procedure_root(digest).is_some(),
809 "attempted to insert procedure name for digest that is not a procedure root"
810 );
811 self.debug_info.insert_procedure_name(digest, name);
812 }
813
814 /// Returns a reference to the debug info for this forest.
815 pub fn debug_info(&self) -> &DebugInfo {
816 &self.debug_info
817 }
818
819 /// Returns a mutable reference to the debug info.
820 ///
821 /// This is intended for use by the assembler to register AssemblyOps and other debug
822 /// information during compilation.
823 pub fn debug_info_mut(&mut self) -> &mut DebugInfo {
824 &mut self.debug_info
825 }
826}
827
828// TEST HELPERS
829// ================================================================================================
830
831#[cfg(test)]
832impl MastForest {
833 /// Returns all decorators for a given node as a vector of (position, DecoratorId) tuples.
834 ///
835 /// This helper method combines before_enter, operation-indexed, and after_exit decorators
836 /// into a single collection, which is useful for testing decorator positions and ordering.
837 ///
838 /// **Performance Warning**: This method performs multiple allocations through collect() calls
839 /// and should not be relied upon for performance-critical code. It is intended for testing
840 /// only.
841 pub fn all_decorators(&self, node_id: MastNodeId) -> Vec<(usize, DecoratorId)> {
842 let node = &self[node_id];
843
844 // For non-basic blocks, just get before_enter and after_exit decorators at position 0
845 if !node.is_basic_block() {
846 let before_enter_decorators: Vec<_> = self
847 .before_enter_decorators(node_id)
848 .iter()
849 .map(|&deco_id| (0, deco_id))
850 .collect();
851
852 let after_exit_decorators: Vec<_> = self
853 .after_exit_decorators(node_id)
854 .iter()
855 .map(|&deco_id| (1, deco_id))
856 .collect();
857
858 return [before_enter_decorators, after_exit_decorators].concat();
859 }
860
861 // For basic blocks, we need to handle operation-indexed decorators with proper positioning
862 let block = node.unwrap_basic_block();
863
864 // Before-enter decorators are at position 0
865 let before_enter_decorators: Vec<_> = self
866 .before_enter_decorators(node_id)
867 .iter()
868 .map(|&deco_id| (0, deco_id))
869 .collect();
870
871 // Operation-indexed decorators with their actual positions
872 let op_indexed_decorators: Vec<_> =
873 self.decorator_links_for_node(node_id).unwrap().into_iter().collect();
874
875 // After-exit decorators are positioned after all operations
876 let after_exit_decorators: Vec<_> = self
877 .after_exit_decorators(node_id)
878 .iter()
879 .map(|&deco_id| (block.num_operations() as usize, deco_id))
880 .collect();
881
882 [before_enter_decorators, op_indexed_decorators, after_exit_decorators].concat()
883 }
884}
885
886// MAST FOREST INDEXING
887// ------------------------------------------------------------------------------------------------
888
889impl Index<MastNodeId> for MastForest {
890 type Output = MastNode;
891
892 #[inline(always)]
893 fn index(&self, node_id: MastNodeId) -> &Self::Output {
894 &self.nodes[node_id]
895 }
896}
897
898impl IndexMut<MastNodeId> for MastForest {
899 #[inline(always)]
900 fn index_mut(&mut self, node_id: MastNodeId) -> &mut Self::Output {
901 &mut self.nodes[node_id]
902 }
903}
904
905impl Index<DecoratorId> for MastForest {
906 type Output = Decorator;
907
908 #[inline(always)]
909 fn index(&self, decorator_id: DecoratorId) -> &Self::Output {
910 self.debug_info.decorator(decorator_id).expect("DecoratorId out of bounds")
911 }
912}
913
914impl IndexMut<DecoratorId> for MastForest {
915 #[inline(always)]
916 fn index_mut(&mut self, decorator_id: DecoratorId) -> &mut Self::Output {
917 self.debug_info.decorator_mut(decorator_id).expect("DecoratorId out of bounds")
918 }
919}
920
921// MAST NODE ID
922// ================================================================================================
923
924/// An opaque handle to a [`MastNode`] in some [`MastForest`]. It is the responsibility of the user
925/// to use a given [`MastNodeId`] with the corresponding [`MastForest`].
926///
927/// Note that the [`MastForest`] does *not* ensure that equal [`MastNode`]s have equal
928/// [`MastNodeId`] handles. Hence, [`MastNodeId`] equality must not be used to test for equality of
929/// the underlying [`MastNode`].
930#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
931#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
932#[cfg_attr(feature = "serde", serde(transparent))]
933#[cfg_attr(all(feature = "arbitrary", test), miden_test_serde_macros::serde_test)]
934pub struct MastNodeId(u32);
935
936/// Operations that mutate a MAST often produce this mapping between old and new NodeIds.
937pub type Remapping = BTreeMap<MastNodeId, MastNodeId>;
938
939impl MastNodeId {
940 /// Returns a new `MastNodeId` with the provided inner value, or an error if the provided
941 /// `value` is greater than the number of nodes in the forest.
942 ///
943 /// For use in deserialization.
944 pub fn from_u32_safe(
945 value: u32,
946 mast_forest: &MastForest,
947 ) -> Result<Self, DeserializationError> {
948 Self::from_u32_with_node_count(value, mast_forest.nodes.len())
949 }
950
951 /// Returns a new [`MastNodeId`] with the provided `node_id`, or an error if `node_id` is
952 /// greater than the number of nodes in the [`MastForest`] for which this ID is being
953 /// constructed.
954 pub fn from_usize_safe(
955 node_id: usize,
956 mast_forest: &MastForest,
957 ) -> Result<Self, DeserializationError> {
958 let node_id: u32 = node_id.try_into().map_err(|_| {
959 DeserializationError::InvalidValue(format!(
960 "node id '{node_id}' does not fit into a u32"
961 ))
962 })?;
963 MastNodeId::from_u32_safe(node_id, mast_forest)
964 }
965
966 /// Returns a new [`MastNodeId`] from the given `value` without checking its validity.
967 pub fn new_unchecked(value: u32) -> Self {
968 Self(value)
969 }
970
971 /// Returns a new [`MastNodeId`] with the provided `id`, or an error if `id` is greater or equal
972 /// to `node_count`. The `node_count` is the total number of nodes in the [`MastForest`] for
973 /// which this ID is being constructed.
974 ///
975 /// This function can be used when deserializing an id whose corresponding node is not yet in
976 /// the forest and [`Self::from_u32_safe`] would fail. For instance, when deserializing the ids
977 /// referenced by the Join node in this forest:
978 ///
979 /// ```text
980 /// [Join(1, 2), Block(foo), Block(bar)]
981 /// ```
982 ///
983 /// Since it is less safe than [`Self::from_u32_safe`] and usually not needed it is not public.
984 pub(super) fn from_u32_with_node_count(
985 id: u32,
986 node_count: usize,
987 ) -> Result<Self, DeserializationError> {
988 if (id as usize) < node_count {
989 Ok(Self(id))
990 } else {
991 Err(DeserializationError::InvalidValue(format!(
992 "Invalid deserialized MAST node ID '{id}', but {node_count} is the number of nodes in the forest",
993 )))
994 }
995 }
996
997 /// Remap the NodeId to its new position using the given [`Remapping`].
998 pub fn remap(&self, remapping: &Remapping) -> Self {
999 *remapping.get(self).unwrap_or(self)
1000 }
1001}
1002
1003impl From<u32> for MastNodeId {
1004 fn from(value: u32) -> Self {
1005 MastNodeId::new_unchecked(value)
1006 }
1007}
1008
1009impl Idx for MastNodeId {}
1010
1011impl From<MastNodeId> for u32 {
1012 fn from(value: MastNodeId) -> Self {
1013 value.0
1014 }
1015}
1016
1017impl fmt::Display for MastNodeId {
1018 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1019 write!(f, "MastNodeId({})", self.0)
1020 }
1021}
1022
1023#[cfg(any(test, feature = "arbitrary"))]
1024impl proptest::prelude::Arbitrary for MastNodeId {
1025 type Parameters = ();
1026
1027 fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
1028 use proptest::prelude::*;
1029 any::<u32>().prop_map(MastNodeId).boxed()
1030 }
1031
1032 type Strategy = proptest::prelude::BoxedStrategy<Self>;
1033}
1034
1035// ITERATOR
1036
1037/// Iterates over all the nodes a root depends on, in pre-order. The iteration can include other
1038/// roots in the same forest.
1039pub struct SubtreeIterator<'a> {
1040 forest: &'a MastForest,
1041 discovered: Vec<MastNodeId>,
1042 unvisited: Vec<MastNodeId>,
1043}
1044impl<'a> SubtreeIterator<'a> {
1045 pub fn new(root: &MastNodeId, forest: &'a MastForest) -> Self {
1046 let discovered = vec![];
1047 let unvisited = vec![*root];
1048 SubtreeIterator { forest, discovered, unvisited }
1049 }
1050}
1051impl Iterator for SubtreeIterator<'_> {
1052 type Item = MastNodeId;
1053 fn next(&mut self) -> Option<MastNodeId> {
1054 while let Some(id) = self.unvisited.pop() {
1055 let node = &self.forest[id];
1056 if !node.has_children() {
1057 return Some(id);
1058 } else {
1059 self.discovered.push(id);
1060 node.append_children_to(&mut self.unvisited);
1061 }
1062 }
1063 self.discovered.pop()
1064 }
1065}
1066
1067// DECORATOR ID
1068// ================================================================================================
1069
1070/// An opaque handle to a [`Decorator`] in some [`MastForest`]. It is the responsibility of the user
1071/// to use a given [`DecoratorId`] with the corresponding [`MastForest`].
1072#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1073#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1074#[cfg_attr(feature = "serde", serde(transparent))]
1075pub struct DecoratorId(u32);
1076
1077impl DecoratorId {
1078 /// Returns a new `DecoratorId` with the provided inner value, or an error if the provided
1079 /// `value` is greater than the number of nodes in the forest.
1080 ///
1081 /// For use in deserialization.
1082 pub fn from_u32_safe(
1083 value: u32,
1084 mast_forest: &MastForest,
1085 ) -> Result<Self, DeserializationError> {
1086 Self::from_u32_bounded(value, mast_forest.debug_info.num_decorators())
1087 }
1088
1089 /// Returns a new `DecoratorId` with the provided inner value, or an error if the provided
1090 /// `value` is greater than or equal to `bound`.
1091 ///
1092 /// For use in deserialization when the bound is known without needing the full MastForest.
1093 pub fn from_u32_bounded(value: u32, bound: usize) -> Result<Self, DeserializationError> {
1094 if (value as usize) < bound {
1095 Ok(Self(value))
1096 } else {
1097 Err(DeserializationError::InvalidValue(format!(
1098 "Invalid deserialized MAST decorator id '{}', but allows only {} decorators",
1099 value, bound,
1100 )))
1101 }
1102 }
1103
1104 /// Creates a new [`DecoratorId`] without checking its validity.
1105 pub(crate) fn new_unchecked(value: u32) -> Self {
1106 Self(value)
1107 }
1108}
1109
1110impl From<u32> for DecoratorId {
1111 fn from(value: u32) -> Self {
1112 DecoratorId::new_unchecked(value)
1113 }
1114}
1115
1116impl Idx for DecoratorId {}
1117
1118impl From<DecoratorId> for u32 {
1119 fn from(value: DecoratorId) -> Self {
1120 value.0
1121 }
1122}
1123
1124impl fmt::Display for DecoratorId {
1125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126 write!(f, "DecoratorId({})", self.0)
1127 }
1128}
1129
1130impl Serializable for DecoratorId {
1131 fn write_into<W: ByteWriter>(&self, target: &mut W) {
1132 self.0.write_into(target)
1133 }
1134}
1135
1136impl Deserializable for DecoratorId {
1137 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
1138 let value = u32::read_from(source)?;
1139 Ok(Self(value))
1140 }
1141}
1142
1143// ASM OP ID
1144// ================================================================================================
1145
1146/// Unique identifier for an [`AssemblyOp`] within a [`MastForest`].
1147///
1148/// Unlike decorators (which are executed at runtime), AssemblyOps are metadata
1149/// used only for error context and debugging tools.
1150#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1151#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1152#[cfg_attr(feature = "serde", serde(transparent))]
1153pub struct AsmOpId(u32);
1154
1155impl AsmOpId {
1156 /// Creates a new [`AsmOpId`] with the provided inner value.
1157 pub const fn new(value: u32) -> Self {
1158 Self(value)
1159 }
1160}
1161
1162impl From<u32> for AsmOpId {
1163 fn from(value: u32) -> Self {
1164 AsmOpId::new(value)
1165 }
1166}
1167
1168impl Idx for AsmOpId {}
1169
1170impl From<AsmOpId> for u32 {
1171 fn from(id: AsmOpId) -> Self {
1172 id.0
1173 }
1174}
1175
1176impl fmt::Display for AsmOpId {
1177 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1178 write!(f, "AsmOpId({})", self.0)
1179 }
1180}
1181
1182impl Serializable for AsmOpId {
1183 fn write_into<W: ByteWriter>(&self, target: &mut W) {
1184 self.0.write_into(target)
1185 }
1186}
1187
1188impl Deserializable for AsmOpId {
1189 fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
1190 let value = u32::read_from(source)?;
1191 Ok(Self(value))
1192 }
1193}
1194
1195/// Derives an error code from an error message by hashing the message and returning the 0th element
1196/// of the resulting [`Word`].
1197pub fn error_code_from_msg(msg: impl AsRef<str>) -> Felt {
1198 // hash the message and return 0th felt of the resulting Word
1199 hash_string_to_word(msg.as_ref())[0]
1200}
1201
1202// MAST FOREST ERROR
1203// ================================================================================================
1204
1205/// Represents the types of errors that can occur when dealing with MAST forest.
1206#[derive(Debug, thiserror::Error, PartialEq, Eq)]
1207pub enum MastForestError {
1208 #[error("MAST forest decorator count exceeds the maximum of {} decorators", u32::MAX)]
1209 TooManyDecorators,
1210 #[error("MAST forest node count exceeds the maximum of {} nodes", MastForest::MAX_NODES)]
1211 TooManyNodes,
1212 #[error("node id {0} is greater than or equal to forest length {1}")]
1213 NodeIdOverflow(MastNodeId, usize),
1214 #[error("decorator id {0} is greater than or equal to decorator count {1}")]
1215 DecoratorIdOverflow(DecoratorId, usize),
1216 #[error("basic block cannot be created from an empty list of operations")]
1217 EmptyBasicBlock,
1218 #[error(
1219 "decorator root of child with node id {0} is missing but is required for fingerprint computation"
1220 )]
1221 ChildFingerprintMissing(MastNodeId),
1222 #[error("advice map key {0} already exists when merging forests")]
1223 AdviceMapKeyCollisionOnMerge(Word),
1224 #[error("decorator storage error: {0}")]
1225 DecoratorError(DecoratorIndexError),
1226 #[error("digest is required for deserialization")]
1227 DigestRequiredForDeserialization,
1228 #[error("invalid batch in basic block node {0:?}: {1}")]
1229 InvalidBatchPadding(MastNodeId, String),
1230 #[error("procedure name references digest that is not a procedure root: {0:?}")]
1231 InvalidProcedureNameDigest(Word),
1232 #[error(
1233 "node {0:?} references child {1:?} which comes after it in the forest (forward reference)"
1234 )]
1235 ForwardReference(MastNodeId, MastNodeId),
1236 #[error("hash mismatch for node {node_id:?}: expected {expected:?}, computed {computed:?}")]
1237 HashMismatch {
1238 node_id: MastNodeId,
1239 expected: Word,
1240 computed: Word,
1241 },
1242}
1243
1244// Custom serde implementations for MastForest that handle linked decorators properly
1245// by delegating to the existing miden-crypto serialization which already handles
1246// the conversion between linked and owned decorator formats.
1247#[cfg(feature = "serde")]
1248impl serde::Serialize for MastForest {
1249 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1250 where
1251 S: serde::Serializer,
1252 {
1253 // Use the existing miden-crypto serialization which already handles linked decorators
1254 let bytes = Serializable::to_bytes(self);
1255 serializer.serialize_bytes(&bytes)
1256 }
1257}
1258
1259#[cfg(feature = "serde")]
1260impl<'de> serde::Deserialize<'de> for MastForest {
1261 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1262 where
1263 D: serde::Deserializer<'de>,
1264 {
1265 // Deserialize bytes, then use miden-crypto Deserializable
1266 let bytes = Vec::<u8>::deserialize(deserializer)?;
1267 let mut slice_reader = SliceReader::new(&bytes);
1268 Deserializable::read_from(&mut slice_reader).map_err(serde::de::Error::custom)
1269 }
1270}
1271
1272// UNTRUSTED MAST FOREST
1273// ================================================================================================
1274
1275/// A [`MastForest`] deserialized from untrusted input that has not yet been validated.
1276///
1277/// This type wraps a `MastForest` that was deserialized from bytes but has not had its
1278/// node hashes verified. Before using the forest, callers must call [`validate()`](Self::validate)
1279/// to verify structural integrity and recompute all node hashes.
1280///
1281/// # Usage
1282///
1283/// ```ignore
1284/// // Deserialize from untrusted bytes
1285/// let untrusted = UntrustedMastForest::read_from_bytes(&bytes)?;
1286///
1287/// // Validate structure and hashes
1288/// let forest = untrusted.validate()?;
1289///
1290/// // Now safe to use
1291/// let root = forest.procedure_roots()[0];
1292/// ```
1293///
1294/// # Security
1295///
1296/// This type exists to provide type-level safety for untrusted deserialization. The validation
1297/// performed by [`validate()`](Self::validate) includes:
1298///
1299/// 1. **Structural validation**: Checks that basic block batch invariants are satisfied and
1300/// procedure names reference valid roots.
1301/// 2. **Topological ordering**: Verifies that all node references point to nodes that appear
1302/// earlier in the forest (no forward references).
1303/// 3. **Hash recomputation**: Recomputes the digest for every node and verifies it matches the
1304/// stored digest.
1305#[derive(Debug, Clone)]
1306pub struct UntrustedMastForest(MastForest);
1307
1308impl UntrustedMastForest {
1309 /// Validates the forest by checking structural invariants and recomputing all node hashes.
1310 ///
1311 /// This method performs a complete validation of the deserialized forest:
1312 ///
1313 /// 1. Validates structural invariants (batch padding, procedure names)
1314 /// 2. Validates topological ordering (no forward references)
1315 /// 3. Recomputes all node hashes and compares against stored digests
1316 ///
1317 /// # Returns
1318 ///
1319 /// - `Ok(MastForest)` if validation succeeds
1320 /// - `Err(MastForestError)` with details about the first validation failure
1321 ///
1322 /// # Errors
1323 ///
1324 /// Returns an error if:
1325 /// - Any basic block has invalid batch structure ([`MastForestError::InvalidBatchPadding`])
1326 /// - Any procedure name references a non-root digest
1327 /// ([`MastForestError::InvalidProcedureNameDigest`])
1328 /// - Any node references a child that appears later in the forest
1329 /// ([`MastForestError::ForwardReference`])
1330 /// - Any node's recomputed hash doesn't match its stored digest
1331 /// ([`MastForestError::HashMismatch`])
1332 pub fn validate(self) -> Result<MastForest, MastForestError> {
1333 let forest = self.0;
1334
1335 // Step 1: Validate structural invariants (existing validate() checks)
1336 forest.validate()?;
1337
1338 // Step 2: Validate topological ordering and recompute hashes
1339 forest.validate_node_hashes()?;
1340
1341 Ok(forest)
1342 }
1343
1344 /// Deserializes an [`UntrustedMastForest`] from bytes.
1345 ///
1346 /// This method uses a [`BudgetedReader`] with a budget equal to the input size to protect
1347 /// against denial-of-service attacks from malicious input.
1348 ///
1349 /// For stricter limits, use
1350 /// [`read_from_bytes_with_budget`](Self::read_from_bytes_with_budget) with a custom budget.
1351 ///
1352 /// # Example
1353 ///
1354 /// ```ignore
1355 /// // Read from untrusted source
1356 /// let untrusted = UntrustedMastForest::read_from_bytes(&bytes)?;
1357 ///
1358 /// // Validate before use
1359 /// let forest = untrusted.validate()?;
1360 /// ```
1361 pub fn read_from_bytes(bytes: &[u8]) -> Result<Self, DeserializationError> {
1362 Self::read_from_bytes_with_budget(bytes, bytes.len())
1363 }
1364
1365 /// Deserializes an [`UntrustedMastForest`] from bytes with a byte budget.
1366 ///
1367 /// This method uses a [`BudgetedReader`] to limit memory consumption during deserialization,
1368 /// protecting against denial-of-service attacks from malicious input that claims to contain
1369 /// an excessive number of elements.
1370 ///
1371 /// # Arguments
1372 ///
1373 /// * `bytes` - The serialized forest bytes
1374 /// * `budget` - Maximum bytes to consume during deserialization. Set this to `bytes.len()` for
1375 /// typical use cases, or lower to enforce stricter limits.
1376 ///
1377 /// # Example
1378 ///
1379 /// ```ignore
1380 /// // Read from untrusted source with budget equal to input size
1381 /// let untrusted = UntrustedMastForest::read_from_bytes_with_budget(&bytes, bytes.len())?;
1382 ///
1383 /// // Validate before use
1384 /// let forest = untrusted.validate()?;
1385 /// ```
1386 ///
1387 /// # Security
1388 ///
1389 /// The budget limits:
1390 /// - Pre-allocation sizes when deserializing collections (via `max_alloc`)
1391 /// - Total bytes consumed during deserialization
1392 ///
1393 /// This prevents attacks where malicious input claims an unrealistic number of elements
1394 /// (e.g., `len = 2^60`), causing excessive memory allocation before any data is read.
1395 pub fn read_from_bytes_with_budget(
1396 bytes: &[u8],
1397 budget: usize,
1398 ) -> Result<Self, DeserializationError> {
1399 let mut reader = BudgetedReader::new(SliceReader::new(bytes), budget);
1400 Self::read_from(&mut reader)
1401 }
1402}