miden_core/mast/sparse.rs
1use alloc::{
2 collections::{BTreeMap, BTreeSet},
3 sync::Arc,
4 vec::Vec,
5};
6
7use miden_utils_indexing::newtype_id;
8
9use crate::{
10 Word,
11 advice::AdviceMap,
12 mast::{ExecutableMastForest, MastForest, MastNode, MastNodeExt, MastNodeId},
13};
14
15// MAST FOREST ID
16// ================================================================================================
17
18// `MastForestId` is an opaque handle to a [`MastForest`] in some forest store such as the
19// `TraceGenerationContext::mast_forest_store`. It is not a content-derived or stable identity for a
20// forest, and must not be compared or reused across stores or trace contexts. It is analogous to
21// `MastNodeId`, which is meaningful only within one forest's node store.
22newtype_id!(MastForestId);
23
24// SPARSE MAST FOREST
25// ================================================================================================
26
27/// A sparse replay view over a single source [`MastForest`]'s [`MastNodeId`] space, retaining only
28/// the nodes visited during execution.
29///
30/// Unlike [`MastForest`], which stores nodes contiguously in an `IndexVec`, a [`SparseMastForest`]
31/// uses a `BTreeMap` so that it can preserve the original [`MastNodeId`]s of the source forest
32/// while omitting nodes that were not visited. A [`SparseMastForest`] is not an independent forest
33/// shape: it shares its source forest's ID space, and is intended to back re-execution of the same
34/// program. Lookups by the original [`MastNodeId`] continue to resolve to the correct
35/// [`MastNode`]s.
36///
37/// In addition to the visited nodes, a [`SparseMastForest`] may also carry digest-only entries for
38/// nodes that were referenced during execution but never actually entered (e.g. the not-taken
39/// branch of a split, or the children of a join that only need to contribute their digests to the
40/// parent's trace row). These entries let trace generation read the digest without having to copy
41/// the full child node, and they make accidental entry into a pruned node a clean
42/// `get_node_by_id` miss rather than a partially-populated node.
43#[derive(Debug)]
44pub struct SparseMastForest {
45 /// Subset of the original forest's nodes, keyed by their original [`MastNodeId`].
46 nodes: BTreeMap<MastNodeId, MastNode>,
47
48 /// Digests of nodes that were referenced (but not entered) during execution, keyed by their
49 /// original [`MastNodeId`]. Nodes present in [`Self::nodes`] are excluded from this map: a
50 /// full-node entry implicitly carries its own digest via [`MastNodeExt::digest`].
51 digests: BTreeMap<MastNodeId, Word>,
52
53 /// Total number of nodes in the source [`MastForest`] from which this sparse forest was
54 /// built. Note that this is *not* `nodes.len()` — it is the upper bound on the original
55 /// [`MastNodeId`] space, preserved so that callers materializing dense-shaped state (e.g.
56 /// allocating an `IndexVec` keyed by [`MastNodeId`]) know its required size.
57 num_nodes: usize,
58
59 /// Roots of procedures defined within the original MAST forest.
60 roots: Vec<MastNodeId>,
61
62 /// Advice map to be loaded into the VM prior to executing procedures from this MAST forest.
63 advice_map: AdviceMap,
64
65 /// Cached commitment to the original MAST forest (i.e. a commitment to all roots).
66 commitment_cache: Word,
67}
68
69impl SparseMastForest {
70 /// Returns the underlying nodes of this sparse forest, keyed by their original
71 /// [`MastNodeId`].
72 pub fn nodes(&self) -> &BTreeMap<MastNodeId, MastNode> {
73 &self.nodes
74 }
75
76 /// Returns the total number of nodes in the source [`MastForest`] from which this sparse
77 /// forest was built. This is *not* the number of visited (i.e. present) nodes — see
78 /// [`Self::nodes`] for that.
79 pub fn num_nodes(&self) -> usize {
80 self.num_nodes
81 }
82
83 /// Returns the roots of procedures defined within this sparse forest.
84 pub fn procedure_roots(&self) -> &[MastNodeId] {
85 &self.roots
86 }
87
88 /// Returns the advice map associated with this sparse forest.
89 pub fn advice_map(&self) -> &AdviceMap {
90 &self.advice_map
91 }
92
93 /// Returns the commitment to this sparse forest, computed from the procedure roots.
94 ///
95 /// The commitment value is derived from the digests of the procedure roots in the original
96 /// forest; it is therefore equal to the commitment of the source [`MastForest`] from which
97 /// this sparse forest was built.
98 pub fn commitment(&self) -> Word {
99 self.commitment_cache
100 }
101}
102
103impl ExecutableMastForest for SparseMastForest {
104 #[inline(always)]
105 fn get_node_by_id(&self, node_id: MastNodeId) -> Option<&MastNode> {
106 self.nodes.get(&node_id)
107 }
108
109 #[inline(always)]
110 fn get_digest_by_id(&self, node_id: MastNodeId) -> Option<Word> {
111 if let Some(node) = self.nodes.get(&node_id) {
112 return Some(node.digest());
113 }
114 self.digests.get(&node_id).copied()
115 }
116
117 #[inline(always)]
118 fn find_procedure_root(&self, digest: Word) -> Option<MastNodeId> {
119 // The `roots` list is copied wholesale from the source forest and may include roots that
120 // were never visited (and thus aren't present in `nodes`). Skip those gracefully rather
121 // than panicking via the `Index` impl.
122 self.roots.iter().find_map(|&root_id| {
123 let node = self.nodes.get(&root_id)?;
124 (node.digest() == digest).then_some(root_id)
125 })
126 }
127
128 #[inline(always)]
129 fn advice_map(&self) -> &AdviceMap {
130 &self.advice_map
131 }
132}
133
134// SPARSE MAST FOREST BUILDER
135// ================================================================================================
136
137/// Describes how a node referenced during execution should be represented in the resulting
138/// [`SparseMastForest`].
139#[derive(Debug, Clone, Copy, PartialEq, Eq)]
140pub enum VisitKind {
141 /// The node was actually entered (or otherwise needs to be available in full at replay time).
142 /// The full [`MastNode`] is copied into [`SparseMastForest::nodes`].
143 FullVisit,
144 /// Only the node's digest is required at replay time (e.g. a child of a control-flow node
145 /// whose digest contributes to the parent's trace row, but which is itself never entered).
146 /// The digest is copied into the digest-only map; the full node is omitted.
147 DigestOnly,
148}
149
150/// Incrementally builds a [`SparseMastForest`] by collecting the [`MastNodeId`]s of nodes visited
151/// during execution of a single source [`MastForest`].
152///
153/// The builder retains a strong reference to the source forest so that it can copy out the visited
154/// nodes (and the source's roots, advice map, and debug info) at finalization time.
155///
156/// Each recorded id carries a [`VisitKind`] that controls whether the full node is copied or only
157/// its digest. If the same id is recorded as both a [`VisitKind::FullVisit`] and a
158/// [`VisitKind::DigestOnly`], the full-visit representation wins (the digest is recoverable from
159/// the full node).
160#[derive(Debug)]
161pub struct SparseMastForestBuilder {
162 /// The source forest whose nodes are being collected.
163 source: Arc<MastForest>,
164
165 /// Total number of nodes in the source forest, captured at construction time. Propagated to
166 /// the finalized [`SparseMastForest`] so consumers know the original [`MastNodeId`] space.
167 num_nodes: usize,
168
169 /// IDs of nodes that were entered during execution. Their full [`MastNode`] is copied into the
170 /// finalized forest's `nodes` map.
171 full_visits: BTreeSet<MastNodeId>,
172
173 /// IDs of nodes that were only referenced (not entered) during execution. At finalization,
174 /// any id that also appears in [`Self::full_visits`] is excluded; the remainder contributes a
175 /// digest-only entry to the finalized forest.
176 digest_only_visits: BTreeSet<MastNodeId>,
177}
178
179impl SparseMastForestBuilder {
180 /// Creates a new builder for the given source forest.
181 pub fn new(source: Arc<MastForest>) -> Self {
182 let num_nodes = source.nodes().len();
183 Self {
184 source,
185 num_nodes,
186 full_visits: BTreeSet::new(),
187 digest_only_visits: BTreeSet::new(),
188 }
189 }
190
191 /// Records a visit to the node with the provided id. Idempotent.
192 ///
193 /// If the same id is recorded both as [`VisitKind::FullVisit`] and as
194 /// [`VisitKind::DigestOnly`], the full-visit representation wins.
195 pub fn record_visit(&mut self, node_id: MastNodeId, kind: VisitKind) {
196 match kind {
197 VisitKind::FullVisit => {
198 self.full_visits.insert(node_id);
199 },
200 VisitKind::DigestOnly => {
201 self.digest_only_visits.insert(node_id);
202 },
203 }
204 }
205
206 /// Returns a strong reference to the source forest backing this builder.
207 pub fn source(&self) -> &Arc<MastForest> {
208 &self.source
209 }
210
211 /// Consumes the builder and produces a [`SparseMastForest`] containing only the visited nodes
212 /// from the source forest. The roots, advice map, and debug info are cloned from the source
213 /// in full (they are not yet trimmed to visited nodes only).
214 pub fn finalize(self) -> SparseMastForest {
215 let SparseMastForestBuilder {
216 source,
217 num_nodes,
218 full_visits,
219 digest_only_visits,
220 } = self;
221
222 let mut nodes = BTreeMap::new();
223 for node_id in &full_visits {
224 let node = source
225 .get_node_by_id(*node_id)
226 .expect("recorded full-visit id must exist in source forest");
227 nodes.insert(*node_id, node.clone());
228 }
229
230 let mut digests = BTreeMap::new();
231 for node_id in digest_only_visits {
232 if full_visits.contains(&node_id) {
233 continue;
234 }
235 let node = source
236 .get_node_by_id(node_id)
237 .expect("recorded digest-only id must exist in source forest");
238 digests.insert(node_id, node.digest());
239 }
240
241 SparseMastForest {
242 nodes,
243 digests,
244 num_nodes,
245 roots: source.procedure_roots().to_vec(),
246 advice_map: source.advice_map().clone(),
247 commitment_cache: source.commitment(),
248 }
249 }
250}