Skip to main content

miden_core/mast/
sparse.rs

1use alloc::{
2    collections::{BTreeMap, BTreeSet},
3    sync::Arc,
4    vec::Vec,
5};
6
7use miden_utils_indexing::newtype_id;
8
9use crate::{
10    Word,
11    advice::AdviceMap,
12    mast::{ExecutableMastForest, MastForest, MastNode, MastNodeExt, MastNodeId},
13};
14
15// MAST FOREST ID
16// ================================================================================================
17
18// `MastForestId` is an opaque handle to a [`MastForest`] in some forest store such as the
19// `TraceGenerationContext::mast_forest_store`. It is not a content-derived or stable identity for a
20// forest, and must not be compared or reused across stores or trace contexts. It is analogous to
21// `MastNodeId`, which is meaningful only within one forest's node store.
22newtype_id!(MastForestId);
23
24// SPARSE MAST FOREST
25// ================================================================================================
26
27/// A sparse replay view over a single source [`MastForest`]'s [`MastNodeId`] space, retaining only
28/// the nodes visited during execution.
29///
30/// Unlike [`MastForest`], which stores nodes contiguously in an `IndexVec`, a [`SparseMastForest`]
31/// uses a `BTreeMap` so that it can preserve the original [`MastNodeId`]s of the source forest
32/// while omitting nodes that were not visited. A [`SparseMastForest`] is not an independent forest
33/// shape: it shares its source forest's ID space, and is intended to back re-execution of the same
34/// program. Lookups by the original [`MastNodeId`] continue to resolve to the correct
35/// [`MastNode`]s.
36///
37/// In addition to the visited nodes, a [`SparseMastForest`] may also carry digest-only entries for
38/// nodes that were referenced during execution but never actually entered (e.g. the not-taken
39/// branch of a split, or the children of a join that only need to contribute their digests to the
40/// parent's trace row). These entries let trace generation read the digest without having to copy
41/// the full child node, and they make accidental entry into a pruned node a clean
42/// `get_node_by_id` miss rather than a partially-populated node.
43#[derive(Debug)]
44pub struct SparseMastForest {
45    /// Subset of the original forest's nodes, keyed by their original [`MastNodeId`].
46    nodes: BTreeMap<MastNodeId, MastNode>,
47
48    /// Digests of nodes that were referenced (but not entered) during execution, keyed by their
49    /// original [`MastNodeId`]. Nodes present in [`Self::nodes`] are excluded from this map: a
50    /// full-node entry implicitly carries its own digest via [`MastNodeExt::digest`].
51    digests: BTreeMap<MastNodeId, Word>,
52
53    /// Total number of nodes in the source [`MastForest`] from which this sparse forest was
54    /// built. Note that this is *not* `nodes.len()` — it is the upper bound on the original
55    /// [`MastNodeId`] space, preserved so that callers materializing dense-shaped state (e.g.
56    /// allocating an `IndexVec` keyed by [`MastNodeId`]) know its required size.
57    num_nodes: usize,
58
59    /// Roots of procedures defined within the original MAST forest.
60    roots: Vec<MastNodeId>,
61
62    /// Advice map to be loaded into the VM prior to executing procedures from this MAST forest.
63    advice_map: AdviceMap,
64
65    /// Cached commitment to the original MAST forest (i.e. a commitment to all roots).
66    commitment_cache: Word,
67}
68
69impl SparseMastForest {
70    /// Returns the underlying nodes of this sparse forest, keyed by their original
71    /// [`MastNodeId`].
72    pub fn nodes(&self) -> &BTreeMap<MastNodeId, MastNode> {
73        &self.nodes
74    }
75
76    /// Returns the total number of nodes in the source [`MastForest`] from which this sparse
77    /// forest was built. This is *not* the number of visited (i.e. present) nodes — see
78    /// [`Self::nodes`] for that.
79    pub fn num_nodes(&self) -> usize {
80        self.num_nodes
81    }
82
83    /// Returns the roots of procedures defined within this sparse forest.
84    pub fn procedure_roots(&self) -> &[MastNodeId] {
85        &self.roots
86    }
87
88    /// Returns the advice map associated with this sparse forest.
89    pub fn advice_map(&self) -> &AdviceMap {
90        &self.advice_map
91    }
92
93    /// Returns the commitment to this sparse forest, computed from the procedure roots.
94    ///
95    /// The commitment value is derived from the digests of the procedure roots in the original
96    /// forest; it is therefore equal to the commitment of the source [`MastForest`] from which
97    /// this sparse forest was built.
98    pub fn commitment(&self) -> Word {
99        self.commitment_cache
100    }
101}
102
103impl ExecutableMastForest for SparseMastForest {
104    #[inline(always)]
105    fn get_node_by_id(&self, node_id: MastNodeId) -> Option<&MastNode> {
106        self.nodes.get(&node_id)
107    }
108
109    #[inline(always)]
110    fn get_digest_by_id(&self, node_id: MastNodeId) -> Option<Word> {
111        if let Some(node) = self.nodes.get(&node_id) {
112            return Some(node.digest());
113        }
114        self.digests.get(&node_id).copied()
115    }
116
117    #[inline(always)]
118    fn find_procedure_root(&self, digest: Word) -> Option<MastNodeId> {
119        // The `roots` list is copied wholesale from the source forest and may include roots that
120        // were never visited (and thus aren't present in `nodes`). Skip those gracefully rather
121        // than panicking via the `Index` impl.
122        self.roots.iter().find_map(|&root_id| {
123            let node = self.nodes.get(&root_id)?;
124            (node.digest() == digest).then_some(root_id)
125        })
126    }
127
128    #[inline(always)]
129    fn advice_map(&self) -> &AdviceMap {
130        &self.advice_map
131    }
132}
133
134// SPARSE MAST FOREST BUILDER
135// ================================================================================================
136
137/// Describes how a node referenced during execution should be represented in the resulting
138/// [`SparseMastForest`].
139#[derive(Debug, Clone, Copy, PartialEq, Eq)]
140pub enum VisitKind {
141    /// The node was actually entered (or otherwise needs to be available in full at replay time).
142    /// The full [`MastNode`] is copied into [`SparseMastForest::nodes`].
143    FullVisit,
144    /// Only the node's digest is required at replay time (e.g. a child of a control-flow node
145    /// whose digest contributes to the parent's trace row, but which is itself never entered).
146    /// The digest is copied into the digest-only map; the full node is omitted.
147    DigestOnly,
148}
149
150/// Incrementally builds a [`SparseMastForest`] by collecting the [`MastNodeId`]s of nodes visited
151/// during execution of a single source [`MastForest`].
152///
153/// The builder retains a strong reference to the source forest so that it can copy out the visited
154/// nodes (and the source's roots, advice map, and debug info) at finalization time.
155///
156/// Each recorded id carries a [`VisitKind`] that controls whether the full node is copied or only
157/// its digest. If the same id is recorded as both a [`VisitKind::FullVisit`] and a
158/// [`VisitKind::DigestOnly`], the full-visit representation wins (the digest is recoverable from
159/// the full node).
160#[derive(Debug)]
161pub struct SparseMastForestBuilder {
162    /// The source forest whose nodes are being collected.
163    source: Arc<MastForest>,
164
165    /// Total number of nodes in the source forest, captured at construction time. Propagated to
166    /// the finalized [`SparseMastForest`] so consumers know the original [`MastNodeId`] space.
167    num_nodes: usize,
168
169    /// IDs of nodes that were entered during execution. Their full [`MastNode`] is copied into the
170    /// finalized forest's `nodes` map.
171    full_visits: BTreeSet<MastNodeId>,
172
173    /// IDs of nodes that were only referenced (not entered) during execution. At finalization,
174    /// any id that also appears in [`Self::full_visits`] is excluded; the remainder contributes a
175    /// digest-only entry to the finalized forest.
176    digest_only_visits: BTreeSet<MastNodeId>,
177}
178
179impl SparseMastForestBuilder {
180    /// Creates a new builder for the given source forest.
181    pub fn new(source: Arc<MastForest>) -> Self {
182        let num_nodes = source.nodes().len();
183        Self {
184            source,
185            num_nodes,
186            full_visits: BTreeSet::new(),
187            digest_only_visits: BTreeSet::new(),
188        }
189    }
190
191    /// Records a visit to the node with the provided id. Idempotent.
192    ///
193    /// If the same id is recorded both as [`VisitKind::FullVisit`] and as
194    /// [`VisitKind::DigestOnly`], the full-visit representation wins.
195    pub fn record_visit(&mut self, node_id: MastNodeId, kind: VisitKind) {
196        match kind {
197            VisitKind::FullVisit => {
198                self.full_visits.insert(node_id);
199            },
200            VisitKind::DigestOnly => {
201                self.digest_only_visits.insert(node_id);
202            },
203        }
204    }
205
206    /// Returns a strong reference to the source forest backing this builder.
207    pub fn source(&self) -> &Arc<MastForest> {
208        &self.source
209    }
210
211    /// Consumes the builder and produces a [`SparseMastForest`] containing only the visited nodes
212    /// from the source forest. The roots, advice map, and debug info are cloned from the source
213    /// in full (they are not yet trimmed to visited nodes only).
214    pub fn finalize(self) -> SparseMastForest {
215        let SparseMastForestBuilder {
216            source,
217            num_nodes,
218            full_visits,
219            digest_only_visits,
220        } = self;
221
222        let mut nodes = BTreeMap::new();
223        for node_id in &full_visits {
224            let node = source
225                .get_node_by_id(*node_id)
226                .expect("recorded full-visit id must exist in source forest");
227            nodes.insert(*node_id, node.clone());
228        }
229
230        let mut digests = BTreeMap::new();
231        for node_id in digest_only_visits {
232            if full_visits.contains(&node_id) {
233                continue;
234            }
235            let node = source
236                .get_node_by_id(node_id)
237                .expect("recorded digest-only id must exist in source forest");
238            digests.insert(node_id, node.digest());
239        }
240
241        SparseMastForest {
242            nodes,
243            digests,
244            num_nodes,
245            roots: source.procedure_roots().to_vec(),
246            advice_map: source.advice_map().clone(),
247            commitment_cache: source.commitment(),
248        }
249    }
250}