miden_core/mast/merger/mod.rs
1use alloc::{collections::BTreeMap, vec::Vec};
2
3use miden_crypto::hash::blake::Blake3Digest;
4
5use crate::{
6 DenseIdMap, IndexVec,
7 mast::{
8 DecoratorId, MastForest, MastForestContributor, MastForestError, MastNode, MastNodeBuilder,
9 MastNodeFingerprint, MastNodeId, MultiMastForestIteratorItem, MultiMastForestNodeIter,
10 },
11};
12
13#[cfg(test)]
14mod tests;
15
16/// A type that allows merging [`MastForest`]s.
17///
18/// This functionality is exposed via [`MastForest::merge`]. See its documentation for more details.
19pub(crate) struct MastForestMerger {
20 mast_forest: MastForest,
21 // Internal indices needed for efficient duplicate checking and MastNodeFingerprint
22 // computation.
23 //
24 // These are always in-sync with the nodes in `mast_forest`, i.e. all nodes added to the
25 // `mast_forest` are also added to the indices.
26 node_id_by_hash: BTreeMap<MastNodeFingerprint, MastNodeId>,
27 hash_by_node_id: IndexVec<MastNodeId, MastNodeFingerprint>,
28 decorators_by_hash: BTreeMap<Blake3Digest<32>, DecoratorId>,
29 /// Mappings from old decorator and node ids to their new ids.
30 ///
31 /// Any decorator in `mast_forest` is present as the target of some mapping in this map.
32 decorator_id_mappings: Vec<DenseIdMap<DecoratorId, DecoratorId>>,
33 /// Mappings from previous `MastNodeId`s to their new ids.
34 ///
35 /// Any `MastNodeId` in `mast_forest` is present as the target of some mapping in this map.
36 node_id_mappings: Vec<DenseIdMap<MastNodeId, MastNodeId>>,
37}
38
39impl MastForestMerger {
40 /// Creates a new merger with an initially empty forest and merges all provided [`MastForest`]s
41 /// into it.
42 ///
43 /// # Normalizing Behavior
44 ///
45 /// This function performs normalization of the merged forest, which:
46 /// - Remaps all node IDs to maintain the invariant that child node IDs < parent node IDs
47 /// - Creates a clean, deduplicated forest structure
48 /// - Provides consistent node ordering regardless of input
49 ///
50 /// This normalization is idempotent, but it means that even for single-forest merges, the
51 /// resulting forest may have different node IDs and digests than the input. See assembly
52 /// test `issue_1644_single_forest_merge_identity` for detailed explanation of this
53 /// behavior.
54 pub(crate) fn merge<'forest>(
55 forests: impl IntoIterator<Item = &'forest MastForest>,
56 ) -> Result<(MastForest, MastForestRootMap), MastForestError> {
57 let forests = forests.into_iter().collect::<Vec<_>>();
58
59 let decorator_id_mappings = Vec::with_capacity(forests.len());
60 let node_id_mappings =
61 forests.iter().map(|f| DenseIdMap::with_len(f.nodes().len())).collect();
62
63 let mut merger = Self {
64 node_id_by_hash: BTreeMap::new(),
65 hash_by_node_id: IndexVec::new(),
66 decorators_by_hash: BTreeMap::new(),
67 mast_forest: MastForest::new(),
68 decorator_id_mappings,
69 node_id_mappings,
70 };
71
72 merger.merge_inner(forests.clone())?;
73
74 let Self { mast_forest, node_id_mappings, .. } = merger;
75
76 let root_maps = MastForestRootMap::from_node_id_map(node_id_mappings, forests);
77
78 Ok((mast_forest, root_maps))
79 }
80
81 /// Merges all `forests` into self.
82 ///
83 /// It does this in three steps:
84 ///
85 /// 1. Merge all advice maps, checking for key collisions.
86 /// 2. Merge all decorators, which is a case of deduplication and creating a decorator id
87 /// mapping which contains how existing [`DecoratorId`]s map to [`DecoratorId`]s in the
88 /// merged forest.
89 /// 3. Merge all nodes of forests.
90 /// - Similar to decorators, node indices might move during merging, so the merger keeps a
91 /// node id mapping as it merges nodes.
92 /// - This is a depth-first traversal over all forests to ensure all children are processed
93 /// before their parents. See the documentation of [`MultiMastForestNodeIter`] for details
94 /// on this traversal.
95 /// - Because all parents are processed after their children, we can use the node id mapping
96 /// to remap all [`MastNodeId`]s of the children to their potentially new id in the merged
97 /// forest.
98 /// - If any external node is encountered during this traversal with a digest `foo` for which
99 /// a `replacement` node exists in another forest with digest `foo`, then the external node
100 /// will be replaced by that node. In particular, it means we do not want to add the
101 /// external node to the merged forest, so it is never yielded from the iterator.
102 /// - Assuming the simple case, where the `replacement` was not visited yet and is just a
103 /// single node (not a tree), the iterator would first yield the `replacement` node which
104 /// means it is going to be merged into the forest.
105 /// - Next the iterator yields [`MultiMastForestIteratorItem::ExternalNodeReplacement`]
106 /// which signals that an external node was replaced by another node. In this example,
107 /// the `replacement_*` indices contained in that variant would point to the
108 /// `replacement` node. Now we can simply add a mapping from the external node to the
109 /// `replacement` node in our node id mapping which means all nodes that referenced the
110 /// external node will point to the `replacement` instead.
111 /// 4. Finally, we merge all roots of all forests. Here we map the existing root indices to
112 /// their potentially new indices in the merged forest and add them to the forest,
113 /// deduplicating in the process, too.
114 fn merge_inner(&mut self, forests: Vec<&MastForest>) -> Result<(), MastForestError> {
115 for other_forest in forests.iter() {
116 self.merge_advice_map(other_forest)?;
117 }
118 for other_forest in forests.iter() {
119 self.merge_decorators(other_forest)?;
120 }
121 for other_forest in forests.iter() {
122 self.merge_error_codes(other_forest)?;
123 }
124
125 let iterator = MultiMastForestNodeIter::new(forests.clone());
126 for item in iterator {
127 match item {
128 MultiMastForestIteratorItem::Node { forest_idx, node_id } => {
129 let node = forests[forest_idx][node_id].clone();
130 self.merge_node(forest_idx, node_id, node, &forests)?;
131 },
132 MultiMastForestIteratorItem::ExternalNodeReplacement {
133 // forest index of the node which replaces the external node
134 replacement_forest_idx,
135 // ID of the node that replaces the external node
136 replacement_mast_node_id,
137 // forest index of the external node
138 replaced_forest_idx,
139 // ID of the external node
140 replaced_mast_node_id,
141 } => {
142 // The iterator is not aware of the merged forest, so the node indices it yields
143 // are for the existing forests. That means we have to map the ID of the
144 // replacement to its new location, since it was previously merged and its IDs
145 // have very likely changed.
146 let mapped_replacement = self.node_id_mappings[replacement_forest_idx]
147 .get(replacement_mast_node_id)
148 .expect("every merged node id should be mapped");
149
150 // SAFETY: The iterator only yields valid forest indices, so it is safe to index
151 // directly.
152 self.node_id_mappings[replaced_forest_idx]
153 .insert(replaced_mast_node_id, mapped_replacement);
154 },
155 }
156 }
157
158 for (forest_idx, forest) in forests.iter().enumerate() {
159 self.merge_roots(forest_idx, forest)?;
160 }
161
162 Ok(())
163 }
164
165 fn merge_decorators(&mut self, other_forest: &MastForest) -> Result<(), MastForestError> {
166 let mut decorator_id_remapping =
167 DenseIdMap::with_len(other_forest.debug_info.num_decorators());
168
169 for (merging_id, merging_decorator) in
170 other_forest.debug_info.decorators().iter().enumerate()
171 {
172 let merging_decorator_hash = merging_decorator.fingerprint();
173 let new_decorator_id = if let Some(existing_decorator) =
174 self.decorators_by_hash.get(&merging_decorator_hash)
175 {
176 *existing_decorator
177 } else {
178 let new_decorator_id = self.mast_forest.add_decorator(merging_decorator.clone())?;
179 self.decorators_by_hash.insert(merging_decorator_hash, new_decorator_id);
180 new_decorator_id
181 };
182
183 decorator_id_remapping
184 .insert(DecoratorId::new_unchecked(merging_id as u32), new_decorator_id);
185 }
186
187 self.decorator_id_mappings.push(decorator_id_remapping);
188
189 Ok(())
190 }
191
192 fn merge_advice_map(&mut self, other_forest: &MastForest) -> Result<(), MastForestError> {
193 self.mast_forest
194 .advice_map
195 .merge(&other_forest.advice_map)
196 .map_err(|((key, _prev), _new)| MastForestError::AdviceMapKeyCollisionOnMerge(key))
197 }
198
199 fn merge_error_codes(&mut self, other_forest: &MastForest) -> Result<(), MastForestError> {
200 self.mast_forest.debug_info.extend_error_codes(
201 other_forest.debug_info.error_codes().map(|(k, v)| (*k, v.clone())),
202 );
203 Ok(())
204 }
205
206 fn merge_node(
207 &mut self,
208 forest_idx: usize,
209 merging_id: MastNodeId,
210 node: MastNode,
211 original_forests: &[&MastForest],
212 ) -> Result<(), MastForestError> {
213 // We need to remap the node prior to computing the MastNodeFingerprint.
214 //
215 // This is because the MastNodeFingerprint computation looks up its descendants and
216 // decorators in the internal index, and if we were to pass the original node to
217 // that computation, it would look up the incorrect descendants and decorators
218 // (since the descendant's indices may have changed).
219 //
220 // Remapping at this point is guaranteed to be "complete", meaning all ids of children
221 // will be present in the node id mapping since the DFS iteration guarantees
222 // that all children of this `node` have been processed before this node and
223 // their indices have been added to the mappings.
224 let remapped_builder = self.build_with_remapped_children(
225 merging_id,
226 node,
227 original_forests[forest_idx],
228 &self.node_id_mappings[forest_idx],
229 &self.decorator_id_mappings[forest_idx],
230 )?;
231
232 let node_fingerprint =
233 remapped_builder.fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id)?;
234
235 match self.lookup_node_by_fingerprint(&node_fingerprint) {
236 Some(matching_node_id) => {
237 // If a node with a matching fingerprint exists, then the merging node is a
238 // duplicate and we remap it to the existing node.
239 self.node_id_mappings[forest_idx].insert(merging_id, matching_node_id);
240 },
241 None => {
242 // If no node with a matching fingerprint exists, then the merging node is
243 // unique and we can add it to the merged forest using builders.
244 let new_node_id = remapped_builder.add_to_forest(&mut self.mast_forest)?;
245 self.node_id_mappings[forest_idx].insert(merging_id, new_node_id);
246
247 // We need to update the indices with the newly inserted nodes
248 // since the MastNodeFingerprint computation requires all descendants of a node
249 // to be in this index. Hence when we encounter a node in the merging forest
250 // which has descendants (Call, Loop, Split, ...), then their descendants need to be
251 // in the indices.
252 self.node_id_by_hash.insert(node_fingerprint, new_node_id);
253 let returned_id = self
254 .hash_by_node_id
255 .push(node_fingerprint)
256 .map_err(|_| MastForestError::TooManyNodes)?;
257 debug_assert_eq!(
258 returned_id, new_node_id,
259 "hash_by_node_id push() should return the same node IDs as node_id_by_hash"
260 );
261 },
262 }
263
264 Ok(())
265 }
266
267 fn merge_roots(
268 &mut self,
269 forest_idx: usize,
270 other_forest: &MastForest,
271 ) -> Result<(), MastForestError> {
272 for root_id in other_forest.roots.iter() {
273 // Map the previous root to its possibly new id.
274 let new_root = self.node_id_mappings[forest_idx]
275 .get(*root_id)
276 .expect("all node ids should have an entry");
277 // This takes O(n) where n is the number of roots in the merged forest every time to
278 // check if the root already exists. As the number of roots is relatively low generally,
279 // this should be okay.
280 self.mast_forest.make_root(new_root);
281 }
282
283 Ok(())
284 }
285
286 // HELPERS
287 // ================================================================================================
288
289 /// Returns the ID of the node in the merged forest that matches the given
290 /// fingerprint, if any.
291 fn lookup_node_by_fingerprint(&self, fingerprint: &MastNodeFingerprint) -> Option<MastNodeId> {
292 self.node_id_by_hash.get(fingerprint).copied()
293 }
294
295 /// Builds a new node with remapped children and decorators using the provided mappings.
296 fn build_with_remapped_children(
297 &self,
298 merging_id: MastNodeId,
299 src: MastNode,
300 original_forest: &MastForest,
301 nmap: &DenseIdMap<MastNodeId, MastNodeId>,
302 dmap: &DenseIdMap<DecoratorId, DecoratorId>,
303 ) -> Result<MastNodeBuilder, MastForestError> {
304 super::build_node_with_remapped_ids(merging_id, src, original_forest, nmap, dmap)
305 }
306}
307
308// MAST FOREST ROOT MAP
309// ================================================================================================
310
311/// A mapping for the new location of the roots of a [`MastForest`] after a merge.
312///
313/// It maps the roots ([`MastNodeId`]s) of a forest to their new [`MastNodeId`] in the merged
314/// forest. See [`MastForest::merge`] for more details.
315#[derive(Debug, Clone, PartialEq, Eq)]
316pub struct MastForestRootMap {
317 root_maps: Vec<BTreeMap<MastNodeId, MastNodeId>>,
318}
319
320impl MastForestRootMap {
321 fn from_node_id_map(
322 id_map: Vec<DenseIdMap<MastNodeId, MastNodeId>>,
323 forests: Vec<&MastForest>,
324 ) -> Self {
325 let mut root_maps = vec![BTreeMap::new(); forests.len()];
326
327 for (forest_idx, forest) in forests.into_iter().enumerate() {
328 for root in forest.procedure_roots() {
329 let new_id = id_map[forest_idx]
330 .get(*root)
331 .expect("every node id should be mapped to its new id");
332 root_maps[forest_idx].insert(*root, new_id);
333 }
334 }
335
336 Self { root_maps }
337 }
338
339 /// Maps the given root to its new location in the merged forest, if such a mapping exists.
340 ///
341 /// It is guaranteed that every root of the map's corresponding forest is contained in the map.
342 pub fn map_root(&self, forest_index: usize, root: &MastNodeId) -> Option<MastNodeId> {
343 self.root_maps.get(forest_index).and_then(|map| map.get(root)).copied()
344 }
345}