Skip to main content

miden_assembly/linker/
mod.rs

1//! Assembly of a Miden Assembly project is comprised of four phases:
2//!
3//! 1. _Parsing_, where MASM sources are parsed into the AST data structure. Some light validation
4//!    is done in this phase, to catch invalid syntax, invalid immediate values (e.g. overflow), and
5//!    other simple checks that require little to no reasoning about surrounding context.
6//! 2. _Semantic analysis_, where initial validation of the AST is performed. This step catches
7//!    unused imports, references to undefined local symbols, orphaned doc comments, and other
8//!    checks that only require minimal module-local context. Initial symbol resolution is performed
9//!    here based on module-local context, as well as constant folding of expressions that can be
10//!    resolved locally. Symbols which refer to external items are unable to be fully processed as
11//!    part of this phase, and is instead left to the linking phase.
12//! 3. _Linking_, the most critical phase of compilation. During this phase, the assembler has the
13//!    full compilation graph available to it, and so this is where inter-module symbol references
14//!    are finally able to be resolved (or not, in which case appropriate errors are raised). This
15//!    is the phase where we catch cyclic references, references to undefined symbols, references to
16//!    non-public symbols from other modules, etc. Once all symbols are linked, the assembler is
17//!    free to compile all of the procedures to MAST, and generate a [crate::Library].
18//! 4. _Assembly_, the final phase, where all of the linked items provided to the assembler are
19//!    lowered to MAST, or to their final representations in the [crate::Library] produced as the
20//!    output of assembly. During this phase, it is expected that the compilation graph has been
21//!    validated by the linker, and we're simply processing the conversion to MAST.
22//!
23//! This module provides the implementation of the linker and its associated data structures. There
24//! are three primary parts:
25//!
26//! 1. The _call graph_, this is what tracks dependencies between procedures in the compilation
27//!    graph, and is used to ensure that all procedure references can be resolved to a MAST root
28//!    during final assembly.
29//! 2. The _symbol resolver_, this is what is responsible for computing symbol resolutions using
30//!    context-sensitive details about how a symbol is referenced. This context sensitivity is how
31//!    we are able to provide better diagnostics when invalid references are found. The resolver
32//!    shares part of it's implementation with the same infrastructure used for symbol resolution
33//!    that is performed during semantic analysis - the difference is that at link-time, we are
34//!    stricter about what happens when a symbol cannot be resolved correctly.
35//! 3. A set of _rewrites_, applied to symbols/modules at link-time, which rewrite the AST so that
36//!    all symbol references and constant expressions are fully resolved/folded. This is where any
37//!    final issues are discovered, and the AST is prepared for lowering to MAST.
38mod callgraph;
39mod debug;
40mod errors;
41mod library;
42mod module;
43mod resolver;
44mod rewrites;
45mod symbols;
46
47use alloc::{boxed::Box, collections::BTreeMap, string::ToString, sync::Arc, vec::Vec};
48use core::{
49    cell::{Cell, RefCell},
50    ops::{ControlFlow, Index},
51};
52
53use miden_assembly_syntax::{
54    ast::{
55        self, Alias, AttributeSet, GlobalItemIndex, InvocationTarget, InvokeKind, ItemIndex,
56        Module, ModuleIndex, Path, SymbolResolution, Visibility, types,
57    },
58    debuginfo::{SourceManager, SourceSpan, Span, Spanned},
59    library::{ItemInfo, ModuleInfo},
60};
61use miden_core::{Word, advice::AdviceMap, program::Kernel};
62use miden_project::Linkage;
63use smallvec::{SmallVec, smallvec};
64
65pub use self::{
66    callgraph::{CallGraph, CycleError},
67    errors::LinkerError,
68    library::{LinkLibrary, LinkLibraryKind},
69    resolver::{ResolverCache, SymbolResolutionContext, SymbolResolver},
70    symbols::{Symbol, SymbolItem},
71};
72use self::{
73    module::{LinkModule, ModuleSource},
74    resolver::*,
75};
76
77/// Represents the current status of a symbol in the state of the [Linker]
78#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
79pub enum LinkStatus {
80    /// The module or item has not been visited by the linker
81    #[default]
82    Unlinked,
83    /// The module or item has been visited by the linker, but still refers to one or more
84    /// unresolved symbols.
85    PartiallyLinked,
86    /// The module or item has been visited by the linker, and is fully linked and resolved
87    Linked,
88}
89
90// LINKER
91// ================================================================================================
92
93/// The [`Linker`] is responsible for analyzing the input modules and libraries provided to the
94/// assembler, and _linking_ them together.
95///
96/// The core conceptual data structure of the linker is the _module graph_, which is implemented
97/// by a vector of module nodes, and a _call graph_, which is implemented as an adjacency matrix
98/// of item nodes and the outgoing edges from those nodes, representing references from that item
99/// to another symbol (typically as the result of procedure invocation, hence "call" graph).
100///
101/// Each item/symbol known to the linker is given a _global item index_, which is actually a pair
102/// of indices: a _module index_ (which indexes into the vector of module nodes), and an _item
103/// index_ (which indexes into the items defined by a module). These global item indices function
104/// as a unique identifier within the linker, to a specific item, and can be resolved to either the
105/// original syntax tree of the item, or to metadata about the item retrieved from previously-
106/// assembled MAST.
107///
108/// The process of linking involves two phases:
109///
110/// 1. Setting up the linker context, by providing the set of inputs to link together
111/// 2. Analyzing and rewriting the symbols known to the linker, as needed, to ensure that all symbol
112///    references are resolved to concrete definitions.
113///
114/// The assembler will call [`Self::link`] once it has provided all inputs that it wants to link,
115/// which will, when successful, return the set of module indices corresponding to the modules that
116/// comprise the public interface of the assembled artifact. The assembler then constructs the MAST
117/// starting from the exported procedures of those modules, recursively tracing the call graph
118/// based on whether or not the callee is statically or dynamically linked. In the static linking
119/// case, any procedures referenced in a statically-linked library or module will be included in
120/// the assembled artifact. In the dynamic linking case, referenced procedures are instead
121/// referenced in the assembled artifact only by their MAST root.
122#[derive(Clone)]
123pub struct Linker {
124    /// The set of libraries to link against.
125    libraries: BTreeMap<Word, LinkLibrary>,
126    /// The global set of items known to the linker
127    modules: Vec<LinkModule>,
128    /// The global call graph of calls, not counting those that are performed directly via MAST
129    /// root.
130    callgraph: CallGraph,
131    /// The set of MAST roots which have procedure definitions in this graph. There can be
132    /// multiple procedures bound to the same root due to having identical code.
133    procedures_by_mast_root: BTreeMap<Word, SmallVec<[GlobalItemIndex; 1]>>,
134    /// The index of the kernel module in `modules`, if present
135    kernel_index: Option<ModuleIndex>,
136    /// The kernel library being linked against.
137    ///
138    /// This is always provided, with an empty kernel being the default.
139    kernel: Kernel,
140    /// The source manager to use when emitting diagnostics.
141    source_manager: Arc<dyn SourceManager>,
142}
143
144// ------------------------------------------------------------------------------------------------
145/// Constructors
146impl Linker {
147    /// Instantiate a new [Linker], using the provided [SourceManager] to resolve source info.
148    pub fn new(source_manager: Arc<dyn SourceManager>) -> Self {
149        Self {
150            libraries: Default::default(),
151            modules: Default::default(),
152            callgraph: Default::default(),
153            procedures_by_mast_root: Default::default(),
154            kernel_index: None,
155            kernel: Default::default(),
156            source_manager,
157        }
158    }
159
160    /// Registers `library` and all of its modules with the linker, according to its linkage
161    pub fn link_library(&mut self, library: LinkLibrary) -> Result<(), LinkerError> {
162        use alloc::collections::btree_map::Entry;
163
164        match self.libraries.entry(library.mast.commitment()) {
165            Entry::Vacant(entry) => {
166                entry.insert(library.clone());
167                self.link_assembled_modules(library.module_infos)
168            },
169            Entry::Occupied(mut entry) => {
170                let prev = entry.get_mut();
171
172                // If the same library is linked both dynamically and statically, prefer static
173                // linking always.
174                if matches!(prev.linkage, Linkage::Dynamic) {
175                    prev.linkage = library.linkage;
176                }
177
178                Ok(())
179            },
180        }
181    }
182
183    /// Registers a set of MAST modules with the linker.
184    ///
185    /// If called directly, the modules will default to being dynamically linked. You must use
186    /// [`Self::link_library`] if you wish to statically link a set of assembled modules.
187    pub fn link_assembled_modules(
188        &mut self,
189        modules: impl IntoIterator<Item = ModuleInfo>,
190    ) -> Result<(), LinkerError> {
191        for module in modules {
192            self.link_assembled_module(module)?;
193        }
194
195        Ok(())
196    }
197
198    /// Registers a MAST module with the linker.
199    ///
200    /// If called directly, the module will default to being dynamically linked. You must use
201    /// [`Self::link_library`] if you wish to statically link `module`.
202    pub fn link_assembled_module(
203        &mut self,
204        module: ModuleInfo,
205    ) -> Result<ModuleIndex, LinkerError> {
206        log::debug!(target: "linker", "adding pre-assembled module {} to module graph", module.path());
207
208        let module_path = module.path();
209        let is_duplicate = self.find_module_index(module_path).is_some();
210        if is_duplicate {
211            return Err(LinkerError::DuplicateModule {
212                path: module_path.to_path_buf().into_boxed_path().into(),
213            });
214        }
215
216        let module_index = self.next_module_id();
217        let items = module.items();
218        let mut symbols = Vec::with_capacity(items.len());
219        for (idx, item) in items {
220            let gid = module_index + idx;
221            self.callgraph.get_or_insert_node(gid);
222            match &item {
223                ItemInfo::Procedure(item) => {
224                    self.register_procedure_root(gid, item.digest)?;
225                },
226                ItemInfo::Constant(_) | ItemInfo::Type(_) => (),
227            }
228            symbols.push(Symbol::new(
229                item.name().clone(),
230                Visibility::Public,
231                LinkStatus::Linked,
232                SymbolItem::Compiled(item.clone()),
233            ));
234        }
235
236        let link_module = LinkModule::new(
237            module_index,
238            ast::ModuleKind::Library,
239            LinkStatus::Linked,
240            ModuleSource::Mast,
241            module_path.into(),
242        )
243        .with_symbols(symbols);
244
245        self.modules.push(link_module);
246        Ok(module_index)
247    }
248
249    /// Registers a set of AST modules with the linker.
250    ///
251    /// See [`Self::link_module`] for more details.
252    pub fn link_modules(
253        &mut self,
254        modules: impl IntoIterator<Item = Box<Module>>,
255    ) -> Result<Vec<ModuleIndex>, LinkerError> {
256        modules.into_iter().map(|mut m| self.link_module(&mut m)).collect()
257    }
258
259    /// Registers an AST module with the linker.
260    ///
261    /// A module provided to this method is presumed to be dynamically linked, unless specifically
262    /// handled otherwise by the assembler. In particular, the assembler will only statically link
263    /// the set of AST modules provided to [`Self::link`], as they are expected to comprise the
264    /// public interface of the assembled artifact.
265    ///
266    /// # Errors
267    ///
268    /// This operation can fail for the following reasons:
269    ///
270    /// * Module with same [Path] is in the graph already
271    /// * Too many modules in the graph
272    ///
273    /// # Panics
274    ///
275    /// This function will panic if the number of modules exceeds the maximum representable
276    /// [ModuleIndex] value, `u16::MAX`.
277    pub fn link_module(&mut self, module: &mut Module) -> Result<ModuleIndex, LinkerError> {
278        log::debug!(target: "linker", "adding unprocessed module {}", module.path());
279
280        let is_duplicate = self.find_module_index(module.path()).is_some();
281        if is_duplicate {
282            return Err(LinkerError::DuplicateModule { path: module.path().into() });
283        }
284
285        let module_index = self.next_module_id();
286        let symbols = {
287            core::mem::take(module.items_mut())
288                .into_iter()
289                .enumerate()
290                .map(|(idx, item)| {
291                    let gid = module_index + ItemIndex::new(idx);
292                    self.callgraph.get_or_insert_node(gid);
293                    Symbol::new(
294                        item.name().clone(),
295                        item.visibility(),
296                        LinkStatus::Unlinked,
297                        match item {
298                            ast::Export::Alias(alias) => {
299                                SymbolItem::Alias { alias, resolved: Cell::new(None) }
300                            },
301                            ast::Export::Type(item) => SymbolItem::Type(item),
302                            ast::Export::Constant(item) => SymbolItem::Constant(item),
303                            ast::Export::Procedure(item) => {
304                                SymbolItem::Procedure(RefCell::new(Box::new(item)))
305                            },
306                        },
307                    )
308                })
309                .collect()
310        };
311        let link_module = LinkModule::new(
312            module_index,
313            module.kind(),
314            LinkStatus::Unlinked,
315            ModuleSource::Ast,
316            module.path().into(),
317        )
318        .with_advice_map(module.advice_map().clone())
319        .with_symbols(symbols);
320
321        self.modules.push(link_module);
322        Ok(module_index)
323    }
324
325    #[inline]
326    fn next_module_id(&self) -> ModuleIndex {
327        ModuleIndex::new(self.modules.len())
328    }
329}
330
331// ------------------------------------------------------------------------------------------------
332/// Kernels
333impl Linker {
334    /// Returns a new [Linker] instantiated from the provided kernel and kernel info module.
335    ///
336    /// Note: it is assumed that kernel and kernel_module are consistent, but this is not checked.
337    ///
338    /// TODO: consider passing `KernelLibrary` into this constructor as a parameter instead.
339    pub(super) fn with_kernel(
340        source_manager: Arc<dyn SourceManager>,
341        kernel: Kernel,
342        kernel_module: ModuleInfo,
343    ) -> Self {
344        assert!(!kernel.is_empty());
345        assert!(
346            kernel_module.path().is_kernel_path(),
347            "invalid root kernel module path: {}",
348            kernel_module.path()
349        );
350        log::debug!(target: "linker", "instantiating linker with kernel {}", kernel_module.path());
351
352        let mut graph = Self::new(source_manager);
353        let kernel_index = graph
354            .link_assembled_module(kernel_module)
355            .expect("failed to add kernel module to the module graph");
356
357        graph.kernel_index = Some(kernel_index);
358        graph.kernel = kernel;
359        graph
360    }
361
362    /// Add a kernel to the linker after the linker is initially constructed.
363    ///
364    /// This cannot cause any issues with modules already added to the linker (if any), as they
365    /// cannot have directly depended on the kernel, or an error would have been raised.
366    ///
367    /// This will panic if the kernel is empty, or the provided kernel module info is not valid for
368    /// a kernel.
369    pub(super) fn link_with_kernel(
370        &mut self,
371        kernel: Kernel,
372        kernel_module: ModuleInfo,
373    ) -> Result<(), LinkerError> {
374        assert!(self.kernel.is_empty());
375        assert!(!kernel.is_empty());
376        assert!(
377            kernel_module.path().is_kernel_path(),
378            "invalid root kernel module path: {}",
379            kernel_module.path()
380        );
381        log::debug!(target: "linker", "modifying linker with kernel {}", kernel_module.path());
382        let kernel_index = self.link_assembled_module(kernel_module)?;
383        self.kernel_index = Some(kernel_index);
384        self.kernel = kernel;
385
386        Ok(())
387    }
388
389    pub fn kernel(&self) -> &Kernel {
390        &self.kernel
391    }
392
393    pub fn has_nonempty_kernel(&self) -> bool {
394        self.kernel_index.is_some() || !self.kernel.is_empty()
395    }
396}
397
398// ------------------------------------------------------------------------------------------------
399/// Analysis
400impl Linker {
401    fn cycle_error(&self, cycle: CycleError) -> LinkerError {
402        let iter = cycle.into_node_ids();
403        let mut nodes = Vec::with_capacity(iter.len());
404        for node in iter {
405            let module = self[node.module].path();
406            let item = self[node].name();
407            nodes.push(module.join(item).to_string());
408        }
409        LinkerError::Cycle { nodes: nodes.into() }
410    }
411
412    /// Links `modules` using the current state of the linker.
413    ///
414    /// Returns the module indices corresponding to the provided modules, which are expected to
415    /// provide the public interface of the final assembled artifact.
416    pub fn link(
417        &mut self,
418        modules: impl IntoIterator<Item = Box<Module>>,
419    ) -> Result<Vec<ModuleIndex>, LinkerError> {
420        let module_indices = self.link_modules(modules)?;
421
422        self.link_and_rewrite()?;
423
424        Ok(module_indices)
425    }
426
427    /// Links `kernel` using the current state of the linker.
428    ///
429    /// Returns the module index of the kernel module, which is expected to provide the public
430    /// interface of the final assembled kernel.
431    ///
432    /// This differs from `link` in that we allow all AST modules in the module graph access to
433    /// kernel features, e.g. `caller`, as if they are defined by the kernel module itself.
434    pub fn link_kernel(
435        &mut self,
436        mut kernel: Box<Module>,
437    ) -> Result<Vec<ModuleIndex>, LinkerError> {
438        let original_module_len = self.modules.len();
439        let original_callgraph = self.callgraph.clone();
440        let module_index = self.link_module(&mut kernel)?;
441        let original_kernel_index = self.kernel_index;
442        let original_module_kinds = self
443            .modules
444            .iter()
445            .enumerate()
446            .take(module_index.as_usize())
447            .filter(|(_, module)| matches!(module.source(), ModuleSource::Ast))
448            .map(|(module_index, module)| (module_index, module.kind()))
449            .collect::<Vec<_>>();
450
451        // Set the module kind of all pending AST modules to Kernel, as we are linking a kernel
452        for module in self.modules.iter_mut().take(module_index.as_usize()) {
453            if matches!(module.source(), ModuleSource::Ast) {
454                module.set_kind(ast::ModuleKind::Kernel);
455            }
456        }
457
458        self.kernel_index = Some(module_index);
459
460        if let Err(err) = self.link_and_rewrite() {
461            self.kernel_index = original_kernel_index;
462            self.callgraph = original_callgraph;
463            self.modules.truncate(original_module_len);
464            for (module_index, module_kind) in original_module_kinds {
465                self.modules[module_index].set_kind(module_kind);
466            }
467
468            return Err(err);
469        }
470
471        Ok(vec![module_index])
472    }
473
474    /// Compute the module graph from the set of pending modules, and link it, rewriting any AST
475    /// modules with unresolved, or partially-resolved, symbol references.
476    ///
477    /// This should be called any time you add more libraries or modules to the module graph, to
478    /// ensure that the graph is valid, and that there are no unresolved references. In general,
479    /// you will only instantiate the linker, build up the graph, and link a single time; but you
480    /// can re-use the linker to build multiple artifacts as well.
481    ///
482    /// When this function is called, some initial information is calculated about the AST modules
483    /// which are to be added to the graph, and then each module is visited to perform a deeper
484    /// analysis than can be done by the `sema` module, as we now have the full set of modules
485    /// available to do import resolution, and to rewrite invoke targets with their absolute paths
486    /// and/or MAST roots. A variety of issues are caught at this stage.
487    ///
488    /// Once each module is validated, the various analysis results stored as part of the graph
489    /// structure are updated to reflect that module being added to the graph. Once part of the
490    /// graph, the module becomes immutable/clone-on-write, so as to allow the graph to be
491    /// cheaply cloned.
492    ///
493    /// The final, and most important, analysis done by this function is the topological sort of
494    /// the global call graph, which contains the inter-procedural dependencies of every procedure
495    /// in the module graph. We use this sort order to do two things:
496    ///
497    /// 1. Verify that there are no static cycles in the graph that would prevent us from being able
498    ///    to hash the generated MAST of the program. NOTE: dynamic cycles, e.g. those induced by
499    ///    `dynexec`, are perfectly fine, we are only interested in preventing cycles that interfere
500    ///    with the ability to generate MAST roots.
501    ///
502    /// 2. Visit the call graph bottom-up, so that we can fully compile a procedure before any of
503    ///    its callers, and thus rewrite those callers to reference that procedure by MAST root,
504    ///    rather than by name. As a result, a compiled MAST program is like an immutable snapshot
505    ///    of the entire call graph at the time of compilation. Later, if we choose to recompile a
506    ///    subset of modules (currently we do not have support for this in the assembler API), we
507    ///    can re-analyze/re-compile only those parts of the graph which have actually changed.
508    ///
509    /// NOTE: This will return `Err` if we detect a validation error, a cycle in the graph, or an
510    /// operation not supported by the current configuration. Basically, for any reason that would
511    /// cause the resulting graph to represent an invalid program.
512    fn link_and_rewrite(&mut self) -> Result<(), LinkerError> {
513        log::debug!(
514            target: "linker",
515            "processing {} unlinked/partially-linked modules, and recomputing module graph",
516            self.modules.iter().filter(|m| !m.is_linked()).count()
517        );
518
519        // It is acceptable for there to be no changes, but if the graph is empty and no changes
520        // are being made, we treat that as an error
521        if self.modules.is_empty() {
522            return Err(LinkerError::Empty);
523        }
524
525        // If no changes are being made, we're done
526        if self.modules.iter().all(LinkModule::is_linked) {
527            return Ok(());
528        }
529
530        // Obtain a set of resolvers for the pending modules so that we can do name resolution
531        // before they are added to the graph
532        let pending_modules = self
533            .modules
534            .iter()
535            .enumerate()
536            .filter(|(_, module)| module.is_unlinked())
537            .map(|(module_index, module)| (module_index, module.clone()))
538            .collect::<Vec<_>>();
539        let original_callgraph = self.callgraph.clone();
540
541        let result = {
542            let resolver = SymbolResolver::new(self);
543            let mut edges = Vec::new();
544            let mut cache = ResolverCache::default();
545            let mut linked_modules = Vec::new();
546
547            for (module_index, module) in self.modules.iter().enumerate() {
548                if !module.is_unlinked() {
549                    continue;
550                }
551
552                let module_index = ModuleIndex::new(module_index);
553
554                for (symbol_idx, symbol) in module.symbols().enumerate() {
555                    let gid = module_index + ItemIndex::new(symbol_idx);
556
557                    // Perform any applicable rewrites to this item
558                    rewrites::rewrite_symbol(gid, symbol, &resolver, &mut cache)?;
559
560                    // Update the linker graph
561                    match symbol.item() {
562                        SymbolItem::Compiled(_) | SymbolItem::Type(_) | SymbolItem::Constant(_) => {
563                        },
564                        SymbolItem::Alias { alias, resolved } => {
565                            if let Some(resolved) = resolved.get() {
566                                log::debug!(target: "linker", "  | resolved alias {} to item {resolved}", alias.target());
567                                if self[resolved].is_procedure() {
568                                    edges.push((gid, resolved));
569                                }
570                            } else {
571                                log::debug!(target: "linker", "  | resolving alias {}..", alias.target());
572
573                                let context = SymbolResolutionContext {
574                                    span: alias.target().span(),
575                                    module: module_index,
576                                    kind: None,
577                                };
578                                if let Some(callee) =
579                                    resolver.resolve_alias_target(&context, alias)?.into_global_id()
580                                {
581                                    log::debug!(
582                                        target: "linker",
583                                        "  | resolved alias to gid {:?}:{:?}",
584                                        callee.module,
585                                        callee.index
586                                    );
587                                    edges.push((gid, callee));
588                                    resolved.set(Some(callee));
589                                }
590                            }
591                        },
592                        SymbolItem::Procedure(proc) => {
593                            // Add edges to all transitive dependencies of this item due to
594                            // calls/symbol refs
595                            let proc = proc.borrow();
596                            for invoke in proc.invoked() {
597                                log::debug!(target: "linker", "  | recording {} dependency on {}", invoke.kind, &invoke.target);
598
599                                let context = SymbolResolutionContext {
600                                    span: invoke.span(),
601                                    module: module_index,
602                                    kind: None,
603                                };
604                                if let Some(callee) = resolver
605                                    .resolve_invoke_target(&context, &invoke.target)?
606                                    .into_global_id()
607                                {
608                                    log::debug!(
609                                        target: "linker",
610                                        "  | resolved dependency to gid {}:{}",
611                                        callee.module.as_usize(),
612                                        callee.index.as_usize()
613                                    );
614                                    edges.push((gid, callee));
615                                }
616                            }
617                        },
618                    }
619                }
620
621                linked_modules.push(module_index);
622            }
623
624            let mut callgraph = self.callgraph.clone();
625            for (caller, callee) in edges {
626                callgraph.add_edge(caller, callee).map_err(|cycle| self.cycle_error(cycle))?;
627            }
628
629            // Make sure the graph is free of cycles
630            callgraph.toposort().map_err(|cycle| self.cycle_error(cycle))?;
631
632            Ok::<_, LinkerError>((linked_modules, callgraph))
633        };
634
635        match result {
636            Ok((linked_modules, callgraph)) => {
637                self.callgraph = callgraph;
638                for module_index in linked_modules {
639                    self.modules[module_index.as_usize()].set_status(LinkStatus::Linked);
640                }
641            },
642            Err(err) => {
643                self.callgraph = original_callgraph;
644                for (module_index, module) in pending_modules {
645                    self.modules[module_index] = module;
646                }
647                return Err(err);
648            },
649        }
650
651        Ok(())
652    }
653}
654
655// ------------------------------------------------------------------------------------------------
656/// Accessors/Queries
657impl Linker {
658    /// Get an iterator over the external libraries the linker has linked against
659    pub fn libraries(&self) -> impl Iterator<Item = &LinkLibrary> {
660        self.libraries.values()
661    }
662
663    /// Compute the topological sort of the callgraph rooted at `caller`
664    pub fn topological_sort_from_root(
665        &self,
666        caller: GlobalItemIndex,
667    ) -> Result<Vec<GlobalItemIndex>, CycleError> {
668        self.callgraph.toposort_caller(caller)
669    }
670
671    /// Returns a procedure index which corresponds to the provided procedure digest.
672    ///
673    /// Note that there can be many procedures with the same digest - due to having the same code,
674    /// and/or using different decorators which don't affect the MAST root. This method returns an
675    /// arbitrary one.
676    pub fn get_procedure_index_by_digest(
677        &self,
678        procedure_digest: &Word,
679    ) -> Option<GlobalItemIndex> {
680        self.procedures_by_mast_root.get(procedure_digest).map(|indices| indices[0])
681    }
682
683    /// Resolves `target` from the perspective of `caller`.
684    pub fn resolve_invoke_target(
685        &self,
686        caller: &SymbolResolutionContext,
687        target: &InvocationTarget,
688    ) -> Result<SymbolResolution, LinkerError> {
689        let resolver = SymbolResolver::new(self);
690        resolver.resolve_invoke_target(caller, target)
691    }
692
693    /// Resolves `target` from the perspective of `caller`.
694    pub fn resolve_alias_target(
695        &self,
696        caller: &SymbolResolutionContext,
697        target: &Alias,
698    ) -> Result<SymbolResolution, LinkerError> {
699        let resolver = SymbolResolver::new(self);
700        resolver.resolve_alias_target(caller, target)
701    }
702
703    /// Resolves `path` from the perspective of `caller`.
704    pub fn resolve_path(
705        &self,
706        caller: &SymbolResolutionContext,
707        path: &Path,
708    ) -> Result<SymbolResolution, LinkerError> {
709        let resolver = SymbolResolver::new(self);
710        resolver.resolve_path(caller, Span::new(caller.span, path))
711    }
712
713    /// Resolves the user-defined type signature of the given procedure to the HIR type signature
714    pub(super) fn resolve_signature(
715        &self,
716        gid: GlobalItemIndex,
717    ) -> Result<Option<Arc<types::FunctionType>>, LinkerError> {
718        match self[gid].item() {
719            SymbolItem::Compiled(ItemInfo::Procedure(proc)) => Ok(proc.signature.clone()),
720            SymbolItem::Procedure(proc) => {
721                let proc = proc.borrow();
722                match proc.signature() {
723                    Some(ty) => self.translate_function_type(gid.module, ty).map(Some),
724                    None => Ok(None),
725                }
726            },
727            SymbolItem::Alias { alias, resolved } => {
728                if let Some(resolved) = resolved.get() {
729                    return self.resolve_signature(resolved);
730                }
731
732                let context = SymbolResolutionContext {
733                    span: alias.target().span(),
734                    module: gid.module,
735                    kind: Some(InvokeKind::ProcRef),
736                };
737                let resolution = self.resolve_alias_target(&context, alias)?;
738                match resolution {
739                    // If we get back a MAST root resolution, it's a phantom digest
740                    SymbolResolution::MastRoot(_) => Ok(None),
741                    SymbolResolution::Exact { gid, .. } => self.resolve_signature(gid),
742                    SymbolResolution::Module { .. }
743                    | SymbolResolution::Local(_)
744                    | SymbolResolution::External(_) => unreachable!(),
745                }
746            },
747            SymbolItem::Compiled(_) | SymbolItem::Constant(_) | SymbolItem::Type(_) => {
748                panic!("procedure index unexpectedly refers to non-procedure item")
749            },
750        }
751    }
752
753    fn translate_function_type(
754        &self,
755        module_index: ModuleIndex,
756        ty: &ast::FunctionType,
757    ) -> Result<Arc<types::FunctionType>, LinkerError> {
758        use miden_assembly_syntax::ast::TypeResolver;
759
760        let cc = ty.cc;
761        let mut args = Vec::with_capacity(ty.args.len());
762
763        let symbol_resolver = SymbolResolver::new(self);
764        let mut cache = ResolverCache::default();
765        let mut resolver = Resolver {
766            resolver: &symbol_resolver,
767            cache: &mut cache,
768            current_module: module_index,
769        };
770        for arg in ty.args.iter() {
771            if let Some(arg) = resolver.resolve(arg)? {
772                args.push(arg);
773            } else {
774                let span = arg.span();
775                return Err(LinkerError::UndefinedType {
776                    span,
777                    source_file: self.source_manager.get(span.source_id()).ok(),
778                });
779            }
780        }
781        let mut results = Vec::with_capacity(ty.results.len());
782        for result in ty.results.iter() {
783            if let Some(result) = resolver.resolve(result)? {
784                results.push(result);
785            } else {
786                let span = result.span();
787                return Err(LinkerError::UndefinedType {
788                    span,
789                    source_file: self.source_manager.get(span.source_id()).ok(),
790                });
791            }
792        }
793        Ok(Arc::new(types::FunctionType::new(cc, args, results)))
794    }
795
796    /// Resolves a [GlobalProcedureIndex] to the known attributes of that procedure
797    pub(super) fn resolve_attributes(
798        &self,
799        gid: GlobalItemIndex,
800    ) -> Result<AttributeSet, LinkerError> {
801        match self[gid].item() {
802            SymbolItem::Compiled(ItemInfo::Procedure(proc)) => Ok(proc.attributes.clone()),
803            SymbolItem::Procedure(proc) => {
804                let proc = proc.borrow();
805                Ok(proc.attributes().clone())
806            },
807            SymbolItem::Alias { alias, resolved } => {
808                if let Some(resolved) = resolved.get() {
809                    return self.resolve_attributes(resolved);
810                }
811
812                let context = SymbolResolutionContext {
813                    span: alias.target().span(),
814                    module: gid.module,
815                    kind: Some(InvokeKind::ProcRef),
816                };
817                let resolution = self.resolve_alias_target(&context, alias)?;
818                match resolution {
819                    SymbolResolution::MastRoot(_)
820                    | SymbolResolution::Local(_)
821                    | SymbolResolution::External(_) => Ok(AttributeSet::default()),
822                    SymbolResolution::Exact { gid, .. } => self.resolve_attributes(gid),
823                    SymbolResolution::Module { .. } => {
824                        unreachable!("expected resolver to raise error")
825                    },
826                }
827            },
828            SymbolItem::Compiled(_) | SymbolItem::Constant(_) | SymbolItem::Type(_) => {
829                panic!("procedure index unexpectedly refers to non-procedure item")
830            },
831        }
832    }
833
834    /// Resolves a [GlobalItemIndex] to a concrete [ast::types::Type]
835    pub(super) fn resolve_type(
836        &self,
837        span: SourceSpan,
838        gid: GlobalItemIndex,
839    ) -> Result<types::Type, LinkerError> {
840        use miden_assembly_syntax::ast::TypeResolver;
841
842        let symbol_resolver = SymbolResolver::new(self);
843        let mut cache = ResolverCache::default();
844        let mut resolver = Resolver {
845            cache: &mut cache,
846            resolver: &symbol_resolver,
847            current_module: gid.module,
848        };
849
850        resolver.get_type(span, gid)
851    }
852
853    /// Registers a [MastNodeId] as corresponding to a given [GlobalProcedureIndex].
854    ///
855    /// # SAFETY
856    ///
857    /// It is essential that the caller _guarantee_ that the given digest belongs to the specified
858    /// procedure. It is fine if there are multiple procedures with the same digest, but it _must_
859    /// be the case that if a given digest is specified, it can be used as if it was the definition
860    /// of the referenced procedure, i.e. they are referentially transparent.
861    pub(crate) fn register_procedure_root(
862        &mut self,
863        id: GlobalItemIndex,
864        procedure_mast_root: Word,
865    ) -> Result<(), LinkerError> {
866        use alloc::collections::btree_map::Entry;
867        match self.procedures_by_mast_root.entry(procedure_mast_root) {
868            Entry::Occupied(ref mut entry) => {
869                let prev_id = entry.get()[0];
870                if prev_id != id {
871                    // Multiple procedures with the same root, but compatible
872                    entry.get_mut().push(id);
873                }
874            },
875            Entry::Vacant(entry) => {
876                entry.insert(smallvec![id]);
877            },
878        }
879
880        Ok(())
881    }
882
883    /// Resolve a [Path] to a [ModuleIndex] in this graph
884    pub fn find_module_index(&self, path: &Path) -> Option<ModuleIndex> {
885        self.modules.iter().position(|m| path == m.path()).map(ModuleIndex::new)
886    }
887
888    /// Resolve a [Path] to a [Module] in this graph
889    pub fn find_module(&self, path: &Path) -> Option<&LinkModule> {
890        self.modules.iter().find(|m| path == m.path())
891    }
892}
893
894/// Const evaluation
895impl Linker {
896    /// Evaluate `expr` to a concrete constant value, in the context of the given item.
897    pub(super) fn const_eval(
898        &self,
899        gid: GlobalItemIndex,
900        expr: &ast::ConstantExpr,
901        cache: &mut ResolverCache,
902    ) -> Result<ast::ConstantValue, LinkerError> {
903        let symbol_resolver = SymbolResolver::new(self);
904        let mut resolver = Resolver {
905            resolver: &symbol_resolver,
906            cache,
907            current_module: gid.module,
908        };
909
910        ast::constants::eval::expr(expr, &mut resolver).map(|expr| expr.expect_value())
911    }
912}
913
914impl Index<ModuleIndex> for Linker {
915    type Output = LinkModule;
916
917    fn index(&self, index: ModuleIndex) -> &Self::Output {
918        &self.modules[index.as_usize()]
919    }
920}
921
922impl Index<GlobalItemIndex> for Linker {
923    type Output = Symbol;
924
925    fn index(&self, index: GlobalItemIndex) -> &Self::Output {
926        &self.modules[index.module.as_usize()][index.index]
927    }
928}
929
930#[cfg(test)]
931mod tests {
932    use std::{
933        panic::{AssertUnwindSafe, catch_unwind},
934        string::String,
935        sync::Arc,
936    };
937
938    use miden_assembly_syntax::{
939        ast::{
940            Ident, InvocationTarget, InvokeKind, ItemIndex, Path, SymbolResolutionError,
941            Visibility, types,
942        },
943        debuginfo::{SourceSpan, Span},
944        library::{ItemInfo, TypeInfo},
945    };
946
947    use super::*;
948    use crate::testing::{TestContext, source_file};
949
950    #[test]
951    fn failed_kernel_link_restores_kernel_state() {
952        let context = TestContext::default();
953        let source_manager = context.source_manager();
954        let kernel_source = r#"
955                pub proc a
956                    call.b
957                end
958
959                proc b
960                    call.a
961                end
962                "#;
963
964        let userspace = context
965            .parse_module_with_path(
966                "userspace",
967                source_file!(
968                    &context,
969                    r#"
970                    pub proc helper
971                        push.1
972                    end
973                    "#
974                ),
975            )
976            .expect("userspace module parsing must succeed");
977
978        let mut linker = Linker::new(source_manager);
979        let userspace_index = linker
980            .link([userspace])
981            .expect("userspace module must link successfully")
982            .into_iter()
983            .next()
984            .expect("linked module index must be returned");
985
986        let first_err = linker
987            .link_kernel(
988                context
989                    .parse_kernel(source_file!(&context, kernel_source))
990                    .expect("kernel parsing must succeed"),
991            )
992            .expect_err("expected cyclic kernel to be rejected");
993
994        assert!(first_err.to_string().contains("found a cycle in the call graph"));
995        assert!(!linker.has_nonempty_kernel(), "failed kernel link must not leave a kernel set");
996        assert_eq!(linker[userspace_index].kind(), ast::ModuleKind::Library);
997
998        let second_err = linker
999            .link_kernel(
1000                context
1001                    .parse_kernel(source_file!(&context, kernel_source))
1002                    .expect("kernel parsing must succeed"),
1003            )
1004            .expect_err("expected cyclic kernel retry to be rejected");
1005        assert!(second_err.to_string().contains("found a cycle in the call graph"));
1006        assert!(!second_err.to_string().contains("duplicate module"));
1007
1008        let syscall_context = SymbolResolutionContext {
1009            span: SourceSpan::UNKNOWN,
1010            module: userspace_index,
1011            kind: Some(InvokeKind::SysCall),
1012        };
1013        let err = linker
1014            .resolve_invoke_target(
1015                &syscall_context,
1016                &InvocationTarget::Symbol(Ident::new("a").expect("valid identifier")),
1017            )
1018            .expect_err("expected syscall without a linked kernel to be rejected");
1019        assert!(matches!(err, LinkerError::InvalidSysCallTarget { .. }));
1020    }
1021
1022    #[test]
1023    fn oversized_link_module_resolution_returns_structured_error() {
1024        let context = TestContext::default();
1025        let mut linker = Linker::new(context.source_manager());
1026        let module_id = ModuleIndex::new(0);
1027        let path = Arc::<Path>::from(Path::new("::m::huge"));
1028        let mut symbols = Vec::with_capacity(ItemIndex::MAX_ITEMS + 1);
1029
1030        for i in 0..=ItemIndex::MAX_ITEMS {
1031            let name = Ident::new(format!("a{i}")).expect("valid identifier");
1032            symbols.push(Symbol::new(
1033                name.clone(),
1034                Visibility::Private,
1035                LinkStatus::Unlinked,
1036                SymbolItem::Compiled(ItemInfo::Type(TypeInfo { name, ty: types::Type::Felt })),
1037            ));
1038        }
1039
1040        linker.modules.push(
1041            LinkModule::new(
1042                module_id,
1043                ast::ModuleKind::Library,
1044                LinkStatus::Unlinked,
1045                ModuleSource::Mast,
1046                path,
1047            )
1048            .with_symbols(symbols),
1049        );
1050
1051        let result = catch_unwind(AssertUnwindSafe(|| {
1052            linker[module_id].resolve(Span::unknown("a0"), &SymbolResolver::new(&linker))
1053        }));
1054
1055        let result = match result {
1056            Ok(result) => result,
1057            Err(panic) => {
1058                let message = panic
1059                    .downcast_ref::<&str>()
1060                    .copied()
1061                    .or_else(|| panic.downcast_ref::<String>().map(String::as_str))
1062                    .expect("panic payload should be a string");
1063                panic!("expected graceful error, got panic: {message}");
1064            },
1065        };
1066
1067        assert!(matches!(
1068            result,
1069            Err(err) if matches!(*err, SymbolResolutionError::TooManyItemsInModule { .. })
1070        ));
1071    }
1072}