Skip to main content

miden_assembly/linker/
mod.rs

1//! Assembly of a Miden Assembly project is comprised of four phases:
2//!
3//! 1. _Parsing_, where MASM sources are parsed into the AST data structure. Some light validation
4//!    is done in this phase, to catch invalid syntax, invalid immediate values (e.g. overflow), and
5//!    other simple checks that require little to no reasoning about surrounding context.
6//! 2. _Semantic analysis_, where initial validation of the AST is performed. This step catches
7//!    unused imports, references to undefined local symbols, orphaned doc comments, and other
8//!    checks that only require minimal module-local context. Initial symbol resolution is performed
9//!    here based on module-local context, as well as constant folding of expressions that can be
10//!    resolved locally. Symbols which refer to external items are unable to be fully processed as
11//!    part of this phase, and is instead left to the linking phase.
12//! 3. _Linking_, the most critical phase of compilation. During this phase, the assembler has the
13//!    full compilation graph available to it, and so this is where inter-module symbol references
14//!    are finally able to be resolved (or not, in which case appropriate errors are raised). This
15//!    is the phase where we catch cyclic references, references to undefined symbols, references to
16//!    non-public symbols from other modules, etc. Once all symbols are linked, the assembler is
17//!    free to compile all of the procedures to MAST, and generate a [crate::package::Package].
18//! 4. _Assembly_, the final phase, where all of the linked items provided to the assembler are
19//!    lowered to MAST, or to their final representations in the [crate::package::Package] produced
20//!    as the output of assembly. During this phase, it is expected that the compilation graph has
21//!    been validated by the linker, and we're simply processing the conversion to MAST.
22//!
23//! This module provides the implementation of the linker and its associated data structures. There
24//! are three primary parts:
25//!
26//! 1. The _call graph_, this is what tracks dependencies between procedures in the compilation
27//!    graph, and is used to ensure that all procedure references can be resolved to a MAST root
28//!    during final assembly.
29//! 2. The _symbol resolver_, this is what is responsible for computing symbol resolutions using
30//!    context-sensitive details about how a symbol is referenced. This context sensitivity is how
31//!    we are able to provide better diagnostics when invalid references are found. The resolver
32//!    shares part of it's implementation with the same infrastructure used for symbol resolution
33//!    that is performed during semantic analysis - the difference is that at link-time, we are
34//!    stricter about what happens when a symbol cannot be resolved correctly.
35//! 3. A set of _rewrites_, applied to symbols/modules at link-time, which rewrite the AST so that
36//!    all symbol references and constant expressions are fully resolved/folded. This is where any
37//!    final issues are discovered, and the AST is prepared for lowering to MAST.
38mod callgraph;
39mod debug;
40mod errors;
41mod library;
42mod module;
43mod namespaces;
44mod resolver;
45mod rewrites;
46mod symbols;
47
48use alloc::{boxed::Box, collections::BTreeMap, string::ToString, sync::Arc, vec::Vec};
49use core::{
50    cell::RefCell,
51    ops::{ControlFlow, Index},
52};
53
54use miden_assembly_syntax::{
55    Report,
56    ast::{
57        self, AttributeSet, GlobalItemIndex, InvocationTarget, ItemIndex, Module, ModuleIndex,
58        Path, SymbolResolution, Visibility, types,
59    },
60    debuginfo::{SourceManager, SourceSpan, Span, Spanned},
61    module::{ItemInfo, ModuleInfo},
62};
63use miden_core::{Word, advice::AdviceMap, mast::MastNodeId, program::Kernel};
64use miden_mast_package::Package as MastPackage;
65use smallvec::{SmallVec, smallvec};
66
67pub use self::{
68    callgraph::{CallGraph, CycleError},
69    errors::LinkerError,
70    library::{LinkLibrary, Linkage},
71    resolver::{ResolverCache, SymbolResolutionContext, SymbolResolver},
72    symbols::{Import, Symbol, SymbolItem},
73};
74use self::{
75    module::{LinkModule, ModuleSource},
76    namespaces::{NamespaceGraph, ResolvedImports},
77    resolver::*,
78};
79
80/// Represents the current status of a symbol in the state of the [Linker]
81#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
82pub enum LinkStatus {
83    /// The module or item has not been visited by the linker
84    #[default]
85    Unlinked,
86    /// The module or item has been visited by the linker, but still refers to one or more
87    /// unresolved symbols.
88    PartiallyLinked,
89    /// The module or item has been visited by the linker, and is fully linked and resolved
90    Linked,
91}
92
93// LINKER
94// ================================================================================================
95
96/// The [`Linker`] is responsible for analyzing the input modules and libraries provided to the
97/// assembler, and _linking_ them together.
98///
99/// The core conceptual data structure of the linker is the _module graph_, which is implemented
100/// by a vector of module nodes, and a _call graph_, which is implemented as an adjacency matrix
101/// of item nodes and the outgoing edges from those nodes, representing references from that item
102/// to another symbol (typically as the result of procedure invocation, hence "call" graph).
103///
104/// Each item/symbol known to the linker is given a _global item index_, which is actually a pair
105/// of indices: a _module index_ (which indexes into the vector of module nodes), and an _item
106/// index_ (which indexes into the items defined by a module). These global item indices function
107/// as a unique identifier within the linker, to a specific item, and can be resolved to either the
108/// original syntax tree of the item, or to metadata about the item retrieved from previously-
109/// assembled MAST.
110///
111/// The process of linking involves two phases:
112///
113/// 1. Setting up the linker context, by providing the set of inputs to link together
114/// 2. Analyzing and rewriting the symbols known to the linker, as needed, to ensure that all symbol
115///    references are resolved to concrete definitions.
116///
117/// The assembler will call [`Self::link`] once it has provided all inputs that it wants to link,
118/// which will, when successful, return the set of module indices corresponding to the modules that
119/// comprise the public interface of the assembled artifact. The assembler then constructs the MAST
120/// starting from the exported procedures of those modules, recursively tracing the call graph
121/// based on whether or not the callee is statically or dynamically linked. In the static linking
122/// case, any procedures referenced in a statically-linked library or module will be included in
123/// the assembled artifact. In the dynamic linking case, referenced procedures are instead
124/// referenced in the assembled artifact only by their MAST root.
125#[derive(Clone)]
126pub struct Linker {
127    /// The set of libraries to link against.
128    libraries: BTreeMap<Word, LinkLibrary>,
129    /// The global set of items known to the linker
130    modules: Vec<LinkModule>,
131    /// The global call graph of calls, not counting those that are performed directly via MAST
132    /// root.
133    callgraph: CallGraph,
134    /// The set of MAST roots which have procedure definitions in this graph. There can be
135    /// multiple procedures bound to the same root due to having identical code.
136    procedures_by_mast_root: BTreeMap<Word, SmallVec<[GlobalItemIndex; 1]>>,
137    /// The index of the kernel module in `modules`, if present
138    kernel_index: Option<ModuleIndex>,
139    /// The kernel library being linked against.
140    ///
141    /// This is always provided, with an empty kernel being the default.
142    kernel: Kernel,
143    kernel_package: Option<Arc<MastPackage>>,
144    /// The source manager to use when emitting diagnostics.
145    source_manager: Arc<dyn SourceManager>,
146}
147
148// ------------------------------------------------------------------------------------------------
149/// Constructors
150impl Linker {
151    /// Instantiate a new [Linker], using the provided [SourceManager] to resolve source info.
152    pub fn new(source_manager: Arc<dyn SourceManager>) -> Self {
153        Self {
154            libraries: Default::default(),
155            modules: Default::default(),
156            callgraph: Default::default(),
157            procedures_by_mast_root: Default::default(),
158            kernel_index: None,
159            kernel: Default::default(),
160            kernel_package: None,
161            source_manager,
162        }
163    }
164
165    /// Registers `library` and all of its modules with the linker, according to its linkage
166    pub fn link_library(&mut self, library: LinkLibrary) -> Result<(), LinkerError> {
167        use alloc::collections::btree_map::Entry;
168
169        let module_infos =
170            library.module_infos().map_err(|err| LinkerError::InvalidPackageModuleSurface {
171                package: library.package.name.to_string(),
172                reason: err.to_string(),
173            })?;
174
175        match self.libraries.entry(library.mast().commitment()) {
176            Entry::Vacant(entry) => {
177                entry.insert(library.clone());
178                self.link_assembled_modules(module_infos)
179            },
180            Entry::Occupied(mut entry) => {
181                let prev = entry.get_mut();
182
183                // If the same library is linked both dynamically and statically, prefer static
184                // linking always.
185                if matches!(prev.linkage, Linkage::Dynamic) {
186                    prev.linkage = library.linkage;
187                }
188
189                Ok(())
190            },
191        }
192    }
193
194    /// Registers a set of MAST modules with the linker.
195    ///
196    /// If called directly, the modules will default to being dynamically linked. You must use
197    /// [`Self::link_library`] if you wish to statically link a set of assembled modules.
198    pub fn link_assembled_modules(
199        &mut self,
200        modules: impl IntoIterator<Item = ModuleInfo>,
201    ) -> Result<(), LinkerError> {
202        for module in modules {
203            self.link_assembled_module(module)?;
204        }
205
206        Ok(())
207    }
208
209    /// Registers a MAST module with the linker.
210    ///
211    /// If called directly, the module will default to being dynamically linked. You must use
212    /// [`Self::link_library`] if you wish to statically link `module`.
213    pub fn link_assembled_module(
214        &mut self,
215        module: ModuleInfo,
216    ) -> Result<ModuleIndex, LinkerError> {
217        log::debug!(target: "linker", "adding pre-assembled module {} to module graph", module.path());
218
219        let module_path = module.path();
220        let is_duplicate = self.find_module_index(module_path).is_some();
221        if is_duplicate {
222            return Err(LinkerError::DuplicateModule {
223                path: module_path.to_path_buf().into_boxed_path().into(),
224            });
225        }
226
227        let module_index = self.next_module_id();
228        let submodules = module.submodules().to_vec();
229        let items = module.items();
230        let mut symbols = Vec::with_capacity(items.len());
231        for (idx, item) in items {
232            let gid = module_index + idx;
233            self.callgraph.get_or_insert_node(gid);
234            match &item {
235                ItemInfo::Procedure(item) => {
236                    self.register_procedure_root(gid, item.digest);
237                },
238                ItemInfo::Constant(_) | ItemInfo::Type(_) => (),
239            }
240            symbols.push(Symbol::new(
241                item.name().clone(),
242                Visibility::Public,
243                LinkStatus::Linked,
244                SymbolItem::Compiled(item.clone()),
245            ));
246        }
247
248        let link_module = LinkModule::new(
249            module_index,
250            ast::ModuleKind::Library,
251            LinkStatus::Linked,
252            ModuleSource::Mast,
253            module_path.into(),
254        )
255        .with_submodules(submodules)
256        .with_symbols(symbols);
257
258        self.modules.push(link_module);
259        Ok(module_index)
260    }
261
262    /// Registers a set of AST modules with the linker.
263    ///
264    /// See [`Self::link_module`] for more details.
265    pub fn link_modules(
266        &mut self,
267        modules: impl IntoIterator<Item = Box<Module>>,
268    ) -> Result<Vec<ModuleIndex>, LinkerError> {
269        modules.into_iter().map(|mut m| self.link_module(&mut m)).collect()
270    }
271
272    /// Registers an AST module with the linker.
273    ///
274    /// A module provided to this method is presumed to be dynamically linked, unless specifically
275    /// handled otherwise by the assembler. In particular, the assembler will only statically link
276    /// the set of AST modules provided to [`Self::link`], as they are expected to comprise the
277    /// public interface of the assembled artifact.
278    ///
279    /// # Errors
280    ///
281    /// This operation can fail for the following reasons:
282    ///
283    /// * Module with same [Path] is in the graph already
284    /// * Too many modules in the graph
285    ///
286    /// # Panics
287    ///
288    /// This function will panic if the number of modules exceeds the maximum representable
289    /// [ModuleIndex] value, `u16::MAX`.
290    pub fn link_module(&mut self, module: &mut Module) -> Result<ModuleIndex, LinkerError> {
291        log::debug!(target: "linker", "adding unprocessed module {}", module.path());
292
293        let is_duplicate = self.find_module_index(module.path()).is_some();
294        if is_duplicate {
295            return Err(LinkerError::DuplicateModule { path: module.path().into() });
296        }
297
298        let module_index = self.next_module_id();
299        let submodules = module.submodules().to_vec();
300        let mut symbols = Vec::new();
301        let imports = module.take_imports().into_iter().map(Import::new).collect::<Vec<_>>();
302        for item in module.take_items() {
303            match item {
304                ast::Item::Type(item) => {
305                    let gid = module_index + ItemIndex::new(symbols.len());
306                    self.callgraph.get_or_insert_node(gid);
307                    symbols.push(Symbol::new(
308                        item.name().clone(),
309                        item.visibility(),
310                        LinkStatus::Unlinked,
311                        SymbolItem::Type(item),
312                    ));
313                },
314                ast::Item::Constant(item) => {
315                    let gid = module_index + ItemIndex::new(symbols.len());
316                    self.callgraph.get_or_insert_node(gid);
317                    symbols.push(Symbol::new(
318                        item.name().clone(),
319                        item.visibility,
320                        LinkStatus::Unlinked,
321                        SymbolItem::Constant(item),
322                    ));
323                },
324                ast::Item::Procedure(item) => {
325                    let gid = module_index + ItemIndex::new(symbols.len());
326                    self.callgraph.get_or_insert_node(gid);
327                    symbols.push(Symbol::new(
328                        item.name().clone().into(),
329                        item.visibility(),
330                        LinkStatus::Unlinked,
331                        SymbolItem::Procedure(RefCell::new(Box::new(item))),
332                    ));
333                },
334            }
335        }
336        let link_module = LinkModule::new(
337            module_index,
338            module.kind(),
339            LinkStatus::Unlinked,
340            ModuleSource::Ast,
341            module.path().into(),
342        )
343        .with_advice_map(module.advice_map().clone())
344        .with_submodules(submodules)
345        .with_imports(imports)
346        .with_symbols(symbols);
347
348        self.modules.push(link_module);
349        Ok(module_index)
350    }
351
352    #[inline]
353    fn next_module_id(&self) -> ModuleIndex {
354        ModuleIndex::new(self.modules.len())
355    }
356}
357
358// ------------------------------------------------------------------------------------------------
359/// Kernels
360impl Linker {
361    /// Returns a new [Linker] instantiated from the provided kernel and kernel info module.
362    ///
363    /// Note: it is assumed that kernel and kernel_module are consistent, but this is not checked.
364    pub(super) fn with_kernel(
365        source_manager: Arc<dyn SourceManager>,
366        kernel_package: Arc<MastPackage>,
367    ) -> Result<Self, Report> {
368        log::debug!(target: "linker", "instantiating linker with kernel package {}@{}", kernel_package.name, kernel_package.version);
369
370        let mut linker = Self::new(source_manager);
371        linker.link_with_kernel(kernel_package)?;
372
373        Ok(linker)
374    }
375
376    /// Add a kernel to the linker after the linker is initially constructed.
377    ///
378    /// This cannot cause any issues with modules already added to the linker (if any), as they
379    /// cannot have directly depended on the kernel, or an error would have been raised.
380    ///
381    /// This will panic if the kernel is empty, or the provided kernel module info is not valid for
382    /// a kernel.
383    pub(super) fn link_with_kernel(
384        &mut self,
385        kernel_package: Arc<MastPackage>,
386    ) -> Result<(), Report> {
387        if !kernel_package.is_kernel() {
388            return Err(Report::msg("invalid kernel package: not a kernel"));
389        }
390        let kernel = kernel_package.to_kernel()?;
391        if kernel.is_empty() {
392            return Err(Report::msg("invalid kernel package: kernel cannot be empty"));
393        }
394        assert!(self.kernel.is_empty());
395        assert!(self.kernel_package.is_none());
396
397        log::debug!(target: "linker", "modifying linker with kernel package {}@{}", kernel_package.name, kernel_package.version);
398
399        let mut kernel_index = None;
400        let module_infos = kernel_package.try_module_infos().map_err(|err| {
401            LinkerError::InvalidPackageModuleSurface {
402                package: kernel_package.name.to_string(),
403                reason: err.to_string(),
404            }
405        })?;
406        for module_info in module_infos {
407            let is_kernel_module = module_info.path().is_kernel_path();
408            let module_index = self.link_assembled_module(module_info)?;
409            if is_kernel_module {
410                kernel_index = Some(module_index);
411            }
412        }
413        assert!(kernel_index.is_some());
414
415        self.kernel_index = kernel_index;
416        self.kernel = kernel;
417        self.kernel_package = Some(kernel_package);
418
419        Ok(())
420    }
421
422    pub fn kernel(&self) -> &Kernel {
423        &self.kernel
424    }
425
426    pub fn kernel_package(&self) -> Option<Arc<MastPackage>> {
427        self.kernel_package.clone()
428    }
429
430    pub fn has_nonempty_kernel(&self) -> bool {
431        self.kernel_index.is_some() || !self.kernel.is_empty()
432    }
433}
434
435// ------------------------------------------------------------------------------------------------
436/// Analysis
437impl Linker {
438    fn cycle_error(&self, cycle: CycleError) -> LinkerError {
439        let iter = cycle.into_node_ids();
440        let mut nodes = Vec::with_capacity(iter.len());
441        for node in iter {
442            let module = self[node.module].path();
443            let item = self[node].name();
444            nodes.push(module.join(item).to_string());
445        }
446        LinkerError::Cycle { nodes: nodes.into() }
447    }
448
449    /// Links the modules in `roots` and `support` using the current state of the linker.
450    ///
451    /// Returns the module indices corresponding to the public interface of the final assembled
452    /// artifact. This is determined by tracing the modules reachable from `roots` via their public
453    /// submodules. Any module in the graph reachable this way is returned as part of the public
454    /// interface.
455    pub fn link(
456        &mut self,
457        roots: impl IntoIterator<Item = Box<Module>>,
458        support: impl IntoIterator<Item = Box<Module>>,
459    ) -> Result<Vec<ModuleIndex>, LinkerError> {
460        use alloc::collections::BTreeSet;
461
462        let root_indices = self.link_modules(roots)?;
463        let _support_indices = self.link_modules(support)?;
464        let namespaces = NamespaceGraph::build(self)?;
465        let imports = namespaces.resolve_imports(self)?;
466
467        self.link_and_rewrite(&namespaces, &imports)?;
468
469        let mut reachable = BTreeSet::new();
470
471        for root in root_indices {
472            reachable.extend(namespaces.reachable_from_root(root));
473        }
474
475        Ok(reachable.into_iter().collect())
476    }
477
478    /// Links `kernel` using the current state of the linker.
479    ///
480    /// Returns the module index of the kernel module, which is expected to provide the public
481    /// interface of the final assembled kernel.
482    ///
483    /// This differs from `link` in that we allow all AST modules in the module graph access to
484    /// kernel features, e.g. `caller`, as if they are defined by the kernel module itself.
485    pub fn link_kernel(
486        &mut self,
487        mut kernel: Box<Module>,
488        support: impl IntoIterator<Item = Box<Module>>,
489    ) -> Result<Vec<ModuleIndex>, LinkerError> {
490        self.link_modules(support)?;
491        let original_module_len = self.modules.len();
492        let original_callgraph = self.callgraph.clone();
493        let module_index = self.link_module(&mut kernel)?;
494        let original_kernel_index = self.kernel_index;
495        let original_module_kinds = self
496            .modules
497            .iter()
498            .enumerate()
499            .take(module_index.as_usize())
500            .filter(|(_, module)| matches!(module.source(), ModuleSource::Ast))
501            .map(|(module_index, module)| (module_index, module.kind()))
502            .collect::<Vec<_>>();
503
504        // Set the module kind of all pending AST modules to Kernel, as we are linking a kernel
505        for module in self.modules.iter_mut().take(module_index.as_usize()) {
506            if matches!(module.source(), ModuleSource::Ast) {
507                module.set_kind(ast::ModuleKind::Kernel);
508            }
509        }
510
511        self.kernel_index = Some(module_index);
512
513        let result = (|| {
514            let namespaces = NamespaceGraph::build(self)?;
515            let imports = namespaces.resolve_imports(self)?;
516            self.link_and_rewrite(&namespaces, &imports)?;
517
518            Ok(namespaces.reachable_from_root(module_index))
519        })();
520
521        match result {
522            ok @ Ok(_) => ok,
523            err => {
524                self.kernel_index = original_kernel_index;
525                self.callgraph = original_callgraph;
526                self.modules.truncate(original_module_len);
527                for (module_index, module_kind) in original_module_kinds {
528                    self.modules[module_index].set_kind(module_kind);
529                }
530
531                err
532            },
533        }
534    }
535
536    /// Compute the module graph from the set of pending modules, and link it, rewriting any AST
537    /// modules with unresolved, or partially-resolved, symbol references.
538    ///
539    /// This should be called any time you add more libraries or modules to the module graph, to
540    /// ensure that the graph is valid, and that there are no unresolved references. In general,
541    /// you will only instantiate the linker, build up the graph, and link a single time; but you
542    /// can re-use the linker to build multiple artifacts as well.
543    ///
544    /// When this function is called, some initial information is calculated about the AST modules
545    /// which are to be added to the graph, and then each module is visited to perform a deeper
546    /// analysis than can be done by the `sema` module, as we now have the full set of modules
547    /// available to do import resolution, and to rewrite invoke targets with their absolute paths
548    /// and/or MAST roots. A variety of issues are caught at this stage.
549    ///
550    /// Once each module is validated, the various analysis results stored as part of the graph
551    /// structure are updated to reflect that module being added to the graph. Once part of the
552    /// graph, the module becomes immutable/clone-on-write, so as to allow the graph to be
553    /// cheaply cloned.
554    ///
555    /// The final, and most important, analysis done by this function is the topological sort of
556    /// the global call graph, which contains the inter-procedural dependencies of every procedure
557    /// in the module graph. We use this sort order to do two things:
558    ///
559    /// 1. Verify that there are no static cycles in the graph that would prevent us from being able
560    ///    to hash the generated MAST of the program. NOTE: dynamic cycles, e.g. those induced by
561    ///    `dynexec`, are perfectly fine, we are only interested in preventing cycles that interfere
562    ///    with the ability to generate MAST roots.
563    ///
564    /// 2. Visit the call graph bottom-up, so that we can fully compile a procedure before any of
565    ///    its callers, and thus rewrite those callers to reference that procedure by MAST root,
566    ///    rather than by name. As a result, a compiled MAST program is like an immutable snapshot
567    ///    of the entire call graph at the time of compilation. Later, if we choose to recompile a
568    ///    subset of modules (currently we do not have support for this in the assembler API), we
569    ///    can re-analyze/re-compile only those parts of the graph which have actually changed.
570    ///
571    /// NOTE: This will return `Err` if we detect a validation error, a cycle in the graph, or an
572    /// operation not supported by the current configuration. Basically, for any reason that would
573    /// cause the resulting graph to represent an invalid program.
574    fn link_and_rewrite(
575        &mut self,
576        namespaces: &NamespaceGraph,
577        imports: &ResolvedImports,
578    ) -> Result<(), LinkerError> {
579        log::debug!(
580            target: "linker",
581            "processing {} unlinked/partially-linked modules, and recomputing module graph",
582            self.modules.iter().filter(|m| !m.is_linked()).count()
583        );
584
585        // It is acceptable for there to be no changes, but if the graph is empty and no changes
586        // are being made, we treat that as an error
587        if self.modules.is_empty() {
588            return Err(LinkerError::Empty);
589        }
590
591        // If no changes are being made, we're done
592        if self.modules.iter().all(LinkModule::is_linked) {
593            return Ok(());
594        }
595
596        // Obtain a set of resolvers for the pending modules so that we can do name resolution
597        // before they are added to the graph
598        let pending_modules = self
599            .modules
600            .iter()
601            .enumerate()
602            .filter(|(_, module)| module.is_unlinked())
603            .map(|(module_index, module)| (module_index, module.clone()))
604            .collect::<Vec<_>>();
605        let original_callgraph = self.callgraph.clone();
606
607        let result = {
608            let resolver = SymbolResolver::with_namespaces(self, namespaces, imports);
609            let mut edges = Vec::new();
610            let mut cache = ResolverCache::default();
611            let mut linked_modules = Vec::new();
612
613            for (module_index, module) in self.modules.iter().enumerate() {
614                if !module.is_unlinked() {
615                    continue;
616                }
617
618                let module_index = ModuleIndex::new(module_index);
619
620                for import in module.imports() {
621                    if let Some(namespaces::ResolvedUse::Item(gid)) =
622                        imports.get(module_index, import.local_name().as_str())
623                    {
624                        import.set_resolved(gid);
625                    }
626                }
627
628                for (symbol_idx, symbol) in module.symbols().enumerate() {
629                    let gid = module_index + ItemIndex::new(symbol_idx);
630
631                    // Perform any applicable rewrites to this item
632                    rewrites::rewrite_symbol(gid, symbol, &resolver, &mut cache)?;
633
634                    // Update the linker graph
635                    match symbol.item() {
636                        SymbolItem::Compiled(_) | SymbolItem::Type(_) | SymbolItem::Constant(_) => {
637                        },
638                        SymbolItem::Procedure(proc) => {
639                            // Add edges to all transitive dependencies of this item due to
640                            // calls/symbol refs
641                            let proc = proc.borrow();
642                            for invoke in proc.invoked() {
643                                log::debug!(target: "linker", "  | recording {} dependency on {}", invoke.kind, invoke.target);
644
645                                let context = SymbolResolutionContext {
646                                    span: invoke.span(),
647                                    module: module_index,
648                                    kind: Some(invoke.kind),
649                                };
650                                if let Some(callee) = resolver
651                                    .resolve_invoke_target(&context, &invoke.target)?
652                                    .into_global_id()
653                                {
654                                    log::debug!(
655                                        target: "linker",
656                                        "  | resolved dependency to gid {}:{}",
657                                        callee.module.as_usize(),
658                                        callee.index.as_usize()
659                                    );
660                                    edges.push((gid, callee));
661                                }
662                            }
663                        },
664                    }
665                }
666
667                linked_modules.push(module_index);
668            }
669
670            let mut callgraph = self.callgraph.clone();
671            for (caller, callee) in edges {
672                callgraph.add_edge(caller, callee).map_err(|cycle| self.cycle_error(cycle))?;
673            }
674
675            // Make sure the graph is free of cycles
676            callgraph.toposort().map_err(|cycle| self.cycle_error(cycle))?;
677
678            Ok::<_, LinkerError>((linked_modules, callgraph))
679        };
680
681        match result {
682            Ok((linked_modules, callgraph)) => {
683                self.callgraph = callgraph;
684                for module_index in linked_modules {
685                    self.modules[module_index.as_usize()].set_status(LinkStatus::Linked);
686                }
687            },
688            Err(err) => {
689                self.callgraph = original_callgraph;
690                for (module_index, module) in pending_modules {
691                    self.modules[module_index] = module;
692                }
693                return Err(err);
694            },
695        }
696
697        Ok(())
698    }
699}
700
701// ------------------------------------------------------------------------------------------------
702/// Accessors/Queries
703impl Linker {
704    /// Get an iterator over the external libraries the linker has linked against
705    pub fn libraries(&self) -> impl Iterator<Item = &LinkLibrary> {
706        self.libraries.values()
707    }
708
709    /// Compute the topological sort of the callgraph rooted at `caller`
710    pub fn topological_sort_from_root(
711        &self,
712        caller: GlobalItemIndex,
713    ) -> Result<Vec<GlobalItemIndex>, CycleError> {
714        self.callgraph.toposort_caller(caller)
715    }
716
717    /// Returns a procedure index which corresponds to the provided procedure digest.
718    ///
719    /// Note that there can be many procedures with the same digest. This method returns an
720    /// arbitrary one.
721    pub fn get_procedure_index_by_digest(
722        &self,
723        procedure_digest: &Word,
724    ) -> Option<GlobalItemIndex> {
725        self.procedures_by_mast_root.get(procedure_digest).map(|indices| indices[0])
726    }
727
728    /// Returns a conflicting export root when a dynamic library cannot identify an exact procedure
729    /// by digest alone.
730    pub fn conflicting_dynamic_procedure_export_root(
731        &self,
732        source_library_commitment: Word,
733        mast_root: Word,
734        selected_root_id: MastNodeId,
735    ) -> Option<MastNodeId> {
736        let library = self.libraries.get(&source_library_commitment)?;
737        if !matches!(library.linkage, Linkage::Dynamic) {
738            return None;
739        }
740
741        library
742            .module_infos()
743            .ok()?
744            .into_iter()
745            .flat_map(|module| {
746                module
747                    .procedures()
748                    .filter_map(|(_, proc)| {
749                        (proc.digest == mast_root).then(|| proc.source_root_id()).flatten()
750                    })
751                    .collect::<Vec<_>>()
752            })
753            .find(|&root_id| root_id != selected_root_id)
754    }
755
756    /// Resolves `target` from the perspective of `caller`.
757    pub fn resolve_invoke_target(
758        &self,
759        caller: &SymbolResolutionContext,
760        target: &InvocationTarget,
761    ) -> Result<SymbolResolution, LinkerError> {
762        let namespaces = NamespaceGraph::build(self)?;
763        let imports = namespaces.resolve_imports(self)?;
764        let resolver = SymbolResolver::with_namespaces(self, &namespaces, &imports);
765        resolver.resolve_invoke_target(caller, target)
766    }
767
768    /// Resolves `path` from the perspective of `caller`.
769    pub fn resolve_path(
770        &self,
771        caller: &SymbolResolutionContext,
772        path: &Path,
773    ) -> Result<SymbolResolution, LinkerError> {
774        let namespaces = NamespaceGraph::build(self)?;
775        let imports = namespaces.resolve_imports(self)?;
776        let resolver = SymbolResolver::with_namespaces(self, &namespaces, &imports);
777        resolver.resolve_path(caller, Span::new(caller.span, path))
778    }
779
780    /// Resolves the user-defined type signature of the given procedure to the HIR type signature
781    pub(super) fn resolve_signature(
782        &self,
783        gid: GlobalItemIndex,
784    ) -> Result<Option<Arc<types::FunctionType>>, LinkerError> {
785        match self[gid].item() {
786            SymbolItem::Compiled(ItemInfo::Procedure(proc)) => Ok(proc.signature.clone()),
787            SymbolItem::Procedure(proc) => {
788                let proc = proc.borrow();
789                match proc.signature() {
790                    Some(ty) => self.translate_function_type(gid.module, ty).map(Some),
791                    None => Ok(None),
792                }
793            },
794            SymbolItem::Compiled(_) | SymbolItem::Constant(_) | SymbolItem::Type(_) => {
795                panic!("procedure index unexpectedly refers to non-procedure item")
796            },
797        }
798    }
799
800    fn translate_function_type(
801        &self,
802        module_index: ModuleIndex,
803        ty: &ast::FunctionType,
804    ) -> Result<Arc<types::FunctionType>, LinkerError> {
805        use miden_assembly_syntax::ast::TypeResolver;
806
807        let cc = ty.cc;
808        let mut args = Vec::with_capacity(ty.args.len());
809
810        let symbol_resolver = SymbolResolver::new(self);
811        let mut cache = ResolverCache::default();
812        let mut resolver = Resolver {
813            resolver: &symbol_resolver,
814            cache: &mut cache,
815            current_module: module_index,
816        };
817        for arg in ty.args.iter() {
818            if let Some(arg) = resolver.resolve(arg)? {
819                args.push(arg);
820            } else {
821                let span = arg.span();
822                return Err(LinkerError::UndefinedType {
823                    span,
824                    source_file: self.source_manager.get(span.source_id()).ok(),
825                });
826            }
827        }
828        let mut results = Vec::with_capacity(ty.results.len());
829        for result in ty.results.iter() {
830            if let Some(result) = resolver.resolve(result)? {
831                results.push(result);
832            } else {
833                let span = result.span();
834                return Err(LinkerError::UndefinedType {
835                    span,
836                    source_file: self.source_manager.get(span.source_id()).ok(),
837                });
838            }
839        }
840        Ok(Arc::new(types::FunctionType::new(cc, args, results)))
841    }
842
843    /// Resolves a [GlobalProcedureIndex] to the known attributes of that procedure
844    pub(super) fn resolve_attributes(&self, gid: GlobalItemIndex) -> AttributeSet {
845        match self[gid].item() {
846            SymbolItem::Compiled(ItemInfo::Procedure(proc)) => proc.attributes.clone(),
847            SymbolItem::Procedure(proc) => {
848                let proc = proc.borrow();
849                proc.attributes().clone()
850            },
851            SymbolItem::Compiled(_) | SymbolItem::Constant(_) | SymbolItem::Type(_) => {
852                panic!("procedure index unexpectedly refers to non-procedure item")
853            },
854        }
855    }
856
857    /// Resolves a [GlobalItemIndex] to a concrete [ast::types::Type]
858    pub(super) fn resolve_type(
859        &self,
860        span: SourceSpan,
861        gid: GlobalItemIndex,
862    ) -> Result<types::Type, LinkerError> {
863        use miden_assembly_syntax::ast::TypeResolver;
864
865        let symbol_resolver = SymbolResolver::new(self);
866        let mut cache = ResolverCache::default();
867        let mut resolver = Resolver {
868            cache: &mut cache,
869            resolver: &symbol_resolver,
870            current_module: gid.module,
871        };
872
873        resolver.get_type(span, gid)
874    }
875
876    /// Registers a [MastNodeId] as corresponding to a given [GlobalProcedureIndex].
877    ///
878    /// # SAFETY
879    ///
880    /// It is essential that the caller _guarantee_ that the given digest belongs to the specified
881    /// procedure. It is fine if there are multiple procedures with the same digest, but it _must_
882    /// be the case that if a given digest is specified, it can be used as if it was the definition
883    /// of the referenced procedure, i.e. they are referentially transparent.
884    pub(crate) fn register_procedure_root(
885        &mut self,
886        id: GlobalItemIndex,
887        procedure_mast_root: Word,
888    ) {
889        use alloc::collections::btree_map::Entry;
890        match self.procedures_by_mast_root.entry(procedure_mast_root) {
891            Entry::Occupied(ref mut entry) => {
892                let prev_id = entry.get()[0];
893                if prev_id != id {
894                    // Multiple procedures with the same root, but compatible
895                    entry.get_mut().push(id);
896                }
897            },
898            Entry::Vacant(entry) => {
899                entry.insert(smallvec![id]);
900            },
901        }
902    }
903
904    /// Resolve a [Path] to a [ModuleIndex] in this graph
905    pub fn find_module_index(&self, path: &Path) -> Option<ModuleIndex> {
906        self.modules.iter().position(|m| path == m.path()).map(ModuleIndex::new)
907    }
908
909    /// Resolve a [Path] to a [Module] in this graph
910    pub fn find_module(&self, path: &Path) -> Option<&LinkModule> {
911        self.modules.iter().find(|m| path == m.path())
912    }
913}
914
915/// Const evaluation
916impl Linker {
917    /// Evaluate `expr` to a concrete constant value, in the context of the given item.
918    pub(super) fn const_eval(
919        &self,
920        gid: GlobalItemIndex,
921        expr: &ast::ConstantExpr,
922        cache: &mut ResolverCache,
923    ) -> Result<ast::ConstantValue, LinkerError> {
924        let symbol_resolver = SymbolResolver::new(self);
925        let mut resolver = Resolver {
926            resolver: &symbol_resolver,
927            cache,
928            current_module: gid.module,
929        };
930
931        ast::constants::eval::expr(expr, &mut resolver).map(|expr| expr.expect_value())
932    }
933}
934
935impl Index<ModuleIndex> for Linker {
936    type Output = LinkModule;
937
938    fn index(&self, index: ModuleIndex) -> &Self::Output {
939        &self.modules[index.as_usize()]
940    }
941}
942
943impl Index<GlobalItemIndex> for Linker {
944    type Output = Symbol;
945
946    fn index(&self, index: GlobalItemIndex) -> &Self::Output {
947        &self.modules[index.module.as_usize()][index.index]
948    }
949}
950
951#[cfg(test)]
952mod tests {
953    use std::{
954        panic::{AssertUnwindSafe, catch_unwind},
955        string::String,
956        sync::Arc,
957    };
958
959    use miden_assembly_syntax::{
960        ast::{
961            Ident, InvocationTarget, InvokeKind, ItemIndex, Path, SymbolResolutionError,
962            Visibility, types,
963        },
964        debuginfo::{SourceSpan, Span},
965        module::{ItemInfo, TypeInfo},
966    };
967
968    use super::*;
969    use crate::testing::{TestContext, source_file};
970
971    #[test]
972    fn failed_kernel_link_restores_kernel_state() {
973        let context = TestContext::default();
974        let source_manager = context.source_manager();
975        let kernel_source = r#"
976                pub proc a
977                    call.b
978                end
979
980                proc b
981                    call.a
982                end
983                "#;
984
985        let userspace = context
986            .parse_module(source_file!(
987                &context,
988                r#"
989                    namespace userspace
990
991                    pub proc helper
992                        push.1
993                    end
994                    "#
995            ))
996            .expect("userspace module parsing must succeed");
997
998        let mut linker = Linker::new(source_manager);
999        let userspace_index = linker
1000            .link([userspace], None)
1001            .expect("userspace module must link successfully")
1002            .into_iter()
1003            .next()
1004            .expect("linked module index must be returned");
1005
1006        let first_err = linker
1007            .link_kernel(
1008                context
1009                    .parse_kernel(source_file!(&context, kernel_source))
1010                    .expect("kernel parsing must succeed"),
1011                None,
1012            )
1013            .expect_err("expected cyclic kernel to be rejected");
1014
1015        assert!(first_err.to_string().contains("found a cycle in the call graph"));
1016        assert!(!linker.has_nonempty_kernel(), "failed kernel link must not leave a kernel set");
1017        assert_eq!(linker[userspace_index].kind(), ast::ModuleKind::Library);
1018
1019        let second_err = linker
1020            .link_kernel(
1021                context
1022                    .parse_kernel(source_file!(&context, kernel_source))
1023                    .expect("kernel parsing must succeed"),
1024                None,
1025            )
1026            .expect_err("expected cyclic kernel retry to be rejected");
1027        assert!(second_err.to_string().contains("found a cycle in the call graph"));
1028        assert!(!second_err.to_string().contains("duplicate module"));
1029
1030        let syscall_context = SymbolResolutionContext {
1031            span: SourceSpan::UNKNOWN,
1032            module: userspace_index,
1033            kind: Some(InvokeKind::SysCall),
1034        };
1035        let err = linker
1036            .resolve_invoke_target(
1037                &syscall_context,
1038                &InvocationTarget::Symbol(Ident::new("a").expect("valid identifier")),
1039            )
1040            .expect_err("expected syscall without a linked kernel to be rejected");
1041        assert!(matches!(err, LinkerError::InvalidSysCallTarget { .. }));
1042    }
1043
1044    #[test]
1045    fn oversized_link_module_resolution_returns_structured_error() {
1046        let context = TestContext::default();
1047        let mut linker = Linker::new(context.source_manager());
1048        let module_id = ModuleIndex::new(0);
1049        let path = Arc::<Path>::from(Path::new("::m::huge"));
1050        let mut symbols = Vec::with_capacity(ItemIndex::MAX_ITEMS + 1);
1051
1052        for i in 0..=ItemIndex::MAX_ITEMS {
1053            let name = Ident::new(format!("a{i}")).expect("valid identifier");
1054            symbols.push(Symbol::new(
1055                name.clone(),
1056                Visibility::Private,
1057                LinkStatus::Unlinked,
1058                SymbolItem::Compiled(ItemInfo::Type(TypeInfo { name, ty: types::Type::Felt })),
1059            ));
1060        }
1061
1062        linker.modules.push(
1063            LinkModule::new(
1064                module_id,
1065                ast::ModuleKind::Library,
1066                LinkStatus::Unlinked,
1067                ModuleSource::Mast,
1068                path,
1069            )
1070            .with_symbols(symbols),
1071        );
1072
1073        let result = catch_unwind(AssertUnwindSafe(|| {
1074            linker[module_id].resolve(Span::unknown("a0"), &SymbolResolver::new(&linker))
1075        }));
1076
1077        let result = match result {
1078            Ok(result) => result,
1079            Err(panic) => {
1080                let message = panic
1081                    .downcast_ref::<&str>()
1082                    .copied()
1083                    .or_else(|| panic.downcast_ref::<String>().map(String::as_str))
1084                    .expect("panic payload should be a string");
1085                panic!("expected graceful error, got panic: {message}");
1086            },
1087        };
1088
1089        assert!(matches!(
1090            result,
1091            Err(err) if matches!(*err, SymbolResolutionError::TooManyItemsInModule { .. })
1092        ));
1093    }
1094}