midenc_hir/program/
mod.rs

1mod linker;
2
3use alloc::collections::BTreeMap;
4use core::ops::{Deref, DerefMut};
5
6use intrusive_collections::RBTree;
7use miden_assembly::Library as CompiledLibrary;
8use miden_core::crypto::hash::RpoDigest;
9
10pub use self::linker::Linker;
11use crate::{
12    diagnostics::{DiagnosticsHandler, Report},
13    *,
14};
15
16/// A [Program] is a collection of [Module]s that are being compiled together as a package.
17///
18/// This is primarily used for storing/querying data which must be shared across modules:
19///
20/// * The set of global variables which will be allocated on the global heap
21/// * The set of modules and functions which have been defined
22///
23/// When translating to Miden Assembly, we need something like this to allow us to perform some
24/// basic linker tasks prior to emitting the textual MASM which will be fed to the Miden VM.
25///
26/// This structure is intended to be allocated via [std::sync::Arc], so that it can be shared
27/// across multiple threads which are emitting/compiling modules at the same time. It is designed
28/// so that individual fields are locked, rather than the structure as a whole, to minimize
29/// contention. The intuition is that, in general, changes at the [Program] level are relatively
30/// infrequent, i.e. only when declaring a new [Module], or [GlobalVariable], do we actually need to
31/// mutate the structure. In all other situations, changes are scoped at the [Module] level.
32pub struct Program {
33    /// This tree stores all of the modules being compiled as part of the current program.
34    modules: RBTree<ModuleTreeAdapter>,
35    /// The set of compiled libraries this program links against
36    libraries: BTreeMap<RpoDigest, CompiledLibrary>,
37    /// If set, this field is used to determine which function is the entrypoint for the program.
38    ///
39    /// When generating Miden Assembly, this will determine whether or not we're emitting
40    /// a program or just a collection of modules; and in the case of the former, what code
41    /// to emit in the root code block.
42    ///
43    /// If not present, but there is a function in the program with the `entrypoint` attribute,
44    /// that function will be used instead. If there are multiple functions with the `entrypoint`
45    /// attribute, and this field is `None`, the linker will raise an error.
46    entrypoint: Option<FunctionIdent>,
47    /// The page size used by this program.
48    ///
49    /// If multiple modules with different page sizes are present, the largest size is used.
50    page_size: u32,
51    /// The size (in pages) of the reserved linear memory region (starting from offset 0)
52    reserved_memory_pages: u32,
53    /// The data segments gathered from all modules in the program, and laid out in address order.
54    segments: DataSegmentTable,
55    /// The global variable table produced by linking the global variable tables of all
56    /// modules in this program. The layout of this table corresponds to the layout of
57    /// global variables in the linear memory heap at runtime.
58    globals: GlobalVariableTable,
59}
60
61impl Default for Program {
62    fn default() -> Self {
63        Self {
64            page_size: 64 * 1024,
65            reserved_memory_pages: 0,
66            modules: Default::default(),
67            libraries: Default::default(),
68            entrypoint: Default::default(),
69            segments: Default::default(),
70            globals: Default::default(),
71        }
72    }
73}
74
75impl Program {
76    /// Create a new, empty [Program].
77    #[inline(always)]
78    pub fn new() -> Self {
79        Self::default()
80    }
81
82    /// Get the default page size for this program
83    #[inline]
84    pub const fn page_size(&self) -> u32 {
85        self.page_size
86    }
87
88    /// Get the size of the reserved linear memory (in pages) region for this program
89    #[inline]
90    pub const fn reserved_memory_pages(&self) -> u32 {
91        self.reserved_memory_pages
92    }
93
94    /// Get the size of the reserved linear memory (in bytes) region for this program
95    #[inline]
96    pub const fn reserved_memory_bytes(&self) -> u32 {
97        self.reserved_memory_pages * self.page_size
98    }
99
100    /// Add to the set of libraries this [Program] will be assembled with
101    pub fn add_library(&mut self, lib: CompiledLibrary) {
102        self.libraries.insert(*lib.digest(), lib);
103    }
104
105    /// Returns true if this program has a defined entrypoint
106    pub fn has_entrypoint(&self) -> bool {
107        self.entrypoint().is_some()
108    }
109
110    /// Returns true if this program is executable.
111    ///
112    /// An executable program is one which has an entrypoint that will be called
113    /// after the program is loaded.
114    pub fn is_executable(&self) -> bool {
115        self.has_entrypoint()
116    }
117
118    /// Returns the [FunctionIdent] corresponding to the program entrypoint
119    pub fn entrypoint(&self) -> Option<FunctionIdent> {
120        self.entrypoint.or_else(|| self.modules.iter().find_map(|m| m.entrypoint()))
121    }
122
123    /// Return a reference to the module table for this program
124    pub fn modules(&self) -> &RBTree<ModuleTreeAdapter> {
125        &self.modules
126    }
127
128    /// Return a mutable reference to the module table for this program
129    pub fn modules_mut(&mut self) -> &mut RBTree<ModuleTreeAdapter> {
130        &mut self.modules
131    }
132
133    /// Return the set of libraries this program links against
134    pub fn libraries(&self) -> &BTreeMap<RpoDigest, CompiledLibrary> {
135        &self.libraries
136    }
137
138    /// Return the set of libraries this program links against as a mutable reference
139    pub fn libraries_mut(&mut self) -> &mut BTreeMap<RpoDigest, CompiledLibrary> {
140        &mut self.libraries
141    }
142
143    /// Return a reference to the data segment table for this program
144    pub fn segments(&self) -> &DataSegmentTable {
145        &self.segments
146    }
147
148    /// Get a reference to the global variable table for this program
149    pub fn globals(&self) -> &GlobalVariableTable {
150        &self.globals
151    }
152
153    /// Get a mutable reference to the global variable table for this program
154    pub fn globals_mut(&mut self) -> &mut GlobalVariableTable {
155        &mut self.globals
156    }
157
158    /// Returns true if `name` is defined in this program.
159    pub fn contains(&self, name: Ident) -> bool {
160        !self.modules.find(&name).is_null()
161    }
162
163    /// Look up the signature of a function in this program by `id`
164    pub fn signature(&self, id: &FunctionIdent) -> Option<&Signature> {
165        let module = self.modules.find(&id.module).get()?;
166        module.function(id.function).map(|f| &f.signature)
167    }
168}
169
170#[doc(hidden)]
171#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
172pub struct ProgramAnalysisKey;
173impl crate::pass::AnalysisKey for Program {
174    type Key = ProgramAnalysisKey;
175
176    fn key(&self) -> Self::Key {
177        ProgramAnalysisKey
178    }
179}
180
181/// This struct provides an ergonomic way to construct a [Program] in an imperative fashion.
182///
183/// Simply create the builder, add/build one or more modules, then call `link` to obtain a
184/// [Program].
185pub struct ProgramBuilder<'a> {
186    /// The set of HIR modules to link into the program
187    modules: BTreeMap<Ident, Box<Module>>,
188    /// The set of modules defined externally, which will be linked during assembly
189    extern_modules: BTreeMap<Ident, Vec<Ident>>,
190    /// The set of libraries we're linking against
191    libraries: BTreeMap<RpoDigest, CompiledLibrary>,
192    entry: Option<FunctionIdent>,
193    page_size: u32,
194    reserved_memory_size: u32,
195    diagnostics: &'a DiagnosticsHandler,
196}
197impl<'a> ProgramBuilder<'a> {
198    pub fn new(diagnostics: &'a DiagnosticsHandler) -> Self {
199        Self {
200            modules: Default::default(),
201            extern_modules: Default::default(),
202            libraries: Default::default(),
203            entry: None,
204            page_size: 64 * 1024,
205            reserved_memory_size: 0,
206            diagnostics,
207        }
208    }
209
210    /// Set the entrypoint for the [Program] being built.
211    #[inline]
212    pub fn with_entrypoint(mut self, id: FunctionIdent) -> Self {
213        self.entry = Some(id);
214        self
215    }
216
217    /// Add `module` to the set of modules to link into the final [Program]
218    ///
219    /// Unlike `add_module`, this function consumes the current builder state
220    /// and returns a new one, to allow for chaining builder calls together.
221    ///
222    /// Returns `Err` if a module with the same name already exists
223    pub fn with_module(mut self, module: Box<Module>) -> Result<Self, ModuleConflictError> {
224        self.add_module(module).map(|_| self)
225    }
226
227    /// Add `module` to the set of modules to link into the final [Program]
228    ///
229    /// Returns `Err` if a module with the same name already exists
230    pub fn add_module(&mut self, module: Box<Module>) -> Result<(), ModuleConflictError> {
231        let module_name = module.name;
232        if self.modules.contains_key(&module_name) || self.extern_modules.contains_key(&module_name)
233        {
234            return Err(ModuleConflictError::new(module_name));
235        }
236
237        self.page_size = core::cmp::max(self.page_size, module.page_size());
238        self.reserved_memory_size =
239            core::cmp::max(self.reserved_memory_size, module.reserved_memory_pages());
240        self.modules.insert(module_name, module);
241
242        Ok(())
243    }
244
245    /// Make the linker aware that `module` (with the given set of exports), is available to be
246    /// linked against, but is already compiled to Miden Assembly, so has no HIR representation.
247    ///
248    /// Returns `Err` if a module with the same name already exists
249    pub fn add_extern_module<E>(
250        &mut self,
251        module: Ident,
252        exports: E,
253    ) -> Result<(), ModuleConflictError>
254    where
255        E: IntoIterator<Item = Ident>,
256    {
257        if self.modules.contains_key(&module) || self.extern_modules.contains_key(&module) {
258            return Err(ModuleConflictError::new(module));
259        }
260
261        self.extern_modules.insert(module, exports.into_iter().collect());
262
263        Ok(())
264    }
265
266    /// Make the linker aware of the objects contained in the given library.
267    ///
268    /// Duplicate libraries/objects are ignored.
269    pub fn add_library(&mut self, library: CompiledLibrary) {
270        self.libraries.insert(*library.digest(), library);
271    }
272
273    /// Start building a [Module] with the given name.
274    ///
275    /// When the builder is done, the resulting [Module] will be inserted
276    /// into the set of modules to be linked into the final [Program].
277    pub fn module<S: Into<Ident>>(&mut self, name: S) -> ProgramModuleBuilder<'_, 'a> {
278        let name = name.into();
279        let module = match self.modules.remove(&name) {
280            None => Box::new(Module::new(name)),
281            Some(module) => module,
282        };
283        ProgramModuleBuilder {
284            pb: self,
285            mb: ModuleBuilder::from(module),
286        }
287    }
288
289    /// Link a [Program] from the current [ProgramBuilder] state
290    pub fn link(self) -> Result<Box<Program>, Report> {
291        let mut linker = Linker::new(self.diagnostics);
292        linker.with_page_size(self.page_size);
293        linker.reserve_memory(self.reserved_memory_size);
294
295        let entrypoint = self.entry.or_else(|| self.modules.values().find_map(|m| m.entrypoint()));
296        if let Some(entry) = entrypoint {
297            linker.with_entrypoint(entry)?;
298        }
299
300        linker.add_libraries(self.libraries.into_values());
301
302        self.extern_modules.into_iter().try_for_each(|obj| linker.add_object(obj))?;
303        self.modules.into_values().try_for_each(|obj| linker.add_object(obj))?;
304
305        linker.link()
306    }
307}
308
309/// This is used to build a [Module] from a [ProgramBuilder].
310///
311/// It is basically just a wrapper around [ModuleBuilder], but overrides two things:
312///
313/// * `build` will add the module to the [ProgramBuilder] directly, rather than returning it
314/// * `function` will delegate to [ProgramFunctionBuilder] which plays a similar role to this
315///   struct, but for [ModuleFunctionBuilder].
316pub struct ProgramModuleBuilder<'a, 'b: 'a> {
317    pb: &'a mut ProgramBuilder<'b>,
318    mb: ModuleBuilder,
319}
320impl<'a, 'b: 'a> ProgramModuleBuilder<'a, 'b> {
321    /// Start building a [Function] wwith the given name and signature.
322    pub fn function<'c, 'd: 'c, S: Into<Ident>>(
323        &'d mut self,
324        name: S,
325        signature: Signature,
326    ) -> Result<ProgramFunctionBuilder<'c, 'd>, SymbolConflictError> {
327        Ok(ProgramFunctionBuilder {
328            diagnostics: self.pb.diagnostics,
329            fb: self.mb.function(name, signature)?,
330        })
331    }
332
333    /// Build the current [Module], adding it to the [ProgramBuilder].
334    ///
335    /// Returns `err` if a module with that name already exists.
336    pub fn build(self) -> Result<(), ModuleConflictError> {
337        let pb = self.pb;
338        let mb = self.mb;
339
340        pb.add_module(mb.build())?;
341        Ok(())
342    }
343}
344impl<'a, 'b: 'a> Deref for ProgramModuleBuilder<'a, 'b> {
345    type Target = ModuleBuilder;
346
347    fn deref(&self) -> &Self::Target {
348        &self.mb
349    }
350}
351impl<'a, 'b: 'a> DerefMut for ProgramModuleBuilder<'a, 'b> {
352    fn deref_mut(&mut self) -> &mut Self::Target {
353        &mut self.mb
354    }
355}
356impl<'a, 'b: 'a> AsRef<ModuleBuilder> for ProgramModuleBuilder<'a, 'b> {
357    fn as_ref(&self) -> &ModuleBuilder {
358        &self.mb
359    }
360}
361impl<'a, 'b: 'a> AsMut<ModuleBuilder> for ProgramModuleBuilder<'a, 'b> {
362    fn as_mut(&mut self) -> &mut ModuleBuilder {
363        &mut self.mb
364    }
365}
366
367/// This is used to build a [Function] from a [ProgramModuleBuilder].
368///
369/// It is basically just a wrapper around [ModuleFunctionBuilder], but overrides
370/// `build` to use the [DiagnosticsHandler] of the parent
371/// [ProgramBuilder].
372pub struct ProgramFunctionBuilder<'a, 'b: 'a> {
373    diagnostics: &'b DiagnosticsHandler,
374    fb: ModuleFunctionBuilder<'a>,
375}
376impl<'a, 'b: 'a> ProgramFunctionBuilder<'a, 'b> {
377    /// Build the current function
378    pub fn build(self) -> Result<FunctionIdent, Report> {
379        let diagnostics = self.diagnostics;
380        self.fb.build(diagnostics)
381    }
382}
383impl<'a, 'b: 'a> Deref for ProgramFunctionBuilder<'a, 'b> {
384    type Target = ModuleFunctionBuilder<'a>;
385
386    fn deref(&self) -> &Self::Target {
387        &self.fb
388    }
389}
390impl<'a, 'b: 'a> DerefMut for ProgramFunctionBuilder<'a, 'b> {
391    fn deref_mut(&mut self) -> &mut Self::Target {
392        &mut self.fb
393    }
394}
395impl<'a, 'b: 'a> AsRef<ModuleFunctionBuilder<'a>> for ProgramFunctionBuilder<'a, 'b> {
396    fn as_ref(&self) -> &ModuleFunctionBuilder<'a> {
397        &self.fb
398    }
399}
400impl<'a, 'b: 'a> AsMut<ModuleFunctionBuilder<'a>> for ProgramFunctionBuilder<'a, 'b> {
401    fn as_mut(&mut self) -> &mut ModuleFunctionBuilder<'a> {
402        &mut self.fb
403    }
404}