midenc_codegen_masm/masm/
program.rs

1use std::{fmt, path::Path, sync::Arc};
2
3use hir::{Signature, Symbol};
4use miden_assembly::{
5    ast::{ModuleKind, ProcedureName},
6    KernelLibrary, Library as CompiledLibrary, LibraryNamespace,
7};
8use miden_core::crypto::hash::Rpo256;
9use midenc_hir::{
10    self as hir, diagnostics::Report, DataSegmentTable, Felt, FieldElement, FunctionIdent,
11    GlobalVariableTable, Ident, SourceSpan,
12};
13use midenc_hir_analysis::GlobalVariableAnalysis;
14use midenc_session::{Emit, Session};
15
16use super::{module::Modules, *};
17use crate::packaging::Rodata;
18
19inventory::submit! {
20    midenc_session::CompileFlag::new("test_harness")
21        .long("test-harness")
22        .action(midenc_session::FlagAction::SetTrue)
23        .help("If present, causes the code generator to emit extra code for the VM test harness")
24        .help_heading("Testing")
25}
26
27/// A [Program] represents a complete set of modules which are intended to be shipped and executed
28/// together.
29#[derive(Clone)]
30pub struct Program {
31    /// The code for this program
32    library: Library,
33    /// The function identifier for the program entrypoint, if applicable
34    entrypoint: FunctionIdent,
35    /// The base address of the dynamic heap, as computed by the codegen backend
36    ///
37    /// Defaults to an offset which is two 64k pages from the start of linear memory,
38    /// or, if available, the next byte following the both the reserved linear memory region as
39    /// declared in HIR, and the global variables of the program.
40    heap_base: u32,
41}
42impl Program {
43    /// Create a new [Program] initialized from an [hir::Program].
44    ///
45    /// The resulting [Program] will have the following:
46    ///
47    /// * Data segments described by the original [hir::Program]
48    /// * The entrypoint function which will be invoked after the initialization phase of startup
49    /// * If an entrypoint is set, an executable [Module] which performs initialization and then
50    ///   invokes the entrypoint
51    ///
52    /// None of the HIR modules will have been added yet
53    pub fn from_hir(
54        program: &hir::Program,
55        globals: &GlobalVariableAnalysis<hir::Program>,
56    ) -> Result<Self, Report> {
57        let Some(entrypoint) = program.entrypoint() else {
58            return Err(Report::msg("invalid program: no entrypoint"));
59        };
60        let library = Library::from_hir(program, globals);
61
62        // Compute the first page boundary after the end of the globals table to use as the start
63        // of the dynamic heap when the program is executed
64        let heap_base = program.reserved_memory_bytes()
65            + u32::try_from(
66                program.globals().size_in_bytes().next_multiple_of(program.page_size() as usize),
67            )
68            .expect("unable to allocate dynamic heap: global table too large");
69        Ok(Self {
70            library,
71            entrypoint,
72            heap_base,
73        })
74    }
75
76    /// Get the raw [Rodata] segments for this program
77    pub fn rodatas(&self) -> &[Rodata] {
78        self.library.rodata.as_slice()
79    }
80
81    /// Link this [Program] against the given kernel during assembly
82    pub fn link_kernel(&mut self, kernel: KernelLibrary) {
83        self.library.link_kernel(kernel);
84    }
85
86    /// Link this [Program] against the given library during assembly
87    pub fn link_library(&mut self, library: CompiledLibrary) {
88        self.library.link_library(library);
89    }
90
91    /// Get the set of [CompiledLibrary] this program links against
92    pub fn link_libraries(&self) -> &[CompiledLibrary] {
93        self.library.link_libraries()
94    }
95
96    /// Generate an executable module which when run expects the raw data segment data to be
97    /// provided on the advice stack in the same order as initialization, and the operands of
98    /// the entrypoint function on the operand stack.
99    fn generate_main(&self, entrypoint: FunctionIdent, emit_test_harness: bool) -> Box<Module> {
100        let mut exe = Box::new(Module::new(LibraryNamespace::Exec.into(), ModuleKind::Executable));
101        let start_id = FunctionIdent {
102            module: Ident::with_empty_span(Symbol::intern(LibraryNamespace::EXEC_PATH)),
103            function: Ident::with_empty_span(Symbol::intern(ProcedureName::MAIN_PROC_NAME)),
104        };
105        let start_sig = Signature::new([], []);
106        let mut start = Box::new(Function::new(start_id, start_sig));
107        {
108            let body = start.body_mut();
109            // Initialize dynamic heap
110            body.push(Op::PushU32(self.heap_base), SourceSpan::default());
111            body.push(
112                Op::Exec("intrinsics::mem::heap_init".parse().unwrap()),
113                SourceSpan::default(),
114            );
115            // Initialize data segments from advice stack
116            self.emit_data_segment_initialization(body);
117            // Possibly initialize test harness
118            if emit_test_harness {
119                self.emit_test_harness(body);
120            }
121            // Invoke the program entrypoint
122            body.push(Op::Exec(entrypoint), SourceSpan::default());
123        }
124        exe.push_back(start);
125        exe
126    }
127
128    fn emit_test_harness(&self, block: &mut Block) {
129        let span = SourceSpan::default();
130
131        // Advice Stack: [dest_ptr, num_words, ...]
132        block.push(Op::AdvPush(2), span); // => [num_words, dest_ptr] on operand stack
133        block.push(Op::Exec("std::mem::pipe_words_to_memory".parse().unwrap()), span);
134        // Drop HASH
135        block.push(Op::Dropw, span);
136        // Drop dest_ptr
137        block.push(Op::Drop, span);
138    }
139
140    /// Emit the sequence of instructions necessary to consume rodata from the advice stack and
141    /// populate the global heap with the data segments of this program, verifying that the
142    /// commitments match.
143    fn emit_data_segment_initialization(&self, block: &mut Block) {
144        // Emit data segment initialization code
145        //
146        // NOTE: This depends on the program being executed with the data for all data
147        // segments having been placed in the advice map with the same commitment and
148        // encoding used here. The program will fail to execute if this is not set up
149        // correctly.
150        //
151        // TODO(pauls): To facilitate automation of this, we should emit an inputs file to
152        // disk that maps each segment to a commitment and its data encoded as binary. This
153        // can then be loaded into the advice provider during VM init.
154        let pipe_preimage_to_memory = "std::mem::pipe_preimage_to_memory".parse().unwrap();
155        for rodata in self.library.rodata.iter() {
156            let span = SourceSpan::default();
157
158            // Move rodata from advice map to advice stack
159            block.push(Op::Pushw(rodata.digest.into()), span); // COM
160            block.push(Op::AdvInjectPushMapVal, span);
161            // write_ptr
162            block.push(Op::PushU32(rodata.start.waddr), span);
163            // num_words
164            block.push(Op::PushU32(rodata.size_in_words() as u32), span);
165            // [num_words, write_ptr, COM, ..] -> [write_ptr']
166            block.push(Op::Exec(pipe_preimage_to_memory), span);
167            // drop write_ptr'
168            block.push(Op::Drop, span);
169        }
170    }
171
172    #[inline(always)]
173    pub fn entrypoint(&self) -> FunctionIdent {
174        self.entrypoint
175    }
176
177    #[inline(always)]
178    pub fn stack_pointer(&self) -> Option<u32> {
179        self.library.stack_pointer
180    }
181
182    /// Freezes this program, preventing further modifications
183    pub fn freeze(mut self: Box<Self>) -> Arc<Program> {
184        self.library.modules.freeze();
185        Arc::from(self)
186    }
187
188    /// Get an iterator over the modules in this program
189    pub fn modules(&self) -> impl Iterator<Item = &Module> + '_ {
190        self.library.modules.iter()
191    }
192
193    /// Access the frozen module tree of this program, and panic if not frozen
194    pub fn unwrap_frozen_modules(&self) -> &FrozenModuleTree {
195        self.library.unwrap_frozen_modules()
196    }
197
198    /// Insert a module into this program.
199    ///
200    /// The insertion order is not preserved - modules are ordered by name.
201    ///
202    /// NOTE: This function will panic if the program has been frozen
203    pub fn insert(&mut self, module: Box<Module>) {
204        self.library.insert(module)
205    }
206
207    /// Get a reference to a module in this program by name
208    pub fn get<Q>(&self, name: &Q) -> Option<&Module>
209    where
210        Q: ?Sized + Ord,
211        Ident: core::borrow::Borrow<Q>,
212    {
213        self.library.get(name)
214    }
215
216    /// Returns true if this program contains a [Module] named `name`
217    pub fn contains<N>(&self, name: N) -> bool
218    where
219        Ident: PartialEq<N>,
220    {
221        self.library.contains(name)
222    }
223
224    /// Write this [Program] to the given output directory.
225    pub fn write_to_directory<P: AsRef<Path>>(
226        &self,
227        path: P,
228        session: &Session,
229    ) -> std::io::Result<()> {
230        let path = path.as_ref();
231        assert!(path.is_dir());
232
233        self.library.write_to_directory(path, session)?;
234
235        let main = self.generate_main(self.entrypoint, /* test_harness= */ false);
236        main.write_to_directory(path, session)?;
237
238        Ok(())
239    }
240
241    // Assemble this program to MAST
242    pub fn assemble(&self, session: &Session) -> Result<Arc<miden_core::Program>, Report> {
243        use miden_assembly::{Assembler, CompileOptions};
244
245        let debug_mode = session.options.emit_debug_decorators();
246
247        log::debug!(
248            "assembling executable with entrypoint '{}' (debug_mode={})",
249            self.entrypoint,
250            debug_mode
251        );
252        let mut assembler =
253            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
254
255        // Link extra libraries
256        for library in self.library.libraries.iter() {
257            if log::log_enabled!(log::Level::Debug) {
258                for module in library.module_infos() {
259                    log::debug!("registering '{}' with assembler", module.path());
260                }
261            }
262            assembler.add_library(library)?;
263        }
264
265        // Assemble library
266        for module in self.library.modules.iter() {
267            log::debug!("adding '{}' to assembler", module.id.as_str());
268            let kind = module.kind;
269            let module = module.to_ast(debug_mode).map(Box::new)?;
270            assembler.add_module_with_options(
271                module,
272                CompileOptions {
273                    kind,
274                    warnings_as_errors: false,
275                    path: None,
276                },
277            )?;
278        }
279
280        let emit_test_harness = session.get_flag("test_harness");
281        let main = self.generate_main(self.entrypoint, emit_test_harness);
282        let main = main.to_ast(debug_mode).map(Box::new)?;
283        assembler.assemble_program(main).map(Arc::new)
284    }
285
286    pub(crate) fn library(&self) -> &Library {
287        &self.library
288    }
289}
290
291impl fmt::Display for Program {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        fmt::Display::fmt(&self.library, f)
294    }
295}
296
297impl Emit for Program {
298    fn name(&self) -> Option<Symbol> {
299        None
300    }
301
302    fn output_type(&self, _mode: midenc_session::OutputMode) -> midenc_session::OutputType {
303        midenc_session::OutputType::Masm
304    }
305
306    fn write_to<W: std::io::Write>(
307        &self,
308        mut writer: W,
309        mode: midenc_session::OutputMode,
310        _session: &Session,
311    ) -> std::io::Result<()> {
312        assert_eq!(
313            mode,
314            midenc_session::OutputMode::Text,
315            "binary mode is not supported for masm ir programs"
316        );
317        writer.write_fmt(format_args!("{}\n", self))
318    }
319}
320
321/// A [Library] represents a set of modules and its dependencies, which are compiled/assembled
322/// together into a single artifact, and then linked into a [Program] for execution at a later
323/// time.
324///
325/// Modules are stored in a [Library] in a B-tree map, keyed by the module name. This is done to
326/// make accessing modules by name efficient, and to ensure a stable ordering for compiled programs
327/// when emitted as text.
328#[derive(Default, Clone)]
329pub struct Library {
330    /// The set of modules which belong to this program
331    modules: Modules,
332    /// The set of libraries to link this program against
333    libraries: Vec<CompiledLibrary>,
334    /// The kernel library to link against
335    kernel: Option<KernelLibrary>,
336    /// The rodata segments of this program keyed by the offset of the segment
337    rodata: Vec<Rodata>,
338    /// The address of the `__stack_pointer` global, if such a global has been defined
339    stack_pointer: Option<u32>,
340}
341impl Library {
342    /// Create a new, empty [Library]
343    pub fn empty() -> Self {
344        Self::default()
345    }
346
347    /// Create a new [Library] initialized from an [hir::Program].
348    ///
349    /// The resulting [Library] will have the following:
350    ///
351    /// * Data segments described by the original [hir::Program]
352    ///
353    /// None of the HIR modules will have been added yet
354    pub fn from_hir(
355        program: &hir::Program,
356        globals: &GlobalVariableAnalysis<hir::Program>,
357    ) -> Self {
358        let stack_pointer = program.globals().find("__stack_pointer".parse().unwrap());
359        let stack_pointer = if let Some(stack_pointer) = stack_pointer {
360            let global_table_offset = globals.layout().global_table_offset();
361            Some(global_table_offset + unsafe { program.globals().offset_of(stack_pointer) })
362        } else {
363            None
364        };
365        let rodata = compute_rodata(
366            globals.layout().global_table_offset(),
367            program.globals(),
368            program.segments(),
369        );
370        Self {
371            modules: Modules::default(),
372            libraries: vec![],
373            kernel: None,
374            rodata,
375            stack_pointer,
376        }
377    }
378
379    pub fn rodatas(&self) -> &[Rodata] {
380        self.rodata.as_slice()
381    }
382
383    /// Link this [Library] against the given kernel during assembly
384    pub fn link_kernel(&mut self, kernel: KernelLibrary) {
385        self.kernel = Some(kernel);
386    }
387
388    /// Link this [Library] against the given library during assembly
389    pub fn link_library(&mut self, library: CompiledLibrary) {
390        self.libraries.push(library);
391    }
392
393    /// Get the set of [CompiledLibrary] this library links against
394    pub fn link_libraries(&self) -> &[CompiledLibrary] {
395        self.libraries.as_slice()
396    }
397
398    /// Freezes this library, preventing further modifications
399    pub fn freeze(mut self: Box<Self>) -> Arc<Library> {
400        self.modules.freeze();
401        Arc::from(self)
402    }
403
404    /// Get an iterator over the modules in this library
405    pub fn modules(&self) -> impl Iterator<Item = &Module> + '_ {
406        self.modules.iter()
407    }
408
409    /// Access the frozen module tree of this library, and panic if not frozen
410    pub fn unwrap_frozen_modules(&self) -> &FrozenModuleTree {
411        match self.modules {
412            Modules::Frozen(ref modules) => modules,
413            Modules::Open(_) => panic!("expected program to be frozen"),
414        }
415    }
416
417    /// Insert a module into this library.
418    ///
419    /// The insertion order is not preserved - modules are ordered by name.
420    ///
421    /// NOTE: This function will panic if the program has been frozen
422    pub fn insert(&mut self, module: Box<Module>) {
423        self.modules.insert(module);
424    }
425
426    /// Get a reference to a module in this library by name
427    pub fn get<Q>(&self, name: &Q) -> Option<&Module>
428    where
429        Q: ?Sized + Ord,
430        Ident: core::borrow::Borrow<Q>,
431    {
432        self.modules.get(name)
433    }
434
435    /// Returns true if this library contains a [Module] named `name`
436    pub fn contains<N>(&self, name: N) -> bool
437    where
438        Ident: PartialEq<N>,
439    {
440        self.modules.iter().any(|m| m.id == name)
441    }
442
443    /// Write this [Library] to the given output directory.
444    pub fn write_to_directory<P: AsRef<Path>>(
445        &self,
446        path: P,
447        session: &Session,
448    ) -> std::io::Result<()> {
449        let path = path.as_ref();
450        assert!(path.is_dir());
451
452        for module in self.modules.iter() {
453            module.write_to_directory(path, session)?;
454        }
455
456        Ok(())
457    }
458
459    // Assemble this library to MAST
460    pub fn assemble(&self, session: &Session) -> Result<Arc<CompiledLibrary>, Report> {
461        use miden_assembly::Assembler;
462
463        let debug_mode = session.options.emit_debug_decorators();
464        log::debug!(
465            "assembling library of {} modules (debug_mode={})",
466            self.modules().count(),
467            debug_mode
468        );
469
470        let mut assembler =
471            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
472
473        // Link extra libraries
474        for library in self.libraries.iter() {
475            if log::log_enabled!(log::Level::Debug) {
476                for module in library.module_infos() {
477                    log::debug!("registering '{}' with assembler", module.path());
478                }
479            }
480            assembler.add_library(library)?;
481        }
482
483        // Assemble library
484        let mut modules = Vec::with_capacity(self.modules.len());
485        for module in self.modules.iter() {
486            log::debug!("adding '{}' to assembler", module.id.as_str());
487            let module = module.to_ast(debug_mode).map(Box::new)?;
488            modules.push(module);
489        }
490        assembler.assemble_library(modules).map(Arc::new)
491    }
492}
493
494impl fmt::Display for Library {
495    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
496        for module in self.modules.iter() {
497            // Don't print intrinsic modules
498            if module.id.as_str().starts_with("intrinsics::") {
499                continue;
500            }
501            if ["intrinsics", "std"].contains(&module.name.namespace().as_str()) {
502                // Skip printing the standard library modules and intrinsics
503                // modules to focus on the user-defined modules and avoid the
504                // stack overflow error when printing large programs
505                // https://github.com/0xPolygonMiden/miden-formatting/issues/4
506                continue;
507            } else {
508                writeln!(f, "# mod {}\n", &module.name)?;
509                writeln!(f, "{}", module)?;
510            }
511        }
512        Ok(())
513    }
514}
515
516impl Emit for Library {
517    fn name(&self) -> Option<Symbol> {
518        None
519    }
520
521    fn output_type(&self, _mode: midenc_session::OutputMode) -> midenc_session::OutputType {
522        midenc_session::OutputType::Masm
523    }
524
525    fn write_to<W: std::io::Write>(
526        &self,
527        mut writer: W,
528        mode: midenc_session::OutputMode,
529        _session: &Session,
530    ) -> std::io::Result<()> {
531        assert_eq!(
532            mode,
533            midenc_session::OutputMode::Text,
534            "binary mode is not supported for masm ir libraries"
535        );
536        writer.write_fmt(format_args!("{}\n", self))
537    }
538}
539
540/// Compute the metadata for each non-empty rodata segment in the program.
541///
542/// This consists of the data itself, as well as a content digest, which will be used to place
543/// that data in the advice map when the program starts.
544fn compute_rodata(
545    global_table_offset: u32,
546    globals: &GlobalVariableTable,
547    segments: &DataSegmentTable,
548) -> Vec<Rodata> {
549    let mut rodatas = Vec::with_capacity(segments.iter().count() + 1);
550
551    // Convert global variable initializers to a data segment, and place it at the computed
552    // global table offset in linear memory.
553    let extra = if !globals.is_empty() {
554        let size = globals.size_in_bytes();
555        let offset = global_table_offset;
556        let mut data = vec![0; size];
557        for gv in globals.iter() {
558            if let Some(init) = gv.initializer() {
559                let offset = unsafe { globals.offset_of(gv.id()) } as usize;
560                let init = globals.get_constant(init);
561                let init_bytes = init.as_slice();
562                assert!(offset + init_bytes.len() <= data.len());
563                let dst = &mut data[offset..(offset + init_bytes.len())];
564                dst.copy_from_slice(init_bytes);
565            }
566        }
567        // Don't bother emitting anything for zeroed segments
568        if data.iter().any(|&b| b != 0) {
569            Some((size as u32, offset, Arc::new(midenc_hir::ConstantData::from(data))))
570        } else {
571            None
572        }
573    } else {
574        None
575    };
576
577    // Process all segments, ignoring zeroed segments (as Miden's memory is always zeroed)
578    for (size, offset, segment_data) in segments
579        .iter()
580        .filter_map(|segment| {
581            if segment.is_zeroed() {
582                None
583            } else {
584                Some((segment.size(), segment.offset(), segment.init()))
585            }
586        })
587        .chain(extra)
588    {
589        let base = NativePtr::from_ptr(offset);
590
591        // TODO(pauls): Do we ever have a need for data segments which are not aligned
592        // to an word boundary? If so, we need to implement that
593        // support when emitting the entry for a program
594        assert_eq!(
595            base.offset,
596            0,
597            "unsupported data segment alignment {}: must be aligned to a 32 byte boundary",
598            base.alignment()
599        );
600        assert_eq!(
601            base.index,
602            0,
603            "unsupported data segment alignment {}: must be aligned to a 32 byte boundary",
604            base.alignment()
605        );
606
607        // Compute the commitment for the data
608        let num_elements = (size.next_multiple_of(4) / 4) as usize;
609        let num_words = num_elements.next_multiple_of(4) / 4;
610        let padding = (num_words * 4).abs_diff(num_elements);
611        let mut elements = Vec::with_capacity(num_elements + padding);
612        // TODO(pauls): If the word containing the first element overlaps with the
613        // previous segment, then ensure the overlapping elements
614        // are mixed together, so that the data is preserved, and
615        // the commitment is correct
616        let mut iter = segment_data.as_slice().iter().copied().array_chunks::<4>();
617        elements.extend(iter.by_ref().map(|bytes| Felt::new(u32::from_le_bytes(bytes) as u64)));
618        if let Some(remainder) = iter.into_remainder() {
619            let mut chunk = [0u8; 4];
620            for (i, byte) in remainder.into_iter().enumerate() {
621                chunk[i] = byte;
622            }
623            elements.push(Felt::new(u32::from_le_bytes(chunk) as u64));
624        }
625        elements.resize(num_elements + padding, Felt::ZERO);
626        let digest = Rpo256::hash_elements(&elements);
627
628        log::debug!(
629            "computed commitment for data segment at offset {offset} ({size} bytes, \
630             {num_elements} elements): '{digest}'"
631        );
632
633        rodatas.push(Rodata {
634            digest,
635            start: base,
636            data: segment_data,
637        });
638    }
639
640    rodatas
641}