midenc_codegen_masm/lower/
component.rs

1use alloc::{collections::BTreeSet, sync::Arc};
2
3use miden_assembly::{LibraryPath, ast::InvocationTarget};
4use miden_assembly_syntax::parser::WordValue;
5use miden_mast_package::ProcedureName;
6use midenc_hir::{
7    CallConv, FunctionIdent, Op, SourceSpan, Span, Symbol, ValueRef, diagnostics::IntoDiagnostic,
8    dialects::builtin, pass::AnalysisManager,
9};
10use midenc_hir_analysis::analyses::LivenessAnalysis;
11use midenc_session::{
12    TargetEnv,
13    diagnostics::{Report, Spanned},
14};
15use smallvec::SmallVec;
16
17use crate::{
18    TraceEvent,
19    artifact::MasmComponent,
20    emitter::BlockEmitter,
21    linker::{LinkInfo, Linker},
22    masm,
23};
24
25/// This trait represents a conversion pass from some HIR entity to a Miden Assembly component.
26pub trait ToMasmComponent {
27    fn to_masm_component(&self, analysis_manager: AnalysisManager)
28    -> Result<MasmComponent, Report>;
29}
30
31/// 1:1 conversion from HIR component to MASM component
32impl ToMasmComponent for builtin::Component {
33    fn to_masm_component(
34        &self,
35        analysis_manager: AnalysisManager,
36    ) -> Result<MasmComponent, Report> {
37        // Get the current compiler context
38        let context = self.as_operation().context_rc();
39
40        // Run the linker for this component in order to compute its data layout
41        let link_info = Linker::default().link(self).map_err(Report::msg)?;
42
43        // Get the library path of the component
44        let component_path = link_info.component().to_library_path();
45
46        // Get the entrypoint, if specified
47        let entrypoint = match context.session().options.entrypoint.as_deref() {
48            Some(entry) => {
49                let entry_id = entry.parse::<FunctionIdent>().map_err(|_| {
50                    Report::msg(format!("invalid entrypoint identifier: '{entry}'"))
51                })?;
52                let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(
53                    Span::new(entry_id.function.span, entry_id.function.as_str().into()),
54                ));
55
56                // Check if we're inside the synthetic "wrapper" component used for pure Rust
57                // compilation. Since the user does not know about it, their entrypoint does not
58                // include the synthetic component path. We append the user-provided path to the
59                // root component path here if needed.
60                //
61                // TODO(pauls): Narrow this to only be true if the target env is not 'rollup', we
62                // cannot currently do so because we do not have sufficient Cargo metadata yet in
63                // 'cargo miden build' to detect the target env, and we default it to 'rollup'
64                let is_wrapper = component_path.path() == "root_ns:root@1.0.0";
65                let path = if is_wrapper {
66                    component_path.clone().append_unchecked(entry_id.module)
67                } else {
68                    // We're compiling a Wasm component and the component id is included
69                    // in the entrypoint.
70                    LibraryPath::new(entry_id.module).into_diagnostic()?
71                };
72                Some(masm::InvocationTarget::AbsoluteProcedurePath { name, path })
73            }
74            None => None,
75        };
76
77        // If we have global variables or data segments, we will require a component initializer
78        // function, as well as a module to hold component-level functions such as init
79        let requires_init = link_info.has_globals() || link_info.has_data_segments();
80        let init = if requires_init {
81            Some(masm::InvocationTarget::AbsoluteProcedurePath {
82                name: masm::ProcedureName::new("init").unwrap(),
83                path: component_path,
84            })
85        } else {
86            None
87        };
88
89        // Initialize the MASM component with basic information we have already
90        let id = link_info.component().clone();
91
92        // Define the initial component modules set
93        //
94        // The top-level component module is always defined, but may be empty
95        let modules =
96            vec![Arc::new(masm::Module::new(masm::ModuleKind::Library, id.to_library_path()))];
97
98        let rodata = data_segments_to_rodata(&link_info)?;
99
100        let kernel = if matches!(context.session().options.target, TargetEnv::Rollup { .. }) {
101            Some(miden_lib::transaction::TransactionKernel::kernel())
102        } else {
103            None
104        };
105
106        // Compute the first page boundary after the end of the globals table (or reserved memory
107        // if no globals) to use as the start of the dynamic heap when the program is executed
108        let heap_base = core::cmp::max(
109            link_info.reserved_memory_bytes(),
110            link_info.globals_layout().next_page_boundary() as usize,
111        );
112        let heap_base = u32::try_from(heap_base)
113            .expect("unable to allocate dynamic heap: global table too large");
114        let stack_pointer = link_info.globals_layout().stack_pointer_offset();
115        let mut masm_component = MasmComponent {
116            id,
117            init,
118            entrypoint,
119            kernel,
120            rodata,
121            heap_base,
122            stack_pointer,
123            modules,
124        };
125        let builder = MasmComponentBuilder {
126            analysis_manager,
127            component: &mut masm_component,
128            link_info: &link_info,
129            init_body: Default::default(),
130            invoked_from_init: Default::default(),
131        };
132
133        builder.build(self)?;
134
135        Ok(masm_component)
136    }
137}
138
139fn data_segments_to_rodata(link_info: &LinkInfo) -> Result<Vec<crate::Rodata>, Report> {
140    use midenc_hir::constants::ConstantData;
141
142    use crate::data_segments::{ResolvedDataSegment, merge_data_segments};
143    let mut resolved = SmallVec::<[ResolvedDataSegment; 2]>::new();
144    for sref in link_info.segment_layout().iter() {
145        let s = sref.borrow();
146        resolved.push(ResolvedDataSegment {
147            offset: *s.offset(),
148            data: s.initializer().as_slice().to_vec(),
149            readonly: *s.readonly(),
150        });
151    }
152    Ok(match merge_data_segments(resolved).map_err(Report::msg)? {
153        None => alloc::vec::Vec::new(),
154        Some(merged) => {
155            let data = alloc::sync::Arc::new(ConstantData::from(merged.data));
156            let felts = crate::Rodata::bytes_to_elements(data.as_slice());
157            let digest = miden_core::crypto::hash::Rpo256::hash_elements(&felts);
158            alloc::vec![crate::Rodata {
159                component: link_info.component().clone(),
160                digest,
161                start: super::NativePtr::from_ptr(merged.offset),
162                data,
163            }]
164        }
165    })
166}
167
168struct MasmComponentBuilder<'a> {
169    component: &'a mut MasmComponent,
170    analysis_manager: AnalysisManager,
171    link_info: &'a LinkInfo,
172    init_body: Vec<masm::Op>,
173    invoked_from_init: BTreeSet<masm::Invoke>,
174}
175
176impl MasmComponentBuilder<'_> {
177    /// Convert the component body to Miden Assembly
178    pub fn build(mut self, component: &builtin::Component) -> Result<(), Report> {
179        use masm::{Instruction as Inst, InvocationTarget, Op};
180
181        // If a component-level init is required, emit code to initialize the heap before any other
182        // initialization code.
183        if self.component.init.is_some() {
184            let span = component.span();
185
186            // Heap metadata initialization
187            let heap_base = self.component.heap_base;
188            self.init_body.push(masm::Op::Inst(Span::new(
189                span,
190                Inst::Push(masm::Immediate::Value(Span::unknown(heap_base.into()))),
191            )));
192            let heap_init = masm::ProcedureName::new("heap_init").unwrap();
193            let memory_intrinsics = masm::LibraryPath::new("intrinsics::mem").unwrap();
194            self.init_body.push(Op::Inst(Span::new(
195                span,
196                Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
197            )));
198            self.init_body.push(Op::Inst(Span::new(
199                span,
200                Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
201                    name: heap_init,
202                    path: memory_intrinsics,
203                }),
204            )));
205            self.init_body
206                .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
207
208            // Data segment initialization
209            self.emit_data_segment_initialization();
210        }
211
212        // Translate component body
213        let region = component.body();
214        let block = region.entry();
215        for op in block.body() {
216            if let Some(module) = op.downcast_ref::<builtin::Module>() {
217                self.define_module(module)?;
218            } else if let Some(interface) = op.downcast_ref::<builtin::Interface>() {
219                self.define_interface(interface)?;
220            } else if let Some(function) = op.downcast_ref::<builtin::Function>() {
221                self.define_function(function)?;
222            } else {
223                panic!(
224                    "invalid component-level operation: '{}' is not supported in a component body",
225                    op.name()
226                )
227            }
228        }
229
230        // Finalize the component-level init, if required
231        if self.component.init.is_some() {
232            let module =
233                Arc::get_mut(&mut self.component.modules[0]).expect("expected unique reference");
234
235            let init_name = masm::ProcedureName::new("init").unwrap();
236            let init_body = core::mem::take(&mut self.init_body);
237            let init = masm::Procedure::new(
238                Default::default(),
239                masm::Visibility::Private,
240                init_name,
241                0,
242                masm::Block::new(component.span(), init_body),
243            );
244
245            module.define_procedure(masm::Export::Procedure(init))?;
246        } else {
247            assert!(
248                self.init_body.is_empty(),
249                "the need for an 'init' function was not expected, but code was generated for one"
250            );
251        }
252
253        Ok(())
254    }
255
256    fn define_interface(&mut self, interface: &builtin::Interface) -> Result<(), Report> {
257        let component_path = self.component.id.to_library_path();
258        let interface_path = component_path.append_unchecked(interface.name());
259        let mut masm_module =
260            Box::new(masm::Module::new(masm::ModuleKind::Library, interface_path));
261        let builder = MasmModuleBuilder {
262            module: &mut masm_module,
263            analysis_manager: self
264                .analysis_manager
265                .nest(interface.as_operation().as_operation_ref()),
266            link_info: self.link_info,
267            init_body: &mut self.init_body,
268            invoked_from_init: &mut self.invoked_from_init,
269        };
270        builder.build_from_interface(interface)?;
271
272        self.component.modules.push(Arc::from(masm_module));
273
274        Ok(())
275    }
276
277    fn define_module(&mut self, module: &builtin::Module) -> Result<(), Report> {
278        let component_path = self.component.id.to_library_path();
279        let module_path = component_path.append_unchecked(module.name());
280        let mut masm_module = Box::new(masm::Module::new(masm::ModuleKind::Library, module_path));
281        let builder = MasmModuleBuilder {
282            module: &mut masm_module,
283            analysis_manager: self.analysis_manager.nest(module.as_operation_ref()),
284            link_info: self.link_info,
285            init_body: &mut self.init_body,
286            invoked_from_init: &mut self.invoked_from_init,
287        };
288        builder.build(module)?;
289
290        self.component.modules.push(Arc::from(masm_module));
291
292        Ok(())
293    }
294
295    fn define_function(&mut self, function: &builtin::Function) -> Result<(), Report> {
296        let builder = MasmFunctionBuilder::new(function)?;
297        let procedure = builder.build(
298            function,
299            self.analysis_manager.nest(function.as_operation_ref()),
300            self.link_info,
301        )?;
302
303        let module =
304            Arc::get_mut(&mut self.component.modules[0]).expect("expected unique reference");
305        assert_eq!(
306            module.path().num_components(),
307            1,
308            "expected top-level namespace module, but one has not been defined (in '{}' of '{}')",
309            module.path(),
310            function.path()
311        );
312        module.define_procedure(masm::Export::Procedure(procedure))?;
313
314        Ok(())
315    }
316
317    /// Emit the sequence of instructions necessary to consume rodata from the advice stack and
318    /// populate the global heap with the data segments of this component, verifying that the
319    /// commitments match.
320    fn emit_data_segment_initialization(&mut self) {
321        use masm::{Instruction as Inst, InvocationTarget, Op};
322
323        // Emit data segment initialization code
324        //
325        // NOTE: This depends on the program being executed with the data for all data segments
326        // having been placed in the advice map with the same commitment and encoding used here.
327        // The program will fail to execute if this is not set up correctly.
328        let pipe_preimage_to_memory = masm::ProcedureName::new("pipe_preimage_to_memory").unwrap();
329        let std_mem = masm::LibraryPath::new("std::mem").unwrap();
330
331        let span = SourceSpan::default();
332        for rodata in self.component.rodata.iter() {
333            // Push the commitment hash (`COM`) for this data onto the operand stack
334
335            // WARNING: These two are equivalent, shouldn't this be a no-op?
336            let word = rodata.digest.as_elements();
337            let word_value = [word[0], word[1], word[2], word[3]];
338
339            self.init_body.push(Op::Inst(Span::new(
340                span,
341                Inst::Push(masm::Immediate::Value(Span::unknown(WordValue(word_value).into()))),
342            )));
343            // Move rodata from the advice map, using the commitment as key, to the advice stack
344            self.init_body
345                .push(Op::Inst(Span::new(span, Inst::SysEvent(masm::SystemEventNode::PushMapVal))));
346            // write_ptr
347            assert!(rodata.start.is_word_aligned(), "rodata segments must be word-aligned");
348            self.init_body.push(Op::Inst(Span::new(
349                span,
350                Inst::Push(masm::Immediate::Value(Span::unknown(rodata.start.addr.into()))),
351            )));
352            // num_words
353            self.init_body.push(Op::Inst(Span::new(
354                span,
355                Inst::Push(masm::Immediate::Value(Span::unknown(
356                    (rodata.size_in_words() as u32).into(),
357                ))),
358            )));
359            // [num_words, write_ptr, COM, ..] -> [write_ptr']
360            self.init_body.push(Op::Inst(Span::new(
361                span,
362                Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
363            )));
364            self.init_body.push(Op::Inst(Span::new(
365                span,
366                Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
367                    name: pipe_preimage_to_memory.clone(),
368                    path: std_mem.clone(),
369                }),
370            )));
371            self.init_body
372                .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
373            // drop write_ptr'
374            self.init_body.push(Op::Inst(Span::new(span, Inst::Drop)));
375        }
376    }
377}
378
379struct MasmModuleBuilder<'a> {
380    module: &'a mut masm::Module,
381    analysis_manager: AnalysisManager,
382    link_info: &'a LinkInfo,
383    init_body: &'a mut Vec<masm::Op>,
384    invoked_from_init: &'a mut BTreeSet<masm::Invoke>,
385}
386
387impl MasmModuleBuilder<'_> {
388    pub fn build(mut self, module: &builtin::Module) -> Result<(), Report> {
389        let region = module.body();
390        let block = region.entry();
391        for op in block.body() {
392            if let Some(function) = op.downcast_ref::<builtin::Function>() {
393                self.define_function(function)?;
394            } else if let Some(gv) = op.downcast_ref::<builtin::GlobalVariable>() {
395                self.emit_global_variable_initializer(gv)?;
396            } else if op.is::<builtin::Segment>() {
397                continue;
398            } else {
399                panic!(
400                    "invalid module-level operation: '{}' is not legal in a MASM module body",
401                    op.name()
402                )
403            }
404        }
405
406        Ok(())
407    }
408
409    pub fn build_from_interface(mut self, interface: &builtin::Interface) -> Result<(), Report> {
410        let region = interface.body();
411        let block = region.entry();
412        for op in block.body() {
413            if let Some(function) = op.downcast_ref::<builtin::Function>() {
414                self.define_function(function)?;
415            } else {
416                panic!(
417                    "invalid interface-level operation: '{}' is not legal in a MASM module body",
418                    op.name()
419                )
420            }
421        }
422
423        Ok(())
424    }
425
426    fn define_function(&mut self, function: &builtin::Function) -> Result<(), Report> {
427        let builder = MasmFunctionBuilder::new(function)?;
428
429        let procedure = builder.build(
430            function,
431            self.analysis_manager.nest(function.as_operation_ref()),
432            self.link_info,
433        )?;
434
435        self.module.define_procedure(masm::Export::Procedure(procedure))?;
436
437        Ok(())
438    }
439
440    fn emit_global_variable_initializer(
441        &mut self,
442        gv: &builtin::GlobalVariable,
443    ) -> Result<(), Report> {
444        // We don't emit anything for declarations
445        if gv.is_declaration() {
446            return Ok(());
447        }
448
449        // We compute liveness for global variables independently
450        let analysis_manager = self.analysis_manager.nest(gv.as_operation_ref());
451        let liveness = analysis_manager.get_analysis::<LivenessAnalysis>()?;
452
453        // Emit the initializer block
454        let initializer_region = gv.region(0);
455        let initializer_block = initializer_region.entry();
456        let mut block_emitter = BlockEmitter {
457            liveness: &liveness,
458            link_info: self.link_info,
459            invoked: self.invoked_from_init,
460            target: Default::default(),
461            stack: Default::default(),
462        };
463        block_emitter.emit_inline(&initializer_block);
464
465        // Sanity checks
466        assert_eq!(block_emitter.stack.len(), 1, "expected only global variable value on stack");
467        let return_ty = block_emitter.stack.peek().unwrap().ty();
468        assert_eq!(
469            &return_ty,
470            gv.ty(),
471            "expected initializer to return value of same type as declaration"
472        );
473
474        // Write the initialized value to the computed storage offset for this global
475        let computed_addr = self
476            .link_info
477            .globals_layout()
478            .get_computed_addr(gv.as_global_var_ref())
479            .expect("undefined global variable");
480        block_emitter.emitter().store_imm(computed_addr, gv.span());
481
482        // Extend the generated init function with the code to initialize this global
483        let mut body = core::mem::take(&mut block_emitter.target);
484        self.init_body.append(&mut body);
485
486        Ok(())
487    }
488}
489
490struct MasmFunctionBuilder {
491    span: midenc_hir::SourceSpan,
492    name: masm::ProcedureName,
493    signature: masm::FunctionType,
494    visibility: masm::Visibility,
495    num_locals: u16,
496}
497
498impl MasmFunctionBuilder {
499    pub fn new(function: &builtin::Function) -> Result<Self, Report> {
500        use midenc_hir::{Symbol, Visibility};
501
502        let name = function.name();
503        let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(Span::new(
504            name.span,
505            name.as_str().into(),
506        )));
507        let visibility = match function.visibility() {
508            Visibility::Public => masm::Visibility::Public,
509            // TODO(pauls): Support internal visibility in MASM
510            Visibility::Internal => masm::Visibility::Public,
511            Visibility::Private => masm::Visibility::Private,
512        };
513        let locals_required = function.locals().iter().map(|ty| ty.size_in_felts()).sum::<usize>();
514        let num_locals = u16::try_from(locals_required).map_err(|_| {
515            let context = function.as_operation().context();
516            context
517                .diagnostics()
518                .diagnostic(miden_assembly::diagnostics::Severity::Error)
519                .with_message("cannot emit masm for function")
520                .with_primary_label(
521                    function.span(),
522                    "local storage exceeds procedure limit: no more than u16::MAX elements are \
523                     supported",
524                )
525                .into_report()
526        })?;
527
528        let sig = function.signature();
529        let args = sig.params.iter().map(|param| masm::TypeExpr::from(param.ty.clone())).collect();
530        let results = sig
531            .results
532            .iter()
533            .map(|result| masm::TypeExpr::from(result.ty.clone()))
534            .collect();
535        let signature = masm::FunctionType::new(sig.cc, args, results);
536
537        Ok(Self {
538            span: function.span(),
539            name,
540            signature,
541            visibility,
542            num_locals,
543        })
544    }
545
546    pub fn build(
547        self,
548        function: &builtin::Function,
549        analysis_manager: AnalysisManager,
550        link_info: &LinkInfo,
551    ) -> Result<masm::Procedure, Report> {
552        use alloc::collections::BTreeSet;
553
554        use midenc_hir_analysis::analyses::LivenessAnalysis;
555
556        log::trace!(target: "codegen", "lowering {}", function.as_operation());
557
558        let liveness = analysis_manager.get_analysis::<LivenessAnalysis>()?;
559
560        let mut invoked = BTreeSet::default();
561        let entry = function.entry_block();
562        let mut stack = crate::OperandStack::default();
563        {
564            let entry_block = entry.borrow();
565            for arg in entry_block.arguments().iter().rev().copied() {
566                stack.push(arg as ValueRef);
567            }
568        }
569        let mut emitter = BlockEmitter {
570            liveness: &liveness,
571            link_info,
572            invoked: &mut invoked,
573            target: Default::default(),
574            stack,
575        };
576
577        // For component export functions, invoke the `init` procedure first if needed.
578        // It loads the data segments and global vars into memory.
579        if function.signature().cc == CallConv::CanonLift
580            && (link_info.has_globals() || link_info.has_data_segments())
581        {
582            let component_path = link_info.component().to_library_path();
583            let init = InvocationTarget::AbsoluteProcedurePath {
584                name: ProcedureName::new("init").unwrap(),
585                path: component_path,
586            };
587            let span = SourceSpan::default();
588            // Add init call to the emitter's target before emitting the function body
589            emitter
590                .target
591                .push(masm::Op::Inst(Span::new(span, masm::Instruction::Exec(init))));
592        }
593
594        let mut body = emitter.emit(&entry.borrow());
595
596        if function.signature().cc == CallConv::CanonLift {
597            // Truncate the stack to 16 elements on exit in the component export function
598            // since it is expected to be `call`ed so it has a requirement to have
599            // no more than 16 elements on the stack when it returns.
600            // See https://0xmiden.github.io/miden-vm/user_docs/assembly/execution_contexts.html
601            // Since the VM's `drop` instruction not letting stack size go beyond the 16 elements
602            // we most likely end up with stack size > 16 elements at the end.
603            // See https://github.com/0xPolygonMiden/miden-vm/blob/c4acf49510fda9ba80f20cee1a9fb1727f410f47/processor/src/stack/mod.rs?plain=1#L226-L253
604            let truncate_stack = InvocationTarget::AbsoluteProcedurePath {
605                name: ProcedureName::new("truncate_stack").unwrap(),
606                path: masm::LibraryPath::new_from_components(
607                    masm::LibraryNamespace::new("std").unwrap(),
608                    [masm::Ident::new("sys").unwrap()],
609                ),
610            };
611            let span = SourceSpan::default();
612            body.push(masm::Op::Inst(Span::new(span, masm::Instruction::Exec(truncate_stack))));
613        }
614        let Self {
615            span,
616            name,
617            signature,
618            visibility,
619            num_locals,
620        } = self;
621
622        let mut procedure = masm::Procedure::new(span, visibility, name, num_locals, body);
623        procedure.set_signature(signature);
624        procedure.extend_invoked(invoked);
625
626        Ok(procedure)
627    }
628}