// midenc_codegen_masm/lower/component.rs

1use alloc::{collections::BTreeSet, sync::Arc};
2
3use miden_assembly::LibraryPath;
4use midenc_hir::{
5    diagnostics::IntoDiagnostic, dialects::builtin, pass::AnalysisManager, FunctionIdent, Op,
6    SourceSpan, Span, Symbol, ValueRef,
7};
8use midenc_hir_analysis::analyses::LivenessAnalysis;
9use midenc_session::{
10    diagnostics::{Report, Spanned},
11    TargetEnv,
12};
13
14use crate::{
15    artifact::MasmComponent,
16    emitter::BlockEmitter,
17    linker::{LinkInfo, Linker},
18    masm, TraceEvent,
19};
20
/// This trait represents a conversion pass from some HIR entity to a Miden Assembly component.
pub trait ToMasmComponent {
    /// Lower `self` to a [MasmComponent], or return a [Report] describing why lowering failed.
    ///
    /// The `analysis_manager` is made available to the implementation for its analysis queries.
    fn to_masm_component(&self, analysis_manager: AnalysisManager)
        -> Result<MasmComponent, Report>;
}
26
27/// 1:1 conversion from HIR component to MASM component
28impl ToMasmComponent for builtin::Component {
29    fn to_masm_component(
30        &self,
31        analysis_manager: AnalysisManager,
32    ) -> Result<MasmComponent, Report> {
33        // Get the current compiler context
34        let context = self.as_operation().context_rc();
35
36        // Run the linker for this component in order to compute its data layout
37        let link_info = Linker::default().link(self).map_err(Report::msg)?;
38
39        // Get the library path of the component
40        let component_path = link_info.component().to_library_path();
41
42        // Get the entrypoint, if specified
43        let entrypoint = match context.session().options.entrypoint.as_deref() {
44            Some(entry) => {
45                let entry_id = entry.parse::<FunctionIdent>().map_err(|_| {
46                    Report::msg(format!("invalid entrypoint identifier: '{entry}'"))
47                })?;
48                let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(
49                    Span::new(entry_id.function.span, entry_id.function.as_str().into()),
50                ));
51
52                // Check if we're inside the synthetic "wrapper" component used for pure Rust
53                // compilation. Since the user does not know about it, their entrypoint does not
54                // include the synthetic component path. We append the user-provided path to the
55                // root component path here if needed.
56                //
57                // TODO(pauls): Narrow this to only be true if the target env is not 'rollup', we
58                // cannot currently do so because we do not have sufficient Cargo metadata yet in
59                // 'cargo miden build' to detect the target env, and we default it to 'rollup'
60                let is_wrapper = component_path.path() == "root_ns:root@1.0.0";
61                let path = if is_wrapper {
62                    component_path.clone().append_unchecked(entry_id.module)
63                } else {
64                    // We're compiling a Wasm component and the component id is included
65                    // in the entrypoint.
66                    LibraryPath::new(entry_id.module).into_diagnostic()?
67                };
68                Some(masm::InvocationTarget::AbsoluteProcedurePath { name, path })
69            }
70            None => None,
71        };
72
73        // If we have global variables or data segments, we will require a component initializer
74        // function, as well as a module to hold component-level functions such as init
75        let requires_init = link_info.has_globals() || link_info.has_data_segments();
76        let init = if requires_init {
77            Some(masm::InvocationTarget::AbsoluteProcedurePath {
78                name: masm::ProcedureName::new("init").unwrap(),
79                path: component_path,
80            })
81        } else {
82            None
83        };
84
85        // Initialize the MASM component with basic information we have already
86        let id = link_info.component().clone();
87
88        // Define the initial component modules set
89        //
90        // The top-level component module is always defined, but may be empty
91        let modules =
92            vec![Arc::new(masm::Module::new(masm::ModuleKind::Library, id.to_library_path()))];
93
94        // Compute rodata segments for the component
95        let rodata = link_info
96            .segment_layout()
97            .iter()
98            .map(|segment_ref| {
99                let segment = segment_ref.borrow();
100                let data = segment.initializer();
101                let felts = crate::Rodata::bytes_to_elements(data.as_slice())
102                    .expect("invalid data segment initializer");
103                let digest = miden_core::crypto::hash::Rpo256::hash_elements(&felts);
104                crate::Rodata {
105                    component: link_info.component().clone(),
106                    digest,
107                    start: super::NativePtr::from_ptr(*segment.offset()),
108                    data,
109                }
110            })
111            .collect();
112
113        let kernel = if matches!(context.session().options.target, TargetEnv::Rollup { .. }) {
114            Some(miden_lib::transaction::TransactionKernel::kernel())
115        } else {
116            None
117        };
118
119        // Compute the first page boundary after the end of the globals table to use as the start
120        // of the dynamic heap when the program is executed
121        let heap_base = link_info.reserved_memory_bytes()
122            + link_info.globals_layout().next_page_boundary() as usize;
123        let heap_base = u32::try_from(heap_base)
124            .expect("unable to allocate dynamic heap: global table too large");
125        let stack_pointer = link_info.globals_layout().stack_pointer_offset();
126        let mut masm_component = MasmComponent {
127            id,
128            init,
129            entrypoint,
130            kernel,
131            rodata,
132            heap_base,
133            stack_pointer,
134            modules,
135        };
136        let builder = MasmComponentBuilder {
137            analysis_manager,
138            component: &mut masm_component,
139            link_info: &link_info,
140            init_body: Default::default(),
141            invoked_from_init: Default::default(),
142        };
143
144        builder.build(self)?;
145
146        Ok(masm_component)
147    }
148}
149
/// Lowers the body of a `builtin::Component` into the [MasmComponent] it borrows.
struct MasmComponentBuilder<'a> {
    /// The MASM component being populated; lowered modules and procedures are added to it
    component: &'a mut MasmComponent,
    /// Analysis manager for the component; nested per operation as its contents are lowered
    analysis_manager: AnalysisManager,
    /// Output of linking the component, shared with the module and function builders
    link_info: &'a LinkInfo,
    /// Instructions accumulated for the body of the component-level `init` procedure
    init_body: Vec<masm::Op>,
    /// Handed to the block emitter as its `invoked` set when global variable initializers are
    /// emitted; presumably the invocations made by that code (confirm against `BlockEmitter`)
    invoked_from_init: BTreeSet<masm::Invoke>,
}
157
158impl MasmComponentBuilder<'_> {
159    /// Convert the component body to Miden Assembly
160    pub fn build(mut self, component: &builtin::Component) -> Result<(), Report> {
161        use masm::{Instruction as Inst, InvocationTarget, Op};
162
163        // If a component-level init is required, emit code to initialize the heap before any other
164        // initialization code.
165        if self.component.init.is_some() {
166            let span = component.span();
167
168            // Heap metadata initialization
169            let heap_base = self.component.heap_base;
170            self.init_body.push(masm::Op::Inst(Span::new(span, Inst::PushU32(heap_base))));
171            let heap_init = masm::ProcedureName::new("heap_init").unwrap();
172            let memory_intrinsics = masm::LibraryPath::new("intrinsics::mem").unwrap();
173            self.init_body.push(Op::Inst(Span::new(
174                span,
175                Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
176            )));
177            self.init_body.push(Op::Inst(Span::new(
178                span,
179                Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
180                    name: heap_init,
181                    path: memory_intrinsics,
182                }),
183            )));
184            self.init_body
185                .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
186
187            // Data segment initialization
188            self.emit_data_segment_initialization();
189        }
190
191        // Translate component body
192        let region = component.body();
193        let block = region.entry();
194        for op in block.body() {
195            if let Some(module) = op.downcast_ref::<builtin::Module>() {
196                self.define_module(module)?;
197            } else if let Some(interface) = op.downcast_ref::<builtin::Interface>() {
198                self.define_interface(interface)?;
199            } else if let Some(function) = op.downcast_ref::<builtin::Function>() {
200                self.define_function(function)?;
201            } else {
202                panic!(
203                    "invalid component-level operation: '{}' is not supported in a component body",
204                    op.name()
205                )
206            }
207        }
208
209        // Finalize the component-level init, if required
210        if self.component.init.is_some() {
211            let module =
212                Arc::get_mut(&mut self.component.modules[0]).expect("expected unique reference");
213
214            let init_name = masm::ProcedureName::new("init").unwrap();
215            let init_body = core::mem::take(&mut self.init_body);
216            let init = masm::Procedure::new(
217                Default::default(),
218                masm::Visibility::Public,
219                init_name,
220                0,
221                masm::Block::new(component.span(), init_body),
222            );
223
224            module.define_procedure(masm::Export::Procedure(init))?;
225        } else {
226            assert!(
227                self.init_body.is_empty(),
228                "the need for an 'init' function was not expected, but code was generated for one"
229            );
230        }
231
232        Ok(())
233    }
234
235    fn define_interface(&mut self, interface: &builtin::Interface) -> Result<(), Report> {
236        let component_path = self.component.id.to_library_path();
237        let interface_path = component_path.append_unchecked(interface.name());
238        let mut masm_module =
239            Box::new(masm::Module::new(masm::ModuleKind::Library, interface_path));
240        let builder = MasmModuleBuilder {
241            module: &mut masm_module,
242            analysis_manager: self
243                .analysis_manager
244                .nest(interface.as_operation().as_operation_ref()),
245            link_info: self.link_info,
246            init_body: &mut self.init_body,
247            invoked_from_init: &mut self.invoked_from_init,
248        };
249        builder.build_from_interface(interface)?;
250
251        self.component.modules.push(Arc::from(masm_module));
252
253        Ok(())
254    }
255
256    fn define_module(&mut self, module: &builtin::Module) -> Result<(), Report> {
257        let component_path = self.component.id.to_library_path();
258        let module_path = component_path.append_unchecked(module.name());
259        let mut masm_module = Box::new(masm::Module::new(masm::ModuleKind::Library, module_path));
260        let builder = MasmModuleBuilder {
261            module: &mut masm_module,
262            analysis_manager: self.analysis_manager.nest(module.as_operation_ref()),
263            link_info: self.link_info,
264            init_body: &mut self.init_body,
265            invoked_from_init: &mut self.invoked_from_init,
266        };
267        builder.build(module)?;
268
269        self.component.modules.push(Arc::from(masm_module));
270
271        Ok(())
272    }
273
274    fn define_function(&mut self, function: &builtin::Function) -> Result<(), Report> {
275        let builder = MasmFunctionBuilder::new(function)?;
276        let procedure = builder.build(
277            function,
278            self.analysis_manager.nest(function.as_operation_ref()),
279            self.link_info,
280        )?;
281
282        let module =
283            Arc::get_mut(&mut self.component.modules[0]).expect("expected unique reference");
284        assert_eq!(
285            module.path().num_components(),
286            1,
287            "expected top-level namespace module, but one has not been defined (in '{}' of '{}')",
288            module.path(),
289            function.path()
290        );
291        module.define_procedure(masm::Export::Procedure(procedure))?;
292
293        Ok(())
294    }
295
296    /// Emit the sequence of instructions necessary to consume rodata from the advice stack and
297    /// populate the global heap with the data segments of this component, verifying that the
298    /// commitments match.
299    fn emit_data_segment_initialization(&mut self) {
300        use masm::{Instruction as Inst, InvocationTarget, Op};
301
302        // Emit data segment initialization code
303        //
304        // NOTE: This depends on the program being executed with the data for all data segments
305        // having been placed in the advice map with the same commitment and encoding used here.
306        // The program will fail to execute if this is not set up correctly.
307        let pipe_preimage_to_memory = masm::ProcedureName::new("pipe_preimage_to_memory").unwrap();
308        let std_mem = masm::LibraryPath::new("std::mem").unwrap();
309
310        let span = SourceSpan::default();
311        for rodata in self.component.rodata.iter() {
312            // Push the commitment hash (`COM`) for this data onto the operand stack
313            self.init_body
314                .push(Op::Inst(Span::new(span, Inst::PushWord(rodata.digest.into()))));
315            // Move rodata from the advice map, using the commitment as key, to the advice stack
316            self.init_body
317                .push(Op::Inst(Span::new(span, Inst::SysEvent(masm::SystemEventNode::PushMapVal))));
318            // write_ptr
319            assert!(rodata.start.addr.is_multiple_of(4), "rodata segments must be word-aligned");
320            self.init_body.push(Op::Inst(Span::new(span, Inst::PushU32(rodata.start.addr))));
321            // num_words
322            self.init_body
323                .push(Op::Inst(Span::new(span, Inst::PushU32(rodata.size_in_words() as u32))));
324            // [num_words, write_ptr, COM, ..] -> [write_ptr']
325            self.init_body.push(Op::Inst(Span::new(
326                span,
327                Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
328            )));
329            self.init_body.push(Op::Inst(Span::new(
330                span,
331                Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
332                    name: pipe_preimage_to_memory.clone(),
333                    path: std_mem.clone(),
334                }),
335            )));
336            self.init_body
337                .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
338            // drop write_ptr'
339            self.init_body.push(Op::Inst(Span::new(span, Inst::Drop)));
340        }
341    }
342}
343
/// Lowers the body of a `builtin::Module` or `builtin::Interface` into a single MASM module.
struct MasmModuleBuilder<'a> {
    /// The MASM module that lowered procedures are defined in
    module: &'a mut masm::Module,
    /// Analysis manager for the module/interface; nested per function or global variable
    analysis_manager: AnalysisManager,
    /// Output of linking the enclosing component
    link_info: &'a LinkInfo,
    /// Body of the component-level `init` procedure; global variable initializers are appended
    /// to it
    init_body: &'a mut Vec<masm::Op>,
    /// Handed to the block emitter as its `invoked` set when global variable initializers are
    /// emitted; presumably the invocations made by that code (confirm against `BlockEmitter`)
    invoked_from_init: &'a mut BTreeSet<masm::Invoke>,
}
351
352impl MasmModuleBuilder<'_> {
353    pub fn build(mut self, module: &builtin::Module) -> Result<(), Report> {
354        let region = module.body();
355        let block = region.entry();
356        for op in block.body() {
357            if let Some(function) = op.downcast_ref::<builtin::Function>() {
358                self.define_function(function)?;
359            } else if let Some(gv) = op.downcast_ref::<builtin::GlobalVariable>() {
360                self.emit_global_variable_initializer(gv)?;
361            } else if op.is::<builtin::Segment>() {
362                continue;
363            } else {
364                panic!(
365                    "invalid module-level operation: '{}' is not legal in a MASM module body",
366                    op.name()
367                )
368            }
369        }
370
371        Ok(())
372    }
373
374    pub fn build_from_interface(mut self, interface: &builtin::Interface) -> Result<(), Report> {
375        let region = interface.body();
376        let block = region.entry();
377        for op in block.body() {
378            if let Some(function) = op.downcast_ref::<builtin::Function>() {
379                self.define_function(function)?;
380            } else {
381                panic!(
382                    "invalid interface-level operation: '{}' is not legal in a MASM module body",
383                    op.name()
384                )
385            }
386        }
387
388        Ok(())
389    }
390
391    fn define_function(&mut self, function: &builtin::Function) -> Result<(), Report> {
392        let builder = MasmFunctionBuilder::new(function)?;
393
394        let procedure = builder.build(
395            function,
396            self.analysis_manager.nest(function.as_operation_ref()),
397            self.link_info,
398        )?;
399
400        self.module.define_procedure(masm::Export::Procedure(procedure))?;
401
402        Ok(())
403    }
404
405    fn emit_global_variable_initializer(
406        &mut self,
407        gv: &builtin::GlobalVariable,
408    ) -> Result<(), Report> {
409        // We don't emit anything for declarations
410        if gv.is_declaration() {
411            return Ok(());
412        }
413
414        // We compute liveness for global variables independently
415        let analysis_manager = self.analysis_manager.nest(gv.as_operation_ref());
416        let liveness = analysis_manager.get_analysis::<LivenessAnalysis>()?;
417
418        // Emit the initializer block
419        let initializer_region = gv.region(0);
420        let initializer_block = initializer_region.entry();
421        let mut block_emitter = BlockEmitter {
422            liveness: &liveness,
423            link_info: self.link_info,
424            invoked: self.invoked_from_init,
425            target: Default::default(),
426            stack: Default::default(),
427        };
428        block_emitter.emit_inline(&initializer_block);
429
430        // Sanity checks
431        assert_eq!(block_emitter.stack.len(), 1, "expected only global variable value on stack");
432        let return_ty = block_emitter.stack.peek().unwrap().ty();
433        assert_eq!(
434            &return_ty,
435            gv.ty(),
436            "expected initializer to return value of same type as declaration"
437        );
438
439        // Write the initialized value to the computed storage offset for this global
440        let computed_addr = self
441            .link_info
442            .globals_layout()
443            .get_computed_addr(gv.as_global_var_ref())
444            .expect("undefined global variable");
445        block_emitter.emitter().store_imm(computed_addr, gv.span());
446
447        // Extend the generated init function with the code to initialize this global
448        let mut body = core::mem::take(&mut block_emitter.target);
449        self.init_body.append(&mut body);
450
451        Ok(())
452    }
453}
454
/// Holds the procedure metadata derived from a `builtin::Function`, ahead of lowering its body.
struct MasmFunctionBuilder {
    /// Source span of the function
    span: midenc_hir::SourceSpan,
    /// Procedure name, built from the function name
    name: masm::ProcedureName,
    /// MASM visibility that the function's visibility maps to
    visibility: masm::Visibility,
    /// Sum of the sizes, in field elements, of the function's locals
    num_locals: u16,
}
461
462impl MasmFunctionBuilder {
463    pub fn new(function: &builtin::Function) -> Result<Self, Report> {
464        use midenc_hir::{Symbol, Visibility};
465
466        let name = function.name();
467        let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(Span::new(
468            name.span,
469            name.as_str().into(),
470        )));
471        let visibility = match function.visibility() {
472            Visibility::Public => masm::Visibility::Public,
473            // TODO(pauls): Support internal visibility in MASM
474            Visibility::Internal => masm::Visibility::Public,
475            Visibility::Private => masm::Visibility::Private,
476        };
477        let locals_required = function.locals().iter().map(|ty| ty.size_in_felts()).sum::<usize>();
478        let num_locals = u16::try_from(locals_required).map_err(|_| {
479            let context = function.as_operation().context();
480            context
481                .diagnostics()
482                .diagnostic(miden_assembly::diagnostics::Severity::Error)
483                .with_message("cannot emit masm for function")
484                .with_primary_label(
485                    function.span(),
486                    "local storage exceeds procedure limit: no more than u16::MAX elements are \
487                     supported",
488                )
489                .into_report()
490        })?;
491
492        Ok(Self {
493            span: function.span(),
494            name,
495            visibility,
496            num_locals,
497        })
498    }
499
500    pub fn build(
501        self,
502        function: &builtin::Function,
503        analysis_manager: AnalysisManager,
504        link_info: &LinkInfo,
505    ) -> Result<masm::Procedure, Report> {
506        use alloc::collections::BTreeSet;
507
508        use midenc_hir_analysis::analyses::LivenessAnalysis;
509
510        log::trace!(target: "codegen", "lowering {}", function.as_operation());
511
512        let liveness = analysis_manager.get_analysis::<LivenessAnalysis>()?;
513
514        let mut invoked = BTreeSet::default();
515        let entry = function.entry_block();
516        let mut stack = crate::OperandStack::default();
517        {
518            let entry_block = entry.borrow();
519            for arg in entry_block.arguments().iter().rev().copied() {
520                stack.push(arg as ValueRef);
521            }
522        }
523        let emitter = BlockEmitter {
524            liveness: &liveness,
525            link_info,
526            invoked: &mut invoked,
527            target: Default::default(),
528            stack,
529        };
530
531        let body = emitter.emit(&entry.borrow());
532
533        let Self {
534            span,
535            name,
536            visibility,
537            num_locals,
538        } = self;
539
540        let mut procedure = masm::Procedure::new(span, visibility, name, num_locals, body);
541
542        procedure.extend_invoked(invoked);
543
544        Ok(procedure)
545    }
546}