midenc_codegen_masm/
artifact.rs

1use alloc::{
2    collections::{BTreeMap, BTreeSet},
3    sync::Arc,
4};
5use core::fmt;
6
7use miden_assembly::{ast::InvocationTarget, Library};
8use miden_core::{utils::DisplayHex, Program};
9use miden_mast_package::{MastArtifact, Package, ProcedureName};
10use miden_processor::Digest;
11use midenc_hir::{constants::ConstantData, dialects::builtin, interner::Symbol};
12use midenc_session::{
13    diagnostics::{Report, SourceSpan, Span},
14    Session,
15};
16
17use crate::{lower::NativePtr, masm};
18
/// The Miden Assembly form of a single component: its modules, plus the metadata needed to
/// assemble them into either an executable program or a library (see `MasmComponent::assemble`).
pub struct MasmComponent {
    /// The identifier of this component
    pub id: builtin::ComponentId,
    /// The symbol name of the component initializer function
    ///
    /// This function is responsible for initializing global variables and writing data segments
    /// into memory at program startup, and at cross-context call boundaries (in callee prologue).
    pub init: Option<masm::InvocationTarget>,
    /// The symbol name of the program entrypoint, if this component is executable.
    ///
    /// If unset, it indicates that the component is a library, even if it could be made executable.
    pub entrypoint: Option<masm::InvocationTarget>,
    /// The kernel library to link against
    pub kernel: Option<masm::KernelLibrary>,
    /// The read-only data segments of this component
    ///
    /// Each segment records its own start address and content digest, see [Rodata].
    pub rodata: Vec<Rodata>,
    /// The address of the start of the global heap
    pub heap_base: u32,
    /// The address of the `__stack_pointer` global, if such a global has been defined
    pub stack_pointer: Option<u32>,
    /// The set of modules in this component
    pub modules: Vec<Arc<masm::Module>>,
}
41
/// Represents a read-only data segment, combined with its content digest
///
/// When a component is assembled, each segment is converted to field elements (see
/// [Rodata::to_elements]) and stored in the advice map under `digest`.
#[derive(Clone, PartialEq, Eq)]
pub struct Rodata {
    /// The component to which this read-only data segment belongs
    pub component: builtin::ComponentId,
    /// The content digest computed for `data`
    pub digest: Digest,
    /// The address at which the data for this segment begins
    pub start: NativePtr,
    /// The raw binary data for this segment
    pub data: Arc<ConstantData>,
}
54impl fmt::Debug for Rodata {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        f.debug_struct("Rodata")
57            .field("digest", &format_args!("{}", DisplayHex::new(&self.digest.as_bytes())))
58            .field("start", &self.start)
59            .field_with("data", |f| {
60                f.debug_struct("ConstantData")
61                    .field("len", &self.data.len())
62                    .finish_non_exhaustive()
63            })
64            .finish()
65    }
66}
67impl Rodata {
68    pub fn size_in_bytes(&self) -> usize {
69        self.data.len()
70    }
71
72    pub fn size_in_felts(&self) -> usize {
73        self.data.len().next_multiple_of(4) / 4
74    }
75
76    pub fn size_in_words(&self) -> usize {
77        self.size_in_felts().next_multiple_of(4) / 4
78    }
79
80    /// Attempt to convert this rodata object to its equivalent representation in felts
81    ///
82    /// See [Self::bytes_to_elements] for more details.
83    pub fn to_elements(&self) -> Result<Vec<miden_processor::Felt>, String> {
84        Self::bytes_to_elements(self.data.as_slice())
85    }
86
87    /// Attempt to convert the given bytes to their equivalent representation in felts
88    ///
89    /// The resulting felts will be in padded out to the nearest number of words, i.e. if the data
90    /// only takes up 3 felts worth of bytes, then the resulting `Vec` will contain 4 felts, so that
91    /// the total size is a valid number of words.
92    pub fn bytes_to_elements(bytes: &[u8]) -> Result<Vec<miden_processor::Felt>, String> {
93        use miden_core::FieldElement;
94        use miden_processor::Felt;
95
96        let mut felts = Vec::with_capacity(bytes.len() / 4);
97        let mut iter = bytes.iter().copied().array_chunks::<4>();
98        felts.extend(iter.by_ref().map(|chunk| Felt::new(u32::from_le_bytes(chunk) as u64)));
99        if let Some(remainder) = iter.into_remainder() {
100            let mut chunk = [0u8; 4];
101            for (i, byte) in remainder.into_iter().enumerate() {
102                chunk[i] = byte;
103            }
104            felts.push(Felt::new(u32::from_le_bytes(chunk) as u64));
105        }
106
107        let size_in_felts = bytes.len().next_multiple_of(4) / 4;
108        let size_in_words = size_in_felts.next_multiple_of(4) / 4;
109        let padding = (size_in_words * 4).abs_diff(felts.len());
110        felts.resize(felts.len() + padding, Felt::ZERO);
111
112        Ok(felts)
113    }
114}
115
// Registers the `test_harness` compile flag (`--test-harness` on the command line).
//
// The flag is queried via `session.get_flag("test_harness")` when assembling an executable, and
// controls whether test harness setup code is emitted into the generated `main` procedure.
inventory::submit! {
    midenc_session::CompileFlag::new("test_harness")
        .long("test-harness")
        .action(midenc_session::FlagAction::SetTrue)
        .help("If present, causes the code generator to emit extra code for the VM test harness")
        .help_heading("Testing")
}
123
124impl fmt::Display for MasmComponent {
125    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
126        use crate::intrinsics::INTRINSICS_MODULE_NAMES;
127
128        for module in self.modules.iter() {
129            // Don't print empty modules
130            //
131            // NOTE(pauls): This is a temporary workaround for the fact that component init
132            // functions require a module, and we are not yet emitting component init functions,
133            // so the generated module is empty.
134            if module.exported_procedures().next().is_none() {
135                continue;
136            }
137
138            // Skip printing the standard library modules and intrinsics
139            // modules to focus on the user-defined modules and avoid the
140            // stack overflow error when printing large programs
141            // https://github.com/0xPolygonMiden/miden-formatting/issues/4
142            let module_name = module.path().path();
143            if INTRINSICS_MODULE_NAMES.contains(&module_name.as_ref()) {
144                continue;
145            }
146            if ["std"].contains(&module.namespace().as_str()) {
147                continue;
148            } else {
149                writeln!(f, "# mod {}\n", &module_name)?;
150                writeln!(f, "{}", module)?;
151            }
152        }
153        Ok(())
154    }
155}
156
157impl MasmComponent {
158    pub fn assemble(
159        &self,
160        link_libraries: &[Arc<Library>],
161        link_packages: &BTreeMap<Symbol, Arc<Package>>,
162        session: &Session,
163    ) -> Result<MastArtifact, Report> {
164        if let Some(entrypoint) = self.entrypoint.as_ref() {
165            self.assemble_program(entrypoint, link_libraries, link_packages, session)
166                .map(MastArtifact::Executable)
167        } else {
168            self.assemble_library(link_libraries, link_packages, session)
169                .map(MastArtifact::Library)
170        }
171    }
172
173    fn assemble_program(
174        &self,
175        entrypoint: &InvocationTarget,
176        link_libraries: &[Arc<Library>],
177        _link_packages: &BTreeMap<Symbol, Arc<Package>>,
178        session: &Session,
179    ) -> Result<Arc<Program>, Report> {
180        use miden_assembly::{Assembler, CompileOptions};
181
182        let debug_mode = session.options.emit_debug_decorators();
183
184        log::debug!(
185            target: "assembly",
186            "assembling executable with entrypoint '{}' (debug_mode={})",
187            entrypoint,
188            debug_mode
189        );
190        let mut assembler =
191            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
192
193        let mut lib_modules = BTreeSet::default();
194        // Link extra libraries
195        for library in link_libraries.iter().cloned() {
196            for module in library.module_infos() {
197                log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
198                lib_modules.insert(module.path().clone());
199            }
200            assembler.add_library(library)?;
201        }
202
203        // Assemble library
204        let mut modules: Vec<Arc<masm::Module>> = self.modules.clone();
205        // Sort modules to ensure intrinsics are first since the target compiled module imports them
206        modules.sort_by_key(|m| {
207            let name = m.path().path().into_owned();
208            let is_intrinsic = crate::intrinsics::INTRINSICS_MODULE_NAMES.contains(&name.as_str());
209            (!is_intrinsic, name)
210        });
211        for module in modules.iter().cloned() {
212            if lib_modules.contains(module.path()) {
213                log::warn!(
214                    target: "assembly",
215                    "module '{}' is already registered with the assembler as library's module, \
216                     skipping",
217                    module.path()
218                );
219                continue;
220            }
221            log::debug!(target: "assembly", "adding '{}' to assembler", module.path());
222            let kind = module.kind();
223            assembler.add_module_with_options(
224                module,
225                CompileOptions {
226                    kind,
227                    warnings_as_errors: false,
228                    path: None,
229                },
230            )?;
231        }
232
233        let emit_test_harness = session.get_flag("test_harness");
234        let main = self.generate_main(entrypoint, emit_test_harness)?;
235        log::debug!(target: "assembly", "generated executable module:\n{main}");
236        let program = assembler.assemble_program(main)?;
237        let advice_map: miden_core::AdviceMap = self
238            .rodata
239            .iter()
240            .map(|rodata| {
241                rodata.to_elements().map_err(Report::msg).map(|felts| (rodata.digest, felts))
242            })
243            .try_collect()?;
244        Ok(Arc::new(program.with_advice_map(advice_map)))
245    }
246
247    fn assemble_library(
248        &self,
249        link_libraries: &[Arc<Library>],
250        _link_packages: &BTreeMap<Symbol, Arc<Package>>,
251        session: &Session,
252    ) -> Result<Arc<Library>, Report> {
253        use miden_assembly::Assembler;
254
255        let debug_mode = session.options.emit_debug_decorators();
256        log::debug!(
257            target: "assembly",
258            "assembling library of {} modules (debug_mode={})",
259            self.modules.len(),
260            debug_mode
261        );
262
263        let mut assembler =
264            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
265
266        let mut lib_modules = Vec::new();
267        // Link extra libraries
268        for library in link_libraries.iter().cloned() {
269            for module in library.module_infos() {
270                log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
271                lib_modules.push(module.path().clone());
272            }
273            assembler.add_library(library)?;
274        }
275
276        // Assemble library
277        let mut modules = Vec::with_capacity(self.modules.len());
278        for module in self.modules.iter().cloned() {
279            if lib_modules.contains(module.path()) {
280                log::warn!(
281                    target: "assembly",
282                    "module '{}' is already registered with the assembler as library's module, \
283                     skipping",
284                    module.path()
285                );
286                continue;
287            }
288            log::debug!(target: "assembly", "adding '{}' to assembler", module.path());
289            modules.push(module);
290        }
291        let lib = assembler.assemble_library(modules)?;
292        let advice_map: miden_core::AdviceMap = self
293            .rodata
294            .iter()
295            .map(|rodata| {
296                rodata.to_elements().map_err(Report::msg).map(|felts| (rodata.digest, felts))
297            })
298            .try_collect()?;
299
300        let converted_exports = recover_wasm_cm_interfaces(&lib);
301
302        // Get a reference to the library MAST, then drop the library so we can obtain a mutable
303        // reference so we can modify its advice map data
304        let mut mast_forest = lib.mast_forest().clone();
305        drop(lib);
306        {
307            let mast = Arc::get_mut(&mut mast_forest).expect("expected unique reference");
308            mast.advice_map_mut().extend(advice_map);
309        }
310
311        // Reconstruct the library with the updated MAST
312        Ok(Library::new(mast_forest, converted_exports).map(Arc::new)?)
313    }
314
315    /// Generate an executable module which when run expects the raw data segment data to be
316    /// provided on the advice stack in the same order as initialization, and the operands of
317    /// the entrypoint function on the operand stack.
318    fn generate_main(
319        &self,
320        entrypoint: &InvocationTarget,
321        emit_test_harness: bool,
322    ) -> Result<Arc<masm::Module>, Report> {
323        use masm::{Instruction as Inst, Op};
324
325        let mut exe = Box::new(masm::Module::new_executable());
326        let span = SourceSpan::default();
327        let body = {
328            let mut block = masm::Block::new(span, Vec::with_capacity(64));
329            // Invoke component initializer, if present
330            if let Some(init) = self.init.as_ref() {
331                block.push(Op::Inst(Span::new(span, Inst::Exec(init.clone()))));
332            }
333
334            // Initialize test harness, if requested
335            if emit_test_harness {
336                self.emit_test_harness(&mut block);
337            }
338
339            // Invoke the program entrypoint
340            block.push(Op::Inst(Span::new(span, Inst::Exec(entrypoint.clone()))));
341
342            // Truncate the stack to 16 elements on exit
343            let truncate_stack = InvocationTarget::AbsoluteProcedurePath {
344                name: ProcedureName::new("truncate_stack").unwrap(),
345                path: masm::LibraryPath::new_from_components(
346                    masm::LibraryNamespace::new("std").unwrap(),
347                    [masm::Ident::new("sys").unwrap()],
348                ),
349            };
350            block.push(Op::Inst(Span::new(span, Inst::Exec(truncate_stack))));
351            block
352        };
353        let start = masm::Procedure::new(
354            span,
355            masm::Visibility::Public,
356            masm::ProcedureName::main(),
357            0,
358            body,
359        );
360        exe.define_procedure(masm::Export::Procedure(start))?;
361        Ok(Arc::from(exe))
362    }
363
364    fn emit_test_harness(&self, block: &mut masm::Block) {
365        use masm::{Instruction as Inst, Op};
366
367        let span = SourceSpan::default();
368
369        let pipe_words_to_memory = masm::ProcedureName::new("pipe_words_to_memory").unwrap();
370        let std_mem = masm::LibraryPath::new("std::mem").unwrap();
371
372        // Advice Stack: [dest_ptr, num_words, ...]
373
374        // => [num_words, dest_ptr] on operand stack
375        block.push(Op::Inst(Span::new(span, Inst::AdvPush(2.into()))));
376        // => [C, B, A, dest_ptr] on operand stack
377        block.push(Op::Inst(Span::new(
378            span,
379            Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
380                name: pipe_words_to_memory,
381                path: std_mem,
382            }),
383        )));
384        // Drop C, B, A
385        block.push(Op::Inst(Span::new(span, Inst::DropW)));
386        block.push(Op::Inst(Span::new(span, Inst::DropW)));
387        block.push(Op::Inst(Span::new(span, Inst::DropW)));
388        // Drop dest_ptr
389        block.push(Op::Inst(Span::new(span, Inst::Drop)));
390    }
391}
392
393/// Try to recognize Wasm CM interfaces and transform those exports to have Wasm interface encoded
394/// as module name.
395///
396/// Temporary workaround for:
397///
398/// 1. Temporary exporting multiple interfaces from the same(Wasm core) module (an interface is
399///    encoded in the function name)
400///
401/// 2. Assembler using the current module name to generate exports.
402///
403fn recover_wasm_cm_interfaces(
404    lib: &Library,
405) -> BTreeMap<masm::QualifiedProcedureName, miden_processor::MastNodeId> {
406    use crate::intrinsics::INTRINSICS_MODULE_NAMES;
407
408    let mut exports = BTreeMap::new();
409    for export in lib.exports() {
410        let export_node_id = lib.get_export_node_id(export);
411        if INTRINSICS_MODULE_NAMES.contains(&export.module.to_string().as_str())
412            || export.name.as_str().starts_with("cabi")
413        {
414            // Preserve intrinsics modules and internal Wasm CM `cabi_*` functions
415            exports.insert(export.clone(), export_node_id);
416            continue;
417        }
418
419        if let Some((component, interface)) = export.name.as_str().rsplit_once('/') {
420            // Wasm CM interface
421            let (interface, function) =
422                interface.rsplit_once('#').expect("invalid wasm component model identifier");
423
424            let mut component_parts = component.split(':').map(Arc::from);
425            let ns = masm::LibraryNamespace::User(
426                component_parts.next().expect("invalid wasm component model identifier"),
427            );
428            let component_parts = component_parts
429                .map(Span::unknown)
430                .map(masm::Ident::from_raw_parts)
431                .chain([masm::Ident::from_raw_parts(Span::unknown(Arc::from(interface)))]);
432            let path = masm::LibraryPath::new_from_components(ns, component_parts);
433            let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(
434                Span::unknown(Arc::from(function)),
435            ));
436            let new_export = masm::QualifiedProcedureName::new(path, name);
437            exports.insert(new_export, export_node_id);
438        } else {
439            // Non-Wasm CM interface, preserve as is
440            exports.insert(export.clone(), export_node_id);
441        }
442    }
443    exports
444}