// midenc_codegen_masm/artifact.rs

1use alloc::{
2    collections::{BTreeMap, BTreeSet},
3    sync::Arc,
4};
5use core::fmt;
6
7use miden_assembly::{ast::InvocationTarget, Library};
8use miden_core::{utils::DisplayHex, Program};
9use miden_mast_package::{MastArtifact, Package, ProcedureName};
10use miden_processor::Digest;
11use midenc_hir::{constants::ConstantData, dialects::builtin, interner::Symbol};
12use midenc_session::{
13    diagnostics::{Report, SourceSpan, Span},
14    Session,
15};
16
17use crate::{lower::NativePtr, masm, TraceEvent};
18
19pub struct MasmComponent {
20    pub id: builtin::ComponentId,
21    /// The symbol name of the component initializer function
22    ///
23    /// This function is responsible for initializing global variables and writing data segments
24    /// into memory at program startup, and at cross-context call boundaries (in callee prologue).
25    pub init: Option<masm::InvocationTarget>,
26    /// The symbol name of the program entrypoint, if this component is executable.
27    ///
28    /// If unset, it indicates that the component is a library, even if it could be made executable.
29    pub entrypoint: Option<masm::InvocationTarget>,
30    /// The kernel library to link against
31    pub kernel: Option<masm::KernelLibrary>,
32    /// The rodata segments of this component keyed by the offset of the segment
33    pub rodata: Vec<Rodata>,
34    /// The address of the start of the global heap
35    pub heap_base: u32,
36    /// The address of the `__stack_pointer` global, if such a global has been defined
37    pub stack_pointer: Option<u32>,
38    /// The set of modules in this component
39    pub modules: Vec<Arc<masm::Module>>,
40}
41
42/// Represents a read-only data segment, combined with its content digest
43#[derive(Clone, PartialEq, Eq)]
44pub struct Rodata {
45    /// The component to which this read-only data segment belongs
46    pub component: builtin::ComponentId,
47    /// The content digest computed for `data`
48    pub digest: Digest,
49    /// The address at which the data for this segment begins
50    pub start: NativePtr,
51    /// The raw binary data for this segment
52    pub data: Arc<ConstantData>,
53}
54impl fmt::Debug for Rodata {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        f.debug_struct("Rodata")
57            .field("digest", &format_args!("{}", DisplayHex::new(&self.digest.as_bytes())))
58            .field("start", &self.start)
59            .field_with("data", |f| {
60                f.debug_struct("ConstantData")
61                    .field("len", &self.data.len())
62                    .finish_non_exhaustive()
63            })
64            .finish()
65    }
66}
67impl Rodata {
68    pub fn size_in_bytes(&self) -> usize {
69        self.data.len()
70    }
71
72    pub fn size_in_felts(&self) -> usize {
73        self.data.len().next_multiple_of(4) / 4
74    }
75
76    pub fn size_in_words(&self) -> usize {
77        self.size_in_felts().next_multiple_of(4) / 4
78    }
79
80    /// Attempt to convert this rodata object to its equivalent representation in felts
81    ///
82    /// See [Self::bytes_to_elements] for more details.
83    pub fn to_elements(&self) -> Result<Vec<miden_processor::Felt>, String> {
84        Self::bytes_to_elements(self.data.as_slice())
85    }
86
87    /// Attempt to convert the given bytes to their equivalent representation in felts
88    ///
89    /// The resulting felts will be in padded out to the nearest number of words, i.e. if the data
90    /// only takes up 3 felts worth of bytes, then the resulting `Vec` will contain 4 felts, so that
91    /// the total size is a valid number of words.
92    pub fn bytes_to_elements(bytes: &[u8]) -> Result<Vec<miden_processor::Felt>, String> {
93        use miden_core::FieldElement;
94        use miden_processor::Felt;
95
96        let mut felts = Vec::with_capacity(bytes.len() / 4);
97        let mut iter = bytes.iter().copied().array_chunks::<4>();
98        felts.extend(iter.by_ref().map(|chunk| Felt::new(u32::from_le_bytes(chunk) as u64)));
99        if let Some(remainder) = iter.into_remainder() {
100            let mut chunk = [0u8; 4];
101            for (i, byte) in remainder.into_iter().enumerate() {
102                chunk[i] = byte;
103            }
104            felts.push(Felt::new(u32::from_le_bytes(chunk) as u64));
105        }
106
107        let size_in_felts = bytes.len().next_multiple_of(4) / 4;
108        let size_in_words = size_in_felts.next_multiple_of(4) / 4;
109        let padding = (size_in_words * 4).abs_diff(felts.len());
110        felts.resize(felts.len() + padding, Felt::ZERO);
111
112        Ok(felts)
113    }
114}
115
116inventory::submit! {
117    midenc_session::CompileFlag::new("test_harness")
118        .long("test-harness")
119        .action(midenc_session::FlagAction::SetTrue)
120        .help("If present, causes the code generator to emit extra code for the VM test harness")
121        .help_heading("Testing")
122}
123
124impl fmt::Display for MasmComponent {
125    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
126        use crate::intrinsics::INTRINSICS_MODULE_NAMES;
127
128        for module in self.modules.iter() {
129            // Don't print empty modules
130            //
131            // NOTE(pauls): This is a temporary workaround for the fact that component init
132            // functions require a module, and we are not yet emitting component init functions,
133            // so the generated module is empty.
134            if module.exported_procedures().next().is_none() {
135                continue;
136            }
137
138            // Skip printing the standard library modules and intrinsics
139            // modules to focus on the user-defined modules and avoid the
140            // stack overflow error when printing large programs
141            // https://github.com/0xMiden/miden-formatting/issues/4
142            let module_name = module.path().path();
143            if INTRINSICS_MODULE_NAMES.contains(&module_name.as_ref()) {
144                continue;
145            }
146            if ["std"].contains(&module.namespace().as_str()) {
147                continue;
148            } else {
149                writeln!(f, "# mod {}\n", &module_name)?;
150                writeln!(f, "{}", module)?;
151            }
152        }
153        Ok(())
154    }
155}
156
157impl MasmComponent {
158    pub fn assemble(
159        &self,
160        link_libraries: &[Arc<Library>],
161        link_packages: &BTreeMap<Symbol, Arc<Package>>,
162        session: &Session,
163    ) -> Result<MastArtifact, Report> {
164        if let Some(entrypoint) = self.entrypoint.as_ref() {
165            self.assemble_program(entrypoint, link_libraries, link_packages, session)
166                .map(MastArtifact::Executable)
167        } else {
168            self.assemble_library(link_libraries, link_packages, session)
169                .map(MastArtifact::Library)
170        }
171    }
172
173    fn assemble_program(
174        &self,
175        entrypoint: &InvocationTarget,
176        link_libraries: &[Arc<Library>],
177        _link_packages: &BTreeMap<Symbol, Arc<Package>>,
178        session: &Session,
179    ) -> Result<Arc<Program>, Report> {
180        use miden_assembly::{Assembler, CompileOptions};
181
182        let debug_mode = session.options.emit_debug_decorators();
183
184        log::debug!(
185            target: "assembly",
186            "assembling executable with entrypoint '{}' (debug_mode={})",
187            entrypoint,
188            debug_mode
189        );
190        let mut assembler =
191            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
192
193        let mut lib_modules = BTreeSet::default();
194        // Link extra libraries
195        for library in link_libraries.iter().cloned() {
196            for module in library.module_infos() {
197                log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
198                lib_modules.insert(module.path().clone());
199            }
200            assembler.add_library(library)?;
201        }
202
203        // Assemble library
204        let mut modules: Vec<Arc<masm::Module>> = self.modules.clone();
205
206        // We need to add modules according to their dependencies (add the dependency before the dependent)
207        // Workaround until https://github.com/0xMiden/miden-vm/issues/1669 is implemented
208        modules.reverse();
209
210        log::debug!(target: "assembly", "start adding the following modules with assembler: {}",
211            modules.iter().map(|m| m.path().to_string()).collect::<Vec<_>>().join(", "));
212
213        for module in modules.iter().cloned() {
214            if lib_modules.contains(module.path()) {
215                log::warn!(
216                    target: "assembly",
217                    "module '{}' is already registered with the assembler as library's module, \
218                     skipping",
219                    module.path()
220                );
221                continue;
222            }
223            log::debug!(target: "assembly", "adding '{}' to assembler", module.path());
224            let kind = module.kind();
225            assembler.add_module_with_options(
226                module,
227                CompileOptions {
228                    kind,
229                    warnings_as_errors: false,
230                    path: None,
231                },
232            )?;
233        }
234
235        let emit_test_harness = session.get_flag("test_harness");
236        let main = self.generate_main(entrypoint, emit_test_harness)?;
237        log::debug!(target: "assembly", "generated executable module:\n{main}");
238        let program = assembler.assemble_program(main)?;
239        let advice_map: miden_core::AdviceMap = self
240            .rodata
241            .iter()
242            .map(|rodata| {
243                rodata.to_elements().map_err(Report::msg).map(|felts| (rodata.digest, felts))
244            })
245            .try_collect()?;
246        Ok(Arc::new(program.with_advice_map(advice_map)))
247    }
248
249    fn assemble_library(
250        &self,
251        link_libraries: &[Arc<Library>],
252        _link_packages: &BTreeMap<Symbol, Arc<Package>>,
253        session: &Session,
254    ) -> Result<Arc<Library>, Report> {
255        use miden_assembly::Assembler;
256
257        let debug_mode = session.options.emit_debug_decorators();
258        log::debug!(
259            target: "assembly",
260            "assembling library of {} modules (debug_mode={})",
261            self.modules.len(),
262            debug_mode
263        );
264
265        let mut assembler =
266            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
267
268        let mut lib_modules = Vec::new();
269        // Link extra libraries
270        for library in link_libraries.iter().cloned() {
271            for module in library.module_infos() {
272                log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
273                lib_modules.push(module.path().clone());
274            }
275            assembler.add_library(library)?;
276        }
277
278        // Assemble library
279        let mut modules = Vec::with_capacity(self.modules.len());
280        for module in self.modules.iter().cloned() {
281            if lib_modules.contains(module.path()) {
282                log::warn!(
283                    target: "assembly",
284                    "module '{}' is already registered with the assembler as library's module, \
285                     skipping",
286                    module.path()
287                );
288                continue;
289            }
290            log::debug!(target: "assembly", "adding '{}' to assembler", module.path());
291            modules.push(module);
292        }
293        let lib = assembler.assemble_library(modules)?;
294        let advice_map: miden_core::AdviceMap = self
295            .rodata
296            .iter()
297            .map(|rodata| {
298                rodata.to_elements().map_err(Report::msg).map(|felts| (rodata.digest, felts))
299            })
300            .try_collect()?;
301
302        let converted_exports = recover_wasm_cm_interfaces(&lib);
303
304        // Get a reference to the library MAST, then drop the library so we can obtain a mutable
305        // reference so we can modify its advice map data
306        let mut mast_forest = lib.mast_forest().clone();
307        drop(lib);
308        {
309            let mast = Arc::get_mut(&mut mast_forest).expect("expected unique reference");
310            mast.advice_map_mut().extend(advice_map);
311        }
312
313        // Reconstruct the library with the updated MAST
314        Ok(Library::new(mast_forest, converted_exports).map(Arc::new)?)
315    }
316
317    /// Generate an executable module which when run expects the raw data segment data to be
318    /// provided on the advice stack in the same order as initialization, and the operands of
319    /// the entrypoint function on the operand stack.
320    fn generate_main(
321        &self,
322        entrypoint: &InvocationTarget,
323        emit_test_harness: bool,
324    ) -> Result<Arc<masm::Module>, Report> {
325        use masm::{Instruction as Inst, Op};
326
327        let mut exe = Box::new(masm::Module::new_executable());
328        let span = SourceSpan::default();
329        let body = {
330            let mut block = masm::Block::new(span, Vec::with_capacity(64));
331            // Invoke component initializer, if present
332            if let Some(init) = self.init.as_ref() {
333                block.push(Op::Inst(Span::new(span, Inst::Exec(init.clone()))));
334            }
335
336            // Initialize test harness, if requested
337            if emit_test_harness {
338                self.emit_test_harness(&mut block);
339            }
340
341            // Invoke the program entrypoint
342            block.push(Op::Inst(Span::new(
343                span,
344                Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
345            )));
346            block.push(Op::Inst(Span::new(span, Inst::Exec(entrypoint.clone()))));
347            block
348                .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
349
350            // Truncate the stack to 16 elements on exit
351            let truncate_stack = InvocationTarget::AbsoluteProcedurePath {
352                name: ProcedureName::new("truncate_stack").unwrap(),
353                path: masm::LibraryPath::new_from_components(
354                    masm::LibraryNamespace::new("std").unwrap(),
355                    [masm::Ident::new("sys").unwrap()],
356                ),
357            };
358            block.push(Op::Inst(Span::new(span, Inst::Exec(truncate_stack))));
359            block
360        };
361        let start = masm::Procedure::new(
362            span,
363            masm::Visibility::Public,
364            masm::ProcedureName::main(),
365            0,
366            body,
367        );
368        exe.define_procedure(masm::Export::Procedure(start))?;
369        Ok(Arc::from(exe))
370    }
371
372    fn emit_test_harness(&self, block: &mut masm::Block) {
373        use masm::{Instruction as Inst, Op};
374
375        let span = SourceSpan::default();
376
377        let pipe_words_to_memory = masm::ProcedureName::new("pipe_words_to_memory").unwrap();
378        let std_mem = masm::LibraryPath::new("std::mem").unwrap();
379
380        // Advice Stack: [dest_ptr, num_words, ...]
381
382        // => [num_words, dest_ptr] on operand stack
383        block.push(Op::Inst(Span::new(span, Inst::AdvPush(2.into()))));
384        // => [C, B, A, dest_ptr] on operand stack
385        block.push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameStart.as_u32().into()))));
386        block.push(Op::Inst(Span::new(
387            span,
388            Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
389                name: pipe_words_to_memory,
390                path: std_mem,
391            }),
392        )));
393        block.push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
394        // Drop C, B, A
395        block.push(Op::Inst(Span::new(span, Inst::DropW)));
396        block.push(Op::Inst(Span::new(span, Inst::DropW)));
397        block.push(Op::Inst(Span::new(span, Inst::DropW)));
398        // Drop dest_ptr
399        block.push(Op::Inst(Span::new(span, Inst::Drop)));
400    }
401}
402
403/// Try to recognize Wasm CM interfaces and transform those exports to have Wasm interface encoded
404/// as module name.
405///
406/// Temporary workaround for:
407///
408/// 1. Temporary exporting multiple interfaces from the same(Wasm core) module (an interface is
409///    encoded in the function name)
410///
411/// 2. Assembler using the current module name to generate exports.
412///
413fn recover_wasm_cm_interfaces(
414    lib: &Library,
415) -> BTreeMap<masm::QualifiedProcedureName, miden_processor::MastNodeId> {
416    use crate::intrinsics::INTRINSICS_MODULE_NAMES;
417
418    let mut exports = BTreeMap::new();
419    for export in lib.exports() {
420        let export_node_id = lib.get_export_node_id(export);
421        if INTRINSICS_MODULE_NAMES.contains(&export.module.to_string().as_str())
422            || export.name.as_str().starts_with("cabi")
423        {
424            // Preserve intrinsics modules and internal Wasm CM `cabi_*` functions
425            exports.insert(export.clone(), export_node_id);
426            continue;
427        }
428
429        if let Some((component, interface)) = export.name.as_str().rsplit_once('/') {
430            // Wasm CM interface
431            let (interface, function) =
432                interface.rsplit_once('#').expect("invalid wasm component model identifier");
433
434            let mut component_parts = component.split(':').map(Arc::from);
435            let ns = masm::LibraryNamespace::User(
436                component_parts.next().expect("invalid wasm component model identifier"),
437            );
438            let component_parts = component_parts
439                .map(Span::unknown)
440                .map(masm::Ident::from_raw_parts)
441                .chain([masm::Ident::from_raw_parts(Span::unknown(Arc::from(interface)))]);
442            let path = masm::LibraryPath::new_from_components(ns, component_parts);
443            let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(
444                Span::unknown(Arc::from(function)),
445            ));
446            let new_export = masm::QualifiedProcedureName::new(path, name);
447            exports.insert(new_export, export_node_id);
448        } else {
449            // Non-Wasm CM interface, preserve as is
450            exports.insert(export.clone(), export_node_id);
451        }
452    }
453    exports
454}