midenc_codegen_masm/
artifact.rs

1use alloc::{
2    collections::{BTreeMap, BTreeSet},
3    sync::Arc,
4};
5use core::fmt;
6
7use miden_assembly::{Library, ast::InvocationTarget, library::LibraryExport};
8use miden_core::{Program, Word};
9use miden_mast_package::{MastArtifact, Package, ProcedureName};
10use midenc_hir::{constants::ConstantData, dialects::builtin, interner::Symbol};
11use midenc_session::{
12    Session,
13    diagnostics::{Report, SourceSpan, Span},
14};
15
16use crate::{TraceEvent, lower::NativePtr, masm};
17
/// A component lowered to Miden Assembly, together with the metadata used to assemble it.
///
/// `MasmComponent::assemble` produces an executable when `entrypoint` is set, and a library
/// otherwise.
18pub struct MasmComponent {
    /// The identifier of this component
19    pub id: builtin::ComponentId,
20    /// The symbol name of the component initializer function
21    ///
22    /// This function is responsible for initializing global variables and writing data segments
23    /// into memory at program startup, and at cross-context call boundaries (in callee prologue).
24    pub init: Option<masm::InvocationTarget>,
25    /// The symbol name of the program entrypoint, if this component is executable.
26    ///
27    /// If unset, it indicates that the component is a library, even if it could be made executable.
28    pub entrypoint: Option<masm::InvocationTarget>,
29    /// The kernel library to link against
30    pub kernel: Option<masm::KernelLibrary>,
31    /// The rodata segments of this component; each segment records its own start address
    ///
    /// During assembly, each segment's digest and felt-encoded data are added to the advice map
    /// of the resulting artifact.
    ///
    /// NOTE(review): this is a plain `Vec`, so nothing in this type keys the segments by offset
    /// or enforces any ordering - confirm what the producer guarantees.
32    pub rodata: Vec<Rodata>,
33    /// The address of the start of the global heap
34    pub heap_base: u32,
35    /// The address of the `__stack_pointer` global, if such a global has been defined
36    pub stack_pointer: Option<u32>,
37    /// The set of modules in this component
38    pub modules: Vec<Arc<masm::Module>>,
39}
40
41/// Represents a read-only data segment, combined with its content digest
///
/// When the owning component is assembled, `digest` is the advice map key under which the
/// felt-encoded form of `data` (see `Rodata::to_elements`) is stored.
///
/// `Debug` is implemented by hand: it prints the digest, the start address and the length of
/// `data`, but neither the raw bytes nor the `component` field.
42#[derive(Clone, PartialEq, Eq)]
43pub struct Rodata {
44    /// The component to which this read-only data segment belongs
45    pub component: builtin::ComponentId,
46    /// The content digest computed for `data`
47    pub digest: Word,
48    /// The address at which the data for this segment begins
49    pub start: NativePtr,
50    /// The raw binary data for this segment
51    pub data: Arc<ConstantData>,
52}
53impl fmt::Debug for Rodata {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        f.debug_struct("Rodata")
56            .field("digest", &format_args!("{}", &self.digest))
57            .field("start", &self.start)
58            .field_with("data", |f| {
59                f.debug_struct("ConstantData")
60                    .field("len", &self.data.len())
61                    .finish_non_exhaustive()
62            })
63            .finish()
64    }
65}
66impl Rodata {
67    pub fn size_in_bytes(&self) -> usize {
68        self.data.len()
69    }
70
71    pub fn size_in_felts(&self) -> usize {
72        self.data.len().next_multiple_of(4) / 4
73    }
74
75    pub fn size_in_words(&self) -> usize {
76        self.size_in_felts().next_multiple_of(4) / 4
77    }
78
79    /// Attempt to convert this rodata object to its equivalent representation in felts
80    ///
81    /// See [Self::bytes_to_elements] for more details.
82    pub fn to_elements(&self) -> Vec<miden_processor::Felt> {
83        Self::bytes_to_elements(self.data.as_slice())
84    }
85
86    /// Attempt to convert the given bytes to their equivalent representation in felts
87    ///
88    /// The resulting felts will be in padded out to the nearest number of words, i.e. if the data
89    /// only takes up 3 felts worth of bytes, then the resulting `Vec` will contain 4 felts, so that
90    /// the total size is a valid number of words.
91    pub fn bytes_to_elements(bytes: &[u8]) -> Vec<miden_processor::Felt> {
92        use miden_core::FieldElement;
93        use miden_processor::Felt;
94
95        let mut felts = Vec::with_capacity(bytes.len() / 4);
96        let mut iter = bytes.iter().copied().array_chunks::<4>();
97        felts.extend(iter.by_ref().map(|chunk| Felt::new(u32::from_le_bytes(chunk) as u64)));
98        let remainder = iter.into_remainder();
99        if remainder.len() > 0 {
100            let mut chunk = [0u8; 4];
101            for (i, byte) in remainder.enumerate() {
102                chunk[i] = byte;
103            }
104            felts.push(Felt::new(u32::from_le_bytes(chunk) as u64));
105        }
106
107        let size_in_felts = bytes.len().next_multiple_of(4) / 4;
108        let size_in_words = size_in_felts.next_multiple_of(4) / 4;
109        let padding = (size_in_words * 4).abs_diff(felts.len());
110        felts.resize(felts.len() + padding, Felt::ZERO);
111        debug_assert_eq!(felts.len() % 4, 0, "expected to be a valid number of words");
112        felts
113    }
114}
115
// Registers the `test_harness` compile flag (`--test-harness` on the command line).
//
// `MasmComponent::assemble_program` reads the flag via `session.get_flag("test_harness")` and
// passes it to `generate_main`, which emits the test harness prologue when it is set.
116inventory::submit! {
117    midenc_session::CompileFlag::new("test_harness")
118        .long("test-harness")
119        .action(midenc_session::FlagAction::SetTrue)
120        .help("If present, causes the code generator to emit extra code for the VM test harness")
121        .help_heading("Testing")
122}
123
124impl fmt::Display for MasmComponent {
125    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
126        use crate::intrinsics::INTRINSICS_MODULE_NAMES;
127
128        for module in self.modules.iter() {
129            // Skip printing the standard library modules and intrinsics
130            // modules to focus on the user-defined modules and avoid the
131            // stack overflow error when printing large programs
132            // https://github.com/0xMiden/miden-formatting/issues/4
133            let module_name = module.path().path();
134            if INTRINSICS_MODULE_NAMES.contains(&module_name.as_ref()) {
135                continue;
136            }
137            if ["std"].contains(&module.namespace().as_str()) {
138                continue;
139            } else {
140                writeln!(f, "# mod {}\n", &module_name)?;
141                writeln!(f, "{module}")?;
142            }
143        }
144        Ok(())
145    }
146}
147
148impl MasmComponent {
149    pub fn assemble(
150        &self,
151        link_libraries: &[Arc<Library>],
152        link_packages: &BTreeMap<Symbol, Arc<Package>>,
153        session: &Session,
154    ) -> Result<MastArtifact, Report> {
155        if let Some(entrypoint) = self.entrypoint.as_ref() {
156            self.assemble_program(entrypoint, link_libraries, link_packages, session)
157                .map(MastArtifact::Executable)
158        } else {
159            self.assemble_library(link_libraries, link_packages, session)
160                .map(MastArtifact::Library)
161        }
162    }
163
164    fn assemble_program(
165        &self,
166        entrypoint: &InvocationTarget,
167        link_libraries: &[Arc<Library>],
168        _link_packages: &BTreeMap<Symbol, Arc<Package>>,
169        session: &Session,
170    ) -> Result<Arc<Program>, Report> {
171        use miden_assembly::Assembler;
172
173        let debug_mode = session.options.emit_debug_decorators();
174
175        log::debug!(
176            target: "assembly",
177            "assembling executable with entrypoint '{entrypoint}' (debug_mode={debug_mode})"
178        );
179        let mut assembler =
180            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
181
182        let mut lib_modules = BTreeSet::default();
183        // Link extra libraries
184        for library in link_libraries.iter().cloned() {
185            for module in library.module_infos() {
186                log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
187                lib_modules.insert(module.path().clone());
188            }
189            assembler.link_dynamic_library(library)?;
190        }
191
192        // Assemble library
193        log::debug!(target: "assembly", "start adding the following modules with assembler: {}",
194            self.modules.iter().map(|m| m.path().to_string()).collect::<Vec<_>>().join(", "));
195
196        let mut modules = Vec::with_capacity(self.modules.len());
197        for module in self.modules.iter().cloned() {
198            if lib_modules.contains(module.path()) {
199                log::warn!(
200                    target: "assembly",
201                    "module '{}' is already registered with the assembler as library's module, \
202                     skipping",
203                    module.path()
204                );
205                continue;
206            }
207
208            if module.path().to_string().starts_with("intrinsics") {
209                log::debug!(target: "assembly", "adding intrinsics '{}' to assembler", module.path());
210                assembler.compile_and_statically_link(module)?;
211            } else {
212                log::debug!(target: "assembly", "adding '{}' for assembler", module.path());
213                modules.push(module);
214            }
215        }
216
217        // We need to add modules according to their dependencies (add the dependency before the dependent)
218        // Workaround until https://github.com/0xMiden/miden-vm/issues/1669 is implemented
219        for module in modules.into_iter().rev() {
220            assembler.compile_and_statically_link(module)?;
221        }
222
223        let emit_test_harness = session.get_flag("test_harness");
224        let main = self.generate_main(entrypoint, emit_test_harness)?;
225        log::debug!(target: "assembly", "generated executable module:\n{main}");
226        let program = assembler.assemble_program(main)?;
227        let advice_map: miden_core::AdviceMap =
228            self.rodata.iter().map(|rodata| (rodata.digest, rodata.to_elements())).collect();
229        Ok(Arc::new(program.with_advice_map(advice_map)))
230    }
231
232    fn assemble_library(
233        &self,
234        link_libraries: &[Arc<Library>],
235        _link_packages: &BTreeMap<Symbol, Arc<Package>>,
236        session: &Session,
237    ) -> Result<Arc<Library>, Report> {
238        use miden_assembly::Assembler;
239
240        let debug_mode = session.options.emit_debug_decorators();
241        log::debug!(
242            target: "assembly",
243            "assembling library of {} modules (debug_mode={})",
244            self.modules.len(),
245            debug_mode
246        );
247
248        let mut assembler =
249            Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
250
251        let mut lib_modules = Vec::new();
252        // Link extra libraries
253        for library in link_libraries.iter().cloned() {
254            for module in library.module_infos() {
255                log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
256                lib_modules.push(module.path().clone());
257            }
258            assembler.link_dynamic_library(library)?;
259        }
260
261        // Assemble library
262        log::debug!(target: "assembly", "start adding the following modules with assembler: {}",
263            self.modules.iter().map(|m| m.path().to_string()).collect::<Vec<_>>().join(", "));
264        let mut modules = Vec::with_capacity(self.modules.len());
265        for module in self.modules.iter().cloned() {
266            if lib_modules.contains(module.path()) {
267                log::warn!(
268                    target: "assembly",
269                    "module '{}' is already registered with the assembler as library's module, \
270                     skipping",
271                    module.path()
272                );
273                continue;
274            }
275            if module.path().to_string().starts_with("intrinsics") {
276                log::debug!(target: "assembly", "adding intrinsics '{}' to assembler", module.path());
277                assembler.compile_and_statically_link(module)?;
278            } else {
279                log::debug!(target: "assembly", "adding '{}' for assembler", module.path());
280                modules.push(module);
281            }
282        }
283        let lib = assembler.assemble_library(modules)?;
284
285        let advice_map: miden_core::AdviceMap =
286            self.rodata.iter().map(|rodata| (rodata.digest, rodata.to_elements())).collect();
287
288        let converted_exports = recover_wasm_cm_interfaces(&lib);
289
290        // Get a reference to the library MAST, then drop the library so we can obtain a mutable
291        // reference so we can modify its advice map data
292        let mut mast_forest = lib.mast_forest().clone();
293        drop(lib);
294        {
295            let mast = Arc::get_mut(&mut mast_forest).expect("expected unique reference");
296            mast.advice_map_mut().extend(advice_map);
297        }
298
299        // Reconstruct the library with the updated MAST
300        Ok(Library::new(mast_forest, converted_exports).map(Arc::new)?)
301    }
302
303    /// Generate an executable module which when run expects the raw data segment data to be
304    /// provided on the advice stack in the same order as initialization, and the operands of
305    /// the entrypoint function on the operand stack.
306    fn generate_main(
307        &self,
308        entrypoint: &InvocationTarget,
309        emit_test_harness: bool,
310    ) -> Result<Arc<masm::Module>, Report> {
311        use masm::{Instruction as Inst, Op};
312
313        let mut exe = Box::new(masm::Module::new_executable());
314        let span = SourceSpan::default();
315        let body = {
316            let mut block = masm::Block::new(span, Vec::with_capacity(64));
317            // Invoke component initializer, if present
318            if let Some(init) = self.init.as_ref() {
319                block.push(Op::Inst(Span::new(span, Inst::Exec(init.clone()))));
320            }
321
322            // Initialize test harness, if requested
323            if emit_test_harness {
324                self.emit_test_harness(&mut block);
325            }
326
327            // Invoke the program entrypoint
328            block.push(Op::Inst(Span::new(
329                span,
330                Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
331            )));
332            block.push(Op::Inst(Span::new(span, Inst::Exec(entrypoint.clone()))));
333            block
334                .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
335
336            // Truncate the stack to 16 elements on exit
337            let truncate_stack = InvocationTarget::AbsoluteProcedurePath {
338                name: ProcedureName::new("truncate_stack").unwrap(),
339                path: masm::LibraryPath::new_from_components(
340                    masm::LibraryNamespace::new("std").unwrap(),
341                    [masm::Ident::new("sys").unwrap()],
342                ),
343            };
344            block.push(Op::Inst(Span::new(span, Inst::Exec(truncate_stack))));
345            block
346        };
347        let start = masm::Procedure::new(
348            span,
349            masm::Visibility::Public,
350            masm::ProcedureName::main(),
351            0,
352            body,
353        );
354        exe.define_procedure(masm::Export::Procedure(start))?;
355        Ok(Arc::from(exe))
356    }
357
358    fn emit_test_harness(&self, block: &mut masm::Block) {
359        use masm::{Instruction as Inst, IntValue, Op, PushValue};
360        use miden_core::{Felt, FieldElement};
361
362        let span = SourceSpan::default();
363
364        let pipe_words_to_memory = masm::ProcedureName::new("pipe_words_to_memory").unwrap();
365        let std_mem = masm::LibraryPath::new("std::mem").unwrap();
366
367        // Step 1: Get the number of initializers to run
368        // => [inits] on operand stack
369        block.push(Op::Inst(Span::new(span, Inst::AdvPush(1.into()))));
370
371        // Step 2: Evaluate the initial state of the loop condition `inits > 0`
372        // => [inits, inits]
373        block.push(Op::Inst(Span::new(span, Inst::Dup0)));
374        // => [inits > 0, inits]
375        block.push(Op::Inst(Span::new(span, Inst::Push(PushValue::Int(IntValue::U8(0)).into()))));
376        block.push(Op::Inst(Span::new(span, Inst::Gt)));
377
378        // Step 3: Loop until `inits == 0`
379        let mut loop_body = Vec::with_capacity(16);
380
381        // State of operand stack on entry to `loop_body`: [inits]
382        // State of advice stack on entry to `loop_body`: [dest_ptr, num_words, ...]
383        //
384        // Step 3a: Compute next value of `inits`, i.e. `inits'`
385        // => [inits - 1]
386        loop_body.push(Op::Inst(Span::new(span, Inst::SubImm(Felt::ONE.into()))));
387
388        // Step 3b: Copy initializer data to memory
389        // => [num_words, dest_ptr, inits']
390        loop_body.push(Op::Inst(Span::new(span, Inst::AdvPush(2.into()))));
391        // => [C, B, A, dest_ptr, inits'] on operand stack
392        loop_body
393            .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameStart.as_u32().into()))));
394        loop_body.push(Op::Inst(Span::new(
395            span,
396            Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
397                name: pipe_words_to_memory,
398                path: std_mem,
399            }),
400        )));
401        loop_body
402            .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
403        // Drop C, B, A
404        loop_body.push(Op::Inst(Span::new(span, Inst::DropW)));
405        loop_body.push(Op::Inst(Span::new(span, Inst::DropW)));
406        loop_body.push(Op::Inst(Span::new(span, Inst::DropW)));
407        // => [inits']
408        loop_body.push(Op::Inst(Span::new(span, Inst::Drop)));
409
410        // Step 3c: Evaluate loop condition `inits' > 0`
411        // => [inits', inits']
412        loop_body.push(Op::Inst(Span::new(span, Inst::Dup0)));
413        // => [inits' > 0, inits']
414        loop_body
415            .push(Op::Inst(Span::new(span, Inst::Push(PushValue::Int(IntValue::U8(0)).into()))));
416        loop_body.push(Op::Inst(Span::new(span, Inst::Gt)));
417
418        // Step 4: Enter (or skip) loop
419        block.push(Op::While {
420            span,
421            body: masm::Block::new(span, loop_body),
422        });
423
424        // Step 5: Drop `inits` after loop is evaluated
425        block.push(Op::Inst(Span::new(span, Inst::Drop)));
426    }
427}
428
429/// Try to recognize Wasm CM interfaces and transform those exports to have Wasm interface encoded
430/// as module name.
431///
432/// Temporary workaround for:
433///
434/// 1. Temporary exporting multiple interfaces from the same(Wasm core) module (an interface is
435///    encoded in the function name)
436///
437/// 2. Assembler using the current module name to generate exports.
438///
439fn recover_wasm_cm_interfaces(
440    lib: &Library,
441) -> BTreeMap<masm::QualifiedProcedureName, LibraryExport> {
442    use crate::intrinsics::INTRINSICS_MODULE_NAMES;
443
444    let mut exports = BTreeMap::new();
445    for export in lib.exports() {
446        if INTRINSICS_MODULE_NAMES.contains(&export.name.module.to_string().as_str())
447            || export.name.name.as_str().starts_with("cabi")
448        {
449            // Preserve intrinsics modules and internal Wasm CM `cabi_*` functions
450            exports.insert(export.name.clone(), export.clone());
451            continue;
452        }
453
454        if let Some((component, interface)) = export.name.name.as_str().rsplit_once('/') {
455            let export_node_id = lib.get_export_node_id(&export.name);
456
457            // Wasm CM interface
458            let (interface, function) =
459                interface.rsplit_once('#').expect("invalid wasm component model identifier");
460
461            let mut component_parts = component.split(':').map(Arc::from);
462            let ns = masm::LibraryNamespace::User(
463                component_parts.next().expect("invalid wasm component model identifier"),
464            );
465            let component_parts = component_parts
466                .map(Span::unknown)
467                .map(masm::Ident::from_raw_parts)
468                .chain([masm::Ident::from_raw_parts(Span::unknown(Arc::from(interface)))]);
469            let path = masm::LibraryPath::new_from_components(ns, component_parts);
470            let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(
471                Span::unknown(Arc::from(function)),
472            ));
473            let new_export = masm::QualifiedProcedureName::new(path, name);
474
475            let new_lib_export = LibraryExport::new(export_node_id, new_export.clone());
476
477            exports.insert(new_export, new_lib_export.clone());
478        } else {
479            // Non-Wasm CM interface, preserve as is
480            exports.insert(export.name.clone(), export.clone());
481        }
482    }
483    exports
484}
485
#[cfg(test)]
mod tests {
    use miden_core::FieldElement;
    use proptest::prelude::*;

    use super::*;

    /// Checks that converting `bytes` yields a whole number of words, and that every felt past
    /// the ones holding data is zero.
    fn validate_bytes_to_elements(bytes: &[u8]) {
        let felts = Rodata::bytes_to_elements(bytes);

        // One felt per 4 bytes of input
        let data_felts = bytes.len().div_ceil(4);
        // Rounded up to a multiple of 4 felts (1 word = 4 felts)
        let padded_felts = data_felts.div_ceil(4) * 4;

        assert_eq!(
            felts.len(),
            padded_felts,
            "For {} bytes, expected {} felts (padded from {} felts), but got {}",
            bytes.len(),
            padded_felts,
            data_felts,
            felts.len()
        );

        // Everything after the data must be zero padding
        for (offset, felt) in felts[data_felts..].iter().enumerate() {
            let i = data_felts + offset;
            assert_eq!(*felt, miden_processor::Felt::ZERO, "Padding at index {i} should be zero");
        }
    }

    #[test]
    fn test_bytes_to_elements_edge_cases() {
        validate_bytes_to_elements(&[]);
        validate_bytes_to_elements(&[1]);
        // All-zero inputs with lengths on and around felt and word boundaries
        for len in [4usize, 15, 16, 17, 31, 32, 33, 64] {
            validate_bytes_to_elements(&vec![0u8; len]);
        }
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(1000))]
        #[test]
        fn proptest_bytes_to_elements(input in prop::collection::vec(any::<u8>(), 0..=1000)) {
            validate_bytes_to_elements(&input);
        }

        #[test]
        fn proptest_bytes_to_elements_word_boundaries(size_factor in 0u32..=100) {
            // Exercise sizes just below, at, and just above each multiple of 16 bytes
            // (1 word = 4 felts = 16 bytes)
            let word_aligned = size_factor * 16;
            for delta in [-2i32, -1, 0, 1, 2] {
                let len = (word_aligned as i32 + delta).max(0) as usize;
                validate_bytes_to_elements(&vec![0u8; len]);
            }
        }
    }
}