1use alloc::{
2 collections::{BTreeMap, BTreeSet},
3 sync::Arc,
4};
5use core::fmt;
6
7use miden_assembly::{Library, ast::InvocationTarget, library::LibraryExport};
8use miden_core::{Program, Word};
9use miden_mast_package::{MastArtifact, Package, ProcedureName};
10use midenc_hir::{constants::ConstantData, dialects::builtin, interner::Symbol};
11use midenc_session::{
12 Session,
13 diagnostics::{Report, SourceSpan, Span},
14};
15
16use crate::{TraceEvent, lower::NativePtr, masm};
17
/// A component lowered to Miden Assembly, together with the data needed to assemble it into a
/// MAST artifact via [`MasmComponent::assemble`].
pub struct MasmComponent {
    /// The identifier of this component.
    pub id: builtin::ComponentId,
    /// Optional procedure that the generated `main` procedure executes first, before anything
    /// else, when the component is assembled as an executable.
    pub init: Option<masm::InvocationTarget>,
    /// Optional entrypoint procedure.
    ///
    /// When present, the component is assembled as an executable whose generated `main`
    /// procedure invokes it; when absent, the component is assembled as a library.
    pub entrypoint: Option<masm::InvocationTarget>,
    /// Optional kernel library.
    // NOTE(review): not read anywhere in this file - presumably the kernel this component is
    // meant to be assembled against; confirm against callers.
    pub kernel: Option<masm::KernelLibrary>,
    /// Read-only data segments of this component.
    ///
    /// During assembly each segment is converted to field elements and stored in the advice map
    /// of the resulting artifact, keyed by the segment's digest.
    pub rodata: Vec<Rodata>,
    /// Heap base of this component.
    // NOTE(review): not read anywhere in this file - presumably the address at which the heap
    // starts; the unit (bytes vs. elements) is not visible here, confirm against callers.
    pub heap_base: u32,
    /// Optional stack pointer value for this component.
    // NOTE(review): not read anywhere in this file - presumably the initial value of the
    // shadow-stack pointer; confirm against callers.
    pub stack_pointer: Option<u32>,
    /// The Miden Assembly modules that make up this component.
    pub modules: Vec<Arc<masm::Module>>,
}
40
/// A read-only data segment belonging to a component.
#[derive(Clone, PartialEq, Eq)]
pub struct Rodata {
    /// The component this data belongs to.
    pub component: builtin::ComponentId,
    /// The key under which the field elements of this segment are stored in the advice map of
    /// the assembled artifact.
    // NOTE(review): presumably a commitment to (hash of) `data`; how it is computed is not
    // visible in this file.
    pub digest: Word,
    /// The pointer associated with the start of this segment.
    // NOTE(review): not read anywhere in this file - presumably the address at which the data
    // is expected to be written at runtime; confirm against callers.
    pub start: NativePtr,
    /// The raw bytes of this segment.
    pub data: Arc<ConstantData>,
}
53impl fmt::Debug for Rodata {
54 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55 f.debug_struct("Rodata")
56 .field("digest", &format_args!("{}", &self.digest))
57 .field("start", &self.start)
58 .field_with("data", |f| {
59 f.debug_struct("ConstantData")
60 .field("len", &self.data.len())
61 .finish_non_exhaustive()
62 })
63 .finish()
64 }
65}
66impl Rodata {
67 pub fn size_in_bytes(&self) -> usize {
68 self.data.len()
69 }
70
71 pub fn size_in_felts(&self) -> usize {
72 self.data.len().next_multiple_of(4) / 4
73 }
74
75 pub fn size_in_words(&self) -> usize {
76 self.size_in_felts().next_multiple_of(4) / 4
77 }
78
79 pub fn to_elements(&self) -> Vec<miden_processor::Felt> {
83 Self::bytes_to_elements(self.data.as_slice())
84 }
85
86 pub fn bytes_to_elements(bytes: &[u8]) -> Vec<miden_processor::Felt> {
92 use miden_core::FieldElement;
93 use miden_processor::Felt;
94
95 let mut felts = Vec::with_capacity(bytes.len() / 4);
96 let mut iter = bytes.iter().copied().array_chunks::<4>();
97 felts.extend(iter.by_ref().map(|chunk| Felt::new(u32::from_le_bytes(chunk) as u64)));
98 let remainder = iter.into_remainder();
99 if remainder.len() > 0 {
100 let mut chunk = [0u8; 4];
101 for (i, byte) in remainder.enumerate() {
102 chunk[i] = byte;
103 }
104 felts.push(Felt::new(u32::from_le_bytes(chunk) as u64));
105 }
106
107 let size_in_felts = bytes.len().next_multiple_of(4) / 4;
108 let size_in_words = size_in_felts.next_multiple_of(4) / 4;
109 let padding = (size_in_words * 4).abs_diff(felts.len());
110 felts.resize(felts.len() + padding, Felt::ZERO);
111 debug_assert_eq!(felts.len() % 4, 0, "expected to be a valid number of words");
112 felts
113 }
114}
115
// Registers the `--test-harness` boolean compiler flag, stored under the name `test_harness`.
//
// The flag is read when assembling an executable (via `session.get_flag("test_harness")`), and
// when set, extra test-harness code is emitted into the generated `main` procedure.
inventory::submit! {
    midenc_session::CompileFlag::new("test_harness")
        .long("test-harness")
        .action(midenc_session::FlagAction::SetTrue)
        .help("If present, causes the code generator to emit extra code for the VM test harness")
        .help_heading("Testing")
}
123
124impl fmt::Display for MasmComponent {
125 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
126 use crate::intrinsics::INTRINSICS_MODULE_NAMES;
127
128 for module in self.modules.iter() {
129 let module_name = module.path().path();
134 if INTRINSICS_MODULE_NAMES.contains(&module_name.as_ref()) {
135 continue;
136 }
137 if ["std"].contains(&module.namespace().as_str()) {
138 continue;
139 } else {
140 writeln!(f, "# mod {}\n", &module_name)?;
141 writeln!(f, "{module}")?;
142 }
143 }
144 Ok(())
145 }
146}
147
148impl MasmComponent {
149 pub fn assemble(
150 &self,
151 link_libraries: &[Arc<Library>],
152 link_packages: &BTreeMap<Symbol, Arc<Package>>,
153 session: &Session,
154 ) -> Result<MastArtifact, Report> {
155 if let Some(entrypoint) = self.entrypoint.as_ref() {
156 self.assemble_program(entrypoint, link_libraries, link_packages, session)
157 .map(MastArtifact::Executable)
158 } else {
159 self.assemble_library(link_libraries, link_packages, session)
160 .map(MastArtifact::Library)
161 }
162 }
163
164 fn assemble_program(
165 &self,
166 entrypoint: &InvocationTarget,
167 link_libraries: &[Arc<Library>],
168 _link_packages: &BTreeMap<Symbol, Arc<Package>>,
169 session: &Session,
170 ) -> Result<Arc<Program>, Report> {
171 use miden_assembly::Assembler;
172
173 let debug_mode = session.options.emit_debug_decorators();
174
175 log::debug!(
176 target: "assembly",
177 "assembling executable with entrypoint '{entrypoint}' (debug_mode={debug_mode})"
178 );
179 let mut assembler =
180 Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
181
182 let mut lib_modules = BTreeSet::default();
183 for library in link_libraries.iter().cloned() {
185 for module in library.module_infos() {
186 log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
187 lib_modules.insert(module.path().clone());
188 }
189 assembler.link_dynamic_library(library)?;
190 }
191
192 log::debug!(target: "assembly", "start adding the following modules with assembler: {}",
194 self.modules.iter().map(|m| m.path().to_string()).collect::<Vec<_>>().join(", "));
195
196 let mut modules = Vec::with_capacity(self.modules.len());
197 for module in self.modules.iter().cloned() {
198 if lib_modules.contains(module.path()) {
199 log::warn!(
200 target: "assembly",
201 "module '{}' is already registered with the assembler as library's module, \
202 skipping",
203 module.path()
204 );
205 continue;
206 }
207
208 if module.path().to_string().starts_with("intrinsics") {
209 log::debug!(target: "assembly", "adding intrinsics '{}' to assembler", module.path());
210 assembler.compile_and_statically_link(module)?;
211 } else {
212 log::debug!(target: "assembly", "adding '{}' for assembler", module.path());
213 modules.push(module);
214 }
215 }
216
217 for module in modules.into_iter().rev() {
220 assembler.compile_and_statically_link(module)?;
221 }
222
223 let emit_test_harness = session.get_flag("test_harness");
224 let main = self.generate_main(entrypoint, emit_test_harness)?;
225 log::debug!(target: "assembly", "generated executable module:\n{main}");
226 let program = assembler.assemble_program(main)?;
227 let advice_map: miden_core::AdviceMap =
228 self.rodata.iter().map(|rodata| (rodata.digest, rodata.to_elements())).collect();
229 Ok(Arc::new(program.with_advice_map(advice_map)))
230 }
231
232 fn assemble_library(
233 &self,
234 link_libraries: &[Arc<Library>],
235 _link_packages: &BTreeMap<Symbol, Arc<Package>>,
236 session: &Session,
237 ) -> Result<Arc<Library>, Report> {
238 use miden_assembly::Assembler;
239
240 let debug_mode = session.options.emit_debug_decorators();
241 log::debug!(
242 target: "assembly",
243 "assembling library of {} modules (debug_mode={})",
244 self.modules.len(),
245 debug_mode
246 );
247
248 let mut assembler =
249 Assembler::new(session.source_manager.clone()).with_debug_mode(debug_mode);
250
251 let mut lib_modules = Vec::new();
252 for library in link_libraries.iter().cloned() {
254 for module in library.module_infos() {
255 log::debug!(target: "assembly", "registering '{}' with assembler", module.path());
256 lib_modules.push(module.path().clone());
257 }
258 assembler.link_dynamic_library(library)?;
259 }
260
261 log::debug!(target: "assembly", "start adding the following modules with assembler: {}",
263 self.modules.iter().map(|m| m.path().to_string()).collect::<Vec<_>>().join(", "));
264 let mut modules = Vec::with_capacity(self.modules.len());
265 for module in self.modules.iter().cloned() {
266 if lib_modules.contains(module.path()) {
267 log::warn!(
268 target: "assembly",
269 "module '{}' is already registered with the assembler as library's module, \
270 skipping",
271 module.path()
272 );
273 continue;
274 }
275 if module.path().to_string().starts_with("intrinsics") {
276 log::debug!(target: "assembly", "adding intrinsics '{}' to assembler", module.path());
277 assembler.compile_and_statically_link(module)?;
278 } else {
279 log::debug!(target: "assembly", "adding '{}' for assembler", module.path());
280 modules.push(module);
281 }
282 }
283 let lib = assembler.assemble_library(modules)?;
284
285 let advice_map: miden_core::AdviceMap =
286 self.rodata.iter().map(|rodata| (rodata.digest, rodata.to_elements())).collect();
287
288 let converted_exports = recover_wasm_cm_interfaces(&lib);
289
290 let mut mast_forest = lib.mast_forest().clone();
293 drop(lib);
294 {
295 let mast = Arc::get_mut(&mut mast_forest).expect("expected unique reference");
296 mast.advice_map_mut().extend(advice_map);
297 }
298
299 Ok(Library::new(mast_forest, converted_exports).map(Arc::new)?)
301 }
302
303 fn generate_main(
307 &self,
308 entrypoint: &InvocationTarget,
309 emit_test_harness: bool,
310 ) -> Result<Arc<masm::Module>, Report> {
311 use masm::{Instruction as Inst, Op};
312
313 let mut exe = Box::new(masm::Module::new_executable());
314 let span = SourceSpan::default();
315 let body = {
316 let mut block = masm::Block::new(span, Vec::with_capacity(64));
317 if let Some(init) = self.init.as_ref() {
319 block.push(Op::Inst(Span::new(span, Inst::Exec(init.clone()))));
320 }
321
322 if emit_test_harness {
324 self.emit_test_harness(&mut block);
325 }
326
327 block.push(Op::Inst(Span::new(
329 span,
330 Inst::Trace(TraceEvent::FrameStart.as_u32().into()),
331 )));
332 block.push(Op::Inst(Span::new(span, Inst::Exec(entrypoint.clone()))));
333 block
334 .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
335
336 let truncate_stack = InvocationTarget::AbsoluteProcedurePath {
338 name: ProcedureName::new("truncate_stack").unwrap(),
339 path: masm::LibraryPath::new_from_components(
340 masm::LibraryNamespace::new("std").unwrap(),
341 [masm::Ident::new("sys").unwrap()],
342 ),
343 };
344 block.push(Op::Inst(Span::new(span, Inst::Exec(truncate_stack))));
345 block
346 };
347 let start = masm::Procedure::new(
348 span,
349 masm::Visibility::Public,
350 masm::ProcedureName::main(),
351 0,
352 body,
353 );
354 exe.define_procedure(masm::Export::Procedure(start))?;
355 Ok(Arc::from(exe))
356 }
357
358 fn emit_test_harness(&self, block: &mut masm::Block) {
359 use masm::{Instruction as Inst, IntValue, Op, PushValue};
360 use miden_core::{Felt, FieldElement};
361
362 let span = SourceSpan::default();
363
364 let pipe_words_to_memory = masm::ProcedureName::new("pipe_words_to_memory").unwrap();
365 let std_mem = masm::LibraryPath::new("std::mem").unwrap();
366
367 block.push(Op::Inst(Span::new(span, Inst::AdvPush(1.into()))));
370
371 block.push(Op::Inst(Span::new(span, Inst::Dup0)));
374 block.push(Op::Inst(Span::new(span, Inst::Push(PushValue::Int(IntValue::U8(0)).into()))));
376 block.push(Op::Inst(Span::new(span, Inst::Gt)));
377
378 let mut loop_body = Vec::with_capacity(16);
380
381 loop_body.push(Op::Inst(Span::new(span, Inst::SubImm(Felt::ONE.into()))));
387
388 loop_body.push(Op::Inst(Span::new(span, Inst::AdvPush(2.into()))));
391 loop_body
393 .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameStart.as_u32().into()))));
394 loop_body.push(Op::Inst(Span::new(
395 span,
396 Inst::Exec(InvocationTarget::AbsoluteProcedurePath {
397 name: pipe_words_to_memory,
398 path: std_mem,
399 }),
400 )));
401 loop_body
402 .push(Op::Inst(Span::new(span, Inst::Trace(TraceEvent::FrameEnd.as_u32().into()))));
403 loop_body.push(Op::Inst(Span::new(span, Inst::DropW)));
405 loop_body.push(Op::Inst(Span::new(span, Inst::DropW)));
406 loop_body.push(Op::Inst(Span::new(span, Inst::DropW)));
407 loop_body.push(Op::Inst(Span::new(span, Inst::Drop)));
409
410 loop_body.push(Op::Inst(Span::new(span, Inst::Dup0)));
413 loop_body
415 .push(Op::Inst(Span::new(span, Inst::Push(PushValue::Int(IntValue::U8(0)).into()))));
416 loop_body.push(Op::Inst(Span::new(span, Inst::Gt)));
417
418 block.push(Op::While {
420 span,
421 body: masm::Block::new(span, loop_body),
422 });
423
424 block.push(Op::Inst(Span::new(span, Inst::Drop)));
426 }
427}
428
429fn recover_wasm_cm_interfaces(
440 lib: &Library,
441) -> BTreeMap<masm::QualifiedProcedureName, LibraryExport> {
442 use crate::intrinsics::INTRINSICS_MODULE_NAMES;
443
444 let mut exports = BTreeMap::new();
445 for export in lib.exports() {
446 if INTRINSICS_MODULE_NAMES.contains(&export.name.module.to_string().as_str())
447 || export.name.name.as_str().starts_with("cabi")
448 {
449 exports.insert(export.name.clone(), export.clone());
451 continue;
452 }
453
454 if let Some((component, interface)) = export.name.name.as_str().rsplit_once('/') {
455 let export_node_id = lib.get_export_node_id(&export.name);
456
457 let (interface, function) =
459 interface.rsplit_once('#').expect("invalid wasm component model identifier");
460
461 let mut component_parts = component.split(':').map(Arc::from);
462 let ns = masm::LibraryNamespace::User(
463 component_parts.next().expect("invalid wasm component model identifier"),
464 );
465 let component_parts = component_parts
466 .map(Span::unknown)
467 .map(masm::Ident::from_raw_parts)
468 .chain([masm::Ident::from_raw_parts(Span::unknown(Arc::from(interface)))]);
469 let path = masm::LibraryPath::new_from_components(ns, component_parts);
470 let name = masm::ProcedureName::from_raw_parts(masm::Ident::from_raw_parts(
471 Span::unknown(Arc::from(function)),
472 ));
473 let new_export = masm::QualifiedProcedureName::new(path, name);
474
475 let new_lib_export = LibraryExport::new(export_node_id, new_export.clone());
476
477 exports.insert(new_export, new_lib_export.clone());
478 } else {
479 exports.insert(export.name.clone(), export.clone());
481 }
482 }
483 exports
484}
485
#[cfg(test)]
mod tests {
    use miden_core::FieldElement;
    use proptest::prelude::*;

    use super::*;

    /// Checks that `Rodata::bytes_to_elements` converts `bytes` as expected:
    ///
    /// * the output length is the number of elements needed for `bytes`, rounded up to a whole
    ///   number of words
    /// * each data element holds the corresponding (zero-extended) group of 4 bytes, interpreted
    ///   as a little-endian `u32`
    /// * all padding elements are zero
    fn validate_bytes_to_elements(bytes: &[u8]) {
        let result = Rodata::bytes_to_elements(bytes);

        // Each felt holds 4 bytes
        let expected_felts = bytes.len().div_ceil(4);
        // The output is padded to a multiple of 4 felts (one word)
        let expected_total_felts = expected_felts.div_ceil(4) * 4;

        assert_eq!(
            result.len(),
            expected_total_felts,
            "For {} bytes, expected {} felts (padded from {} felts), but got {}",
            bytes.len(),
            expected_total_felts,
            expected_felts,
            result.len()
        );

        // Verify the converted data, not just its size
        for (i, (felt, chunk)) in result.iter().zip(bytes.chunks(4)).enumerate() {
            let mut le_bytes = [0u8; 4];
            le_bytes[..chunk.len()].copy_from_slice(chunk);
            let expected = miden_processor::Felt::new(u64::from(u32::from_le_bytes(le_bytes)));
            assert_eq!(
                *felt, expected,
                "Element at index {i} should hold bytes {chunk:?} in little-endian order"
            );
        }

        // Verify that the padding is all zeroes
        for (i, felt) in result.iter().enumerate().skip(expected_felts) {
            assert_eq!(*felt, miden_processor::Felt::ZERO, "Padding at index {i} should be zero");
        }
    }

    #[test]
    fn test_bytes_to_elements_edge_cases() {
        validate_bytes_to_elements(&[]);
        validate_bytes_to_elements(&[1]);
        validate_bytes_to_elements(&[0u8; 4]);
        validate_bytes_to_elements(&[0u8; 15]);
        validate_bytes_to_elements(&[0u8; 16]);
        validate_bytes_to_elements(&[0u8; 17]);
        validate_bytes_to_elements(&[0u8; 31]);
        validate_bytes_to_elements(&[0u8; 32]);
        validate_bytes_to_elements(&[0u8; 33]);
        validate_bytes_to_elements(&[0u8; 64]);

        // Non-zero data, so that byte order and remainder handling are actually exercised
        validate_bytes_to_elements(&[0x01, 0x02, 0x03, 0x04]);
        validate_bytes_to_elements(&[0x01, 0x02, 0x03, 0x04, 0x05]);
        validate_bytes_to_elements(&[0xff; 7]);
        validate_bytes_to_elements(&[0xff; 16]);
        validate_bytes_to_elements(&[0xff; 19]);
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(1000))]
        #[test]
        fn proptest_bytes_to_elements(bytes in prop::collection::vec(any::<u8>(), 0..=1000)) {
            validate_bytes_to_elements(&bytes);
        }

        #[test]
        fn proptest_bytes_to_elements_word_boundaries(size_factor in 0u32..=100) {
            // One word is 16 bytes, so this is always a size on a word boundary
            let base_size = size_factor * 16;
            // Test sizes at, and on both sides of, the word boundary
            for offset in -2i32..=2 {
                let size = (base_size as i32 + offset).max(0) as usize;
                let bytes = vec![0u8; size];
                validate_bytes_to_elements(&bytes);
            }
        }
    }
}