1use std::{collections::HashMap, io::Write};
2
3use polkavm::CostModelKind;
4use polkavm_common::cast::cast;
5use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter};
6
// Output layout produced by `Disassembler::disassemble_into`.
//
// NOTE: plain `//` comments are used on purpose here; `///` doc comments on a
// `clap::ValueEnum` are picked up by clap and would change the generated CLI help.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum DisassemblyFormat {
    // Print only the guest instructions.
    Guest,
    // Print each guest instruction followed by the (indented) native code generated for it.
    GuestAndNative,
    // Print only the native machine code (basic block labels are still printed).
    Native,
    // Print guest instructions without offsets and with jump targets, `load_imm`
    // values and `[0x...]` addresses replaced by `_` placeholders.
    DiffFriendly,
}
14
/// Native machine code of a program together with the data needed to relate it
/// back to guest instructions.
struct NativeCode {
    /// Origin address of the machine code as reported by the module (`0` when the
    /// module does not report one).
    machine_code_origin: u64,
    /// Raw machine code bytes copied out of the module.
    machine_code: Vec<u8>,
    /// Pairs of guest program counter and a `u32` that is used as a byte offset
    /// into `machine_code`.
    instruction_map: Vec<(ProgramCounter, u32)>,
}
20
21impl TryFrom<&'_ ProgramBlob> for NativeCode {
22 type Error = polkavm::Error;
23
24 fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
25 if !cfg!(target_arch = "x86_64") {
26 return Err("the selected disassembly format is not supported on this architecture".into());
27 }
28
29 let mut config = polkavm::Config::from_env()?;
30 config.set_worker_count(0);
31
32 let engine = polkavm::Engine::new(&config)?;
33 let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
34
35 let Some(machine_code) = module.machine_code() else {
36 return Err("currently selected VM backend doesn't provide raw machine code".into());
37 };
38
39 let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
40 return Err("currently selected VM backend doesn't provide a machine code map".into());
41 };
42
43 Ok(Self {
44 machine_code_origin: module.machine_code_origin().unwrap_or(0),
45 machine_code: machine_code.into(),
46 instruction_map: instruction_map.to_vec(),
47 })
48 }
49}
50
/// Formats native x86-64 machine code as text.
#[derive(Default)]
struct AssemblyFormatter {
    /// Scratch string that receives the text of one instruction at a time; it is
    /// cleared before every instruction so its allocation is reused.
    buffer: String,
}
55
56impl AssemblyFormatter {
57 fn emit(
58 &mut self,
59 indent: bool,
60 code_origin: u64,
61 mut code: &[u8],
62 mut position: usize,
63 show_raw_bytes: bool,
64 show_offsets: bool,
65 writer: &mut impl Write,
66 ) -> Result<(), std::io::Error> {
67 use iced_x86::Formatter;
68
69 let mut formatter = iced_x86::NasmFormatter::new();
70 formatter.options_mut().set_space_after_operand_separator(true);
71 formatter.options_mut().set_hex_prefix("0x");
72 formatter.options_mut().set_hex_suffix("");
73 formatter.options_mut().set_uppercase_hex(false);
74 formatter.options_mut().set_small_hex_numbers_in_decimal(false);
75 formatter.options_mut().set_show_useless_prefixes(true);
76 formatter.options_mut().set_branch_leading_zeros(false);
77 formatter.options_mut().set_rip_relative_addresses(true);
78
79 loop {
80 let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
81 if !decoder.can_decode() {
82 break;
83 }
84 let mut instruction = iced_x86::Instruction::default();
85 decoder.decode_out(&mut instruction);
86
87 if indent {
88 write!(writer, " ")?;
89 }
90
91 if show_offsets {
92 write!(writer, "{:8x}: ", position as u64 + code_origin)?;
93 }
94
95 let start_index = (instruction.ip() - code_origin) as usize;
96 let instr_bytes = &code[start_index..start_index + instruction.len()];
97 if show_raw_bytes {
98 let mut count = 0;
99 for b in instr_bytes.iter() {
100 write!(writer, "{:02x} ", b)?;
101 count += 3;
102 }
103 while count < 34 {
104 write!(writer, " ")?;
105 count += 1;
106 }
107 }
108
109 self.buffer.clear();
110 formatter.format(&instruction, &mut self.buffer);
111 write!(writer, "{}", self.buffer.replace("byte [", "byte ptr ["))?;
112 writeln!(writer)?;
113
114 code = &code[instruction.len()..];
115 position += instruction.len();
116 }
117
118 Ok(())
119 }
120}
121
/// Renders a [`ProgramBlob`] as human readable assembly.
///
/// Create one with [`Disassembler::new`], adjust the output through the setter
/// methods and call [`Disassembler::disassemble_into`].
pub struct Disassembler<'a> {
    /// The program being disassembled.
    blob: &'a ProgramBlob,
    /// Selected output layout.
    format: DisassemblyFormat,
    /// Gas cost keyed by the offset of the first instruction of a basic block;
    /// `None` until `display_gas` has been called.
    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
    /// Native code of the program; only populated for the `Native` and
    /// `GuestAndNative` formats.
    native: Option<NativeCode>,
    /// Print the raw bytes of every guest instruction (off by default).
    show_raw_bytes: bool,
    /// Print the raw bytes of every native instruction (on by default).
    show_native_raw_bytes: bool,
    /// Forwarded to the instruction formatter's `prefer_non_abi_reg_names` option.
    prefer_non_abi_reg_names: bool,
    /// Forwarded to the instruction formatter's `prefer_unaliased` option.
    prefer_unaliased: bool,
    /// Print jump targets as raw offsets instead of `@N` basic block references.
    prefer_offset_jump_targets: bool,
    /// Print the `// ...` summary lines before the code (on by default).
    emit_header: bool,
    /// Annotate basic block labels with the exports pointing at them (on by default).
    emit_exports: bool,
    /// Print the offset of every guest instruction (on by default).
    show_offsets: bool,
    /// Print the address of every native instruction (on by default).
    show_native_offsets: bool,
    /// Cost model handed to the engine configuration by `display_gas`.
    cost_model: Option<CostModelKind>,
}
138
139impl<'a> Disassembler<'a> {
140 pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
141 let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
142 Some(NativeCode::try_from(blob)?)
143 } else {
144 None
145 };
146
147 Ok(Self {
148 blob,
149 format,
150 gas_cost_map: None,
151 native,
152 show_raw_bytes: false,
153 show_native_raw_bytes: true,
154 prefer_non_abi_reg_names: false,
155 prefer_unaliased: false,
156 prefer_offset_jump_targets: false,
157 emit_header: true,
158 emit_exports: true,
159 show_offsets: true,
160 show_native_offsets: true,
161 cost_model: None,
162 })
163 }
164
165 pub fn show_raw_bytes(&mut self, value: bool) {
166 self.show_raw_bytes = value;
167 }
168
169 pub fn show_native_raw_bytes(&mut self, value: bool) {
170 self.show_native_raw_bytes = value;
171 }
172
173 pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
174 self.prefer_non_abi_reg_names = value;
175 }
176
177 pub fn prefer_unaliased(&mut self, value: bool) {
178 self.prefer_unaliased = value;
179 }
180
181 pub fn prefer_offset_jump_targets(&mut self, value: bool) {
182 self.prefer_offset_jump_targets = value;
183 }
184
185 pub fn emit_header(&mut self, value: bool) {
186 self.emit_header = value;
187 }
188
189 pub fn emit_exports(&mut self, value: bool) {
190 self.emit_exports = value;
191 }
192
193 pub fn show_offsets(&mut self, value: bool) {
194 self.show_offsets = value;
195 }
196
197 pub fn show_native_offsets(&mut self, value: bool) {
198 self.show_native_offsets = value;
199 }
200
201 pub fn cost_model(&mut self, value: Option<CostModelKind>) {
202 self.cost_model = value;
203 }
204
205 fn instructions(&self) -> Vec<ParsedInstruction> {
206 self.blob.instructions().collect()
207 }
208
209 pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
210 let mut config = polkavm::Config::from_env()?;
211 config.set_worker_count(0);
212 config.set_backend(Some(polkavm::BackendKind::Interpreter));
213 config.set_allow_experimental(true);
214 config.set_default_cost_model(self.cost_model.clone());
215
216 let engine = polkavm::Engine::new(&config)?;
217
218 let mut config = polkavm::ModuleConfig::default();
219 config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
220
221 let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
222
223 let mut in_new_block = true;
224 let mut gas_cost_map = HashMap::new();
225 for instruction in self.instructions() {
226 if in_new_block {
227 in_new_block = false;
228 if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
229 gas_cost_map.insert(instruction.offset, cost);
230 }
231 }
232
233 if instruction.starts_new_basic_block() {
234 in_new_block = true;
235 }
236 }
237 self.gas_cost_map = Some(gas_cost_map);
238
239 Ok(())
240 }
241
242 pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
243 let mut instructions = Vec::new();
244 let mut instruction_offset_to_basic_block = HashMap::new();
245 {
246 let mut basic_block_counter = 0;
247 let mut basic_block_started = true;
248 for instruction in self.instructions() {
249 if basic_block_started {
250 instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
251 basic_block_started = false;
252 }
253
254 if instruction.starts_new_basic_block() {
255 basic_block_started = true;
256 basic_block_counter += 1;
257 }
258 instructions.push(instruction);
259 }
260 }
261
262 let mut exports_for_code_offset = HashMap::new();
263 for (nth_export, export) in self.blob.exports().enumerate() {
264 exports_for_code_offset
265 .entry(export.program_counter())
266 .or_insert_with(Vec::new)
267 .push((nth_export, export));
268 }
269
270 let mut jump_table_map = HashMap::new();
271 let mut jump_table = Vec::new();
272 for target_code_offset in self.blob.jump_table() {
273 let jump_table_index = jump_table.len() + 1;
274 jump_table.push(target_code_offset);
275 assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
276 }
277
278 macro_rules! w {
279 (@no_newline $($arg:tt)*) => {{
280 if let Err(error) = write!(&mut writer, $($arg)*) {
281 return Err(format!("failed to write to output: {error}").into());
282 }
283 }};
284
285 ($($arg:tt)*) => {{
286 if let Err(error) = writeln!(&mut writer, $($arg)*) {
287 return Err(format!("failed to write to output: {error}").into());
288 }
289 }};
290 }
291
292 if self.emit_header {
293 w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
294 w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
295 w!("// Stack size = {} bytes", self.blob.stack_size());
296 w!();
297 w!("// Instructions = {}", instructions.len());
298 w!("// Code size = {} bytes", self.blob.code().len());
299 w!();
300 }
301
302 let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
303 use core::fmt::Write;
304
305 let mut buf = String::new();
306 if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
307 write!(&mut buf, "@{basic_block_counter}").unwrap()
308 } else {
309 buf.push_str("@_:");
310 }
311
312 if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
313 if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
314 write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
315 } else {
316 buf.push_str(" [_]");
317 }
318 }
319
320 if self.emit_exports {
321 if let Some(exports) = exports_for_code_offset.get(&target_offset) {
322 for (nth_export, export) in exports {
323 write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
324 }
325 }
326 }
327
328 if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
329 write!(&mut buf, " (gas: {})", gas_cost).unwrap();
330 }
331
332 buf
333 };
334
335 let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
336 let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
337 disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
338 disassembly_format.prefer_unaliased = self.prefer_unaliased;
339 disassembly_format.is_64_bit = self.blob.is_64_bit();
340
341 let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
342 if prefer_offset_jump_targets {
343 write!(fmt, "{}", target)
344 } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
345 write!(fmt, "@{basic_block_index}")
346 } else {
347 write!(fmt, "{}", target)
348 }
349 };
350 disassembly_format.jump_target_formatter = Some(&jump_target_formatter);
351
352 let mut fmt = AssemblyFormatter::default();
353 let mut last_line_program_entry = None;
354 let mut last_full_name = String::new();
355 let mut basic_block_counter = 0;
356 let mut pending_label = true;
357 for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
358 let offset = instruction.offset;
359 let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
360 let instruction = instruction.kind;
361 let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];
362
363 let instruction_s = instruction.display(&disassembly_format);
364 let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
365 if let Some(import) = self.blob.imports().get(cast(nth_import).bitwise_as_u32()) {
366 format!("{instruction_s} // {}", import)
367 } else {
368 format!("{instruction_s} // INVALID")
369 }
370 } else {
371 instruction_s.to_string()
372 };
373
374 let line_program = self.blob.get_debug_line_program_at(offset)?;
375
376 if let Some(mut line_program) = line_program {
377 if last_line_program_entry != Some(line_program.entry_index()) {
378 if nth_instruction != 0 {
379 if let Err(error) = writeln!(&mut writer) {
380 return Err(format!("failed to write to output: {error}").into());
381 }
382 }
383
384 last_line_program_entry = Some(line_program.entry_index());
385 loop {
386 let region = match line_program.run() {
387 Ok(Some(region)) => region,
388 Ok(None) => break,
389 Err(error) => {
390 return Err(format!("failed to parse line program: {error}").into());
391 }
392 };
393
394 if region.instruction_range().contains(&offset) {
395 let frame = region.frames().next().unwrap();
396 let full_name = match frame.full_name() {
397 Ok(full_name) => full_name,
398 Err(error) => {
399 return Err(format!("failed to parse line program: {error}").into());
400 }
401 }
402 .to_string();
403
404 if last_full_name != full_name {
405 w!("<{}>:", full_name);
406 last_full_name = full_name;
407 }
408
409 break;
410 }
411 }
412 }
413 } else {
414 if !last_full_name.is_empty() {
415 if let Err(error) = writeln!(&mut writer) {
416 return Err(format!("failed to write to output: {error}").into());
417 }
418 }
419
420 last_line_program_entry = None;
421 last_full_name.clear();
422 }
423
424 if pending_label {
425 pending_label = false;
426 if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
427 if self.show_offsets {
428 w!(@no_newline " : ");
429 }
430
431 if self.show_raw_bytes {
432 w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
433 } else {
434 w!("{}", format_jump_target(offset, basic_block_counter))
435 }
436 } else {
437 w!(" {}", format_jump_target(offset, basic_block_counter))
438 }
439 }
440
441 if matches!(self.format, DisassemblyFormat::DiffFriendly) {
442 let mut string = instruction_s;
443 if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
444 string = format!("{} = _", dst);
445 }
446
447 if let Some(index) = string.find('@') {
448 let length = string[index + 1..]
449 .chars()
450 .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
451 .count();
452 string.replace_range(index + 1..index + 1 + length, "_");
453 }
454
455 if let Some(index_1) = string.find("[0x") {
456 let index_2 = string[index_1..].find(']').unwrap() + index_1;
457 string.replace_range(index_1..=index_2, "[_]");
458 }
459
460 w!(" {}", string);
461 } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
462 if self.show_offsets {
463 w!(@no_newline "{offset:6}: ");
464 }
465 if self.show_raw_bytes {
466 let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
467 w!("{raw_bytes:24} {instruction_s}")
468 } else {
469 w!("{instruction_s}")
470 }
471 }
472
473 if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
474 let native = self.native.as_ref().unwrap();
475 assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);
476
477 let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
478 let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
479 let length = machine_next_code_position - machine_code_position;
480 if length != 0 {
481 let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
482 if let Err(error) = fmt.emit(
483 matches!(self.format, DisassemblyFormat::GuestAndNative),
484 native.machine_code_origin,
485 machine_code_chunk,
486 machine_code_position,
487 self.show_native_raw_bytes,
488 self.show_native_offsets,
489 &mut writer,
490 ) {
491 return Err(format!("failed to write to output: {error}").into());
492 }
493 }
494 }
495
496 if instruction.opcode().starts_new_basic_block() {
497 if nth_instruction + 1 != instructions.len() {
498 pending_label = true;
499 }
500 basic_block_counter += 1;
501 }
502 }
503
504 if let Err(error) = writer.flush() {
505 return Err(format!("failed to write to output: {error}").into());
506 }
507
508 Ok(())
509 }
510}
511
// Tests for the disassembler.
#[cfg(test)]
mod tests {
    use polkavm::Reg::*;
    use polkavm_common::abi::MemoryMapBuilder;
    use polkavm_common::program::{asm, InstructionSetKind};
    use polkavm_common::writer::ProgramBlobBuilder;

    use super::*;

    /// Disassembles `blob` in every format available on the current target and
    /// checks that each one produces some output.
    fn test_all_formats(blob: &ProgramBlob) {
        for format in [
            DisassemblyFormat::Guest,
            DisassemblyFormat::DiffFriendly,
            // The native formats are only exercised on x86-64 Linux.
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::GuestAndNative,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::Native,
        ] {
            assert!(!disassemble_with_gas(blob, format).is_empty());
        }
    }

    /// Disassembles `blob` in `format` with gas annotations enabled and returns the
    /// raw output bytes.
    fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
        let mut disassembler = Disassembler::new(blob, format).unwrap();
        disassembler.display_gas().unwrap();

        let mut buffer = Vec::with_capacity(1 << 20);
        disassembler.disassemble_into(&mut buffer).unwrap();
        buffer
    }

    /// Builds a five instruction program with one export and one import and compares
    /// its `Guest` disassembly against the expected text.
    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();
        let mut builder = ProgramBlobBuilder::new(InstructionSetKind::Latest32);
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(
            &[
                asm::store_imm_u32(memory_map.rw_data_address().try_into().unwrap(), 0x12345678),
                asm::add_32(S0, A0, A1),
                asm::ecalli(0),
                asm::add_32(A0, A0, S0),
                asm::ret(),
            ],
            &[],
        );
        let blob = ProgramBlob::parse(builder.into_vec().unwrap().into()).unwrap();

        test_all_formats(&blob);

        let assembly_bytes = disassemble_with_gas(&blob, DisassemblyFormat::Guest);
        let assembly_text = String::from_utf8(assembly_bytes).unwrap();
        // NOTE(review): these lines must match the disassembler's column padding byte for
        // byte (e.g. the `{offset:6}` offset column); double-check the leading whitespace
        // of the literals if this assertion fails.
        let expected = &[
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            " : @0 [export #0: 'main'] (gas: 5)",
            " 0: u32 [0x20000] = 0x12345678",
            " 9: s0 = a0 + a1",
            " 12: ecalli 0 // 'hostcall'",
            " 13: a0 = a0 + s0",
            " 16: ret",
            // The trailing empty element makes the joined text end with a newline.
            "",
        ]
        .join("\n");

        assert_eq!(&assembly_text, expected);
    }
}