1use std::{collections::HashMap, io::Write};
2
3use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter, ISA32_V1, ISA64_V1};
4
// Output format selected for the disassembler.
//
// (Plain `//` comments are used on purpose: `///` comments on a `clap::ValueEnum`
// are picked up by clap and would change the generated help text.)
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum DisassemblyFormat {
    // Print only the guest instructions.
    Guest,
    // Print the guest instructions, each followed by the (indented) native
    // machine code it was compiled to.
    GuestAndNative,
    // Print only the native machine code.
    Native,
    // Print the guest instructions with basic block numbers, `load_imm`
    // immediates and `[0x...]` addresses replaced by `_`.
    DiffFriendly,
}
12
/// Native machine code of a compiled program, together with the information
/// needed to attribute it to individual guest instructions.
struct NativeCode {
    /// Address passed to the x86 decoder as the origin of `machine_code`
    /// (`0` when the module doesn't report one).
    machine_code_origin: u64,
    /// The raw machine code bytes of the compiled module.
    machine_code: Vec<u8>,
    /// Pairs of (guest program counter, offset into `machine_code`).
    ///
    /// The disassembler reads entry `n + 1` to find where the code of the
    /// `n`-th instruction ends.
    instruction_map: Vec<(ProgramCounter, u32)>,
}
18
19impl TryFrom<&'_ ProgramBlob> for NativeCode {
20 type Error = polkavm::Error;
21
22 fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
23 if !cfg!(target_arch = "x86_64") {
24 return Err("the selected disassembly format is not supported on this architecture".into());
25 }
26
27 let mut config = polkavm::Config::from_env()?;
28 config.set_worker_count(0);
29
30 let engine = polkavm::Engine::new(&config)?;
31 let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
32
33 let Some(machine_code) = module.machine_code() else {
34 return Err("currently selected VM backend doesn't provide raw machine code".into());
35 };
36
37 let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
38 return Err("currently selected VM backend doesn't provide a machine code map".into());
39 };
40
41 Ok(Self {
42 machine_code_origin: module.machine_code_origin().unwrap_or(0),
43 machine_code: machine_code.into(),
44 instruction_map: instruction_map.to_vec(),
45 })
46 }
47}
48
/// Formats chunks of x86-64 machine code as text.
#[derive(Default)]
struct AssemblyFormatter {
    /// Scratch buffer reused for the text of each formatted instruction.
    buffer: String,
}
53
54impl AssemblyFormatter {
55 fn emit(
56 &mut self,
57 indent: bool,
58 code_origin: u64,
59 mut code: &[u8],
60 mut position: usize,
61 show_raw_bytes: bool,
62 show_offsets: bool,
63 writer: &mut impl Write,
64 ) -> Result<(), std::io::Error> {
65 use iced_x86::Formatter;
66
67 let mut formatter = iced_x86::NasmFormatter::new();
68 formatter.options_mut().set_space_after_operand_separator(true);
69 formatter.options_mut().set_hex_prefix("0x");
70 formatter.options_mut().set_hex_suffix("");
71 formatter.options_mut().set_uppercase_hex(false);
72 formatter.options_mut().set_small_hex_numbers_in_decimal(false);
73 formatter.options_mut().set_show_useless_prefixes(true);
74 formatter.options_mut().set_branch_leading_zeros(false);
75 formatter.options_mut().set_rip_relative_addresses(true);
76
77 loop {
78 let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
79 if !decoder.can_decode() {
80 break;
81 }
82 let mut instruction = iced_x86::Instruction::default();
83 decoder.decode_out(&mut instruction);
84
85 if indent {
86 write!(writer, " ")?;
87 }
88
89 if show_offsets {
90 write!(writer, "{:8x}: ", position as u64 + code_origin)?;
91 }
92
93 let start_index = (instruction.ip() - code_origin) as usize;
94 let instr_bytes = &code[start_index..start_index + instruction.len()];
95 if show_raw_bytes {
96 let mut count = 0;
97 for b in instr_bytes.iter() {
98 write!(writer, "{:02x} ", b)?;
99 count += 3;
100 }
101 while count < 34 {
102 write!(writer, " ")?;
103 count += 1;
104 }
105 }
106
107 self.buffer.clear();
108 formatter.format(&instruction, &mut self.buffer);
109 write!(writer, "{}", self.buffer.replace("byte [", "byte ptr ["))?;
110 writeln!(writer)?;
111
112 code = &code[instruction.len()..];
113 position += instruction.len();
114 }
115
116 Ok(())
117 }
118}
119
/// Disassembler for a [`ProgramBlob`], configurable through its setter methods.
pub struct Disassembler<'a> {
    /// The program being disassembled.
    blob: &'a ProgramBlob,
    /// The selected output format.
    format: DisassemblyFormat,
    /// Gas costs keyed by the offset of the first instruction of a basic block;
    /// `None` until `display_gas` is called.
    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
    /// The compiled machine code; only populated for the native formats.
    native: Option<NativeCode>,
    /// Print the raw bytes of every guest instruction.
    show_raw_bytes: bool,
    /// Print the raw bytes of every native instruction.
    show_native_raw_bytes: bool,
    /// Forwarded to the guest instruction formatter.
    prefer_non_abi_reg_names: bool,
    /// Forwarded to the guest instruction formatter.
    prefer_unaliased: bool,
    /// Print jump targets as numeric offsets instead of `@<basic block>`.
    prefer_offset_jump_targets: bool,
    /// Print the header with the data, stack and code sizes.
    emit_header: bool,
    /// Annotate basic block labels with the exports pointing at them.
    emit_exports: bool,
    /// Print the offset of every guest instruction.
    show_offsets: bool,
    /// Print the address of every native instruction.
    show_native_offsets: bool,
}
135
136impl<'a> Disassembler<'a> {
137 pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
138 let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
139 Some(NativeCode::try_from(blob)?)
140 } else {
141 None
142 };
143
144 Ok(Self {
145 blob,
146 format,
147 gas_cost_map: None,
148 native,
149 show_raw_bytes: false,
150 show_native_raw_bytes: true,
151 prefer_non_abi_reg_names: false,
152 prefer_unaliased: false,
153 prefer_offset_jump_targets: false,
154 emit_header: true,
155 emit_exports: true,
156 show_offsets: true,
157 show_native_offsets: true,
158 })
159 }
160
    /// Sets whether the raw bytes of every guest instruction are printed as a
    /// hex column.
    pub fn show_raw_bytes(&mut self, value: bool) {
        self.show_raw_bytes = value;
    }
164
    /// Sets whether the raw bytes of every native instruction are printed as a
    /// hex column.
    pub fn show_native_raw_bytes(&mut self, value: bool) {
        self.show_native_raw_bytes = value;
    }
168
    /// Sets the `prefer_non_abi_reg_names` option which is forwarded to the
    /// guest instruction formatter.
    // NOTE(review): presumably selects non-ABI register names in the output;
    // confirm against `InstructionFormat`.
    pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
        self.prefer_non_abi_reg_names = value;
    }
172
    /// Sets the `prefer_unaliased` option which is forwarded to the guest
    /// instruction formatter.
    // NOTE(review): presumably disables instruction aliases in the output;
    // confirm against `InstructionFormat`.
    pub fn prefer_unaliased(&mut self, value: bool) {
        self.prefer_unaliased = value;
    }
176
    /// Sets whether jump targets are always printed as numeric offsets instead
    /// of as `@<basic block index>` references.
    pub fn prefer_offset_jump_targets(&mut self, value: bool) {
        self.prefer_offset_jump_targets = value;
    }
180
    /// Sets whether the header with the data, stack and code sizes is printed
    /// before the disassembly.
    pub fn emit_header(&mut self, value: bool) {
        self.emit_header = value;
    }
184
    /// Sets whether basic block labels are annotated with the exports which
    /// point at them.
    pub fn emit_exports(&mut self, value: bool) {
        self.emit_exports = value;
    }
188
    /// Sets whether the offset of every guest instruction is printed.
    pub fn show_offsets(&mut self, value: bool) {
        self.show_offsets = value;
    }
192
    /// Sets whether the address of every native instruction is printed.
    pub fn show_native_offsets(&mut self, value: bool) {
        self.show_native_offsets = value;
    }
196
197 fn instructions(&self) -> Vec<ParsedInstruction> {
198 if self.blob.is_64_bit() {
199 self.blob.instructions(ISA64_V1).collect()
200 } else {
201 self.blob.instructions(ISA32_V1).collect()
202 }
203 }
204
205 pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
206 let mut config = polkavm::Config::from_env()?;
207 config.set_worker_count(0);
208 config.set_backend(Some(polkavm::BackendKind::Interpreter));
209
210 let engine = polkavm::Engine::new(&config)?;
211
212 let mut config = polkavm::ModuleConfig::default();
213 config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
214
215 let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
216
217 let mut in_new_block = true;
218 let mut gas_cost_map = HashMap::new();
219 for instruction in self.instructions() {
220 if in_new_block {
221 in_new_block = false;
222 if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
223 gas_cost_map.insert(instruction.offset, cost);
224 }
225 }
226
227 if instruction.starts_new_basic_block() {
228 in_new_block = true;
229 }
230 }
231 self.gas_cost_map = Some(gas_cost_map);
232
233 Ok(())
234 }
235
    /// Writes the disassembly of the whole program into `writer`, according to
    /// the selected format and display options, and flushes the writer.
    ///
    /// # Errors
    ///
    /// Returns an error if writing to or flushing `writer` fails, if the debug
    /// line program for an instruction cannot be fetched, or if a line program
    /// fails to parse.
    ///
    /// # Panics
    ///
    /// Panics if the jump table contains the same target twice, if a native
    /// format is selected and the instruction map doesn't line up with the
    /// guest instructions, or on one of the `unwrap`s noted in the body.
    pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
        // First pass: collect every instruction and map the offset of the first
        // instruction of each basic block to the index of that block.
        let mut instructions = Vec::new();
        let mut instruction_offset_to_basic_block = HashMap::new();
        {
            let mut basic_block_counter = 0;
            let mut basic_block_started = true;
            for instruction in self.instructions() {
                if basic_block_started {
                    instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
                    basic_block_started = false;
                }

                if instruction.starts_new_basic_block() {
                    basic_block_started = true;
                    basic_block_counter += 1;
                }
                instructions.push(instruction);
            }
        }

        // Group the exports by the code offset they point at, remembering the
        // index of each export.
        let mut exports_for_code_offset = HashMap::new();
        for (nth_export, export) in self.blob.exports().enumerate() {
            exports_for_code_offset
                .entry(export.program_counter())
                .or_insert_with(Vec::new)
                .push((nth_export, export));
        }

        // Map every jump table target to its one-based index in the jump table.
        let mut jump_table_map = HashMap::new();
        let mut jump_table = Vec::new();
        for target_code_offset in self.blob.jump_table() {
            let jump_table_index = jump_table.len() + 1;
            jump_table.push(target_code_offset);
            // Panics if the same target appears in the jump table more than once.
            assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
        }

        // `w!(...)` writes a line to `writer`, `w!(@no_newline ...)` writes
        // without the trailing newline; both return from the function with an
        // error if the write fails.
        macro_rules! w {
            (@no_newline $($arg:tt)*) => {{
                if let Err(error) = write!(&mut writer, $($arg)*) {
                    return Err(format!("failed to write to output: {error}").into());
                }
            }};

            ($($arg:tt)*) => {{
                if let Err(error) = writeln!(&mut writer, $($arg)*) {
                    return Err(format!("failed to write to output: {error}").into());
                }
            }};
        }

        if self.emit_header {
            w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
            w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
            w!("// Stack size = {} bytes", self.blob.stack_size());
            w!();
            w!("// Instructions = {}", instructions.len());
            w!("// Code size = {} bytes", self.blob.code().len());
            w!();
        }

        // Builds the label printed at the start of a basic block: the block
        // reference, followed by the optional jump table, export and gas
        // annotations for `target_offset`.
        let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
            use core::fmt::Write;

            let mut buf = String::new();
            // The diff-friendly format hides the block number.
            if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
                write!(&mut buf, "@{basic_block_counter}").unwrap()
            } else {
                buf.push_str("@_:");
            }

            if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
                // The diff-friendly format hides the jump table index as well.
                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
                    write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
                } else {
                    buf.push_str(" [_]");
                }
            }

            if self.emit_exports {
                if let Some(exports) = exports_for_code_offset.get(&target_offset) {
                    for (nth_export, export) in exports {
                        write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
                    }
                }
            }

            // Only present if `display_gas` was called and a cost was recorded
            // for this offset.
            if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
                write!(&mut buf, " (gas: {})", gas_cost).unwrap();
            }

            buf
        };

        let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
        let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
        disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
        disassembly_format.prefer_unaliased = self.prefer_unaliased;
        disassembly_format.is_64_bit = self.blob.is_64_bit();

        // Prints a jump target as `@<basic block index>` when the target is the
        // start of a known basic block (and offsets are not preferred), and as
        // a plain number otherwise.
        let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
            if prefer_offset_jump_targets {
                write!(fmt, "{}", target)
            } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
                write!(fmt, "@{basic_block_index}")
            } else {
                write!(fmt, "{}", target)
            }
        };
        disassembly_format.jump_target_formatter = Some(&jump_target_formatter);

        let mut fmt = AssemblyFormatter::default();
        // Entry index of the line program of the previous instruction, if it had one.
        let mut last_line_program_entry = None;
        // Name of the function whose `<name>:` header was printed last.
        let mut last_full_name = String::new();
        let mut basic_block_counter = 0;
        // Set when the next instruction starts a basic block whose label is
        // still to be printed.
        let mut pending_label = true;
        for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
            let offset = instruction.offset;
            // Clamp the end of the instruction to the end of the code.
            let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
            let instruction = instruction.kind;
            let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];

            // Textual form of the instruction; host calls are annotated with
            // the import they refer to.
            let instruction_s = instruction.display(&disassembly_format);
            let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
                if let Some(import) = self.blob.imports().get(nth_import) {
                    format!("{instruction_s} // {}", import)
                } else {
                    format!("{instruction_s} // INVALID")
                }
            } else {
                instruction_s.to_string()
            };

            let line_program = self.blob.get_debug_line_program_at(offset)?;

            // If debug info is available print a `<function name>:` header
            // whenever the function containing the instruction changes.
            if let Some(mut line_program) = line_program {
                if last_line_program_entry != Some(line_program.entry_index()) {
                    // Separate this line program from the preceding output.
                    if nth_instruction != 0 {
                        if let Err(error) = writeln!(&mut writer) {
                            return Err(format!("failed to write to output: {error}").into());
                        }
                    }

                    last_line_program_entry = Some(line_program.entry_index());
                    // Run the line program until the region which contains
                    // this instruction is found.
                    loop {
                        let region = match line_program.run() {
                            Ok(Some(region)) => region,
                            Ok(None) => break,
                            Err(error) => {
                                return Err(format!("failed to parse line program: {error}").into());
                            }
                        };

                        if region.instruction_range().contains(&offset) {
                            // NOTE(review): panics if the region has no frames;
                            // presumably every region has at least one - confirm.
                            let frame = region.frames().next().unwrap();
                            let full_name = match frame.full_name() {
                                Ok(full_name) => full_name,
                                Err(error) => {
                                    return Err(format!("failed to parse line program: {error}").into());
                                }
                            }
                            .to_string();

                            if last_full_name != full_name {
                                w!("<{}>:", full_name);
                                last_full_name = full_name;
                            }

                            break;
                        }
                    }
                }
            } else {
                // No debug info for this instruction: end the section of the
                // previously printed function, if any.
                if !last_full_name.is_empty() {
                    if let Err(error) = writeln!(&mut writer) {
                        return Err(format!("failed to write to output: {error}").into());
                    }
                }

                last_line_program_entry = None;
                last_full_name.clear();
            }

            // Print the label of the basic block which starts at this instruction.
            if pending_label {
                pending_label = false;
                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
                    // Fill the offset column so the label lines up with the instructions.
                    if self.show_offsets {
                        w!(@no_newline " : ");
                    }

                    // Likewise skip over the raw bytes column.
                    if self.show_raw_bytes {
                        w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
                    } else {
                        w!("{}", format_jump_target(offset, basic_block_counter))
                    }
                } else {
                    w!(" {}", format_jump_target(offset, basic_block_counter))
                }
            }

            if matches!(self.format, DisassemblyFormat::DiffFriendly) {
                let mut string = instruction_s;
                // Hide the immediate of `load_imm`.
                if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
                    string = format!("{} = _", dst);
                }

                // Replace the digits (decimal digits or `a`-`f`) which follow
                // the first `@` with `_`.
                if let Some(index) = string.find('@') {
                    let length = string[index + 1..]
                        .chars()
                        .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
                        .count();
                    string.replace_range(index + 1..index + 1 + length, "_");
                }

                // Replace the first `[0x...]` with `[_]`.
                if let Some(index_1) = string.find("[0x") {
                    // Panics if the bracket is never closed.
                    let index_2 = string[index_1..].find(']').unwrap() + index_1;
                    string.replace_range(index_1..=index_2, "[_]");
                }

                w!(" {}", string);
            } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
                if self.show_offsets {
                    w!(@no_newline "{offset:6}: ");
                }
                if self.show_raw_bytes {
                    let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
                    w!("{raw_bytes:24} {instruction_s}")
                } else {
                    w!("{instruction_s}")
                }
            }

            if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
                // `new` populates `native` for exactly these two formats.
                let native = self.native.as_ref().unwrap();
                // The n-th entry of the map must describe the n-th guest instruction.
                assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);

                // The machine code of this instruction spans from its own map
                // entry up to the next one.
                // NOTE(review): indexing `nth_instruction + 1` relies on the map
                // having one more entry than there are instructions - confirm.
                let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
                let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
                let length = machine_next_code_position - machine_code_position;
                if length != 0 {
                    let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
                    if let Err(error) = fmt.emit(
                        matches!(self.format, DisassemblyFormat::GuestAndNative),
                        native.machine_code_origin,
                        machine_code_chunk,
                        machine_code_position,
                        self.show_native_raw_bytes,
                        self.show_native_offsets,
                        &mut writer,
                    ) {
                        return Err(format!("failed to write to output: {error}").into());
                    }
                }
            }

            if instruction.opcode().starts_new_basic_block() {
                // No label is needed after the very last instruction.
                if nth_instruction + 1 != instructions.len() {
                    pending_label = true;
                }
                basic_block_counter += 1;
            }
        }

        if let Err(error) = writer.flush() {
            return Err(format!("failed to write to output: {error}").into());
        }

        Ok(())
    }
504}
505
506#[cfg(test)]
507mod tests {
508 use polkavm::Reg::*;
509 use polkavm_common::abi::MemoryMapBuilder;
510 use polkavm_common::program::asm;
511 use polkavm_common::writer::ProgramBlobBuilder;
512
513 use super::*;
514
515 fn test_all_formats(blob: &ProgramBlob) {
516 for format in [
517 DisassemblyFormat::Guest,
518 DisassemblyFormat::DiffFriendly,
519 #[cfg(target_arg = "x86_84")]
520 DisassemblyFormat::GuestAndNative,
521 #[cfg(target_arg = "x86_84")]
522 DisassemblyFormat::Native,
523 ] {
524 assert!(!disassemble_with_gas(blob, format).is_empty());
525 }
526 }
527
528 fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
529 let mut disassembler = Disassembler::new(blob, format).unwrap();
530 disassembler.display_gas().unwrap();
531
532 let mut buffer = Vec::with_capacity(1 << 20);
533 disassembler.disassemble_into(&mut buffer).unwrap();
534 buffer
535 }
536
    /// Builds a small five instruction program with one export and one import,
    /// checks that it disassembles in every format, and compares the output of
    /// the `Guest` format against the expected text.
    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();
        let mut builder = ProgramBlobBuilder::new();
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(
            &[
                asm::store_imm_u32(memory_map.rw_data_address(), 0x12345678),
                asm::add_32(S0, A0, A1),
                asm::ecalli(0),
                asm::add_32(A0, A0, S0),
                asm::ret(),
            ],
            &[],
        );
        let blob = ProgramBlob::parse(builder.into_vec().unwrap().into()).unwrap();

        // Smoke test: every format must produce some output.
        test_all_formats(&blob);

        let assembly_bytes = disassemble_with_gas(&blob, DisassemblyFormat::Guest);
        let assembly_text = String::from_utf8(assembly_bytes).unwrap();
        // The trailing empty string makes the joined text end with a newline.
        let expected = &[
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            " : @0 [export #0: 'main'] (gas: 5)",
            " 0: u32 [0x20000] = 0x12345678",
            " 9: s0 = a0 + a1",
            " 12: ecalli 0 // 'hostcall'",
            " 13: a0 = a0 + s0",
            " 16: ret",
            "",
        ]
        .join("\n");

        assert_eq!(&assembly_text, expected);
    }
580}