1use std::{collections::HashMap, io::Write};
2
3use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter, ISA32_V1, ISA64_V1};
4
// Output flavor selected for a disassembly run.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum DisassemblyFormat {
    // Guest instructions only.
    Guest,
    // Guest instructions, each followed by the native machine code it maps to.
    GuestAndNative,
    // Native machine code only.
    Native,
    // Guest instructions with basic block numbers, `load_imm` values and `[0x...]`
    // operands replaced by `_` placeholders.
    DiffFriendly,
}
12
/// Machine code produced for a program blob, plus the data needed to relate it back
/// to guest instructions.
struct NativeCode {
    /// Origin added to machine code positions when printing addresses
    /// (falls back to `0` when the module does not report one).
    machine_code_origin: u64,
    /// Raw machine code bytes of the compiled module.
    machine_code: Vec<u8>,
    /// Pairs of (guest program counter, position in `machine_code`); the disassembler
    /// indexes this by guest instruction number.
    instruction_map: Vec<(ProgramCounter, u32)>,
}
18
19impl TryFrom<&'_ ProgramBlob> for NativeCode {
20 type Error = polkavm::Error;
21
22 fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
23 if !cfg!(target_arch = "x86_64") {
24 return Err("the selected disassembly format is not supported on this architecture".into());
25 }
26
27 let mut config = polkavm::Config::from_env()?;
28 config.set_worker_count(0);
29
30 let engine = polkavm::Engine::new(&config)?;
31 let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
32
33 let Some(machine_code) = module.machine_code() else {
34 return Err("currently selected VM backend doesn't provide raw machine code".into());
35 };
36
37 let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
38 return Err("currently selected VM backend doesn't provide a machine code map".into());
39 };
40
41 Ok(Self {
42 machine_code_origin: module.machine_code_origin().unwrap_or(0),
43 machine_code: machine_code.into(),
44 instruction_map: instruction_map.to_vec(),
45 })
46 }
47}
48
/// Pretty-printer for native machine code.
#[derive(Default)]
struct AssemblyFormatter {
    /// Scratch string cleared and refilled for every formatted instruction.
    buffer: String,
}
53
54impl AssemblyFormatter {
55 fn emit(
56 &mut self,
57 indent: bool,
58 code_origin: u64,
59 mut code: &[u8],
60 mut position: usize,
61 writer: &mut impl Write,
62 ) -> Result<(), std::io::Error> {
63 use iced_x86::Formatter;
64
65 let mut formatter = iced_x86::NasmFormatter::new();
66 formatter.options_mut().set_space_after_operand_separator(true);
67 formatter.options_mut().set_hex_prefix("0x");
68 formatter.options_mut().set_hex_suffix("");
69 formatter.options_mut().set_uppercase_hex(false);
70 formatter.options_mut().set_small_hex_numbers_in_decimal(false);
71 formatter.options_mut().set_show_useless_prefixes(true);
72 formatter.options_mut().set_branch_leading_zeros(false);
73 formatter.options_mut().set_rip_relative_addresses(true);
74
75 loop {
76 let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
77 if !decoder.can_decode() {
78 break;
79 }
80 let mut instruction = iced_x86::Instruction::default();
81 decoder.decode_out(&mut instruction);
82
83 if indent {
84 write!(writer, " ")?;
85 }
86 write!(writer, "{:8x}: ", position as u64 + code_origin)?;
87
88 let start_index = (instruction.ip() - code_origin) as usize;
89 let instr_bytes = &code[start_index..start_index + instruction.len()];
90 let mut count = 0;
91 for b in instr_bytes.iter() {
92 write!(writer, "{:02x} ", b)?;
93 count += 3;
94 }
95 while count < 34 {
96 write!(writer, " ")?;
97 count += 1;
98 }
99
100 self.buffer.clear();
101 formatter.format(&instruction, &mut self.buffer);
102 write!(writer, "{}", self.buffer)?;
103 writeln!(writer)?;
104
105 code = &code[instruction.len()..];
106 position += instruction.len();
107 }
108
109 Ok(())
110 }
111}
112
/// Configurable disassembler for a single program blob.
pub struct Disassembler<'a> {
    /// The program being disassembled.
    blob: &'a ProgramBlob,
    /// Which output flavor to produce.
    format: DisassemblyFormat,
    /// Gas cost keyed by program counter; `None` until `display_gas` has been called.
    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
    /// Native code, present only for the `Native` and `GuestAndNative` formats.
    native: Option<NativeCode>,
    /// Print the raw bytes of each guest instruction.
    show_raw_bytes: bool,
    /// Forwarded to the instruction formatter's option of the same name.
    prefer_non_abi_reg_names: bool,
    /// Forwarded to the instruction formatter's option of the same name.
    prefer_unaliased: bool,
    /// Print jump targets as raw numbers instead of `@<basic block>` labels.
    prefer_offset_jump_targets: bool,
    /// Print the leading `//` summary header.
    emit_header: bool,
    /// Annotate basic block labels with the exports pointing at them.
    emit_exports: bool,
    /// Print the offset column in front of labels and guest instructions.
    show_offsets: bool,
}
126
127impl<'a> Disassembler<'a> {
128 pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
129 let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
130 Some(NativeCode::try_from(blob)?)
131 } else {
132 None
133 };
134
135 Ok(Self {
136 blob,
137 format,
138 gas_cost_map: None,
139 native,
140 show_raw_bytes: false,
141 prefer_non_abi_reg_names: false,
142 prefer_unaliased: false,
143 prefer_offset_jump_targets: false,
144 emit_header: true,
145 emit_exports: true,
146 show_offsets: true,
147 })
148 }
149
    /// Sets whether guest instruction lines include the instruction's raw bytes in hex.
    /// Defaults to `false`.
    pub fn show_raw_bytes(&mut self, value: bool) {
        self.show_raw_bytes = value;
    }
153
    /// Sets the `prefer_non_abi_reg_names` option that is passed to the instruction
    /// formatter. Defaults to `false`.
    pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
        self.prefer_non_abi_reg_names = value;
    }
157
    /// Sets the `prefer_unaliased` option that is passed to the instruction formatter.
    /// Defaults to `false`.
    pub fn prefer_unaliased(&mut self, value: bool) {
        self.prefer_unaliased = value;
    }
161
    /// Sets whether jump targets are printed as raw numbers rather than as
    /// `@<basic block>` labels. Defaults to `false`.
    pub fn prefer_offset_jump_targets(&mut self, value: bool) {
        self.prefer_offset_jump_targets = value;
    }
165
    /// Sets whether the `//` summary header (data sizes, stack size, instruction count,
    /// code size) is written before the listing. Defaults to `true`.
    pub fn emit_header(&mut self, value: bool) {
        self.emit_header = value;
    }
169
    /// Sets whether basic block labels are annotated with the exports that point at
    /// them. Defaults to `true`.
    pub fn emit_exports(&mut self, value: bool) {
        self.emit_exports = value;
    }
173
    /// Sets whether the offset column is printed in front of labels and guest
    /// instructions. Defaults to `true`.
    pub fn show_offsets(&mut self, value: bool) {
        self.show_offsets = value;
    }
177
178 fn instructions(&self) -> Vec<ParsedInstruction> {
179 if self.blob.is_64_bit() {
180 self.blob.instructions(ISA64_V1).collect()
181 } else {
182 self.blob.instructions(ISA32_V1).collect()
183 }
184 }
185
186 pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
187 let mut config = polkavm::Config::from_env()?;
188 config.set_worker_count(0);
189 config.set_backend(Some(polkavm::BackendKind::Interpreter));
190
191 let engine = polkavm::Engine::new(&config)?;
192
193 let mut config = polkavm::ModuleConfig::default();
194 config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
195
196 let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
197
198 let mut in_new_block = true;
199 let mut gas_cost_map = HashMap::new();
200 for instruction in self.instructions() {
201 if in_new_block {
202 in_new_block = false;
203 if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
204 gas_cost_map.insert(instruction.offset, cost);
205 }
206 }
207
208 if instruction.starts_new_basic_block() {
209 in_new_block = true;
210 }
211 }
212 self.gas_cost_map = Some(gas_cost_map);
213
214 Ok(())
215 }
216
    /// Writes the disassembly of the whole program to `writer`, honoring the selected
    /// format and display options, and flushes the writer at the end.
    ///
    /// # Errors
    ///
    /// Returns an error if writing to or flushing `writer` fails, if the debug line
    /// program for an instruction cannot be fetched or parsed.
    ///
    /// # Panics
    ///
    /// NOTE(review): the body contains several panic paths - the `assert!` on the jump
    /// table (same target listed twice), `frames().next().unwrap()`, the `find(']')`
    /// unwrap in the diff-friendly path, and the `assert_eq!`/indexing on the native
    /// instruction map. Whether any of these is reachable depends on invariants that
    /// are not visible in this file; confirm against the blob parser and the backend.
    pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
        // First pass: collect all instructions and remember, for the first instruction
        // of every basic block, the index of that block.
        let mut instructions = Vec::new();
        let mut instruction_offset_to_basic_block = HashMap::new();
        {
            let mut basic_block_counter = 0;
            let mut basic_block_started = true;
            for instruction in self.instructions() {
                if basic_block_started {
                    instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
                    basic_block_started = false;
                }

                if instruction.starts_new_basic_block() {
                    basic_block_started = true;
                    basic_block_counter += 1;
                }
                instructions.push(instruction);
            }
        }

        // Group exports by the program counter they point at, keeping their ordinal.
        let mut exports_for_code_offset = HashMap::new();
        for (nth_export, export) in self.blob.exports().enumerate() {
            exports_for_code_offset
                .entry(export.program_counter())
                .or_insert_with(Vec::new)
                .push((nth_export, export));
        }

        // Map each jump table target to its index in the table; indices start at 1.
        let mut jump_table_map = HashMap::new();
        let mut jump_table = Vec::new();
        for target_code_offset in self.blob.jump_table() {
            let jump_table_index = jump_table.len() + 1;
            jump_table.push(target_code_offset);
            // Panics if the same target appears in the jump table more than once.
            assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
        }

        // `w!(...)` writes a line to `writer`, `w!(@no_newline ...)` writes without the
        // trailing newline; both return early from this function on an I/O error.
        macro_rules! w {
            (@no_newline $($arg:tt)*) => {{
                if let Err(error) = write!(&mut writer, $($arg)*) {
                    return Err(format!("failed to write to output: {error}").into());
                }
            }};

            ($($arg:tt)*) => {{
                if let Err(error) = writeln!(&mut writer, $($arg)*) {
                    return Err(format!("failed to write to output: {error}").into());
                }
            }};
        }

        if self.emit_header {
            w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
            w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
            w!("// Stack size = {} bytes", self.blob.stack_size());
            w!();
            w!("// Instructions = {}", instructions.len());
            w!("// Code size = {} bytes", self.blob.code().len());
            w!();
        }

        // Builds the label text for a basic block: the block name, then optional
        // jump table, export and gas annotations. In the diff-friendly format the
        // block number and jump table index are replaced by placeholders.
        let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
            use core::fmt::Write;

            let mut buf = String::new();
            if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
                write!(&mut buf, "@{basic_block_counter}").unwrap()
            } else {
                buf.push_str("@_:");
            }

            if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
                    write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
                } else {
                    buf.push_str(" [_]");
                }
            }

            if self.emit_exports {
                if let Some(exports) = exports_for_code_offset.get(&target_offset) {
                    for (nth_export, export) in exports {
                        write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
                    }
                }
            }

            // Only present when `display_gas` has populated the gas cost map.
            if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
                write!(&mut buf, " (gas: {})", gas_cost).unwrap();
            }

            buf
        };

        let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
        let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
        disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
        disassembly_format.prefer_unaliased = self.prefer_unaliased;
        disassembly_format.is_64_bit = self.blob.is_64_bit();

        // Jump targets inside instructions are shown as `@<basic block>` when the target
        // is the start of a known basic block, and as the raw number otherwise (or always,
        // if `prefer_offset_jump_targets` is set).
        let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
            if prefer_offset_jump_targets {
                write!(fmt, "{}", target)
            } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
                write!(fmt, "@{basic_block_index}")
            } else {
                write!(fmt, "{}", target)
            }
        };
        disassembly_format.jump_target_formatter = Some(&jump_target_formatter);

        let mut fmt = AssemblyFormatter::default();
        // Debug-info state used to print a `<function name>:` heading only when it changes.
        let mut last_line_program_entry = None;
        let mut last_full_name = String::new();
        let mut basic_block_counter = 0;
        // True when the next instruction starts a basic block and still needs its label.
        let mut pending_label = true;
        for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
            let offset = instruction.offset;
            // Clamp the end of the instruction to the end of the code section.
            let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
            let instruction = instruction.kind;
            let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];

            // For host calls, append the name of the import being called.
            let instruction_s = instruction.display(&disassembly_format);
            let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
                if let Some(import) = self.blob.imports().get(nth_import) {
                    format!("{instruction_s} // {}", import)
                } else {
                    format!("{instruction_s} // INVALID")
                }
            } else {
                instruction_s.to_string()
            };

            let line_program = self.blob.get_debug_line_program_at(offset)?;

            if let Some(mut line_program) = line_program {
                // Entering a different line program: separate it from the previous output
                // with a blank line and print the enclosing function's name if it changed.
                if last_line_program_entry != Some(line_program.entry_index()) {
                    if nth_instruction != 0 {
                        if let Err(error) = writeln!(&mut writer) {
                            return Err(format!("failed to write to output: {error}").into());
                        }
                    }

                    last_line_program_entry = Some(line_program.entry_index());
                    loop {
                        let region = match line_program.run() {
                            Ok(Some(region)) => region,
                            Ok(None) => break,
                            Err(error) => {
                                return Err(format!("failed to parse line program: {error}").into());
                            }
                        };

                        if region.instruction_range().contains(&offset) {
                            // Panics if the region has no frames.
                            let frame = region.frames().next().unwrap();
                            let full_name = match frame.full_name() {
                                Ok(full_name) => full_name,
                                Err(error) => {
                                    return Err(format!("failed to parse line program: {error}").into());
                                }
                            }
                            .to_string();

                            if last_full_name != full_name {
                                w!("<{}>:", full_name);
                                last_full_name = full_name;
                            }

                            break;
                        }
                    }
                }
            } else {
                // No debug info for this instruction: close the previous named section
                // with a blank line and forget the remembered name.
                if !last_full_name.is_empty() {
                    if let Err(error) = writeln!(&mut writer) {
                        return Err(format!("failed to write to output: {error}").into());
                    }
                }

                last_line_program_entry = None;
                last_full_name.clear();
            }

            // Emit the basic block label in front of the block's first instruction.
            if pending_label {
                pending_label = false;
                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
                    if self.show_offsets {
                        w!(@no_newline " : ");
                    }

                    if self.show_raw_bytes {
                        w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
                    } else {
                        w!("{}", format_jump_target(offset, basic_block_counter))
                    }
                } else {
                    w!(" {}", format_jump_target(offset, basic_block_counter))
                }
            }

            if matches!(self.format, DisassemblyFormat::DiffFriendly) {
                // Mask everything that tends to change between builds: the value loaded
                // by `load_imm`, the number after the first `@`, and the first `[0x...]`
                // operand.
                let mut string = instruction_s;
                if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
                    string = format!("{} = _", dst);
                }

                if let Some(index) = string.find('@') {
                    let length = string[index + 1..]
                        .chars()
                        .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
                        .count();
                    string.replace_range(index + 1..index + 1 + length, "_");
                }

                if let Some(index_1) = string.find("[0x") {
                    // Panics if the `[0x` is not followed by a closing `]`.
                    let index_2 = string[index_1..].find(']').unwrap() + index_1;
                    string.replace_range(index_1..=index_2, "[_]");
                }

                w!(" {}", string);
            } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
                if self.show_offsets {
                    w!(@no_newline "{offset:6}: ");
                }
                if self.show_raw_bytes {
                    let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
                    w!("{raw_bytes:24} {instruction_s}")
                } else {
                    w!("{instruction_s}")
                }
            }

            if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
                // `new` populates `native` for exactly these two formats.
                let native = self.native.as_ref().unwrap();
                assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);

                // The machine code of this instruction spans from its own map entry to the
                // next one.
                // NOTE(review): indexing `nth_instruction + 1` presumably relies on the map
                // carrying one extra trailing entry - confirm against the backend.
                let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
                let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
                let length = machine_next_code_position - machine_code_position;
                if length != 0 {
                    let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
                    if let Err(error) = fmt.emit(
                        matches!(self.format, DisassemblyFormat::GuestAndNative),
                        native.machine_code_origin,
                        machine_code_chunk,
                        machine_code_position,
                        &mut writer,
                    ) {
                        return Err(format!("failed to write to output: {error}").into());
                    }
                }
            }

            if instruction.opcode().starts_new_basic_block() {
                // No label is owed after the very last instruction.
                if nth_instruction + 1 != instructions.len() {
                    pending_label = true;
                }
                basic_block_counter += 1;
            }
        }

        if let Err(error) = writer.flush() {
            return Err(format!("failed to write to output: {error}").into());
        }

        Ok(())
    }
483}
484
485#[cfg(test)]
486mod tests {
487 use polkavm::Reg::*;
488 use polkavm_common::abi::MemoryMapBuilder;
489 use polkavm_common::program::asm;
490 use polkavm_common::writer::ProgramBlobBuilder;
491
492 use super::*;
493
494 fn test_all_formats(blob: &ProgramBlob) {
495 for format in [
496 DisassemblyFormat::Guest,
497 DisassemblyFormat::DiffFriendly,
498 #[cfg(target_arg = "x86_84")]
499 DisassemblyFormat::GuestAndNative,
500 #[cfg(target_arg = "x86_84")]
501 DisassemblyFormat::Native,
502 ] {
503 assert!(!disassemble_with_gas(blob, format).is_empty());
504 }
505 }
506
507 fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
508 let mut disassembler = Disassembler::new(blob, format).unwrap();
509 disassembler.display_gas().unwrap();
510
511 let mut buffer = Vec::with_capacity(1 << 20);
512 disassembler.disassemble_into(&mut buffer).unwrap();
513 buffer
514 }
515
    // Builds a five-instruction program with one export and one import, runs it
    // through every format, and pins the exact text of the `Guest` output.
    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();
        let mut builder = ProgramBlobBuilder::new();
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(
            &[
                asm::store_imm_u32(memory_map.rw_data_address(), 0x12345678),
                asm::add_32(S0, A0, A1),
                asm::ecalli(0),
                asm::add_32(A0, A0, S0),
                asm::ret(),
            ],
            &[],
        );
        let blob = ProgramBlob::parse(builder.into_vec().into()).unwrap();

        // Every format must at least produce some output.
        test_all_formats(&blob);

        // Expected listing, one entry per output line; the trailing empty entry
        // accounts for the final newline once the lines are joined.
        let assembly_bytes = disassemble_with_gas(&blob, DisassemblyFormat::Guest);
        let assembly_text = String::from_utf8(assembly_bytes).unwrap();
        let expected = &[
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            " : @0 [export #0: 'main'] (gas: 5)",
            " 0: u32 [0x20000] = 0x12345678",
            " 9: s0 = a0 + a1",
            " 12: ecalli 0 // 'hostcall'",
            " 13: a0 = a0 + s0",
            " 16: ret",
            "",
        ]
        .join("\n");

        assert_eq!(&assembly_text, expected);
    }
559}