1use std::{collections::HashMap, io::Write};
2
3use polkavm::CostModelKind;
4use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter};
5
// Output layouts supported by `Disassembler`.
//
// NOTE: plain `//` comments are used on purpose. This enum derives
// `clap::ValueEnum`, and `///` doc comments on it would be picked up by the
// derive and change the generated command-line help.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum DisassemblyFormat {
    // Guest instructions only.
    Guest,
    // Each guest instruction followed by the (indented) native code it maps to.
    GuestAndNative,
    // Only the native code for each guest instruction (block labels are still printed).
    Native,
    // Guest instructions with offsets omitted and with block labels, `load_imm`
    // values, `@` jump targets and `[0x...]` operands replaced by `_` placeholders.
    DiffFriendly,
}
13
14struct NativeCode {
15 machine_code_origin: u64,
16 machine_code: Vec<u8>,
17 instruction_map: Vec<(ProgramCounter, u32)>,
18}
19
20impl TryFrom<&'_ ProgramBlob> for NativeCode {
21 type Error = polkavm::Error;
22
23 fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
24 if !cfg!(target_arch = "x86_64") {
25 return Err("the selected disassembly format is not supported on this architecture".into());
26 }
27
28 let mut config = polkavm::Config::from_env()?;
29 config.set_worker_count(0);
30
31 let engine = polkavm::Engine::new(&config)?;
32 let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
33
34 let Some(machine_code) = module.machine_code() else {
35 return Err("currently selected VM backend doesn't provide raw machine code".into());
36 };
37
38 let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
39 return Err("currently selected VM backend doesn't provide a machine code map".into());
40 };
41
42 Ok(Self {
43 machine_code_origin: module.machine_code_origin().unwrap_or(0),
44 machine_code: machine_code.into(),
45 instruction_map: instruction_map.to_vec(),
46 })
47 }
48}
49
50#[derive(Default)]
51struct AssemblyFormatter {
52 buffer: String,
53}
54
55impl AssemblyFormatter {
56 fn emit(
57 &mut self,
58 indent: bool,
59 code_origin: u64,
60 mut code: &[u8],
61 mut position: usize,
62 show_raw_bytes: bool,
63 show_offsets: bool,
64 writer: &mut impl Write,
65 ) -> Result<(), std::io::Error> {
66 use iced_x86::Formatter;
67
68 let mut formatter = iced_x86::NasmFormatter::new();
69 formatter.options_mut().set_space_after_operand_separator(true);
70 formatter.options_mut().set_hex_prefix("0x");
71 formatter.options_mut().set_hex_suffix("");
72 formatter.options_mut().set_uppercase_hex(false);
73 formatter.options_mut().set_small_hex_numbers_in_decimal(false);
74 formatter.options_mut().set_show_useless_prefixes(true);
75 formatter.options_mut().set_branch_leading_zeros(false);
76 formatter.options_mut().set_rip_relative_addresses(true);
77
78 loop {
79 let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
80 if !decoder.can_decode() {
81 break;
82 }
83 let mut instruction = iced_x86::Instruction::default();
84 decoder.decode_out(&mut instruction);
85
86 if indent {
87 write!(writer, " ")?;
88 }
89
90 if show_offsets {
91 write!(writer, "{:8x}: ", position as u64 + code_origin)?;
92 }
93
94 let start_index = (instruction.ip() - code_origin) as usize;
95 let instr_bytes = &code[start_index..start_index + instruction.len()];
96 if show_raw_bytes {
97 let mut count = 0;
98 for b in instr_bytes.iter() {
99 write!(writer, "{:02x} ", b)?;
100 count += 3;
101 }
102 while count < 34 {
103 write!(writer, " ")?;
104 count += 1;
105 }
106 }
107
108 self.buffer.clear();
109 formatter.format(&instruction, &mut self.buffer);
110 write!(writer, "{}", self.buffer.replace("byte [", "byte ptr ["))?;
111 writeln!(writer)?;
112
113 code = &code[instruction.len()..];
114 position += instruction.len();
115 }
116
117 Ok(())
118 }
119}
120
121pub struct Disassembler<'a> {
122 blob: &'a ProgramBlob,
123 format: DisassemblyFormat,
124 gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
125 native: Option<NativeCode>,
126 show_raw_bytes: bool,
127 show_native_raw_bytes: bool,
128 prefer_non_abi_reg_names: bool,
129 prefer_unaliased: bool,
130 prefer_offset_jump_targets: bool,
131 emit_header: bool,
132 emit_exports: bool,
133 show_offsets: bool,
134 show_native_offsets: bool,
135 cost_model: Option<CostModelKind>,
136}
137
138impl<'a> Disassembler<'a> {
139 pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
140 let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
141 Some(NativeCode::try_from(blob)?)
142 } else {
143 None
144 };
145
146 Ok(Self {
147 blob,
148 format,
149 gas_cost_map: None,
150 native,
151 show_raw_bytes: false,
152 show_native_raw_bytes: true,
153 prefer_non_abi_reg_names: false,
154 prefer_unaliased: false,
155 prefer_offset_jump_targets: false,
156 emit_header: true,
157 emit_exports: true,
158 show_offsets: true,
159 show_native_offsets: true,
160 cost_model: None,
161 })
162 }
163
164 pub fn show_raw_bytes(&mut self, value: bool) {
165 self.show_raw_bytes = value;
166 }
167
168 pub fn show_native_raw_bytes(&mut self, value: bool) {
169 self.show_native_raw_bytes = value;
170 }
171
172 pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
173 self.prefer_non_abi_reg_names = value;
174 }
175
176 pub fn prefer_unaliased(&mut self, value: bool) {
177 self.prefer_unaliased = value;
178 }
179
180 pub fn prefer_offset_jump_targets(&mut self, value: bool) {
181 self.prefer_offset_jump_targets = value;
182 }
183
184 pub fn emit_header(&mut self, value: bool) {
185 self.emit_header = value;
186 }
187
188 pub fn emit_exports(&mut self, value: bool) {
189 self.emit_exports = value;
190 }
191
192 pub fn show_offsets(&mut self, value: bool) {
193 self.show_offsets = value;
194 }
195
196 pub fn show_native_offsets(&mut self, value: bool) {
197 self.show_native_offsets = value;
198 }
199
200 pub fn cost_model(&mut self, value: Option<CostModelKind>) {
201 self.cost_model = value;
202 }
203
204 fn instructions(&self) -> Vec<ParsedInstruction> {
205 self.blob.instructions().collect()
206 }
207
208 pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
209 let mut config = polkavm::Config::from_env()?;
210 config.set_worker_count(0);
211 config.set_backend(Some(polkavm::BackendKind::Interpreter));
212 config.set_allow_experimental(true);
213 config.set_default_cost_model(self.cost_model.clone());
214
215 let engine = polkavm::Engine::new(&config)?;
216
217 let mut config = polkavm::ModuleConfig::default();
218 config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
219
220 let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
221
222 let mut in_new_block = true;
223 let mut gas_cost_map = HashMap::new();
224 for instruction in self.instructions() {
225 if in_new_block {
226 in_new_block = false;
227 if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
228 gas_cost_map.insert(instruction.offset, cost);
229 }
230 }
231
232 if instruction.starts_new_basic_block() {
233 in_new_block = true;
234 }
235 }
236 self.gas_cost_map = Some(gas_cost_map);
237
238 Ok(())
239 }
240
241 pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
242 let mut instructions = Vec::new();
243 let mut instruction_offset_to_basic_block = HashMap::new();
244 {
245 let mut basic_block_counter = 0;
246 let mut basic_block_started = true;
247 for instruction in self.instructions() {
248 if basic_block_started {
249 instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
250 basic_block_started = false;
251 }
252
253 if instruction.starts_new_basic_block() {
254 basic_block_started = true;
255 basic_block_counter += 1;
256 }
257 instructions.push(instruction);
258 }
259 }
260
261 let mut exports_for_code_offset = HashMap::new();
262 for (nth_export, export) in self.blob.exports().enumerate() {
263 exports_for_code_offset
264 .entry(export.program_counter())
265 .or_insert_with(Vec::new)
266 .push((nth_export, export));
267 }
268
269 let mut jump_table_map = HashMap::new();
270 let mut jump_table = Vec::new();
271 for target_code_offset in self.blob.jump_table() {
272 let jump_table_index = jump_table.len() + 1;
273 jump_table.push(target_code_offset);
274 assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
275 }
276
277 macro_rules! w {
278 (@no_newline $($arg:tt)*) => {{
279 if let Err(error) = write!(&mut writer, $($arg)*) {
280 return Err(format!("failed to write to output: {error}").into());
281 }
282 }};
283
284 ($($arg:tt)*) => {{
285 if let Err(error) = writeln!(&mut writer, $($arg)*) {
286 return Err(format!("failed to write to output: {error}").into());
287 }
288 }};
289 }
290
291 if self.emit_header {
292 w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
293 w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
294 w!("// Stack size = {} bytes", self.blob.stack_size());
295 w!();
296 w!("// Instructions = {}", instructions.len());
297 w!("// Code size = {} bytes", self.blob.code().len());
298 w!();
299 }
300
301 let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
302 use core::fmt::Write;
303
304 let mut buf = String::new();
305 if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
306 write!(&mut buf, "@{basic_block_counter}").unwrap()
307 } else {
308 buf.push_str("@_:");
309 }
310
311 if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
312 if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
313 write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
314 } else {
315 buf.push_str(" [_]");
316 }
317 }
318
319 if self.emit_exports {
320 if let Some(exports) = exports_for_code_offset.get(&target_offset) {
321 for (nth_export, export) in exports {
322 write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
323 }
324 }
325 }
326
327 if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
328 write!(&mut buf, " (gas: {})", gas_cost).unwrap();
329 }
330
331 buf
332 };
333
334 let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
335 let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
336 disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
337 disassembly_format.prefer_unaliased = self.prefer_unaliased;
338 disassembly_format.is_64_bit = self.blob.is_64_bit();
339
340 let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
341 if prefer_offset_jump_targets {
342 write!(fmt, "{}", target)
343 } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
344 write!(fmt, "@{basic_block_index}")
345 } else {
346 write!(fmt, "{}", target)
347 }
348 };
349 disassembly_format.jump_target_formatter = Some(&jump_target_formatter);
350
351 let mut fmt = AssemblyFormatter::default();
352 let mut last_line_program_entry = None;
353 let mut last_full_name = String::new();
354 let mut basic_block_counter = 0;
355 let mut pending_label = true;
356 for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
357 let offset = instruction.offset;
358 let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
359 let instruction = instruction.kind;
360 let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];
361
362 let instruction_s = instruction.display(&disassembly_format);
363 let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
364 if let Some(import) = self.blob.imports().get(nth_import) {
365 format!("{instruction_s} // {}", import)
366 } else {
367 format!("{instruction_s} // INVALID")
368 }
369 } else {
370 instruction_s.to_string()
371 };
372
373 let line_program = self.blob.get_debug_line_program_at(offset)?;
374
375 if let Some(mut line_program) = line_program {
376 if last_line_program_entry != Some(line_program.entry_index()) {
377 if nth_instruction != 0 {
378 if let Err(error) = writeln!(&mut writer) {
379 return Err(format!("failed to write to output: {error}").into());
380 }
381 }
382
383 last_line_program_entry = Some(line_program.entry_index());
384 loop {
385 let region = match line_program.run() {
386 Ok(Some(region)) => region,
387 Ok(None) => break,
388 Err(error) => {
389 return Err(format!("failed to parse line program: {error}").into());
390 }
391 };
392
393 if region.instruction_range().contains(&offset) {
394 let frame = region.frames().next().unwrap();
395 let full_name = match frame.full_name() {
396 Ok(full_name) => full_name,
397 Err(error) => {
398 return Err(format!("failed to parse line program: {error}").into());
399 }
400 }
401 .to_string();
402
403 if last_full_name != full_name {
404 w!("<{}>:", full_name);
405 last_full_name = full_name;
406 }
407
408 break;
409 }
410 }
411 }
412 } else {
413 if !last_full_name.is_empty() {
414 if let Err(error) = writeln!(&mut writer) {
415 return Err(format!("failed to write to output: {error}").into());
416 }
417 }
418
419 last_line_program_entry = None;
420 last_full_name.clear();
421 }
422
423 if pending_label {
424 pending_label = false;
425 if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
426 if self.show_offsets {
427 w!(@no_newline " : ");
428 }
429
430 if self.show_raw_bytes {
431 w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
432 } else {
433 w!("{}", format_jump_target(offset, basic_block_counter))
434 }
435 } else {
436 w!(" {}", format_jump_target(offset, basic_block_counter))
437 }
438 }
439
440 if matches!(self.format, DisassemblyFormat::DiffFriendly) {
441 let mut string = instruction_s;
442 if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
443 string = format!("{} = _", dst);
444 }
445
446 if let Some(index) = string.find('@') {
447 let length = string[index + 1..]
448 .chars()
449 .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
450 .count();
451 string.replace_range(index + 1..index + 1 + length, "_");
452 }
453
454 if let Some(index_1) = string.find("[0x") {
455 let index_2 = string[index_1..].find(']').unwrap() + index_1;
456 string.replace_range(index_1..=index_2, "[_]");
457 }
458
459 w!(" {}", string);
460 } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
461 if self.show_offsets {
462 w!(@no_newline "{offset:6}: ");
463 }
464 if self.show_raw_bytes {
465 let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
466 w!("{raw_bytes:24} {instruction_s}")
467 } else {
468 w!("{instruction_s}")
469 }
470 }
471
472 if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
473 let native = self.native.as_ref().unwrap();
474 assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);
475
476 let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
477 let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
478 let length = machine_next_code_position - machine_code_position;
479 if length != 0 {
480 let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
481 if let Err(error) = fmt.emit(
482 matches!(self.format, DisassemblyFormat::GuestAndNative),
483 native.machine_code_origin,
484 machine_code_chunk,
485 machine_code_position,
486 self.show_native_raw_bytes,
487 self.show_native_offsets,
488 &mut writer,
489 ) {
490 return Err(format!("failed to write to output: {error}").into());
491 }
492 }
493 }
494
495 if instruction.opcode().starts_new_basic_block() {
496 if nth_instruction + 1 != instructions.len() {
497 pending_label = true;
498 }
499 basic_block_counter += 1;
500 }
501 }
502
503 if let Err(error) = writer.flush() {
504 return Err(format!("failed to write to output: {error}").into());
505 }
506
507 Ok(())
508 }
509}
510
#[cfg(test)]
mod tests {
    use polkavm::Reg::*;
    use polkavm_common::abi::MemoryMapBuilder;
    use polkavm_common::program::{asm, InstructionSetKind};
    use polkavm_common::writer::ProgramBlobBuilder;

    use super::*;

    /// Smoke test: every format supported on this target produces some output.
    fn test_all_formats(blob: &ProgramBlob) {
        for format in [
            DisassemblyFormat::Guest,
            DisassemblyFormat::DiffFriendly,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::GuestAndNative,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::Native,
        ] {
            assert!(!disassemble_with_gas(blob, format).is_empty());
        }
    }

    /// Disassembles `blob` in `format` with gas annotations and returns the raw output.
    fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
        let mut disassembler = Disassembler::new(blob, format).unwrap();
        disassembler.display_gas().unwrap();

        let mut buffer = Vec::with_capacity(1 << 20);
        disassembler.disassemble_into(&mut buffer).unwrap();
        buffer
    }

    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();
        let mut builder = ProgramBlobBuilder::new(InstructionSetKind::Latest32);
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(
            &[
                asm::store_imm_u32(memory_map.rw_data_address(), 0x12345678),
                asm::add_32(S0, A0, A1),
                asm::ecalli(0),
                asm::add_32(A0, A0, S0),
                asm::ret(),
            ],
            &[],
        );
        let blob = ProgramBlob::parse(builder.into_vec().unwrap().into()).unwrap();

        test_all_formats(&blob);

        let assembly_bytes = disassemble_with_gas(&blob, DisassemblyFormat::Guest);
        let assembly_text = String::from_utf8(assembly_bytes).unwrap();
        // Instruction lines start with the offset rendered through `{offset:6}`,
        // i.e. right-aligned in a six character column, followed by ": ".
        // NOTE(review): this relies on `ProgramCounter`'s `Display` honouring
        // the width, like the integer it wraps - confirm.
        // The label line mirrors the literal prefix written by `disassemble_into`.
        let expected = &[
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            " : @0 [export #0: 'main'] (gas: 5)",
            "     0: u32 [0x20000] = 0x12345678",
            "     9: s0 = a0 + a1",
            "    12: ecalli 0 // 'hostcall'",
            "    13: a0 = a0 + s0",
            "    16: ret",
            "",
        ]
        .join("\n");

        assert_eq!(&assembly_text, expected);
    }
}