polkavm_disassembler/
lib.rs

1use std::{collections::HashMap, io::Write};
2
3use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter, ISA32_V1, ISA64_V1};
4
// Selects which view(s) of the program the `Disassembler` prints.
//
// NOTE: plain `//` comments are used in this enum on purpose: clap's derive macros
// turn `///` doc comments into user-visible help text, which would change CLI output.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum DisassemblyFormat {
    // Guest instructions only.
    Guest,
    // Every guest instruction followed by the (indented) native code generated for it.
    GuestAndNative,
    // Only the native code generated for each guest instruction (block labels are still printed).
    Native,
    // Guest instructions with basic block numbers, `load_imm` immediates and `[0x...]`
    // operands masked with `_`; no offsets or raw bytes are printed.
    DiffFriendly,
}
12
/// Native machine code obtained for a program blob, together with the table that
/// relates guest program counters to offsets inside that machine code.
struct NativeCode {
    /// Base address added to machine code offsets when printing addresses;
    /// `0` when the module does not report an origin.
    machine_code_origin: u64,
    /// Raw machine code bytes copied out of the compiled module.
    machine_code: Vec<u8>,
    /// `(guest program counter, machine code offset)` pairs; the disassembler indexes
    /// this by instruction number and also reads the entry after the current one.
    instruction_map: Vec<(ProgramCounter, u32)>,
}
18
19impl TryFrom<&'_ ProgramBlob> for NativeCode {
20    type Error = polkavm::Error;
21
22    fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
23        if !cfg!(target_arch = "x86_64") {
24            return Err("the selected disassembly format is not supported on this architecture".into());
25        }
26
27        let mut config = polkavm::Config::from_env()?;
28        config.set_worker_count(0);
29
30        let engine = polkavm::Engine::new(&config)?;
31        let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
32
33        let Some(machine_code) = module.machine_code() else {
34            return Err("currently selected VM backend doesn't provide raw machine code".into());
35        };
36
37        let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
38            return Err("currently selected VM backend doesn't provide a machine code map".into());
39        };
40
41        Ok(Self {
42            machine_code_origin: module.machine_code_origin().unwrap_or(0),
43            machine_code: machine_code.into(),
44            instruction_map: instruction_map.to_vec(),
45        })
46    }
47}
48
/// Prints native x86-64 machine code as text, keeping a scratch buffer that is
/// reused for every formatted instruction.
#[derive(Default)]
struct AssemblyFormatter {
    /// Scratch string the instruction text is formatted into before it is written out.
    buffer: String,
}
53
54impl AssemblyFormatter {
55    fn emit(
56        &mut self,
57        indent: bool,
58        code_origin: u64,
59        mut code: &[u8],
60        mut position: usize,
61        writer: &mut impl Write,
62    ) -> Result<(), std::io::Error> {
63        use iced_x86::Formatter;
64
65        let mut formatter = iced_x86::NasmFormatter::new();
66        formatter.options_mut().set_space_after_operand_separator(true);
67        formatter.options_mut().set_hex_prefix("0x");
68        formatter.options_mut().set_hex_suffix("");
69        formatter.options_mut().set_uppercase_hex(false);
70        formatter.options_mut().set_small_hex_numbers_in_decimal(false);
71        formatter.options_mut().set_show_useless_prefixes(true);
72        formatter.options_mut().set_branch_leading_zeros(false);
73        formatter.options_mut().set_rip_relative_addresses(true);
74
75        loop {
76            let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
77            if !decoder.can_decode() {
78                break;
79            }
80            let mut instruction = iced_x86::Instruction::default();
81            decoder.decode_out(&mut instruction);
82
83            if indent {
84                write!(writer, "                                       ")?;
85            }
86            write!(writer, "{:8x}: ", position as u64 + code_origin)?;
87
88            let start_index = (instruction.ip() - code_origin) as usize;
89            let instr_bytes = &code[start_index..start_index + instruction.len()];
90            let mut count = 0;
91            for b in instr_bytes.iter() {
92                write!(writer, "{:02x} ", b)?;
93                count += 3;
94            }
95            while count < 34 {
96                write!(writer, " ")?;
97                count += 1;
98            }
99
100            self.buffer.clear();
101            formatter.format(&instruction, &mut self.buffer);
102            write!(writer, "{}", self.buffer)?;
103            writeln!(writer)?;
104
105            code = &code[instruction.len()..];
106            position += instruction.len();
107        }
108
109        Ok(())
110    }
111}
112
/// Textual disassembler for a [`ProgramBlob`].
///
/// Created with [`Disassembler::new`], configured through the setter methods and
/// run with [`Disassembler::disassemble_into`].
pub struct Disassembler<'a> {
    /// The program being disassembled.
    blob: &'a ProgramBlob,
    /// Which view(s) of the program to print.
    format: DisassemblyFormat,
    /// Gas cost keyed by the offset of a basic block's first instruction;
    /// `None` until `display_gas` has been called.
    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
    /// Native code for the blob; only populated for the `Native` and `GuestAndNative` formats.
    native: Option<NativeCode>,
    /// Print the raw bytes of each guest instruction.
    show_raw_bytes: bool,
    /// Forwarded to the instruction formatter's field of the same name.
    prefer_non_abi_reg_names: bool,
    /// Forwarded to the instruction formatter's field of the same name.
    prefer_unaliased: bool,
    /// Print jump targets as numeric offsets instead of `@N` basic block labels.
    prefer_offset_jump_targets: bool,
    /// Print the `// ...` summary header before the instructions.
    emit_header: bool,
    /// Annotate basic block labels with the exports that point at them.
    emit_exports: bool,
    /// Print the offset column in front of guest instructions and labels.
    show_offsets: bool,
}
126
127impl<'a> Disassembler<'a> {
128    pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
129        let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
130            Some(NativeCode::try_from(blob)?)
131        } else {
132            None
133        };
134
135        Ok(Self {
136            blob,
137            format,
138            gas_cost_map: None,
139            native,
140            show_raw_bytes: false,
141            prefer_non_abi_reg_names: false,
142            prefer_unaliased: false,
143            prefer_offset_jump_targets: false,
144            emit_header: true,
145            emit_exports: true,
146            show_offsets: true,
147        })
148    }
149
    /// Sets whether the raw bytes of each guest instruction are printed (as hex)
    /// in front of the instruction text.
    pub fn show_raw_bytes(&mut self, value: bool) {
        self.show_raw_bytes = value;
    }
153
    /// Sets the `prefer_non_abi_reg_names` option that is forwarded to the
    /// instruction formatter when disassembling.
    // NOTE(review): presumably switches between ABI and raw register names; confirm
    // against `polkavm_common::program::InstructionFormat`.
    pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
        self.prefer_non_abi_reg_names = value;
    }
157
    /// Sets the `prefer_unaliased` option that is forwarded to the instruction
    /// formatter when disassembling.
    // NOTE(review): presumably disables instruction aliases in the output; confirm
    // against `polkavm_common::program::InstructionFormat`.
    pub fn prefer_unaliased(&mut self, value: bool) {
        self.prefer_unaliased = value;
    }
161
    /// Sets whether jump targets are always printed as numeric code offsets
    /// instead of `@N` basic block labels.
    pub fn prefer_offset_jump_targets(&mut self, value: bool) {
        self.prefer_offset_jump_targets = value;
    }
165
    /// Sets whether the `// ...` summary header (data sizes, stack size,
    /// instruction count and code size) is printed before the instructions.
    pub fn emit_header(&mut self, value: bool) {
        self.emit_header = value;
    }
169
    /// Sets whether basic block labels are annotated with the exports that
    /// point at them.
    pub fn emit_exports(&mut self, value: bool) {
        self.emit_exports = value;
    }
173
    /// Sets whether the offset column is printed in front of guest instructions
    /// and basic block labels.
    pub fn show_offsets(&mut self, value: bool) {
        self.show_offsets = value;
    }
177
178    fn instructions(&self) -> Vec<ParsedInstruction> {
179        if self.blob.is_64_bit() {
180            self.blob.instructions(ISA64_V1).collect()
181        } else {
182            self.blob.instructions(ISA32_V1).collect()
183        }
184    }
185
186    pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
187        let mut config = polkavm::Config::from_env()?;
188        config.set_worker_count(0);
189        config.set_backend(Some(polkavm::BackendKind::Interpreter));
190
191        let engine = polkavm::Engine::new(&config)?;
192
193        let mut config = polkavm::ModuleConfig::default();
194        config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
195
196        let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
197
198        let mut in_new_block = true;
199        let mut gas_cost_map = HashMap::new();
200        for instruction in self.instructions() {
201            if in_new_block {
202                in_new_block = false;
203                if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
204                    gas_cost_map.insert(instruction.offset, cost);
205                }
206            }
207
208            if instruction.starts_new_basic_block() {
209                in_new_block = true;
210            }
211        }
212        self.gas_cost_map = Some(gas_cost_map);
213
214        Ok(())
215    }
216
217    pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
218        let mut instructions = Vec::new();
219        let mut instruction_offset_to_basic_block = HashMap::new();
220        {
221            let mut basic_block_counter = 0;
222            let mut basic_block_started = true;
223            for instruction in self.instructions() {
224                if basic_block_started {
225                    instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
226                    basic_block_started = false;
227                }
228
229                if instruction.starts_new_basic_block() {
230                    basic_block_started = true;
231                    basic_block_counter += 1;
232                }
233                instructions.push(instruction);
234            }
235        }
236
237        let mut exports_for_code_offset = HashMap::new();
238        for (nth_export, export) in self.blob.exports().enumerate() {
239            exports_for_code_offset
240                .entry(export.program_counter())
241                .or_insert_with(Vec::new)
242                .push((nth_export, export));
243        }
244
245        let mut jump_table_map = HashMap::new();
246        let mut jump_table = Vec::new();
247        for target_code_offset in self.blob.jump_table() {
248            let jump_table_index = jump_table.len() + 1;
249            jump_table.push(target_code_offset);
250            assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
251        }
252
253        macro_rules! w {
254            (@no_newline $($arg:tt)*) => {{
255                if let Err(error) = write!(&mut writer, $($arg)*) {
256                    return Err(format!("failed to write to output: {error}").into());
257                }
258            }};
259
260            ($($arg:tt)*) => {{
261                if let Err(error) = writeln!(&mut writer, $($arg)*) {
262                    return Err(format!("failed to write to output: {error}").into());
263                }
264            }};
265        }
266
267        if self.emit_header {
268            w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
269            w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
270            w!("// Stack size = {} bytes", self.blob.stack_size());
271            w!();
272            w!("// Instructions = {}", instructions.len());
273            w!("// Code size = {} bytes", self.blob.code().len());
274            w!();
275        }
276
277        let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
278            use core::fmt::Write;
279
280            let mut buf = String::new();
281            if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
282                write!(&mut buf, "@{basic_block_counter}").unwrap()
283            } else {
284                buf.push_str("@_:");
285            }
286
287            if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
288                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
289                    write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
290                } else {
291                    buf.push_str(" [_]");
292                }
293            }
294
295            if self.emit_exports {
296                if let Some(exports) = exports_for_code_offset.get(&target_offset) {
297                    for (nth_export, export) in exports {
298                        write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
299                    }
300                }
301            }
302
303            if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
304                write!(&mut buf, " (gas: {})", gas_cost).unwrap();
305            }
306
307            buf
308        };
309
310        let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
311        let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
312        disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
313        disassembly_format.prefer_unaliased = self.prefer_unaliased;
314        disassembly_format.is_64_bit = self.blob.is_64_bit();
315
316        let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
317            if prefer_offset_jump_targets {
318                write!(fmt, "{}", target)
319            } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
320                write!(fmt, "@{basic_block_index}")
321            } else {
322                write!(fmt, "{}", target)
323            }
324        };
325        disassembly_format.jump_target_formatter = Some(&jump_target_formatter);
326
327        let mut fmt = AssemblyFormatter::default();
328        let mut last_line_program_entry = None;
329        let mut last_full_name = String::new();
330        let mut basic_block_counter = 0;
331        let mut pending_label = true;
332        for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
333            let offset = instruction.offset;
334            let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
335            let instruction = instruction.kind;
336            let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];
337
338            let instruction_s = instruction.display(&disassembly_format);
339            let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
340                if let Some(import) = self.blob.imports().get(nth_import) {
341                    format!("{instruction_s} // {}", import)
342                } else {
343                    format!("{instruction_s} // INVALID")
344                }
345            } else {
346                instruction_s.to_string()
347            };
348
349            let line_program = self.blob.get_debug_line_program_at(offset)?;
350
351            if let Some(mut line_program) = line_program {
352                if last_line_program_entry != Some(line_program.entry_index()) {
353                    if nth_instruction != 0 {
354                        if let Err(error) = writeln!(&mut writer) {
355                            return Err(format!("failed to write to output: {error}").into());
356                        }
357                    }
358
359                    last_line_program_entry = Some(line_program.entry_index());
360                    loop {
361                        let region = match line_program.run() {
362                            Ok(Some(region)) => region,
363                            Ok(None) => break,
364                            Err(error) => {
365                                return Err(format!("failed to parse line program: {error}").into());
366                            }
367                        };
368
369                        if region.instruction_range().contains(&offset) {
370                            let frame = region.frames().next().unwrap();
371                            let full_name = match frame.full_name() {
372                                Ok(full_name) => full_name,
373                                Err(error) => {
374                                    return Err(format!("failed to parse line program: {error}").into());
375                                }
376                            }
377                            .to_string();
378
379                            if last_full_name != full_name {
380                                w!("<{}>:", full_name);
381                                last_full_name = full_name;
382                            }
383
384                            break;
385                        }
386                    }
387                }
388            } else {
389                if !last_full_name.is_empty() {
390                    if let Err(error) = writeln!(&mut writer) {
391                        return Err(format!("failed to write to output: {error}").into());
392                    }
393                }
394
395                last_line_program_entry = None;
396                last_full_name.clear();
397            }
398
399            if pending_label {
400                pending_label = false;
401                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
402                    if self.show_offsets {
403                        w!(@no_newline "      : ");
404                    }
405
406                    if self.show_raw_bytes {
407                        w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
408                    } else {
409                        w!("{}", format_jump_target(offset, basic_block_counter))
410                    }
411                } else {
412                    w!("    {}", format_jump_target(offset, basic_block_counter))
413                }
414            }
415
416            if matches!(self.format, DisassemblyFormat::DiffFriendly) {
417                let mut string = instruction_s;
418                if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
419                    string = format!("{} = _", dst);
420                }
421
422                if let Some(index) = string.find('@') {
423                    let length = string[index + 1..]
424                        .chars()
425                        .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
426                        .count();
427                    string.replace_range(index + 1..index + 1 + length, "_");
428                }
429
430                if let Some(index_1) = string.find("[0x") {
431                    let index_2 = string[index_1..].find(']').unwrap() + index_1;
432                    string.replace_range(index_1..=index_2, "[_]");
433                }
434
435                w!("    {}", string);
436            } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
437                if self.show_offsets {
438                    w!(@no_newline "{offset:6}: ");
439                }
440                if self.show_raw_bytes {
441                    let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
442                    w!("{raw_bytes:24} {instruction_s}")
443                } else {
444                    w!("{instruction_s}")
445                }
446            }
447
448            if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
449                let native = self.native.as_ref().unwrap();
450                assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);
451
452                let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
453                let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
454                let length = machine_next_code_position - machine_code_position;
455                if length != 0 {
456                    let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
457                    if let Err(error) = fmt.emit(
458                        matches!(self.format, DisassemblyFormat::GuestAndNative),
459                        native.machine_code_origin,
460                        machine_code_chunk,
461                        machine_code_position,
462                        &mut writer,
463                    ) {
464                        return Err(format!("failed to write to output: {error}").into());
465                    }
466                }
467            }
468
469            if instruction.opcode().starts_new_basic_block() {
470                if nth_instruction + 1 != instructions.len() {
471                    pending_label = true;
472                }
473                basic_block_counter += 1;
474            }
475        }
476
477        if let Err(error) = writer.flush() {
478            return Err(format!("failed to write to output: {error}").into());
479        }
480
481        Ok(())
482    }
483}
484
#[cfg(test)]
mod tests {
    use polkavm::Reg::*;
    use polkavm_common::abi::MemoryMapBuilder;
    use polkavm_common::program::asm;
    use polkavm_common::writer::ProgramBlobBuilder;

    use super::*;

    /// Smoke-tests every disassembly format: each must produce some output.
    ///
    /// The native formats are only compiled in on `x86_64`, matching the
    /// architecture check made when the native code is obtained.
    // NOTE(review): the native formats also need a VM backend that exposes machine
    // code at runtime; confirm this holds on every x86_64 platform the tests run on.
    fn test_all_formats(blob: &ProgramBlob) {
        for format in [
            DisassemblyFormat::Guest,
            DisassemblyFormat::DiffFriendly,
            #[cfg(target_arch = "x86_64")]
            DisassemblyFormat::GuestAndNative,
            #[cfg(target_arch = "x86_64")]
            DisassemblyFormat::Native,
        ] {
            assert!(!disassemble_with_gas(blob, format).is_empty());
        }
    }

    /// Disassembles `blob` in `format` with gas annotations enabled and returns
    /// the raw output bytes.
    fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
        let mut disassembler = Disassembler::new(blob, format).unwrap();
        disassembler.display_gas().unwrap();

        let mut buffer = Vec::with_capacity(1 << 20);
        disassembler.disassemble_into(&mut buffer).unwrap();
        buffer
    }

    /// Builds a tiny five-instruction program and checks the exact `Guest` output.
    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();
        let mut builder = ProgramBlobBuilder::new();
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(
            &[
                asm::store_imm_u32(memory_map.rw_data_address(), 0x12345678),
                asm::add_32(S0, A0, A1),
                asm::ecalli(0),
                asm::add_32(A0, A0, S0),
                asm::ret(),
            ],
            &[],
        );
        let blob = ProgramBlob::parse(builder.into_vec().into()).unwrap();

        test_all_formats(&blob);

        let assembly_bytes = disassemble_with_gas(&blob, DisassemblyFormat::Guest);
        let assembly_text = String::from_utf8(assembly_bytes).unwrap();
        let expected = &[
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            "      : @0 [export #0: 'main'] (gas: 5)",
            "     0: u32 [0x20000] = 0x12345678",
            "     9: s0 = a0 + a1",
            "    12: ecalli 0 // 'hostcall'",
            "    13: a0 = a0 + s0",
            "    16: ret",
            "",
        ]
        .join("\n");

        assert_eq!(&assembly_text, expected);
    }
}
559}