polkavm_disassembler/
lib.rs

1use std::{collections::HashMap, io::Write};
2
3use polkavm::CostModelKind;
4use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter};
5
/// Output layout produced by [`Disassembler::disassemble_into`].
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum DisassemblyFormat {
    // NOTE(review): the variants are documented with plain `//` comments on purpose;
    // clap's derive presumably turns `///` on `ValueEnum` variants into CLI help
    // text — confirm before converting these into doc comments.

    // Guest instructions only.
    Guest,
    // Guest instructions, each followed by the (indented) native code generated for it.
    GuestAndNative,
    // Native code only; the guest instruction lines are not printed.
    Native,
    // Guest instructions with basic block numbers, `load_imm` immediates and
    // `[0x...]` addresses replaced by `_`, and without the offset column.
    DiffFriendly,
}
13
/// Native machine code obtained by compiling a program blob, together with the
/// table that relates guest program counters to positions inside that code.
struct NativeCode {
    /// Value reported by `Module::machine_code_origin` (0 when the module reports none);
    /// added to positions when printing native offsets.
    machine_code_origin: u64,
    /// Owned copy of the module's raw machine code.
    machine_code: Vec<u8>,
    /// Pairs of (guest program counter, offset into `machine_code`), copied from
    /// `Module::program_counter_to_machine_code_offset`.
    instruction_map: Vec<(ProgramCounter, u32)>,
}
19
20impl TryFrom<&'_ ProgramBlob> for NativeCode {
21    type Error = polkavm::Error;
22
23    fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
24        if !cfg!(target_arch = "x86_64") {
25            return Err("the selected disassembly format is not supported on this architecture".into());
26        }
27
28        let mut config = polkavm::Config::from_env()?;
29        config.set_worker_count(0);
30
31        let engine = polkavm::Engine::new(&config)?;
32        let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
33
34        let Some(machine_code) = module.machine_code() else {
35            return Err("currently selected VM backend doesn't provide raw machine code".into());
36        };
37
38        let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
39            return Err("currently selected VM backend doesn't provide a machine code map".into());
40        };
41
42        Ok(Self {
43            machine_code_origin: module.machine_code_origin().unwrap_or(0),
44            machine_code: machine_code.into(),
45            instruction_map: instruction_map.to_vec(),
46        })
47    }
48}
49
/// Pretty-printer for native x86-64 machine code.
#[derive(Default)]
struct AssemblyFormatter {
    /// Scratch string the formatted instruction text is written into; cleared
    /// and reused for every instruction.
    buffer: String,
}
54
55impl AssemblyFormatter {
56    fn emit(
57        &mut self,
58        indent: bool,
59        code_origin: u64,
60        mut code: &[u8],
61        mut position: usize,
62        show_raw_bytes: bool,
63        show_offsets: bool,
64        writer: &mut impl Write,
65    ) -> Result<(), std::io::Error> {
66        use iced_x86::Formatter;
67
68        let mut formatter = iced_x86::NasmFormatter::new();
69        formatter.options_mut().set_space_after_operand_separator(true);
70        formatter.options_mut().set_hex_prefix("0x");
71        formatter.options_mut().set_hex_suffix("");
72        formatter.options_mut().set_uppercase_hex(false);
73        formatter.options_mut().set_small_hex_numbers_in_decimal(false);
74        formatter.options_mut().set_show_useless_prefixes(true);
75        formatter.options_mut().set_branch_leading_zeros(false);
76        formatter.options_mut().set_rip_relative_addresses(true);
77
78        loop {
79            let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
80            if !decoder.can_decode() {
81                break;
82            }
83            let mut instruction = iced_x86::Instruction::default();
84            decoder.decode_out(&mut instruction);
85
86            if indent {
87                write!(writer, "                                       ")?;
88            }
89
90            if show_offsets {
91                write!(writer, "{:8x}: ", position as u64 + code_origin)?;
92            }
93
94            let start_index = (instruction.ip() - code_origin) as usize;
95            let instr_bytes = &code[start_index..start_index + instruction.len()];
96            if show_raw_bytes {
97                let mut count = 0;
98                for b in instr_bytes.iter() {
99                    write!(writer, "{:02x} ", b)?;
100                    count += 3;
101                }
102                while count < 34 {
103                    write!(writer, " ")?;
104                    count += 1;
105                }
106            }
107
108            self.buffer.clear();
109            formatter.format(&instruction, &mut self.buffer);
110            write!(writer, "{}", self.buffer.replace("byte [", "byte ptr ["))?;
111            writeln!(writer)?;
112
113            code = &code[instruction.len()..];
114            position += instruction.len();
115        }
116
117        Ok(())
118    }
119}
120
/// Disassembles a [`ProgramBlob`] into text, see [`Disassembler::disassemble_into`].
pub struct Disassembler<'a> {
    /// The program being disassembled.
    blob: &'a ProgramBlob,
    /// Selected output layout.
    format: DisassemblyFormat,
    /// Gas cost keyed by the offset of the instruction it was calculated for;
    /// `None` until `display_gas` has been called.
    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
    /// Compiled native code; only populated for the `Native` and `GuestAndNative` formats.
    native: Option<NativeCode>,
    /// Print the raw bytes of every guest instruction.
    show_raw_bytes: bool,
    /// Print the raw bytes of every native instruction.
    show_native_raw_bytes: bool,
    /// Forwarded to the instruction formatter's `prefer_non_abi_reg_names` option.
    prefer_non_abi_reg_names: bool,
    /// Forwarded to the instruction formatter's `prefer_unaliased` option.
    prefer_unaliased: bool,
    /// Print jump targets as raw numbers instead of `@N` basic block references.
    prefer_offset_jump_targets: bool,
    /// Print the `//` summary header before the code.
    emit_header: bool,
    /// Annotate basic block labels with the exports that point at them.
    emit_exports: bool,
    /// Print the offset column for guest instructions.
    show_offsets: bool,
    /// Print the address column for native instructions.
    show_native_offsets: bool,
    /// Cost model handed to the engine by `display_gas`.
    cost_model: Option<CostModelKind>,
}
137
138impl<'a> Disassembler<'a> {
139    pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
140        let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
141            Some(NativeCode::try_from(blob)?)
142        } else {
143            None
144        };
145
146        Ok(Self {
147            blob,
148            format,
149            gas_cost_map: None,
150            native,
151            show_raw_bytes: false,
152            show_native_raw_bytes: true,
153            prefer_non_abi_reg_names: false,
154            prefer_unaliased: false,
155            prefer_offset_jump_targets: false,
156            emit_header: true,
157            emit_exports: true,
158            show_offsets: true,
159            show_native_offsets: true,
160            cost_model: None,
161        })
162    }
163
    /// Sets whether the raw bytes of every guest instruction are printed (off by default).
    pub fn show_raw_bytes(&mut self, value: bool) {
        self.show_raw_bytes = value;
    }
167
    /// Sets whether the raw bytes of every native instruction are printed (on by default).
    pub fn show_native_raw_bytes(&mut self, value: bool) {
        self.show_native_raw_bytes = value;
    }
171
    /// Sets the value forwarded to the instruction formatter's
    /// `prefer_non_abi_reg_names` option (off by default).
    // NOTE(review): presumably selects non-ABI register names in the output — confirm
    // against `polkavm_common::program::InstructionFormat`.
    pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
        self.prefer_non_abi_reg_names = value;
    }
175
    /// Sets the value forwarded to the instruction formatter's `prefer_unaliased`
    /// option (off by default).
    // NOTE(review): presumably disables instruction aliases in the output — confirm
    // against `polkavm_common::program::InstructionFormat`.
    pub fn prefer_unaliased(&mut self, value: bool) {
        self.prefer_unaliased = value;
    }
179
    /// Sets whether jump targets inside instructions are printed as raw numbers
    /// instead of `@N` basic block references (off by default).
    pub fn prefer_offset_jump_targets(&mut self, value: bool) {
        self.prefer_offset_jump_targets = value;
    }
183
    /// Sets whether the `//` summary header (data sizes, stack size, instruction
    /// count, code size) is printed before the code (on by default).
    pub fn emit_header(&mut self, value: bool) {
        self.emit_header = value;
    }
187
    /// Sets whether basic block labels are annotated with the exports that point
    /// at them (on by default).
    pub fn emit_exports(&mut self, value: bool) {
        self.emit_exports = value;
    }
191
    /// Sets whether the offset column is printed for guest instructions (on by default).
    pub fn show_offsets(&mut self, value: bool) {
        self.show_offsets = value;
    }
195
    /// Sets whether the address column is printed for native instructions (on by default).
    pub fn show_native_offsets(&mut self, value: bool) {
        self.show_native_offsets = value;
    }
199
    /// Sets the cost model that `display_gas` installs as the engine's default
    /// cost model (`None` by default).
    pub fn cost_model(&mut self, value: Option<CostModelKind>) {
        self.cost_model = value;
    }
203
204    fn instructions(&self) -> Vec<ParsedInstruction> {
205        self.blob.instructions().collect()
206    }
207
208    pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
209        let mut config = polkavm::Config::from_env()?;
210        config.set_worker_count(0);
211        config.set_backend(Some(polkavm::BackendKind::Interpreter));
212        config.set_allow_experimental(true);
213        config.set_default_cost_model(self.cost_model.clone());
214
215        let engine = polkavm::Engine::new(&config)?;
216
217        let mut config = polkavm::ModuleConfig::default();
218        config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
219
220        let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
221
222        let mut in_new_block = true;
223        let mut gas_cost_map = HashMap::new();
224        for instruction in self.instructions() {
225            if in_new_block {
226                in_new_block = false;
227                if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
228                    gas_cost_map.insert(instruction.offset, cost);
229                }
230            }
231
232            if instruction.starts_new_basic_block() {
233                in_new_block = true;
234            }
235        }
236        self.gas_cost_map = Some(gas_cost_map);
237
238        Ok(())
239    }
240
241    pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
242        let mut instructions = Vec::new();
243        let mut instruction_offset_to_basic_block = HashMap::new();
244        {
245            let mut basic_block_counter = 0;
246            let mut basic_block_started = true;
247            for instruction in self.instructions() {
248                if basic_block_started {
249                    instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
250                    basic_block_started = false;
251                }
252
253                if instruction.starts_new_basic_block() {
254                    basic_block_started = true;
255                    basic_block_counter += 1;
256                }
257                instructions.push(instruction);
258            }
259        }
260
261        let mut exports_for_code_offset = HashMap::new();
262        for (nth_export, export) in self.blob.exports().enumerate() {
263            exports_for_code_offset
264                .entry(export.program_counter())
265                .or_insert_with(Vec::new)
266                .push((nth_export, export));
267        }
268
269        let mut jump_table_map = HashMap::new();
270        let mut jump_table = Vec::new();
271        for target_code_offset in self.blob.jump_table() {
272            let jump_table_index = jump_table.len() + 1;
273            jump_table.push(target_code_offset);
274            assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
275        }
276
277        macro_rules! w {
278            (@no_newline $($arg:tt)*) => {{
279                if let Err(error) = write!(&mut writer, $($arg)*) {
280                    return Err(format!("failed to write to output: {error}").into());
281                }
282            }};
283
284            ($($arg:tt)*) => {{
285                if let Err(error) = writeln!(&mut writer, $($arg)*) {
286                    return Err(format!("failed to write to output: {error}").into());
287                }
288            }};
289        }
290
291        if self.emit_header {
292            w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
293            w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
294            w!("// Stack size = {} bytes", self.blob.stack_size());
295            w!();
296            w!("// Instructions = {}", instructions.len());
297            w!("// Code size = {} bytes", self.blob.code().len());
298            w!();
299        }
300
301        let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
302            use core::fmt::Write;
303
304            let mut buf = String::new();
305            if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
306                write!(&mut buf, "@{basic_block_counter}").unwrap()
307            } else {
308                buf.push_str("@_:");
309            }
310
311            if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
312                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
313                    write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
314                } else {
315                    buf.push_str(" [_]");
316                }
317            }
318
319            if self.emit_exports {
320                if let Some(exports) = exports_for_code_offset.get(&target_offset) {
321                    for (nth_export, export) in exports {
322                        write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
323                    }
324                }
325            }
326
327            if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
328                write!(&mut buf, " (gas: {})", gas_cost).unwrap();
329            }
330
331            buf
332        };
333
334        let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
335        let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
336        disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
337        disassembly_format.prefer_unaliased = self.prefer_unaliased;
338        disassembly_format.is_64_bit = self.blob.is_64_bit();
339
340        let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
341            if prefer_offset_jump_targets {
342                write!(fmt, "{}", target)
343            } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
344                write!(fmt, "@{basic_block_index}")
345            } else {
346                write!(fmt, "{}", target)
347            }
348        };
349        disassembly_format.jump_target_formatter = Some(&jump_target_formatter);
350
351        let mut fmt = AssemblyFormatter::default();
352        let mut last_line_program_entry = None;
353        let mut last_full_name = String::new();
354        let mut basic_block_counter = 0;
355        let mut pending_label = true;
356        for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
357            let offset = instruction.offset;
358            let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
359            let instruction = instruction.kind;
360            let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];
361
362            let instruction_s = instruction.display(&disassembly_format);
363            let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
364                if let Some(import) = self.blob.imports().get(nth_import) {
365                    format!("{instruction_s} // {}", import)
366                } else {
367                    format!("{instruction_s} // INVALID")
368                }
369            } else {
370                instruction_s.to_string()
371            };
372
373            let line_program = self.blob.get_debug_line_program_at(offset)?;
374
375            if let Some(mut line_program) = line_program {
376                if last_line_program_entry != Some(line_program.entry_index()) {
377                    if nth_instruction != 0 {
378                        if let Err(error) = writeln!(&mut writer) {
379                            return Err(format!("failed to write to output: {error}").into());
380                        }
381                    }
382
383                    last_line_program_entry = Some(line_program.entry_index());
384                    loop {
385                        let region = match line_program.run() {
386                            Ok(Some(region)) => region,
387                            Ok(None) => break,
388                            Err(error) => {
389                                return Err(format!("failed to parse line program: {error}").into());
390                            }
391                        };
392
393                        if region.instruction_range().contains(&offset) {
394                            let frame = region.frames().next().unwrap();
395                            let full_name = match frame.full_name() {
396                                Ok(full_name) => full_name,
397                                Err(error) => {
398                                    return Err(format!("failed to parse line program: {error}").into());
399                                }
400                            }
401                            .to_string();
402
403                            if last_full_name != full_name {
404                                w!("<{}>:", full_name);
405                                last_full_name = full_name;
406                            }
407
408                            break;
409                        }
410                    }
411                }
412            } else {
413                if !last_full_name.is_empty() {
414                    if let Err(error) = writeln!(&mut writer) {
415                        return Err(format!("failed to write to output: {error}").into());
416                    }
417                }
418
419                last_line_program_entry = None;
420                last_full_name.clear();
421            }
422
423            if pending_label {
424                pending_label = false;
425                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
426                    if self.show_offsets {
427                        w!(@no_newline "      : ");
428                    }
429
430                    if self.show_raw_bytes {
431                        w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
432                    } else {
433                        w!("{}", format_jump_target(offset, basic_block_counter))
434                    }
435                } else {
436                    w!("    {}", format_jump_target(offset, basic_block_counter))
437                }
438            }
439
440            if matches!(self.format, DisassemblyFormat::DiffFriendly) {
441                let mut string = instruction_s;
442                if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
443                    string = format!("{} = _", dst);
444                }
445
446                if let Some(index) = string.find('@') {
447                    let length = string[index + 1..]
448                        .chars()
449                        .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
450                        .count();
451                    string.replace_range(index + 1..index + 1 + length, "_");
452                }
453
454                if let Some(index_1) = string.find("[0x") {
455                    let index_2 = string[index_1..].find(']').unwrap() + index_1;
456                    string.replace_range(index_1..=index_2, "[_]");
457                }
458
459                w!("    {}", string);
460            } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
461                if self.show_offsets {
462                    w!(@no_newline "{offset:6}: ");
463                }
464                if self.show_raw_bytes {
465                    let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
466                    w!("{raw_bytes:24} {instruction_s}")
467                } else {
468                    w!("{instruction_s}")
469                }
470            }
471
472            if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
473                let native = self.native.as_ref().unwrap();
474                assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);
475
476                let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
477                let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
478                let length = machine_next_code_position - machine_code_position;
479                if length != 0 {
480                    let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
481                    if let Err(error) = fmt.emit(
482                        matches!(self.format, DisassemblyFormat::GuestAndNative),
483                        native.machine_code_origin,
484                        machine_code_chunk,
485                        machine_code_position,
486                        self.show_native_raw_bytes,
487                        self.show_native_offsets,
488                        &mut writer,
489                    ) {
490                        return Err(format!("failed to write to output: {error}").into());
491                    }
492                }
493            }
494
495            if instruction.opcode().starts_new_basic_block() {
496                if nth_instruction + 1 != instructions.len() {
497                    pending_label = true;
498                }
499                basic_block_counter += 1;
500            }
501        }
502
503        if let Err(error) = writer.flush() {
504            return Err(format!("failed to write to output: {error}").into());
505        }
506
507        Ok(())
508    }
509}
510
#[cfg(test)]
mod tests {
    use polkavm::Reg::*;
    use polkavm_common::abi::MemoryMapBuilder;
    use polkavm_common::program::{asm, InstructionSetKind};
    use polkavm_common::writer::ProgramBlobBuilder;

    use super::*;

    /// Disassembles `blob` in the given format with gas costs enabled and
    /// returns the raw output.
    fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
        let mut disassembler = Disassembler::new(blob, format).unwrap();
        disassembler.display_gas().unwrap();

        let mut output = Vec::with_capacity(1 << 20);
        disassembler.disassemble_into(&mut output).unwrap();
        output
    }

    /// Smoke test: every format available on this target produces some output.
    fn test_all_formats(blob: &ProgramBlob) {
        let formats = [
            DisassemblyFormat::Guest,
            DisassemblyFormat::DiffFriendly,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::GuestAndNative,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::Native,
        ];

        for format in formats {
            let output = disassemble_with_gas(blob, format);
            assert!(!output.is_empty());
        }
    }

    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();

        let code = [
            asm::store_imm_u32(memory_map.rw_data_address(), 0x12345678),
            asm::add_32(S0, A0, A1),
            asm::ecalli(0),
            asm::add_32(A0, A0, S0),
            asm::ret(),
        ];

        let mut builder = ProgramBlobBuilder::new(InstructionSetKind::Latest32);
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(&code, &[]);
        let blob = ProgramBlob::parse(builder.into_vec().unwrap().into()).unwrap();

        test_all_formats(&blob);

        let assembly_text = String::from_utf8(disassemble_with_gas(&blob, DisassemblyFormat::Guest)).unwrap();

        // The trailing empty entry accounts for the final newline of the output.
        let expected_lines = [
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            "      : @0 [export #0: 'main'] (gas: 5)",
            "     0: u32 [0x20000] = 0x12345678",
            "     9: s0 = a0 + a1",
            "    12: ecalli 0 // 'hostcall'",
            "    13: a0 = a0 + s0",
            "    16: ret",
            "",
        ];
        let expected = expected_lines.join("\n");

        assert_eq!(assembly_text, expected);
    }
}
585}