polkavm_disassembler/
lib.rs

1use std::{collections::HashMap, io::Write};
2
3use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter, ISA32_V1, ISA64_V1};
4
5#[derive(Copy, Clone, Debug, clap::ValueEnum)]
6pub enum DisassemblyFormat {
7    Guest,
8    GuestAndNative,
9    Native,
10    DiffFriendly,
11}
12
13struct NativeCode {
14    machine_code_origin: u64,
15    machine_code: Vec<u8>,
16    instruction_map: Vec<(ProgramCounter, u32)>,
17}
18
19impl TryFrom<&'_ ProgramBlob> for NativeCode {
20    type Error = polkavm::Error;
21
22    fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
23        if !cfg!(target_arch = "x86_64") {
24            return Err("the selected disassembly format is not supported on this architecture".into());
25        }
26
27        let mut config = polkavm::Config::from_env()?;
28        config.set_worker_count(0);
29
30        let engine = polkavm::Engine::new(&config)?;
31        let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
32
33        let Some(machine_code) = module.machine_code() else {
34            return Err("currently selected VM backend doesn't provide raw machine code".into());
35        };
36
37        let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
38            return Err("currently selected VM backend doesn't provide a machine code map".into());
39        };
40
41        Ok(Self {
42            machine_code_origin: module.machine_code_origin().unwrap_or(0),
43            machine_code: machine_code.into(),
44            instruction_map: instruction_map.to_vec(),
45        })
46    }
47}
48
49#[derive(Default)]
50struct AssemblyFormatter {
51    buffer: String,
52}
53
54impl AssemblyFormatter {
55    fn emit(
56        &mut self,
57        indent: bool,
58        code_origin: u64,
59        mut code: &[u8],
60        mut position: usize,
61        show_raw_bytes: bool,
62        show_offsets: bool,
63        writer: &mut impl Write,
64    ) -> Result<(), std::io::Error> {
65        use iced_x86::Formatter;
66
67        let mut formatter = iced_x86::NasmFormatter::new();
68        formatter.options_mut().set_space_after_operand_separator(true);
69        formatter.options_mut().set_hex_prefix("0x");
70        formatter.options_mut().set_hex_suffix("");
71        formatter.options_mut().set_uppercase_hex(false);
72        formatter.options_mut().set_small_hex_numbers_in_decimal(false);
73        formatter.options_mut().set_show_useless_prefixes(true);
74        formatter.options_mut().set_branch_leading_zeros(false);
75        formatter.options_mut().set_rip_relative_addresses(true);
76
77        loop {
78            let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
79            if !decoder.can_decode() {
80                break;
81            }
82            let mut instruction = iced_x86::Instruction::default();
83            decoder.decode_out(&mut instruction);
84
85            if indent {
86                write!(writer, "                                       ")?;
87            }
88
89            if show_offsets {
90                write!(writer, "{:8x}: ", position as u64 + code_origin)?;
91            }
92
93            let start_index = (instruction.ip() - code_origin) as usize;
94            let instr_bytes = &code[start_index..start_index + instruction.len()];
95            if show_raw_bytes {
96                let mut count = 0;
97                for b in instr_bytes.iter() {
98                    write!(writer, "{:02x} ", b)?;
99                    count += 3;
100                }
101                while count < 34 {
102                    write!(writer, " ")?;
103                    count += 1;
104                }
105            }
106
107            self.buffer.clear();
108            formatter.format(&instruction, &mut self.buffer);
109            write!(writer, "{}", self.buffer.replace("byte [", "byte ptr ["))?;
110            writeln!(writer)?;
111
112            code = &code[instruction.len()..];
113            position += instruction.len();
114        }
115
116        Ok(())
117    }
118}
119
120pub struct Disassembler<'a> {
121    blob: &'a ProgramBlob,
122    format: DisassemblyFormat,
123    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
124    native: Option<NativeCode>,
125    show_raw_bytes: bool,
126    show_native_raw_bytes: bool,
127    prefer_non_abi_reg_names: bool,
128    prefer_unaliased: bool,
129    prefer_offset_jump_targets: bool,
130    emit_header: bool,
131    emit_exports: bool,
132    show_offsets: bool,
133    show_native_offsets: bool,
134}
135
136impl<'a> Disassembler<'a> {
137    pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
138        let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
139            Some(NativeCode::try_from(blob)?)
140        } else {
141            None
142        };
143
144        Ok(Self {
145            blob,
146            format,
147            gas_cost_map: None,
148            native,
149            show_raw_bytes: false,
150            show_native_raw_bytes: true,
151            prefer_non_abi_reg_names: false,
152            prefer_unaliased: false,
153            prefer_offset_jump_targets: false,
154            emit_header: true,
155            emit_exports: true,
156            show_offsets: true,
157            show_native_offsets: true,
158        })
159    }
160
161    pub fn show_raw_bytes(&mut self, value: bool) {
162        self.show_raw_bytes = value;
163    }
164
165    pub fn show_native_raw_bytes(&mut self, value: bool) {
166        self.show_native_raw_bytes = value;
167    }
168
169    pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
170        self.prefer_non_abi_reg_names = value;
171    }
172
173    pub fn prefer_unaliased(&mut self, value: bool) {
174        self.prefer_unaliased = value;
175    }
176
177    pub fn prefer_offset_jump_targets(&mut self, value: bool) {
178        self.prefer_offset_jump_targets = value;
179    }
180
181    pub fn emit_header(&mut self, value: bool) {
182        self.emit_header = value;
183    }
184
185    pub fn emit_exports(&mut self, value: bool) {
186        self.emit_exports = value;
187    }
188
189    pub fn show_offsets(&mut self, value: bool) {
190        self.show_offsets = value;
191    }
192
193    pub fn show_native_offsets(&mut self, value: bool) {
194        self.show_native_offsets = value;
195    }
196
197    fn instructions(&self) -> Vec<ParsedInstruction> {
198        if self.blob.is_64_bit() {
199            self.blob.instructions(ISA64_V1).collect()
200        } else {
201            self.blob.instructions(ISA32_V1).collect()
202        }
203    }
204
205    pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
206        let mut config = polkavm::Config::from_env()?;
207        config.set_worker_count(0);
208        config.set_backend(Some(polkavm::BackendKind::Interpreter));
209
210        let engine = polkavm::Engine::new(&config)?;
211
212        let mut config = polkavm::ModuleConfig::default();
213        config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
214
215        let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
216
217        let mut in_new_block = true;
218        let mut gas_cost_map = HashMap::new();
219        for instruction in self.instructions() {
220            if in_new_block {
221                in_new_block = false;
222                if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
223                    gas_cost_map.insert(instruction.offset, cost);
224                }
225            }
226
227            if instruction.starts_new_basic_block() {
228                in_new_block = true;
229            }
230        }
231        self.gas_cost_map = Some(gas_cost_map);
232
233        Ok(())
234    }
235
236    pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
237        let mut instructions = Vec::new();
238        let mut instruction_offset_to_basic_block = HashMap::new();
239        {
240            let mut basic_block_counter = 0;
241            let mut basic_block_started = true;
242            for instruction in self.instructions() {
243                if basic_block_started {
244                    instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
245                    basic_block_started = false;
246                }
247
248                if instruction.starts_new_basic_block() {
249                    basic_block_started = true;
250                    basic_block_counter += 1;
251                }
252                instructions.push(instruction);
253            }
254        }
255
256        let mut exports_for_code_offset = HashMap::new();
257        for (nth_export, export) in self.blob.exports().enumerate() {
258            exports_for_code_offset
259                .entry(export.program_counter())
260                .or_insert_with(Vec::new)
261                .push((nth_export, export));
262        }
263
264        let mut jump_table_map = HashMap::new();
265        let mut jump_table = Vec::new();
266        for target_code_offset in self.blob.jump_table() {
267            let jump_table_index = jump_table.len() + 1;
268            jump_table.push(target_code_offset);
269            assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
270        }
271
272        macro_rules! w {
273            (@no_newline $($arg:tt)*) => {{
274                if let Err(error) = write!(&mut writer, $($arg)*) {
275                    return Err(format!("failed to write to output: {error}").into());
276                }
277            }};
278
279            ($($arg:tt)*) => {{
280                if let Err(error) = writeln!(&mut writer, $($arg)*) {
281                    return Err(format!("failed to write to output: {error}").into());
282                }
283            }};
284        }
285
286        if self.emit_header {
287            w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
288            w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
289            w!("// Stack size = {} bytes", self.blob.stack_size());
290            w!();
291            w!("// Instructions = {}", instructions.len());
292            w!("// Code size = {} bytes", self.blob.code().len());
293            w!();
294        }
295
296        let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
297            use core::fmt::Write;
298
299            let mut buf = String::new();
300            if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
301                write!(&mut buf, "@{basic_block_counter}").unwrap()
302            } else {
303                buf.push_str("@_:");
304            }
305
306            if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
307                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
308                    write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
309                } else {
310                    buf.push_str(" [_]");
311                }
312            }
313
314            if self.emit_exports {
315                if let Some(exports) = exports_for_code_offset.get(&target_offset) {
316                    for (nth_export, export) in exports {
317                        write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
318                    }
319                }
320            }
321
322            if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
323                write!(&mut buf, " (gas: {})", gas_cost).unwrap();
324            }
325
326            buf
327        };
328
329        let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
330        let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
331        disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
332        disassembly_format.prefer_unaliased = self.prefer_unaliased;
333        disassembly_format.is_64_bit = self.blob.is_64_bit();
334
335        let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
336            if prefer_offset_jump_targets {
337                write!(fmt, "{}", target)
338            } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
339                write!(fmt, "@{basic_block_index}")
340            } else {
341                write!(fmt, "{}", target)
342            }
343        };
344        disassembly_format.jump_target_formatter = Some(&jump_target_formatter);
345
346        let mut fmt = AssemblyFormatter::default();
347        let mut last_line_program_entry = None;
348        let mut last_full_name = String::new();
349        let mut basic_block_counter = 0;
350        let mut pending_label = true;
351        for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
352            let offset = instruction.offset;
353            let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
354            let instruction = instruction.kind;
355            let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];
356
357            let instruction_s = instruction.display(&disassembly_format);
358            let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
359                if let Some(import) = self.blob.imports().get(nth_import) {
360                    format!("{instruction_s} // {}", import)
361                } else {
362                    format!("{instruction_s} // INVALID")
363                }
364            } else {
365                instruction_s.to_string()
366            };
367
368            let line_program = self.blob.get_debug_line_program_at(offset)?;
369
370            if let Some(mut line_program) = line_program {
371                if last_line_program_entry != Some(line_program.entry_index()) {
372                    if nth_instruction != 0 {
373                        if let Err(error) = writeln!(&mut writer) {
374                            return Err(format!("failed to write to output: {error}").into());
375                        }
376                    }
377
378                    last_line_program_entry = Some(line_program.entry_index());
379                    loop {
380                        let region = match line_program.run() {
381                            Ok(Some(region)) => region,
382                            Ok(None) => break,
383                            Err(error) => {
384                                return Err(format!("failed to parse line program: {error}").into());
385                            }
386                        };
387
388                        if region.instruction_range().contains(&offset) {
389                            let frame = region.frames().next().unwrap();
390                            let full_name = match frame.full_name() {
391                                Ok(full_name) => full_name,
392                                Err(error) => {
393                                    return Err(format!("failed to parse line program: {error}").into());
394                                }
395                            }
396                            .to_string();
397
398                            if last_full_name != full_name {
399                                w!("<{}>:", full_name);
400                                last_full_name = full_name;
401                            }
402
403                            break;
404                        }
405                    }
406                }
407            } else {
408                if !last_full_name.is_empty() {
409                    if let Err(error) = writeln!(&mut writer) {
410                        return Err(format!("failed to write to output: {error}").into());
411                    }
412                }
413
414                last_line_program_entry = None;
415                last_full_name.clear();
416            }
417
418            if pending_label {
419                pending_label = false;
420                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
421                    if self.show_offsets {
422                        w!(@no_newline "      : ");
423                    }
424
425                    if self.show_raw_bytes {
426                        w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
427                    } else {
428                        w!("{}", format_jump_target(offset, basic_block_counter))
429                    }
430                } else {
431                    w!("    {}", format_jump_target(offset, basic_block_counter))
432                }
433            }
434
435            if matches!(self.format, DisassemblyFormat::DiffFriendly) {
436                let mut string = instruction_s;
437                if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
438                    string = format!("{} = _", dst);
439                }
440
441                if let Some(index) = string.find('@') {
442                    let length = string[index + 1..]
443                        .chars()
444                        .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
445                        .count();
446                    string.replace_range(index + 1..index + 1 + length, "_");
447                }
448
449                if let Some(index_1) = string.find("[0x") {
450                    let index_2 = string[index_1..].find(']').unwrap() + index_1;
451                    string.replace_range(index_1..=index_2, "[_]");
452                }
453
454                w!("    {}", string);
455            } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
456                if self.show_offsets {
457                    w!(@no_newline "{offset:6}: ");
458                }
459                if self.show_raw_bytes {
460                    let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
461                    w!("{raw_bytes:24} {instruction_s}")
462                } else {
463                    w!("{instruction_s}")
464                }
465            }
466
467            if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
468                let native = self.native.as_ref().unwrap();
469                assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);
470
471                let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
472                let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
473                let length = machine_next_code_position - machine_code_position;
474                if length != 0 {
475                    let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
476                    if let Err(error) = fmt.emit(
477                        matches!(self.format, DisassemblyFormat::GuestAndNative),
478                        native.machine_code_origin,
479                        machine_code_chunk,
480                        machine_code_position,
481                        self.show_native_raw_bytes,
482                        self.show_native_offsets,
483                        &mut writer,
484                    ) {
485                        return Err(format!("failed to write to output: {error}").into());
486                    }
487                }
488            }
489
490            if instruction.opcode().starts_new_basic_block() {
491                if nth_instruction + 1 != instructions.len() {
492                    pending_label = true;
493                }
494                basic_block_counter += 1;
495            }
496        }
497
498        if let Err(error) = writer.flush() {
499            return Err(format!("failed to write to output: {error}").into());
500        }
501
502        Ok(())
503    }
504}
505
#[cfg(test)]
mod tests {
    use polkavm::Reg::*;
    use polkavm_common::abi::MemoryMapBuilder;
    use polkavm_common::program::asm;
    use polkavm_common::writer::ProgramBlobBuilder;

    use super::*;

    /// Smoke test: disassembling `blob` must produce some output in every format.
    ///
    /// The native formats are only exercised on x86-64, which is the only
    /// architecture on which `NativeCode::try_from` does not bail out early.
    fn test_all_formats(blob: &ProgramBlob) {
        for format in [
            DisassemblyFormat::Guest,
            DisassemblyFormat::DiffFriendly,
            #[cfg(target_arch = "x86_64")]
            DisassemblyFormat::GuestAndNative,
            #[cfg(target_arch = "x86_64")]
            DisassemblyFormat::Native,
        ] {
            assert!(!disassemble_with_gas(blob, format).is_empty());
        }
    }

    /// Disassembles `blob` in the given `format` with gas costs enabled and
    /// returns the raw output.
    fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
        let mut disassembler = Disassembler::new(blob, format).unwrap();
        disassembler.display_gas().unwrap();

        let mut buffer = Vec::with_capacity(1 << 20);
        disassembler.disassemble_into(&mut buffer).unwrap();
        buffer
    }

    /// Disassembles a tiny hand-assembled program and compares the guest output
    /// against the expected text.
    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();
        let mut builder = ProgramBlobBuilder::new();
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(
            &[
                asm::store_imm_u32(memory_map.rw_data_address(), 0x12345678),
                asm::add_32(S0, A0, A1),
                asm::ecalli(0),
                asm::add_32(A0, A0, S0),
                asm::ret(),
            ],
            &[],
        );
        let blob = ProgramBlob::parse(builder.into_vec().unwrap().into()).unwrap();

        test_all_formats(&blob);

        let assembly_bytes = disassemble_with_gas(&blob, DisassemblyFormat::Guest);
        let assembly_text = String::from_utf8(assembly_bytes).unwrap();
        let expected = &[
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            "      : @0 [export #0: 'main'] (gas: 5)",
            "     0: u32 [0x20000] = 0x12345678",
            "     9: s0 = a0 + a1",
            "    12: ecalli 0 // 'hostcall'",
            "    13: a0 = a0 + s0",
            "    16: ret",
            "",
        ]
        .join("\n");

        assert_eq!(&assembly_text, expected);
    }
}