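//! `polkavm_disassembler/lib.rs`
//!
//! Disassembler for PolkaVM program blobs: prints guest instructions and, where
//! available, the native machine code they were compiled to.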

1use std::{collections::HashMap, io::Write};
2
3use polkavm::CostModelKind;
4use polkavm_common::cast::cast;
5use polkavm_common::program::{ParsedInstruction, ProgramBlob, ProgramCounter};
6
7#[derive(Copy, Clone, Debug, clap::ValueEnum)]
8pub enum DisassemblyFormat {
9    Guest,
10    GuestAndNative,
11    Native,
12    DiffFriendly,
13}
14
15struct NativeCode {
16    machine_code_origin: u64,
17    machine_code: Vec<u8>,
18    instruction_map: Vec<(ProgramCounter, u32)>,
19}
20
21impl TryFrom<&'_ ProgramBlob> for NativeCode {
22    type Error = polkavm::Error;
23
24    fn try_from(blob: &'_ ProgramBlob) -> Result<Self, Self::Error> {
25        if !cfg!(target_arch = "x86_64") {
26            return Err("the selected disassembly format is not supported on this architecture".into());
27        }
28
29        let mut config = polkavm::Config::from_env()?;
30        config.set_worker_count(0);
31
32        let engine = polkavm::Engine::new(&config)?;
33        let module = polkavm::Module::from_blob(&engine, &Default::default(), blob.clone())?;
34
35        let Some(machine_code) = module.machine_code() else {
36            return Err("currently selected VM backend doesn't provide raw machine code".into());
37        };
38
39        let Some(instruction_map) = module.program_counter_to_machine_code_offset() else {
40            return Err("currently selected VM backend doesn't provide a machine code map".into());
41        };
42
43        Ok(Self {
44            machine_code_origin: module.machine_code_origin().unwrap_or(0),
45            machine_code: machine_code.into(),
46            instruction_map: instruction_map.to_vec(),
47        })
48    }
49}
50
51#[derive(Default)]
52struct AssemblyFormatter {
53    buffer: String,
54}
55
56impl AssemblyFormatter {
57    fn emit(
58        &mut self,
59        indent: bool,
60        code_origin: u64,
61        mut code: &[u8],
62        mut position: usize,
63        show_raw_bytes: bool,
64        show_offsets: bool,
65        writer: &mut impl Write,
66    ) -> Result<(), std::io::Error> {
67        use iced_x86::Formatter;
68
69        let mut formatter = iced_x86::NasmFormatter::new();
70        formatter.options_mut().set_space_after_operand_separator(true);
71        formatter.options_mut().set_hex_prefix("0x");
72        formatter.options_mut().set_hex_suffix("");
73        formatter.options_mut().set_uppercase_hex(false);
74        formatter.options_mut().set_small_hex_numbers_in_decimal(false);
75        formatter.options_mut().set_show_useless_prefixes(true);
76        formatter.options_mut().set_branch_leading_zeros(false);
77        formatter.options_mut().set_rip_relative_addresses(true);
78
79        loop {
80            let mut decoder = iced_x86::Decoder::with_ip(64, code, code_origin, iced_x86::DecoderOptions::NONE);
81            if !decoder.can_decode() {
82                break;
83            }
84            let mut instruction = iced_x86::Instruction::default();
85            decoder.decode_out(&mut instruction);
86
87            if indent {
88                write!(writer, "                                       ")?;
89            }
90
91            if show_offsets {
92                write!(writer, "{:8x}: ", position as u64 + code_origin)?;
93            }
94
95            let start_index = (instruction.ip() - code_origin) as usize;
96            let instr_bytes = &code[start_index..start_index + instruction.len()];
97            if show_raw_bytes {
98                let mut count = 0;
99                for b in instr_bytes.iter() {
100                    write!(writer, "{:02x} ", b)?;
101                    count += 3;
102                }
103                while count < 34 {
104                    write!(writer, " ")?;
105                    count += 1;
106                }
107            }
108
109            self.buffer.clear();
110            formatter.format(&instruction, &mut self.buffer);
111            write!(writer, "{}", self.buffer.replace("byte [", "byte ptr ["))?;
112            writeln!(writer)?;
113
114            code = &code[instruction.len()..];
115            position += instruction.len();
116        }
117
118        Ok(())
119    }
120}
121
122pub struct Disassembler<'a> {
123    blob: &'a ProgramBlob,
124    format: DisassemblyFormat,
125    gas_cost_map: Option<HashMap<ProgramCounter, i64>>,
126    native: Option<NativeCode>,
127    show_raw_bytes: bool,
128    show_native_raw_bytes: bool,
129    prefer_non_abi_reg_names: bool,
130    prefer_unaliased: bool,
131    prefer_offset_jump_targets: bool,
132    emit_header: bool,
133    emit_exports: bool,
134    show_offsets: bool,
135    show_native_offsets: bool,
136    cost_model: Option<CostModelKind>,
137}
138
139impl<'a> Disassembler<'a> {
140    pub fn new(blob: &'a ProgramBlob, format: DisassemblyFormat) -> Result<Self, polkavm::Error> {
141        let native = if matches!(format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
142            Some(NativeCode::try_from(blob)?)
143        } else {
144            None
145        };
146
147        Ok(Self {
148            blob,
149            format,
150            gas_cost_map: None,
151            native,
152            show_raw_bytes: false,
153            show_native_raw_bytes: true,
154            prefer_non_abi_reg_names: false,
155            prefer_unaliased: false,
156            prefer_offset_jump_targets: false,
157            emit_header: true,
158            emit_exports: true,
159            show_offsets: true,
160            show_native_offsets: true,
161            cost_model: None,
162        })
163    }
164
165    pub fn show_raw_bytes(&mut self, value: bool) {
166        self.show_raw_bytes = value;
167    }
168
169    pub fn show_native_raw_bytes(&mut self, value: bool) {
170        self.show_native_raw_bytes = value;
171    }
172
173    pub fn prefer_non_abi_reg_names(&mut self, value: bool) {
174        self.prefer_non_abi_reg_names = value;
175    }
176
177    pub fn prefer_unaliased(&mut self, value: bool) {
178        self.prefer_unaliased = value;
179    }
180
181    pub fn prefer_offset_jump_targets(&mut self, value: bool) {
182        self.prefer_offset_jump_targets = value;
183    }
184
185    pub fn emit_header(&mut self, value: bool) {
186        self.emit_header = value;
187    }
188
189    pub fn emit_exports(&mut self, value: bool) {
190        self.emit_exports = value;
191    }
192
193    pub fn show_offsets(&mut self, value: bool) {
194        self.show_offsets = value;
195    }
196
197    pub fn show_native_offsets(&mut self, value: bool) {
198        self.show_native_offsets = value;
199    }
200
201    pub fn cost_model(&mut self, value: Option<CostModelKind>) {
202        self.cost_model = value;
203    }
204
205    fn instructions(&self) -> Vec<ParsedInstruction> {
206        self.blob.instructions().collect()
207    }
208
209    pub fn display_gas(&mut self) -> Result<(), polkavm::Error> {
210        let mut config = polkavm::Config::from_env()?;
211        config.set_worker_count(0);
212        config.set_backend(Some(polkavm::BackendKind::Interpreter));
213        config.set_allow_experimental(true);
214        config.set_default_cost_model(self.cost_model.clone());
215
216        let engine = polkavm::Engine::new(&config)?;
217
218        let mut config = polkavm::ModuleConfig::default();
219        config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
220
221        let module = polkavm::Module::from_blob(&engine, &config, self.blob.clone())?;
222
223        let mut in_new_block = true;
224        let mut gas_cost_map = HashMap::new();
225        for instruction in self.instructions() {
226            if in_new_block {
227                in_new_block = false;
228                if let Some(cost) = module.calculate_gas_cost_for(instruction.offset) {
229                    gas_cost_map.insert(instruction.offset, cost);
230                }
231            }
232
233            if instruction.starts_new_basic_block() {
234                in_new_block = true;
235            }
236        }
237        self.gas_cost_map = Some(gas_cost_map);
238
239        Ok(())
240    }
241
242    pub fn disassemble_into(&self, mut writer: impl Write) -> Result<(), polkavm::Error> {
243        let mut instructions = Vec::new();
244        let mut instruction_offset_to_basic_block = HashMap::new();
245        {
246            let mut basic_block_counter = 0;
247            let mut basic_block_started = true;
248            for instruction in self.instructions() {
249                if basic_block_started {
250                    instruction_offset_to_basic_block.insert(instruction.offset, basic_block_counter);
251                    basic_block_started = false;
252                }
253
254                if instruction.starts_new_basic_block() {
255                    basic_block_started = true;
256                    basic_block_counter += 1;
257                }
258                instructions.push(instruction);
259            }
260        }
261
262        let mut exports_for_code_offset = HashMap::new();
263        for (nth_export, export) in self.blob.exports().enumerate() {
264            exports_for_code_offset
265                .entry(export.program_counter())
266                .or_insert_with(Vec::new)
267                .push((nth_export, export));
268        }
269
270        let mut jump_table_map = HashMap::new();
271        let mut jump_table = Vec::new();
272        for target_code_offset in self.blob.jump_table() {
273            let jump_table_index = jump_table.len() + 1;
274            jump_table.push(target_code_offset);
275            assert!(jump_table_map.insert(target_code_offset, jump_table_index).is_none());
276        }
277
278        macro_rules! w {
279            (@no_newline $($arg:tt)*) => {{
280                if let Err(error) = write!(&mut writer, $($arg)*) {
281                    return Err(format!("failed to write to output: {error}").into());
282                }
283            }};
284
285            ($($arg:tt)*) => {{
286                if let Err(error) = writeln!(&mut writer, $($arg)*) {
287                    return Err(format!("failed to write to output: {error}").into());
288                }
289            }};
290        }
291
292        if self.emit_header {
293            w!("// RO data = {}/{} bytes", self.blob.ro_data().len(), self.blob.ro_data_size());
294            w!("// RW data = {}/{} bytes", self.blob.rw_data().len(), self.blob.rw_data_size());
295            w!("// Stack size = {} bytes", self.blob.stack_size());
296            w!();
297            w!("// Instructions = {}", instructions.len());
298            w!("// Code size = {} bytes", self.blob.code().len());
299            w!();
300        }
301
302        let format_jump_target = |target_offset: ProgramCounter, basic_block_counter: u32| {
303            use core::fmt::Write;
304
305            let mut buf = String::new();
306            if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
307                write!(&mut buf, "@{basic_block_counter}").unwrap()
308            } else {
309                buf.push_str("@_:");
310            }
311
312            if let Some(jump_table_index) = jump_table_map.get(&target_offset) {
313                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
314                    write!(&mut buf, " [@dyn {jump_table_index}]").unwrap()
315                } else {
316                    buf.push_str(" [_]");
317                }
318            }
319
320            if self.emit_exports {
321                if let Some(exports) = exports_for_code_offset.get(&target_offset) {
322                    for (nth_export, export) in exports {
323                        write!(&mut buf, " [export #{}: {}]", nth_export, export.symbol()).unwrap()
324                    }
325                }
326            }
327
328            if let Some(gas_cost) = self.gas_cost_map.as_ref().and_then(|map| map.get(&target_offset)) {
329                write!(&mut buf, " (gas: {})", gas_cost).unwrap();
330            }
331
332            buf
333        };
334
335        let prefer_offset_jump_targets = self.prefer_offset_jump_targets;
336        let mut disassembly_format = polkavm_common::program::InstructionFormat::default();
337        disassembly_format.prefer_non_abi_reg_names = self.prefer_non_abi_reg_names;
338        disassembly_format.prefer_unaliased = self.prefer_unaliased;
339        disassembly_format.is_64_bit = self.blob.is_64_bit();
340
341        let jump_target_formatter = |target: u32, fmt: &mut core::fmt::Formatter| {
342            if prefer_offset_jump_targets {
343                write!(fmt, "{}", target)
344            } else if let Some(basic_block_index) = instruction_offset_to_basic_block.get(&polkavm::ProgramCounter(target)) {
345                write!(fmt, "@{basic_block_index}")
346            } else {
347                write!(fmt, "{}", target)
348            }
349        };
350        disassembly_format.jump_target_formatter = Some(&jump_target_formatter);
351
352        let mut fmt = AssemblyFormatter::default();
353        let mut last_line_program_entry = None;
354        let mut last_full_name = String::new();
355        let mut basic_block_counter = 0;
356        let mut pending_label = true;
357        for (nth_instruction, instruction) in instructions.iter().copied().enumerate() {
358            let offset = instruction.offset;
359            let length = core::cmp::min(instruction.next_offset.0, self.blob.code().len() as u32) - offset.0;
360            let instruction = instruction.kind;
361            let raw_bytes = &self.blob.code()[offset.0 as usize..offset.0 as usize + length as usize];
362
363            let instruction_s = instruction.display(&disassembly_format);
364            let instruction_s = if let polkavm_common::program::Instruction::ecalli(nth_import) = instruction {
365                if let Some(import) = self.blob.imports().get(cast(nth_import).bitwise_as_u32()) {
366                    format!("{instruction_s} // {}", import)
367                } else {
368                    format!("{instruction_s} // INVALID")
369                }
370            } else {
371                instruction_s.to_string()
372            };
373
374            let line_program = self.blob.get_debug_line_program_at(offset)?;
375
376            if let Some(mut line_program) = line_program {
377                if last_line_program_entry != Some(line_program.entry_index()) {
378                    if nth_instruction != 0 {
379                        if let Err(error) = writeln!(&mut writer) {
380                            return Err(format!("failed to write to output: {error}").into());
381                        }
382                    }
383
384                    last_line_program_entry = Some(line_program.entry_index());
385                    loop {
386                        let region = match line_program.run() {
387                            Ok(Some(region)) => region,
388                            Ok(None) => break,
389                            Err(error) => {
390                                return Err(format!("failed to parse line program: {error}").into());
391                            }
392                        };
393
394                        if region.instruction_range().contains(&offset) {
395                            let frame = region.frames().next().unwrap();
396                            let full_name = match frame.full_name() {
397                                Ok(full_name) => full_name,
398                                Err(error) => {
399                                    return Err(format!("failed to parse line program: {error}").into());
400                                }
401                            }
402                            .to_string();
403
404                            if last_full_name != full_name {
405                                w!("<{}>:", full_name);
406                                last_full_name = full_name;
407                            }
408
409                            break;
410                        }
411                    }
412                }
413            } else {
414                if !last_full_name.is_empty() {
415                    if let Err(error) = writeln!(&mut writer) {
416                        return Err(format!("failed to write to output: {error}").into());
417                    }
418                }
419
420                last_line_program_entry = None;
421                last_full_name.clear();
422            }
423
424            if pending_label {
425                pending_label = false;
426                if !matches!(self.format, DisassemblyFormat::DiffFriendly) {
427                    if self.show_offsets {
428                        w!(@no_newline "      : ");
429                    }
430
431                    if self.show_raw_bytes {
432                        w!("{:24} {}", "", format_jump_target(offset, basic_block_counter))
433                    } else {
434                        w!("{}", format_jump_target(offset, basic_block_counter))
435                    }
436                } else {
437                    w!("    {}", format_jump_target(offset, basic_block_counter))
438                }
439            }
440
441            if matches!(self.format, DisassemblyFormat::DiffFriendly) {
442                let mut string = instruction_s;
443                if let polkavm_common::program::Instruction::load_imm(dst, _) = instruction {
444                    string = format!("{} = _", dst);
445                }
446
447                if let Some(index) = string.find('@') {
448                    let length = string[index + 1..]
449                        .chars()
450                        .take_while(|character| character.is_ascii_digit() || matches!(character, 'a' | 'b' | 'c' | 'd' | 'e' | 'f'))
451                        .count();
452                    string.replace_range(index + 1..index + 1 + length, "_");
453                }
454
455                if let Some(index_1) = string.find("[0x") {
456                    let index_2 = string[index_1..].find(']').unwrap() + index_1;
457                    string.replace_range(index_1..=index_2, "[_]");
458                }
459
460                w!("    {}", string);
461            } else if matches!(self.format, DisassemblyFormat::Guest | DisassemblyFormat::GuestAndNative) {
462                if self.show_offsets {
463                    w!(@no_newline "{offset:6}: ");
464                }
465                if self.show_raw_bytes {
466                    let raw_bytes = raw_bytes.iter().map(|byte| format!("{byte:02x}")).collect::<Vec<_>>().join(" ");
467                    w!("{raw_bytes:24} {instruction_s}")
468                } else {
469                    w!("{instruction_s}")
470                }
471            }
472
473            if matches!(self.format, DisassemblyFormat::Native | DisassemblyFormat::GuestAndNative) {
474                let native = self.native.as_ref().unwrap();
475                assert_eq!(offset.0, native.instruction_map[nth_instruction].0 .0);
476
477                let machine_code_position = native.instruction_map[nth_instruction].1 as usize;
478                let machine_next_code_position = native.instruction_map[nth_instruction + 1].1 as usize;
479                let length = machine_next_code_position - machine_code_position;
480                if length != 0 {
481                    let machine_code_chunk = &native.machine_code[machine_code_position..machine_next_code_position];
482                    if let Err(error) = fmt.emit(
483                        matches!(self.format, DisassemblyFormat::GuestAndNative),
484                        native.machine_code_origin,
485                        machine_code_chunk,
486                        machine_code_position,
487                        self.show_native_raw_bytes,
488                        self.show_native_offsets,
489                        &mut writer,
490                    ) {
491                        return Err(format!("failed to write to output: {error}").into());
492                    }
493                }
494            }
495
496            if instruction.opcode().starts_new_basic_block() {
497                if nth_instruction + 1 != instructions.len() {
498                    pending_label = true;
499                }
500                basic_block_counter += 1;
501            }
502        }
503
504        if let Err(error) = writer.flush() {
505            return Err(format!("failed to write to output: {error}").into());
506        }
507
508        Ok(())
509    }
510}
511
#[cfg(test)]
mod tests {
    use polkavm::Reg::*;
    use polkavm_common::abi::MemoryMapBuilder;
    use polkavm_common::program::{asm, InstructionSetKind};
    use polkavm_common::writer::ProgramBlobBuilder;

    use super::*;

    /// Disassembles `blob` in the given `format` with gas annotations enabled and
    /// returns the raw output.
    fn disassemble_with_gas(blob: &ProgramBlob, format: DisassemblyFormat) -> Vec<u8> {
        let mut output = Vec::with_capacity(1 << 20);

        let mut disassembler = Disassembler::new(blob, format).unwrap();
        disassembler.display_gas().unwrap();
        disassembler.disassemble_into(&mut output).unwrap();

        output
    }

    /// Smoke test: every format available on this target must produce some output.
    fn test_all_formats(blob: &ProgramBlob) {
        let formats = [
            DisassemblyFormat::Guest,
            DisassemblyFormat::DiffFriendly,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::GuestAndNative,
            #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
            DisassemblyFormat::Native,
        ];

        for format in formats {
            let output = disassemble_with_gas(blob, format);
            assert!(!output.is_empty());
        }
    }

    #[test]
    fn simple() {
        let memory_map = MemoryMapBuilder::new(0x4000).rw_data_size(0x4000).build().unwrap();

        let code = [
            asm::store_imm_u32(memory_map.rw_data_address().try_into().unwrap(), 0x12345678),
            asm::add_32(S0, A0, A1),
            asm::ecalli(0),
            asm::add_32(A0, A0, S0),
            asm::ret(),
        ];

        let mut builder = ProgramBlobBuilder::new(InstructionSetKind::Latest32);
        builder.set_rw_data_size(0x4000);
        builder.add_export_by_basic_block(0, b"main");
        builder.add_import(b"hostcall");
        builder.set_code(&code, &[]);
        let blob = ProgramBlob::parse(builder.into_vec().unwrap().into()).unwrap();

        test_all_formats(&blob);

        // The guest format must match the expected text exactly, trailing newline included.
        let expected_lines = [
            "// RO data = 0/0 bytes",
            "// RW data = 0/16384 bytes",
            "// Stack size = 0 bytes",
            "",
            "// Instructions = 5",
            "// Code size = 18 bytes",
            "",
            "      : @0 [export #0: 'main'] (gas: 5)",
            "     0: u32 [0x20000] = 0x12345678",
            "     9: s0 = a0 + a1",
            "    12: ecalli 0 // 'hostcall'",
            "    13: a0 = a0 + s0",
            "    16: ret",
            "",
        ];
        let expected = expected_lines.join("\n");

        let assembly_text = String::from_utf8(disassemble_with_gas(&blob, DisassemblyFormat::Guest)).unwrap();
        assert_eq!(assembly_text, expected);
    }
}