Skip to main content

neo_decompiler/
disassembler.rs

1//! Stateless Neo VM bytecode decoder used by the decompiler and CLI.
2//! Converts raw byte buffers into structured instructions with operands.
3use std::fmt;
4
5use crate::error::{DisassemblyError, Result};
6use crate::instruction::{Instruction, OpCode};
7
8mod operand;
9
/// Policy applied when the decoder meets an opcode byte it does not recognise.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UnknownHandling {
    /// Abort disassembly with an error at the first unknown opcode.
    Error,
    /// Record the byte as an `Unknown` instruction and keep decoding the bytes that follow.
    Permit,
}
18
19/// Stateless helper that decodes Neo VM bytecode into structured instructions.
20#[derive(Debug, Clone, Copy)]
21///
22/// The disassembler maintains no state between calls; configuration only
23/// controls how unknown opcode bytes are handled.
24pub struct Disassembler {
25    unknown: UnknownHandling,
26}
27
28/// Disassembly output including any non-fatal warnings.
29#[derive(Debug, Clone)]
30pub struct DisassemblyOutput {
31    /// Decoded instructions.
32    pub instructions: Vec<Instruction>,
33    /// Non-fatal warnings encountered during decoding.
34    pub warnings: Vec<DisassemblyWarning>,
35}
36
/// Non-fatal diagnostic produced when the disassembler is configured to tolerate a problem.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DisassemblyWarning {
    /// An opcode byte was not recognised; later instructions may be misaligned.
    UnknownOpcode {
        /// Raw value of the unrecognised opcode byte.
        opcode: u8,
        /// Byte offset at which the opcode was read.
        offset: usize,
    },
}
48
49impl fmt::Display for DisassemblyWarning {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        match self {
52            DisassemblyWarning::UnknownOpcode { opcode, offset } => write!(
53                f,
54                "disassembly: unknown opcode 0x{opcode:02X} at 0x{offset:04X}; continuing may desynchronize output"
55            ),
56        }
57    }
58}
59
60impl Default for Disassembler {
61    fn default() -> Self {
62        Self::new()
63    }
64}
65
66impl Disassembler {
67    /// Create a disassembler that permits unknown opcodes.
68    ///
69    /// Equivalent to `Disassembler::with_unknown_handling(UnknownHandling::Permit)`.
70    #[must_use]
71    pub fn new() -> Self {
72        Self {
73            unknown: UnknownHandling::Permit,
74        }
75    }
76
77    /// Create a disassembler configured with the desired unknown-opcode policy.
78    ///
79    /// See [`UnknownHandling`] for the available strategies.
80    #[must_use]
81    pub fn with_unknown_handling(unknown: UnknownHandling) -> Self {
82        Self { unknown }
83    }
84
85    /// Disassemble an entire bytecode buffer.
86    ///
87    /// # Errors
88    /// Returns an error if the bytecode stream is truncated, contains an operand
89    /// that exceeds the supported maximum size, or contains an unknown opcode
90    /// while configured with [`UnknownHandling::Error`].
91    ///
92    /// Any non-fatal warnings are discarded; call [`Self::disassemble_with_warnings`]
93    /// to inspect them.
94    pub fn disassemble(&self, bytecode: &[u8]) -> Result<Vec<Instruction>> {
95        Ok(self.disassemble_with_warnings(bytecode)?.instructions)
96    }
97
98    /// Disassemble an entire bytecode buffer, returning any non-fatal warnings.
99    ///
100    /// # Errors
101    /// Returns an error if the bytecode stream is truncated, contains an operand
102    /// that exceeds the supported maximum size, or contains an unknown opcode
103    /// while configured with [`UnknownHandling::Error`].
104    pub fn disassemble_with_warnings(&self, bytecode: &[u8]) -> Result<DisassemblyOutput> {
105        let mut instructions = Vec::new();
106        let mut warnings = Vec::new();
107        let mut pc = 0usize;
108
109        while pc < bytecode.len() {
110            let opcode_byte = *bytecode
111                .get(pc)
112                .ok_or(DisassemblyError::UnexpectedEof { offset: pc })?;
113            let opcode = OpCode::from_byte(opcode_byte);
114            if let OpCode::Unknown(_) = opcode {
115                match self.unknown {
116                    UnknownHandling::Permit => {
117                        warnings.push(DisassemblyWarning::UnknownOpcode {
118                            opcode: opcode_byte,
119                            offset: pc,
120                        });
121                        instructions.push(Instruction::new(pc, opcode, None));
122                        pc += 1;
123                        continue;
124                    }
125                    UnknownHandling::Error => {
126                        return Err(DisassemblyError::UnknownOpcode {
127                            opcode: opcode_byte,
128                            offset: pc,
129                        }
130                        .into());
131                    }
132                }
133            }
134
135            let (instruction, size) = self.decode_known_instruction(bytecode, pc, opcode)?;
136            instructions.push(instruction);
137            pc += size;
138        }
139
140        Ok(DisassemblyOutput {
141            instructions,
142            warnings,
143        })
144    }
145
146    fn decode_known_instruction(
147        &self,
148        bytecode: &[u8],
149        offset: usize,
150        opcode: OpCode,
151    ) -> Result<(Instruction, usize)> {
152        let (operand, consumed) = self.read_operand(opcode, bytecode, offset)?;
153        Ok((Instruction::new(offset, opcode, operand), 1 + consumed))
154    }
155}
156
157#[cfg(test)]
158mod tests;