evm_disassembler/
lib.rs

1//! Disassemble evm bytecode into individual instructions.
2//!
3//! This crate provides a simple interface for disassembling evm bytecode into individual
4//! instructions / opcodes.
//! It supports both hex encoded strings and vectors of bytes as input.
//! Additionally, it provides a method to format the disassembled instructions into a
//! human-readable format identical to that of the
//! [pyevmasm](https://github.com/crytic/pyevmasm) library.
8//!
9//! ```rust
10//! use evm_disassembler::{disassemble_str, disassemble_bytes, format_operations};
11//!    
12//! let bytecode = "60606040526040";
13//! let instructions = disassemble_str(bytecode).unwrap();
14//! // Will print:
15//! // 00000000: PUSH1 0x60
16//! // 00000002: PUSH1 0x40
17//! // 00000004: MSTORE
18//! // 00000005: PUSH1 0x40
19//! println!("{}", format_operations(instructions).unwrap());
20//!
21//! let bytes = hex::decode(bytecode).unwrap();
22//! let instructions_from_bytes = disassemble_bytes(bytes).unwrap();
23//! println!("{}", format_operations(instructions_from_bytes).unwrap());
24//!
25//! ```
26#![warn(missing_docs)]
27use crate::decode::decode_operation;
28use std::fmt::Write;
29
30use eyre::Result;
31
32mod decode;
33
34pub mod types;
35pub use types::{Opcode, Operation};
36
37#[cfg(test)]
38mod test_utils;
39
40/// Disassemble a hex encoded string into a vector of instructions / operations
41///
42/// # Arguments
43/// - `input` - A hex encoded string representing the bytecode to disassemble
44///
45/// # Examples
46///
47/// ```rust
48/// use evm_disassembler::disassemble_str;
49///
50/// let bytecode = "0x608060405260043610603f57600035";
51/// let instructions = disassemble_str(bytecode).unwrap();
52/// ```
53pub fn disassemble_str(input: &str) -> Result<Vec<Operation>> {
54    let input = input.trim_start_matches("0x");
55    let bytes = hex::decode(input)?;
56    disassemble_bytes(bytes)
57}
58
59/// Disassemble a vector of bytes into a vector of decoded Operations
60///
61/// Will stop disassembling when it encounters a push instruction with a size greater than
62/// remaining bytes in the input.
63///
64/// Automatically detects EOF containers (starting with 0xef00) and decodes EOF-specific
65/// opcodes only when appropriate.
66///
67/// # Arguments
68/// - `bytes` - A vector of bytes representing the encoded bytecode
69///
70/// # Examples
71///
72/// ```rust
73/// use evm_disassembler::disassemble_bytes;
74///
75/// let bytecode = "608060405260043610603f57600035";
76/// let bytes = hex::decode(bytecode).unwrap();
77/// let instructions_from_bytes = disassemble_bytes(bytes).unwrap();
78/// ```
79pub fn disassemble_bytes(bytes: Vec<u8>) -> Result<Vec<Operation>> {
80    // Detect EOF container: starts with 0xef00
81    let is_eof = bytes.len() >= 2 && bytes[0] == 0xef && bytes[1] == 0x00;
82
83    let mut operations = Vec::new();
84    let mut new_operation: Operation;
85    let mut offset = 0;
86    let mut bytes_iter = bytes.into_iter();
87    while bytes_iter.len() > 0 {
88        (new_operation, offset) = match decode_operation(&mut bytes_iter, offset, is_eof) {
89            Ok((operation, new_offset)) => (operation, new_offset),
90            Err(e) => {
91                println!("Stop decoding at offset {offset} due to error : {e}");
92                break;
93            }
94        };
95        operations.push(new_operation);
96    }
97    Ok(operations)
98}
99
100/// Converts a vector of decoded operations into a human readable formatted string
101///
102/// Operations are formatted on individual lines with the following format:
103/// `{offset}: {opcode} {bytes}`
104///
105/// - `offset` - The offset of the operation in the bytecode (as hex)
106/// - `opcode` - The respective opcode (i.e. "PUSH1", "ADD")
107/// - `bytes` - Additional bytes that are part of the operation (only for "PUSH" instructions)
108///
109/// # Arguments
110/// - `operations` - A vector of decoded operations as returned by `disassemble_str` or
111///   `disassemble_bytes`
112///
113/// # Examples
114/// ```rust
115/// use evm_disassembler::{disassemble_str, format_operations};
116///
117/// let bytecode = "0x608060405260043610603f57600035";
118/// let instructions = disassemble_str(bytecode).unwrap();
119/// println!("{}", format_operations(instructions).unwrap());
120/// ```
121pub fn format_operations(operations: Vec<Operation>) -> Result<String> {
122    let mut formatted = String::new();
123    for operation in operations.iter() {
124        writeln!(formatted, "{operation:?}")?;
125    }
126    Ok(formatted)
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::get_contract_code;
    use crate::types::Opcode;
    use rstest::*;
    use std::fs;

    // Fetches deployed bytecode through `get_contract_code` and checks the number of decoded
    // operations as well as the opcodes found at selected positions.
    #[rstest]
    #[case("0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2", 1577, vec![(Opcode::DUP7, 1000), (Opcode::EXTCODECOPY, 1563)])]
    #[tokio::test]
    async fn decode_code_from_rpc_provider(
        #[case] address: &str,
        #[case] expected_length: usize,
        #[case] expected_opcodes: Vec<(Opcode, usize)>,
    ) {
        let code = get_contract_code(address).await;
        let operations = disassemble_bytes(code).expect("Unable to disassemble code");
        assert_eq!(operations.len(), expected_length);
        for (opcode, expected_position) in &expected_opcodes {
            assert_eq!(operations[*expected_position].opcode, *opcode);
        }
    }

    // Compares the formatted disassembly of stored bytecode against a stored reference
    // listing, line by line. Contract labels below follow the public Etherscan labels.
    #[rstest]
    #[case("0xDef1C0ded9bec7F1a1670819833240f027b25EfF")] // 0x (ZeroEx) Exchange Proxy
    #[case("0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2")] // Weth
    #[case("0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48")] // USDC
    #[case("0x00000000006c3852cbEf3e08E8dF289169EdE581")] // Seaport
    fn decode_code_from_file(#[case] address: &str) {
        let code = fs::read_to_string(format!("testdata/{address}_encoded.txt"))
            .expect("Unable to read encoded file");
        let decoded_reference = fs::read_to_string(format!("testdata/{address}_decoded.txt"))
            .expect("No reference file");

        // Drop only trailing whitespace (the file's final line ending). Unlike an
        // unconditional `pop`, this never removes a hex digit when the newline is absent.
        let operations = disassemble_str(code.trim_end()).expect("Unable to decode");
        assert!(!operations.is_empty());
        let formatted_operations = format_operations(operations).expect("failed to format");

        // Walk both listings in step instead of re-scanning the reference for every line.
        let mut reference_lines = decoded_reference.lines();
        for line in formatted_operations.lines() {
            let expected = reference_lines
                .next()
                .expect("reference file has fewer lines than the disassembly");
            assert_eq!(line, expected);
        }
        println!("Decoded output from contract {address} matches reference");
    }

    #[rstest]
    fn decode_preamble() {
        let code = "608060405260043610603f57600035";
        let operations = disassemble_str(code).expect("Unable to decode");
        assert_eq!(operations.len(), 10);
    }

    #[rstest]
    fn decode_preamble_from_bytes() {
        let bytes = hex::decode("608060405260043610603f57600035").unwrap();
        let operations = disassemble_bytes(bytes).expect("Unable to decode");
        assert_eq!(operations.len(), 10);
    }

    #[rstest]
    #[case(Opcode::STOP, "0x00")]
    #[case(Opcode::ADD, "0x01")]
    #[case(Opcode::MUL, "0x02")]
    #[case(Opcode::SUB, "0x03")]
    #[case(Opcode::DIV, "0x04")]
    #[case(Opcode::SDIV, "0x05")]
    #[case(Opcode::MOD, "0x06")]
    #[case(Opcode::SMOD, "0x07")]
    #[case(Opcode::ADDMOD, "0x08")]
    #[case(Opcode::MULMOD, "0x09")]
    fn decode_single_op(#[case] opcode: Opcode, #[case] encoded_opcode: &str) {
        let result = disassemble_str(encoded_opcode).expect("Unable to decode");
        assert_eq!(result, vec![Operation::new(opcode, 0)]);
    }

    #[rstest]
    fn decode_stop_and_add() {
        let add_op = "01";
        let stop_op = "00";
        let result = disassemble_str(&format!("{add_op}{stop_op}")).expect("Unable to decode");
        assert_eq!(
            result,
            vec![
                Operation::new(Opcode::ADD, 0),
                Operation::new(Opcode::STOP, 1),
            ]
        );
    }

    // EOF container tests
    // EOF format: ef0001 [header] [types] [code] [data]
    // Header: 01 XXXX (type section) 02 YYYY ZZZZ (code section) 04 WWWW (data section) 00 (terminator)

    #[rstest]
    fn test_eof_detection() {
        // This should be detected as EOF (starts with ef00)
        // Minimal EOF: ef0001 01 0004 02 0001 0001 04 0000 00 [types: 00000000] [code: 00]
        let eof_bytecode = "ef00010100040200010001040000000000000000";
        let ops = disassemble_str(eof_bytecode).expect("Should decode EOF");
        // In EOF mode, the header bytes are decoded as opcodes (some will be INVALID)
        // The important thing is it doesn't crash
        assert!(!ops.is_empty());
    }

    #[rstest]
    fn test_eof_with_rjump() {
        // EOF with RJUMP instruction in code section
        // Header: ef0001 01 0004 02 0001 0003 04 0000 00
        // Types: 00 80 00 01 (0 inputs, non-returning, max stack 1)
        // Code: e0 00 00 (RJUMP with offset 0)
        let eof_bytecode = "ef000101000402000100030400000000800001e00000";
        let ops = disassemble_str(eof_bytecode).expect("Should decode EOF");
        let formatted = format_operations(ops).unwrap();
        println!("EOF with RJUMP:\n{formatted}");
        // Should contain RJUMP since this is EOF format
        assert!(
            formatted.contains("RJUMP"),
            "Should decode RJUMP in EOF container"
        );
    }

    #[rstest]
    fn test_eof_with_callf() {
        // EOF with CALLF and RETF
        // Header: ef0001 01 0008 02 0002 0003 0001 04 0000 00
        // Types section (8 bytes for 2 functions):
        //   00 80 00 01 - func 0: 0 inputs, non-returning, max stack 1
        //   00 00 00 00 - func 1: 0 inputs, 0 outputs, max stack 0
        // Code section 0 (3 bytes): e3 0001 - CALLF 1
        // Code section 1 (1 byte): e4 - RETF
        let eof_bytecode = "ef00010100080200020003000104000000008000010000000000e30001e4";
        let ops = disassemble_str(eof_bytecode).expect("Should decode EOF");
        let formatted = format_operations(ops).unwrap();
        println!("EOF with CALLF:\n{formatted}");
        assert!(
            formatted.contains("CALLF"),
            "Should decode CALLF in EOF container"
        );
    }

    #[rstest]
    fn test_legacy_bytecode_no_eof_opcodes() {
        // Legacy bytecode containing byte 0xe0 should NOT decode as RJUMP
        // This tests that EOF opcodes are only decoded in EOF containers
        let legacy_bytecode = "60e0"; // PUSH1 0xe0
        let ops = disassemble_str(legacy_bytecode).expect("Should decode legacy");
        let formatted = format_operations(ops).unwrap();
        println!("Legacy bytecode:\n{formatted}");
        // Should be PUSH1, not RJUMP
        assert!(formatted.contains("PUSH1"), "Should decode as PUSH1");
        assert!(
            !formatted.contains("RJUMP"),
            "Should NOT decode as RJUMP in legacy"
        );
    }

    #[rstest]
    fn test_legacy_with_eof_like_bytes() {
        // Legacy bytecode with bytes that would be EOF opcodes if in EOF mode
        // 0xe7 would be SWAPN in EOF, but should be INVALID in legacy
        let legacy_bytecode = "e7";
        let ops = disassemble_str(legacy_bytecode).expect("Should decode legacy");
        assert_eq!(
            ops[0].opcode,
            Opcode::INVALID,
            "0xe7 should be INVALID in legacy"
        );
    }

    #[rstest]
    fn test_real_eof_contract_from_solidity() {
        // Real EOF bytecode compiled from Solidity with --evm-version osaka --eofVersion 1
        // Contract: SimpleEOF with setValue, getValue, add functions
        let eof = "ef000101009c020027004b0004000400030003000b00010006000d001c00020001000b00050007001c000c000800120003001b001c001d0003000200030007000500090003000f0001000a0001000f00050013001300080400430000800003010100020001000100800002008000020200000301010001020000020201000500800004020100020101000102010003020100020001000200800004010000020201000202010005010100020080000302020005008000040080000200010001010100020101000101010001000100010101000202010004010100010101000101010001020000030100000200800002020100030201000360806040526004361015e10003e500175f35e3000180632096525514e1002980633fa4f24514e1001c80635524107714e1000f63771602f714e10003e0ffcee50016e50014e5000fe5000960e01ce4604051e45f80fd5f80fd5f910312e10001e4e50004e4e300069052e4905f60208301920190e30007e434e10015366004e30005e3001ce30002809181e300080390f3e500031ce4e490600802e3000ae3000be454e3000ce45f5f90e3000de434e10015366004e30005e3000ee30002809181e300080390f3e5000380e3000603e10001e45f80fd90503580e30010e4602081830312e100065f01e30011e4e500045f01e434e10014366004e30012e30023e300028080e300130390f3e5000390604082820312e1000f805f8301e3001191602001e30011e4e5000434e10016366004e3001590e30026e30002809181e300080390f3e500035f80fd5fe45f1ce4e30019e3000be454e3001ae4e30018505fe3001be45f1be4905f1990e3001d91811916911617e4e4e30006e3001fe30006e4e490e30020e300218154e3001e9055e45fe30022e4634e487b7160e01b5f52601160045260245ffde3000690e300068101809111e10001e4e50024e3001850e30025e4";

        let ops = disassemble_str(eof).expect("Should decode real EOF contract");
        let formatted = format_operations(ops).unwrap();

        println!("\n=== Real EOF Contract Disassembly ===");
        println!("(Header bytes decoded as opcodes, code section follows)\n");

        // Show lines containing EOF opcodes
        const EOF_MNEMONICS: [&str; 5] = ["RJUMP", "CALLF", "RETF", "JUMPF", "DATALOAD"];
        println!("Lines containing EOF opcodes:");
        for line in formatted.lines() {
            if EOF_MNEMONICS.iter().any(|&mnemonic| line.contains(mnemonic)) {
                println!("{line}");
            }
        }
        println!();

        // Verify EOF opcodes are present
        assert!(formatted.contains("RJUMPI"), "Should contain RJUMPI");
        assert!(formatted.contains("JUMPF"), "Should contain JUMPF");
        assert!(formatted.contains("CALLF"), "Should contain CALLF");
        assert!(formatted.contains("RETF"), "Should contain RETF");
    }
}
334}