evm_disassembler/
lib.rs

1//! Disassemble evm bytecode into individual instructions.
2//!
3//! This crate provides a simple interface for disassembling evm bytecode into individual
4//! instructions / opcodes.
//! It supports both hex encoded strings and vectors of bytes as input.
//! Additionally, it provides a method to format the disassembled instructions into a human-readable
//! format identical to that of the [pyevmasm](https://github.com/crytic/pyevmasm) library.
8//!
9//! ```rust
10//! use evm_disassembler::{disassemble_str, disassemble_bytes, format_operations};
11//!    
12//! let bytecode = "60606040526040";
13//! let instructions = disassemble_str(bytecode).unwrap();
14//! // Will print:
15//! // 00000000: PUSH1 0x60
16//! // 00000002: PUSH1 0x40
17//! // 00000004: MSTORE
18//! // 00000005: PUSH1 0x40
19//! println!("{}", format_operations(instructions).unwrap());
20//!
21//! let bytes = hex::decode(bytecode).unwrap();
22//! let instructions_from_bytes = disassemble_bytes(bytes).unwrap();
23//! println!("{}", format_operations(instructions_from_bytes).unwrap());
24//!
25//! ```
26#![warn(missing_docs)]
27use crate::decode::decode_operation;
28use std::fmt::Write;
29
30use eyre::Result;
31
32mod decode;
33
34pub mod types;
35pub use types::{Opcode, Operation};
36
37#[cfg(test)]
38mod test_utils;
39
40/// Disassemble a hex encoded string into a vector of instructions / operations
41///
42/// # Arguments
43/// - `input` - A hex encoded string representing the bytecode to disassemble
44///
45/// # Examples
46///
47/// ```rust
48/// use evm_disassembler::disassemble_str;
49///
50/// let bytecode = "0x608060405260043610603f57600035";
51/// let instructions = disassemble_str(bytecode).unwrap();
52/// ```
53pub fn disassemble_str(input: &str) -> Result<Vec<Operation>> {
54    let input = input.trim_start_matches("0x");
55    let bytes = hex::decode(input)?;
56    disassemble_bytes(bytes)
57}
58
59/// Disassemble a vector of bytes into a vector of decoded Operations
60///
61/// Will stop disassembling when it encounters a push instruction with a size greater than
62/// remaining bytes in the input
63///
64/// # Arguments
65/// - `bytes` - A vector of bytes representing the encoded bytecode
66///
67/// # Examples
68///
69/// ```rust
70/// use evm_disassembler::disassemble_bytes;
71///
72/// let bytecode = "608060405260043610603f57600035";
73/// let bytes = hex::decode(bytecode).unwrap();
74/// let instructions_from_bytes = disassemble_bytes(bytes).unwrap();
75/// ```
76pub fn disassemble_bytes(bytes: Vec<u8>) -> Result<Vec<Operation>> {
77    let mut operations = Vec::new();
78    let mut new_operation: Operation;
79    let mut offset = 0;
80    let mut bytes_iter = bytes.into_iter();
81    while bytes_iter.len() > 0 {
82        (new_operation, offset) = match decode_operation(&mut bytes_iter, offset) {
83            Ok((operation, new_offset)) => (operation, new_offset),
84            Err(e) => {
85                println!("Stop decoding at offset {offset} due to error : {e}");
86                break;
87            }
88        };
89        operations.push(new_operation);
90    }
91    Ok(operations)
92}
93
94/// Converts a vector of decoded operations into a human readable formatted string
95///
96/// Operations are formatted on individual lines with the following format:
97/// `{offset}: {opcode} {bytes}`
98///
99/// - `offset` - The offset of the operation in the bytecode (as hex)
100/// - `opcode` - The respective opcode (i.e. "PUSH1", "ADD")
101/// - `bytes` - Additional bytes that are part of the operation (only for "PUSH" instructions)
102///
103/// # Arguments
104/// - `operations` - A vector of decoded operations as returned by `disassemble_str` or
105/// `disassemble_bytes`
106///
107/// # Examples
108/// ```rust
109/// use evm_disassembler::{disassemble_str, format_operations};
110///
111/// let bytecode = "0x608060405260043610603f57600035";
112/// let instructions = disassemble_str(bytecode).unwrap();
113/// println!("{}", format_operations(instructions).unwrap());
114/// ```
115pub fn format_operations(operations: Vec<Operation>) -> Result<String> {
116    let mut formatted = String::new();
117    for operation in operations.iter() {
118        writeln!(formatted, "{operation:?}")?;
119    }
120    Ok(formatted)
121}
122
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::get_contract_code;
    use crate::types::Opcode;
    use rstest::*;
    use std::fs;

    /// Fetches contract code through `get_contract_code` and checks the number of decoded
    /// operations as well as the opcodes found at selected positions.
    #[rstest]
    #[case("0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2", 1577, vec![(Opcode::DUP7, 1000), (Opcode::EXTCODECOPY, 1563)])]
    #[tokio::test]
    async fn decode_code_from_rpc_provider(
        #[case] address: &str,
        #[case] expected_length: usize,
        #[case] expected_opcodes: Vec<(Opcode, usize)>,
    ) {
        let code = get_contract_code(address).await;
        let operations = disassemble_bytes(code).expect("Unable to disassemble code");
        assert_eq!(operations.len(), expected_length);
        for (opcode, expected_position) in expected_opcodes.iter() {
            assert_eq!(operations[*expected_position].opcode, *opcode);
        }
    }

    /// Decodes bytecode stored in `testdata/{address}_encoded.txt` and compares every formatted
    /// line against the reference output in `testdata/{address}_decoded.txt`.
    // NOTE(review): the contract labels on the cases below look mismatched with the well-known
    // mainnet deployments (0xDef1C0de... is commonly the 0x Exchange Proxy and 0xA0b86991...
    // is commonly USDC) - confirm and relabel.
    #[rstest]
    #[case("0xDef1C0ded9bec7F1a1670819833240f027b25EfF")] // UniswapV3 Router
    #[case("0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2")] // Weth
    #[case("0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48")] // ZeroEx Proxy
    #[case("0x00000000006c3852cbEf3e08E8dF289169EdE581")] // Seaport
    fn decode_code_from_file(#[case] address: &str) {
        let code = fs::read_to_string(format!("testdata/{address}_encoded.txt"))
            .expect("Unable to read encoded file");
        let decoded_reference = fs::read_to_string(format!("testdata/{address}_decoded.txt"))
            .expect("No reference file");

        // Only strip trailing whitespace / line endings. Blindly popping the last character
        // would cut off a hex digit when the file has no trailing newline and would leave a
        // stray '\r' behind for CRLF files.
        let operations = disassemble_str(code.trim_end()).expect("Unable to decode");
        assert!(!operations.is_empty());
        let formatted_operations = format_operations(operations).expect("failed to format");

        // Walk the reference once, in lockstep with the formatted output, instead of rescanning
        // it from the start for every line via `lines().nth(i)`.
        let mut reference_lines = decoded_reference.lines();
        for line in formatted_operations.lines() {
            let expected = reference_lines
                .next()
                .expect("reference file has fewer lines than the decoded output");
            assert_eq!(line, expected);
        }
        println!("Decoded output from contract {address} matches reference");
    }

    /// The example bytecode decodes into ten operations when given as a hex string.
    #[rstest]
    fn decode_preamble() {
        let code = "608060405260043610603f57600035";
        let operations = disassemble_str(code).expect("Unable to decode");
        assert_eq!(operations.len(), 10);
    }

    /// The example bytecode decodes into ten operations when given as raw bytes.
    #[rstest]
    fn decode_preamble_from_bytes() {
        let bytes = hex::decode("608060405260043610603f57600035").unwrap();
        let operations = disassemble_bytes(bytes).expect("Unable to decode");
        assert_eq!(operations.len(), 10);
    }

    /// A single encoded byte decodes into exactly one operation at offset 0.
    #[rstest]
    #[case(Opcode::STOP, "0x00")]
    #[case(Opcode::ADD, "0x01")]
    #[case(Opcode::MUL, "0x02")]
    #[case(Opcode::SUB, "0x03")]
    #[case(Opcode::DIV, "0x04")]
    #[case(Opcode::SDIV, "0x05")]
    #[case(Opcode::MOD, "0x06")]
    #[case(Opcode::SMOD, "0x07")]
    #[case(Opcode::ADDMOD, "0x08")]
    #[case(Opcode::MULMOD, "0x09")]
    fn decode_single_op(#[case] opcode: Opcode, #[case] encoded_opcode: &str) {
        let result = disassemble_str(encoded_opcode).expect("Unable to decode");
        assert_eq!(result, vec![Operation::new(opcode, 0)]);
    }

    /// Two consecutive single-byte operations are decoded with consecutive offsets.
    #[rstest]
    fn decode_stop_and_add() {
        let add_op = "01";
        let stop_op = "00";
        let result = disassemble_str(&(add_op.to_owned() + stop_op)).expect("Unable to decode");
        assert_eq!(
            result,
            vec![
                Operation::new(Opcode::ADD, 0),
                Operation::new(Opcode::STOP, 1),
            ]
        );
    }
}