evmasm/
lib.rs

1//! The evmasm crate aims to provide a simple interface for the conversion between
//! evm bytecode and its human-readable form.
3//!
4//!```
5//!extern crate evmasm;
6//!use evmasm::{assemble, disassemble};
7//!
8//!fn main() {
9//!    let bytecode = assemble("PUSH1 2 PUSH1 1 ADD").unwrap();
10//!    println!("{:?}", bytecode);
11//!    let code = disassemble(&bytecode).unwrap();
12//!    for ln in code {
13//!        println!("{}", ln);
14//!    }
15//!}
16//!```
17//!
18//!```
19//!extern crate evmasm;
20//!use evmasm::{BYTE_INST, instruction, arguments_size};
21//!
22//!fn main() {
23//!    for (&bc, _) in BYTE_INST.iter() {
24//!        let inst = instruction(bc).ok().unwrap();
//!        println!("0x{:02x} - {} - needs {} bytes of arguments",
26//!                 bc,
27//!                 inst,
28//!                 arguments_size(bc).ok().unwrap());
29//!    }
30//!}
31//!```
32
33use std::iter;
34
35#[macro_use]
36extern crate lazy_static;
37
38extern crate hex;
39use hex::{ToHex, FromHexError};
40
41extern crate num;
42use num::bigint::{BigUint, ParseBigIntError};
43use num::traits::Num;
44
45mod instructions;
46pub use instructions::{BYTE_INST, INST_BYTE};
47
/// Errors reported while assembling or disassembling EVM code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// The byte does not correspond to any known opcode.
    UnknownBytecode(u8),
    /// The mnemonic does not correspond to any known instruction.
    UnknownInstruction(String),
    /// The bytecode ended before all the argument bytes of an instruction.
    NotEnoughBytes,
    /// A literal value is invalid.
    BadValue,
    /// An instruction that takes a value was not followed by one.
    MissingPushValue,
}

impl ::std::fmt::Display for Error {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        match *self {
            Error::UnknownBytecode(bc) => write!(f, "unknown bytecode 0x{:02x}", bc),
            Error::UnknownInstruction(ref inst) => write!(f, "unknown instruction `{}`", inst),
            Error::NotEnoughBytes => f.write_str("not enough bytes for the instruction arguments"),
            Error::BadValue => f.write_str("invalid value"),
            Error::MissingPushValue => f.write_str("missing push value"),
        }
    }
}

// Lets callers box the error (`Box<dyn Error>`) and propagate it with `?`.
impl ::std::error::Error for Error {}
57
58impl From<FromHexError> for Error {
59    fn from(e: FromHexError) -> Error {
60        match e {
61            FromHexError::InvalidHexCharacter { .. } |
62            FromHexError::InvalidHexLength => Error::BadValue,
63        }
64    }
65}
66
67impl From<ParseBigIntError> for Error {
68    fn from(e: ParseBigIntError) -> Error {
69        match e {
70            ParseBigIntError::ParseInt(_) |
71            ParseBigIntError::Other => Error::BadValue,
72        }
73    }
74}
75
76/// Return the bytecode corresponding to the provided instruction
77pub fn bytecode(inst: &str) -> Result<u8, Error> {
78    match INST_BYTE.get(inst) {
79        Some(bc) => Ok(*bc),
80        None => Err(Error::UnknownInstruction(inst.to_string())),
81    }
82}
83
84/// Return the instruction corresponding to the provided bytecode
85pub fn instruction(bytecode: u8) -> Result<&'static str, Error> {
86    match BYTE_INST.get(&bytecode) {
87        Some(i) => Ok(*i),
88        None => Err(Error::UnknownBytecode(bytecode)),
89    }
90}
91
92/// Return the size in bytes of the arguments of a specific bytecode
93pub fn arguments_size(bytecode: u8) -> Result<usize, Error> {
94    if bytecode > 0x5f && bytecode < 0x80 {
95        Ok((bytecode as usize) - 0x5f)
96    } else if BYTE_INST.contains_key(&bytecode) {
97        Ok(0)
98    } else {
99        Err(Error::UnknownBytecode(bytecode))
100    }
101}
102
103// Parse and assemble
104pub fn assemble(code: &str) -> Result<Vec<u8>, Error> {
105    let w: Vec<_> = code.to_string()
106        .split_whitespace()
107        .map(|v| v.to_uppercase())
108        .collect();
109    let mut r = Vec::with_capacity(w.len() * 2);
110    let mut words = w.iter();
111    while let Some(word) = words.next() {
112        let opcode = bytecode(word)?;
113        r.push(opcode);
114        let args_size = arguments_size(opcode)?;
115        if args_size > 0 {
116            let value = match words.next() {
117                Some(v) => parse_value(v)?,
118                None => return Err(Error::MissingPushValue),
119            };
120            let extra = iter::repeat(0 as u8).take(args_size - value.len());
121            r.extend(extra);
122            r.extend(value);
123        }
124    }
125    Ok(r)
126}
127
128fn parse_value(val: &str) -> Result<Vec<u8>, Error> {
129    let (radix, val_str) = if val.starts_with('0') {
130        let vs = &val[1..];
131        if vs.starts_with('X') {
132            (16, &vs[1..])
133        } else if vs.starts_with('B') {
134            (2, &vs[1..])
135        } else {
136            (8, vs)
137        }
138    } else {
139        (10, val)
140    };
141    Ok(BigUint::from_str_radix(val_str, radix)?.to_bytes_be())
142}
143
144// Disassemble
145pub fn disassemble(bytes: &[u8]) -> Result<Vec<String>, Error> {
146    let mut r = Vec::with_capacity(4096);
147    let mut b = bytes;
148    while !b.is_empty() {
149        let opcode = b[0];
150        b = &b[1..];
151        let mut inst = instruction(opcode)?.to_string();
152        let args_size = arguments_size(opcode).unwrap();
153        if args_size != 0 {
154            if b.len() < args_size {
155                return Err(Error::NotEnoughBytes);
156            }
157            let args = &b[..args_size];
158            b = &b[args_size..];
159            let inst_sz = inst.len();
160            inst.reserve(inst_sz + 3 + args_size * 2);
161            inst.push_str(" 0x");
162            inst.push_str(&args.to_vec().to_hex());
163        }
164        r.push(inst);
165    }
166    Ok(r)
167}
168
#[cfg(test)]
mod tests {
    use std::iter;
    use super::{bytecode, assemble, disassemble};

    /// A string that is not a mnemonic must be rejected.
    #[test]
    fn unknown_instruction() {
        assert!(bytecode("hello world").is_err(), "not an instruction");
    }

    /// Assemble a listing with every known instruction, compare against the
    /// expected bytes, then disassemble and expect the original listing back.
    #[test]
    fn all_instructions() {
        // Argument-less mnemonics, one group per contiguous opcode range.
        let groups = [
            "STOP ADD MUL SUB DIV SDIV MOD SMOD ADDMOD MULMOD EXP SIGNEXTEND",
            "LT GT SLT SGT EQ ISZERO AND OR XOR NOT BYTE",
            "SHA3",
            "ADDRESS BALANCE ORIGIN CALLER CALLVALUE CALLDATALOAD CALLDATASIZE CALLDATACOPY CODESIZE CODECOPY TXGASPRICE EXTCODESIZE EXTCODECOPY",
            "BLOCKHASH COINBASE TIMESTAMP NUMBER DIFFICULTY GASLIMIT",
            "POP MLOAD MSTORE MSTORE8 SLOAD SSTORE JUMP JUMPI PC MSIZE GAS JUMPDEST",
            "DUP1 DUP2 DUP3 DUP4 DUP5 DUP6 DUP7 DUP8 DUP9 DUP10 DUP11 DUP12 DUP13 DUP14 DUP15 DUP16",
            "SWAP1 SWAP2 SWAP3 SWAP4 SWAP5 SWAP6 SWAP7 SWAP8 SWAP9 SWAP10 SWAP11 SWAP12 SWAP13 SWAP14 SWAP15 SWAP16",
            "LOG0 LOG1 LOG2 LOG3 LOG4",
            "CREATE CALL CALLCODE RETURN DELEGATECALL SUICIDE",
        ];
        let mut all_instructions = groups.join(" ");

        // Opcodes of the groups above, in the same order.
        let mut exp_opcodes: Vec<u8> = vec![
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
            32,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
            64, 65, 66, 67, 68, 69,
            80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
            128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
            144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
            160, 161, 162, 163, 164,
            240, 241, 242, 243, 244, 255,
        ];

        // PUSH1..PUSH32, each with an all-zero literal of matching width:
        // opcode 0x5f + width followed by `width` zero bytes.
        for width in 1..33usize {
            all_instructions.push_str(&format!(" PUSH{} 0x{}", width, "00".repeat(width)));
            exp_opcodes.push(0x5f + width as u8);
            exp_opcodes.extend(iter::repeat(0u8).take(width));
        }

        let opcodes = assemble(&all_instructions).unwrap();
        assert_eq!(opcodes, exp_opcodes);

        let round_trip = disassemble(&opcodes).unwrap().join(" ");
        assert_eq!(all_instructions, round_trip);
    }
}