Enum evmil::Instruction

source ·

/// An instruction of the `evmil` bytecode representation.
///
/// Most variants carry no operand.  The variants which do carry an
/// operand are documented individually below.
pub enum Instruction {
    STOP,
    ADD,
    MUL,
    SUB,
    DIV,
    SDIV,
    MOD,
    SMOD,
    ADDMOD,
    MULMOD,
    EXP,
    SIGNEXTEND,
    LT,
    GT,
    SLT,
    SGT,
    EQ,
    ISZERO,
    AND,
    OR,
    XOR,
    NOT,
    BYTE,
    SHL,
    SHR,
    SAR,
    KECCAK256,
    ADDRESS,
    BALANCE,
    ORIGIN,
    CALLER,
    CALLVALUE,
    CALLDATALOAD,
    CALLDATASIZE,
    CALLDATACOPY,
    CODESIZE,
    CODECOPY,
    GASPRICE,
    EXTCODESIZE,
    EXTCODECOPY,
    RETURNDATASIZE,
    RETURNDATACOPY,
    EXTCODEHASH,
    BLOCKHASH,
    COINBASE,
    TIMESTAMP,
    NUMBER,
    DIFFICULTY,
    GASLIMIT,
    CHAINID,
    SELFBALANCE,
    POP,
    MLOAD,
    MSTORE,
    MSTORE8,
    SLOAD,
    SSTORE,
    JUMP,
    JUMPI,
    PC,
    MSIZE,
    GAS,
    /// Jump destination marker.
    // NOTE(review): the meaning of the `usize` operand is not visible
    // here; presumably a label index -- confirm.
    JUMPDEST(usize),
    /// Push of literal bytes; `encode` emits the opcode followed by
    /// exactly these bytes.
    PUSH(Vec<u8>),
    /// Push of a label; `encode` emits the bytes of `offsets[idx]` for
    /// the carried index.  `decode` never produces this variant.
    PUSHL(usize),
    // NOTE(review): for DUP, SWAP and LOG the `u8` operand presumably
    // selects the concrete opcode of the family -- confirm.
    DUP(u8),
    SWAP(u8),
    LOG(u8),
    CREATE,
    CALL,
    CALLCODE,
    RETURN,
    DELEGATECALL,
    CREATE2,
    STATICCALL,
    REVERT,
    INVALID,
    SELFDESTRUCT,
    /// Raw bytes which are not treated as executable instructions
    /// (e.g. the contents of a block found to be unreachable).
    DATA(Vec<u8>),
}

Variants§

§

DATA(Vec<u8>)

Implementations§

source §

impl Instruction

source

pub fn fallthru(&self) -> bool

Determine whether or not control can continue to the next instruction.

source

pub fn can_branch(&self) -> bool

Determine whether or not this instruction can branch. That is, whether or not it is a JUMP or JUMPI instruction.

Examples found in repository ?

src/disassembler.rs (line 315)

    /// Apply flow analysis to refine the results of this disassembly.
    ///
    /// Sweeps over all blocks repeatedly until a complete sweep finishes
    /// without any `merge` call reporting a change.  Within a reachable
    /// block, the state is threaded through each decoded instruction
    /// using `transfer`; at every instruction for which `can_branch()`
    /// holds, a branch state is merged into the context of the block
    /// enclosing the branch target.
    ///
    /// # Panics
    ///
    /// Panics if unwrapping `ctx.peek(0)` fails at a branching
    /// instruction, or if the branch target lies within no block (see
    /// `get_enclosing_block_id`).
    pub fn build(mut self) -> Self {
        let mut changed = true;
        //
        while changed {
            // Reset indicator
            changed = false;
            // Iterate blocks in order
            for i in 0..self.blocks.len() {
                // Sanity check whether block unreachable.
                if !self.is_block_reachable(i) { continue; }
                // Yes, is reachable so continue.
                let blk = &self.blocks[i];
                let mut ctx = self.contexts[i].clone();
                let mut pc = blk.start;
                // println!("BLOCK (start={}, end={}): {:?}", pc, blk.end, i);
                // println!("CONTEXT (pc={}): {}", pc, ctx);
                // Parse the block
                while pc < blk.end {
                    // Decode instruction at the current position
                    let insn = Instruction::decode(pc,&self.bytes);
                    // Check whether a branch is possible
                    if insn.can_branch() {
                        // Determine branch target (read from the state
                        // *before* this instruction is applied).
                        let target = ctx.peek(0).unwrap();
                        // Determine branch context
                        let branch_ctx = ctx.branch(target,&insn);
                        // Convert target into block ID.
                        let block_id = self.get_enclosing_block_id(target);
                        // println!("Branch: target={} (block {})",target,block_id);
                        // println!("Before merge (pc={}): {}", pc, self.contexts[block_id]);
                        // Merge in updated state
                        changed |= self.contexts[block_id].merge(branch_ctx);
                        // println!("After merge (pc={}): {}", pc, self.contexts[block_id]);
                    }
                    // Apply the transfer function!
                    // print!("{:#08x}: {}",pc,ctx);
                    ctx = ctx.transfer(&insn);
                    // println!(" ==>\t{:?}\t==> {}",insn,ctx);
                    // Next instruction
                    pc = pc + insn.length(&[]);
                }
                // Merge state into following block.
                // NOTE(review): this merge is unconditional, even when the
                // block ends in a terminating instruction; presumably
                // `transfer` yields an unreachable state there -- confirm.
                if (i+1) < self.blocks.len() {
                    changed |= self.contexts[i+1].merge(ctx);
                }
            }
        }
        self
    }

source

pub fn encode(&self, offsets: &[Offset], bytes: &mut Vec<u8>) -> Result<(), Error>

Encode an instruction into a byte sequence, assuming a given set of label offsets.

Examples found in repository ?

src/bytecode.rs (line 65)

    /// Encode every instruction of this bytecode, in order, into a
    /// single byte sequence.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `encode`, in which case no
    /// byte sequence is produced.
    pub fn to_bytes(&self) -> Result<Vec<u8>,instruction::Error> {
        // Label offsets are computed once and shared by all instructions.
        let label_offsets = self.determine_offsets();
        let mut encoded = Vec::new();
        // Stops at the first instruction which fails to encode.
        self.bytecodes
            .iter()
            .try_for_each(|insn| insn.encode(&label_offsets, &mut encoded))?;
        Ok(encoded)
    }

source

pub fn length(&self, _offsets: &[Offset]) -> usize

Determine the length of this instruction (in bytes) assuming a given set of label offsets.

Examples found in repository ?

src/disassembler.rs (line 138)

    /// Reconstruct the abstract state at bytecode location `loc`.
    ///
    /// Starts from the stored context of the block enclosing `loc` and
    /// replays the transfer function over every instruction of that
    /// block which begins before `loc`.
    ///
    /// # Panics
    ///
    /// Panics if `loc` lies within no block.
    pub fn get_state(&self, loc: usize) -> T {
        let block_id = self.get_enclosing_block_id(loc);
        // Begin with the state on entry to the enclosing block.
        let mut state = self.contexts[block_id].clone();
        let mut cursor = self.blocks[block_id].start;
        while cursor < loc {
            let decoded = Instruction::decode(cursor, &self.bytes);
            state = state.transfer(&decoded);
            // Advance past the instruction just replayed.
            cursor += decoded.length(&[]);
        }
        state
    }

    /// Find the first block which encloses the bytecode location `pc`.
    ///
    /// # Panics
    ///
    /// Panics with "invalid bytecode address" if no block encloses `pc`.
    pub fn get_enclosing_block(&self, pc: usize) -> &Block {
        self.blocks
            .iter()
            .find(|candidate| candidate.encloses(pc))
            .unwrap_or_else(|| panic!("invalid bytecode address"))
    }

    /// Report whether block `id` is currently considered reachable.
    /// The root block (`id=0`) is _always_ reachable; any other block
    /// is reachable exactly when its context says so.
    pub fn is_block_reachable(&self, id: usize) -> bool {
        if id == 0 {
            // Root block: its context is not consulted.
            return true;
        }
        self.contexts[id].is_reachable()
    }

    /// Read the bytes in `start..end` from the bytecode program,
    /// padding with zeros wherever the range extends beyond the end of
    /// the program.  The result always holds `end - start` bytes.
    ///
    /// # Panics
    ///
    /// May panic if `end < start` (the subtraction or the slice range
    /// is then invalid).
    pub fn read_bytes(&self, start: usize, end: usize) -> Vec<u8> {
        let n = self.bytes.len();

        if start >= n {
            // Entirely beyond the program: nothing but padding.
            vec![0; end - start]
        } else if end > n {
            // Copy the portion which exists ...
            let mut slice = self.bytes[start..n].to_vec();
            // ... then zero-pad up to the requested length.  (The
            // previous loop ran over `end .. n`, which is empty in this
            // branch, so no padding was ever appended.)
            slice.resize(end - start, 0);
            slice
        } else {
            // Easy case: the range lies wholly within the program.
            self.bytes[start..end].to_vec()
        }
    }

    /// Convert this disassembly into one over a different abstract
    /// state `S` (ideally a more precise one), translating every block
    /// context with `S::from`.  The bytes and blocks are carried over
    /// unchanged.  This consumes the original disassembly.
    pub fn refine<S>(self) -> Disassembly<'a,S>
    where S:AbstractState+From<T> {
        // Translate the contexts one-for-one, preserving their order.
        let contexts = self.contexts.into_iter().map(S::from).collect();
        Disassembly { bytes: self.bytes, blocks: self.blocks, contexts }
    }

    /// Flatten the disassembly into a sequence of instructions.
    ///
    /// Blocks are visited in order.  The root block and every block
    /// whose context is reachable are disassembled; any other block is
    /// emitted as a single `DATA` instruction holding its bytes.
    pub fn to_vec(&self) -> Vec<Instruction> {
        let mut flattened = Vec::new();
        for (idx, block) in self.blocks.iter().enumerate() {
            let state = &self.contexts[idx];
            let is_code = idx == 0 || state.is_reachable();
            if !is_code {
                // Unreachable, hence treated as raw data.
                flattened.push(DATA(self.read_bytes(block.start, block.end)));
                continue;
            }
            self.disassemble_into(block, &mut flattened);
        }
        flattened
    }


    // ================================================================
    // Helpers
    // ================================================================

    /// Decode the instructions of `blk` one after another, appending
    /// each to `insns`.
    fn disassemble_into(&self, blk: &Block, insns: &mut Vec<Instruction>) {
        let mut cursor = blk.start;
        while cursor < blk.end {
            let decoded = Instruction::decode(cursor, &self.bytes);
            // Step over the instruction before handing it off.
            cursor += decoded.length(&[]);
            insns.push(decoded);
        }
    }

    /// Split the bytecode into blocks using a single linear scan.
    /// This over-approximates the truth, since some of the resulting
    /// blocks may turn out to be unreachable (e.g. they are data).
    ///
    /// A block ends after `INVALID`, `JUMP`, `RETURN`, `REVERT` or
    /// `STOP`, and a new block begins at every `JUMPDEST` which is not
    /// already the start of the current block.
    fn scan_blocks(bytes: &[u8]) -> Vec<Block> {
        let mut blocks = Vec::new();
        // Position of the instruction being decoded.
        let mut pc = 0;
        // Start of the block currently being accumulated.
        let mut start = 0;
        while pc < bytes.len() {
            let insn = Instruction::decode(pc, bytes);
            // Position immediately following this instruction.
            let next = pc + insn.length(&[]);
            // Where (if anywhere) this instruction closes the current block.
            let boundary = match insn {
                // Block is cut *before* the jump destination.
                JUMPDEST(_) if (next - 1) != start => Some(next - 1),
                // Block is cut *after* a terminating instruction.
                INVALID | JUMP | RETURN | REVERT | STOP => Some(next),
                _ => None,
            };
            if let Some(end) = boundary {
                blocks.push(Block::new(start, end));
                start = end;
            }
            pc = next;
        }
        // Whatever remains forms the final block.
        if start != pc {
            blocks.push(Block::new(start, pc));
        }
        blocks
    }


    /// Find the index of the first block which encloses the bytecode
    /// address `pc`.
    ///
    /// # Panics
    ///
    /// Panics with "invalid bytecode address" if no block encloses `pc`.
    fn get_enclosing_block_id(&self, pc: usize) -> usize {
        self.blocks
            .iter()
            .position(|candidate| candidate.encloses(pc))
            .unwrap_or_else(|| panic!("invalid bytecode address"))
    }
}

impl<'a,T> Disassembly<'a,T>
where T:AbstractState+fmt::Display {

    /// Apply flow analysis to refine the results of this disassembly.
    ///
    /// Sweeps over all blocks repeatedly until a complete sweep finishes
    /// without any `merge` call reporting a change.  Within a reachable
    /// block, the state is threaded through each decoded instruction
    /// using `transfer`; at every instruction for which `can_branch()`
    /// holds, a branch state is merged into the context of the block
    /// enclosing the branch target.
    ///
    /// # Panics
    ///
    /// Panics if unwrapping `ctx.peek(0)` fails at a branching
    /// instruction, or if the branch target lies within no block (see
    /// `get_enclosing_block_id`).
    pub fn build(mut self) -> Self {
        let mut changed = true;
        //
        while changed {
            // Reset indicator
            changed = false;
            // Iterate blocks in order
            for i in 0..self.blocks.len() {
                // Sanity check whether block unreachable.
                if !self.is_block_reachable(i) { continue; }
                // Yes, is reachable so continue.
                let blk = &self.blocks[i];
                let mut ctx = self.contexts[i].clone();
                let mut pc = blk.start;
                // println!("BLOCK (start={}, end={}): {:?}", pc, blk.end, i);
                // println!("CONTEXT (pc={}): {}", pc, ctx);
                // Parse the block
                while pc < blk.end {
                    // Decode instruction at the current position
                    let insn = Instruction::decode(pc,&self.bytes);
                    // Check whether a branch is possible
                    if insn.can_branch() {
                        // Determine branch target (read from the state
                        // *before* this instruction is applied).
                        let target = ctx.peek(0).unwrap();
                        // Determine branch context
                        let branch_ctx = ctx.branch(target,&insn);
                        // Convert target into block ID.
                        let block_id = self.get_enclosing_block_id(target);
                        // println!("Branch: target={} (block {})",target,block_id);
                        // println!("Before merge (pc={}): {}", pc, self.contexts[block_id]);
                        // Merge in updated state
                        changed |= self.contexts[block_id].merge(branch_ctx);
                        // println!("After merge (pc={}): {}", pc, self.contexts[block_id]);
                    }
                    // Apply the transfer function!
                    // print!("{:#08x}: {}",pc,ctx);
                    ctx = ctx.transfer(&insn);
                    // println!(" ==>\t{:?}\t==> {}",insn,ctx);
                    // Next instruction
                    pc = pc + insn.length(&[]);
                }
                // Merge state into following block.
                // NOTE(review): this merge is unconditional, even when the
                // block ends in a terminating instruction; presumably
                // `transfer` yields an unreachable state there -- confirm.
                if (i+1) < self.blocks.len() {
                    changed |= self.contexts[i+1].merge(ctx);
                }
            }
        }
        self
    }

source

pub fn opcode(&self, offsets: &[Offset]) -> Result<u8, Error>

Determine the opcode for a given instruction. In many cases, this is a straightforward mapping. However, in other cases, it's slightly more involved as a calculation involving the operands is required.

Examples found in repository ?

src/instruction.rs (line 171)

    /// Append the encoding of this instruction to `bytes`: first its
    /// opcode, then any operand bytes.
    ///
    /// Only `PUSH` (its literal bytes) and `PUSHL` (the bytes of the
    /// label offset it refers to) contribute operand bytes.
    ///
    /// # Errors
    ///
    /// Returns any error reported by `opcode`; `bytes` is left
    /// untouched in that case.
    ///
    /// # Panics
    ///
    /// Panics if a `PUSHL` index is out of bounds for `offsets`.
    pub fn encode(&self, offsets: &[Offset], bytes: &mut Vec<u8>) -> Result<(),Error> {
        // Resolve the opcode before writing anything.
        let opcode = self.opcode(offsets)?;
        bytes.push(opcode);
        // Operands follow for the two push forms only.
        if let Instruction::PUSH(literal) = self {
            bytes.extend(literal);
        } else if let Instruction::PUSHL(label) = self {
            bytes.extend(offsets[*label].to_bytes());
        }
        Ok(())
    }

source

pub fn decode(pc: usize, bytes: &[u8]) -> Instruction

Decode the next instruction in a given sequence of bytes. Observe that this never returns a PUSHL instruction. This is because it cannot determine whether a given operand will be used as a jump destination. A separate analysis is required to “lift” PUSH instructions to PUSHL instructions.

Examples found in repository ?

src/disassembler.rs (line 134)

    /// Reconstruct the abstract state at bytecode location `loc`.
    ///
    /// Starts from the stored context of the block enclosing `loc` and
    /// replays the transfer function over every instruction of that
    /// block which begins before `loc`.
    ///
    /// # Panics
    ///
    /// Panics if `loc` lies within no block.
    pub fn get_state(&self, loc: usize) -> T {
        let block_id = self.get_enclosing_block_id(loc);
        // Begin with the state on entry to the enclosing block.
        let mut state = self.contexts[block_id].clone();
        let mut cursor = self.blocks[block_id].start;
        while cursor < loc {
            let decoded = Instruction::decode(cursor, &self.bytes);
            state = state.transfer(&decoded);
            // Advance past the instruction just replayed.
            cursor += decoded.length(&[]);
        }
        state
    }

    /// Find the first block which encloses the bytecode location `pc`.
    ///
    /// # Panics
    ///
    /// Panics with "invalid bytecode address" if no block encloses `pc`.
    pub fn get_enclosing_block(&self, pc: usize) -> &Block {
        self.blocks
            .iter()
            .find(|candidate| candidate.encloses(pc))
            .unwrap_or_else(|| panic!("invalid bytecode address"))
    }

    /// Report whether block `id` is currently considered reachable.
    /// The root block (`id=0`) is _always_ reachable; any other block
    /// is reachable exactly when its context says so.
    pub fn is_block_reachable(&self, id: usize) -> bool {
        if id == 0 {
            // Root block: its context is not consulted.
            return true;
        }
        self.contexts[id].is_reachable()
    }

    /// Read the bytes in `start..end` from the bytecode program,
    /// padding with zeros wherever the range extends beyond the end of
    /// the program.  The result always holds `end - start` bytes.
    ///
    /// # Panics
    ///
    /// May panic if `end < start` (the subtraction or the slice range
    /// is then invalid).
    pub fn read_bytes(&self, start: usize, end: usize) -> Vec<u8> {
        let n = self.bytes.len();

        if start >= n {
            // Entirely beyond the program: nothing but padding.
            vec![0; end - start]
        } else if end > n {
            // Copy the portion which exists ...
            let mut slice = self.bytes[start..n].to_vec();
            // ... then zero-pad up to the requested length.  (The
            // previous loop ran over `end .. n`, which is empty in this
            // branch, so no padding was ever appended.)
            slice.resize(end - start, 0);
            slice
        } else {
            // Easy case: the range lies wholly within the program.
            self.bytes[start..end].to_vec()
        }
    }

    /// Convert this disassembly into one over a different abstract
    /// state `S` (ideally a more precise one), translating every block
    /// context with `S::from`.  The bytes and blocks are carried over
    /// unchanged.  This consumes the original disassembly.
    pub fn refine<S>(self) -> Disassembly<'a,S>
    where S:AbstractState+From<T> {
        // Translate the contexts one-for-one, preserving their order.
        let contexts = self.contexts.into_iter().map(S::from).collect();
        Disassembly { bytes: self.bytes, blocks: self.blocks, contexts }
    }

    /// Flatten the disassembly into a sequence of instructions.
    ///
    /// Blocks are visited in order.  The root block and every block
    /// whose context is reachable are disassembled; any other block is
    /// emitted as a single `DATA` instruction holding its bytes.
    pub fn to_vec(&self) -> Vec<Instruction> {
        let mut flattened = Vec::new();
        for (idx, block) in self.blocks.iter().enumerate() {
            let state = &self.contexts[idx];
            let is_code = idx == 0 || state.is_reachable();
            if !is_code {
                // Unreachable, hence treated as raw data.
                flattened.push(DATA(self.read_bytes(block.start, block.end)));
                continue;
            }
            self.disassemble_into(block, &mut flattened);
        }
        flattened
    }


    // ================================================================
    // Helpers
    // ================================================================

    /// Decode the instructions of `blk` one after another, appending
    /// each to `insns`.
    fn disassemble_into(&self, blk: &Block, insns: &mut Vec<Instruction>) {
        let mut cursor = blk.start;
        while cursor < blk.end {
            let decoded = Instruction::decode(cursor, &self.bytes);
            // Step over the instruction before handing it off.
            cursor += decoded.length(&[]);
            insns.push(decoded);
        }
    }

    /// Split the bytecode into blocks using a single linear scan.
    /// This over-approximates the truth, since some of the resulting
    /// blocks may turn out to be unreachable (e.g. they are data).
    ///
    /// A block ends after `INVALID`, `JUMP`, `RETURN`, `REVERT` or
    /// `STOP`, and a new block begins at every `JUMPDEST` which is not
    /// already the start of the current block.
    fn scan_blocks(bytes: &[u8]) -> Vec<Block> {
        let mut blocks = Vec::new();
        // Position of the instruction being decoded.
        let mut pc = 0;
        // Start of the block currently being accumulated.
        let mut start = 0;
        while pc < bytes.len() {
            let insn = Instruction::decode(pc, bytes);
            // Position immediately following this instruction.
            let next = pc + insn.length(&[]);
            // Where (if anywhere) this instruction closes the current block.
            let boundary = match insn {
                // Block is cut *before* the jump destination.
                JUMPDEST(_) if (next - 1) != start => Some(next - 1),
                // Block is cut *after* a terminating instruction.
                INVALID | JUMP | RETURN | REVERT | STOP => Some(next),
                _ => None,
            };
            if let Some(end) = boundary {
                blocks.push(Block::new(start, end));
                start = end;
            }
            pc = next;
        }
        // Whatever remains forms the final block.
        if start != pc {
            blocks.push(Block::new(start, pc));
        }
        blocks
    }


    /// Find the index of the first block which encloses the bytecode
    /// address `pc`.
    ///
    /// # Panics
    ///
    /// Panics with "invalid bytecode address" if no block encloses `pc`.
    fn get_enclosing_block_id(&self, pc: usize) -> usize {
        self.blocks
            .iter()
            .position(|candidate| candidate.encloses(pc))
            .unwrap_or_else(|| panic!("invalid bytecode address"))
    }
}

impl<'a,T> Disassembly<'a,T>
where T:AbstractState+fmt::Display {

    /// Apply flow analysis to refine the results of this disassembly.
    ///
    /// Sweeps over all blocks repeatedly until a complete sweep finishes
    /// without any `merge` call reporting a change.  Within a reachable
    /// block, the state is threaded through each decoded instruction
    /// using `transfer`; at every instruction for which `can_branch()`
    /// holds, a branch state is merged into the context of the block
    /// enclosing the branch target.
    ///
    /// # Panics
    ///
    /// Panics if unwrapping `ctx.peek(0)` fails at a branching
    /// instruction, or if the branch target lies within no block (see
    /// `get_enclosing_block_id`).
    pub fn build(mut self) -> Self {
        let mut changed = true;
        //
        while changed {
            // Reset indicator
            changed = false;
            // Iterate blocks in order
            for i in 0..self.blocks.len() {
                // Sanity check whether block unreachable.
                if !self.is_block_reachable(i) { continue; }
                // Yes, is reachable so continue.
                let blk = &self.blocks[i];
                let mut ctx = self.contexts[i].clone();
                let mut pc = blk.start;
                // println!("BLOCK (start={}, end={}): {:?}", pc, blk.end, i);
                // println!("CONTEXT (pc={}): {}", pc, ctx);
                // Parse the block
                while pc < blk.end {
                    // Decode instruction at the current position
                    let insn = Instruction::decode(pc,&self.bytes);
                    // Check whether a branch is possible
                    if insn.can_branch() {
                        // Determine branch target (read from the state
                        // *before* this instruction is applied).
                        let target = ctx.peek(0).unwrap();
                        // Determine branch context
                        let branch_ctx = ctx.branch(target,&insn);
                        // Convert target into block ID.
                        let block_id = self.get_enclosing_block_id(target);
                        // println!("Branch: target={} (block {})",target,block_id);
                        // println!("Before merge (pc={}): {}", pc, self.contexts[block_id]);
                        // Merge in updated state
                        changed |= self.contexts[block_id].merge(branch_ctx);
                        // println!("After merge (pc={}): {}", pc, self.contexts[block_id]);
                    }
                    // Apply the transfer function!
                    // print!("{:#08x}: {}",pc,ctx);
                    ctx = ctx.transfer(&insn);
                    // println!(" ==>\t{:?}\t==> {}",insn,ctx);
                    // Next instruction
                    pc = pc + insn.length(&[]);
                }
                // Merge state into following block.
                // NOTE(review): this merge is unconditional, even when the
                // block ends in a terminating instruction; presumably
                // `transfer` yields an unreachable state there -- confirm.
                if (i+1) < self.blocks.len() {
                    changed |= self.contexts[i+1].merge(ctx);
                }
            }
        }
        self
    }