use crate::{
analysis::x86::types::{
X86Condition, X86DecodedInstruction, X86EpilogueInfo, X86Instruction, X86Memory,
X86Operand, X86PrologueInfo, X86PrologueKind, X86Register,
},
Error, Result,
};
use iced_x86::{Decoder, DecoderOptions, Instruction, Mnemonic, OpKind, Register};
use rustc_hash::FxHashSet;
use std::collections::VecDeque;
/// Linearly decodes x86 machine code, stopping after the first `ret`.
///
/// Instructions are decoded back to back starting at `bytes[0]`. Decoding ends
/// when the input is exhausted or as soon as an instruction converts to
/// [`X86Instruction::Ret`] (that instruction is still included in the output).
///
/// # Arguments
///
/// * `bytes` - Raw machine code.
/// * `bitness` - Decoder mode; must be `32` or `64`.
/// * `base_address` - Address assigned to `bytes[0]`. Reported offsets are
///   relative to this address.
///
/// # Errors
///
/// Returns [`Error::X86Error`] when `bytes` is empty, when `bitness` is not
/// 32 or 64, or when an invalid instruction is encountered. Errors produced
/// while converting an instruction are propagated unchanged.
pub fn x86_decode_all(
    bytes: &[u8],
    bitness: u32,
    base_address: u64,
) -> Result<Vec<X86DecodedInstruction>> {
    if bytes.is_empty() {
        return Err(Error::X86Error("Empty input".to_string()));
    }
    if !matches!(bitness, 32 | 64) {
        return Err(Error::X86Error(format!(
            "Invalid bitness {bitness}, must be 32 or 64"
        )));
    }

    let mut decoded = Vec::new();
    let mut decoder = Decoder::with_ip(bitness, bytes, base_address, DecoderOptions::NONE);

    for raw in &mut decoder {
        let offset = raw.ip() - base_address;
        if raw.is_invalid() {
            return Err(Error::X86Error(format!(
                "Invalid instruction at offset 0x{offset:x}"
            )));
        }

        let instruction = convert_instruction(&raw, base_address)?;
        // Remember whether this was a return before the value is moved into the list.
        let ends_function = matches!(instruction, X86Instruction::Ret);

        decoded.push(X86DecodedInstruction {
            offset,
            length: raw.len(),
            instruction,
        });

        if ends_function {
            break;
        }
    }

    Ok(decoded)
}
/// Outcome of a control-flow-following decode, as produced by
/// [`x86_decode_traversal`].
#[derive(Debug)]
pub struct X86TraversalDecodeResult {
/// Every instruction that was reached; `x86_decode_traversal` returns them
/// sorted by ascending offset.
pub instructions: Vec<X86DecodedInstruction>,
/// Addresses the traversal could not follow. `x86_decode_traversal` records
/// direct `jmp`/`call` targets that lie outside the supplied code range and,
/// for `jmp`/`call` instructions whose conversion failed, the address of the
/// instruction itself.
pub unresolved_targets: Vec<u64>,
/// Set by `x86_decode_traversal` when a `jmp` or `call` could not be
/// converted to a direct-target instruction.
pub has_indirect_control_flow: bool,
}
/// Decodes x86 machine code by following control flow from an entry point.
///
/// Starting at `base_address + entry_offset`, a worklist of addresses is
/// processed first-in-first-out. At each address exactly one instruction is
/// decoded and its successors (fall-through and/or direct branch target) are
/// queued if they lie inside the buffer and have not been queued before.
///
/// # Arguments
///
/// * `bytes` - Raw machine code.
/// * `bitness` - Decoder mode; must be `32` or `64`.
/// * `base_address` - Address assigned to `bytes[0]`.
/// * `entry_offset` - Offset of the first instruction, relative to `base_address`.
///
/// # Returns
///
/// The reached instructions sorted by offset, the list of targets that could
/// not be followed, and a flag telling whether an unconvertible `jmp`/`call`
/// was seen.
///
/// # Errors
///
/// Returns [`Error::X86Error`] when `bytes` is empty or `bitness` is not 32
/// or 64. Invalid instructions and conversion failures met during the
/// traversal do NOT produce an error (see the inline comments).
pub fn x86_decode_traversal(
bytes: &[u8],
bitness: u32,
base_address: u64,
entry_offset: u64,
) -> Result<X86TraversalDecodeResult> {
if bytes.is_empty() {
return Err(Error::X86Error("Empty input".to_string()));
}
if bitness != 32 && bitness != 64 {
return Err(Error::X86Error(format!(
"Invalid bitness {bitness}, must be 32 or 64"
)));
}
// Half-open address range [code_start, code_end) covered by `bytes`.
// NOTE(review): these additions (and the entry address below) are unchecked;
// a `base_address` close to `u64::MAX` would overflow — confirm callers never
// pass such values.
let code_start = base_address;
let code_end = base_address + bytes.len() as u64;
let mut worklist: VecDeque<u64> = VecDeque::new();
// `visited` holds every address that has ever been queued, so an address is
// processed at most once.
let mut visited: FxHashSet<u64> = FxHashSet::default();
let mut instructions: Vec<X86DecodedInstruction> = Vec::new();
let mut unresolved_targets: Vec<u64> = Vec::new();
let mut has_indirect = false;
worklist.push_back(base_address + entry_offset);
visited.insert(base_address + entry_offset);
while let Some(addr) = worklist.pop_front() {
// Addresses outside the buffer (e.g. an out-of-range entry point) are dropped.
if addr < code_start || addr >= code_end {
continue;
}
// Cannot truncate: the range check above guarantees
// `addr - base_address < bytes.len()`.
#[allow(clippy::cast_possible_truncation)]
let offset_in_bytes = (addr - base_address) as usize;
let remaining_bytes = &bytes[offset_in_bytes..];
if remaining_bytes.is_empty() {
continue;
}
// A fresh decoder is created per address; only its first instruction is used.
let mut decoder = Decoder::with_ip(bitness, remaining_bytes, addr, DecoderOptions::NONE);
if let Some(instr) = decoder.iter().next() {
// Undecodable bytes end this path silently instead of failing the whole traversal.
if instr.is_invalid() {
continue;
}
let offset = addr - base_address;
let length = instr.len();
// Reject an instruction whose byte range intersects one that was already
// accepted; the earlier decode wins.
// NOTE(review): this is a linear scan per instruction, so the traversal is
// quadratic in the number of decoded instructions.
let overlaps = instructions.iter().any(|existing| {
let existing_start = existing.offset;
let existing_end = existing.offset + existing.length as u64;
let new_start = offset;
let new_end = offset + length as u64;
new_start < existing_end && new_end > existing_start
});
if overlaps {
continue;
}
// Conversion failures are kept as `Unsupported` placeholders carrying the
// mnemonic's debug name, so the instruction still occupies its byte range.
let converted = match convert_instruction(&instr, base_address) {
Ok(i) => i,
Err(_) => X86Instruction::Unsupported {
offset,
mnemonic: format!("{:?}", instr.mnemonic()),
},
};
let next_addr = addr + length as u64;
match &converted {
// A return has no successors.
X86Instruction::Ret => {
}
// Unconditional jump: follow an in-range target once, record an
// out-of-range target as unresolved. No fall-through.
X86Instruction::Jmp { target } => {
if *target >= code_start && *target < code_end && visited.insert(*target) {
worklist.push_back(*target);
} else if *target < code_start || *target >= code_end {
unresolved_targets.push(*target);
}
}
// Conditional jump: queue both the target and the fall-through.
// NOTE(review): unlike `Jmp`/`Call`, an out-of-range target is not added
// to `unresolved_targets` here — confirm this is intended.
X86Instruction::Jcc { target, .. } => {
if *target >= code_start && *target < code_end && visited.insert(*target) {
worklist.push_back(*target);
}
if next_addr < code_end && visited.insert(next_addr) {
worklist.push_back(next_addr);
}
}
// Call: continue after the call; the callee itself is never followed,
// and only an out-of-range callee is recorded as unresolved.
X86Instruction::Call { target } => {
if next_addr < code_end && visited.insert(next_addr) {
worklist.push_back(next_addr);
}
if *target < code_start || *target >= code_end {
unresolved_targets.push(*target);
}
}
// A `jmp`/`call` that failed conversion is flagged as indirect control flow
// and the address of the instruction itself (not a target) is recorded.
// NOTE(review): the fall-through is queued even when the mnemonic is `Jmp`
// — confirm that decoding past an unconvertible jump is desired.
X86Instruction::Unsupported { .. } => {
if instr.mnemonic() == Mnemonic::Jmp || instr.mnemonic() == Mnemonic::Call {
has_indirect = true;
unresolved_targets.push(addr);
}
if next_addr < code_end && visited.insert(next_addr) {
worklist.push_back(next_addr);
}
}
// Every other instruction simply falls through.
_ => {
if next_addr < code_end && visited.insert(next_addr) {
worklist.push_back(next_addr);
}
}
}
instructions.push(X86DecodedInstruction {
offset,
length,
instruction: converted,
});
}
}
// Worklist order is not address order, so sort before returning.
instructions.sort_by_key(|i| i.offset);
Ok(X86TraversalDecodeResult {
instructions,
unresolved_targets,
has_indirect_control_flow: has_indirect,
})
}
/// Estimates the size in bytes of a native function body.
///
/// The code is traversed with [`x86_decode_traversal`] from offset 0 (base
/// address 0); the result is the largest end offset (`offset + length`) of
/// any reached instruction.
///
/// Returns `0` when `bytes` is empty, when the traversal fails, or when no
/// instruction was decoded.
#[must_use]
pub fn x86_native_body_size(bytes: &[u8], is_64bit: bool) -> usize {
    if bytes.is_empty() {
        return 0;
    }

    let bitness = if is_64bit { 64 } else { 32 };
    let Ok(traversal) = x86_decode_traversal(bytes, bitness, 0, 0) else {
        return 0;
    };

    // Track the furthest byte touched by any decoded instruction.
    traversal.instructions.iter().fold(0, |furthest_end, decoded| {
        furthest_end.max(decoded.offset as usize + decoded.length)
    })
}
pub fn x86_decode_single(
bytes: &[u8],
bitness: u32,
base_address: u64,
offset: u64,
) -> Result<X86DecodedInstruction> {
if bytes.is_empty() {
return Err(Error::X86Error("Empty input".to_string()));
}
if bitness != 32 && bitness != 64 {
return Err(Error::X86Error(format!(
"Invalid bitness {bitness}, must be 32 or 64"
)));
}
#[allow(clippy::cast_possible_truncation)]
let offset_in_bytes = offset as usize;
if offset_in_bytes >= bytes.len() {
return Err(Error::X86Error(format!(
"Invalid instruction at offset 0x{offset:x}"
)));
}
let remaining = &bytes[offset_in_bytes..];
let mut decoder = Decoder::with_ip(
bitness,
remaining,
base_address + offset,
DecoderOptions::NONE,
);
if let Some(instr) = decoder.iter().next() {
if instr.is_invalid() {
return Err(Error::X86Error(format!(
"Invalid instruction at offset 0x{offset:x}"
)));
}
let converted = convert_instruction(&instr, base_address)?;
Ok(X86DecodedInstruction {
offset,
length: instr.len(),
instruction: converted,
})
} else {
Err(Error::X86Error(format!(
"Invalid instruction at offset 0x{offset:x}"
)))
}
}
fn convert_instruction(instr: &Instruction, base_address: u64) -> Result<X86Instruction> {
let offset = instr.ip() - base_address;
match instr.mnemonic() {
Mnemonic::Mov => convert_mov(instr),
Mnemonic::Movzx => convert_movzx(instr),
Mnemonic::Movsx | Mnemonic::Movsxd => convert_movsx(instr),
Mnemonic::Lea => convert_lea(instr),
Mnemonic::Push => convert_push(instr),
Mnemonic::Pop => convert_pop(instr),
Mnemonic::Xchg => convert_xchg(instr),
Mnemonic::Add => convert_binary_op(instr, |dst, src| X86Instruction::Add { dst, src }),
Mnemonic::Sub => convert_binary_op(instr, |dst, src| X86Instruction::Sub { dst, src }),
Mnemonic::Adc => convert_binary_op(instr, |dst, src| X86Instruction::Adc { dst, src }),
Mnemonic::Sbb => convert_binary_op(instr, |dst, src| X86Instruction::Sbb { dst, src }),
Mnemonic::Imul => convert_imul(instr),
Mnemonic::Mul => convert_mul(instr),
Mnemonic::Div => Ok(X86Instruction::Div {
src: convert_operand(instr, 0)?,
}),
Mnemonic::Idiv => Ok(X86Instruction::Idiv {
src: convert_operand(instr, 0)?,
}),
Mnemonic::Neg => convert_unary_op(instr, |dst| X86Instruction::Neg { dst }),
Mnemonic::Inc => convert_unary_op(instr, |dst| X86Instruction::Inc { dst }),
Mnemonic::Dec => convert_unary_op(instr, |dst| X86Instruction::Dec { dst }),
Mnemonic::And => convert_binary_op(instr, |dst, src| X86Instruction::And { dst, src }),
Mnemonic::Or => convert_binary_op(instr, |dst, src| X86Instruction::Or { dst, src }),
Mnemonic::Xor => convert_binary_op(instr, |dst, src| X86Instruction::Xor { dst, src }),
Mnemonic::Not => convert_unary_op(instr, |dst| X86Instruction::Not { dst }),
Mnemonic::Shl | Mnemonic::Sal => {
convert_shift(instr, |dst, count| X86Instruction::Shl { dst, count })
}
Mnemonic::Shr => convert_shift(instr, |dst, count| X86Instruction::Shr { dst, count }),
Mnemonic::Sar => convert_shift(instr, |dst, count| X86Instruction::Sar { dst, count }),
Mnemonic::Rol => convert_shift(instr, |dst, count| X86Instruction::Rol { dst, count }),
Mnemonic::Ror => convert_shift(instr, |dst, count| X86Instruction::Ror { dst, count }),
Mnemonic::Bswap => Ok(X86Instruction::Bswap {
dst: convert_register(instr.op0_register())?,
}),
Mnemonic::Bsf => Ok(X86Instruction::Bsf {
dst: convert_register(instr.op0_register())?,
src: convert_operand(instr, 1)?,
}),
Mnemonic::Bsr => Ok(X86Instruction::Bsr {
dst: convert_register(instr.op0_register())?,
src: convert_operand(instr, 1)?,
}),
Mnemonic::Bt => Ok(X86Instruction::Bt {
src: convert_operand(instr, 0)?,
bit: convert_operand(instr, 1)?,
}),
Mnemonic::Bts | Mnemonic::Btr | Mnemonic::Btc => {
Ok(X86Instruction::Bt {
src: convert_operand(instr, 0)?,
bit: convert_operand(instr, 1)?,
})
}
Mnemonic::Xadd => convert_binary_op(instr, |dst, src| X86Instruction::Xadd { dst, src }),
Mnemonic::Cmove
| Mnemonic::Cmovne
| Mnemonic::Cmovl
| Mnemonic::Cmovge
| Mnemonic::Cmovle
| Mnemonic::Cmovg
| Mnemonic::Cmovb
| Mnemonic::Cmovae
| Mnemonic::Cmovbe
| Mnemonic::Cmova
| Mnemonic::Cmovs
| Mnemonic::Cmovns
| Mnemonic::Cmovo
| Mnemonic::Cmovno
| Mnemonic::Cmovp
| Mnemonic::Cmovnp => convert_cmovcc(instr),
Mnemonic::Sete
| Mnemonic::Setne
| Mnemonic::Setl
| Mnemonic::Setge
| Mnemonic::Setle
| Mnemonic::Setg
| Mnemonic::Setb
| Mnemonic::Setae
| Mnemonic::Setbe
| Mnemonic::Seta
| Mnemonic::Sets
| Mnemonic::Setns
| Mnemonic::Seto
| Mnemonic::Setno
| Mnemonic::Setp
| Mnemonic::Setnp => convert_setcc(instr),
Mnemonic::Cmp => convert_cmp(instr),
Mnemonic::Test => convert_test(instr),
Mnemonic::Jmp => convert_jmp(instr),
Mnemonic::Je
| Mnemonic::Jne
| Mnemonic::Jl
| Mnemonic::Jge
| Mnemonic::Jle
| Mnemonic::Jg
| Mnemonic::Jb
| Mnemonic::Jae
| Mnemonic::Jbe
| Mnemonic::Ja
| Mnemonic::Js
| Mnemonic::Jns
| Mnemonic::Jo
| Mnemonic::Jno
| Mnemonic::Jp
| Mnemonic::Jnp => convert_jcc(instr),
Mnemonic::Call => convert_call(instr),
Mnemonic::Ret | Mnemonic::Retf => Ok(X86Instruction::Ret),
Mnemonic::Nop | Mnemonic::Fnop => Ok(X86Instruction::Nop),
Mnemonic::Cdq | Mnemonic::Cqo => Ok(X86Instruction::Cdq),
Mnemonic::Cwde | Mnemonic::Cdqe => Ok(X86Instruction::Cwde),
Mnemonic::Cbw => Ok(X86Instruction::Cwde), Mnemonic::Cwd => Ok(X86Instruction::Cdq),
Mnemonic::Cld | Mnemonic::Std => Ok(X86Instruction::Nop),
Mnemonic::Wait => Ok(X86Instruction::Nop),
_ => Err(Error::X86Error(format!(
"Unsupported instruction '{:?}' at offset 0x{offset:x}",
instr.mnemonic()
))),
}
}
/// Converts a `mov`: operand 0 is the destination, operand 1 the source.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_mov(instr: &Instruction) -> Result<X86Instruction> {
    // Struct fields are evaluated in written order: destination first, then source.
    Ok(X86Instruction::Mov {
        dst: convert_operand(instr, 0)?,
        src: convert_operand(instr, 1)?,
    })
}
/// Converts a `movzx`: operand 0 is the destination, operand 1 the source.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_movzx(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Movzx {
        dst: convert_operand(instr, 0)?,
        src: convert_operand(instr, 1)?,
    })
}
/// Converts a sign-extending move (`movsx`/`movsxd` are both routed here):
/// operand 0 is the destination, operand 1 the source.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_movsx(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Movsx {
        dst: convert_operand(instr, 0)?,
        src: convert_operand(instr, 1)?,
    })
}
/// Converts an `lea`: the destination is the register in operand 0 and the
/// source is the instruction's memory operand (the address expression).
///
/// # Errors
///
/// Propagates register and memory-operand conversion errors.
fn convert_lea(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Lea {
        dst: convert_register(instr.op0_register())?,
        src: convert_memory_operand(instr, 1)?,
    })
}
/// Converts a `push`; operand 0 is the value being pushed.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_push(instr: &Instruction) -> Result<X86Instruction> {
    convert_operand(instr, 0).map(|src| X86Instruction::Push { src })
}
/// Converts a `pop` whose destination is the register in operand 0.
///
/// # Errors
///
/// Propagates the error from `convert_register` when operand 0 is not a
/// supported register.
fn convert_pop(instr: &Instruction) -> Result<X86Instruction> {
    convert_register(instr.op0_register()).map(|dst| X86Instruction::Pop { dst })
}
/// Converts an `xchg`; operands 0 and 1 become `dst` and `src` respectively.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_xchg(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Xchg {
        dst: convert_operand(instr, 0)?,
        src: convert_operand(instr, 1)?,
    })
}
/// Shared helper for two-operand instructions.
///
/// Converts operand 0 and operand 1 (in that order) and passes them to
/// `build`, which assembles the concrete [`X86Instruction`].
///
/// # Errors
///
/// Propagates any operand conversion error; `build` is not called in that case.
fn convert_binary_op<F>(instr: &Instruction, build: F) -> Result<X86Instruction>
where
    F: FnOnce(X86Operand, X86Operand) -> X86Instruction,
{
    // Call arguments are evaluated left to right, so operand 0 is converted first.
    Ok(build(convert_operand(instr, 0)?, convert_operand(instr, 1)?))
}
/// Shared helper for one-operand instructions.
///
/// Converts operand 0 and passes it to `build`, which assembles the concrete
/// [`X86Instruction`].
///
/// # Errors
///
/// Propagates the operand conversion error; `build` is not called in that case.
fn convert_unary_op<F>(instr: &Instruction, build: F) -> Result<X86Instruction>
where
    F: FnOnce(X86Operand) -> X86Instruction,
{
    convert_operand(instr, 0).map(build)
}
/// Shared helper for shift and rotate instructions.
///
/// Operand 0 is the value being shifted. The count is taken from operand 1
/// when the instruction has more than one operand; otherwise an immediate
/// count of `1` is used.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_shift<F>(instr: &Instruction, build: F) -> Result<X86Instruction>
where
    F: FnOnce(X86Operand, X86Operand) -> X86Instruction,
{
    let dst = convert_operand(instr, 0)?;
    let count = match instr.op_count() {
        // No explicit count operand present: shift by one.
        0 | 1 => X86Operand::Immediate(1),
        _ => convert_operand(instr, 1)?,
    };
    Ok(build(dst, count))
}
/// Converts the one-, two- and three-operand forms of `imul`.
///
/// * one operand: `dst` is fixed to `Eax`, `src` is operand 0.
/// * two operands: `dst` is the register in operand 0, `src` is operand 1.
/// * otherwise: as the two-operand form, with operand 2 stored in `src2`.
///
/// # Errors
///
/// Propagates register and operand conversion errors.
fn convert_imul(instr: &Instruction) -> Result<X86Instruction> {
    match instr.op_count() {
        // NOTE(review): the implicit destination is always reported as `Eax`,
        // regardless of the operand width — confirm consumers expect that for
        // 8/16/64-bit multiplies.
        1 => Ok(X86Instruction::Imul {
            dst: X86Register::Eax,
            src: convert_operand(instr, 0)?,
            src2: None,
        }),
        2 => Ok(X86Instruction::Imul {
            dst: convert_register(instr.op0_register())?,
            src: convert_operand(instr, 1)?,
            src2: None,
        }),
        _ => {
            let dst = convert_register(instr.op0_register())?;
            let src = convert_operand(instr, 1)?;
            let multiplier = convert_operand(instr, 2)?;
            Ok(X86Instruction::Imul {
                dst,
                src,
                src2: Some(multiplier),
            })
        }
    }
}
/// Converts a `mul`; operand 0 is the explicit source operand.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_mul(instr: &Instruction) -> Result<X86Instruction> {
    convert_operand(instr, 0).map(|src| X86Instruction::Mul { src })
}
/// Converts a `cmp`; operands 0 and 1 become `left` and `right`.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_cmp(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Cmp {
        left: convert_operand(instr, 0)?,
        right: convert_operand(instr, 1)?,
    })
}
/// Converts a `test`; operands 0 and 1 become `left` and `right`.
///
/// # Errors
///
/// Propagates any operand conversion error.
fn convert_test(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Test {
        left: convert_operand(instr, 0)?,
        right: convert_operand(instr, 1)?,
    })
}
/// Converts a `jmp` with a direct target.
///
/// # Errors
///
/// Propagates the error from `get_branch_target` when operand 0 is not a
/// direct branch operand.
fn convert_jmp(instr: &Instruction) -> Result<X86Instruction> {
    get_branch_target(instr).map(|target| X86Instruction::Jmp { target })
}
/// Converts a conditional jump into its condition code and direct target.
///
/// # Errors
///
/// Propagates errors from `mnemonic_to_condition` (checked first) and
/// `get_branch_target`.
fn convert_jcc(instr: &Instruction) -> Result<X86Instruction> {
    // The condition is resolved before the target, so its error takes precedence.
    Ok(X86Instruction::Jcc {
        condition: mnemonic_to_condition(instr.mnemonic())?,
        target: get_branch_target(instr)?,
    })
}
/// Converts a `call` with a direct target.
///
/// # Errors
///
/// Propagates the error from `get_branch_target` when operand 0 is not a
/// direct branch operand.
fn convert_call(instr: &Instruction) -> Result<X86Instruction> {
    get_branch_target(instr).map(|target| X86Instruction::Call { target })
}
/// Converts a conditional move: condition from the mnemonic, destination
/// register from operand 0, source from operand 1.
///
/// # Errors
///
/// Propagates errors from `cmovcc_to_condition`, `convert_register` and
/// `convert_operand`, checked in that order.
fn convert_cmovcc(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Cmovcc {
        condition: cmovcc_to_condition(instr.mnemonic())?,
        dst: convert_register(instr.op0_register())?,
        src: convert_operand(instr, 1)?,
    })
}
/// Converts a conditional set: condition from the mnemonic, destination from
/// operand 0.
///
/// # Errors
///
/// Propagates errors from `setcc_to_condition` (checked first) and
/// `convert_operand`.
fn convert_setcc(instr: &Instruction) -> Result<X86Instruction> {
    Ok(X86Instruction::Setcc {
        condition: setcc_to_condition(instr.mnemonic())?,
        dst: convert_operand(instr, 0)?,
    })
}
fn cmovcc_to_condition(mnemonic: Mnemonic) -> Result<X86Condition> {
match mnemonic {
Mnemonic::Cmove => Ok(X86Condition::E),
Mnemonic::Cmovne => Ok(X86Condition::Ne),
Mnemonic::Cmovl => Ok(X86Condition::L),
Mnemonic::Cmovge => Ok(X86Condition::Ge),
Mnemonic::Cmovle => Ok(X86Condition::Le),
Mnemonic::Cmovg => Ok(X86Condition::G),
Mnemonic::Cmovb => Ok(X86Condition::B),
Mnemonic::Cmovae => Ok(X86Condition::Ae),
Mnemonic::Cmovbe => Ok(X86Condition::Be),
Mnemonic::Cmova => Ok(X86Condition::A),
Mnemonic::Cmovs => Ok(X86Condition::S),
Mnemonic::Cmovns => Ok(X86Condition::Ns),
Mnemonic::Cmovo => Ok(X86Condition::O),
Mnemonic::Cmovno => Ok(X86Condition::No),
Mnemonic::Cmovp => Ok(X86Condition::P),
Mnemonic::Cmovnp => Ok(X86Condition::Np),
_ => Err(Error::SsaError(format!(
"Unknown CMOVcc mnemonic: {mnemonic:?}"
))),
}
}
fn setcc_to_condition(mnemonic: Mnemonic) -> Result<X86Condition> {
match mnemonic {
Mnemonic::Sete => Ok(X86Condition::E),
Mnemonic::Setne => Ok(X86Condition::Ne),
Mnemonic::Setl => Ok(X86Condition::L),
Mnemonic::Setge => Ok(X86Condition::Ge),
Mnemonic::Setle => Ok(X86Condition::Le),
Mnemonic::Setg => Ok(X86Condition::G),
Mnemonic::Setb => Ok(X86Condition::B),
Mnemonic::Setae => Ok(X86Condition::Ae),
Mnemonic::Setbe => Ok(X86Condition::Be),
Mnemonic::Seta => Ok(X86Condition::A),
Mnemonic::Sets => Ok(X86Condition::S),
Mnemonic::Setns => Ok(X86Condition::Ns),
Mnemonic::Seto => Ok(X86Condition::O),
Mnemonic::Setno => Ok(X86Condition::No),
Mnemonic::Setp => Ok(X86Condition::P),
Mnemonic::Setnp => Ok(X86Condition::Np),
_ => Err(Error::SsaError(format!(
"Unknown SETcc mnemonic: {mnemonic:?}"
))),
}
}
fn get_branch_target(instr: &Instruction) -> Result<u64> {
match instr.op0_kind() {
OpKind::NearBranch16 => Ok(u64::from(instr.near_branch16())),
OpKind::NearBranch32 => Ok(u64::from(instr.near_branch32())),
OpKind::NearBranch64 => Ok(instr.near_branch64()),
OpKind::FarBranch16 => Ok(u64::from(instr.far_branch16())),
OpKind::FarBranch32 => Ok(u64::from(instr.far_branch32())),
_ => {
Err(Error::SsaError(format!(
"Indirect branch at 0x{:x}",
instr.ip()
)))
}
}
}
fn mnemonic_to_condition(mnemonic: Mnemonic) -> Result<X86Condition> {
match mnemonic {
Mnemonic::Je => Ok(X86Condition::E),
Mnemonic::Jne => Ok(X86Condition::Ne),
Mnemonic::Jl => Ok(X86Condition::L),
Mnemonic::Jge => Ok(X86Condition::Ge),
Mnemonic::Jle => Ok(X86Condition::Le),
Mnemonic::Jg => Ok(X86Condition::G),
Mnemonic::Jb => Ok(X86Condition::B),
Mnemonic::Jae => Ok(X86Condition::Ae),
Mnemonic::Jbe => Ok(X86Condition::Be),
Mnemonic::Ja => Ok(X86Condition::A),
Mnemonic::Js => Ok(X86Condition::S),
Mnemonic::Jns => Ok(X86Condition::Ns),
Mnemonic::Jo => Ok(X86Condition::O),
Mnemonic::Jno => Ok(X86Condition::No),
Mnemonic::Jp => Ok(X86Condition::P),
Mnemonic::Jnp => Ok(X86Condition::Np),
_ => Err(Error::SsaError(format!(
"Unknown condition mnemonic: {mnemonic:?}"
))),
}
}
fn convert_operand(instr: &Instruction, index: u32) -> Result<X86Operand> {
let op_kind = instr.op_kind(index);
match op_kind {
OpKind::Register => {
let reg = match index {
0 => instr.op0_register(),
1 => instr.op1_register(),
2 => instr.op2_register(),
3 => instr.op3_register(),
4 => instr.op4_register(),
_ => return Err(Error::SsaError(format!("Invalid operand index {index}"))),
};
Ok(X86Operand::Register(convert_register(reg)?))
}
OpKind::Immediate8 => Ok(X86Operand::Immediate(i64::from(
instr.immediate8().cast_signed(),
))),
OpKind::Immediate16 => Ok(X86Operand::Immediate(i64::from(
instr.immediate16().cast_signed(),
))),
OpKind::Immediate32 => Ok(X86Operand::Immediate(i64::from(
instr.immediate32().cast_signed(),
))),
OpKind::Immediate64 => Ok(X86Operand::Immediate(instr.immediate64().cast_signed())),
OpKind::Immediate8to16 => Ok(X86Operand::Immediate(i64::from(instr.immediate8to16()))),
OpKind::Immediate8to32 => Ok(X86Operand::Immediate(i64::from(instr.immediate8to32()))),
OpKind::Immediate8to64 => Ok(X86Operand::Immediate(instr.immediate8to64())),
OpKind::Immediate32to64 => Ok(X86Operand::Immediate(instr.immediate32to64())),
OpKind::Memory => {
let mem = convert_memory_operand(instr, index)?;
Ok(X86Operand::Memory(mem))
}
_ => Err(Error::SsaError(format!(
"Unsupported operand kind: {op_kind:?}"
))),
}
}
fn convert_memory_operand(instr: &Instruction, _index: u32) -> Result<X86Memory> {
let base = if instr.memory_base() == Register::None {
None
} else {
Some(convert_register(instr.memory_base())?)
};
let index = if instr.memory_index() == Register::None {
None
} else {
Some(convert_register(instr.memory_index())?)
};
#[allow(clippy::cast_possible_truncation)]
let scale = instr.memory_index_scale() as u8;
let displacement = instr.memory_displacement64().cast_signed();
#[allow(clippy::cast_possible_truncation)]
let size = instr.memory_size().size() as u8;
Ok(X86Memory {
base,
index,
scale,
displacement,
size,
})
}
fn convert_register(reg: Register) -> Result<X86Register> {
match reg {
Register::EAX => Ok(X86Register::Eax),
Register::ECX => Ok(X86Register::Ecx),
Register::EDX => Ok(X86Register::Edx),
Register::EBX => Ok(X86Register::Ebx),
Register::ESP => Ok(X86Register::Esp),
Register::EBP => Ok(X86Register::Ebp),
Register::ESI => Ok(X86Register::Esi),
Register::EDI => Ok(X86Register::Edi),
Register::RAX => Ok(X86Register::Rax),
Register::RCX => Ok(X86Register::Rcx),
Register::RDX => Ok(X86Register::Rdx),
Register::RBX => Ok(X86Register::Rbx),
Register::RSP => Ok(X86Register::Rsp),
Register::RBP => Ok(X86Register::Rbp),
Register::RSI => Ok(X86Register::Rsi),
Register::RDI => Ok(X86Register::Rdi),
Register::R8 => Ok(X86Register::R8),
Register::R9 => Ok(X86Register::R9),
Register::R10 => Ok(X86Register::R10),
Register::R11 => Ok(X86Register::R11),
Register::R12 => Ok(X86Register::R12),
Register::R13 => Ok(X86Register::R13),
Register::R14 => Ok(X86Register::R14),
Register::R15 => Ok(X86Register::R15),
Register::AL => Ok(X86Register::Al),
Register::CL => Ok(X86Register::Cl),
Register::DL => Ok(X86Register::Dl),
Register::BL => Ok(X86Register::Bl),
Register::AH => Ok(X86Register::Ah),
Register::CH => Ok(X86Register::Ch),
Register::DH => Ok(X86Register::Dh),
Register::BH => Ok(X86Register::Bh),
Register::AX => Ok(X86Register::Ax),
Register::CX => Ok(X86Register::Cx),
Register::DX => Ok(X86Register::Dx),
Register::BX => Ok(X86Register::Bx),
Register::SP => Ok(X86Register::Sp),
Register::BP => Ok(X86Register::Bp),
Register::SI => Ok(X86Register::Si),
Register::DI => Ok(X86Register::Di),
Register::ES => Ok(X86Register::Es),
Register::CS => Ok(X86Register::Cs),
Register::SS => Ok(X86Register::Ss),
Register::DS => Ok(X86Register::Ds),
Register::FS => Ok(X86Register::Fs),
Register::GS => Ok(X86Register::Gs),
_ => Err(Error::SsaError(format!("Unsupported register: {reg:?}"))),
}
}
// Byte-sequence prefixes that `x86_detect_prologue` compares against the start
// of a function when the bitness is not 64. A match is reported as a
// `StackFrame` prologue whose size is the length of the matched pattern.
//
// NOTE(review): the first entry `[0x84, 0xEC]` differs by one byte from the
// `sub esp, imm8` opcode prefix `0x83 0xEC`, and `[0x55, 0x8B, 0x89, 0xE5]`
// does not look like a usual frame setup — confirm both against the source
// this table was taken from.
// NOTE(review): `[0x55, 0x89, 0xE5]` and `[0x55, 0x8B, 0xEC]` are also checked
// explicitly in `x86_detect_prologue` (as `Standard32`) for bitness 32.
const PATTERNS_X86: [&[u8]; 28] = [
&[0x84, 0xEC], &[0x51, 0x53, 0x8B, 0x1D], &[0x53, 0x8B, 0x54], &[0x53, 0x8B, 0xDC], &[0x53, 0x8B, 0xD9, 0x55], &[0x55, 0x89, 0xE5], &[0x55, 0x31, 0xD2], &[0x55, 0x57, 0x89], &[0x55, 0x57, 0x56, 0x53], &[0x55, 0x8B, 0xEC], &[0x55, 0x8B, 0x6C], &[0x55, 0x8B, 0x44, 0x24], &[0x55, 0x8B, 0x54, 0x24], &[0x55, 0x8B, 0x4C, 0x24], &[0x55, 0x8B, 0x89, 0xE5], &[0x56, 0x33, 0xC0], &[0x56, 0x8B, 0xF1], &[0x56, 0x53, 0x89], &[0x56, 0x8B, 0x44, 0x24], &[0x56, 0x8B, 0x4C, 0x24], &[0x56, 0x8B, 0x54, 0x24], &[0x56, 0x53, 0x83, 0xEC], &[0x6A, 0x0C, 0x68], &[0x8B, 0x4C, 0x24], &[0x8B, 0x44, 0x24], &[0x8B, 0x54, 0x24], &[0x8B, 0xFF, 0x56], &[0x8B, 0xFF, 0x55], ];
// Byte-sequence prefixes that `x86_detect_prologue` compares against the start
// of a function when the bitness is 64. A match is reported as a `StackFrame`
// prologue whose size is the length of the matched pattern.
//
// Several entries are prefixes of later, longer entries (for example
// `[0x48, 0x89, 0x5C]` and `[0x48, 0x89, 0x5C, 0x24]`).
//
// NOTE(review): `[0x40, 0x56, 0x48, 0x84, 0xEC]` and
// `[0x48, 0x8B, 0xC4, 0x48, 0xEC]` break the `0x48, 0x83, 0xEC` shape used by
// the neighbouring entries, and `[0x40, 0x8B, 0xC4]` differs by one byte from
// the `0x48, 0x8B, 0xC4` prefix used further down — confirm these against the
// source this table was taken from.
const PATTERNS_X64: [&[u8]; 22] = [
&[0x48, 0x81, 0xEC], &[0x48, 0x83, 0xEC], &[0x48, 0x89, 0x5C], &[0x40, 0x8B, 0xC4], &[0x55, 0x53, 0x48], &[0x64, 0x48, 0x8D], &[0x55, 0x48, 0x8B], &[0x53, 0x48, 0x89, 0xFB], &[0x53, 0x48, 0x81, 0xBF], &[0x48, 0x89, 0x5C, 0x24], &[0x48, 0x89, 0x4C, 0x24], &[0x55, 0x48, 0x89, 0xE5], &[0x55, 0x48, 0x81, 0xEC], &[0x55, 0x48, 0x83, 0xEC], &[0x55, 0x53, 0x48, 0x89], &[0x40, 0x54, 0x48, 0x83, 0xEC], &[0x40, 0x55, 0x48, 0x83, 0xEC], &[0x40, 0x56, 0x48, 0x84, 0xEC], &[0x48, 0x8B, 0xC4, 0x48, 0xEC], &[0x40, 0x53, 0x57, 0x48, 0x83, 0xEC], &[0x40, 0x53, 0x56, 0x57, 0x48, 0x83, 0xEC], &[0x40, 0x53, 0x55, 0x56, 0x57, 0x48, 0x83, 0xEC], ];
/// Classifies the function prologue at the start of `bytes`.
///
/// Checks are applied in this order; the first one that matches decides the result:
///
/// 1. The fixed 20-byte DynCipher stub (any bitness) gives `DynCipher`, size 20,
///    one argument.
/// 2. For bitness 32, `push ebp; mov ebp, esp` in either encoding
///    (`55 8B EC` or `55 89 E5`) gives `Standard32`, size 3.
/// 3. For bitness 64, `push rbp; mov rbp, rsp` (`55 48 89 E5`) gives
///    `Standard64`, size 4.
/// 4. The pattern table for the bitness (`PATTERNS_X64` for 64, `PATTERNS_X86`
///    otherwise) gives `StackFrame`, with `size` set to the length of the
///    **longest** matching pattern.
///
/// If nothing matches (including empty input) the result is
/// `X86PrologueKind::None` with size 0.
///
/// # Arguments
///
/// * `bytes` - Machine code starting at the function entry.
/// * `bitness` - 32 or 64; any value other than 64 selects the 32-bit table.
#[must_use]
pub fn x86_detect_prologue(bytes: &[u8], bitness: u32) -> X86PrologueInfo {
    const DYNCIPHER_PROLOGUE: [u8; 20] = [
        0x89, 0xe0, 0x53, 0x57, 0x56, 0x29, 0xe0, 0x83, 0xf8, 0x18, 0x74, 0x07, 0x8b, 0x44, 0x24,
        0x10, 0x50, 0xeb, 0x01, 0x51,
    ];
    // `push ebp; mov ebp, esp` — the `mov` has two equivalent encodings.
    const STANDARD32_PROLOGUES: [[u8; 3]; 2] = [[0x55, 0x8B, 0xEC], [0x55, 0x89, 0xE5]];
    // `push rbp; mov rbp, rsp`
    const STANDARD64_PROLOGUE: [u8; 4] = [0x55, 0x48, 0x89, 0xE5];

    // `starts_with` is false whenever `bytes` is shorter than the pattern, so
    // no separate length (or emptiness) checks are required.
    if bytes.starts_with(&DYNCIPHER_PROLOGUE) {
        return X86PrologueInfo {
            kind: X86PrologueKind::DynCipher,
            size: DYNCIPHER_PROLOGUE.len(),
            arg_count: 1,
        };
    }

    if bitness == 32
        && STANDARD32_PROLOGUES
            .iter()
            .any(|prologue| bytes.starts_with(prologue))
    {
        return X86PrologueInfo {
            kind: X86PrologueKind::Standard32,
            size: 3,
            arg_count: 0,
        };
    }

    if bitness == 64 && bytes.starts_with(&STANDARD64_PROLOGUE) {
        return X86PrologueInfo {
            kind: X86PrologueKind::Standard64,
            size: STANDARD64_PROLOGUE.len(),
            arg_count: 0,
        };
    }

    let patterns: &[&[u8]] = if bitness == 64 {
        &PATTERNS_X64
    } else {
        &PATTERNS_X86
    };

    // Several table entries are prefixes of longer entries. Taking the first
    // hit in table order would let the short entry shadow the long one and
    // under-report the prologue size, so pick the longest match instead.
    let longest_match = patterns
        .iter()
        .copied()
        .filter(|pattern| bytes.starts_with(pattern))
        .map(|pattern| pattern.len())
        .max();

    match longest_match {
        Some(size) => X86PrologueInfo {
            kind: X86PrologueKind::StackFrame {
                is_64bit: bitness == 64,
            },
            size,
            arg_count: 0,
        },
        None => X86PrologueInfo {
            kind: X86PrologueKind::None,
            size: 0,
            arg_count: 0,
        },
    }
}
#[must_use]
pub fn x86_detect_epilogue(instructions: &[X86DecodedInstruction]) -> Option<X86EpilogueInfo> {
if instructions.len() < 4 {
return None;
}
let len = instructions.len();
let pop_esi = matches!(
&instructions[len - 4].instruction,
X86Instruction::Pop { dst } if *dst == X86Register::Esi
);
let pop_edi = matches!(
&instructions[len - 3].instruction,
X86Instruction::Pop { dst } if *dst == X86Register::Edi
);
let pop_ebx = matches!(
&instructions[len - 2].instruction,
X86Instruction::Pop { dst } if *dst == X86Register::Ebx
);
let ret = matches!(instructions[len - 1].instruction, X86Instruction::Ret);
if pop_esi && pop_edi && pop_ebx && ret {
Some(X86EpilogueInfo {
offset: instructions[len - 4].offset,
size: 4, })
} else {
None
}
}
#[cfg(test)]
mod tests {
    use crate::analysis::x86::{
        decoder::{x86_decode_all, x86_detect_prologue},
        types::{X86Condition, X86Instruction, X86Operand, X86PrologueKind, X86Register},
    };

    /// `mov eax, 0x1234` followed by `ret` decodes to exactly two instructions.
    #[test]
    fn test_decode_simple_mov() {
        let bytes = [0xb8, 0x34, 0x12, 0x00, 0x00, 0xc3];
        let result = x86_decode_all(&bytes, 32, 0).unwrap();
        assert_eq!(result.len(), 2);

        let X86Instruction::Mov { dst, src } = &result[0].instruction else {
            panic!("Expected Mov instruction");
        };
        assert_eq!(dst.as_register(), Some(X86Register::Eax));
        assert_eq!(src.as_immediate(), Some(0x1234));

        assert!(matches!(result[1].instruction, X86Instruction::Ret));
    }

    /// `add eax, 5` with a one-byte immediate.
    #[test]
    fn test_decode_add_reg_imm() {
        let bytes = [0x83, 0xc0, 0x05, 0xc3];
        let result = x86_decode_all(&bytes, 32, 0).unwrap();
        assert_eq!(result.len(), 2);

        let X86Instruction::Add { dst, src } = &result[0].instruction else {
            panic!("Expected Add instruction");
        };
        assert_eq!(dst.as_register(), Some(X86Register::Eax));
        assert_eq!(src.as_immediate(), Some(5));
    }

    /// `xor eax, ecx` with two register operands.
    #[test]
    fn test_decode_xor_reg_reg() {
        let bytes = [0x31, 0xc8, 0xc3];
        let result = x86_decode_all(&bytes, 32, 0).unwrap();
        assert_eq!(result.len(), 2);

        let X86Instruction::Xor { dst, src } = &result[0].instruction else {
            panic!("Expected Xor instruction");
        };
        assert_eq!(dst.as_register(), Some(X86Register::Eax));
        assert_eq!(src.as_register(), Some(X86Register::Ecx));
    }

    /// A compare followed by a short `je` whose target resolves to offset 8.
    #[test]
    fn test_decode_conditional_jump() {
        let bytes = [
            0x83, 0xf8, 0x0a, // cmp
            0x74, 0x03, // je (target asserted below)
            0x83, 0xc0, 0x01, // skipped when the jump is taken
            0xc3, // ret
        ];
        let result = x86_decode_all(&bytes, 32, 0).unwrap();
        assert_eq!(result.len(), 4);
        assert!(matches!(result[0].instruction, X86Instruction::Cmp { .. }));

        let X86Instruction::Jcc { condition, target } = &result[1].instruction else {
            panic!("Expected Jcc instruction");
        };
        assert_eq!(*condition, X86Condition::E);
        assert_eq!(*target, 8);
    }

    /// `mov eax, [esp+0x10]` yields a memory source with base ESP and displacement 16.
    #[test]
    fn test_decode_memory_operand() {
        let bytes = [0x8b, 0x44, 0x24, 0x10, 0xc3];
        let result = x86_decode_all(&bytes, 32, 0).unwrap();
        assert_eq!(result.len(), 2);

        let X86Instruction::Mov { dst, src } = &result[0].instruction else {
            panic!("Expected Mov instruction");
        };
        assert_eq!(dst.as_register(), Some(X86Register::Eax));

        let X86Operand::Memory(mem) = src else {
            panic!("Expected memory operand");
        };
        assert_eq!(mem.base, Some(X86Register::Esp));
        assert_eq!(mem.displacement, 16);
    }

    /// The 20-byte DynCipher stub is recognised even with trailing bytes.
    #[test]
    fn test_detect_dyncipher_prologue() {
        let prologue = [
            0x89, 0xe0, 0x53, 0x57, 0x56, 0x29, 0xe0, 0x83, 0xf8, 0x18, 0x74, 0x07, 0x8b, 0x44,
            0x24, 0x10, 0x50, 0xeb, 0x01, 0x51, 0xc3,
        ];
        let info = x86_detect_prologue(&prologue, 32);
        assert_eq!(info.kind, X86PrologueKind::DynCipher);
        assert_eq!(info.size, 20);
        assert_eq!(info.arg_count, 1);
    }

    /// `55 89 E5` is reported as the standard 32-bit frame setup.
    #[test]
    fn test_detect_standard_32bit_prologue() {
        let bytes = [0x55, 0x89, 0xe5, 0xc3];
        let info = x86_detect_prologue(&bytes, 32);
        assert_eq!(info.kind, X86PrologueKind::Standard32);
        assert_eq!(info.size, 3);
    }

    /// 64-bit mode: `mov rax, imm64` keeps the full immediate.
    #[test]
    fn test_decode_64bit() {
        let bytes = [
            0x48, 0xb8, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00, 0x00, 0x00, 0xc3,
        ];
        let result = x86_decode_all(&bytes, 64, 0).unwrap();
        assert_eq!(result.len(), 2);

        let X86Instruction::Mov { dst, src } = &result[0].instruction else {
            panic!("Expected Mov instruction");
        };
        assert_eq!(dst.as_register(), Some(X86Register::Rax));
        assert_eq!(src.as_immediate(), Some(0x123456789));
    }
}