use crate::{BinaryInfo, Result};
use iced_x86::{Formatter, IntelFormatter};
pub mod aarch64_ops;
/// Builds an Intel-syntax formatter configured for lowercase, `0x`-prefixed output.
///
/// Judging by the name, the option set is meant to mirror Capstone's textual output
/// (NOTE(review): not verifiable from this file alone). Concretely it selects:
///
/// * hex immediates with a `0x` prefix, no suffix, lowercase digits, no leading zero, and no
///   decimal rendering of small values;
/// * a space after the operand separator and around memory `+`/`-`, but none around `*`;
/// * lowercase mnemonics, registers, keywords, decorators and prefixes;
/// * memory-size annotations on every memory operand.
#[must_use]
pub fn capstone_compat_formatter() -> IntelFormatter {
    let mut formatter = IntelFormatter::new();
    {
        // Keep the mutable borrow of the option set confined to this scope.
        let options = formatter.options_mut();

        // Number rendering.
        options.set_hex_prefix("0x");
        options.set_hex_suffix("");
        options.set_uppercase_hex(false);
        options.set_small_hex_numbers_in_decimal(false);
        options.set_add_leading_zero_to_hex_numbers(false);

        // Spacing.
        options.set_space_after_operand_separator(true);
        options.set_space_between_memory_add_operators(true);
        options.set_space_between_memory_mul_operators(false);

        // Letter case.
        options.set_uppercase_mnemonics(false);
        options.set_uppercase_registers(false);
        options.set_uppercase_keywords(false);
        options.set_uppercase_decorators(false);
        options.set_uppercase_prefixes(false);

        // Memory operand size annotations.
        options.set_memory_size_options(iced_x86::MemorySizeOptions::Always);
    }
    formatter
}
/// Common interface implemented by the per-architecture instruction decoders in this module.
pub trait Decoder: Send + Sync {
/// Decodes a single instruction from `code`, starting at byte index `offset`.
///
/// `address` is the address attributed to the instruction. On success returns the decoded
/// instruction together with the number of bytes it occupies; returns `None` when no
/// instruction can be decoded at that position.
fn decode_at(&self, code: &[u8], offset: usize, address: u64) -> Option<(DecodedInsn, usize)>;
/// Returns the instruction size in bytes when every instruction has the same length,
/// or `None` for variable-length encodings.
fn fixed_instruction_size(&self) -> Option<usize>;
}
/// x86 instruction decoder backed by `iced_x86`.
#[derive(Debug, Clone, Copy)]
pub struct X86Decoder {
/// Bitness handed to the `iced_x86` decoder constructor on every `decode_at` call.
// NOTE(review): presumably one of 16, 32 or 64 — nothing here validates it; confirm at call sites.
pub bitness: u32,
}
impl X86Decoder {
/// Creates a decoder for the given `bitness`; the value is stored as-is without validation.
#[must_use]
pub fn new(bitness: u32) -> Self {
Self { bitness }
}
}
impl Decoder for X86Decoder {
    /// Decodes one x86 instruction from `code[offset..]`, using `address` as its instruction
    /// pointer.
    ///
    /// Returns `None` when:
    /// * `offset` is past the end of `code` or no bytes remain,
    /// * `self.bitness` is rejected by `iced_x86` (the fallible constructor is used so that a
    ///   bad value in the public `bitness` field cannot panic here),
    /// * the bytes do not form a valid instruction.
    ///
    /// On success the second tuple element is the number of bytes consumed.
    fn decode_at(&self, code: &[u8], offset: usize, address: u64) -> Option<(DecodedInsn, usize)> {
        let buf = code.get(offset..).filter(|rest| !rest.is_empty())?;

        // `with_ip` panics on an unsupported bitness; `try_with_ip` reports it as an error,
        // which is mapped to `None` like every other decode failure.
        let mut dec = iced_x86::Decoder::try_with_ip(
            self.bitness,
            buf,
            address,
            iced_x86::DecoderOptions::NONE,
        )
        .ok()?;
        if !dec.can_decode() {
            return None;
        }

        let start = dec.position();
        let insn = dec.decode();
        if insn.is_invalid() {
            return None;
        }
        let len = dec.position() - start;

        Some((
            DecodedInsn::X86(IcedInsn {
                offset: insn.ip(),
                // Lossless in practice: the length is the number of bytes of one instruction.
                length: len as u32,
                iced: insn,
            }),
            len,
        ))
    }

    /// x86 instructions are variable-length, so there is no fixed size.
    fn fixed_instruction_size(&self) -> Option<usize> {
        None
    }
}
/// AArch64 instruction decoder backed by `disarm64`; carries no state.
#[derive(Debug, Default, Clone, Copy)]
pub struct Aarch64Decoder;
impl Aarch64Decoder {
/// Creates the (stateless) decoder.
#[must_use]
pub fn new() -> Self {
Self
}
}
impl Decoder for Aarch64Decoder {
    /// Decodes the 4-byte little-endian instruction word at `code[offset..offset + 4]`.
    ///
    /// Returns `None` when fewer than four bytes are available at `offset` (including when
    /// `offset + 4` overflows) or when `disarm64` does not recognise the word. The decoded
    /// instruction records `address` as its offset; the consumed size is always 4.
    fn decode_at(&self, code: &[u8], offset: usize, address: u64) -> Option<(DecodedInsn, usize)> {
        let end = offset.checked_add(4)?;
        let opcode = match *code.get(offset..end)? {
            [b0, b1, b2, b3] => u32::from_le_bytes([b0, b1, b2, b3]),
            // Not reachable: the range above is exactly four bytes long.
            _ => return None,
        };
        let decoded = disarm64::decoder::decode(opcode)?;
        let insn = ArmInsn {
            offset: address,
            opcode,
            decoded,
        };
        Some((DecodedInsn::Aarch64(insn), 4))
    }

    /// Every instruction handled by this decoder is four bytes long.
    fn fixed_instruction_size(&self) -> Option<usize> {
        Some(4)
    }
}
/// A decoded x86 instruction together with its location and size.
#[derive(Debug, Clone, Copy)]
pub struct IcedInsn {
/// Instruction pointer of the instruction (`X86Decoder` fills this from `Instruction::ip()`).
pub offset: u64,
/// Encoded length of the instruction in bytes.
pub length: u32,
/// The underlying `iced_x86` instruction.
pub iced: iced_x86::Instruction,
}
/// A decoded AArch64 instruction together with its location and raw encoding.
#[derive(Debug, Clone, Copy)]
pub struct ArmInsn {
/// Address attributed to the instruction (`Aarch64Decoder` stores its `address` argument here).
pub offset: u64,
/// The raw 32-bit instruction word, as read in little-endian order.
pub opcode: u32,
/// The `disarm64` decoding of `opcode`.
pub decoded: disarm64::decoder::Opcode,
}
/// A decoded instruction from either supported architecture.
#[derive(Debug, Clone, Copy)]
pub enum DecodedInsn {
/// An x86 instruction decoded by `iced_x86`.
X86(IcedInsn),
/// An AArch64 instruction decoded by `disarm64`.
Aarch64(ArmInsn),
}
impl DecodedInsn {
#[inline]
#[must_use]
pub fn offset(&self) -> u64 {
match self {
DecodedInsn::X86(i) => i.offset,
DecodedInsn::Aarch64(i) => i.offset,
}
}
#[inline]
#[must_use]
pub fn length(&self) -> usize {
match self {
DecodedInsn::X86(i) => i.length as usize,
DecodedInsn::Aarch64(_) => 4,
}
}
pub fn bytes_in<'b>(&self, binary_info: &'b BinaryInfo<'_>) -> Result<&'b [u8]> {
binary_info.bytes_at(self.offset(), self.length() as u32)
}
#[inline]
#[must_use]
pub fn mnemonic_enum_x86(&self) -> Option<iced_x86::Mnemonic> {
match self {
DecodedInsn::X86(i) => Some(i.iced.mnemonic()),
DecodedInsn::Aarch64(_) => None,
}
}
#[must_use]
pub fn mnemonic_aarch64(&self) -> Option<String> {
match self {
DecodedInsn::X86(_) => None,
DecodedInsn::Aarch64(a) => Some(aarch64_mnemonic_str(&a.decoded)),
}
}
#[inline]
#[must_use]
pub fn op_count(&self) -> u32 {
match self {
DecodedInsn::X86(i) => i.iced.op_count(),
DecodedInsn::Aarch64(_) => 0,
}
}
#[inline]
#[must_use]
pub fn op_kind_x86(&self, i: u32) -> Option<iced_x86::OpKind> {
match self {
DecodedInsn::X86(x) => Some(x.iced.op_kind(i)),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn op_register_x86(&self, i: u32) -> Option<iced_x86::Register> {
match self {
DecodedInsn::X86(x) => Some(x.iced.op_register(i)),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn memory_base_x86(&self) -> Option<iced_x86::Register> {
match self {
DecodedInsn::X86(x) => Some(x.iced.memory_base()),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn memory_index_x86(&self) -> Option<iced_x86::Register> {
match self {
DecodedInsn::X86(x) => Some(x.iced.memory_index()),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn memory_segment_x86(&self) -> Option<iced_x86::Register> {
match self {
DecodedInsn::X86(x) => Some(x.iced.memory_segment()),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn memory_displacement64_x86(&self) -> Option<u64> {
match self {
DecodedInsn::X86(x) => Some(x.iced.memory_displacement64()),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn near_branch_target_x86(&self) -> Option<u64> {
match self {
DecodedInsn::X86(x) => Some(x.iced.near_branch_target()),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn flow_control_x86(&self) -> Option<iced_x86::FlowControl> {
match self {
DecodedInsn::X86(x) => Some(x.iced.flow_control()),
DecodedInsn::Aarch64(_) => None,
}
}
#[inline]
#[must_use]
pub fn code_x86(&self) -> Option<iced_x86::Code> {
match self {
DecodedInsn::X86(x) => Some(x.iced.code()),
DecodedInsn::Aarch64(_) => None,
}
}
#[must_use]
pub fn is_call(&self) -> bool {
match self {
DecodedInsn::X86(x) => matches!(
x.iced.flow_control(),
iced_x86::FlowControl::Call | iced_x86::FlowControl::IndirectCall
),
DecodedInsn::Aarch64(a) => matches!(
a.decoded.mnemonic,
disarm64::decoder::Mnemonic::bl | disarm64::decoder::Mnemonic::blr
),
}
}
#[must_use]
pub fn is_jump(&self) -> bool {
match self {
DecodedInsn::X86(x) => matches!(
x.iced.flow_control(),
iced_x86::FlowControl::UnconditionalBranch | iced_x86::FlowControl::IndirectBranch
),
DecodedInsn::Aarch64(a) => matches!(
a.decoded.mnemonic,
disarm64::decoder::Mnemonic::b | disarm64::decoder::Mnemonic::br
),
}
}
#[must_use]
pub fn is_return(&self) -> bool {
match self {
DecodedInsn::X86(x) => {
matches!(x.iced.flow_control(), iced_x86::FlowControl::Return)
}
DecodedInsn::Aarch64(a) => {
matches!(a.decoded.mnemonic, disarm64::decoder::Mnemonic::ret)
}
}
}
#[must_use]
pub fn is_branch(&self) -> bool {
match self {
DecodedInsn::X86(x) => !matches!(
x.iced.flow_control(),
iced_x86::FlowControl::Next | iced_x86::FlowControl::Exception
),
DecodedInsn::Aarch64(a) => is_aarch64_branch_mnemonic(a.decoded.mnemonic),
}
}
#[must_use]
pub fn format_mnemonic(&self) -> String {
match self {
DecodedInsn::X86(x) => {
use iced_x86::Formatter;
let mut fmt = capstone_compat_formatter();
let mut out = String::new();
fmt.format_mnemonic(&x.iced, &mut out);
out
}
DecodedInsn::Aarch64(a) => aarch64_mnemonic_str(&a.decoded),
}
}
#[must_use]
pub fn format_operands(&self) -> Option<String> {
match self {
DecodedInsn::X86(x) => {
if x.iced.op_count() == 0 {
return None;
}
use iced_x86::Formatter;
let mut fmt = capstone_compat_formatter();
let mut out = String::new();
fmt.format_all_operands(&x.iced, &mut out);
Some(out)
}
DecodedInsn::Aarch64(a) => Some(format!("{:?}", a.decoded.operation)),
}
}
#[inline]
#[must_use]
pub fn as_iced(&self) -> Option<&iced_x86::Instruction> {
match self {
DecodedInsn::X86(x) => Some(&x.iced),
DecodedInsn::Aarch64(_) => None,
}
}
}
/// Renders an AArch64 mnemonic as text.
///
/// The text is the `Debug` form of the mnemonic with any leading `r#` removed; the name `b_`
/// is rendered as `b.cond`.
fn aarch64_mnemonic_str(op: &disarm64::decoder::Opcode) -> String {
    let debug_name = format!("{:?}", op.mnemonic);
    match debug_name.strip_prefix("r#").unwrap_or(&debug_name) {
        "b_" => "b.cond".to_string(),
        name => name.to_string(),
    }
}
/// Returns `true` when `m` is a control-transfer mnemonic.
///
/// Covers direct and conditional branches, calls, and returns, including the
/// pointer-authenticated and exception-return variants that this module's
/// `aarch64_is_indirect_branch`, `aarch64_is_indirect_call` and `aarch64_is_return`
/// helpers also classify as control flow.
#[inline]
fn is_aarch64_branch_mnemonic(m: disarm64::decoder::Mnemonic) -> bool {
    use disarm64::decoder::Mnemonic as M;
    matches!(
        m,
        // Direct and conditional branches.
        M::b | M::bl | M::r#b_ | M::cbz | M::cbnz | M::tbz | M::tbnz
            // Indirect jumps.
            | M::br | M::braa | M::braaz | M::brab | M::brabz
            // Indirect calls.
            | M::blr | M::blraa | M::blraaz | M::blrab | M::blrabz
            // Returns.
            | M::ret | M::retaa | M::retab | M::eret | M::eretaa | M::eretab | M::drps
    )
}
/// Sign-extends the low `bits` bits of `value` to an `i64`; higher bits of `value` are ignored.
///
/// `bits` must be in `1..=63` (checked in debug builds only).
#[inline]
fn sign_extend(value: u64, bits: u32) -> i64 {
    debug_assert!((1..=63).contains(&bits));
    // Move the field's top bit into bit 63, then let the arithmetic right shift replicate it.
    let unused = 64 - bits;
    let top_aligned = (value << unused) as i64;
    top_aligned >> unused
}
/// Computes the target of a PC-relative branch from the decoded opcode alone.
///
/// The raw instruction word is recovered from `opcode` and passed on to
/// `aarch64_branch_target_raw`; `pc` is the address of the instruction. Returns `None` when
/// the raw word is unavailable or the operation is not one of the handled branch forms.
#[must_use]
pub fn aarch64_branch_target(opcode: &disarm64::decoder::Opcode, pc: u64) -> Option<u64> {
    aarch64_raw_word(opcode).and_then(|word| aarch64_branch_target_raw(opcode, word, pc))
}
/// Computes the target of a PC-relative branch from its raw instruction word.
///
/// The immediate is picked according to `opcode.operation`:
///
/// * `BRANCH_IMM`: 26 bits starting at bit 0,
/// * `CONDBRANCH` / `COMPBRANCH`: 19 bits starting at bit 5,
/// * `TESTBRANCH`: 14 bits starting at bit 5.
///
/// It is sign-extended, multiplied by 4 and added to `pc` with wrapping arithmetic.
/// Any other operation yields `None`.
#[must_use]
pub fn aarch64_branch_target_raw(
    opcode: &disarm64::decoder::Opcode,
    raw: u32,
    pc: u64,
) -> Option<u64> {
    use disarm64::decoder::Operation;

    let word = u64::from(raw);
    // Select the immediate field and its width, then sign-extend in one place.
    let (field, width) = match opcode.operation {
        Operation::BRANCH_IMM(_) => (word & 0x03ff_ffff, 26),
        Operation::CONDBRANCH(_) | Operation::COMPBRANCH(_) => ((word >> 5) & 0x0007_ffff, 19),
        Operation::TESTBRANCH(_) => ((word >> 5) & 0x0000_3fff, 14),
        _ => return None,
    };
    // The immediate counts 4-byte instruction words.
    let displacement = sign_extend(field, width) << 2;
    Some(pc.wrapping_add(displacement as u64))
}
/// Returns the raw instruction word of `opcode`, as reported by `InsnOpcode::bits`.
///
/// Always `Some` in this implementation.
fn aarch64_raw_word(opcode: &disarm64::decoder::Opcode) -> Option<u32> {
    use disarm64_defn::defn::InsnOpcode;
    let word: u32 = opcode.bits();
    Some(word)
}
/// Returns `true` when `opcode` is a `bl` instruction.
#[inline]
#[must_use]
pub fn aarch64_is_direct_call(opcode: &disarm64::decoder::Opcode) -> bool {
    use disarm64::decoder::Mnemonic as M;
    matches!(opcode.mnemonic, M::bl)
}
/// Returns `true` when `opcode` is a plain `b` instruction.
#[inline]
#[must_use]
pub fn aarch64_is_unconditional_branch(opcode: &disarm64::decoder::Opcode) -> bool {
    use disarm64::decoder::Mnemonic as M;
    matches!(opcode.mnemonic, M::b)
}
#[inline]
#[must_use]
pub fn aarch64_is_conditional_branch(opcode: &disarm64::decoder::Opcode) -> bool {
use disarm64::decoder::Mnemonic as M;
matches!(
opcode.mnemonic,
M::r#b_ | M::cbz | M::cbnz | M::tbz | M::tbnz
)
}
#[inline]
#[must_use]
pub fn aarch64_is_return(opcode: &disarm64::decoder::Opcode) -> bool {
use disarm64::decoder::Mnemonic as M;
matches!(
opcode.mnemonic,
M::ret | M::eret | M::retaa | M::retab | M::eretaa | M::eretab | M::drps
)
}
#[inline]
#[must_use]
pub fn aarch64_is_indirect_branch(opcode: &disarm64::decoder::Opcode) -> bool {
use disarm64::decoder::Mnemonic as M;
matches!(
opcode.mnemonic,
M::br | M::braa | M::braaz | M::brab | M::brabz
)
}
#[inline]
#[must_use]
pub fn aarch64_is_indirect_call(opcode: &disarm64::decoder::Opcode) -> bool {
use disarm64::decoder::Mnemonic as M;
matches!(
opcode.mnemonic,
M::blr | M::blraa | M::blraaz | M::blrab | M::blrabz
)
}
#[inline]
#[must_use]
pub fn aarch64_is_trap(opcode: &disarm64::decoder::Opcode) -> bool {
use disarm64::decoder::Mnemonic as M;
matches!(opcode.mnemonic, M::udf | M::brk | M::hlt)
}
/// Returns `true` when `opcode` is an `svc` instruction.
#[inline]
#[must_use]
pub fn aarch64_is_svc(opcode: &disarm64::decoder::Opcode) -> bool {
    use disarm64::decoder::Mnemonic as M;
    matches!(opcode.mnemonic, M::svc)
}