#![allow(clippy::cast_possible_truncation)]
mod codec;
pub use codec::{register, Aarch64Codec};
use ud_core::VAddr;
use ud_ir::{ArchInsn, BasicBlock, Function, Terminator};
/// Width in bytes of one instruction word; `decode` splits its input into chunks of this size.
pub const INSN_SIZE: usize = 4;
/// Errors reported by this module.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The input buffer length is not a whole number of `INSN_SIZE`-byte words.
/// `len` is the offending buffer length in bytes.
#[error(
"byte buffer length {len} is not a multiple of {INSN_SIZE} (AArch64 insns are fixed-width)"
)]
Misaligned { len: usize },
}
/// Result alias whose error type defaults to this module's `Error`.
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Coarse classification of one decoded instruction word.
///
/// Only control-flow instructions and `nop` get their own variant; any word that
/// matches none of the recognised patterns is reported as `Other`.
///
/// Address fields hold resolved absolute addresses: branch destinations are the
/// instruction address plus the sign-extended, word-scaled immediate, and
/// `fallthrough` is the instruction address plus `INSN_SIZE` (both wrapping).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InsnKind {
/// PC-relative unconditional branch (`b`) to `target`.
BranchDirect { target: u64 },
/// Conditional branch (`b.cond`): `taken` if the condition holds, else `fallthrough`.
BranchConditional { taken: u64, fallthrough: u64 },
/// Compare-and-branch (`cbz`/`cbnz`); the two forms are not distinguished.
CompareAndBranch { taken: u64, fallthrough: u64 },
/// Test-bit-and-branch (`tbz`/`tbnz`); the two forms are not distinguished.
TestBitAndBranch { taken: u64, fallthrough: u64 },
/// PC-relative branch with link (`bl`) to `target`.
BranchLink { target: u64 },
/// Branch to register (`br`); no destination is recorded.
BranchRegister,
/// Branch with link to register (`blr`); no destination is recorded.
BranchLinkRegister,
/// Return (`ret`), with any register operand.
Return,
/// The canonical `nop` encoding.
Nop,
/// Any word that matched none of the patterns above.
Other,
}
/// One decoded instruction: where it lives, its raw encoding, and its classification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedInsn {
/// Address assigned to this instruction by `decode`.
pub addr: VAddr,
/// The raw bytes exactly as they appeared in the input buffer.
pub bytes: [u8; INSN_SIZE],
/// Classification of the instruction word formed from `bytes`.
pub kind: InsnKind,
}
impl DecodedInsn {
#[must_use]
pub fn opcode(&self) -> u32 {
u32::from_le_bytes(self.bytes)
}
}
impl ArchInsn for DecodedInsn {
    /// Address this instruction was decoded at.
    fn addr(&self) -> VAddr {
        self.addr
    }

    /// The raw encoding, borrowed as a byte slice of length `INSN_SIZE`.
    fn original_bytes(&self) -> &[u8] {
        self.bytes.as_slice()
    }
}
/// Decodes `bytes` as a run of little-endian instruction words starting at address `start`.
///
/// The first word is placed at `start` and every following word `INSN_SIZE` bytes after
/// its predecessor. Address arithmetic wraps modulo 2^64, the same rule used when
/// resolving branch targets and fall-through addresses, so an instruction's
/// fall-through address always equals the address of the word that follows it and no
/// two words of one buffer share an address.
///
/// # Errors
///
/// Returns [`Error::Misaligned`] when `bytes.len()` is not a multiple of `INSN_SIZE`.
pub fn decode(bytes: &[u8], start: u64) -> Result<Vec<DecodedInsn>> {
    if bytes.len() % INSN_SIZE != 0 {
        return Err(Error::Misaligned { len: bytes.len() });
    }

    let insns = bytes
        .chunks_exact(INSN_SIZE)
        .enumerate()
        .map(|(index, chunk)| {
            // `index * INSN_SIZE` is at most `bytes.len()`, so it cannot overflow `usize`.
            let addr = start.wrapping_add((index * INSN_SIZE) as u64);
            let mut raw = [0u8; INSN_SIZE];
            raw.copy_from_slice(chunk);
            let kind = classify(u32::from_le_bytes(raw), addr);
            DecodedInsn {
                addr: VAddr(addr),
                bytes: raw,
                kind,
            }
        })
        .collect();

    Ok(insns)
}
/// Classifies the instruction word `opcode` located at `addr`.
///
/// Patterns are tried in a fixed order and the first match wins; a word that matches
/// none of them is [`InsnKind::Other`]. Branch destinations are resolved relative to
/// `addr`, and the fall-through address is `addr + INSN_SIZE` with wrapping arithmetic.
fn classify(opcode: u32, addr: u64) -> InsnKind {
    // Ignores bits 9:5, the register operand of `ret` / `br` / `blr`.
    const INDIRECT_BRANCH_MASK: u32 = 0xffff_fc1f;
    // Top six bits: selects `b` / `bl`, leaving the 26-bit immediate free.
    const IMM26_BRANCH_MASK: u32 = 0xfc00_0000;
    // Top byte plus bit 4: selects `b.cond`.
    const COND_BRANCH_MASK: u32 = 0xff00_0010;
    // Bits 30:25: selects the compare-and-branch / test-bit-and-branch groups.
    const CMP_TEST_BRANCH_MASK: u32 = 0x7e00_0000;

    let fallthrough = addr.wrapping_add(INSN_SIZE as u64);

    match opcode {
        0xd503_201f => InsnKind::Nop,
        word if word & INDIRECT_BRANCH_MASK == 0xd65f_0000 => InsnKind::Return,
        word if word & INDIRECT_BRANCH_MASK == 0xd61f_0000 => InsnKind::BranchRegister,
        word if word & INDIRECT_BRANCH_MASK == 0xd63f_0000 => InsnKind::BranchLinkRegister,
        word if word & IMM26_BRANCH_MASK == 0x1400_0000 => InsnKind::BranchDirect {
            target: pc_rel26(addr, word),
        },
        word if word & IMM26_BRANCH_MASK == 0x9400_0000 => InsnKind::BranchLink {
            target: pc_rel26(addr, word),
        },
        word if word & COND_BRANCH_MASK == 0x5400_0000 => InsnKind::BranchConditional {
            taken: pc_rel19(addr, word),
            fallthrough,
        },
        word if word & CMP_TEST_BRANCH_MASK == 0x3400_0000 => InsnKind::CompareAndBranch {
            taken: pc_rel19(addr, word),
            fallthrough,
        },
        word if word & CMP_TEST_BRANCH_MASK == 0x3600_0000 => InsnKind::TestBitAndBranch {
            taken: pc_rel14(addr, word),
            fallthrough,
        },
        _ => InsnKind::Other,
    }
}
/// Resolves the destination of a branch that carries a 26-bit immediate in bits 25:0.
///
/// The field is read as a signed count of 4-byte words and added to `addr` with
/// wrapping arithmetic.
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
fn pc_rel26(addr: u64, opcode: u32) -> u64 {
    const FIELD_MASK: u32 = 0x03ff_ffff;
    const SIGN_BIT: i64 = 1 << 25;

    let field = i64::from(opcode & FIELD_MASK);
    // `(x ^ s) - s` sign-extends a two's-complement field whose top bit is `s`.
    let words = (field ^ SIGN_BIT) - SIGN_BIT;
    let byte_offset = words * 4;
    addr.wrapping_add(byte_offset as u64)
}
/// Resolves the destination of a branch that carries a 19-bit immediate in bits 23:5.
///
/// The field is read as a signed count of 4-byte words and added to `addr` with
/// wrapping arithmetic.
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
fn pc_rel19(addr: u64, opcode: u32) -> u64 {
    const FIELD_SHIFT: u32 = 5;
    const FIELD_MASK: u32 = 0x0007_ffff;
    const SIGN_BIT: i64 = 1 << 18;

    let field = i64::from((opcode >> FIELD_SHIFT) & FIELD_MASK);
    // `(x ^ s) - s` sign-extends a two's-complement field whose top bit is `s`.
    let words = (field ^ SIGN_BIT) - SIGN_BIT;
    let byte_offset = words * 4;
    addr.wrapping_add(byte_offset as u64)
}
/// Resolves the destination of a branch that carries a 14-bit immediate in bits 18:5.
///
/// The field is read as a signed count of 4-byte words and added to `addr` with
/// wrapping arithmetic.
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
fn pc_rel14(addr: u64, opcode: u32) -> u64 {
    const FIELD_SHIFT: u32 = 5;
    const FIELD_MASK: u32 = 0x0000_3fff;
    const SIGN_BIT: i64 = 1 << 13;

    let field = i64::from((opcode >> FIELD_SHIFT) & FIELD_MASK);
    // `(x ^ s) - s` sign-extends a two's-complement field whose top bit is `s`.
    let words = (field ^ SIGN_BIT) - SIGN_BIT;
    let byte_offset = words * 4;
    addr.wrapping_add(byte_offset as u64)
}
/// Renders a short, human-readable form of `insn`.
///
/// Branches with a resolved destination print as `<mnemonic> 0x<hex address>`;
/// `br`, `blr`, `ret` and `nop` print as the bare mnemonic; anything else prints
/// the raw instruction word as `<arm64 0x........>`.
#[must_use]
pub fn format_text(insn: &DecodedInsn) -> String {
    let (mnemonic, dest) = match insn.kind {
        // Forms without a printed operand return straight away.
        InsnKind::BranchRegister => return String::from("br"),
        InsnKind::BranchLinkRegister => return String::from("blr"),
        InsnKind::Return => return String::from("ret"),
        InsnKind::Nop => return String::from("nop"),
        InsnKind::Other => {
            let word = insn.opcode();
            return format!("<arm64 0x{word:08x}>");
        }
        // Everything else shares the "mnemonic + destination" layout.
        InsnKind::BranchDirect { target } => ("b", target),
        InsnKind::BranchLink { target } => ("bl", target),
        InsnKind::BranchConditional { taken, .. } => ("b.cond", taken),
        InsnKind::CompareAndBranch { taken, .. } => ("cbz/cbnz", taken),
        InsnKind::TestBitAndBranch { taken, .. } => ("tbz/tbnz", taken),
    };
    format!("{mnemonic} 0x{dest:x}")
}
/// Wraps `insns` in a [`Function`] made of a single [`BasicBlock`].
///
/// The function and its block both start at the address of the first instruction
/// (`VAddr(0)` when `insns` is empty). The block's terminator is derived from the
/// kind of the last instruction only; an empty slice yields `Terminator::Fallthrough`.
#[must_use]
pub fn lift_function(name: String, insns: &[DecodedInsn]) -> Function<DecodedInsn> {
    let entry = match insns.first() {
        Some(first) => first.addr,
        None => VAddr(0),
    };

    let terminator = match insns.last().map(|last| last.kind) {
        Some(InsnKind::Return) => Terminator::Return,
        Some(InsnKind::BranchDirect { target }) => Terminator::UnconditionalBranch {
            target: VAddr(target),
        },
        Some(
            InsnKind::BranchConditional { taken, fallthrough }
            | InsnKind::CompareAndBranch { taken, fallthrough }
            | InsnKind::TestBitAndBranch { taken, fallthrough },
        ) => Terminator::ConditionalBranch {
            taken: VAddr(taken),
            fallthrough: VAddr(fallthrough),
        },
        // NOTE(review): `blr` ends the block as an indirect branch while `bl` falls
        // through below; confirm this asymmetry between the two linking forms is intended.
        Some(InsnKind::BranchRegister | InsnKind::BranchLinkRegister) => {
            Terminator::IndirectBranch
        }
        // Listed explicitly so that adding an `InsnKind` variant forces a decision here.
        Some(InsnKind::BranchLink { .. } | InsnKind::Nop | InsnKind::Other) | None => {
            Terminator::Fallthrough
        }
    };

    let body = BasicBlock {
        addr: entry,
        insns: insns.to_vec(),
        terminator,
    };
    Function {
        addr: entry,
        name,
        blocks: vec![body],
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a single instruction word placed at `addr` and returns its kind.
    fn kind_of(opcode: u32, addr: u64) -> InsnKind {
        let insns = decode(&opcode.to_le_bytes(), addr).unwrap();
        assert_eq!(insns.len(), 1);
        insns[0].kind
    }

    #[test]
    fn decode_splits_into_4byte_words() {
        let bytes = [
            0xc0, 0x03, 0x5f, 0xd6, // 0xd65f03c0: ret (x30)
            0x1f, 0x20, 0x03, 0xd5, // 0xd503201f: nop
        ];
        let insns = decode(&bytes, 0x1000).unwrap();
        assert_eq!(insns.len(), 2);
        assert_eq!(insns[0].addr, VAddr(0x1000));
        assert_eq!(insns[0].kind, InsnKind::Return);
        assert_eq!(insns[1].addr, VAddr(0x1004));
        assert_eq!(insns[1].kind, InsnKind::Nop);
    }

    #[test]
    fn rejects_misaligned_buffer() {
        let bytes = [0x00, 0x01, 0x02];
        assert!(matches!(
            decode(&bytes, 0x1000),
            Err(Error::Misaligned { len: 3 })
        ));
    }

    #[test]
    fn classifies_b_with_signed_target() {
        // b +16 (imm26 = 4 words)
        let opcode: u32 = 0x14_00_00_04;
        let bytes = opcode.to_le_bytes();
        let insns = decode(&bytes, 0x1000).unwrap();
        assert_eq!(insns[0].kind, InsnKind::BranchDirect { target: 0x1010 });
    }

    #[test]
    fn classifies_bl_with_negative_target() {
        // bl -8 (imm26 = -2 words)
        let opcode: u32 = 0x97_ff_ff_fe;
        let bytes = opcode.to_le_bytes();
        let insns = decode(&bytes, 0x2000).unwrap();
        assert_eq!(insns[0].kind, InsnKind::BranchLink { target: 0x1ff8 });
    }

    #[test]
    fn classifies_b_cond() {
        // b.eq +8 (imm19 = 2 words)
        let opcode: u32 = 0x54_00_00_40;
        let bytes = opcode.to_le_bytes();
        let insns = decode(&bytes, 0x1000).unwrap();
        assert!(matches!(
            insns[0].kind,
            InsnKind::BranchConditional { taken: 0x1008, .. }
        ));
    }

    #[test]
    fn classifies_cbz_and_cbnz() {
        // cbz x0, +8
        assert_eq!(
            kind_of(0xb400_0040, 0x1000),
            InsnKind::CompareAndBranch {
                taken: 0x1008,
                fallthrough: 0x1004
            }
        );
        // cbnz w1, -4
        assert_eq!(
            kind_of(0x35ff_ffe1, 0x1000),
            InsnKind::CompareAndBranch {
                taken: 0x0ffc,
                fallthrough: 0x1004
            }
        );
    }

    #[test]
    fn classifies_tbz_and_tbnz() {
        // tbz w0, #0, +12
        assert_eq!(
            kind_of(0x3600_0060, 0x1000),
            InsnKind::TestBitAndBranch {
                taken: 0x100c,
                fallthrough: 0x1004
            }
        );
        // tbnz x2, #63, -4: the bit-number fields must not leak into the offset.
        assert_eq!(
            kind_of(0xb7ff_ffe2, 0x2000),
            InsnKind::TestBitAndBranch {
                taken: 0x1ffc,
                fallthrough: 0x2004
            }
        );
    }

    #[test]
    fn classifies_register_branches() {
        // br x16
        assert_eq!(kind_of(0xd61f_0200, 0x1000), InsnKind::BranchRegister);
        // blr x8
        assert_eq!(kind_of(0xd63f_0100, 0x1000), InsnKind::BranchLinkRegister);
    }

    #[test]
    fn formats_mnemonics_and_raw_words() {
        // b +16 at 0x1000
        let branch = decode(&0x1400_0004u32.to_le_bytes(), 0x1000).unwrap();
        assert_eq!(format_text(&branch[0]), "b 0x1010");
        // ret
        let ret = decode(&0xd65f_03c0u32.to_le_bytes(), 0x1000).unwrap();
        assert_eq!(format_text(&ret[0]), "ret");
        // mov w0, #0 is not a recognised pattern and prints as a raw word.
        let other = decode(&0x5280_0000u32.to_le_bytes(), 0x1000).unwrap();
        assert_eq!(format_text(&other[0]), "<arm64 0x52800000>");
    }

    #[test]
    fn ret_kind_drives_terminator() {
        // 0x52800000: mov w0, #0 ; 0xd65f03c0: ret
        let bytes = [0x00, 0x00, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6];
        let insns = decode(&bytes, 0x1000).unwrap();
        let f = lift_function("f".into(), &insns);
        assert_eq!(f.blocks.len(), 1);
        assert_eq!(f.blocks[0].terminator, Terminator::Return);
    }
}