use capstone::prelude::*;
/// One `call` / `jmp` instruction found while scanning a code region.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct InsnData {
    /// Address of the instruction itself.
    pub(crate) address: u64,
    /// Resolved destination of the transfer, or `None` when the operand
    /// could not be resolved to a concrete address.
    pub(crate) call_target: Option<u64>,
}
/// Remembers that a register was last written by a RIP-relative `mov`, so a
/// later indirect `call`/`jmp` through that register can be resolved.
struct RegisterLoad {
/// Absolute address of the memory operand the register was loaded from
/// (address of the following instruction plus the operand displacement).
got_addr: u64,
}
/// Disassembles `data` (loaded at `base_addr`) and returns one [`InsnData`]
/// for every `call` and `jmp` instruction whose details could be decoded.
///
/// Target resolution per operand kind:
/// * immediate: the immediate itself, replaced by its `got_cache` entry
///   when one exists;
/// * RIP-relative memory: looked up through [`got::resolve_target`];
/// * register: resolved only if the register was last written by a
///   RIP-relative `mov` whose source address is present in `got_cache`.
///
/// Any other operand shape yields `call_target: None`. If disassembly fails
/// an empty vector is returned.
///
/// Register tracking is a linear heuristic: only `mov` updates it, and all
/// tracked registers are forgotten after a jump or `ret`.
fn scan_call_instructions(
    cs: &Capstone,
    data: &[u8],
    base_addr: u64,
    got_cache: &ahash::AHashMap<u64, u64>,
) -> Vec<InsnData> {
    let mut results = Vec::new();
    let insns = match cs.disasm_all(data, base_addr) {
        Ok(insns) => insns,
        Err(_) => return results,
    };

    let rip = capstone::RegId(arch::x86::X86Reg::X86_REG_RIP as u16);
    let mut register_loads: ahash::AHashMap<u16, RegisterLoad> = ahash::AHashMap::new();

    for insn in insns.iter() {
        let mnemonic = insn.mnemonic().unwrap_or("");
        let insn_size = insn.bytes().len() as u64;

        // Track `mov reg, [rip + disp]`; any other `mov` into a register
        // invalidates what was known about that register.
        if mnemonic == "mov" {
            if let Ok(detail) = cs.insn_detail(insn) {
                let arch_detail = detail.arch_detail();
                let operands = arch_detail.operands();
                if operands.len() == 2 {
                    if let (
                        arch::ArchOperand::X86Operand(dst),
                        arch::ArchOperand::X86Operand(src),
                    ) = (&operands[0], &operands[1])
                    {
                        if let arch::x86::X86OperandType::Reg(dst_reg) = dst.op_type {
                            match src.op_type {
                                arch::x86::X86OperandType::Mem(mem_op)
                                    if mem_op.base() == rip =>
                                {
                                    let got_addr = rip_relative_address(
                                        insn.address(),
                                        insn_size,
                                        mem_op.disp(),
                                    );
                                    register_loads.insert(dst_reg.0, RegisterLoad { got_addr });
                                }
                                _ => {
                                    register_loads.remove(&dst_reg.0);
                                }
                            }
                        }
                    }
                }
            }
        }

        if mnemonic == "call" || mnemonic == "jmp" {
            // Instructions whose details cannot be decoded are not reported.
            if let Ok(detail) = cs.insn_detail(insn) {
                let arch_detail = detail.arch_detail();
                let operands = arch_detail.operands();
                let call_target = if operands.len() == 1 {
                    match &operands[0] {
                        arch::ArchOperand::X86Operand(op) => match op.op_type {
                            arch::x86::X86OperandType::Imm(imm_val) => {
                                let addr = imm_val as u64;
                                Some(got_cache.get(&addr).copied().unwrap_or(addr))
                            }
                            arch::x86::X86OperandType::Mem(mem_op) => {
                                if mem_op.base() == rip {
                                    got::resolve_target(
                                        insn.address(),
                                        insn_size,
                                        mem_op.disp(),
                                        got_cache,
                                    )
                                } else {
                                    None
                                }
                            }
                            arch::x86::X86OperandType::Reg(reg) => register_loads
                                .get(&reg.0)
                                .and_then(|load| got_cache.get(&load.got_addr).copied()),
                            _ => None,
                        },
                        _ => None,
                    }
                } else {
                    None
                };
                results.push(InsnData {
                    address: insn.address(),
                    call_target,
                });
            }
        }

        // Forget tracked registers only AFTER the instruction was examined:
        // clearing first would make `jmp <reg>` (tail calls through a loaded
        // pointer) impossible to resolve.
        if ends_register_tracking(mnemonic) {
            register_loads.clear();
        }
    }
    results
}

/// Absolute address referenced by a RIP-relative operand: the address of the
/// next instruction plus the signed displacement (two's-complement wrapping).
fn rip_relative_address(insn_addr: u64, insn_size: u64, disp: i64) -> u64 {
    insn_addr.wrapping_add(insn_size).wrapping_add(disp as u64)
}

/// Returns `true` for jump and return mnemonics, after which register
/// contents tracked by the linear scan are no longer trusted.
fn ends_register_tracking(mnemonic: &str) -> bool {
    matches!(
        mnemonic,
        "jmp"
            | "je"
            | "jne"
            | "jz"
            | "jnz"
            | "ja"
            | "jae"
            | "jb"
            | "jbe"
            | "jg"
            | "jge"
            | "jl"
            | "jle"
            | "js"
            | "jns"
            | "jo"
            | "jno"
            | "jp"
            | "jnp"
            | "jcxz"
            | "jecxz"
            | "jrcxz"
            | "ret"
    )
}
/// Scans a text section for `call`/`jmp` instructions and their targets.
///
/// * `text_data` / `text_addr` - raw bytes of the code region and the
///   address they are disassembled at.
/// * `binary` - parsed container; selects which indirection cache is built
///   (ELF relocations or Mach-O stubs).
/// * `buffer` - bytes handed to the cache builder together with `binary`.
///
/// Returns an empty vector when the Capstone engine cannot be created.
/// Despite the name, the scan is performed by a single call to
/// `scan_call_instructions` over the whole region.
pub(crate) fn parallel_disassemble(
    text_data: &[u8],
    text_addr: u64,
    binary: &crate::binary_format::BinaryRef,
    buffer: &[u8],
) -> Vec<InsnData> {
    // The cache is built first, exactly as before, then the engine.
    let got_cache = match binary {
        crate::binary_format::BinaryRef::Elf(elf) => got::build_cache(elf, buffer),
        crate::binary_format::BinaryRef::MachO(macho) => {
            got::build_macho_stub_cache(macho, buffer)
        }
    };

    Capstone::new()
        .x86()
        .mode(arch::x86::ArchMode::Mode64)
        .detail(true)
        .build()
        .map(|engine| scan_call_instructions(&engine, text_data, text_addr, &got_cache))
        .unwrap_or_default()
}
/// Mach-O relocation type number 2 (the value of `ARM64_RELOC_BRANCH26`).
pub(crate) const MACHO_RELOC_BRANCH26: u8 = 2;
/// ELF relocation type number 4 (the value of `R_X86_64_PLT32`).
pub(crate) const ELF_RELOC_PLT32: u32 = 4;
/// ELF relocation type number 9 (the value of `R_X86_64_GOTPCREL`).
pub(crate) const ELF_RELOC_GOTPCREL: u32 = 9;

/// Reports whether the ELF relocation type `r_type` is one of the two kinds
/// treated as call relocations: [`ELF_RELOC_PLT32`] or [`ELF_RELOC_GOTPCREL`].
pub(crate) fn is_call_relocation(r_type: u32) -> bool {
    matches!(r_type, ELF_RELOC_PLT32 | ELF_RELOC_GOTPCREL)
}
pub mod got {
use goblin::elf::Elf;
use goblin::mach::MachO;
pub(crate) fn build_cache(elf: &Elf, buffer: &[u8]) -> ahash::AHashMap<u64, u64> {
let mut got_cache = ahash::AHashMap::new();
if let Some(rela_plt) = find_section(elf, ".rela.plt") {
process_relocations(elf, buffer, rela_plt, &mut got_cache);
}
if let Some(rela_dyn) = find_section(elf, ".rela.dyn") {
process_relocations(elf, buffer, rela_dyn, &mut got_cache);
}
got_cache
}
pub(crate) fn build_macho_stub_cache(
macho: &MachO,
buffer: &[u8],
) -> ahash::AHashMap<u64, u64> {
use goblin::mach::constants::{
S_LAZY_SYMBOL_POINTERS, S_NON_LAZY_SYMBOL_POINTERS, S_SYMBOL_STUBS,
};
use goblin::mach::load_command::CommandVariant;
let mut cache = ahash::AHashMap::new();
let dysymtab = macho.load_commands.iter().find_map(|lc| match lc.command {
CommandVariant::Dysymtab(ref cmd) => Some(cmd),
_ => None,
});
let Some(dysymtab) = dysymtab else {
return cache;
};
let indirect_offset = dysymtab.indirectsymoff as usize;
let indirect_count = dysymtab.nindirectsyms as usize;
if indirect_offset + indirect_count * 4 > buffer.len() {
return cache;
}
const SECTION64_SIZE: usize = 80;
const SEGMENT64_HDR: usize = 72;
for lc in &macho.load_commands {
let (nsects, lc_off) = match lc.command {
CommandVariant::Segment64(ref seg) => (seg.nsects as usize, lc.offset),
_ => continue,
};
for i in 0..nsects {
let s = lc_off + SEGMENT64_HDR + i * SECTION64_SIZE;
if s + SECTION64_SIZE > buffer.len() {
break;
}
let flags = u32::from_le_bytes(buffer[s + 64..s + 68].try_into().unwrap());
let section_type = flags & 0xff;
if section_type != S_SYMBOL_STUBS
&& section_type != S_LAZY_SYMBOL_POINTERS
&& section_type != S_NON_LAZY_SYMBOL_POINTERS
{
continue;
}
let sec_addr = u64::from_le_bytes(buffer[s + 32..s + 40].try_into().unwrap());
let sec_size = u64::from_le_bytes(buffer[s + 40..s + 48].try_into().unwrap());
let reserved1 = u32::from_le_bytes(buffer[s + 68..s + 72].try_into().unwrap());
let reserved2 = u32::from_le_bytes(buffer[s + 72..s + 76].try_into().unwrap());
let entry_size = if section_type == S_SYMBOL_STUBS {
reserved2 as u64
} else {
8u64
};
if entry_size == 0 {
continue;
}
let num_entries = sec_size / entry_size;
let indirect_start = reserved1 as usize;
for j in 0..num_entries as usize {
let idx = indirect_start + j;
if idx >= indirect_count {
break;
}
let sym_off = indirect_offset + idx * 4;
if sym_off + 4 > buffer.len() {
break;
}
let sym_idx =
u32::from_le_bytes(buffer[sym_off..sym_off + 4].try_into().unwrap());
if sym_idx & 0xc0000000 != 0 {
continue;
}
if let Some(ref symbols) = macho.symbols {
if let Ok((_name, nlist)) = symbols.get(sym_idx as usize) {
if nlist.n_value != 0 {
let entry_addr = sec_addr + (j as u64) * entry_size;
cache.insert(entry_addr, nlist.n_value);
}
}
}
}
}
}
cache
}
fn find_section<'a>(elf: &'a Elf, name: &str) -> Option<&'a goblin::elf::SectionHeader> {
elf.section_headers.iter().find(|sh| {
elf.shdr_strtab
.get_at(sh.sh_name)
.map(|n| n == name)
.unwrap_or(false)
})
}
fn process_relocations(
elf: &Elf,
buffer: &[u8],
section: &goblin::elf::SectionHeader,
got_cache: &mut ahash::AHashMap<u64, u64>,
) {
let offset = section.sh_offset as usize;
let size = section.sh_size as usize;
let end = match offset.checked_add(size) {
Some(e) if e <= buffer.len() => e,
_ => return,
};
let data = &buffer[offset..end];
let num_relocs = size / 24;
for i in 0..num_relocs {
let reloc_offset = i * 24;
if reloc_offset + 24 > data.len() {
break;
}
let r_offset =
u64::from_le_bytes(data[reloc_offset..reloc_offset + 8].try_into().unwrap());
let r_info = u64::from_le_bytes(
data[reloc_offset + 8..reloc_offset + 16]
.try_into()
.unwrap(),
);
let r_addend = i64::from_le_bytes(
data[reloc_offset + 16..reloc_offset + 24]
.try_into()
.unwrap(),
);
let r_type = (r_info & 0xffffffff) as u32;
let sym_index = (r_info >> 32) as usize;
if r_type == 7 || r_type == 6 {
if let Some(sym) = elf.dynsyms.get(sym_index) {
let target = sym.st_value;
if target != 0 {
got_cache.insert(r_offset, target);
}
}
}
else if r_type == 8 {
got_cache.insert(r_offset, r_addend as u64);
}
}
}
pub(crate) fn resolve_target(
insn_addr: u64,
insn_size: u64,
rip_offset: i64,
got_cache: &ahash::AHashMap<u64, u64>,
) -> Option<u64> {
let next_insn = insn_addr.wrapping_add(insn_size);
let got_addr = next_insn.wrapping_add(rip_offset as u64);
got_cache.get(&got_addr).copied()
}
}
/// PLT stub resolution. Both functions are currently pass-through
/// placeholders: no stub map is built and no address is rewritten.
pub mod plt {
    use goblin::elf::Elf;
    use std::collections::HashMap;

    /// Returns the PLT stub map for `_elf`.
    ///
    /// Neither argument is inspected; the result is always an empty map.
    pub(crate) fn build_map(_elf: &Elf, _buffer: &[u8]) -> HashMap<u64, u64> {
        HashMap::default()
    }

    /// Resolves `target_addr` through the PLT stub map.
    ///
    /// The map is not consulted; `target_addr` is returned unchanged.
    pub(crate) fn resolve_stub(target_addr: u64, _plt_map: &HashMap<u64, u64>) -> u64 {
        target_addr
    }
}