use rayon::prelude::*;
/// One branch instruction found by the scanner.
///
/// The derives make results printable in diagnostics and directly comparable in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct InsnData {
    /// Address of the branch instruction itself (`base_addr` plus its byte offset).
    pub(crate) address: u64,
    /// Decoded destination of the branch; `scan_branch_instructions` always sets `Some`.
    pub(crate) call_target: Option<u64>,
}
/// Width in bytes of one instruction word; the scanner walks its input in steps of this size.
const INSN_SIZE: usize = 4;
/// Smallest per-thread share of the input (64 KiB) for which `parallel_disassemble` splits
/// the work; below this it scans everything in a single pass.
const MIN_CHUNK_SIZE: usize = 1 << 16;
/// Selects the top six bits of an instruction word when testing for `BL`.
const BL_MASK: u32 = 0xFC00_0000;
/// Value of the masked bits that identifies a `BL` (branch with link) instruction.
const BL_OPCODE: u32 = 0x9400_0000;
/// Selects the top six bits of an instruction word when testing for `B`.
const B_MASK: u32 = 0xFC00_0000;
/// Value of the masked bits that identifies a `B` (unconditional branch) instruction.
const B_OPCODE: u32 = 0x1400_0000;
/// Mach-O relocation type number for a 26-bit branch
/// (presumably `ARM64_RELOC_BRANCH26`; confirm against the Mach-O headers).
pub(crate) const MACHO_RELOC_BRANCH26: u8 = 2;
/// ELF relocation type number for a 26-bit call
/// (presumably `R_AARCH64_CALL26`; confirm against the AArch64 ELF ABI).
pub(crate) const ELF_RELOC_CALL26: u32 = 283;
/// ELF relocation type number for a 26-bit jump
/// (presumably `R_AARCH64_JUMP26`; confirm against the AArch64 ELF ABI).
pub(crate) const ELF_RELOC_JUMP26: u32 = 282;
/// Returns `true` when `r_type` is one of the two ELF relocation types this module treats
/// as a call site: `ELF_RELOC_CALL26` or `ELF_RELOC_JUMP26`.
pub(crate) fn is_call_relocation(r_type: u32) -> bool {
    [ELF_RELOC_CALL26, ELF_RELOC_JUMP26].contains(&r_type)
}
/// Computes the destination address of a `B`/`BL` instruction.
///
/// The low 26 bits of `insn_bytes` are taken as a signed offset counted in 4-byte words.
/// That offset is sign-extended, scaled to bytes and added to `pc`.
///
/// The addition wraps around the 64-bit address space, so a `pc` near the `i64` sign
/// boundary yields a wrapped address instead of triggering an arithmetic-overflow panic
/// in builds with overflow checks enabled.
///
/// The upper six bits of `insn_bytes` are ignored; the caller is responsible for having
/// checked that the word really is a branch.
pub(crate) fn decode_branch_target(insn_bytes: u32, pc: u64) -> u64 {
    const IMM26_MASK: u32 = 0x03FF_FFFF;

    let imm26 = insn_bytes & IMM26_MASK;
    // Shifting left by 6 moves the field's sign bit into bit 31; the arithmetic shift
    // right by 4 then sign-extends it while leaving the value multiplied by four.
    let byte_offset = i64::from(((imm26 as i32) << 6) >> 4);
    // Two's-complement reinterpretation plus wrapping add equals signed addition modulo 2^64.
    pc.wrapping_add(byte_offset as u64)
}
/// Scans `data` sequentially for `B` and `BL` instructions.
///
/// `data` is read as consecutive little-endian 32-bit words; trailing bytes that do not
/// fill a whole word are ignored. `base_addr` is the address of `data[0]`. Every word
/// whose masked top bits match `BL_OPCODE` or `B_OPCODE` produces one `InsnData` holding
/// the instruction's address and its decoded target, in the order encountered.
pub(crate) fn scan_branch_instructions(data: &[u8], base_addr: u64) -> Vec<InsnData> {
    let mut branches = Vec::new();

    for (index, word) in data.chunks_exact(INSN_SIZE).enumerate() {
        let insn = u32::from_le_bytes([word[0], word[1], word[2], word[3]]);

        let matches_bl = (insn & BL_MASK) == BL_OPCODE;
        let matches_b = (insn & B_MASK) == B_OPCODE;
        if !(matches_bl || matches_b) {
            continue;
        }

        let pc = base_addr + (index * INSN_SIZE) as u64;
        branches.push(InsnData {
            address: pc,
            call_target: Some(decode_branch_target(insn, pc)),
        });
    }

    branches
}
pub(crate) fn parallel_disassemble(text_data: &[u8], text_addr: u64) -> Vec<InsnData> {
let num_threads = rayon::current_num_threads();
let ideal_chunk_size = text_data.len() / num_threads;
let chunk_size = if ideal_chunk_size < MIN_CHUNK_SIZE {
text_data.len()
} else {
(ideal_chunk_size / INSN_SIZE) * INSN_SIZE
};
if chunk_size >= text_data.len() {
return scan_branch_instructions(text_data, text_addr);
}
let chunks: Vec<(usize, &[u8], u64)> = text_data
.chunks(chunk_size)
.enumerate()
.map(|(i, chunk)| {
let chunk_addr = text_addr + (i * chunk_size) as u64;
(i, chunk, chunk_addr)
})
.collect();
let results: Vec<Vec<InsnData>> = chunks
.par_iter()
.map(|(_i, chunk, chunk_addr)| scan_branch_instructions(chunk, *chunk_addr))
.collect();
results.into_iter().flatten().collect()
}
pub mod plt {
use goblin::elf::Elf;
use std::collections::HashMap;
/// Byte distance between consecutive PLT stubs, as used by `build_map` to locate stub
/// number `index`.
const ENTRY_SIZE: u64 = 16;
/// Byte offset from the start of `.plt` to the first per-symbol stub. Presumably the size
/// of the leading resolver stub; confirm against the target's PLT layout.
const RESOLVER_SIZE: u64 = 32;
/// Builds a lookup table from PLT stub addresses to the addresses of the symbols they call.
///
/// The `.rela.plt` section is read directly out of `buffer` as a packed array of 24-byte
/// little-endian records (`r_offset`, `r_info`, `r_addend`, eight bytes each). The record at
/// position `index` is paired with the stub at
/// `.plt address + RESOLVER_SIZE + index * ENTRY_SIZE`. The upper 32 bits of its `r_info`
/// select the entry of `elf.dynsyms` whose `st_value` becomes the mapped target. Records
/// whose symbol is missing or has a zero `st_value` contribute nothing.
///
/// Returns an empty map when there is no `.plt` section (also reported on stderr), when
/// there is no `.rela.plt` section, or when `.rela.plt`'s file range does not fit in `buffer`.
pub(crate) fn build_map(elf: &Elf, buffer: &[u8]) -> HashMap<u64, u64> {
    const RELA_ENTRY_SIZE: usize = 24;

    let mut plt_map = HashMap::new();

    let Some(plt_base) = elf
        .section_headers
        .iter()
        .find(|sh| elf.shdr_strtab.get_at(sh.sh_name) == Some(".plt"))
        .map(|sh| sh.sh_addr)
    else {
        eprintln!("[PLT] No .plt section found in ELF");
        return plt_map;
    };

    let Some(rela_section) = elf
        .section_headers
        .iter()
        .find(|sh| elf.shdr_strtab.get_at(sh.sh_name) == Some(".rela.plt"))
    else {
        return plt_map;
    };

    // Reject a section whose [offset, offset + size) range overflows or leaves the file.
    let start = rela_section.sh_offset as usize;
    let Some(relocs) = start
        .checked_add(rela_section.sh_size as usize)
        .and_then(|end| buffer.get(start..end))
    else {
        return plt_map;
    };

    for (index, record) in relocs.chunks_exact(RELA_ENTRY_SIZE).enumerate() {
        // Only `r_info` (bytes 8..16 of the record) is needed; its high half is the symbol index.
        let r_info = u64::from_le_bytes(record[8..16].try_into().unwrap());
        let sym_index = (r_info >> 32) as usize;
        let plt_addr = plt_base + RESOLVER_SIZE + (index as u64 * ENTRY_SIZE);

        let target = elf
            .dynsyms
            .get(sym_index)
            .map(|sym| sym.st_value)
            .filter(|&addr| addr != 0);
        if let Some(target_addr) = target {
            plt_map.insert(plt_addr, target_addr);
        }
    }

    plt_map
}
/// Translates `target_addr` through `plt_map`.
///
/// Returns the mapped address when `target_addr` is a key of `plt_map`; otherwise returns
/// `target_addr` unchanged.
pub(crate) fn resolve_stub(target_addr: u64, plt_map: &HashMap<u64, u64>) -> u64 {
    match plt_map.get(&target_addr) {
        Some(&resolved) => resolved,
        None => target_addr,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // A stub address present in the map resolves to its mapped target.
    #[test]
    fn test_resolve_stub_with_mapping() {
        let plt_map = HashMap::from([(0x71ee0, 0x74f20)]);
        assert_eq!(resolve_stub(0x71ee0, &plt_map), 0x74f20);
    }

    // With an empty map every address comes back unchanged.
    #[test]
    fn test_resolve_stub_without_mapping() {
        let plt_map = HashMap::new();
        assert_eq!(resolve_stub(0x12345, &plt_map), 0x12345);
    }

    // An address that is not a key passes through even when the map is non-empty.
    #[test]
    fn test_resolve_stub_passthrough() {
        let plt_map = HashMap::from([(0x1000, 0x2000)]);
        assert_eq!(resolve_stub(0x3000, &plt_map), 0x3000);
    }

    // Checks every map entry against the real `.plt` bounds of the example dylib.
    // The test returns without asserting when the dylib is absent or fails to parse.
    #[test]
    #[cfg(all(target_os = "linux", target_arch = "aarch64"))]
    fn test_build_map_with_real_elf() {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let dylib_path = format!("{}/../../target/debug/libdylib_example.so", manifest_dir);
        let Ok(buffer) = std::fs::read(&dylib_path) else {
            return;
        };
        let Ok(elf) = Elf::parse(&buffer) else {
            return;
        };

        let plt_map = build_map(&elf, &buffer);
        assert!(
            !plt_map.is_empty(),
            "PLT map should contain entries for dylib"
        );

        let plt_section = elf
            .section_headers
            .iter()
            .find(|sh| elf.shdr_strtab.get_at(sh.sh_name) == Some(".plt"))
            .expect("ELF dylib should have .plt section");
        let plt_start = plt_section.sh_addr;
        let plt_end = plt_start + plt_section.sh_size;

        for (&plt_addr, &target_addr) in &plt_map {
            assert!(
                (plt_start..plt_end).contains(&plt_addr),
                "PLT address {:#x} should be in .plt section [{:#x}, {:#x})",
                plt_addr,
                plt_start,
                plt_end
            );
            assert!(
                target_addr > 0,
                "Target address {:#x} should be non-zero",
                target_addr
            );
            assert_ne!(
                plt_addr, target_addr,
                "PLT stub {:#x} should differ from target {:#x}",
                plt_addr, target_addr
            );
        }
    }

    // Checks that a defined `rust_panic` symbol, if any, is reachable through the map,
    // and that the example dylib yields at least five entries.
    // The test returns without asserting when the dylib is absent or fails to parse.
    #[test]
    #[cfg(all(target_os = "linux", target_arch = "aarch64"))]
    fn test_build_map_resolves_rust_panic() {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let dylib_path = format!("{}/../../target/debug/libdylib_example.so", manifest_dir);
        let Ok(buffer) = std::fs::read(&dylib_path) else {
            return;
        };
        let Ok(elf) = Elf::parse(&buffer) else {
            return;
        };

        let plt_map = build_map(&elf, &buffer);

        // First dynamic symbol with a non-zero value whose name mentions `rust_panic`.
        let rust_panic_addr = elf
            .dynsyms
            .iter()
            .filter(|sym| sym.st_value > 0)
            .find(|sym| {
                elf.dynstrtab
                    .get_at(sym.st_name)
                    .map(|name| name.contains("rust_panic"))
                    .unwrap_or(false)
            })
            .map(|sym| sym.st_value);

        if let Some(rust_panic_target) = rust_panic_addr {
            let found_mapping = plt_map
                .values()
                .any(|target| *target == rust_panic_target);
            assert!(
                found_mapping,
                "PLT map should contain mapping to rust_panic at {:#x}",
                rust_panic_target
            );
        }

        assert!(
            plt_map.len() >= 5,
            "Dylib should have PLT entries, found {}",
            plt_map.len()
        );
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes instruction words as the little-endian byte stream the scanner consumes.
    fn assemble(words: &[u32]) -> Vec<u8> {
        words.iter().flat_map(|word| word.to_le_bytes()).collect()
    }

    // BL with imm26 = 1 lands one word after the instruction.
    #[test]
    fn test_decode_branch_target_forward() {
        let insn = 0x94000001_u32;
        assert_eq!(decode_branch_target(insn, 0x1000), 0x1004);
    }

    // BL with imm26 = -1 lands one word before the instruction.
    #[test]
    fn test_decode_branch_target_backward() {
        let pc = 0x2000_u64;
        let insn = 0x97FFFFFF_u32;
        assert_eq!(decode_branch_target(insn, pc), pc.wrapping_sub(4));
    }

    // A zero offset targets the instruction itself.
    #[test]
    fn test_decode_branch_target_zero_offset() {
        let insn = 0x94000000_u32;
        assert_eq!(decode_branch_target(insn, 0x1000), 0x1000);
    }

    #[test]
    fn test_scan_branch_instructions_bl() {
        let code = assemble(&[0x94000001]);
        let results = scan_branch_instructions(&code, 0x1000);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].address, 0x1000);
        assert_eq!(results[0].call_target, Some(0x1004));
    }

    #[test]
    fn test_scan_branch_instructions_b() {
        let code = assemble(&[0x14000002]);
        let results = scan_branch_instructions(&code, 0x2000);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].address, 0x2000);
        assert_eq!(results[0].call_target, Some(0x2008));
    }

    // A word whose top bits match neither opcode is skipped.
    #[test]
    fn test_scan_branch_instructions_non_branch() {
        let code = assemble(&[0x91000000]);
        assert!(scan_branch_instructions(&code, 0x1000).is_empty());
    }

    // Two BLs separated by a non-branch word, both targeting 0x100C.
    #[test]
    fn test_scan_branch_instructions_multiple() {
        let code = assemble(&[0x94000003, 0x91000000, 0x94000001]);
        let results = scan_branch_instructions(&code, 0x1000);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].address, 0x1000);
        assert_eq!(results[0].call_target, Some(0x100C));
        assert_eq!(results[1].address, 0x1008);
        assert_eq!(results[1].call_target, Some(0x100C));
    }

    #[test]
    fn test_scan_branch_instructions_empty_input() {
        assert!(scan_branch_instructions(&[], 0x1000).is_empty());
    }

    // Input far below the parallel threshold still reports every branch, in address order.
    #[test]
    fn test_parallel_disassemble_small_input() {
        let code = assemble(&[0x94000002, 0x91000000, 0x14000001]);
        let results = parallel_disassemble(&code, 0x1000);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].address, 0x1000);
        assert_eq!(results[1].address, 0x1008);
    }

    #[test]
    fn test_parallel_disassemble_empty() {
        assert!(parallel_disassemble(&[], 0x1000).is_empty());
    }
}