use std::rc::Rc;
use object::pe::IMAGE_SCN_MEM_EXECUTE;
use object::{Object, ObjectSection, SectionFlags};
use once_cell::unsync::OnceCell;
use std::collections::btree_map;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::path::Path;
use std::sync::Arc;
use addr2line::Context;
use capstone::arch::{arm, arm64, x86};
use capstone::prelude::*;
use gimli::RunTimeEndian;
use gimli::{read::Dwarf, EndianSlice, SectionId};
use std::error::Error;
use fallible_iterator::FallibleIterator;
/// A borrowed run of bytes together with the address of its first byte.
#[derive(Debug, Clone, Copy)]
pub struct CodeRange<'a> {
    /// Address that corresponds to `data[0]`.
    pub address: u64,
    /// The bytes covered by this range.
    pub data: &'a [u8],
}
/// Code ranges that belong to a single source file.
#[derive(Debug, Default)]
pub struct LineMap<'a> {
    /// Code ranges keyed by the source line they were attributed to.
    inner: BTreeMap<u32, Vec<CodeRange<'a>>>,
    /// Code ranges attributed to the file but carrying no line number.
    extra: Vec<CodeRange<'a>>,
}
impl<'a> LineMap<'a> {
#[inline(always)]
pub fn iter_maped(&'_ self) -> btree_map::Iter<'_, u32, Vec<CodeRange<'a>>> {
self.inner.iter()
}
}
/// Code ranges of an object file grouped by the source file they come from.
#[derive(Debug, Default)]
pub struct FileMap<'a> {
    /// Per-source-file line maps, keyed by the file path.
    inner: HashMap<Arc<Path>, LineMap<'a>>,
    /// Code ranges that could not be attributed to any source file.
    extra: Vec<CodeRange<'a>>,
}
impl<'a> FileMap<'a> {
#[inline(always)]
pub fn get(&self, id: &Arc<Path>) -> Option<&LineMap<'a>> {
self.inner.get(id)
}
}
/// Endian-aware byte slice used as the reader type for this file's DWARF data.
pub type EStr<'a> = EndianSlice<'a, RunTimeEndian>;
/// A parsed object file together with lazily built analysis caches.
///
/// Every cache is a `OnceCell`, so it is filled at most once, the first time
/// the matching accessor succeeds.
pub struct MachineFile<'a> {
    /// The parsed object file.
    pub obj: object::File<'a>,
    /// All sections of the file, classified as code or info.
    pub sections: Box<[Section<'a>]>,
    /// Cached DWARF sections.
    dwarf: OnceCell<Arc<Dwarf<EStr<'a>>>>,
    /// Cached address-to-line lookup context.
    addr2line: OnceCell<Arc<Context<EStr<'a>>>>,
    /// Cached mapping from source files and lines to code ranges.
    file_lines: OnceCell<Arc<FileMap<'a>>>,
    /// Cached Capstone handle for the file's architecture.
    capstone: OnceCell<Rc<Capstone>>,
}
/// A section of the object file, classified as code or non-code.
#[derive(Clone, Debug, PartialEq)]
pub enum Section<'a> {
    /// A section that can be disassembled.
    Code(CodeSection<'a>),
    /// Any other section; only its name, bytes and address are kept.
    Info(InfoSection<'a>),
}
impl Section<'_> {
pub fn name(&self) -> &str {
match self {
Section::Code(x) => &x.name,
Section::Info(x) => &x.name,
}
}
}
/// An executable section and its lazily computed disassembly.
#[derive(Clone, Debug, PartialEq)]
pub struct CodeSection<'a> {
    /// Section name.
    pub name: Box<str>,
    /// Raw bytes of the section.
    pub data: &'a [u8],
    /// Address of the first byte of `data`.
    pub address: u64,
    /// Disassembly cache, filled on the first successful disassembly.
    asm: OnceCell<Arc<[InstructionDetail]>>,
}
fn dissasm(
cs: &Capstone,
data: &[u8],
address: u64,
) -> Result<Arc<[InstructionDetail]>, Box<dyn Error>> {
let disasm = cs.disasm_all(data, address)?;
let mut instructions = Vec::new();
for (_serial_number, insn) in disasm.iter().enumerate() {
instructions.push(insn.into());
}
Ok(instructions.into())
}
pub fn map_dissasm(
cs: &Capstone,
data: &[u8],
address: u64,
f:&mut impl FnMut(InstructionDetail)->Result<(),Box<dyn Error>>,
) -> Result<(), Box<dyn Error>> {
let mut cur_address = address;
let mut cur_data = data;
loop{
let disasm = cs.disasm_count(cur_data, cur_address,1000)?;
let Some(last) = disasm.last() else {
return Ok(());
};
let end = last.address()+last.len() as u64;
cur_data = &cur_data[(end-cur_address)as usize..];
cur_address = end;
for (_serial_number, insn) in disasm.iter().enumerate() {
f(insn.into())?;
}
}
}
impl CodeSection<'_> {
pub fn get_high(&self)->u64{
self.address+self.data.len().saturating_sub(1) as u64
}
pub fn get_existing_asm(&self) -> Arc<[InstructionDetail]> {
self.asm.get().unwrap().clone()
}
pub fn map_asm(&self, cs: &Capstone,f:&mut impl FnMut(&InstructionDetail)->Result<(),Box<dyn Error>>,) -> Result<Arc<[InstructionDetail]>, Box<dyn Error>> {
if let Some(ans) = self.asm.get(){
for ins in ans.iter() {
f(&ins)?;
}
return Ok(ans.clone())
}
self.asm
.get_or_try_init(|| {
let mut instructions = Vec::new();
map_dissasm(&cs, self.data, self.address,&mut |ins|{
f(&ins)?;
instructions.push(ins);
Ok(())
})?;
Ok(instructions.into())
})
.cloned()
}
pub fn get_asm(
&self,
cs: &Capstone,
) -> Result<Arc<[InstructionDetail]>, Box<dyn Error>> {
self.asm
.get_or_try_init(|| dissasm(&cs, self.data, self.address))
.cloned()
}
}
/// A section kept only as raw bytes, without a disassembly cache.
#[derive(Clone, Debug, PartialEq)]
pub struct InfoSection<'a> {
    /// Section name.
    pub name: Box<str>,
    /// Raw bytes of the section.
    pub data: &'a [u8],
    /// Address of the section.
    pub address: u64,
}
/// An owned copy of one disassembled instruction.
#[derive(Clone, Debug, PartialEq)]
pub struct InstructionDetail {
    /// Address of the instruction.
    pub address: u64,
    /// Instruction mnemonic.
    pub mnemonic: Box<str>,
    /// Operand string.
    pub op_str: Box<str>,
    /// Length of the instruction in bytes.
    pub size: usize,
}
impl From<&capstone::Insn<'_>> for InstructionDetail {
    /// Copies the address, mnemonic, operand string and length out of a
    /// Capstone instruction.
    ///
    /// A missing mnemonic or operand string is replaced by `"unknown"`.
    fn from(insn: &capstone::Insn<'_>) -> Self {
        let mnemonic = insn.mnemonic().unwrap_or("unknown");
        let op_str = insn.op_str().unwrap_or("unknown");
        Self {
            address: insn.address(),
            mnemonic: Box::from(mnemonic),
            op_str: Box::from(op_str),
            size: insn.len(),
        }
    }
}
impl InstructionDetail {
    /// Returns the address just past the last byte of the instruction.
    pub fn get_end(&self) -> u64 {
        let length = self.size as u64;
        self.address + length
    }
}
impl<'a> MachineFile<'a> {
pub fn dissasm_address(&self,target:u64)->Result<Option<InstructionDetail>,Box<dyn Error>>{
for s in &self.sections {
let Section::Code(code) = s else {
continue;
};
if target < code.address || target >= code.get_high(){
continue;
}
let offset = (target - code.address) as usize;
let data = &code.data[offset..];
let cs = self.get_capstone()?;
return Ok(cs.disasm_count(data,target,1)?.first().map(|x| x.into()));
}
Ok(None)
}
pub fn get_lines_map(&self) -> Result<Arc<FileMap<'a>>, Box<dyn Error>> {
self.file_lines
.get_or_try_init(|| {
let context = self.get_addr2line()?;
let mut ans = Arc::new(FileMap::default());
let handle = Arc::get_mut(&mut ans).unwrap();
for section in self.sections.iter() {
let Section::Code(code_section) = section else{
continue;
};
let end_address = code_section.get_high();
let mut iter = context.find_location_range(code_section.address,end_address)?;
let mut prev_end = code_section.address;
while let Some((low,size,loc)) = FallibleIterator::next(&mut iter)?{
if low > prev_end{
let size = (low-prev_end) as usize;
let start_idx =(prev_end-code_section.address)as usize;
let data = &code_section.data[start_idx..][..size];
handle.extra.push(CodeRange{
address:prev_end,
data,
})
}
prev_end = low+size;
let start_idx =(low-code_section.address) as usize;
if start_idx.saturating_add(size.saturating_sub(1) as usize) >= code_section.data.len() {
return Err("dwarf info ordered a bad read (out of section)".into());
}
let data = &code_section.data[start_idx..][..size as usize];
let cur_range = CodeRange{
address:low,
data
};
match (loc.file, loc.line) {
(Some(file_name), Some(line)) => {
let file = Path::new(file_name).into();
handle
.inner
.entry(file)
.or_default()
.inner
.entry(line)
.or_default()
.push(cur_range);
}
(Some(file_name), None) => {
let file = Path::new(file_name).into();
handle
.inner
.entry(file)
.or_default()
.extra
.push(cur_range);
}
(None, _) => handle.extra.push(cur_range),
}
}
}
Ok(ans)
})
.cloned()
}
fn get_gimli_section(&self, section: SectionId) -> &'a [u8] {
self.obj
.section_by_name(section.name())
.and_then(|x| x.data().ok())
.unwrap_or(&[])
}
pub fn load_dwarf(&self) -> Result<Arc<Dwarf<EStr<'a>>>, gimli::Error> {
self.dwarf
.get_or_try_init(|| {
let endian = if self.obj.is_little_endian() {
RunTimeEndian::Little
} else {
RunTimeEndian::Big
};
Dwarf::load(
|section| -> Result<EndianSlice<RunTimeEndian>, gimli::Error> {
Ok(EndianSlice::new(self.get_gimli_section(section), endian))
},
)
.map(Arc::new)
})
.cloned()
}
pub fn get_addr2line(&self) -> Result<Arc<Context<EStr<'a>>>, Box<dyn Error>> {
self.addr2line
.get_or_try_init(|| Ok(Context::from_arc_dwarf(self.load_dwarf()?)?.into()))
.cloned()
}
pub fn get_capstone(&self) -> Result<Rc<Capstone>, Box<dyn Error>> {
self.capstone
.get_or_try_init(|| Ok(create_capstone(self.obj.architecture())?.into()))
.cloned()
}
pub fn parse(buffer: &'a [u8]) -> Result<MachineFile<'a>, Box<dyn Error>> {
let obj = object::File::parse(buffer)?;
let mut parsed_sections = Vec::new();
for section in obj.sections() {
let section_name: Box<str> = section.name()?.into();
let section_data = section.data()?;
if should_disassemble(§ion) {
parsed_sections.push(Section::Code(CodeSection {
name: section_name,
data: section_data,
address: section.address(),
asm: OnceCell::new(),
}));
} else {
parsed_sections.push(Section::Info(InfoSection {
name: section_name,
data: section_data,
address: section.address(),
}));
}
}
let ans = MachineFile {
obj,
sections: parsed_sections.into(),
dwarf: OnceCell::new(),
addr2line: OnceCell::new(),
file_lines:OnceCell::new(),
capstone:OnceCell::new(),
};
Ok(ans)
}
}
/// Builds a Capstone disassembler for `arch`.
///
/// Instruction details are disabled and skip-data mode is enabled on the
/// returned handle.
///
/// # Errors
/// Returns `"Unsupported architecture"` for architectures without a mapping
/// here, or the Capstone error if the handle cannot be built or configured.
pub fn create_capstone(arch: object::Architecture) -> Result<Capstone, Box<dyn Error>> {
    // Every arm yields the builder's result; the error is propagated once below.
    let built = match arch {
        object::Architecture::X86_64 => Capstone::new()
            .x86()
            .mode(x86::ArchMode::Mode64)
            .detail(false)
            .build(),
        object::Architecture::I386 => Capstone::new()
            .x86()
            .mode(x86::ArchMode::Mode32)
            .detail(false)
            .build(),
        object::Architecture::Arm => Capstone::new()
            .arm()
            .mode(arm::ArchMode::Arm)
            .detail(false)
            .build(),
        object::Architecture::Aarch64 => Capstone::new()
            .arm64()
            .mode(arm64::ArchMode::Arm)
            .detail(false)
            .build(),
        object::Architecture::Riscv64 => Capstone::new()
            .riscv()
            .mode(capstone::arch::riscv::ArchMode::RiscV64)
            .detail(false)
            .build(),
        object::Architecture::Riscv32 => Capstone::new()
            .riscv()
            .mode(capstone::arch::riscv::ArchMode::RiscV32)
            .detail(false)
            .build(),
        object::Architecture::Mips64 => Capstone::new()
            .mips()
            .mode(capstone::arch::mips::ArchMode::Mips64)
            .detail(false)
            .build(),
        object::Architecture::PowerPc => Capstone::new()
            .ppc()
            .mode(capstone::arch::ppc::ArchMode::Mode32)
            .detail(false)
            .build(),
        object::Architecture::PowerPc64 => Capstone::new()
            .ppc()
            .mode(capstone::arch::ppc::ArchMode::Mode64)
            .detail(false)
            .build(),
        object::Architecture::Sparc => Capstone::new()
            .sparc()
            .mode(capstone::arch::sparc::ArchMode::Default)
            .detail(false)
            .build(),
        _ => return Err("Unsupported architecture".into()),
    };
    let mut handle = built?;
    handle.set_skipdata(true)?;
    Ok(handle)
}
/// Decides whether a section holds code that should be disassembled.
///
/// ELF sections qualify when `SHF_EXECINSTR` is set, Mach-O sections when
/// `S_ATTR_PURE_INSTRUCTIONS` is set, and COFF sections when
/// `IMAGE_SCN_MEM_EXECUTE` is set. Sections without flags never qualify.
/// For any other flag format the decision falls back to the section kind
/// reported by the `object` crate, so an unrecognised format no longer panics.
fn should_disassemble(sec: &object::Section) -> bool {
    match sec.flags() {
        SectionFlags::Elf { sh_flags } => sh_flags & u64::from(object::elf::SHF_EXECINSTR) != 0,
        SectionFlags::MachO { flags } => flags & object::macho::S_ATTR_PURE_INSTRUCTIONS != 0,
        SectionFlags::Coff { characteristics } => characteristics & IMAGE_SCN_MEM_EXECUTE != 0,
        SectionFlags::None => false,
        // `SectionFlags` is non-exhaustive; rely on the format-independent
        // section kind instead of aborting the whole parse.
        _ => sec.kind() == object::SectionKind::Text,
    }
}