use std::borrow::Cow;
use std::convert::TryInto;
use std::error::Error;
use std::ffi::CStr;
use std::fmt;
use core::cmp;
use flate2::{Decompress, FlushDecompress};
use goblin::elf::compression_header::{CompressionHeader, ELFCOMPRESS_ZLIB};
use goblin::elf::SectionHeader;
use goblin::elf64::sym::SymIterator;
use goblin::strtab::Strtab;
use goblin::{
container::{Container, Ctx},
elf, strtab,
};
use scroll::Pread;
use thiserror::Error;
use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
use crate::base::*;
use crate::dwarf::{Dwarf, DwarfDebugSession, DwarfError, DwarfSection, Endian};
use crate::Parse;
/// Number of bytes in the buffer that `compute_debug_id` turns into a UUID.
const UUID_SIZE: usize = 16;
/// Upper bound on how many bytes of the `text` section `debug_id` folds into its fallback hash.
const PAGE_SIZE: usize = 4096;
/// goblin's `SHN_UNDEF`, widened to `usize` so it can be compared with a symbol's `st_shndx`.
const SHN_UNDEF: usize = elf::section_header::SHN_UNDEF as usize;
/// goblin's `SHF_COMPRESSED`, widened to `u64` so it can be masked against `sh_flags`.
const SHF_COMPRESSED: u64 = elf::section_header::SHF_COMPRESSED as u64;
// The constants below are compared against `e_flags` of MIPS objects.
// NOTE(review): presumably these are the ABI values from the MIPS ELF definitions — confirm.
// They are not independent bits: EABI32 (0x3000) shares bit 0x2000 with O64 (0x2000).
#[allow(unused)]
const EF_MIPS_ABI_O32: u32 = 0x0000_1000;
const EF_MIPS_ABI_O64: u32 = 0x0000_2000;
#[allow(unused)]
const EF_MIPS_ABI_EABI32: u32 = 0x0000_3000;
const EF_MIPS_ABI_EABI64: u32 = 0x0000_4000;
/// Union of the bits of the two ABI values that `arch` treats as 64-bit MIPS.
const MIPS_64_FLAGS: u32 = EF_MIPS_ABI_O64 | EF_MIPS_ABI_EABI64;
/// An error raised when an ELF file cannot be parsed.
///
/// The display message is always `invalid ELF file`; the detail is carried by the source error.
#[derive(Debug, Error)]
#[error("invalid ELF file")]
pub struct ElfError {
/// The underlying cause, reported as the error source through `#[source]`.
#[source]
source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
impl ElfError {
    /// Creates a new `ElfError` whose source is `source`.
    ///
    /// Anything convertible into a boxed, thread-safe error is accepted, which includes
    /// plain string messages.
    fn new<E>(source: E) -> Self
    where
        E: Into<Box<dyn Error + Send + Sync>>,
    {
        Self {
            source: Some(source.into()),
        }
    }
}
/// An ELF object parsed from a borrowed byte buffer.
pub struct ElfObject<'data> {
/// goblin's representation of the file, populated step by step in `parse`.
elf: elf::Elf<'data>,
/// The complete, unmodified input buffer.
data: &'data [u8],
/// Set when `parse` stopped early and returned a partially populated object.
is_malformed: bool,
}
impl<'data> ElfObject<'data> {
pub fn test(data: &[u8]) -> bool {
data.get(0..elf::header::SELFMAG)
.map_or(false, |data| data == elf::header::ELFMAG)
}
fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> goblin::error::Result<usize> {
let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
if buckets_num == 0 || min_chain == 0 || bloom_size == 0 {
return Err(goblin::error::Error::Malformed(format!(
"Invalid DT_GNU_HASH: buckets_num={buckets_num} min_chain={min_chain} bloom_size={bloom_size}"
)));
}
let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 };
let mut max_chain = 0;
for bucket in 0..buckets_num {
let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize;
if max_chain < chain {
max_chain = chain;
}
}
if max_chain < min_chain {
return Ok(0);
}
let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4;
loop {
let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?;
max_chain += 1;
chain_offset += 4;
if hash & 1 != 0 {
return Ok(max_chain);
}
}
}
/// Reads the second entry of the hash table located at `offset` in `bytes`.
///
/// `parse` uses the returned value as the number of dynamic symbols. The entry is read as a
/// 64-bit value for `EM_FAKE_ALPHA` and `EM_S390` machines in a 64-bit container, and as a
/// 32-bit value in every other case.
///
/// # Errors
///
/// Propagates the read error if the entry lies outside of `bytes`.
fn hash_len(
    bytes: &[u8],
    offset: usize,
    machine: u16,
    ctx: Ctx,
) -> goblin::error::Result<usize> {
    let nchain_offset = offset.saturating_add(4);
    let wide_entries = ctx.container.is_big()
        && matches!(
            machine,
            elf::header::EM_FAKE_ALPHA | elf::header::EM_S390
        );

    if wide_entries {
        Ok(bytes.pread_with::<u64>(nchain_offset, ctx.le)? as usize)
    } else {
        Ok(bytes.pread_with::<u32>(nchain_offset, ctx.le)? as usize)
    }
}
/// Tries to parse an ELF object from the given slice.
///
/// Parsing is lenient. Only failures in the ELF header, the program headers or the section
/// headers are reported as errors. If any later table cannot be read, the object built so far
/// is returned with `is_malformed` set, and the tables that were not reached keep the default
/// values assigned to them up to that point.
///
/// # Errors
///
/// Returns an `ElfError` if the ELF header, the program headers or the section headers cannot
/// be parsed.
pub fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
    let header =
        elf::Elf::parse_header(data).map_err(|_| ElfError::new("ELF header unreadable"))?;
    // `lazy_parse` only receives the header; every table used later is assigned below.
    let mut obj =
        elf::Elf::lazy_parse(header).map_err(|_| ElfError::new("cannot parse ELF header"))?;

    let ctx = Ctx {
        container: if obj.is_64 {
            Container::Big
        } else {
            Container::Little
        },
        le: if obj.little_endian {
            scroll::Endian::Little
        } else {
            scroll::Endian::Big
        },
    };

    // Evaluates the given closure and yields its value. On error, returns early from `parse`
    // with whatever has been collected in `obj` so far, flagged as malformed.
    macro_rules! return_partial_on_err {
        ($parse_func:expr) => {
            if let Ok(expected) = $parse_func() {
                expected
            } else {
                return Ok(ElfObject {
                    elf: obj,
                    data,
                    is_malformed: true,
                });
            }
        };
    }

    obj.program_headers =
        elf::ProgramHeader::parse(data, header.e_phoff as usize, header.e_phnum as usize, ctx)
            .map_err(|_| ElfError::new("unable to parse program headers"))?;

    // Record the interpreter path. One byte less than the segment size is read, and an
    // unreadable path is silently ignored.
    for ph in &obj.program_headers {
        if ph.p_type == elf::program_header::PT_INTERP && ph.p_filesz != 0 {
            let count = (ph.p_filesz - 1) as usize;
            let offset = ph.p_offset as usize;
            obj.interpreter = data
                .pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count))
                .ok();
        }
    }

    obj.section_headers =
        SectionHeader::parse(data, header.e_shoff as usize, header.e_shnum as usize, ctx)
            .map_err(|_| ElfError::new("unable to parse section headers"))?;

    // Loads the string table stored in the section with the given index. An index past the
    // end of the section headers yields an empty table rather than an error.
    let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| {
        if section_idx >= section_headers.len() {
            Ok(Strtab::default())
        } else {
            let shdr = &section_headers[section_idx];
            shdr.check_size(data.len())?;
            Strtab::parse(data, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
        }
    };

    let strtab_idx = header.e_shstrndx as usize;
    obj.shdr_strtab = return_partial_on_err!(|| get_strtab(&obj.section_headers, strtab_idx));

    // Static symbol table and the string table it links to.
    obj.syms = elf::Symtab::default();
    obj.strtab = Strtab::default();
    for shdr in &obj.section_headers {
        if shdr.sh_type == elf::section_header::SHT_SYMTAB {
            let size = shdr.sh_entsize;
            // A zero entry size would divide by zero; treat the table as empty instead.
            let count = if size == 0 { 0 } else { shdr.sh_size / size };
            obj.syms = return_partial_on_err!(|| elf::Symtab::parse(
                data,
                shdr.sh_offset as usize,
                count as usize,
                ctx
            ));
            obj.strtab = return_partial_on_err!(|| get_strtab(
                &obj.section_headers,
                shdr.sh_link as usize
            ));
        }
    }

    // Dynamic linking information. Defaults are assigned first so that an early return leaves
    // these fields in a defined state.
    obj.soname = None;
    obj.libraries = vec![];
    obj.dynsyms = elf::Symtab::default();
    obj.dynrelas = elf::RelocSection::default();
    obj.dynrels = elf::RelocSection::default();
    obj.pltrelocs = elf::RelocSection::default();
    obj.dynstrtab = Strtab::default();
    let dynamic =
        return_partial_on_err!(|| elf::Dynamic::parse(data, &obj.program_headers, ctx));
    if let Some(ref dynamic) = dynamic {
        let dyn_info = &dynamic.info;
        obj.dynstrtab = return_partial_on_err!(|| Strtab::parse(
            data,
            dyn_info.strtab,
            dyn_info.strsz,
            0x0
        ));

        if dyn_info.soname != 0 {
            obj.soname = obj.dynstrtab.get_at(dyn_info.soname);
        }
        if dyn_info.needed_count > 0 {
            obj.libraries = dynamic.get_libraries(&obj.dynstrtab);
        }

        obj.dynrelas = return_partial_on_err!(|| elf::RelocSection::parse(
            data,
            dyn_info.rela,
            dyn_info.relasz,
            true,
            ctx
        ));
        obj.dynrels = return_partial_on_err!(|| elf::RelocSection::parse(
            data,
            dyn_info.rel,
            dyn_info.relsz,
            false,
            ctx
        ));
        let is_rela = dyn_info.pltrel == elf::dynamic::DT_RELA;
        obj.pltrelocs = return_partial_on_err!(|| elf::RelocSection::parse(
            data,
            dyn_info.jmprel,
            dyn_info.pltrelsz,
            is_rela,
            ctx
        ));

        // The dynamic symbol count comes from the GNU hash table if there is one, otherwise
        // from the regular hash table, otherwise it starts at zero.
        let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash {
            return_partial_on_err!(|| ElfObject::gnu_hash_len(data, gnu_hash as usize, ctx))
        } else if let Some(hash) = dyn_info.hash {
            return_partial_on_err!(|| ElfObject::hash_len(
                data,
                hash as usize,
                header.e_machine,
                ctx
            ))
        } else {
            0
        };
        // Relocations may reference symbols beyond that count; make sure they are covered.
        let max_reloc_sym = obj
            .dynrelas
            .iter()
            .chain(obj.dynrels.iter())
            .chain(obj.pltrelocs.iter())
            .fold(0, |num, reloc| cmp::max(num, reloc.r_sym));
        if max_reloc_sym != 0 {
            num_syms = cmp::max(num_syms, max_reloc_sym + 1);
        }

        obj.dynsyms =
            return_partial_on_err!(|| elf::Symtab::parse(data, dyn_info.symtab, num_syms, ctx));
    }

    // Relocation sections, stored together with the index of their section header.
    obj.shdr_relocs = vec![];
    for (idx, section) in obj.section_headers.iter().enumerate() {
        let is_rela = section.sh_type == elf::section_header::SHT_RELA;
        if is_rela || section.sh_type == elf::section_header::SHT_REL {
            return_partial_on_err!(|| section.check_size(data.len()));
            let sh_relocs = return_partial_on_err!(|| elf::RelocSection::parse(
                data,
                section.sh_offset as usize,
                section.sh_size as usize,
                is_rela,
                ctx,
            ));
            obj.shdr_relocs.push((idx, sh_relocs));
        }
    }

    // Symbol versioning sections.
    obj.versym = return_partial_on_err!(|| elf::symver::VersymSection::parse(
        data,
        &obj.section_headers,
        ctx
    ));
    obj.verdef = return_partial_on_err!(|| elf::symver::VerdefSection::parse(
        data,
        &obj.section_headers,
        ctx
    ));
    obj.verneed = return_partial_on_err!(|| elf::symver::VerneedSection::parse(
        data,
        &obj.section_headers,
        ctx
    ));

    Ok(ElfObject {
        elf: obj,
        data,
        is_malformed: false,
    })
}
/// The container file format, which is always `FileFormat::Elf`.
pub fn file_format(&self) -> FileFormat {
FileFormat::Elf
}
/// The code identifier of this object.
///
/// This is built from the raw bytes of the build ID note. Returns `None` when no build ID
/// note is found or when its payload is empty.
pub fn code_id(&self) -> Option<CodeId> {
    match self.find_build_id() {
        Some(build_id) if !build_id.is_empty() => Some(CodeId::from_binary(build_id)),
        _ => None,
    }
}
/// The debug link of this object, read from the `gnu_debuglink` section.
///
/// Returns `Ok(None)` when the section cannot be obtained.
///
/// # Errors
///
/// Returns a `DebugLinkError` when the section exists but its contents cannot be parsed.
pub fn debug_link(&self) -> Result<Option<DebugLink>, DebugLinkError> {
    match self.section("gnu_debuglink") {
        None => Ok(None),
        Some(section) => DebugLink::from_data(section.data, self.endianity()).map(Some),
    }
}
/// The name of this object, taken from the `soname` entry that `parse` looked up in the
/// dynamic string table. `None` if no such entry was recorded.
pub fn name(&self) -> Option<&'data str> {
self.elf.soname
}
/// The debug information identifier of this object.
///
/// The identifier is derived from the build ID note when one is present. Otherwise the first
/// `PAGE_SIZE` bytes of the `text` section are XOR-folded into a `UUID_SIZE`-byte buffer and
/// the identifier is derived from that. If neither is available, the default `DebugId` is
/// returned.
pub fn debug_id(&self) -> DebugId {
    if let Some(build_id) = self.find_build_id() {
        return self.compute_debug_id(build_id);
    }

    match self.raw_section("text") {
        Some(text) => {
            let mut folded = [0u8; UUID_SIZE];
            for (pos, byte) in text.data.iter().take(PAGE_SIZE).enumerate() {
                folded[pos % UUID_SIZE] ^= *byte;
            }
            self.compute_debug_id(&folded)
        }
        None => DebugId::default(),
    }
}
pub fn arch(&self) -> Arch {
match self.elf.header.e_machine {
goblin::elf::header::EM_386 => Arch::X86,
goblin::elf::header::EM_X86_64 => Arch::Amd64,
goblin::elf::header::EM_AARCH64 => Arch::Arm64,
goblin::elf::header::EM_ARM => Arch::Arm,
goblin::elf::header::EM_PPC => Arch::Ppc,
goblin::elf::header::EM_PPC64 => Arch::Ppc64,
goblin::elf::header::EM_MIPS | goblin::elf::header::EM_MIPS_RS3_LE => {
if self.elf.header.e_flags & MIPS_64_FLAGS != 0 {
Arch::Mips64
} else {
Arch::Mips
}
}
_ => Arch::Unknown,
}
}
pub fn kind(&self) -> ObjectKind {
let kind = match self.elf.header.e_type {
goblin::elf::header::ET_NONE => ObjectKind::None,
goblin::elf::header::ET_REL => ObjectKind::Relocatable,
goblin::elf::header::ET_EXEC => ObjectKind::Executable,
goblin::elf::header::ET_DYN => ObjectKind::Library,
goblin::elf::header::ET_CORE => ObjectKind::Dump,
_ => ObjectKind::Other,
};
if kind == ObjectKind::Executable && self.elf.interpreter.is_none() {
return ObjectKind::Debug;
}
if kind == ObjectKind::Library && self.raw_section("text").is_none() {
return ObjectKind::Debug;
}
kind
}
/// The address at which the image prefers to be loaded into memory.
///
/// This is the virtual address of the first `PT_LOAD` program header, or `0` if the object
/// has no such header.
pub fn load_address(&self) -> u64 {
    self.elf
        .program_headers
        .iter()
        .find(|phdr| phdr.p_type == elf::program_header::PT_LOAD)
        .map_or(0, |phdr| phdr.p_vaddr)
}
/// Determines whether this object exposes a symbol table.
///
/// True if either the static or the dynamic symbol table has at least one entry.
pub fn has_symbols(&self) -> bool {
    !(self.elf.syms.is_empty() && self.elf.dynsyms.is_empty())
}
/// Returns an iterator over symbols in the symbol tables.
///
/// The iterator walks the static symbol table first and the dynamic one afterwards, and
/// reports addresses relative to the load address captured here.
pub fn symbols(&self) -> ElfSymbolIterator<'data, '_> {
    let parsed = &self.elf;
    ElfSymbolIterator {
        load_addr: self.load_address(),
        sections: &parsed.section_headers,
        symbols: parsed.syms.iter(),
        strtab: &parsed.strtab,
        dynamic_symbols: parsed.dynsyms.iter(),
        dynamic_strtab: &parsed.dynstrtab,
    }
}
/// Returns a symbol map collected from everything `symbols` yields.
pub fn symbol_map(&self) -> SymbolMap<'data> {
self.symbols().collect()
}
/// Determines whether this object contains debug information, i.e. a `debug_info` section.
pub fn has_debug_info(&self) -> bool {
self.has_section("debug_info")
}
/// Constructs a DWARF debugging session for this object.
///
/// The session is given this object's symbol map, its load address (cast to `i64`) and its
/// kind. Any error from `DwarfDebugSession::parse` is returned unchanged.
pub fn debug_session(&self) -> Result<DwarfDebugSession<'data>, DwarfError> {
let symbols = self.symbol_map();
DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())
}
/// Determines whether this object contains stack unwinding information.
///
/// True if either an `eh_frame` or a `debug_frame` section is present.
pub fn has_unwind_info(&self) -> bool {
    ["eh_frame", "debug_frame"]
        .iter()
        .any(|section_name| self.has_section(section_name))
}
/// Determines whether this object contains embedded source; always `false` for ELF.
pub fn has_sources(&self) -> bool {
false
}
/// Determines whether `parse` had to stop early and return a partially populated object.
pub fn is_malformed(&self) -> bool {
self.is_malformed
}
/// Returns the raw buffer this object was parsed from.
pub fn data(&self) -> &'data [u8] {
self.data
}
fn decompress_section(&self, section_data: &[u8]) -> Option<Vec<u8>> {
let (size, compressed) = if section_data.starts_with(b"ZLIB") {
if section_data.len() < 12 {
return None;
}
let mut size_bytes = [0; 8];
size_bytes.copy_from_slice(§ion_data[4..12]);
(u64::from_be_bytes(size_bytes), §ion_data[12..])
} else {
let container = self.elf.header.container().ok()?;
let endianness = self.elf.header.endianness().ok()?;
let context = Ctx::new(container, endianness);
let compression = CompressionHeader::parse(section_data, 0, context).ok()?;
if compression.ch_type != ELFCOMPRESS_ZLIB {
return None;
}
let compressed = §ion_data[CompressionHeader::size(context)..];
(compression.ch_size, compressed)
};
let mut decompressed = Vec::with_capacity(size as usize);
Decompress::new(true)
.decompress_vec(compressed, &mut decompressed, FlushDecompress::Finish)
.ok()?;
Some(decompressed)
}
/// Locates a section by name and returns it together with a flag telling whether its data
/// is compressed.
///
/// `name` is compared against the section name with its first character removed, so callers
/// pass `text` rather than `.text`. A name starting with `.z` is matched without that prefix
/// and is always treated as compressed; otherwise the `SHF_COMPRESSED` flag decides.
///
/// Sections of type `SHT_NOBITS`, sections at file offset zero and sections without a name
/// are skipped. The returned data is borrowed from the input buffer and is not decompressed.
///
/// Returns `None` if no section matches, or if the matching section's offset and size do not
/// lie within the file.
fn find_section(&self, name: &str) -> Option<(bool, DwarfSection<'data>)> {
    for header in &self.elf.section_headers {
        if header.sh_type == elf::section_header::SHT_NOBITS {
            continue;
        }

        let section_name = match self.elf.shdr_strtab.get_at(header.sh_name) {
            Some(section_name) if !section_name.is_empty() => section_name,
            _ => continue,
        };

        let offset = header.sh_offset as usize;
        if offset == 0 {
            continue;
        }

        let (compressed, section_name) = match section_name.strip_prefix(".z") {
            Some(stripped) => (true, stripped),
            // `get` instead of slicing: a name whose first character is wider than one byte
            // would otherwise panic on the char boundary. Such a name cannot match anyway.
            None => match section_name.get(1..) {
                Some(stripped) => (header.sh_flags & SHF_COMPRESSED != 0, stripped),
                None => continue,
            },
        };

        if section_name != name {
            continue;
        }

        // Section headers are not validated against the file size, so slice with `get` to
        // turn out-of-range bounds into `None` rather than a panic.
        let size = header.sh_size as usize;
        let data = self.data.get(offset..)?.get(..size)?;
        let section = DwarfSection {
            data: Cow::Borrowed(data),
            address: header.sh_addr,
            offset: header.sh_offset,
            align: header.sh_addralign,
        };

        return Some((compressed, section));
    }

    None
}
/// Searches for a note of type `NT_GNU_BUILD_ID` and returns its payload.
///
/// The notes found through the program headers are searched first. If they do not contain a
/// build ID, the `.note.gnu.build-id` section is searched as a fallback. In both cases the
/// search stops at the first note that fails to parse.
fn find_build_id(&self) -> Option<&'data [u8]> {
    let from_headers = self.elf.iter_note_headers(self.data).and_then(|notes| {
        notes
            .map_while(Result::ok)
            .find(|note| note.n_type == elf::note::NT_GNU_BUILD_ID)
    });

    let build_id_note = from_headers.or_else(|| {
        self.elf
            .iter_note_sections(self.data, Some(".note.gnu.build-id"))
            .and_then(|notes| {
                notes
                    .map_while(Result::ok)
                    .find(|note| note.n_type == elf::note::NT_GNU_BUILD_ID)
            })
    });

    build_id_note.map(|note| note.desc)
}
/// Converts an identifier into a `DebugId`.
///
/// The identifier is truncated or zero-padded to `UUID_SIZE` bytes. For little-endian
/// objects, the first three UUID fields (bytes 0-3, 4-5 and 6-7) are byte-swapped before
/// the UUID is built. Falls back to the default `DebugId` if the UUID cannot be built.
fn compute_debug_id(&self, identifier: &[u8]) -> DebugId {
    let mut bytes = [0u8; UUID_SIZE];
    for (dst, src) in bytes.iter_mut().zip(identifier) {
        *dst = *src;
    }

    if self.elf.little_endian {
        for field in [0..4, 4..6, 6..8] {
            bytes[field].reverse();
        }
    }

    match Uuid::from_slice(&bytes) {
        Ok(uuid) => DebugId::from_uuid(uuid),
        Err(_) => DebugId::default(),
    }
}
}
/// Formats a summary built from the public accessors instead of dumping the raw fields.
impl fmt::Debug for ElfObject<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ElfObject")
.field("code_id", &self.code_id())
.field("debug_id", &self.debug_id())
.field("arch", &self.arch())
.field("kind", &self.kind())
// Printed as hexadecimal with a `0x` prefix.
.field("load_address", &format_args!("{:#x}", self.load_address()))
.field("has_symbols", &self.has_symbols())
.field("has_debug_info", &self.has_debug_info())
.field("has_unwind_info", &self.has_unwind_info())
.field("is_malformed", &self.is_malformed())
.finish()
}
}
/// Allows an `ElfObject<'data>` to be viewed as an `ElfObject<'slf>` with the shorter
/// lifetime of the borrow.
impl<'slf, 'data: 'slf> AsSelf<'slf> for ElfObject<'data> {
type Ref = ElfObject<'slf>;
fn as_self(&'slf self) -> &Self::Ref {
self
}
}
/// Exposes the inherent `test` and `parse` functions through the `Parse` trait.
impl<'data> Parse<'data> for ElfObject<'data> {
type Error = ElfError;
fn test(data: &[u8]) -> bool {
// Path syntax resolves to the inherent function, so this does not recurse.
Self::test(data)
}
fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
Self::parse(data)
}
}
/// Exposes the inherent accessors of `ElfObject` through the `ObjectLike` trait.
///
/// Every method forwards to the inherent method of the same name; inherent methods take
/// precedence in method resolution, so none of these calls recurse.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for ElfObject<'data> {
type Error = DwarfError;
type Session = DwarfDebugSession<'data>;
type SymbolIterator = ElfSymbolIterator<'data, 'object>;
fn file_format(&self) -> FileFormat {
self.file_format()
}
fn code_id(&self) -> Option<CodeId> {
self.code_id()
}
fn debug_id(&self) -> DebugId {
self.debug_id()
}
fn arch(&self) -> Arch {
self.arch()
}
fn kind(&self) -> ObjectKind {
self.kind()
}
fn load_address(&self) -> u64 {
self.load_address()
}
fn has_symbols(&self) -> bool {
self.has_symbols()
}
fn symbols(&'object self) -> Self::SymbolIterator {
self.symbols()
}
fn symbol_map(&self) -> SymbolMap<'data> {
self.symbol_map()
}
fn has_debug_info(&self) -> bool {
self.has_debug_info()
}
fn debug_session(&self) -> Result<Self::Session, Self::Error> {
self.debug_session()
}
fn has_unwind_info(&self) -> bool {
self.has_unwind_info()
}
fn has_sources(&self) -> bool {
self.has_sources()
}
fn is_malformed(&self) -> bool {
self.is_malformed()
}
}
/// Gives the DWARF reader access to the sections of an ELF object.
impl<'data> Dwarf<'data> for ElfObject<'data> {
    /// Byte order of the object, as recorded by goblin for the ELF header.
    fn endianity(&self) -> Endian {
        if self.elf.little_endian {
            Endian::Little
        } else {
            Endian::Big
        }
    }

    /// Returns the section with the given name exactly as stored in the file.
    ///
    /// Compressed sections are returned without being decompressed.
    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>> {
        self.find_section(name).map(|(_, section)| section)
    }

    /// Returns the section with the given name, decompressing its data if necessary.
    ///
    /// Returns `None` if the section is missing or if it is compressed and cannot be
    /// decompressed.
    fn section(&self, name: &str) -> Option<DwarfSection<'data>> {
        let (compressed, mut section) = self.find_section(name)?;

        if compressed {
            let decompressed = self.decompress_section(&section.data)?;
            section.data = Cow::Owned(decompressed);
        }

        Some(section)
    }
}
/// An iterator over the symbols of an ELF object, returned by `ElfObject::symbols`.
pub struct ElfSymbolIterator<'data, 'object> {
/// Remaining entries of the static symbol table; these are yielded first.
symbols: elf::sym::SymIterator<'data>,
/// String table used to resolve names of static symbols.
strtab: &'object strtab::Strtab<'data>,
/// Remaining entries of the dynamic symbol table; yielded once `symbols` is exhausted.
dynamic_symbols: elf::sym::SymIterator<'data>,
/// String table used to resolve names of dynamic symbols.
dynamic_strtab: &'object strtab::Strtab<'data>,
/// Section headers, indexed by a symbol's `st_shndx`.
sections: &'object [elf::SectionHeader],
/// Load address of the image; subtracted from every symbol address.
load_addr: u64,
}
impl<'data, 'object> Iterator for ElfSymbolIterator<'data, 'object> {
type Item = Symbol<'data>;
fn next(&mut self) -> Option<Self::Item> {
fn get_symbols<'data>(
symbols: &mut SymIterator,
strtab: &Strtab<'data>,
load_addr: u64,
sections: &[SectionHeader],
) -> Option<Symbol<'data>> {
for symbol in symbols {
if symbol.st_type() != elf::sym::STT_FUNC {
continue;
}
if symbol.st_value < load_addr {
continue;
}
let section = match symbol.st_shndx {
self::SHN_UNDEF => None,
index => sections.get(index),
};
if !section.map_or(false, |header| header.is_executable()) {
continue;
}
let name = strtab.get_at(symbol.st_name).map(Cow::Borrowed);
return Some(Symbol {
name,
address: symbol.st_value - load_addr,
size: symbol.st_size,
});
}
None
}
get_symbols(
&mut self.symbols,
self.strtab,
self.load_addr,
self.sections,
)
.or_else(|| {
get_symbols(
&mut self.dynamic_symbols,
self.dynamic_strtab,
self.load_addr,
self.sections,
)
})
}
}
/// Parsed contents of a debug link section (see `ElfObject::debug_link`).
#[derive(Debug)]
pub struct DebugLink<'data> {
/// The NUL-terminated file name stored at the start of the section.
filename: Cow<'data, CStr>,
/// The checksum decoded from the last four bytes of the section.
crc: u32,
}
impl<'data> DebugLink<'data> {
/// Attempts to parse a debug link from the contents of its section.
///
/// The result borrows from `data` when `data` is borrowed and owns its file name when
/// `data` is owned.
///
/// # Errors
///
/// Returns a `DebugLinkError` that carries the failure kind and hands `data` back to the
/// caller.
pub fn from_data(
data: Cow<'data, [u8]>,
endianity: Endian,
) -> Result<Self, DebugLinkError<'data>> {
match data {
Cow::Owned(data) => {
// Copy the file name out first, so that the buffer is no longer borrowed and can be
// moved into the error if parsing failed.
let (filename, crc) = Self::from_borrowed_data(&data, endianity)
.map(|(filename, crc)| (filename.to_owned(), crc))
.map_err(|kind| DebugLinkError {
kind,
data: Cow::Owned(data),
})?;
Ok(Self {
filename: Cow::Owned(filename),
crc,
})
}
Cow::Borrowed(data) => {
let (filename, crc) =
Self::from_borrowed_data(data, endianity).map_err(|kind| DebugLinkError {
kind,
data: Cow::Borrowed(data),
})?;
Ok(Self {
filename: Cow::Borrowed(filename),
crc,
})
}
}
}
/// Splits the section contents into the file name and the checksum.
///
/// The file name runs up to and including the first NUL byte. The checksum is taken from the
/// last four bytes of the data and decoded with the given byte order. Whatever lies between
/// the two is not inspected.
///
/// # Errors
///
/// - `MissingNul` if the data contains no NUL byte.
/// - `MissingCrc` if fewer than four bytes follow the file name.
fn from_borrowed_data(
    data: &[u8],
    endianity: Endian,
) -> Result<(&CStr, u32), DebugLinkErrorKind> {
    let nul_pos = data
        .iter()
        .position(|byte| *byte == 0)
        .ok_or(DebugLinkErrorKind::MissingNul)?;

    let filename = &data[..=nul_pos];
    let missing_crc = || DebugLinkErrorKind::MissingCrc {
        filename_len_with_nul: filename.len(),
    };

    // Everything after the file name. A plain `len() - 4` would underflow when only one to
    // three bytes remain, so the start of the checksum is computed with `checked_sub`.
    let trailer = &data[nul_pos + 1..];
    let crc_start = trailer.len().checked_sub(4).ok_or_else(missing_crc)?;
    let crc: [u8; 4] = trailer[crc_start..]
        .try_into()
        .map_err(|_| missing_crc())?;

    let crc = match endianity {
        Endian::Little => u32::from_le_bytes(crc),
        Endian::Big => u32::from_be_bytes(crc),
    };

    let filename =
        CStr::from_bytes_with_nul(filename).map_err(|_| DebugLinkErrorKind::MissingNul)?;

    Ok((filename, crc))
}
/// The NUL-terminated file name stored in the debug link.
pub fn filename(&self) -> &CStr {
&self.filename
}
/// The checksum stored in the debug link.
pub fn crc(&self) -> u32 {
self.crc
}
}
/// The reason why the contents of a debug link section could not be parsed.
#[derive(Debug, Error)]
pub enum DebugLinkErrorKind {
/// No valid NUL-terminated file name was found in the data.
#[error("missing NUL character")]
MissingNul,
/// The data ends before a complete checksum could be read.
#[error("missing CRC")]
MissingCrc {
/// Length of the file name in bytes, including its terminating NUL.
filename_len_with_nul: usize,
},
}
/// Error returned when a debug link section cannot be parsed.
#[derive(Debug, Error)]
#[error("could not parse debug link section")]
pub struct DebugLinkError<'data> {
/// What went wrong; reported as the error source through `#[source]`.
#[source]
pub kind: DebugLinkErrorKind,
/// The section contents that were passed to `DebugLink::from_data`.
pub data: Cow<'data, [u8]>,
}