use std::borrow::Cow;
use std::default::Default;
use std::fs;
use std::ops::Deref;
mod dwarf;
use fnv::FnvHashMap as HashMap;
use gimli;
use memmap;
use object::{self, Object, ObjectSection, ObjectSegment, ObjectSymbol, ObjectSymbolTable};
use typed_arena::Arena;
use crate::cfi::Cfi;
use crate::function::{Function, FunctionDetails, FunctionOffset};
use crate::location::Register;
use crate::range::{Range, RangeList};
use crate::types::{Enumerator, Type, TypeOffset};
use crate::unit::Unit;
use crate::variable::Variable;
use crate::{Address, Result, Size};
/// Format-specific debug information backing a `File`.
///
/// DWARF is the only variant, so every method simply forwards to the DWARF
/// implementation.
pub(crate) enum DebugInfo<'input, Endian>
where
    Endian: gimli::Endianity + 'input,
{
    /// Debug information backed by a `dwarf::DwarfDebugInfo`.
    Dwarf(&'input dwarf::DwarfDebugInfo<'input, Endian>),
}

impl<'input, Endian> DebugInfo<'input, Endian>
where
    Endian: gimli::Endianity + 'input,
{
    /// Looks up the type at `offset`.
    fn get_type(&self, offset: TypeOffset) -> Option<Type<'input>> {
        // The enum has a single variant, so this pattern is irrefutable.
        let DebugInfo::Dwarf(dwarf) = self;
        dwarf.get_type(offset)
    }

    /// Returns the enumerators of the type at `offset`.
    fn get_enumerators(&self, offset: TypeOffset) -> Vec<Enumerator<'input>> {
        let DebugInfo::Dwarf(dwarf) = self;
        dwarf.get_enumerators(offset)
    }

    /// Looks up the details of the function at `offset`.
    ///
    /// `hash` is passed through to the backend unchanged.
    fn get_function_details(
        &self,
        offset: FunctionOffset,
        hash: &FileHash<'input>,
    ) -> Option<FunctionDetails<'input>> {
        let DebugInfo::Dwarf(dwarf) = self;
        dwarf.get_function_details(offset, hash)
    }

    /// Returns the call frame information for `address` and `size`.
    fn get_cfi(&self, address: Address, size: Size) -> Vec<Cfi> {
        let DebugInfo::Dwarf(dwarf) = self;
        dwarf.get_cfi(address, size)
    }

    /// Returns the name of `register` on `machine`, if the backend knows one.
    fn get_register_name(&self, machine: Architecture, register: Register) -> Option<&'static str> {
        let DebugInfo::Dwarf(dwarf) = self;
        dwarf.get_register_name(machine, register)
    }
}
/// An arena that owns strings which had to be rewritten while converting
/// raw bytes to UTF-8.
pub(crate) struct StringCache {
    strings: Arena<String>,
}

impl StringCache {
    /// Creates an empty cache.
    fn new() -> Self {
        let strings = Arena::new();
        StringCache { strings }
    }

    /// Converts `bytes` to a string slice using lossy UTF-8 conversion.
    ///
    /// If the conversion can borrow from `bytes`, that borrow is returned
    /// directly. Otherwise the newly built string is moved into the arena and
    /// a borrow of the stored copy is returned.
    fn get<'input>(&'input self, bytes: &'input [u8]) -> &'input str {
        let lossy = String::from_utf8_lossy(bytes);
        match lossy {
            Cow::Owned(repaired) => self.strings.alloc(repaired).as_str(),
            Cow::Borrowed(valid) => valid,
        }
    }
}
pub use object::Architecture;
/// A parsed object file together with its debug information.
///
/// Values are built by `File::parse` and handed to the caller's callback.
pub struct File<'input> {
/// The path that was passed to `File::parse`.
pub(crate) path: &'input str,
/// The architecture reported by the object file.
pub(crate) machine: Architecture,
/// Segments whose data could be read.
pub(crate) segments: Vec<Segment<'input>>,
/// Sections with a non-zero size.
pub(crate) sections: Vec<Section<'input>>,
/// Function and variable symbols with a non-zero address and size.
pub(crate) symbols: Vec<Symbol<'input>>,
/// Dynamic relocations that target a symbol.
pub(crate) relocations: Vec<Relocation<'input>>,
/// Units from the debug info, followed by the synthetic `<symtab>` and
/// `<unknown>` units that `normalize` appends.
pub(crate) units: Vec<Unit<'input>>,
/// Backend used for on-demand lookups (types, function details, CFI, ...).
debug_info: DebugInfo<'input, gimli::RunTimeEndian>,
}
impl<'input> File<'input> {
/// Looks up the type at `offset` in the debug info.
pub(crate) fn get_type(&self, offset: TypeOffset) -> Option<Type<'input>> {
self.debug_info.get_type(offset)
}
/// Returns the enumerators of the type at `offset`, as reported by the debug info.
pub(crate) fn get_enumerators(&self, offset: TypeOffset) -> Vec<Enumerator<'input>> {
self.debug_info.get_enumerators(offset)
}
/// Returns the details of the function at `offset`.
///
/// When the debug info has no details for the function, the
/// `Default` value of `FunctionDetails` is returned instead.
pub(crate) fn get_function_details(
&self,
offset: FunctionOffset,
hash: &FileHash<'input>,
) -> FunctionDetails<'input> {
self.debug_info
.get_function_details(offset, hash)
.unwrap_or_default()
}
/// Returns the call frame information the debug info reports for `address` and `size`.
pub(crate) fn get_cfi(&self, address: Address, size: Size) -> Vec<Cfi> {
self.debug_info.get_cfi(address, size)
}
/// Returns the name of `register` for this file's architecture, if known.
pub(crate) fn get_register_name(&self, register: Register) -> Option<&'static str> {
self.debug_info.get_register_name(self.machine, register)
}
/// Parses the file at `path` and passes the result to `cb`.
///
/// The file is opened, memory-mapped and parsed as an object file. The same
/// object is used both for the symbol/section data and for the debug info.
///
/// # Errors
///
/// Returns an error if the file cannot be opened (`open failed: ...`),
/// cannot be mapped (`memmap failed: ...`), cannot be parsed as an object
/// file, or if `parse_object` (which also runs `cb`) fails.
pub fn parse<Cb>(path: &str, cb: Cb) -> Result<()>
where
    Cb: FnOnce(&File) -> Result<()>,
{
    let handle = fs::File::open(path).map_err(|e| format!("open failed: {}", e))?;
    // NOTE(review): mapping is `unsafe`; nothing here guards against the
    // file being modified while it is mapped.
    let map = unsafe { memmap::Mmap::map(&handle) }
        .map_err(|e| format!("memmap failed: {}", e))?;
    let object = object::File::parse(&*map)?;
    File::parse_object(&object, &object, path, cb)
}
/// Builds a `File` from `object` and `debug_object`, then runs `cb` on it.
///
/// Segments, sections, symbols and dynamic relocations are read from
/// `object`; endianness and debug info are read from `debug_object`. The
/// `File` is normalized before `cb` sees it.
///
/// # Errors
///
/// Fails if a section name, section segment name, symbol name or relocation
/// symbol name cannot be read, or if `dwarf::parse` or `cb` fails.
fn parse_object<Cb>(
    object: &object::File,
    debug_object: &object::File,
    path: &str,
    cb: Cb,
) -> Result<()>
where
    Cb: FnOnce(&File) -> Result<()>,
{
    let machine = object.architecture();

    // Segments whose data cannot be read are silently skipped.
    let segments: Vec<_> = object
        .segments()
        .filter_map(|segment| {
            let bytes = segment.data().ok()?;
            Some(Segment {
                address: segment.address(),
                bytes,
            })
        })
        .collect();

    let mut sections = Vec::new();
    for section in object.sections() {
        // The name lookups run before the size check, so a failure aborts
        // parsing even for sections that would not be recorded.
        let name = Cow::Owned(section.name()?.to_string());
        let segment = section.segment_name()?.map(|s| Cow::Owned(s.to_string()));
        // An address of zero is recorded as "no address".
        let address = match section.address() {
            0 => None,
            nonzero => Some(nonzero),
        };
        let size = section.size();
        if size == 0 {
            continue;
        }
        sections.push(Section {
            name: Some(name),
            segment,
            address,
            size,
        });
    }

    let mut symbols = Vec::new();
    for symbol in object.symbols() {
        let (address, size) = (symbol.address(), symbol.size());
        if address == 0 || size == 0 {
            continue;
        }
        // Only text and data/unknown symbols are kept.
        let kind = match symbol.kind() {
            object::SymbolKind::Text => SymbolKind::Function,
            object::SymbolKind::Data | object::SymbolKind::Unknown => SymbolKind::Variable,
            _ => continue,
        };
        let name = symbol.name()?;
        symbols.push(Symbol {
            name: Some(name),
            kind,
            address,
            size,
        });
    }

    let mut relocations = Vec::new();
    if let (Some(dynamic_symbols), Some(dynamic_relocations)) =
        (object.dynamic_symbol_table(), object.dynamic_relocations())
    {
        for (address, relocation) in dynamic_relocations {
            let size = relocation.size();
            // Relocations that do not target a resolvable symbol are ignored.
            if let object::RelocationTarget::Symbol(index) = relocation.target() {
                if let Ok(symbol) = dynamic_symbols.symbol_by_index(index) {
                    relocations.push(Relocation {
                        address,
                        size,
                        symbol: symbol.name()?,
                    });
                }
            }
        }
    }

    let endian = if debug_object.is_little_endian() {
        gimli::RunTimeEndian::Little
    } else {
        gimli::RunTimeEndian::Big
    };

    let strings = StringCache::new();
    dwarf::parse(endian, debug_object, &strings, |units, debug_info| {
        let mut file = File {
            path,
            machine,
            segments,
            sections,
            symbols,
            relocations,
            units,
            debug_info,
        };
        file.normalize();
        cb(&file)
    })
}
/// Links symbols to the debug info and appends two synthetic units.
///
/// 1. Symbols are sorted by address.
/// 2. Each function and variable with an address is checked against the
///    symbols at that address; if `get_symbol` returns a symbol, its name is
///    stored as `symbol_name`.
/// 3. A `<symtab>` unit is appended containing every symbol that step 2 did
///    not mark as used.
/// 4. An `<unknown>` unit is appended whose ranges are `unknown_ranges()`.
fn normalize(&mut self) {
    // The sort is stable, so symbols sharing an address keep their order.
    self.symbols.sort_by_key(|symbol| symbol.address);
    let mut used_symbols = vec![false; self.symbols.len()];
    let symbols = &self.symbols[..];

    for unit in &mut self.units {
        for function in &mut unit.functions {
            let address = match function.address() {
                Some(address) => address,
                None => continue,
            };
            let name = function.linkage_name().or_else(|| function.name());
            if let Some(symbol) = Self::get_symbol(symbols, &mut used_symbols, address, name) {
                function.symbol_name = symbol.name;
            }
        }
        for variable in &mut unit.variables {
            let address = match variable.address() {
                Some(address) => address,
                None => continue,
            };
            let name = variable.linkage_name().or_else(|| variable.name());
            if let Some(symbol) = Self::get_symbol(symbols, &mut used_symbols, address, name) {
                variable.symbol_name = symbol.name;
            }
        }
    }

    // Symbols that no function or variable claimed go into their own unit.
    let mut symtab = Unit::default();
    symtab.name = Some(Cow::Borrowed("<symtab>"));
    let unclaimed = symbols
        .iter()
        .zip(used_symbols.iter())
        .filter(|(_, used)| !**used);
    for (symbol, _) in unclaimed {
        let (begin, size) = (symbol.address, symbol.size);
        symtab.ranges.push(Range {
            begin,
            end: begin + size,
        });
        match symbol.kind() {
            SymbolKind::Function => {
                symtab.functions.push(Function {
                    name: symbol.name,
                    linkage_name: symbol.name,
                    address: Address::new(begin),
                    size: Size::new(size),
                    ..Default::default()
                });
            }
            SymbolKind::Variable => {
                symtab.variables.push(Variable {
                    name: symbol.name,
                    linkage_name: symbol.name,
                    address: Address::new(begin),
                    size: Size::new(size),
                    ..Default::default()
                });
            }
        }
    }
    symtab.ranges.sort();
    self.units.push(symtab);

    // Computed after `<symtab>` is pushed, so its ranges count as known.
    let mut unknown = Unit::default();
    unknown.name = Some(Cow::Borrowed("<unknown>"));
    unknown.ranges = self.unknown_ranges();
    self.units.push(unknown);
}
/// Checks the symbols at `address` against `name`.
///
/// `symbols` must be sorted by address, and `used_symbols` must be the
/// parallel "used" flags. Every symbol located at `address` is marked as
/// used.
///
/// Returns `None` if there is no symbol at `address`, or if one of the
/// symbols there already has the given `name`. Otherwise returns the first
/// symbol at `address`.
fn get_symbol<'sym>(
    symbols: &'sym [Symbol<'input>],
    used_symbols: &mut [bool],
    address: u64,
    name: Option<&str>,
) -> Option<&'sym Symbol<'input>> {
    let hit = symbols
        .binary_search_by(|x| x.address.cmp(&address))
        .ok()?;

    // The binary search may land anywhere inside a run of equal addresses;
    // rewind to the start of that run.
    let first = symbols[..hit]
        .iter()
        .rposition(|symbol| symbol.address != address)
        .map_or(0, |before| before + 1);

    let run = symbols[first..]
        .iter()
        .zip(used_symbols[first..].iter_mut())
        .take_while(|(symbol, _)| symbol.address == address);

    let mut name_matches = false;
    for (symbol, used) in run {
        *used = true;
        if symbol.name() == name {
            name_matches = true;
        }
    }

    if name_matches {
        None
    } else {
        Some(&symbols[first])
    }
}
/// The path that was passed to `File::parse`.
#[inline]
pub fn path(&self) -> &'input str {
self.path
}
/// The architecture reported by the object file.
#[inline]
pub fn machine(&self) -> Architecture {
self.machine
}
/// Returns the bytes backing `range`, if a single segment contains the
/// whole range.
///
/// Segments are searched in order and the first one that contains the range
/// wins. Returns `None` when no segment contains the range. An inverted
/// range (`end < begin`) also yields `None` rather than panicking.
pub fn segment_bytes(&self, range: Range) -> Option<&'input [u8]> {
    // An inverted range has no length; reject it up front instead of
    // letting the subtraction overflow.
    let len = range.end.checked_sub(range.begin)?;
    self.segments.iter().find_map(|segment| {
        // Compare offsets relative to the segment start so that a segment
        // ending near `u64::MAX` cannot overflow the containment check.
        let start = range.begin.checked_sub(segment.address)?;
        let end = start.checked_add(len)?;
        if end > segment.bytes.len() as u64 {
            return None;
        }
        // Both offsets are at most `bytes.len()`, so they fit in `usize`.
        Some(&segment.bytes[start as usize..end as usize])
    })
}
/// The segments recorded for this file.
#[inline]
pub fn segments(&self) -> &[Segment<'input>] {
&self.segments
}
/// The sections recorded for this file.
#[inline]
pub fn sections(&self) -> &[Section<'input>] {
&self.sections
}
/// The relocations recorded for this file.
#[inline]
pub fn relocations(&self) -> &[Relocation<'input>] {
&self.relocations
}
/// The units of this file.
#[inline]
pub fn units(&self) -> &[Unit<'input>] {
&self.units
}
/// Collects the ranges of every unit into one sorted list.
///
/// For each unit, both `Unit::ranges` and `Unit::unknown_ranges` are
/// included. The combined list is sorted before it is returned.
pub fn ranges(&self, hash: &FileHash) -> RangeList {
    let mut merged = RangeList::default();
    for unit in self.units.iter() {
        for &known in unit.ranges(hash).list() {
            merged.push(known);
        }
        for &unknown in unit.unknown_ranges(hash).list() {
            merged.push(unknown);
        }
    }
    merged.sort();
    merged
}
/// Returns the section address ranges that no unit accounts for.
///
/// The ranges of all sections that have an address are collected and
/// sorted, then the ranges reported by `ranges` are subtracted from them.
fn unknown_ranges(&self) -> RangeList {
    let hash = FileHash::new(self);
    let unit_ranges = self.ranges(&hash);

    let mut section_ranges = RangeList::default();
    self.sections
        .iter()
        .filter_map(|section| section.address())
        .for_each(|range| section_ranges.push(range));
    section_ranges.sort();

    section_ranges.subtract(&unit_ranges)
}
/// The sum of `Unit::function_size` over all units.
pub fn function_size(&self) -> u64 {
    self.units.iter().map(|unit| unit.function_size()).sum()
}
/// The sum of `Unit::variable_size` over all units.
pub fn variable_size(&self, hash: &FileHash) -> u64 {
    self.units
        .iter()
        .map(|unit| unit.variable_size(hash))
        .sum()
}
}
/// Lookup tables over the contents of a `File`.
pub struct FileHash<'input> {
    /// The file the tables were built from.
    pub file: &'input File<'input>,
    /// Functions that have an address, keyed by that address.
    pub functions_by_address: HashMap<u64, &'input Function<'input>>,
    /// All functions, keyed by their offset.
    pub functions_by_offset: HashMap<FunctionOffset, &'input Function<'input>>,
    /// All types, keyed by their offset.
    pub types: HashMap<TypeOffset, &'input Type<'input>>,
    /// The value returned by `Type::void()`.
    pub(crate) void: Type<'input>,
}

impl<'input> FileHash<'input> {
    /// Builds the lookup tables for `file`.
    pub fn new(file: &'input File<'input>) -> Self {
        let functions_by_address = Self::functions_by_address(file);
        let functions_by_offset = Self::functions_by_offset(file);
        let types = Self::types(file);
        FileHash {
            file,
            functions_by_address,
            functions_by_offset,
            types,
            void: Type::void(),
        }
    }

    /// Maps each function address to its function.
    ///
    /// Functions without an address are skipped. When several functions
    /// share an address, the last one visited is kept.
    fn functions_by_address<'a>(file: &'a File<'input>) -> HashMap<u64, &'a Function<'input>> {
        file.units
            .iter()
            .flat_map(|unit| &unit.functions)
            .filter_map(|function| function.address().map(|address| (address, function)))
            .collect()
    }

    /// Maps each function offset to its function.
    ///
    /// When several functions share an offset, the last one visited is kept.
    fn functions_by_offset<'a>(
        file: &'a File<'input>,
    ) -> HashMap<FunctionOffset, &'a Function<'input>> {
        file.units
            .iter()
            .flat_map(|unit| &unit.functions)
            .map(|function| (function.offset, function))
            .collect()
    }

    /// Maps each type offset to its type.
    ///
    /// When several types share an offset, the last one visited is kept.
    fn types<'a>(file: &'a File<'input>) -> HashMap<TypeOffset, &'a Type<'input>> {
        file.units
            .iter()
            .flat_map(|unit| &unit.types)
            .map(|ty| (ty.offset, ty))
            .collect()
    }
}
/// A segment of the object file whose data could be read.
#[derive(Debug)]
pub struct Segment<'input> {
/// The address reported by the object file for this segment.
pub address: u64,
/// The segment's data.
pub bytes: &'input [u8],
}
/// A section of the object file.
#[derive(Debug)]
pub struct Section<'input> {
    /// The section name.
    pub(crate) name: Option<Cow<'input, str>>,
    /// The name of the segment the section belongs to, if it has one.
    pub(crate) segment: Option<Cow<'input, str>>,
    /// The start address, or `None` if the section has no address.
    pub(crate) address: Option<u64>,
    /// The size of the section.
    pub(crate) size: u64,
}

impl<'input> Section<'input> {
    /// The section name, if any.
    pub fn name(&self) -> Option<&str> {
        self.name.as_ref().map(|name| name.deref())
    }

    /// The name of the segment the section belongs to, if any.
    pub fn segment(&self) -> Option<&str> {
        self.segment.as_ref().map(|segment| segment.deref())
    }

    /// The address range `[address, address + size)` of the section, or
    /// `None` if the section has no address.
    pub fn address(&self) -> Option<Range> {
        let begin = self.address?;
        Some(Range {
            begin,
            end: begin + self.size,
        })
    }

    /// The size of the section.
    #[inline]
    pub fn size(&self) -> u64 {
        self.size
    }
}
/// The kind of entity a `Symbol` refers to.
#[derive(Debug, Clone, Copy)]
pub enum SymbolKind {
/// A variable. Used for object-file symbols of kind `Data` or `Unknown`.
Variable,
/// A function. Used for object-file symbols of kind `Text`.
Function,
}
/// A symbol from the object file's symbol table.
#[derive(Debug, Clone)]
pub struct Symbol<'input> {
/// The symbol name.
pub(crate) name: Option<&'input str>,
/// Whether the symbol is a function or a variable.
pub(crate) kind: SymbolKind,
/// The symbol's address.
pub(crate) address: u64,
/// The symbol's size.
pub(crate) size: u64,
}
impl<'input> Symbol<'input> {
/// The symbol name, if any.
#[inline]
pub fn name(&self) -> Option<&str> {
self.name
}
/// Whether the symbol is a function or a variable.
#[inline]
pub fn kind(&self) -> SymbolKind {
self.kind
}
/// The address range `[address, address + size)` covered by the symbol.
#[inline]
pub fn address(&self) -> Range {
Range {
begin: self.address,
end: self.address + self.size,
}
}
/// The size of the symbol.
#[inline]
pub fn size(&self) -> u64 {
self.size
}
}
/// A dynamic relocation that targets a symbol.
#[derive(Debug, Clone)]
pub struct Relocation<'input> {
/// The address the relocation applies to.
pub(crate) address: u64,
/// The size reported by the object file for this relocation.
pub(crate) size: u8,
/// The name of the symbol the relocation targets.
pub(crate) symbol: &'input str,
}
impl<'input> Relocation<'input> {
/// The address the relocation applies to.
#[inline]
pub fn address(&self) -> u64 {
self.address
}
/// The size reported by the object file for this relocation.
#[inline]
pub fn size(&self) -> u8 {
self.size
}
/// The name of the symbol the relocation targets.
#[inline]
pub fn symbol(&self) -> &'input str {
self.symbol
}
}