use crate::binary_format::BinaryRef;
use crate::string_tables::StringTables;
use gimli::{
AttributeValue, DebuggingInformationEntry, Dwarf, EndianSlice, Reader, RunTimeEndian,
SectionId, Unit,
};
use rayon::prelude::*;
use rustc_demangle::demangle;
use std::borrow::Cow;
use std::collections::HashMap;
/// Borrowed DWARF section reader whose byte order is chosen at run time.
type DwarfReader<'a> = EndianSlice<'a, RunTimeEndian>;
/// `(functions, inlined, strings)` as produced by `get_functions_from_dwarf`.
type DwarfFunctionResult = (Vec<FunctionInfo>, Vec<FunctionInfo>, StringTables);
/// `(name, file, line)` as produced by `resolve_specification`.
type SpecificationResult = (Option<String>, Option<String>, Option<u32>);
/// Compact record describing one address range attributed to a function.
///
/// Strings are not stored inline; `name_idx` and `file_idx` are handles that
/// are resolved through the accompanying `StringTables`.
#[derive(Debug, Clone, Copy)]
pub struct FunctionInfo {
/// First address covered by this record (inclusive in lookups).
pub start_address: u64,
/// End of the covered range; lookups treat it as exclusive (`addr < end_address`).
pub end_address: u64,
/// Handle passed to `StringTables::get_name` to obtain the function name.
pub name_idx: u32,
/// Handle passed to `StringTables::get_file` to obtain the source file, if any.
pub file_idx: u32,
/// Source line; `0` is used as the "unknown" marker (see `FunctionIndex::get_line`).
pub line: u32,
}
/// Right-shift applied to an address to obtain its inlined-range bucket id,
/// i.e. each bucket spans `1 << 12` (4096) consecutive addresses.
const INLINED_BUCKET_SHIFT: u32 = 12;
/// Address-to-function lookup structure.
///
/// Top-level functions are kept sorted by start address for binary search;
/// inlined ranges are indexed through address buckets.
#[derive(Debug)]
pub struct FunctionIndex {
// Top-level functions, sorted by `start_address` by the constructors.
functions: Vec<FunctionInfo>,
// Inlined ranges, kept in the order supplied by the caller.
inlined: Vec<FunctionInfo>,
// Bucket id (address >> INLINED_BUCKET_SHIFT) -> indices into `inlined`
// of every range that overlaps that bucket.
inlined_buckets: HashMap<u64, Vec<usize>>,
// Backing storage for the names and files referenced by `FunctionInfo`.
strings: StringTables,
}
impl FunctionIndex {
    /// Maps an address to the id of the bucket that contains it.
    #[inline]
    fn bucket_id(addr: u64) -> u64 {
        addr >> INLINED_BUCKET_SHIFT
    }

    /// Returns `true` when `name` starts with one of the prefixes this index
    /// treats as standard-library code (`core::`, `std::`, `alloc::`).
    #[inline]
    fn is_stdlib_name(name: &str) -> bool {
        name.starts_with("core::") || name.starts_with("std::") || name.starts_with("alloc::")
    }

    /// Iterates, in bucket insertion order, over every inlined range whose
    /// half-open interval `[start_address, end_address)` contains `addr`.
    fn inlined_containing(&self, addr: u64) -> impl Iterator<Item = &FunctionInfo> + '_ {
        self.inlined_buckets
            .get(&Self::bucket_id(addr))
            .into_iter()
            .flatten()
            .map(move |&idx| &self.inlined[idx])
            .filter(move |func| func.start_address <= addr && addr < func.end_address)
    }

    /// Builds an index over top-level functions only (no inlined ranges).
    ///
    /// `functions` is sorted by start address; the sort is stable, so entries
    /// with equal start addresses keep their relative order.
    pub fn new(functions: Vec<FunctionInfo>, strings: StringTables) -> Self {
        Self::new_with_inlined(functions, Vec::new(), strings)
    }

    /// Builds an index over top-level functions and inlined ranges.
    ///
    /// `functions` is sorted by start address (stable sort). Every inlined
    /// range is registered in each bucket it overlaps so that lookups only
    /// have to inspect a single bucket.
    pub fn new_with_inlined(
        mut functions: Vec<FunctionInfo>,
        inlined: Vec<FunctionInfo>,
        strings: StringTables,
    ) -> Self {
        functions.sort_by_key(|f| f.start_address);

        let mut inlined_buckets: HashMap<u64, Vec<usize>> = HashMap::new();
        for (idx, func) in inlined.iter().enumerate() {
            // An empty or inverted range can never satisfy
            // `start <= addr < end`, so indexing it would only add dead
            // entries to the buckets.
            if func.end_address <= func.start_address {
                continue;
            }
            let first_bucket = Self::bucket_id(func.start_address);
            // `end_address` is exclusive, hence the `- 1` (cannot underflow:
            // `end_address > start_address >= 0` was checked above).
            let last_bucket = Self::bucket_id(func.end_address - 1);
            // NOTE(review): a range is inserted into every bucket it spans, so
            // a bogus, extremely wide range makes this loop correspondingly long.
            for bucket in first_bucket..=last_bucket {
                inlined_buckets.entry(bucket).or_default().push(idx);
            }
        }

        Self {
            functions,
            inlined,
            inlined_buckets,
            strings,
        }
    }

    /// Returns the name stored for `func`.
    #[inline]
    pub fn get_name(&self, func: &FunctionInfo) -> &str {
        self.strings.get_name(func.name_idx)
    }

    /// Returns the source file stored for `func`, if any.
    #[inline]
    pub fn get_file(&self, func: &FunctionInfo) -> Option<&str> {
        self.strings.get_file(func.file_idx)
    }

    /// Returns the source line of `func`, or `None` when it is the `0`
    /// "unknown" marker.
    #[inline]
    pub fn get_line(&self, func: &FunctionInfo) -> Option<u32> {
        (func.line != 0).then_some(func.line)
    }

    /// Gives access to the string tables backing this index.
    pub fn strings(&self) -> &StringTables {
        &self.strings
    }

    /// Finds the top-level function whose `[start_address, end_address)`
    /// range contains `addr`.
    ///
    /// Only the candidate located by binary search on the start address is
    /// checked; `None` is returned when `addr` lies outside that candidate.
    #[inline]
    pub fn find_containing(&self, addr: u64) -> Option<&FunctionInfo> {
        // An empty slice yields `Err(0)`, so no separate emptiness check is
        // required.
        let idx = match self
            .functions
            .binary_search_by_key(&addr, |f| f.start_address)
        {
            Ok(exact) => exact,
            // `addr` precedes every known function.
            Err(0) => return None,
            // Otherwise the candidate is the last function starting before `addr`.
            Err(insert_at) => insert_at - 1,
        };
        let func = &self.functions[idx];
        (func.start_address <= addr && addr < func.end_address).then_some(func)
    }

    /// Returns the best function name for `addr`: the tightest non-stdlib
    /// inlined range if one exists, otherwise the containing top-level
    /// function.
    #[inline]
    pub fn find_function_name(&self, addr: u64) -> Option<&str> {
        self.find_crate_inlined(addr)
            .or_else(|| self.find_containing(addr))
            .map(|func| self.strings.get_name(func.name_idx))
    }

    /// Returns the smallest inlined range containing `addr` whose name is not
    /// a standard-library name. On equal sizes the first candidate wins.
    fn find_crate_inlined(&self, addr: u64) -> Option<&FunctionInfo> {
        let mut best: Option<&FunctionInfo> = None;
        let mut best_size = u64::MAX;
        for func in self.inlined_containing(addr) {
            if Self::is_stdlib_name(self.strings.get_name(func.name_idx)) {
                continue;
            }
            // Cannot underflow: containment implies `end_address > start_address`.
            let size = func.end_address - func.start_address;
            if size < best_size {
                best = Some(func);
                best_size = size;
            }
        }
        best
    }

    /// Returns `(file, line, name)` for the largest standard-library inlined
    /// range containing `addr`.
    ///
    /// The file and line are those stored on the inlined record. `None` is
    /// returned when there is no such range, when it has no file, or when its
    /// line is the `0` "unknown" marker. On equal sizes the first candidate
    /// wins.
    pub fn get_inlined_call_site(&self, addr: u64) -> Option<(&str, u32, &str)> {
        let mut best: Option<&FunctionInfo> = None;
        let mut best_size = 0u64;
        for func in self.inlined_containing(addr) {
            if !Self::is_stdlib_name(self.strings.get_name(func.name_idx)) {
                continue;
            }
            // Cannot underflow: containment implies `end_address > start_address`.
            let size = func.end_address - func.start_address;
            if size > best_size {
                best = Some(func);
                best_size = size;
            }
        }

        let func = best?;
        let file = self.strings.get_file(func.file_idx)?;
        if func.line == 0 {
            return None;
        }
        Some((file, func.line, self.strings.get_name(func.name_idx)))
    }

    /// Returns the top-level functions, sorted by start address.
    pub fn functions(&self) -> &[FunctionInfo] {
        &self.functions
    }
}
/// Owned, not-yet-interned result of parsing a single DIE; converted into a
/// `FunctionInfo` by `get_functions_from_dwarf`.
struct ParsedFunctionInfo {
// Function name (demangled when it came from a linkage-name attribute).
name: String,
// Start of the address range.
start_address: u64,
// End of the address range.
end_address: u64,
// Resolved file path: the declaration file for subprograms, the call-site
// file for inlined subroutines.
file: Option<String>,
// Matching line: declaration line for subprograms, call line for inlined
// subroutines.
line: Option<u32>,
}
#[allow(dead_code)] pub(crate) fn load_dwarf_sections_with_relocations_elf<'a>(
elf: &goblin::elf::Elf,
buffer: &'a [u8],
endian: RunTimeEndian,
) -> Result<
Dwarf<gimli::RelocateReader<DwarfReader<'a>, crate::elf_relocations::RelocationMap>>,
gimli::Error,
> {
use gimli::RelocateReader;
let debug_line_relocs =
crate::elf_relocations::RelocationMap::parse_from_elf(elf, buffer, ".rela.debug_line");
let find_section = |name: &str| -> Option<&'a [u8]> {
let section_name = if name.starts_with('.') {
Cow::Borrowed(name)
} else {
Cow::Owned(format!(".debug_{}", name))
};
for sh in &elf.section_headers {
if let Some(sh_name) = elf.shdr_strtab.get_at(sh.sh_name) {
if sh_name == section_name.as_ref() {
let offset = sh.sh_offset as usize;
let size = sh.sh_size as usize;
return buffer.get(offset..offset + size);
}
}
}
None
};
let load_section = |id: SectionId| -> Result<
RelocateReader<DwarfReader<'a>, crate::elf_relocations::RelocationMap>,
gimli::Error,
> {
let data = find_section(id.name()).unwrap_or(&[]);
let slice = EndianSlice::new(data, endian);
if id == SectionId::DebugLine {
Ok(RelocateReader::new(slice, debug_line_relocs.clone()))
} else {
Ok(RelocateReader::new(
slice,
crate::elf_relocations::RelocationMap::empty(),
))
}
};
Dwarf::load(&load_section)
}
/// Loads the DWARF sections of `binary` as borrowed slices of `buffer`.
///
/// The byte order is taken from the binary's `little_endian` flag. Section
/// names are translated with `BinaryRef::dwarf_section_name`; sections that
/// cannot be found are loaded as empty slices.
///
/// # Errors
///
/// Returns any error reported by `gimli::Dwarf::load`.
pub(crate) fn load_dwarf_sections<'a>(
    binary: &BinaryRef<'a>,
    buffer: &'a [u8],
) -> Result<Dwarf<DwarfReader<'a>>, gimli::Error> {
    let little_endian = match binary {
        BinaryRef::MachO(macho) => macho.little_endian,
        BinaryRef::Elf(elf) => elf.little_endian,
    };
    let endian = if little_endian {
        RunTimeEndian::Little
    } else {
        RunTimeEndian::Big
    };

    let find_section = |name: &str| -> Option<&'a [u8]> {
        let section_name = binary.dwarf_section_name(name);
        binary
            .find_section(buffer, &section_name)
            .map(|(_, data)| data)
    };

    let load_section = |id: SectionId| -> Result<DwarfReader<'a>, gimli::Error> {
        let data = find_section(id.name()).unwrap_or(&[]);
        Ok(EndianSlice::new(data, endian))
    };

    Dwarf::load(&load_section)
}
pub fn get_functions_from_dwarf<'a>(
binary: &BinaryRef<'a>,
buffer: &'a [u8],
project_root: &str,
) -> Result<DwarfFunctionResult, Box<dyn std::error::Error>> {
let dwarf = load_dwarf_sections(binary, buffer)?;
let mut headers = Vec::new();
let mut units_iter = dwarf.units();
while let Some(header) = units_iter.next()? {
headers.push(header);
}
let results: Vec<_> = headers
.into_par_iter()
.filter_map(|header| {
let unit = dwarf.unit(header).ok()?;
let mut funcs = Vec::new();
let mut inl = Vec::new();
let mut entries = unit.entries();
while let Some((_, entry)) = entries.next_dfs().ok()? {
match entry.tag() {
gimli::DW_TAG_subprogram => {
if let Ok(Some(func)) =
parse_function_die(&dwarf, &unit, entry, project_root)
{
funcs.push(func);
}
}
gimli::DW_TAG_inlined_subroutine => {
if let Ok(Some(func)) =
parse_inlined_subroutine(&dwarf, &unit, entry, project_root)
{
inl.push(func);
}
}
_ => {}
}
}
Some((funcs, inl))
})
.collect();
let mut strings = StringTables::new();
let mut functions = Vec::new();
let mut inlined = Vec::new();
for (funcs, inl) in results {
for parsed in funcs {
functions.push(FunctionInfo {
start_address: parsed.start_address,
end_address: parsed.end_address,
name_idx: strings.intern_name(parsed.name),
file_idx: strings.intern_file(parsed.file),
line: parsed.line.unwrap_or(0),
});
}
for parsed in inl {
inlined.push(FunctionInfo {
start_address: parsed.start_address,
end_address: parsed.end_address,
name_idx: strings.intern_name(parsed.name),
file_idx: strings.intern_file(parsed.file),
line: parsed.line.unwrap_or(0),
});
}
}
Ok((functions, inlined, strings))
}
fn parse_function_die<R: Reader>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
project_root: &str,
) -> Result<Option<ParsedFunctionInfo>, gimli::Error> {
let mut name: Option<String> = None;
let mut has_linkage_name = false;
let mut low_pc: Option<u64> = None;
let mut high_pc: Option<u64> = None;
let mut high_pc_is_offset = false;
let mut file: Option<String> = None;
let mut line: Option<u32> = None;
let mut specification: Option<gimli::UnitOffset<R::Offset>> = None;
let mut attrs = entry.attrs();
while let Some(attr) = attrs.next()? {
match attr.name() {
gimli::DW_AT_name => {
if !has_linkage_name {
if let Ok(s) = dwarf.attr_string(unit, attr.value()) {
name = Some(s.to_string_lossy()?.into_owned());
}
}
}
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
if let Ok(s) = dwarf.attr_string(unit, attr.value()) {
let mangled = s.to_string_lossy()?.into_owned();
let stripped = mangled.strip_prefix('_').unwrap_or(&mangled);
name = Some(format!("{:#}", demangle(stripped)));
has_linkage_name = true;
}
}
gimli::DW_AT_low_pc => {
if let AttributeValue::Addr(addr) = attr.value() {
low_pc = Some(addr);
}
}
gimli::DW_AT_high_pc => match attr.value() {
AttributeValue::Addr(addr) => {
high_pc = Some(addr);
}
AttributeValue::Udata(offset) => {
high_pc = Some(offset);
high_pc_is_offset = true;
}
_ => {}
},
gimli::DW_AT_decl_file => {
if let AttributeValue::FileIndex(idx) = attr.value() {
file = resolve_decl_file(dwarf, unit, idx, project_root)?;
}
}
gimli::DW_AT_decl_line => {
if let AttributeValue::Udata(l) = attr.value() {
line = Some(l as u32);
}
}
gimli::DW_AT_specification => {
if let AttributeValue::UnitRef(offset) = attr.value() {
specification = Some(offset);
}
}
_ => {}
}
}
if let Some(spec_offset) = specification {
if name.is_none() || file.is_none() || line.is_none() {
let (spec_name, spec_file, spec_line) =
resolve_specification(dwarf, unit, spec_offset, project_root)?;
if name.is_none() {
name = spec_name;
}
if file.is_none() {
file = spec_file;
}
if line.is_none() {
line = spec_line;
}
}
}
let high_pc = match (low_pc, high_pc, high_pc_is_offset) {
(Some(low), Some(high), true) => Some(low + high),
(_, high, false) => high,
_ => None,
};
match (name, low_pc, high_pc) {
(Some(name), Some(low_pc), Some(high_pc)) => Ok(Some(ParsedFunctionInfo {
name,
start_address: low_pc,
end_address: high_pc,
file,
line,
})),
_ => Ok(None),
}
}
fn parse_inlined_subroutine<R: Reader<Offset = usize>>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
project_root: &str,
) -> Result<Option<ParsedFunctionInfo>, gimli::Error> {
let mut abstract_origin: Option<gimli::UnitOffset<usize>> = None;
let mut low_pc: Option<u64> = None;
let mut high_pc: Option<u64> = None;
let mut high_pc_is_offset = false;
let mut ranges_attr: Option<AttributeValue<R>> = None;
let mut call_file: Option<String> = None;
let mut call_line: Option<u32> = None;
let mut attrs = entry.attrs();
while let Some(attr) = attrs.next()? {
match attr.name() {
gimli::DW_AT_abstract_origin => {
match attr.value() {
AttributeValue::UnitRef(offset) => {
abstract_origin = Some(offset);
}
AttributeValue::DebugInfoRef(debug_info_offset) => {
if let Some(unit_offset) = debug_info_offset.to_unit_offset(&unit.header) {
abstract_origin = Some(unit_offset);
}
}
_ => {}
}
}
gimli::DW_AT_low_pc => {
if let AttributeValue::Addr(addr) = attr.value() {
low_pc = Some(addr);
}
}
gimli::DW_AT_high_pc => match attr.value() {
AttributeValue::Addr(addr) => {
high_pc = Some(addr);
}
AttributeValue::Udata(offset) => {
high_pc = Some(offset);
high_pc_is_offset = true;
}
_ => {}
},
gimli::DW_AT_ranges => {
ranges_attr = Some(attr.value());
}
gimli::DW_AT_call_file => {
let file_idx = match attr.value() {
AttributeValue::FileIndex(idx) => Some(idx),
AttributeValue::Udata(idx) => Some(idx),
_ => None,
};
if let Some(idx) = file_idx {
call_file = resolve_decl_file(dwarf, unit, idx, project_root).unwrap_or(None);
}
}
gimli::DW_AT_call_line => {
if let AttributeValue::Udata(l) = attr.value() {
call_line = Some(l as u32);
}
}
_ => {}
}
}
let high_pc = match (low_pc, high_pc, high_pc_is_offset) {
(Some(low), Some(high), true) => Some(low + high),
(_, high, false) => high,
_ => None,
};
let (final_low_pc, final_high_pc) = if low_pc.is_some() && high_pc.is_some() {
(low_pc, high_pc)
} else if let Some(ranges_value) = ranges_attr {
if let Ok(Some(mut ranges)) = dwarf.attr_ranges(unit, ranges_value) {
if let Ok(Some(range)) = ranges.next() {
(Some(range.begin), Some(range.end))
} else {
(None, None)
}
} else {
(None, None)
}
} else {
(None, None)
};
let name = if let Some(offset) = abstract_origin {
resolve_abstract_origin_name(dwarf, unit, offset)?
} else {
None
};
match (name, final_low_pc, final_high_pc) {
(Some(name), Some(low_pc), Some(high_pc)) => Ok(Some(ParsedFunctionInfo {
name,
start_address: low_pc,
end_address: high_pc,
file: call_file,
line: call_line,
})),
_ => Ok(None),
}
}
/// Resolves the name of the DIE at `offset`, following `DW_AT_specification`
/// links while no name has been found.
///
/// A linkage name (one leading `_` stripped, then demangled) is preferred
/// over `DW_AT_name`. The chain of specification links is followed
/// iteratively and capped, so self-referential or cyclic references in
/// malformed input end with `Ok(None)` instead of exhausting the stack.
///
/// # Errors
///
/// Propagates gimli errors from entry lookup, attribute iteration and string
/// decoding.
fn resolve_abstract_origin_name<R: Reader>(
    dwarf: &Dwarf<R>,
    unit: &Unit<R>,
    offset: gimli::UnitOffset<R::Offset>,
) -> Result<Option<String>, gimli::Error> {
    // Upper bound on the number of DIEs visited along one specification chain.
    const MAX_SPECIFICATION_HOPS: usize = 16;

    let mut current = offset;
    for _ in 0..MAX_SPECIFICATION_HOPS {
        let entry = unit.entry(current)?;
        let mut name: Option<String> = None;
        let mut specification: Option<gimli::UnitOffset<R::Offset>> = None;

        let mut attrs = entry.attrs();
        while let Some(attr) = attrs.next()? {
            match attr.name() {
                gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
                    if let Ok(s) = dwarf.attr_string(unit, attr.value()) {
                        let mangled = s.to_string_lossy()?.into_owned();
                        // Drop a single leading underscore before demangling.
                        let stripped = mangled.strip_prefix('_').unwrap_or(&mangled);
                        name = Some(format!("{:#}", demangle(stripped)));
                    }
                }
                gimli::DW_AT_name => {
                    // Never overwrite a name already taken from a linkage name.
                    if name.is_none() {
                        if let Ok(s) = dwarf.attr_string(unit, attr.value()) {
                            name = Some(s.to_string_lossy()?.into_owned());
                        }
                    }
                }
                gimli::DW_AT_specification => {
                    if let AttributeValue::UnitRef(spec_offset) = attr.value() {
                        specification = Some(spec_offset);
                    }
                }
                _ => {}
            }
        }

        match (name, specification) {
            (Some(found), _) => return Ok(Some(found)),
            (None, Some(next)) => current = next,
            (None, None) => return Ok(None),
        }
    }

    // Chain too long (or cyclic): give up without a name.
    Ok(None)
}
/// Builds a path string for a line-program file entry.
///
/// The entry's directory (when present and non-empty) is prepended to the
/// file name. A result that does not start with `/` is then prefixed with the
/// unit's `comp_dir` if that is non-empty and accepted by
/// `is_valid_directory_path`, and with `project_root` otherwise.
///
/// # Errors
///
/// Propagates gimli errors from string lookup and decoding.
pub(crate) fn resolve_line_file_path<R: Reader>(
    dwarf: &Dwarf<R>,
    unit: &Unit<R>,
    file_entry: &gimli::FileEntry<R, R::Offset>,
    header: &gimli::LineProgramHeader<R, R::Offset>,
    project_root: &str,
) -> Result<String, gimli::Error> {
    let file_name = dwarf
        .attr_string(unit, file_entry.path_name())?
        .to_string_lossy()?
        .into_owned();

    let full_path = match file_entry.directory(header) {
        Some(dir) => {
            let dir_str = dwarf
                .attr_string(unit, dir)?
                .to_string_lossy()?
                .into_owned();
            if dir_str.is_empty() {
                file_name
            } else {
                format!("{dir_str}/{file_name}")
            }
        }
        None => file_name,
    };

    // Paths that already start at the root need no further anchoring.
    if full_path.starts_with('/') {
        return Ok(full_path);
    }

    if let Some(comp_dir) = &unit.comp_dir {
        let comp_dir_str = comp_dir.to_string_lossy()?;
        let usable = !comp_dir_str.is_empty() && is_valid_directory_path(&comp_dir_str);
        if usable {
            return Ok(format!("{comp_dir_str}/{full_path}"));
        }
    }

    Ok(format!("{}/{}", project_root, full_path))
}
/// Heuristically decides whether `path` looks like a directory path.
///
/// Anything starting with `/`, `.` or `~` is accepted outright. Otherwise the
/// string is rejected if it contains `LLVM`, `clang`, `rustc` or a
/// parenthesis, and accepted in every other case (including the empty
/// string).
fn is_valid_directory_path(path: &str) -> bool {
    const REJECTED_SUBSTRINGS: [&str; 3] = ["LLVM", "clang", "rustc"];

    if matches!(path.chars().next(), Some('/' | '.' | '~')) {
        return true;
    }

    let has_rejected_substring = REJECTED_SUBSTRINGS
        .iter()
        .any(|&marker| path.contains(marker));
    let has_parenthesis = path.contains(|c: char| c == '(' || c == ')');

    !(has_rejected_substring || has_parenthesis)
}
/// Resolves a line-program file index of `unit` to a path string.
///
/// Returns `Ok(None)` when the unit has no line program or the index has no
/// file entry.
///
/// # Errors
///
/// Propagates errors from `resolve_line_file_path`.
fn resolve_decl_file<R: Reader>(
    dwarf: &Dwarf<R>,
    unit: &Unit<R>,
    file_idx: u64,
    project_root: &str,
) -> Result<Option<String>, gimli::Error> {
    let header = match unit.line_program.as_ref() {
        Some(program) => program.header(),
        None => return Ok(None),
    };

    match header.file(file_idx) {
        Some(entry) => {
            resolve_line_file_path(dwarf, unit, entry, header, project_root).map(Some)
        }
        None => Ok(None),
    }
}
fn resolve_specification<R: Reader>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
offset: gimli::UnitOffset<R::Offset>,
project_root: &str,
) -> Result<SpecificationResult, gimli::Error> {
let entry = unit.entry(offset)?;
let mut name: Option<String> = None;
let mut file: Option<String> = None;
let mut line: Option<u32> = None;
let mut attrs = entry.attrs();
while let Some(attr) = attrs.next()? {
match attr.name() {
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
if let Ok(s) = dwarf.attr_string(unit, attr.value()) {
let mangled = s.to_string_lossy()?.into_owned();
let stripped = mangled.strip_prefix('_').unwrap_or(&mangled);
name = Some(format!("{:#}", demangle(stripped)));
}
}
gimli::DW_AT_name => {
if name.is_none() {
if let Ok(s) = dwarf.attr_string(unit, attr.value()) {
name = Some(s.to_string_lossy()?.into_owned());
}
}
}
gimli::DW_AT_decl_file => {
if let AttributeValue::FileIndex(idx) = attr.value() {
file = resolve_decl_file(dwarf, unit, idx, project_root)?;
}
}
gimli::DW_AT_decl_line => {
if let AttributeValue::Udata(l) = attr.value() {
line = Some(l as u32);
}
}
_ => {}
}
}
Ok((name, file, line))
}