use std::borrow::Cow;
use std::collections::btree_map::BTreeMap;
use std::error::Error;
use std::fmt;
use std::io::Cursor;
use std::sync::Arc;
use elsa::FrozenMap;
use parking_lot::RwLock;
use pdb_addr2line::pdb::{
AddressMap, FallibleIterator, ImageSectionHeader, InlineSiteSymbol, LineProgram, MachineType,
Module, ModuleInfo, PdbInternalSectionOffset, ProcedureSymbol, RawString, SeparatedCodeSymbol,
SymbolData, TypeIndex,
};
use pdb_addr2line::ModuleProvider;
use smallvec::SmallVec;
use thiserror::Error;
use symbolic_common::{
Arch, AsSelf, CodeId, CpuFamily, DebugId, Language, Name, NameMangling, SelfCell, Uuid,
};
use crate::base::*;
use crate::function_stack::FunctionStack;
use crate::Parse;
/// The PDB reader type used in this file: a `pdb::PDB` reading from a cursor over a byte slice.
type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>;
/// Byte prefix that `PdbObject::test` looks for at the start of a buffer.
const MAGIC_BIG: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00";
#[doc(hidden)]
pub use pdb_addr2line::pdb;
/// The category of a PDB error.
///
/// The enum is `#[non_exhaustive]`, so downstream matches need a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PdbErrorKind {
    /// Displayed as "invalid pdb file".
    BadObject,
    /// Displayed as "unexpected inline function without parent".
    UnexpectedInline,
    /// Displayed as "failed to format type name".
    FormattingFailed,
}

impl fmt::Display for PdbErrorKind {
    /// Writes the fixed, human-readable message for this kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::BadObject => "invalid pdb file",
            Self::UnexpectedInline => "unexpected inline function without parent",
            Self::FormattingFailed => "failed to format type name",
        };
        f.write_str(message)
    }
}
/// Error type for PDB handling: a [`PdbErrorKind`] plus an optional underlying cause.
///
/// Its `Display` output is the display of `kind`.
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct PdbError {
    kind: PdbErrorKind,
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}

impl PdbError {
    /// Builds an error of the given `kind` that wraps `source` as its cause.
    fn new<E>(kind: PdbErrorKind, source: E) -> Self
    where
        E: Into<Box<dyn Error + Send + Sync>>,
    {
        Self {
            kind,
            source: Some(source.into()),
        }
    }

    /// Returns the kind of this error.
    pub fn kind(&self) -> PdbErrorKind {
        self.kind
    }
}
impl From<PdbErrorKind> for PdbError {
fn from(kind: PdbErrorKind) -> Self {
Self { kind, source: None }
}
}
impl From<pdb::Error> for PdbError {
fn from(e: pdb::Error) -> Self {
Self::new(PdbErrorKind::BadObject, e)
}
}
impl From<fmt::Error> for PdbError {
fn from(e: fmt::Error) -> Self {
Self::new(PdbErrorKind::FormattingFailed, e)
}
}
impl From<pdb_addr2line::Error> for PdbError {
fn from(e: pdb_addr2line::Error) -> Self {
match e {
pdb_addr2line::Error::PdbError(e) => Self::new(PdbErrorKind::BadObject, e),
pdb_addr2line::Error::FormatError(e) => Self::new(PdbErrorKind::FormattingFailed, e),
e => Self::new(PdbErrorKind::FormattingFailed, e),
}
}
}
/// A PDB file opened from an in-memory buffer, together with the streams read from it by `parse`.
pub struct PdbObject<'data> {
/// The shared reader; every access visible in this file goes through `RwLock::write`.
pdb: Arc<RwLock<Pdb<'data>>>,
/// Result of `debug_information()` on the reader, shared with debug sessions.
debug_info: Arc<pdb::DebugInformation<'data>>,
/// Result of `pdb_information()` on the reader (source of the GUID and fallback age).
pdb_info: pdb::PDBInformation<'data>,
/// Result of `global_symbols()` on the reader; iterated by `symbols()`.
public_syms: pdb::SymbolTable<'data>,
/// Per-section executable flags built from the reader's section headers.
executable_sections: ExecutableSections,
/// The raw buffer the object was parsed from.
data: &'data [u8],
}
// NOTE(review): these impls assert thread-safety by hand. The only justification visible in this
// file is that the reader sits behind an `RwLock` and is always accessed via `.write()`;
// confirm that the remaining fields are safe to send/share before relying on this.
unsafe impl Send for PdbObject<'_> {}
unsafe impl Sync for PdbObject<'_> {}
impl<'data> PdbObject<'data> {
    /// Returns `true` if `data` starts with the PDB magic bytes (`MAGIC_BIG`).
    pub fn test(data: &[u8]) -> bool {
        data.starts_with(MAGIC_BIG)
    }

    /// Opens a PDB from `data` and reads the streams this object keeps around.
    ///
    /// The debug information, PDB information, global symbols and section headers are read
    /// up front, in that order.
    ///
    /// # Errors
    ///
    /// Returns a [`PdbError`] of kind `BadObject` if opening the file or reading any of these
    /// streams fails.
    pub fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
        let mut pdb = Pdb::open(Cursor::new(data))?;
        let dbi = pdb.debug_information()?;
        let pdbi = pdb.pdb_information()?;
        let pubi = pdb.global_symbols()?;
        let sections = pdb.sections()?;

        Ok(PdbObject {
            pdb: Arc::new(RwLock::new(pdb)),
            debug_info: Arc::new(dbi),
            pdb_info: pdbi,
            public_syms: pubi,
            data,
            // Fixed: this argument was the mis-encoded token `§ions`; it is a borrow of the
            // `sections` local read above.
            executable_sections: ExecutableSections::from_sections(&sections),
        })
    }

    /// The container format of this object, always `FileFormat::Pdb`.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::Pdb
    }

    /// The code identifier of this object; this implementation always returns `None`.
    pub fn code_id(&self) -> Option<CodeId> {
        None
    }

    /// The debug identifier, built from the PDB GUID and an age.
    ///
    /// The age of the debug information stream is preferred; if it cannot be read, the age of
    /// the PDB information stream is used. If the GUID bytes do not form a valid `Uuid`, the
    /// default `DebugId` is returned.
    pub fn debug_id(&self) -> DebugId {
        let age = self.debug_info.age().unwrap_or(self.pdb_info.age);
        match Uuid::from_slice(&self.pdb_info.guid.as_bytes()[..]) {
            Ok(uuid) => DebugId::from_parts(uuid, age),
            Err(_) => DebugId::default(),
        }
    }

    /// The CPU architecture derived from the machine type of the debug information.
    ///
    /// Falls back to `Arch::default()` if the machine type cannot be read.
    pub fn arch(&self) -> Arch {
        self.debug_info
            .machine_type()
            .ok()
            .map(arch_from_machine)
            .unwrap_or_default()
    }

    /// The kind of this object, always `ObjectKind::Debug`.
    pub fn kind(&self) -> ObjectKind {
        ObjectKind::Debug
    }

    /// The load address of this object; this implementation always returns `0`.
    pub fn load_address(&self) -> u64 {
        0
    }

    /// Whether this object exposes symbols; always `true`.
    pub fn has_symbols(&self) -> bool {
        true
    }

    /// Returns an iterator over the public symbols located in executable sections.
    ///
    /// Takes the write lock on the reader to load the address map. If the address map cannot
    /// be loaded, the returned iterator yields no symbols.
    pub fn symbols(&self) -> PdbSymbolIterator<'data, '_> {
        PdbSymbolIterator {
            symbols: self.public_syms.iter(),
            address_map: self.pdb.write().address_map().ok(),
            executable_sections: &self.executable_sections,
        }
    }

    /// Collects all symbols from [`symbols`](Self::symbols) into a `SymbolMap`.
    pub fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbols().collect()
    }

    /// Whether this object contains debug information; always `true`.
    pub fn has_debug_info(&self) -> bool {
        true
    }

    /// Whether this object contains embedded sources; always `false`.
    pub fn has_sources(&self) -> bool {
        false
    }

    /// Whether this object is malformed; always `false`.
    pub fn is_malformed(&self) -> bool {
        false
    }

    /// Builds a debug session for iterating files and functions.
    ///
    /// # Errors
    ///
    /// Returns whatever error `PdbDebugSession::build` produces.
    pub fn debug_session(&self) -> Result<PdbDebugSession<'data>, PdbError> {
        PdbDebugSession::build(self)
    }

    /// Whether this object is reported to contain unwind information.
    ///
    /// This is decided purely from the architecture: it is `true` exactly when the CPU family
    /// is `CpuFamily::Intel32`. The file contents are not inspected.
    pub fn has_unwind_info(&self) -> bool {
        self.arch().cpu_family() == CpuFamily::Intel32
    }

    /// The raw buffer this object was parsed from.
    pub fn data(&self) -> &'data [u8] {
        self.data
    }

    /// Gives access to the lock-guarded underlying reader.
    #[doc(hidden)]
    pub fn inner(&self) -> &RwLock<Pdb<'data>> {
        &self.pdb
    }
}
impl fmt::Debug for PdbObject<'_> {
    /// Formats a summary made of computed properties rather than the raw fields.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("PdbObject");
        out.field("debug_id", &self.debug_id());
        out.field("arch", &self.arch());
        // The load address is rendered in hexadecimal with a `0x` prefix.
        out.field("load_address", &format_args!("{:#x}", self.load_address()));
        out.field("has_symbols", &self.has_symbols());
        out.field("has_debug_info", &self.has_debug_info());
        out.field("has_unwind_info", &self.has_unwind_info());
        out.field("is_malformed", &self.is_malformed());
        out.finish()
    }
}
// Reinterprets `&PdbObject<'data>` as `&PdbObject<'slf>`. The `'data: 'slf` bound means the
// lifetime parameter can only be shortened by this conversion.
impl<'slf, 'data: 'slf> AsSelf<'slf> for PdbObject<'data> {
type Ref = PdbObject<'slf>;
fn as_self(&'slf self) -> &Self::Ref {
// NOTE(review): the transmute changes nothing but the lifetime parameter of the referent;
// presumably sound because the layout does not depend on the lifetime - confirm.
unsafe { std::mem::transmute(self) }
}
}
impl<'data> Parse<'data> for PdbObject<'data> {
type Error = PdbError;
fn test(data: &[u8]) -> bool {
Self::test(data)
}
fn parse(data: &'data [u8]) -> Result<Self, PdbError> {
Self::parse(data)
}
}
/// Trait implementation that forwards every method to the inherent method of the same name.
///
/// The inherent methods are called through explicit `Self::method(self)` paths so that the
/// forwarding cannot be mistaken for recursion into the trait method.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PdbObject<'data> {
    type Error = PdbError;
    type Session = PdbDebugSession<'data>;
    type SymbolIterator = PdbSymbolIterator<'data, 'object>;

    fn file_format(&self) -> FileFormat {
        Self::file_format(self)
    }

    fn code_id(&self) -> Option<CodeId> {
        Self::code_id(self)
    }

    fn debug_id(&self) -> DebugId {
        Self::debug_id(self)
    }

    fn arch(&self) -> Arch {
        Self::arch(self)
    }

    fn kind(&self) -> ObjectKind {
        Self::kind(self)
    }

    fn load_address(&self) -> u64 {
        Self::load_address(self)
    }

    fn has_symbols(&self) -> bool {
        Self::has_symbols(self)
    }

    fn symbols(&'object self) -> Self::SymbolIterator {
        Self::symbols(self)
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        Self::symbol_map(self)
    }

    fn has_debug_info(&self) -> bool {
        Self::has_debug_info(self)
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        Self::debug_session(self)
    }

    fn has_unwind_info(&self) -> bool {
        Self::has_unwind_info(self)
    }

    fn has_sources(&self) -> bool {
        Self::has_sources(self)
    }

    fn is_malformed(&self) -> bool {
        Self::is_malformed(self)
    }
}
/// Maps a PDB machine type to the corresponding `Arch`.
///
/// Any machine type without an explicit mapping below yields `Arch::Unknown`.
pub(crate) fn arch_from_machine(machine: MachineType) -> Arch {
    match machine {
        // Intel family.
        MachineType::Amd64 => Arch::Amd64,
        MachineType::X86 => Arch::X86,
        // ARM family.
        MachineType::Arm64 => Arch::Arm64,
        MachineType::Arm => Arch::Arm,
        // PowerPC.
        MachineType::PowerPC => Arch::Ppc,
        _ => Arch::Unknown,
    }
}
/// Lookup table that records, for each section header in order, whether it is executable.
struct ExecutableSections {
    is_executable_per_section: Vec<bool>,
}

impl ExecutableSections {
    /// Builds the table from optional section headers.
    ///
    /// A section counts as executable if either `executable()` or `execute()` is set on its
    /// characteristics. `None` produces an empty table.
    pub fn from_sections(sections: &Option<Vec<ImageSectionHeader>>) -> Self {
        let is_executable_per_section = sections
            .iter()
            .flatten()
            .map(|header| {
                let flags = header.characteristics;
                flags.executable() || flags.execute()
            })
            .collect();

        Self {
            is_executable_per_section,
        }
    }

    /// Returns whether `offset` lies in a section recorded as executable.
    ///
    /// Section numbers are one-based here: section `0` and sections beyond the table are
    /// reported as not executable.
    pub fn contains(&self, offset: &PdbInternalSectionOffset) -> bool {
        match offset.section.checked_sub(1) {
            Some(zero_based) => self
                .is_executable_per_section
                .get(zero_based as usize)
                .copied()
                .unwrap_or(false),
            None => false,
        }
    }
}
/// An iterator over the public symbols of a PDB that live in executable sections.
pub struct PdbSymbolIterator<'data, 'object> {
    symbols: pdb::SymbolIter<'object>,
    address_map: Option<AddressMap<'data>>,
    executable_sections: &'object ExecutableSections,
}

impl<'data, 'object> Iterator for PdbSymbolIterator<'data, 'object> {
    type Item = Symbol<'data>;

    /// Returns the next public symbol that is in an executable section and has an RVA.
    ///
    /// Yields nothing when no address map is available. Iteration ends at the end of the
    /// symbol table or at the first read error; symbols that fail to parse or are not public
    /// are skipped.
    fn next(&mut self) -> Option<Self::Item> {
        let address_map = self.address_map.as_ref()?;

        loop {
            let symbol = match self.symbols.next() {
                Ok(Some(symbol)) => symbol,
                _ => return None,
            };

            let public = match symbol.parse() {
                Ok(SymbolData::Public(public)) => public,
                _ => continue,
            };

            if !self.executable_sections.contains(&public.offset) {
                continue;
            }

            if let Some(rva) = public.offset.to_rva(address_map) {
                // The name is copied into an owned string so it does not borrow the table.
                let owned_name = public.name.to_string().into_owned();
                return Some(Symbol {
                    name: Some(Cow::Owned(owned_name)),
                    address: u64::from(rva.0),
                    size: 0,
                });
            }
        }
    }
}
/// Streams loaded from a PDB for a debug session, plus a cache of loaded module infos.
struct PdbStreams<'d> {
    debug_info: Arc<pdb::DebugInformation<'d>>,
    type_info: pdb::TypeInformation<'d>,
    id_info: pdb::IdInformation<'d>,
    string_table: Option<pdb::StringTable<'d>>,
    pdb: Arc<RwLock<Pdb<'d>>>,
    module_infos: FrozenMap<usize, Box<ModuleInfo<'d>>>,
}

impl<'d> PdbStreams<'d> {
    /// Loads the string table, type information and id information from `pdb`.
    ///
    /// The write lock on the reader is held for the whole call. A string table that is absent
    /// (`StreamNameNotFound`) is recorded as `None`; every other failure is returned as an
    /// error.
    fn from_pdb(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
        let mut reader = pdb.pdb.write();

        let string_table = match reader.string_table() {
            Err(pdb::Error::StreamNameNotFound) => None,
            other => Some(other?),
        };
        let type_info = reader.type_information()?;
        let id_info = reader.id_information()?;

        Ok(Self {
            debug_info: pdb.debug_info.clone(),
            type_info,
            id_info,
            string_table,
            pdb: pdb.pdb.clone(),
            module_infos: FrozenMap::new(),
        })
    }
}
impl<'d> pdb_addr2line::ModuleProvider<'d> for PdbStreams<'d> {
    /// Returns the module info for `module`, loading and caching it on first use.
    ///
    /// Cached entries are keyed by `module_index`. On a cache miss the write lock on the
    /// reader is taken to load the module; a module without info yields `Ok(None)` and is not
    /// cached.
    fn get_module_info(
        &self,
        module_index: usize,
        module: &Module,
    ) -> Result<Option<&ModuleInfo<'d>>, pdb::Error> {
        if let Some(cached) = self.module_infos.get(&module_index) {
            return Ok(Some(cached));
        }

        let mut reader = self.pdb.write();
        match reader.module_info(module)? {
            Some(loaded) => {
                let stored = self.module_infos.insert(module_index, Box::new(loaded));
                Ok(Some(stored))
            }
            None => Ok(None),
        }
    }
}
struct PdbDebugInfo<'d> {
streams: &'d PdbStreams<'d>,
address_map: pdb::AddressMap<'d>,
string_table: Option<&'d pdb::StringTable<'d>>,
type_formatter: pdb_addr2line::TypeFormatter<'d, 'd>,
}
impl<'d> PdbDebugInfo<'d> {
fn build(pdb: &PdbObject<'d>, streams: &'d PdbStreams<'d>) -> Result<Self, PdbError> {
let modules = streams.debug_info.modules()?.collect::<Vec<_>>()?;
let mut p = pdb.pdb.write();
let address_map = p.address_map()?;
drop(p);
Ok(PdbDebugInfo {
address_map,
streams,
string_table: streams.string_table.as_ref(),
type_formatter: pdb_addr2line::TypeFormatter::new_from_parts(
streams,
modules,
&streams.debug_info,
&streams.type_info,
&streams.id_info,
streams.string_table.as_ref(),
Default::default(),
)?,
})
}
fn units(&'d self) -> PdbUnitIterator<'_> {
PdbUnitIterator {
debug_info: self,
index: 0,
}
}
fn modules(&self) -> &[Module<'d>] {
self.type_formatter.modules()
}
fn get_module(&'d self, index: usize) -> Result<Option<&ModuleInfo<'_>>, PdbError> {
let module = match self.modules().get(index) {
Some(module) => module,
None => return Ok(None),
};
Ok(self.streams.get_module_info(index, module)?)
}
fn file_info(&self, file_info: pdb::FileInfo<'d>) -> Result<FileInfo<'_>, PdbError> {
let file_path = match self.string_table {
Some(string_table) => file_info.name.to_raw_string(string_table)?,
None => "".into(),
};
Ok(FileInfo::from_path(file_path.as_bytes()))
}
}
// Reinterprets `&PdbDebugInfo<'d>` as `&PdbDebugInfo<'slf>`. The `'d: 'slf` bound means the
// lifetime parameter can only be shortened by this conversion.
impl<'slf, 'd: 'slf> AsSelf<'slf> for PdbDebugInfo<'d> {
type Ref = PdbDebugInfo<'slf>;
fn as_self(&'slf self) -> &Self::Ref {
// NOTE(review): the transmute changes nothing but the lifetime parameter of the referent;
// presumably sound because the layout does not depend on the lifetime - confirm.
unsafe { std::mem::transmute(self) }
}
}
/// Debug session for a PDB object, exposing its files and functions.
///
/// Holds the boxed `PdbStreams` and the `PdbDebugInfo` derived from them in one `SelfCell`.
pub struct PdbDebugSession<'d> {
    cell: SelfCell<Box<PdbStreams<'d>>, PdbDebugInfo<'d>>,
}

impl<'d> PdbDebugSession<'d> {
    /// Loads the streams from `pdb` and builds the dependent debug info on top of them.
    fn build(pdb: &PdbObject<'d>) -> Result<Self, PdbError> {
        let owner = Box::new(PdbStreams::from_pdb(pdb)?);

        // NOTE(review): dereferences the raw pointer that `SelfCell::try_new` passes to the
        // closure; presumably it points at the boxed owner kept alive by the cell - confirm.
        let cell = SelfCell::try_new(owner, |streams| {
            PdbDebugInfo::build(pdb, unsafe { &*streams })
        })?;

        Ok(Self { cell })
    }

    /// Returns an iterator over all source files referenced by this session.
    pub fn files(&self) -> PdbFileIterator<'_> {
        let debug_info = self.cell.get();

        PdbFileIterator {
            debug_info,
            units: debug_info.units(),
            files: pdb::FileIterator::default(),
            finished: false,
        }
    }

    /// Returns an iterator over all functions in this session.
    pub fn functions(&self) -> PdbFunctionIterator<'_> {
        let units = self.cell.get().units();

        PdbFunctionIterator {
            units,
            functions: Vec::new().into_iter(),
            finished: false,
        }
    }

    /// Looks up source code by path; this implementation always returns `Ok(None)`.
    pub fn source_by_path(&self, _path: &str) -> Result<Option<Cow<'_, str>>, PdbError> {
        Ok(None)
    }
}
/// Trait implementation that forwards to the inherent methods of `PdbDebugSession`.
impl<'session> DebugSession<'session> for PdbDebugSession<'_> {
    type Error = PdbError;
    type FunctionIterator = PdbFunctionIterator<'session>;
    type FileIterator = PdbFileIterator<'session>;

    fn functions(&'session self) -> Self::FunctionIterator {
        Self::functions(self)
    }

    fn files(&'session self) -> Self::FileIterator {
        Self::files(self)
    }

    fn source_by_path(&self, path: &str) -> Result<Option<Cow<'_, str>>, Self::Error> {
        Self::source_by_path(self, path)
    }
}
/// One module of the PDB together with the shared debug info needed to process it.
struct Unit<'s> {
/// Shared debug info (address map, string table, type formatter).
debug_info: &'s PdbDebugInfo<'s>,
/// Index of this module in the module list; passed to the type formatter when naming functions.
module_index: usize,
/// The loaded module info whose symbols, inlinees and line program are read.
module: &'s pdb::ModuleInfo<'s>,
}
impl<'s> Unit<'s> {
fn load(
debug_info: &'s PdbDebugInfo<'s>,
module_index: usize,
module: &'s pdb::ModuleInfo<'s>,
) -> Result<Self, PdbError> {
Ok(Self {
debug_info,
module_index,
module,
})
}
/// Collects the records of `line_iter` into line infos sorted by address.
///
/// Records without an RVA and records with an explicit length of zero are dropped. After a
/// stable sort by address, a line is merged into its predecessor when it starts exactly where
/// the predecessor ends and both share the same file and line number.
fn collect_lines<I>(
    &self,
    mut line_iter: I,
    program: &LineProgram<'s>,
) -> Result<Vec<LineInfo<'s>>, PdbError>
where
    I: FallibleIterator<Item = pdb::LineInfo>,
    PdbError: From<I::Error>,
{
    let debug_info = self.debug_info;
    let mut lines = Vec::new();

    while let Some(record) = line_iter.next()? {
        let address = if let Some(rva) = record.offset.to_rva(&debug_info.address_map) {
            u64::from(rva.0)
        } else {
            continue;
        };

        let size = record.length.map(u64::from);
        if let Some(0) = size {
            continue;
        }

        let raw_file = program.get_file_info(record.file_index)?;
        let file = debug_info.file_info(raw_file)?;

        lines.push(LineInfo {
            address,
            size,
            file,
            line: record.line_start.into(),
        });
    }

    lines.sort_by_key(|line| line.address);

    // `dedup_by` passes the later element first; returning `true` removes it.
    lines.dedup_by(|current, prev| {
        let prev_end = prev.size.and_then(|size| prev.address.checked_add(size));
        let mergeable = prev_end == Some(current.address)
            && prev.file == current.file
            && prev.line == current.line;

        if mergeable {
            let extra = current.size.unwrap_or(0);
            prev.size = prev.size.map(|size| size.saturating_add(extra));
        }

        mergeable
    });

    Ok(lines)
}
/// Removes lines that lie entirely outside the address range of `func`.
///
/// A line is kept if it starts before the end of the function and either has no size or ends
/// after the start of the function.
fn sanitize_lines(func: &mut Function) {
    let (fn_start, fn_end) = (func.address, func.end_address());

    func.lines.retain(|line| {
        line.address < fn_end
            && line
                .size
                .map_or(true, |size| line.address.saturating_add(size) > fn_start)
    });
}
/// Builds a non-inline `Function` for the code range starting at `offset` with length `len`.
///
/// Returns `Ok(None)` if `offset` has no RVA. The name is formatted through the type
/// formatter; if formatting fails, the raw symbol name is used instead.
fn handle_function(
    &self,
    offset: PdbInternalSectionOffset,
    len: u32,
    name: RawString<'s>,
    type_index: TypeIndex,
    program: &LineProgram<'s>,
) -> Result<Option<Function<'s>>, PdbError> {
    let debug_info = self.debug_info;

    let rva = match offset.to_rva(&debug_info.address_map) {
        Some(rva) => rva,
        None => return Ok(None),
    };

    let raw_name = name.to_string();
    let formatted =
        debug_info
            .type_formatter
            .format_function(&raw_name, self.module_index, type_index);
    let display_name = match formatted {
        Ok(formatted) => Cow::Owned(formatted),
        Err(_) => raw_name,
    };
    let name = Name::new(display_name, NameMangling::Unmangled, Language::Unknown);

    let lines = self.collect_lines(program.lines_for_symbol(offset), program)?;

    Ok(Some(Function {
        address: u64::from(rva.0),
        size: len.into(),
        name,
        compilation_dir: &[],
        lines,
        inlinees: Vec::new(),
        inline: false,
    }))
}
/// Builds a `Function` from a procedure symbol using its own offset, length, name and type.
fn handle_procedure(
    &self,
    proc: &ProcedureSymbol<'s>,
    program: &LineProgram<'s>,
) -> Result<Option<Function<'s>>, PdbError> {
    let (offset, len) = (proc.offset, proc.len);
    let (name, type_index) = (proc.name, proc.type_index);
    self.handle_function(offset, len, name, type_index, program)
}
/// Builds a `Function` for a separated code range.
///
/// The address range comes from `sepcode`, while the name and type come from `proc`.
fn handle_separated_code(
    &self,
    proc: &ProcedureSymbol<'s>,
    sepcode: &SeparatedCodeSymbol,
    program: &LineProgram<'s>,
) -> Result<Option<Function<'s>>, PdbError> {
    let (offset, len) = (sepcode.offset, sepcode.len);
    let (name, type_index) = (proc.name, proc.type_index);
    self.handle_function(offset, len, name, type_index, program)
}
/// Builds an inline `Function` for an inline site.
///
/// The address range is derived from the collected lines: it starts at the first line and
/// ends at the end of the last line, where a last line without a size counts as one byte.
/// Returns `Ok(None)` if the inlinee has no lines.
fn handle_inlinee(
    &self,
    inline_site: InlineSiteSymbol<'s>,
    parent_offset: PdbInternalSectionOffset,
    inlinee: &pdb::Inlinee<'s>,
    program: &LineProgram<'s>,
) -> Result<Option<Function<'s>>, PdbError> {
    let lines = self.collect_lines(inlinee.lines(parent_offset, &inline_site), program)?;

    let (start, end) = match (lines.first(), lines.last()) {
        (Some(first), Some(last)) => (first.address, last.address + last.size.unwrap_or(1)),
        _ => return Ok(None),
    };

    let formatted = self
        .debug_info
        .type_formatter
        .format_id(self.module_index, inline_site.inlinee)?;
    let name = Name::new(formatted, NameMangling::Unmangled, Language::Unknown);

    Ok(Some(Function {
        address: start,
        size: end - start,
        name,
        compilation_dir: &[],
        lines,
        inlinees: Vec::new(),
        inline: true,
    }))
}
/// Walks all symbols of this module and returns the functions found in it.
///
/// Procedures, separated code belonging to the last seen procedure, and inline sites are
/// turned into `Function`s and pushed onto a `FunctionStack` together with their scope depth.
///
/// # Errors
///
/// Fails if the line program, symbols or inlinees cannot be read, if a procedure or separated
/// code symbol cannot be processed, or with `UnexpectedInline` if an inline site appears
/// without an enclosing procedure. Errors while processing an individual inline site are
/// discarded and that inline site is skipped.
fn functions(&self) -> Result<Vec<Function<'s>>, PdbError> {
let program = self.module.line_program()?;
let mut symbols = self.module.symbols()?;
// Index the inlinee records of this module by their id for lookup from inline sites.
let inlinees: BTreeMap<_, _> = self
.module
.inlinees()?
.map(|i| Ok((i.index(), i)))
.collect()?;
// Current scope nesting depth of the symbol being processed.
let mut depth = 0;
// Set when the previous symbol opened a scope; the depth increases on the next symbol.
let mut inc_next = false;
// Depth of a symbol that produced no function; deeper symbols are skipped while this is set.
let mut skipped_depth = None;
let mut functions = Vec::new();
let mut stack = FunctionStack::new();
// Stack of `(depth, offset)` for the procedures currently enclosing the symbol.
let mut proc_offsets = SmallVec::<[_; 3]>::new();
// The most recently seen procedure, used to attach separated code to it.
let mut last_proc = None;
while let Some(symbol) = symbols.next()? {
if inc_next {
depth += 1;
}
inc_next = symbol.starts_scope();
if symbol.ends_scope() {
depth -= 1;
// Leave the innermost procedure once its scope (or an outer one) has ended.
if proc_offsets.last().map_or(false, |&(d, _)| d >= depth) {
proc_offsets.pop();
}
}
// Skip everything nested below a skipped symbol; otherwise clear the skip marker.
match skipped_depth {
Some(skipped) if depth > skipped => continue,
_ => skipped_depth = None,
}
if symbol.ends_scope() {
// NOTE(review): presumably moves functions finished at this depth into `functions`;
// confirm against `FunctionStack::flush`.
stack.flush(depth, &mut functions);
}
let function = match symbol.parse() {
Ok(SymbolData::Procedure(proc)) => {
proc_offsets.push((depth, proc.offset));
let function = self.handle_procedure(&proc, &program)?;
last_proc = Some(proc);
function
}
// Separated code is only handled if it refers to the last seen procedure.
Ok(SymbolData::SeparatedCode(sepcode)) => match last_proc.as_ref() {
Some(last_proc) if last_proc.offset == sepcode.parent_offset => {
self.handle_separated_code(last_proc, &sepcode, &program)?
}
_ => continue,
},
Ok(SymbolData::InlineSite(site)) => {
let parent_offset = proc_offsets
.last()
.map(|&(_, offset)| offset)
.ok_or(PdbErrorKind::UnexpectedInline)?;
// An inline site without a matching inlinee record, or one that fails to
// process, yields no function (the error is dropped via `.ok()`).
if let Some(inlinee) = inlinees.get(&site.inlinee) {
self.handle_inlinee(site, parent_offset, inlinee, &program)
.ok()
.flatten()
} else {
None
}
}
// Symbols that fail to parse or are of any other kind are ignored.
_ => continue,
};
match function {
Some(mut function) => {
Self::sanitize_lines(&mut function);
stack.push(depth, function)
}
// No function was produced: skip the symbols nested inside this one.
None => skipped_depth = Some(depth),
}
}
// Flush whatever is still on the stack at the end of the symbol stream.
stack.flush(0, &mut functions);
Ok(functions)
}
}
/// Iterator over the units (modules) of a PDB, in module-index order.
struct PdbUnitIterator<'s> {
    debug_info: &'s PdbDebugInfo<'s>,
    index: usize,
}

impl<'s> Iterator for PdbUnitIterator<'s> {
    type Item = Result<Unit<'s>, PdbError>;

    /// Returns the next unit, skipping modules for which no module info is available.
    ///
    /// An error while loading a module is yielded as an item; the index has already advanced,
    /// so the following call continues with the next module.
    fn next(&mut self) -> Option<Self::Item> {
        let debug_info = self.debug_info;

        loop {
            let module_index = self.index;
            if module_index >= debug_info.modules().len() {
                return None;
            }
            self.index += 1;

            match debug_info.get_module(module_index) {
                Ok(Some(module)) => return Some(Unit::load(debug_info, module_index, module)),
                Ok(None) => continue,
                Err(error) => return Some(Err(error)),
            }
        }
    }
}
/// An iterator over the source files referenced by the line programs of all units.
pub struct PdbFileIterator<'s> {
    debug_info: &'s PdbDebugInfo<'s>,
    units: PdbUnitIterator<'s>,
    files: pdb::FileIterator<'s>,
    finished: bool,
}

impl<'s> Iterator for PdbFileIterator<'s> {
    type Item = Result<FileEntry<'s>, PdbError>;

    /// Returns the next file entry.
    ///
    /// Files of the current unit are drained first; then the next unit's line program is
    /// loaded. Errors are yielded as items without ending the iteration. Once the units are
    /// exhausted the iterator is marked finished and keeps returning `None`.
    fn next(&mut self) -> Option<Self::Item> {
        while !self.finished {
            match self.files.next() {
                Ok(Some(raw_file)) => {
                    let entry = self
                        .debug_info
                        .file_info(raw_file)
                        .map(|info| FileEntry::new(Cow::default(), info));
                    return Some(entry);
                }
                Err(error) => return Some(Err(error.into())),
                Ok(None) => {}
            }

            match self.units.next() {
                None => self.finished = true,
                Some(Err(error)) => return Some(Err(error)),
                Some(Ok(unit)) => match unit.module.line_program() {
                    Ok(line_program) => self.files = line_program.files(),
                    Err(error) => return Some(Err(error.into())),
                },
            }
        }

        None
    }
}
/// An iterator over the functions of all units in a PDB debug session.
pub struct PdbFunctionIterator<'s> {
    units: PdbUnitIterator<'s>,
    functions: std::vec::IntoIter<Function<'s>>,
    finished: bool,
}

impl<'s> Iterator for PdbFunctionIterator<'s> {
    type Item = Result<Function<'s>, PdbError>;

    /// Returns the next function.
    ///
    /// Functions of the current unit are drained first; then the functions of the next unit
    /// are computed. Errors are yielded as items without ending the iteration. Once the units
    /// are exhausted the iterator is marked finished and keeps returning `None`.
    fn next(&mut self) -> Option<Self::Item> {
        while !self.finished {
            if let Some(function) = self.functions.next() {
                return Some(Ok(function));
            }

            match self.units.next() {
                None => self.finished = true,
                Some(Err(error)) => return Some(Err(error)),
                Some(Ok(unit)) => match unit.functions() {
                    Ok(functions) => self.functions = functions.into_iter(),
                    Err(error) => return Some(Err(error)),
                },
            }
        }

        None
    }
}

impl std::iter::FusedIterator for PdbFunctionIterator<'_> {}