symbolic-debuginfo 11.0.0

A library to inspect and load DWARF debugging information from binaries, such as Mach-O or ELF.
Documentation
//! Support for Portable Executables, an extension of COFF used on Windows.

use std::borrow::Cow;
use std::error::Error;
use std::fmt;

use gimli::RunTimeEndian;
use goblin::pe;
use thiserror::Error;

use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};

use crate::base::*;
use crate::dwarf::*;
use crate::Parse;

pub use goblin::pe::exception::*;
pub use goblin::pe::section_table::SectionTable;

/// An error when dealing with [`PEObject`](struct.PEObject.html).
#[derive(Debug, Error)]
#[error("invalid PE file")]
pub struct PeError {
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}

impl PeError {
    /// Creates a new PE error from an arbitrary error payload.
    fn new<E>(source: E) -> Self
    where
        E: Into<Box<dyn Error + Send + Sync>>,
    {
        let source = Some(source.into());
        Self { source }
    }
}

/// Detects if the PE is a packer stub.
///
/// Such files usually only contain empty stubs in their `.pdata` and `.text` sections, and unwind
/// information cannot be retrieved reliably. Usually, the exception table is present, but unwind
/// info points into a missing section.
fn is_pe_stub(pe: &pe::PE<'_>) -> bool {
    let mut has_stub = false;
    let mut pdata_empty = false;

    for section in &pe.sections {
        let name = section.name().unwrap_or_default();
        pdata_empty = pdata_empty || name == ".pdata" && section.size_of_raw_data == 0;
        has_stub = has_stub || name.starts_with(".stub");
    }

    pdata_empty && has_stub
}

/// Portable Executable, an extension of COFF used on Windows.
///
/// This file format is used to carry program code. Debug information is usually moved to a separate
/// container, [`PdbObject`]. The PE file contains a reference to the PDB and vice versa to verify
/// that the files belong together.
///
/// In rare instances, PE files might contain debug information.
/// This is supported for DWARF debug information.
///
/// [`PdbObject`]: ../pdb/struct.PdbObject.html
pub struct PeObject<'data> {
    pe: pe::PE<'data>,
    data: &'data [u8],
    is_stub: bool,
}

impl<'data> PeObject<'data> {
    /// Tests whether the buffer could contain an PE object.
    pub fn test(data: &[u8]) -> bool {
        use scroll::{Pread, LE};
        matches!(
            data.get(0..2)
                .and_then(|data| data.pread_with::<u16>(0, LE).ok()),
            Some(pe::header::DOS_MAGIC)
        )
    }

    /// Tries to parse a PE object from the given slice.
    pub fn parse(data: &'data [u8]) -> Result<Self, PeError> {
        let pe = pe::PE::parse(data).map_err(PeError::new)?;
        let is_stub = is_pe_stub(&pe);
        Ok(PeObject { pe, data, is_stub })
    }

    /// The container file format, which is always `FileFormat::Pe`.
    pub fn file_format(&self) -> FileFormat {
        FileFormat::Pe
    }

    /// The code identifier of this object.
    ///
    /// The code identifier consists of the `time_date_stamp` field id the COFF header, followed by
    /// the `size_of_image` field in the optional header. If the optional PE header is not present,
    /// this identifier is `None`.
    pub fn code_id(&self) -> Option<CodeId> {
        let header = &self.pe.header;
        let optional_header = header.optional_header.as_ref()?;

        let timestamp = header.coff_header.time_date_stamp;
        let size_of_image = optional_header.windows_fields.size_of_image;
        let string = format!("{timestamp:08x}{size_of_image:x}");
        Some(CodeId::new(string))
    }

    /// The debug information identifier of this PE.
    ///
    /// Since debug information is usually stored in an external
    /// [`PdbObject`](crate::pdb::PdbObject), this identifier actually refers to the
    /// PDB. While strictly the filename of the PDB would also be necessary fully resolve
    /// it, in most instances the GUID and age contained in this identifier are sufficient.
    pub fn debug_id(&self) -> DebugId {
        self.pe
            .debug_data
            .as_ref()
            .and_then(|debug_data| debug_data.codeview_pdb70_debug_info.as_ref())
            .and_then(|debug_info| {
                // PE always stores the signature with little endian UUID fields.
                // Convert to network byte order (big endian) to match the
                // Breakpad processor's expectations.
                let mut data = debug_info.signature;
                data[0..4].reverse(); // uuid field 1
                data[4..6].reverse(); // uuid field 2
                data[6..8].reverse(); // uuid field 3

                let uuid = Uuid::from_slice(&data).ok()?;
                Some(DebugId::from_parts(uuid, debug_info.age))
            })
            .unwrap_or_default()
    }

    /// The name of the referenced PDB file.
    pub fn debug_file_name(&self) -> Option<Cow<'_, str>> {
        self.pe
            .debug_data
            .as_ref()
            .and_then(|debug_data| debug_data.codeview_pdb70_debug_info.as_ref())
            .map(|debug_info| {
                String::from_utf8_lossy(&debug_info.filename[..debug_info.filename.len() - 1])
            })
    }

    /// The CPU architecture of this object, as specified in the COFF header.
    pub fn arch(&self) -> Arch {
        let machine = self.pe.header.coff_header.machine;
        crate::pdb::arch_from_machine(machine.into())
    }

    /// The kind of this object, as specified in the PE header.
    pub fn kind(&self) -> ObjectKind {
        if self.pe.is_lib {
            ObjectKind::Library
        } else if self.is_stub {
            ObjectKind::Other
        } else {
            ObjectKind::Executable
        }
    }

    /// The address at which the image prefers to be loaded into memory.
    ///
    /// ELF files store all internal addresses as if it was loaded at that address. When the image
    /// is actually loaded, that spot might already be taken by other images and so it must be
    /// relocated to a new address. During load time, the loader rewrites all addresses in the
    /// program code to match the new load address so that there is no runtime overhead when
    /// executing the code.
    ///
    /// Addresses used in `symbols` or `debug_session` have already been rebased relative to that
    /// load address, so that the caller only has to deal with addresses relative to the actual
    /// start of the image.
    pub fn load_address(&self) -> u64 {
        self.pe.image_base as u64
    }

    /// Determines whether this object exposes a public symbol table.
    pub fn has_symbols(&self) -> bool {
        !self.pe.exports.is_empty()
    }

    /// Returns an iterator over symbols in the public symbol table.
    pub fn symbols(&self) -> PeSymbolIterator<'data, '_> {
        PeSymbolIterator {
            exports: self.pe.exports.iter(),
        }
    }

    /// Returns an ordered map of symbols in the symbol table.
    pub fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbols().collect()
    }

    /// Determines whether this object contains debug information.
    ///
    /// Not usually the case, except for PE's generated by some alternative toolchains
    /// which contain DWARF debug info.
    pub fn has_debug_info(&self) -> bool {
        self.section(".debug_info").is_some()
    }

    /// Determines whether this object contains embedded source.
    pub fn has_sources(&self) -> bool {
        false
    }

    /// Determines whether this object is malformed and was only partially parsed
    pub fn is_malformed(&self) -> bool {
        false
    }

    /// Constructs a debugging session.
    ///
    /// A debugging session loads certain information from the object file and creates caches for
    /// efficient access to various records in the debug information. Since this can be quite a
    /// costly process, try to reuse the debugging session as long as possible.
    ///
    /// PE files usually don't have embedded debugging information,
    /// but some toolchains (e.g. MinGW) generate DWARF debug info.
    ///
    /// Constructing this session will also work if the object does not contain debugging
    /// information, in which case the session will be a no-op. This can be checked via
    /// [`has_debug_info`](struct.PeObject.html#method.has_debug_info).
    pub fn debug_session(&self) -> Result<DwarfDebugSession<'data>, DwarfError> {
        let symbols = self.symbol_map();
        DwarfDebugSession::parse(self, symbols, self.load_address() as i64, self.kind())
    }

    /// Determines whether this object contains stack unwinding information.
    pub fn has_unwind_info(&self) -> bool {
        !self.is_stub && self.exception_data().map_or(false, |e| !e.is_empty())
    }

    /// Returns the raw data of the PE file.
    pub fn data(&self) -> &'data [u8] {
        self.data
    }

    /// A list of the sections in this PE binary, used to resolve virtual addresses.
    pub fn sections(&self) -> &[SectionTable] {
        &self.pe.sections
    }

    /// Returns the `SectionTable` for the section with this name, if present.
    pub fn section(&self, name: &str) -> Option<SectionTable> {
        for s in &self.pe.sections {
            let sect_name = s.name();
            if sect_name.is_ok() && sect_name.unwrap() == name {
                return Some(s.clone());
            }
        }
        None
    }

    /// Returns exception data containing unwind information.
    pub fn exception_data(&self) -> Option<&ExceptionData<'_>> {
        if self.is_stub {
            None
        } else {
            self.pe.exception_data.as_ref()
        }
    }
}

impl fmt::Debug for PeObject<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("PeObject")
            .field("code_id", &self.code_id())
            .field("debug_id", &self.debug_id())
            .field("debug_file_name", &self.debug_file_name())
            .field("arch", &self.arch())
            .field("kind", &self.kind())
            .field("load_address", &format_args!("{:#x}", self.load_address()))
            .field("has_symbols", &self.has_symbols())
            .field("has_debug_info", &self.has_debug_info())
            .field("has_unwind_info", &self.has_unwind_info())
            .field("is_malformed", &self.is_malformed())
            .finish()
    }
}

impl<'slf, 'data: 'slf> AsSelf<'slf> for PeObject<'data> {
    type Ref = PeObject<'slf>;

    fn as_self(&'slf self) -> &Self::Ref {
        self
    }
}

impl<'data> Parse<'data> for PeObject<'data> {
    type Error = PeError;

    fn test(data: &[u8]) -> bool {
        Self::test(data)
    }

    fn parse(data: &'data [u8]) -> Result<Self, PeError> {
        Self::parse(data)
    }
}

impl<'data: 'object, 'object> ObjectLike<'data, 'object> for PeObject<'data> {
    type Error = DwarfError;
    type Session = DwarfDebugSession<'data>;
    type SymbolIterator = PeSymbolIterator<'data, 'object>;

    fn file_format(&self) -> FileFormat {
        self.file_format()
    }

    fn code_id(&self) -> Option<CodeId> {
        self.code_id()
    }

    fn debug_id(&self) -> DebugId {
        self.debug_id()
    }

    fn arch(&self) -> Arch {
        self.arch()
    }

    fn kind(&self) -> ObjectKind {
        self.kind()
    }

    fn load_address(&self) -> u64 {
        self.load_address()
    }

    fn has_symbols(&self) -> bool {
        self.has_symbols()
    }

    fn symbols(&'object self) -> Self::SymbolIterator {
        self.symbols()
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbol_map()
    }

    fn has_debug_info(&self) -> bool {
        self.has_debug_info()
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        self.debug_session()
    }

    fn has_unwind_info(&self) -> bool {
        self.has_unwind_info()
    }

    fn has_sources(&self) -> bool {
        self.has_sources()
    }

    fn is_malformed(&self) -> bool {
        self.is_malformed()
    }
}

/// An iterator over symbols in the PE file.
///
/// Returned by [`PeObject::symbols`](struct.PeObject.html#method.symbols).
pub struct PeSymbolIterator<'data, 'object> {
    exports: std::slice::Iter<'object, pe::export::Export<'data>>,
}

impl<'data, 'object> Iterator for PeSymbolIterator<'data, 'object> {
    type Item = Symbol<'data>;

    fn next(&mut self) -> Option<Self::Item> {
        self.exports.next().map(|export| Symbol {
            name: export.name.map(Cow::Borrowed),
            address: export.rva as u64,
            size: export.size as u64,
        })
    }
}

impl<'data> Dwarf<'data> for PeObject<'data> {
    fn endianity(&self) -> RunTimeEndian {
        // According to https://reverseengineering.stackexchange.com/questions/17922/determining-endianness-of-pe-files-windows-on-arm,
        // the only known platform running PE's with big-endian code is the Xbox360. Probably not worth handling.
        RunTimeEndian::Little
    }

    fn raw_section(&self, name: &str) -> Option<DwarfSection<'data>> {
        // Name is given without leading "."
        let sect = self.section(&format!(".{}", name))?;
        let start = sect.pointer_to_raw_data as usize;
        let end = start + (sect.virtual_size as usize);
        let dwarf_data: &'data [u8] = self.data.get(start..end)?;
        let dwarf_sect = DwarfSection {
            // TODO: What about 64-bit PE+? Still 32 bit?
            address: u64::from(sect.virtual_address),
            data: Cow::from(dwarf_data),
            offset: u64::from(sect.pointer_to_raw_data),
            align: 4096, // TODO: Does goblin expose this? For now, assume 4K page size
        };
        Some(dwarf_sect)
    }
}