// pdb 0.5.0
//
// A parser for Microsoft PDB (Program Database) debugging information.
// Documentation
use std::borrow::Cow;

use scroll::Pread;

use crate::common::*;
use crate::msf::Stream;

/// Signature that the `magic` field of a string name table header must carry.
///
/// Upstream, this value is declared as `NMT::verHdr` in `nmt.h`.
const PDB_NMT_HDR: u32 = 0xeffe_effe;

/// Hash method that a string table header declares for its reverse lookup table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringTableHashVersion {
    /// The default hash method for reverse string lookups.
    ///
    /// Its hash function was originally defined as `LHashPbCb`.
    LongHash = 1,

    /// The revised hash method for reverse string lookups.
    ///
    /// Its hash function was originally defined as `LHashPbCbV2`.
    LongHashV2 = 2,
}

impl StringTableHashVersion {
    fn parse_u32(value: u32) -> Result<Self> {
        match value {
            1 => Ok(StringTableHashVersion::LongHash),
            2 => Ok(StringTableHashVersion::LongHashV2),
            _ => Err(Error::UnimplementedFeature(
                "unknown string table hash version",
            )),
        }
    }
}

/// Raw header of the string table stream.
///
/// The struct consists of three `u32` fields and is declared `#[repr(C, packed)]`, so its size is
/// 12 bytes. `names_start` depends on this: it uses `size_of::<Self>()` as the offset at which the
/// names buffer begins. Do not reorder or add fields without adjusting that.
#[repr(C, packed)]
#[derive(Clone, Copy, Debug, Pread)]
struct StringTableHeader {
    /// Magic bytes of the string table. `StringTable::parse` rejects any value other than
    /// `PDB_NMT_HDR`.
    magic: u32,
    /// Version of the hash table after the names, as a raw number. It is validated separately by
    /// `StringTableHashVersion::parse_u32`.
    hash_version: u32,
    /// The size of all names in bytes.
    names_size: u32,
}

impl StringTableHeader {
    /// Start index of the names buffer in the string table stream.
    ///
    /// String offsets are resolved relative to this index, which equals the size of the header.
    fn names_start(self) -> usize {
        std::mem::size_of::<Self>()
    }

    /// End index (exclusive) of the names buffer in the string table stream.
    ///
    /// `names_size` is read from the file and therefore untrusted. The addition saturates so that
    /// an oversized value cannot wrap around on targets where `usize` is 32 bits wide; a wrapped
    /// result would be smaller than `names_start` and defeat length checks made against it.
    fn names_end(self) -> usize {
        self.names_start().saturating_add(self.names_size as usize)
    }
}

/// The global string table of a PDB.
///
/// The string table is a two-way mapping from offset to string and back. It can be used to resolve
/// [`StringRef`] offsets to their string values. Sometimes, it is also referred to as "Name table".
/// The mapping from string to offset has not been implemented yet.
///
/// Use [`PDB::string_table`] to obtain an instance.
///
/// [`PDB::string_table`]: struct.PDB.html#method.string_table
#[derive(Debug)]
pub struct StringTable<'s> {
    /// Header parsed from the stream; supplies the bounds of the names buffer.
    header: StringTableHeader,
    /// Hash version declared in the header, already validated during parsing.
    hash_version: StringTableHashVersion,
    /// Stream that contains the header followed by the names buffer.
    stream: Stream<'s>,
}

impl<'s> StringTable<'s> {
    /// Parses the string table header from `stream` and validates it.
    ///
    /// Only the header is inspected; the names themselves are decoded on demand when a string is
    /// looked up.
    ///
    /// # Errors
    ///
    /// - `Error::UnimplementedFeature` if the magic differs from `PDB_NMT_HDR` or the hash
    ///   version is not recognized.
    /// - `Error::UnexpectedEof` if the stream is too short to hold the header plus the declared
    ///   names buffer.
    /// - Whatever error parsing the raw header itself produces.
    pub(crate) fn parse(stream: Stream<'s>) -> Result<Self> {
        let mut buf = stream.parse_buffer();
        let header = buf.parse::<StringTableHeader>()?;

        if header.magic != PDB_NMT_HDR {
            return Err(Error::UnimplementedFeature(
                "invalid string table signature",
            ));
        }

        // The string table should at least contain all names as C-strings. Their combined size is
        // declared in the `names_size` header field.
        //
        // `names_end()` is an absolute offset from the start of the stream, and lookups slice
        // `stream.as_slice()` up to that offset. Validate against the length of that very slice
        // rather than against the parse buffer, from which the header has already been consumed.
        if stream.as_slice().len() < header.names_end() {
            return Err(Error::UnexpectedEof);
        }

        let hash_version = StringTableHashVersion::parse_u32(header.hash_version)?;

        // After the name buffer, the stream contains a closed hash table for reverse mapping. From
        // the original header file (`nmi.h`):
        //
        //     Strings are mapped into name indices using a closed hash table of NIs.
        //     To find a string, we hash it and probe into the table, and compare the
        //     string against each successive ni's name until we hit or find an empty
        //     hash table entry.

        Ok(StringTable {
            header,
            hash_version,
            stream,
        })
    }
}

impl<'s> StringTable<'s> {
    /// Resolves a string value from this string table.
    ///
    /// Errors if the offset is out of bounds, otherwise returns the raw binary string value.
    pub fn get(&self, offset: StringRef) -> Result<RawString<'_>> {
        if offset.0 >= self.header.names_size {
            return Err(Error::UnexpectedEof);
        }

        let string_offset = self.header.names_start() + offset.0 as usize;
        let data = &self.stream.as_slice()[string_offset..self.header.names_end()];
        ParseBuffer::from(data).parse_cstring()
    }
}

impl StringRef {
    /// Resolves the raw string value of this reference.
    ///
    /// This method errors if the offset is out of bounds of the string table. Use
    /// [`PDB::string_table`] to obtain an instance of the string table.
    ///
    /// [`PDB::string_table`]: struct.PDB.html#method.string_table
    pub fn to_raw_string<'s>(self, strings: &'s StringTable<'_>) -> Result<RawString<'s>> {
        strings.get(self)
    }

    /// Resolves and decodes the UTF-8 string value of this reference.
    ///
    /// This method errors if the offset is out of bounds of the string table. Use
    /// [`PDB::string_table`] to obtain an instance of the string table.
    ///
    /// [`PDB::string_table`]: struct.PDB.html#method.string_table
    pub fn to_string_lossy<'s>(self, strings: &'s StringTable<'_>) -> Result<Cow<'s, str>> {
        strings.get(self).map(|r| r.to_string())
    }
}