mod metadata;
mod raw;
mod sequence_points;
mod sourcelinks;
mod streams;
mod utils;
use std::{borrow::Cow, fmt, io::Read};
use flate2::read::DeflateDecoder;
use thiserror::Error;
use watto::Pod;
use symbolic_common::{DebugId, Language, Uuid};
use metadata::{
CustomDebugInformation, CustomDebugInformationIterator, CustomDebugInformationTag,
MetadataStream, Table, TableType,
};
use sourcelinks::SourceLinkMappings;
use streams::{BlobStream, GuidStream, PdbStream, StringStream, UsStream};
/// The category of a [`FormatError`].
///
/// The `#[error]` text of each variant is what the owning [`FormatError`]
/// prints when displayed. The enum is `#[non_exhaustive]`, so matches outside
/// this crate need a wildcard arm.
#[derive(Debug, Clone, Copy, Error)]
#[non_exhaustive]
pub enum FormatErrorKind {
/// One of the fixed-size header parts could not be read from the data.
#[error("invalid header")]
InvalidHeader,
/// The header's signature differs from the expected metadata signature.
#[error("invalid signature")]
InvalidSignature,
/// A length or offset refers to data outside the available buffer, or
/// decompressed data did not have the declared size.
#[error("invalid length")]
InvalidLength,
/// The version string in the header is not valid UTF-8.
#[error("invalid version string")]
InvalidVersionString,
/// A stream header could not be read from the data.
#[error("invalid stream header")]
InvalidStreamHeader,
/// A stream name is not valid UTF-8.
#[error("invalid stream name")]
InvalidStreamName,
/// A string was requested but the file has no `#Strings` stream.
#[error("file does not contain a #Strings stream")]
NoStringsStream,
/// A string offset was invalid.
#[error("invalid string offset")]
InvalidStringOffset,
/// String data was invalid.
#[error("invalid string data")]
InvalidStringData,
/// The file declares a stream whose name is not one of the recognized ones.
#[error("unknown stream")]
UnknownStream,
/// A GUID was requested but the file has no `#GUID` stream.
#[error("file does not contain a #Guid stream")]
NoGuidStream,
/// A GUID index did not resolve to an entry of the GUID stream.
#[error("invalid guid index")]
InvalidGuidIndex,
/// The table stream is too short. The fields are, in order, the number of
/// bytes required and the number of bytes actually present.
#[error(
"insufficient table data: {0} bytes required, but table stream only contains {1} bytes"
)]
InsufficientTableData(usize, usize),
/// A blob offset was invalid.
#[error("invalid blob offset")]
InvalidBlobOffset,
/// A blob's contents could not be interpreted.
#[error("invalid blob data")]
InvalidBlobData,
/// A blob was requested but the file has no `#Blob` stream.
#[error("file does not contain a #Blob stream")]
NoBlobStream,
/// A compressed unsigned number was invalid.
#[error("invalid compressed unsigned number")]
InvalidCompressedUnsigned,
/// A compressed signed number was invalid.
#[error("invalid compressed signed number")]
InvalidCompressedSigned,
/// A document name was invalid.
#[error("invalid document name")]
InvalidDocumentName,
/// A sequence point was invalid.
#[error("invalid sequence point")]
InvalidSequencePoint,
/// A table was requested but the file has no `#~` stream.
#[error("file does not contain a #~ stream")]
NoMetadataStream,
/// A row index was out of bounds. The fields are the table and the row index.
#[error("row index {1} is out of bounds for table {0:?}")]
RowIndexOutOfBounds(TableType, usize),
/// A column index was out of bounds. The fields are the table and the
/// column index.
#[error("column index {1} is out of bounds for table {0:?}")]
ColIndexOutOfBounds(TableType, usize),
/// A column has an incompatible width. The fields are the table, the column
/// index, and the width.
#[error("column {1} in table {0:?} has incompatible width {2}")]
ColumnWidth(TableType, usize, usize),
/// A custom debug information item carried an invalid tag; the field is
/// that tag.
#[error("invalid custom debug information table item tag {0}")]
InvalidCustomDebugInformationTag(u32),
/// A blob declared an invalid format; the field is that format value.
#[error("invalid blob format {0}")]
InvalidBlobFormat(u32),
/// Source link JSON was invalid.
#[error("invalid source link JSON")]
InvalidSourceLinkJson,
}
/// An error encountered while reading a Portable PDB file.
///
/// Displays as the message of its [`FormatErrorKind`]. The underlying cause,
/// when there is one, is exposed through [`std::error::Error::source`].
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct FormatError {
/// Which category of failure occurred.
pub(crate) kind: FormatErrorKind,
/// The lower-level error that caused this one, if any.
#[source]
pub(crate) source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
}
impl FormatError {
    /// Builds an error of the given `kind` that records `source` as its cause.
    pub(crate) fn new<E>(kind: FormatErrorKind, source: E) -> Self
    where
        E: Into<Box<dyn std::error::Error + Send + Sync>>,
    {
        Self {
            kind,
            source: Some(source.into()),
        }
    }

    /// Returns the [`FormatErrorKind`] that categorizes this error.
    pub fn kind(&self) -> FormatErrorKind {
        let Self { kind, .. } = self;
        *kind
    }
}
impl From<FormatErrorKind> for FormatError {
fn from(kind: FormatErrorKind) -> Self {
Self { kind, source: None }
}
}
/// A parsed Portable PDB file.
///
/// The header and stream views borrow from the buffer handed to
/// [`PortablePdb::parse`]; the source link mappings carry no borrow of it.
#[derive(Clone)]
pub struct PortablePdb<'data> {
/// First part of the file header, up to the version string.
header: &'data raw::Header,
/// Version string from the header, cut at the first NUL byte.
version_string: &'data str,
/// Second part of the file header, following the version string.
header2: &'data raw::HeaderPart2,
/// The `#Pdb` stream, if the file contains one.
pdb_stream: Option<PdbStream<'data>>,
/// The `#~` stream, if the file contains one.
metadata_stream: Option<MetadataStream<'data>>,
/// The `#Strings` stream, if the file contains one.
string_stream: Option<StringStream<'data>>,
/// The `#US` stream, if the file contains one.
us_stream: Option<UsStream<'data>>,
/// The `#Blob` stream, if the file contains one.
blob_stream: Option<BlobStream<'data>>,
/// The `#GUID` stream, if the file contains one.
guid_stream: Option<GuidStream<'data>>,
/// Source link mappings collected from the file while parsing.
source_link_mappings: SourceLinkMappings,
}
/// Debug output shows the headers and version string in full, but only
/// reports whether each stream is present rather than dumping its contents.
impl fmt::Debug for PortablePdb<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("PortablePdb");

        out.field("header", &self.header);
        out.field("version_string", &self.version_string);
        out.field("header2", &self.header2);

        // Stream presence flags, in the same order as the struct fields.
        out.field("has_pdb_stream", &self.pdb_stream.is_some());
        out.field("has_table_stream", &self.metadata_stream.is_some());
        out.field("has_string_stream", &self.string_stream.is_some());
        out.field("has_us_stream", &self.us_stream.is_some());
        out.field("has_blob_stream", &self.blob_stream.is_some());
        out.field("has_guid_stream", &self.guid_stream.is_some());

        out.finish()
    }
}
impl<'data> PortablePdb<'data> {
    /// Cheaply checks whether `buf` looks like a Portable PDB file.
    ///
    /// Returns `true` when a header can be read from the start of `buf` and its
    /// signature equals the metadata signature. Nothing beyond the header is
    /// inspected, so a `true` result does not guarantee that
    /// [`parse`](Self::parse) succeeds.
    pub fn peek(buf: &[u8]) -> bool {
        raw::Header::ref_from_prefix(buf)
            .map_or(false, |(header, _)| header.signature == raw::METADATA_SIGNATURE)
    }

    /// Parses the Portable PDB contained in `buf`.
    ///
    /// The returned value borrows from `buf`.
    ///
    /// # Errors
    ///
    /// Returns a [`FormatError`] if the header, the version string, or a stream
    /// header is malformed, if a stream's offset and size do not lie within
    /// `buf`, if a stream with an unrecognized name is present, or if parsing
    /// one of the streams or the source link data fails.
    pub fn parse(buf: &'data [u8]) -> Result<Self, FormatError> {
        let (header, rest) =
            raw::Header::ref_from_prefix(buf).ok_or(FormatErrorKind::InvalidHeader)?;

        if header.signature != raw::METADATA_SIGNATURE {
            return Err(FormatErrorKind::InvalidSignature.into());
        }

        // The version string occupies `version_length` bytes; only the part
        // before the first NUL byte is kept.
        let version_length = header.version_length as usize;
        let version_buf = rest
            .get(..version_length)
            .ok_or(FormatErrorKind::InvalidLength)?;
        let version_buf = version_buf
            .split(|c| *c == 0)
            .next()
            .ok_or(FormatErrorKind::InvalidVersionString)?;
        let version = std::str::from_utf8(version_buf)
            .map_err(|e| FormatError::new(FormatErrorKind::InvalidVersionString, e))?;

        // Cannot panic: `..version_length` was verified to be in bounds above.
        let streams_buf = &rest[version_length..];
        let (header2, mut streams_buf) =
            raw::HeaderPart2::ref_from_prefix(streams_buf).ok_or(FormatErrorKind::InvalidHeader)?;
        let stream_count = header2.streams;

        let mut result = Self {
            header,
            version_string: version,
            header2,
            pdb_stream: None,
            metadata_stream: None,
            string_stream: None,
            us_stream: None,
            blob_stream: None,
            guid_stream: None,
            source_link_mappings: SourceLinkMappings::default(),
        };

        // The `#~` stream is only remembered here and parsed after the loop,
        // because parsing it uses the table sizes stored in the `#Pdb` stream.
        let mut metadata_stream = None;

        for _ in 0..stream_count {
            let (header, after_header_buf) = raw::StreamHeader::ref_from_prefix(streams_buf)
                .ok_or(FormatErrorKind::InvalidStreamHeader)?;

            // The name is looked for in at most the next 32 bytes and ends at
            // the first NUL byte.
            let name_buf = after_header_buf.get(..32).unwrap_or(after_header_buf);
            let name_buf = name_buf
                .split(|c| *c == 0)
                .next()
                .ok_or(FormatErrorKind::InvalidStreamName)?;
            let name = std::str::from_utf8(name_buf)
                .map_err(|e| FormatError::new(FormatErrorKind::InvalidStreamName, e))?;

            // The next stream header starts after the name plus its NUL
            // terminator, rounded up to a multiple of four bytes.
            let terminated_len = name.len() + 1;
            let rounded_name_len = match terminated_len % 4 {
                0 => terminated_len,
                r => terminated_len + (4 - r),
            };
            streams_buf = after_header_buf
                .get(rounded_name_len..)
                .ok_or(FormatErrorKind::InvalidLength)?;

            // Offset and size come straight from the file. Where `usize` is
            // only 32 bits wide their sum can overflow, so add with a check
            // instead of risking a panic or a wrapped range.
            let offset = header.offset as usize;
            let size = header.size as usize;
            let end = offset
                .checked_add(size)
                .ok_or(FormatErrorKind::InvalidLength)?;
            let stream_buf = buf
                .get(offset..end)
                .ok_or(FormatErrorKind::InvalidLength)?;

            match name {
                "#Pdb" => result.pdb_stream = Some(PdbStream::parse(stream_buf)?),
                "#~" => metadata_stream = Some(stream_buf),
                "#Strings" => result.string_stream = Some(StringStream::new(stream_buf)),
                "#US" => result.us_stream = Some(UsStream::new(stream_buf)),
                "#Blob" => result.blob_stream = Some(BlobStream::new(stream_buf)),
                "#GUID" => result.guid_stream = Some(GuidStream::parse(stream_buf)?),
                _ => return Err(FormatErrorKind::UnknownStream.into()),
            }
        }

        if let Some(stream_buf) = metadata_stream {
            // Without a `#Pdb` stream all referenced table sizes default to zero.
            let referenced_table_sizes = result
                .pdb_stream
                .as_ref()
                .map_or([0; 64], |s| s.referenced_table_sizes);
            result.metadata_stream =
                Some(MetadataStream::parse(stream_buf, referenced_table_sizes)?);
        }

        // Collect the blobs of all custom debug information entries of the
        // source link kind whose tag is `Module` and whose value is 1.
        const SOURCE_LINK_KIND: Uuid = uuid::uuid!("CC110556-A091-4D38-9FEC-25AB9A351A6A");
        let mut source_link_mappings = Vec::new();
        for cdi in CustomDebugInformationIterator::new(&result, SOURCE_LINK_KIND)? {
            let cdi = cdi?;
            if let (CustomDebugInformationTag::Module, 1) = (cdi.tag, cdi.value) {
                source_link_mappings.push(result.get_blob(cdi.blob)?);
            }
        }
        result.source_link_mappings = SourceLinkMappings::new(source_link_mappings)?;

        Ok(result)
    }

    /// Reads the string at `offset` from the `#Strings` stream.
    ///
    /// Fails with [`FormatErrorKind::NoStringsStream`] if the file has no such
    /// stream; other errors come from the stream's own lookup.
    #[allow(unused)]
    fn get_string(&self, offset: u32) -> Result<&'data str, FormatError> {
        self.string_stream
            .as_ref()
            .ok_or(FormatErrorKind::NoStringsStream)?
            .get_string(offset)
    }

    /// Reads the GUID with index `idx` from the `#GUID` stream.
    ///
    /// Fails with [`FormatErrorKind::NoGuidStream`] if the file has no such
    /// stream and with [`FormatErrorKind::InvalidGuidIndex`] if the stream has
    /// no entry for `idx`.
    fn get_guid(&self, idx: u32) -> Result<Uuid, FormatError> {
        self.guid_stream
            .as_ref()
            .ok_or(FormatErrorKind::NoGuidStream)?
            .get_guid(idx)
            .ok_or_else(|| FormatErrorKind::InvalidGuidIndex.into())
    }

    /// Reads the blob at `offset` from the `#Blob` stream.
    ///
    /// Fails with [`FormatErrorKind::NoBlobStream`] if the file has no such
    /// stream; other errors come from the stream's own lookup.
    fn get_blob(&self, offset: u32) -> Result<&'data [u8], FormatError> {
        self.blob_stream
            .as_ref()
            .ok_or(FormatErrorKind::NoBlobStream)?
            .get_blob(offset)
    }

    /// Returns the id reported by the `#Pdb` stream, or `None` if the file has
    /// no `#Pdb` stream.
    pub fn pdb_id(&self) -> Option<DebugId> {
        self.pdb_stream.as_ref().map(|stream| stream.id())
    }

    /// Looks up `table` in the metadata (`#~`) stream.
    ///
    /// Fails with [`FormatErrorKind::NoMetadataStream`] if the file has no
    /// such stream.
    pub(crate) fn get_table(&self, table: TableType) -> Result<Table, FormatError> {
        let md_stream = self
            .metadata_stream
            .as_ref()
            .ok_or(FormatErrorKind::NoMetadataStream)?;
        Ok(md_stream[table])
    }

    /// Returns `true` if the `MethodDebugInformation` table has at least one
    /// row. A file without a metadata stream yields `false`.
    pub fn has_debug_info(&self) -> bool {
        self.metadata_stream.as_ref().map_or(false, |md_stream| {
            md_stream[TableType::MethodDebugInformation].rows > 0
        })
    }

    /// Reads the document stored in row `idx` of the `Document` table.
    ///
    /// `idx` is handed unchanged to the table's row lookup. The document's
    /// name and language are resolved from the offsets stored in columns 1
    /// and 4 of that row.
    ///
    /// # Errors
    ///
    /// Returns an error if there is no metadata stream, or if looking up the
    /// row, reading its columns, or resolving the name or language fails.
    pub fn get_document(&self, idx: usize) -> Result<Document, FormatError> {
        let table = self.get_table(TableType::Document)?;
        let row = table.get_row(idx)?;
        let name_offset = row.get_col_u32(1)?;
        let lang_offset = row.get_col_u32(4)?;

        let name = self.get_document_name(name_offset)?;
        let lang = self.get_document_lang(lang_offset)?;

        Ok(Document { name, lang })
    }

    /// Returns the number of rows in the `Document` table.
    ///
    /// Fails with [`FormatErrorKind::NoMetadataStream`] if the file has no
    /// metadata stream.
    pub fn get_documents_count(&self) -> Result<usize, FormatError> {
        let table = self.get_table(TableType::Document)?;
        Ok(table.rows)
    }

    /// Returns an iterator over the source files embedded in this file.
    pub fn get_embedded_sources(&self) -> Result<EmbeddedSourceIterator<'_, 'data>, FormatError> {
        EmbeddedSourceIterator::new(self)
    }

    /// Returns `true` if there is at least one source link mapping and at
    /// least one document.
    ///
    /// The document count is only consulted when mappings exist, so a file
    /// without mappings yields `Ok(false)` even if it has no metadata stream.
    pub fn has_source_links(&self) -> Result<bool, FormatError> {
        Ok(!self.source_link_mappings.is_empty() && self.get_documents_count()? > 0)
    }

    /// Resolves the name of `document` against the source link mappings.
    ///
    /// Returns `None` if no mapping resolves the name.
    pub fn get_source_link(&self, document: &Document) -> Option<Cow<'_, str>> {
        self.source_link_mappings
            .resolve(&document.name)
            .map(Cow::Owned)
    }
}
/// A document (source file entry) read from a Portable PDB's `Document` table.
#[derive(Debug, Clone)]
pub struct Document {
/// The document's name as resolved from the table row.
pub name: String,
/// The document's language as resolved from the table row.
pub(crate) lang: Language,
}
/// An iterator over the source files embedded in a [`PortablePdb`].
///
/// Yields one `Result` per embedded source; see the `Iterator` implementation.
#[derive(Debug, Clone)]
pub struct EmbeddedSourceIterator<'object, 'data> {
/// The file whose documents and blobs are looked up for each item.
ppdb: &'object PortablePdb<'data>,
/// The underlying custom debug information rows being walked.
inner_it: CustomDebugInformationIterator<'data>,
}
impl<'object, 'data> EmbeddedSourceIterator<'object, 'data> {
    /// Creates an iterator over the custom debug information of `ppdb` whose
    /// kind is the embedded-source GUID.
    fn new(ppdb: &'object PortablePdb<'data>) -> Result<Self, FormatError> {
        const EMBEDDED_SOURCES_KIND: Uuid = uuid::uuid!("0E8A571B-6926-466E-B4AD-8AB04611F5FE");

        CustomDebugInformationIterator::new(ppdb, EMBEDDED_SOURCES_KIND)
            .map(|inner_it| Self { ppdb, inner_it })
    }

    /// Builds the [`EmbeddedSource`] described by `info`.
    ///
    /// `info.value` is used as the document index and `info.blob` as the
    /// offset of the blob holding the source contents. The document is looked
    /// up first, so its error takes precedence over a blob lookup error.
    fn get_source(
        &mut self,
        info: CustomDebugInformation,
    ) -> Result<EmbeddedSource<'data>, FormatError> {
        let ppdb = self.ppdb;
        // Field initializers run in the order written: document, then blob.
        Ok(EmbeddedSource {
            document: ppdb.get_document(info.value as usize)?,
            blob: ppdb.get_blob(info.blob)?,
        })
    }
}
/// Yields an [`EmbeddedSource`] for every underlying row tagged as a
/// document. Rows with other tags are skipped, and row errors are passed
/// through to the caller.
impl<'object, 'data> Iterator for EmbeddedSourceIterator<'object, 'data> {
    type Item = Result<EmbeddedSource<'data>, FormatError>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // `?` ends the iteration once the underlying rows are exhausted.
            let info = match self.inner_it.next()? {
                Ok(info) => info,
                Err(err) => return Some(Err(err)),
            };

            if matches!(info.tag, CustomDebugInformationTag::Document) {
                return Some(self.get_source(info));
            }
        }
    }
}
/// A source file embedded in a Portable PDB.
#[derive(Debug, Clone)]
pub struct EmbeddedSource<'data> {
/// The document this source belongs to.
document: Document,
/// The raw blob for this source: a four-byte format word followed by the
/// content.
blob: &'data [u8],
}
impl<'data, 'object> EmbeddedSource<'data> {
    /// Returns the name of the document this source belongs to.
    pub fn get_path(&'object self) -> &'object str {
        self.document.name.as_str()
    }

    /// Returns the contents of the embedded source, inflating them if needed.
    ///
    /// The blob starts with a four-byte little-endian signed `format` word:
    ///
    /// - `0`: the remaining bytes are the contents and are returned borrowed;
    /// - positive: the remaining bytes are deflate-compressed and `format` is
    ///   the size of the uncompressed contents;
    /// - negative: not a supported format.
    ///
    /// # Errors
    ///
    /// - [`FormatErrorKind::InvalidBlobData`] if the blob is shorter than four
    ///   bytes or the compressed data cannot be inflated;
    /// - [`FormatErrorKind::InvalidBlobFormat`] if `format` is negative;
    /// - [`FormatErrorKind::InvalidLength`] if the inflated size differs from
    ///   the declared one.
    pub fn get_contents(&self) -> Result<Cow<'data, [u8]>, FormatError> {
        const FORMAT_LEN: usize = 4;

        if self.blob.len() < FORMAT_LEN {
            return Err(FormatErrorKind::InvalidBlobData.into());
        }
        let (format_blob, data_blob) = self.blob.split_at(FORMAT_LEN);

        // Read as a signed little-endian value. An unsigned read would turn
        // negative formats into huge sizes, and a native-endian read would be
        // wrong on big-endian hosts.
        let format = i32::from_le_bytes(
            format_blob
                .try_into()
                .expect("split_at(4) yields a four byte prefix"),
        );

        match format {
            0 => Ok(Cow::Borrowed(data_blob)),
            size if size > 0 => self.inflate_contents(size as usize, data_blob),
            // The error variant carries a `u32`, so report the raw bit pattern.
            _ => Err(FormatErrorKind::InvalidBlobFormat(format as u32).into()),
        }
    }

    /// Inflates `data` and checks that exactly `size` bytes come out.
    ///
    /// Both `size` and `data` originate from the file, so neither is allowed
    /// to drive unbounded memory use: the up-front allocation is capped, and
    /// at most `size + 1` bytes are ever inflated.
    fn inflate_contents(
        &self,
        size: usize,
        data: &'data [u8],
    ) -> Result<Cow<'data, [u8]>, FormatError> {
        /// Upper bound for the initial buffer; larger outputs grow on demand.
        const MAX_PREALLOCATION: usize = 1 << 20;

        // One byte beyond `size` is enough to notice output that is too long.
        let limit = (size as u64).saturating_add(1);
        let mut decoder = DeflateDecoder::new(data).take(limit);

        let mut output = Vec::with_capacity(size.min(MAX_PREALLOCATION));
        let read_size = decoder
            .read_to_end(&mut output)
            .map_err(|e| FormatError::new(FormatErrorKind::InvalidBlobData, e))?;

        if read_size != size {
            return Err(FormatErrorKind::InvalidLength.into());
        }
        Ok(Cow::Owned(output))
    }
}