1mod metadata;
2mod raw;
3mod sequence_points;
4mod streams;
5mod utils;
6
7use std::{borrow::Cow, collections::BTreeMap, fmt, io::Read};
8
9use flate2::read::DeflateDecoder;
10use serde::Deserialize;
11use thiserror::Error;
12use watto::Pod;
13
14use symbolic_common::{DebugId, Language, SourceLinkMappings, Uuid};
15
16use metadata::{
17 CustomDebugInformation, CustomDebugInformationIterator, CustomDebugInformationTag,
18 MetadataStream, Table, TableType,
19};
20use streams::{BlobStream, GuidStream, PdbStream, StringStream, UsStream};
21
/// The kind of a [`FormatError`].
///
/// Each variant identifies one way in which reading a Portable PDB file can fail. The enum is
/// `#[non_exhaustive]`, so downstream `match` expressions need a wildcard arm.
#[derive(Debug, Clone, Copy, Error)]
#[non_exhaustive]
pub enum FormatErrorKind {
    /// The file header (or its second part) could not be read.
    #[error("invalid header")]
    InvalidHeader,
    /// The signature in the header does not match the expected metadata signature.
    #[error("invalid signature")]
    InvalidSignature,
    /// A length or offset read from the file does not fit the available data.
    #[error("invalid length")]
    InvalidLength,
    /// The version string in the header is malformed (for example, not valid UTF-8).
    #[error("invalid version string")]
    InvalidVersionString,
    /// A stream header could not be read.
    #[error("invalid stream header")]
    InvalidStreamHeader,
    /// A stream name is malformed (for example, not valid UTF-8).
    #[error("invalid stream name")]
    InvalidStreamName,
    /// A string was requested, but the file has no `#Strings` stream.
    #[error("file does not contain a #Strings stream")]
    NoStringsStream,
    /// An offset into the `#Strings` stream is invalid.
    #[error("invalid string offset")]
    InvalidStringOffset,
    /// String data could not be read.
    #[error("invalid string data")]
    InvalidStringData,
    /// The file contains a stream whose name is not recognized.
    #[error("unknown stream")]
    UnknownStream,
    /// A GUID was requested, but the file has no `#GUID` stream.
    #[error("file does not contain a #Guid stream")]
    NoGuidStream,
    /// An index into the `#GUID` stream is invalid.
    #[error("invalid guid index")]
    InvalidGuidIndex,
    /// The table stream is too small to hold its tables.
    ///
    /// The first value is the number of bytes required, the second the number of bytes actually
    /// contained in the table stream.
    #[error(
        "insufficient table data: {0} bytes required, but table stream only contains {1} bytes"
    )]
    InsufficientTableData(usize, usize),
    /// An offset into the `#Blob` stream is invalid.
    #[error("invalid blob offset")]
    InvalidBlobOffset,
    /// Blob data is malformed.
    #[error("invalid blob data")]
    InvalidBlobData,
    /// A blob was requested, but the file has no `#Blob` stream.
    #[error("file does not contain a #Blob stream")]
    NoBlobStream,
    /// A compressed unsigned number could not be decoded.
    #[error("invalid compressed unsigned number")]
    InvalidCompressedUnsigned,
    /// A compressed signed number could not be decoded.
    #[error("invalid compressed signed number")]
    InvalidCompressedSigned,
    /// A document name could not be read.
    #[error("invalid document name")]
    InvalidDocumentName,
    /// A sequence point could not be read.
    #[error("invalid sequence point")]
    InvalidSequencePoint,
    /// A table was requested, but the file has no `#~` (metadata) stream.
    #[error("file does not contain a #~ stream")]
    NoMetadataStream,
    /// The given row index (second value) is out of bounds for the given table (first value).
    #[error("row index {1} is out of bounds for table {0:?}")]
    RowIndexOutOfBounds(TableType, usize),
    /// The given column index (second value) is out of bounds for the given table (first value).
    #[error("column index {1} is out of bounds for table {0:?}")]
    ColIndexOutOfBounds(TableType, usize),
    /// The column (second value) of the table (first value) has a width (third value) that is
    /// incompatible with the requested access.
    #[error("column {1} in table {0:?} has incompatible width {2}")]
    ColumnWidth(TableType, usize, usize),
    /// A custom debug information table item carries the given, unrecognized tag.
    #[error("invalid custom debug information table item tag {0}")]
    InvalidCustomDebugInformationTag(u32),
    /// A blob declares the given format value, which is not supported.
    #[error("invalid blob format {0}")]
    InvalidBlobFormat(u32),
    /// Source link JSON stored in the file could not be deserialized.
    #[error("invalid source link JSON")]
    InvalidSourceLinkJson,
}
110
/// An error encountered while reading a Portable PDB file.
///
/// The error displays as its [`FormatErrorKind`]; an optional underlying cause is exposed
/// through [`std::error::Error::source`].
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct FormatError {
    /// What went wrong.
    pub(crate) kind: FormatErrorKind,
    /// The lower-level error that caused this one, if any.
    #[source]
    pub(crate) source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
}
119
120impl FormatError {
121 pub(crate) fn new<E>(kind: FormatErrorKind, source: E) -> Self
124 where
125 E: Into<Box<dyn std::error::Error + Send + Sync>>,
126 {
127 let source = Some(source.into());
128 Self { kind, source }
129 }
130
131 pub fn kind(&self) -> FormatErrorKind {
133 self.kind
134 }
135}
136
137impl From<FormatErrorKind> for FormatError {
138 fn from(kind: FormatErrorKind) -> Self {
139 Self { kind, source: None }
140 }
141}
142
/// A parsed Portable PDB file.
///
/// All stream data is borrowed from the buffer passed to `PortablePdb::parse`; only the source
/// link mappings are owned.
#[derive(Clone)]
pub struct PortablePdb<'data> {
    /// The first part of the file header, up to and including the version string length.
    header: &'data raw::Header,
    /// The version string from the header, cut at the first NUL byte.
    version_string: &'data str,
    /// The second part of the file header, which contains the stream count.
    header2: &'data raw::HeaderPart2,
    /// The `#Pdb` stream, if the file has one.
    pdb_stream: Option<PdbStream<'data>>,
    /// The `#~` (metadata/table) stream, if the file has one.
    metadata_stream: Option<MetadataStream<'data>>,
    /// The `#Strings` stream, if the file has one.
    string_stream: Option<StringStream<'data>>,
    /// The `#US` stream, if the file has one.
    us_stream: Option<UsStream<'data>>,
    /// The `#Blob` stream, if the file has one.
    blob_stream: Option<BlobStream<'data>>,
    /// The `#GUID` stream, if the file has one.
    guid_stream: Option<GuidStream<'data>>,
    /// Source link mappings collected from the file's custom debug information while parsing.
    source_link_mappings: SourceLinkMappings,
}
171
172impl fmt::Debug for PortablePdb<'_> {
173 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174 f.debug_struct("PortablePdb")
175 .field("header", &self.header)
176 .field("version_string", &self.version_string)
177 .field("header2", &self.header2)
178 .field("has_pdb_stream", &self.pdb_stream.is_some())
179 .field("has_table_stream", &self.metadata_stream.is_some())
180 .field("has_string_stream", &self.string_stream.is_some())
181 .field("has_us_stream", &self.us_stream.is_some())
182 .field("has_blob_stream", &self.blob_stream.is_some())
183 .field("has_guid_stream", &self.guid_stream.is_some())
184 .finish()
185 }
186}
187
188impl<'data> PortablePdb<'data> {
189 pub fn peek(buf: &[u8]) -> bool {
192 if let Some((header, _)) = raw::Header::ref_from_prefix(buf) {
193 return header.signature == raw::METADATA_SIGNATURE;
194 }
195 false
196 }
197
198 pub fn parse(buf: &'data [u8]) -> Result<Self, FormatError> {
200 let (header, rest) =
201 raw::Header::ref_from_prefix(buf).ok_or(FormatErrorKind::InvalidHeader)?;
202
203 if header.signature != raw::METADATA_SIGNATURE {
204 return Err(FormatErrorKind::InvalidSignature.into());
205 }
206
207 let version_length = header.version_length as usize;
210 let version_buf = rest
211 .get(..version_length)
212 .ok_or(FormatErrorKind::InvalidLength)?;
213 let version_buf = version_buf
214 .split(|c| *c == 0)
215 .next()
216 .ok_or(FormatErrorKind::InvalidVersionString)?;
217 let version = std::str::from_utf8(version_buf)
218 .map_err(|e| FormatError::new(FormatErrorKind::InvalidVersionString, e))?;
219
220 let streams_buf = &rest[version_length..];
222 let (header2, mut streams_buf) =
223 raw::HeaderPart2::ref_from_prefix(streams_buf).ok_or(FormatErrorKind::InvalidHeader)?;
224
225 let stream_count = header2.streams;
228
229 let mut result = Self {
230 header,
231 version_string: version,
232 header2,
233 pdb_stream: None,
234 metadata_stream: None,
235 string_stream: None,
236 us_stream: None,
237 blob_stream: None,
238 guid_stream: None,
239 source_link_mappings: SourceLinkMappings::default(),
240 };
241
242 let mut metadata_stream = None;
243 for _ in 0..stream_count {
244 let (header, after_header_buf) = raw::StreamHeader::ref_from_prefix(streams_buf)
245 .ok_or(FormatErrorKind::InvalidStreamHeader)?;
246
247 let name_buf = after_header_buf.get(..32).unwrap_or(after_header_buf);
248 let name_buf = name_buf
249 .split(|c| *c == 0)
250 .next()
251 .ok_or(FormatErrorKind::InvalidStreamName)?;
252 let name = std::str::from_utf8(name_buf)
253 .map_err(|e| FormatError::new(FormatErrorKind::InvalidStreamName, e))?;
254
255 let mut rounded_name_len = name.len() + 1;
256 rounded_name_len = match rounded_name_len % 4 {
257 0 => rounded_name_len,
258 r => rounded_name_len + (4 - r),
259 };
260 streams_buf = after_header_buf
261 .get(rounded_name_len..)
262 .ok_or(FormatErrorKind::InvalidLength)?;
263
264 let offset = header.offset as usize;
265 let size = header.size as usize;
266 let stream_buf = buf
267 .get(offset..offset + size)
268 .ok_or(FormatErrorKind::InvalidLength)?;
269
270 match name {
271 "#Pdb" => result.pdb_stream = Some(PdbStream::parse(stream_buf)?),
272 "#~" => metadata_stream = Some(stream_buf),
274 "#Strings" => result.string_stream = Some(StringStream::new(stream_buf)),
275 "#US" => result.us_stream = Some(UsStream::new(stream_buf)),
276 "#Blob" => result.blob_stream = Some(BlobStream::new(stream_buf)),
277 "#GUID" => result.guid_stream = Some(GuidStream::parse(stream_buf)?),
278 _ => return Err(FormatErrorKind::UnknownStream.into()),
279 }
280 }
281
282 if let Some(stream_buf) = metadata_stream {
283 result.metadata_stream = Some(MetadataStream::parse(
284 stream_buf,
285 result
286 .pdb_stream
287 .as_ref()
288 .map_or([0; 64], |s| s.referenced_table_sizes),
289 )?)
290 }
291
292 const SOURCE_LINK_KIND: Uuid = uuid::uuid!("CC110556-A091-4D38-9FEC-25AB9A351A6A");
295
296 #[derive(Debug, Clone, Deserialize)]
297 struct SourceLinkDocuments {
298 documents: BTreeMap<String, String>,
299 }
300
301 for cdi in CustomDebugInformationIterator::new(&result, SOURCE_LINK_KIND)? {
302 let cdi = cdi?;
303 if let (CustomDebugInformationTag::Module, 1) = (cdi.tag, cdi.value) {
305 let docs: SourceLinkDocuments = serde_json::from_slice(result.get_blob(cdi.blob)?)
306 .map_err(|e| FormatError::new(FormatErrorKind::InvalidSourceLinkJson, e))?;
307 result
308 .source_link_mappings
309 .extend(docs.documents.iter().map(|(k, v)| (&k[..], &v[..])));
310 }
311 }
312
313 Ok(result)
314 }
315
316 #[allow(unused)]
318 fn get_string(&self, offset: u32) -> Result<&'data str, FormatError> {
319 self.string_stream
320 .as_ref()
321 .ok_or(FormatErrorKind::NoStringsStream)?
322 .get_string(offset)
323 }
324
325 fn get_guid(&self, idx: u32) -> Result<Uuid, FormatError> {
329 self.guid_stream
330 .as_ref()
331 .ok_or(FormatErrorKind::NoGuidStream)?
332 .get_guid(idx)
333 .ok_or_else(|| FormatErrorKind::InvalidGuidIndex.into())
334 }
335
336 fn get_blob(&self, offset: u32) -> Result<&'data [u8], FormatError> {
338 self.blob_stream
339 .as_ref()
340 .ok_or(FormatErrorKind::NoBlobStream)?
341 .get_blob(offset)
342 }
343
344 pub fn pdb_id(&self) -> Option<DebugId> {
346 self.pdb_stream.as_ref().map(|stream| stream.id())
347 }
348
349 pub(crate) fn get_table(&self, table: TableType) -> Result<Table<'_>, FormatError> {
356 let md_stream = self
357 .metadata_stream
358 .as_ref()
359 .ok_or(FormatErrorKind::NoMetadataStream)?;
360 Ok(md_stream[table])
361 }
362
363 pub fn has_debug_info(&self) -> bool {
365 self.metadata_stream
366 .as_ref()
367 .is_some_and(|md_stream| md_stream[TableType::MethodDebugInformation].rows > 0)
368 }
369
370 pub fn get_document(&self, idx: usize) -> Result<Document, FormatError> {
374 let table = self.get_table(TableType::Document)?;
375 let row = table.get_row(idx)?;
376 let name_offset = row.get_col_u32(1)?;
377 let lang_offset = row.get_col_u32(4)?;
378
379 let name = self.get_document_name(name_offset)?;
380 let lang = self.get_document_lang(lang_offset)?;
381
382 Ok(Document { name, lang })
383 }
384
385 pub fn get_documents_count(&self) -> Result<usize, FormatError> {
387 let table = self.get_table(TableType::Document)?;
388 Ok(table.rows)
389 }
390
391 pub fn get_embedded_sources(&self) -> Result<EmbeddedSourceIterator<'_, 'data>, FormatError> {
393 EmbeddedSourceIterator::new(self)
394 }
395
396 pub fn has_source_links(&self) -> Result<bool, FormatError> {
398 Ok(!self.source_link_mappings.is_empty() && self.get_documents_count()? > 0)
399 }
400
401 pub fn get_source_link(&self, document: &Document) -> Option<Cow<'_, str>> {
406 self.source_link_mappings
407 .resolve(&document.name)
408 .map(Cow::Owned)
409 }
410}
411
/// A document (source file) referenced by a Portable PDB file, as read from its `Document`
/// table.
#[derive(Debug, Clone)]
pub struct Document {
    /// The name of the document.
    pub name: String,
    /// The language of the document.
    pub(crate) lang: Language,
}
419
/// An iterator over the source files embedded in a [`PortablePdb`].
///
/// Items are `Result`s: an entry that cannot be read is yielded as an error.
#[derive(Debug, Clone)]
pub struct EmbeddedSourceIterator<'object, 'data> {
    /// The file whose documents and blobs are looked up for each entry.
    ppdb: &'object PortablePdb<'data>,
    /// The underlying iterator over custom debug information records of the embedded-source
    /// kind.
    inner_it: CustomDebugInformationIterator<'data>,
}
426
427impl<'object, 'data> EmbeddedSourceIterator<'object, 'data> {
428 fn new(ppdb: &'object PortablePdb<'data>) -> Result<Self, FormatError> {
429 const EMBEDDED_SOURCES_KIND: Uuid = uuid::uuid!("0E8A571B-6926-466E-B4AD-8AB04611F5FE");
431 let inner_it = CustomDebugInformationIterator::new(ppdb, EMBEDDED_SOURCES_KIND)?;
432 Ok(EmbeddedSourceIterator { ppdb, inner_it })
433 }
434
435 fn get_source(
436 &mut self,
437 info: CustomDebugInformation,
438 ) -> Result<EmbeddedSource<'data>, FormatError> {
439 let document = self.ppdb.get_document(info.value as usize)?;
440 let blob = self.ppdb.get_blob(info.blob)?;
441 Ok(EmbeddedSource { document, blob })
442 }
443}
444
445impl<'data> Iterator for EmbeddedSourceIterator<'_, 'data> {
446 type Item = Result<EmbeddedSource<'data>, FormatError>;
447
448 fn next(&mut self) -> Option<Self::Item> {
449 while let Some(row) = self.inner_it.next() {
452 match row {
453 Err(e) => return Some(Err(e)),
454 Ok(info) => {
455 if let CustomDebugInformationTag::Document = info.tag {
456 return Some(self.get_source(info));
457 }
458 }
459 }
460 }
461 None
462 }
463}
464
/// A source file embedded in a Portable PDB file.
#[derive(Debug, Clone)]
pub struct EmbeddedSource<'data> {
    /// The document this source belongs to; its name serves as the source's path.
    document: Document,
    /// The raw blob of the embedded source: a 4-byte format field followed by the content.
    blob: &'data [u8],
}
471
472impl<'data, 'object> EmbeddedSource<'data> {
473 pub fn get_path(&'object self) -> &'object str {
475 self.document.name.as_str()
476 }
477
478 pub fn get_contents(&self) -> Result<Cow<'data, [u8]>, FormatError> {
480 if self.blob.len() < 4 {
487 return Err(FormatErrorKind::InvalidBlobData.into());
488 }
489 let (format_blob, data_blob) = self.blob.split_at(4);
490 let format = u32::from_ne_bytes(format_blob.try_into().unwrap());
491 match format {
492 0 => Ok(Cow::Borrowed(data_blob)),
493 x if x > 0 => self.inflate_contents(format as usize, data_blob),
494 _ => Err(FormatErrorKind::InvalidBlobFormat(format).into()),
495 }
496 }
497
498 fn inflate_contents(
499 &self,
500 size: usize,
501 data: &'data [u8],
502 ) -> Result<Cow<'data, [u8]>, FormatError> {
503 let mut decoder = DeflateDecoder::new(data);
504 let mut output = Vec::with_capacity(size);
505 let read_size = decoder
506 .read_to_end(&mut output)
507 .map_err(|e| FormatError::new(FormatErrorKind::InvalidBlobData, e))?;
508 if read_size != size {
509 return Err(FormatErrorKind::InvalidLength.into());
510 }
511 Ok(Cow::Owned(output))
512 }
513}