ms_pdb/
dbi.rs

1//! Provides access to the DBI Stream (Debug Information).
2//!
3//! The DBI Stream is a central data structure of the PDB. It contains many vital fields, and
4//! points to other streams that contain other important information. The DBI is stream 3.
5//!
6//! Briefly, the DBI contains these substreams:
7//!
8//! * Modules: This lists the modules (compilands / translation units) that compose an executable.
9//!   Each Module Info structure contains many important fields, including the stream number for
10//!   a Module Stream.
11//!
12//! * Section Contributions Substream
13//!
14//! * Section Map Substream
15//!
16//! * Sources Substream: This lists the source files that were inputs to all of the translation units.
17//!
18//! * Type Server Map Substream
19//!
20//! * Optional Debug Header Substream
21//!
22//! * Edit-and-Continue Substream
23//!
24//! The `Dbi` stream holds section contributions and the list of modules (compilands).
25//!
26//! * <https://llvm.org/docs/PDB/DbiStream.html>
27//! * <https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/langapi/include/pdb.h#L860>
28
29use crate::Container;
30use crate::{get_or_init_err, Stream};
31use crate::{StreamIndexIsNilError, StreamIndexU16};
32use anyhow::{bail, Result};
33use ms_codeview::parser::{Parser, ParserError, ParserMut};
34use std::mem::size_of;
35use std::ops::Range;
36use sync_file::ReadAt;
37use tracing::{error, warn};
38use zerocopy::{
39    FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U16, U32,
40};
41
42#[cfg(doc)]
43use crate::Pdb;
44
45pub mod modules;
46pub mod optional_dbg;
47pub mod section_contrib;
48pub mod section_map;
49pub mod sources;
50
51pub use modules::*;
52#[doc(inline)]
53pub use section_contrib::*;
54#[doc(inline)]
55pub use sources::*;
56
/// The header of the DBI (Debug Information) stream.
///
/// This is the fixed-size structure found at the very start of the DBI stream. All fields are
/// little-endian and the structure has no alignment requirement, so it can be borrowed directly
/// from the raw stream bytes. Its size is [`DBI_STREAM_HEADER_LEN`] bytes.
///
/// The `*_size` fields give the byte length of each substream that follows the header. They are
/// stored as signed integers; negative values are treated as invalid by the code that computes
/// the substream ranges.
#[repr(C)]
#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug, Clone)]
#[allow(missing_docs)]
pub struct DbiStreamHeader {
    /// Always -1
    pub signature: I32<LE>,

    /// One of the `DBI_STREAM_VERSION_*` values; typically, `DBI_STREAM_VERSION_V110`.
    pub version: U32<LE>,

    /// The number of times this PDB has been modified. The value is set to 1 when a PDB is
    /// first created. This value must match the same field within the PE header.
    pub age: U32<LE>,

    /// The index of the Global Symbol Index, which contains a name-to-symbol lookup table for
    /// global symbols. The symbol records are not stored in this stream; they are stored in the
    /// Global Symbol Stream.
    pub global_symbol_index_stream: StreamIndexU16,

    // NOTE(review): presumably the build number of the toolchain that wrote the PDB — confirm.
    pub build_number: U16<LE>,

    /// The index of the stream that contains the Public Symbol Index (GSI). This contains a
    /// name-to-symbol map and an address-to-symbol map. See [`crate::globals::gsi`].
    pub public_symbol_index_stream: StreamIndexU16,

    /// The version of the MSPDB DLL which produced this DBI stream.
    pub pdb_dll_version: U16<LE>,

    /// The stream that contains the Global Symbol Stream. This contains symbol records, which can
    /// be decoded using [`crate::syms::SymIter`].
    pub global_symbol_stream: StreamIndexU16,

    // NOTE(review): presumably a rebuild counter of the MSPDB DLL — confirm.
    pub pdb_dll_rbld: U16<LE>,

    // Substreams
    //
    // Each `*_size` field below is the length, in bytes, of one substream. The substreams are
    // stored back-to-back after this header; a substream's position is the header length plus
    // the sizes of all substreams stored before it.
    /// Size in bytes of the Modules substream, which starts immediately after the header.
    pub mod_info_size: I32<LE>,
    /// Size in bytes of the Section Contributions substream.
    pub section_contribution_size: I32<LE>,
    /// Size in bytes of the Section Map substream.
    pub section_map_size: I32<LE>,
    /// Size in bytes of the Sources substream.
    pub source_info_size: I32<LE>,
    /// Size in bytes of the Type Server Map substream.
    pub type_server_map_size: I32<LE>,
    /// This field is _not_ a substream size. Not sure what it is.
    pub mfc_type_server_index: U32<LE>,
    /// Size in bytes of the Optional Debug Header substream.
    pub optional_dbg_header_size: I32<LE>,
    /// Size in bytes of the Edit-and-Continue substream.
    pub edit_and_continue_size: I32<LE>,

    // NOTE(review): bit flags; meaning of individual bits is not shown in this file.
    pub flags: U16<LE>,
    // NOTE(review): presumably the target machine (CPU) type — confirm against the PE/COFF spec.
    pub machine: U16<LE>,
    pub padding: U32<LE>,
}
107
/// Data for an empty DBI stream
///
/// This is a complete, serialized [`DbiStreamHeader`] with no substream data: the signature
/// is -1, the age is 1, all three stream indexes hold `0xFFFF` (which decodes as "no stream"),
/// and every substream size is zero. It is substituted when the DBI stream of a PDB is empty.
pub static EMPTY_DBI_STREAM_HEADER: [u8; DBI_STREAM_HEADER_LEN] = [
    0xFF, 0xFF, 0xFF, 0xFF, // signature (-1)
    0x77, 0x09, 0x31, 0x01, // version (0x01310977 = 19990903 = DBI_STREAM_VERSION_V70)
    0x01, 0x00, 0x00, 0x00, // age (1)
    0xFF, 0xFF, // global_symbol_index_stream (nil)
    0x00, 0x00, // build_number
    0xFF, 0xFF, // public_symbol_index_stream (nil)
    0x00, 0x00, // pdb_dll_version
    0xFF, 0xFF, // global_symbol_stream (nil)
    0x00, 0x00, // pdb_dll_rbld
    0x00, 0x00, 0x00, 0x00, // mod_info_size
    0x00, 0x00, 0x00, 0x00, // section_contribution_size
    0x00, 0x00, 0x00, 0x00, // section_map_size
    0x00, 0x00, 0x00, 0x00, // source_info_size
    0x00, 0x00, 0x00, 0x00, // type_server_map_size
    0x00, 0x00, 0x00, 0x00, // mfc_type_server_index
    0x00, 0x00, 0x00, 0x00, // optional_dbg_header_size
    0x00, 0x00, 0x00, 0x00, // edit_and_continue_size
    0x00, 0x00, // flags
    0x00, 0x00, // machine
    0x00, 0x00, 0x00, 0x00, // padding
];
131
// Checks that the canned empty header decodes and reports no Global Symbol Index stream.
#[test]
fn test_parse_empty_dbi_stream_header() {
    let raw: &[u8] = EMPTY_DBI_STREAM_HEADER.as_slice();
    let header = DbiStreamHeader::read_from_bytes(raw).unwrap();
    let gsi_stream = header.global_symbol_index_stream.get();
    assert!(gsi_stream.is_none());
}
137
138impl DbiStreamHeader {
139    /// Gets the stream index for the Global Symbol Stream.
140    pub fn sym_record_stream(&self) -> Result<u32, StreamIndexIsNilError> {
141        self.global_symbol_stream.get_err()
142    }
143
144    /// Gets the stream index for the Public Symbol Index.
145    pub fn public_stream_index(&self) -> Result<u32, StreamIndexIsNilError> {
146        self.public_symbol_index_stream.get_err()
147    }
148
149    /// Gets the stream index for the Global Symbol Index.
150    pub fn global_stream_index(&self) -> Result<u32, StreamIndexIsNilError> {
151        self.global_symbol_index_stream.get_err()
152    }
153
154    /// Byte range of the Modules substream.
155    pub fn modules_range(&self) -> anyhow::Result<Range<usize>> {
156        let start = DBI_STREAM_HEADER_LEN;
157        let size = self.mod_info_size.get() as usize;
158        Ok(start..start + size)
159    }
160
161    /// Byte range of the Modules substream.
162    pub fn sources_range(&self) -> anyhow::Result<Range<usize>> {
163        let start = DBI_STREAM_HEADER_LEN
164            + self.mod_info_size.get() as usize
165            + self.section_contribution_size.get() as usize
166            + self.section_map_size.get() as usize;
167        let size = self.source_info_size.get() as usize;
168        Ok(start..start + size)
169    }
170
171    /// The total length of all substreams, or None if this value cannot be computed.
172    ///
173    /// In a well-formed DBI stream, this value can be computed and the value is less than
174    /// the size of the data that follows the DBI Stream Header.
175    pub fn total_substreams_len(&self) -> Option<u32> {
176        // Read the fields and (where necessary) convert them from i32 to u32.
177        // If a value is negative, then we return None.
178        u32::try_from(self.mod_info_size.get())
179            .ok()?
180            .checked_add(u32::try_from(self.section_contribution_size.get()).ok()?)?
181            .checked_add(u32::try_from(self.section_map_size.get()).ok()?)?
182            .checked_add(u32::try_from(self.source_info_size.get()).ok()?)?
183            .checked_add(u32::try_from(self.type_server_map_size.get()).ok()?)?
184            .checked_add(u32::try_from(self.optional_dbg_header_size.get()).ok()?)?
185            .checked_add(u32::try_from(self.edit_and_continue_size.get()).ok()?)
186    }
187}
188
// Compile-time check: the in-memory layout of `DbiStreamHeader` must have exactly the size
// declared by `DBI_STREAM_HEADER_LEN`, since the struct is read directly from stream bytes.
static_assertions::const_assert_eq!(size_of::<DbiStreamHeader>(), DBI_STREAM_HEADER_LEN);
/// The size of the DBI stream header, in bytes. Substream data begins at this offset within
/// the DBI stream.
pub const DBI_STREAM_HEADER_LEN: usize = 64;
192
// Known values for `DbiStreamHeader::version`.
// NOTE(review): the values look like dates in decimal (YYMMDD / YYYYMMDD) — confirm.

/// MSVC version 4.1
pub const DBI_STREAM_VERSION_VC41: u32 = 930803;
/// MSVC version 5.0
pub const DBI_STREAM_VERSION_V50: u32 = 19960307;
/// MSVC version 6.0
pub const DBI_STREAM_VERSION_V60: u32 = 19970606;
/// MSVC version 7.0. This is the version stored in [`EMPTY_DBI_STREAM_HEADER`].
pub const DBI_STREAM_VERSION_V70: u32 = 19990903;
/// MSVC version 11.0
pub const DBI_STREAM_VERSION_V110: u32 = 20091201;
203
/// Holds or refers to the DBI stream.
///
/// The `StreamData` type parameter can be any type that can contain `[u8]`.
///
/// This type contains (or refers to) the _entire_ DBI stream, not just the header.
///
/// Instances are normally created with [`DbiStream::parse`], which computes `substreams` from
/// the sizes stored in the header.
#[derive(Clone)]
pub struct DbiStream<StreamData = Vec<u8>>
where
    StreamData: AsRef<[u8]>,
{
    /// The contents of the stream.
    pub stream_data: StreamData,

    /// The byte ranges of the substreams.
    ///
    /// The substream accessors index `stream_data` directly with these ranges, so each range
    /// must lie within `stream_data`; otherwise the accessors panic.
    pub substreams: DbiSubstreamRanges,
}
220
// The DBI stream contains a fixed number of "substreams". The DBI header specifies the
// length of each substream.  The position of each substream is found by computing the
// sum of all previous substreams (and the header).
//
// Each entry passed to this macro has the form
// `accessor_name, mutable_accessor_name, header_size_field;` and the entries must be listed in
// the order in which the substreams are stored in the stream. The macro generates:
//
// * `DbiSubstreamRanges`, with one `Range<usize>` field per substream,
// * shared and mutable byte-slice accessors on `DbiStream` for each substream,
// * `DbiSubstreamRanges::from_sizes`, which computes and bounds-checks the ranges.
macro_rules! dbi_substreams {
    (
        $(
            $name:ident,
            $mut_name:ident,
            $size_field:ident ;
        )*
    ) => {
        /// Contains the byte ranges of the substreams within the DBI stream.
        #[derive(Clone, Debug, Default)]
        pub struct DbiSubstreamRanges {
            $(
                #[doc = concat!("The range of the ", stringify!($name), " substream.")]
                pub $name: Range<usize>,
            )*
        }

        impl<StreamData: AsRef<[u8]>> DbiStream<StreamData> {
            $(
                #[doc = concat!("The unparsed contents of the ", stringify!($name), " substream.")]
                pub fn $name(&self) -> &[u8] {
                    self.substream_data(self.substreams.$name.clone())
                }

                #[doc = concat!("Mutable access to the unparsed contents of the ", stringify!($name), " substream.")]
                pub fn $mut_name(&mut self) -> &mut [u8]
                where
                    StreamData: AsMut<[u8]>,
                {
                    self.substream_data_mut(self.substreams.$name.clone())
                }

            )*
        }

        impl DbiSubstreamRanges {
            /// Computes the byte range of every substream from the sizes stored in `sizes`.
            ///
            /// `stream_len` is the length of the entire DBI stream, including the header.
            /// Every returned range is guaranteed to lie within `0..stream_len`.
            ///
            /// Returns an error if the stream is shorter than the header, if any substream
            /// size is negative, or if a substream would extend beyond the end of the stream.
            /// Bytes left over after the last substream are only reported with a warning.
            pub(crate) fn from_sizes(sizes: &DbiStreamHeader, stream_len: usize) -> anyhow::Result<Self> {
                let mut pos: usize = DBI_STREAM_HEADER_LEN;
                if pos > stream_len {
                    bail!("DBI stream is too short; pos = {}, stream_len = {}", pos, stream_len);
                }

                $(
                    // Invariant: established by the check above and preserved by each step,
                    // so the subtraction below cannot underflow.
                    assert!(pos <= stream_len);
                    let size: i32 = sizes.$size_field.get();
                    if size < 0 {
                        bail!("Substream {} length in DBI header is invalid (is negative)", stringify!($size_field));
                    }

                    // Lossless: `size` is known to be non-negative here.
                    let len = size as usize;
                    let available = stream_len - pos;
                    if len > available {
                        bail!("Substream {} length in DBI header is invalid. It extends beyond the end of the stream.", stringify!($size_field));
                    }
                    let start = pos;
                    pos += len;

                    let $name = start..pos;
                )*

                if pos < stream_len {
                    warn!(pos, stream_len, "Something is wrong with the code that finds the ranges of substreams. Expected pos to be equal to stream_len.");
                } else if pos > stream_len {
                    // Defensive: the per-substream checks above should make this unreachable.
                    error!(pos, stream_len, "Something is very wrong with the DBI header. The sum of the substream lengths (pos) exceeds the stream len.");
                } else {
                    // Substream sizes look good.
                }

                Ok(Self {
                    $( $name, )*
                })
            }
        }
    }
}
299
// Each line is: shared accessor name, mutable accessor name, header field holding the size.
dbi_substreams! {
    // The order of these determines the order of the substream data in the stream.
    //
    // Note that this is not the order of the size fields in `DbiStreamHeader`: here the
    // Edit-and-Continue substream comes before the Optional Debug Header substream, while the
    // header declares `optional_dbg_header_size` before `edit_and_continue_size`.
    modules_bytes, modules_bytes_mut, mod_info_size;
    section_contributions_bytes, section_contributions_bytes_mut, section_contribution_size;
    section_map_bytes, section_map_bytes_mut, section_map_size;
    source_info, source_info_mut, source_info_size;
    type_server_map, type_server_map_mut, type_server_map_size;
    edit_and_continue, edit_and_continue_mut, edit_and_continue_size;
    optional_debug_header_bytes, optional_debug_header_bytes_mut, optional_dbg_header_size;
}
310
311impl<StreamData: AsRef<[u8]>> DbiStream<StreamData> {
312    /// Returns the DBI stream header.
313    pub fn header(&self) -> Result<&DbiStreamHeader> {
314        if let Ok((header, _)) = DbiStreamHeader::ref_from_prefix(self.stream_data.as_ref()) {
315            Ok(header)
316        } else {
317            bail!("The DBI stream is too small to contain a valid header.")
318        }
319    }
320
321    /// Provides mutable access to the DBI stream header.
322    pub fn header_mut(&mut self) -> Result<&mut DbiStreamHeader>
323    where
324        StreamData: AsMut<[u8]>,
325    {
326        if let Ok((header, _)) = DbiStreamHeader::mut_from_prefix(self.stream_data.as_mut()) {
327            Ok(header)
328        } else {
329            bail!("The DBI stream is too small to contain a valid header.")
330        }
331    }
332
333    fn substream_data(&self, range: Range<usize>) -> &[u8] {
334        &self.stream_data.as_ref()[range]
335    }
336
337    fn substream_data_mut(&mut self, range: Range<usize>) -> &mut [u8]
338    where
339        StreamData: AsMut<[u8]>,
340    {
341        &mut self.stream_data.as_mut()[range]
342    }
343
344    /// Reads the Module Information substream.
345    pub fn modules(&self) -> ModInfoSubstream<&[u8]> {
346        ModInfoSubstream {
347            substream_data: self.modules_bytes(),
348        }
349    }
350
351    /// Iterates the Module records in the Module Information Substream.
352    pub fn iter_modules(&self) -> IterModuleInfo<'_> {
353        IterModuleInfo::new(self.modules_bytes())
354    }
355
356    /// Iterates the Module records in the Module Information Substream, with mutable access.
357    pub fn iter_modules_mut(&mut self) -> IterModuleInfoMut<'_>
358    where
359        StreamData: AsMut<[u8]>,
360    {
361        IterModuleInfoMut::new(self.modules_bytes_mut())
362    }
363
364    /// Return a DbiStream over just a a reference
365    pub fn as_slice(&self) -> DbiStream<&[u8]> {
366        DbiStream {
367            stream_data: self.stream_data.as_ref(),
368            substreams: self.substreams.clone(),
369        }
370    }
371
372    /// Read the DBI Stream header and validate it.
373    pub fn parse(stream_data: StreamData) -> anyhow::Result<Self> {
374        let stream_bytes: &[u8] = stream_data.as_ref();
375
376        if stream_bytes.is_empty() {
377            return Ok(Self {
378                substreams: Default::default(),
379                stream_data,
380            });
381        }
382
383        let mut p = Parser::new(stream_bytes);
384        let dbi_header: &DbiStreamHeader = p.get()?;
385
386        let substreams = DbiSubstreamRanges::from_sizes(dbi_header, stream_bytes.len())?;
387
388        // We just computed the ranges for each of the substreams, and we verified that the end of
389        // the substreams is equal to the size of the entire stream. That implicitly validates all
390        // of the range checks for the substreams, so we don't need explicit / verbose checks.
391        // We can simply use normal range indexing.
392
393        Ok(Self {
394            stream_data,
395            substreams,
396        })
397    }
398
399    /// Parses the DBI Sources Substream section.
400    pub fn sources(&self) -> anyhow::Result<sources::DbiSourcesSubstream<'_>> {
401        DbiSourcesSubstream::parse(self.source_info())
402    }
403
404    /// Parses the header of the Section Contributions Substream and returns an object which can
405    /// query it.
406    pub fn section_contributions(
407        &self,
408    ) -> anyhow::Result<section_contrib::SectionContributionsSubstream<'_>> {
409        let substream_bytes = self.section_contributions_bytes();
410        section_contrib::SectionContributionsSubstream::parse(substream_bytes)
411    }
412
413    /// Parses the header of the Section Map Substream and returns an object which can query it.
414    pub fn section_map(&self) -> anyhow::Result<section_map::SectionMap<'_>> {
415        let section_map_bytes = self.section_map_bytes();
416        section_map::SectionMap::parse(section_map_bytes)
417    }
418
419    /// Parses the Optional Debug Header Substream and returns an object which can query it.
420    pub fn optional_debug_header(&self) -> anyhow::Result<optional_dbg::OptionalDebugHeader> {
421        optional_dbg::OptionalDebugHeader::parse(self.optional_debug_header_bytes())
422    }
423
424    /// Gets a mutable reference to the Optional Debug Header substream.
425    pub fn optional_debug_header_mut(&mut self) -> anyhow::Result<&mut [U16<LE>]>
426    where
427        StreamData: AsMut<[u8]>,
428    {
429        if self.substreams.optional_debug_header_bytes.is_empty() {
430            Ok(&mut [])
431        } else {
432            let substream_bytes =
433                &mut self.stream_data.as_mut()[self.substreams.optional_debug_header_bytes.clone()];
434
435            if let Ok(slice) = <[U16<LE>]>::mut_from_bytes(substream_bytes) {
436                Ok(slice)
437            } else {
438                bail!("The Optional Debug Header substream within the DBI stream is malformed (length is not valid).");
439            }
440        }
441    }
442}
443
444/// Reads the header of the DBI stream. This does **not** validate the header.
445///
446/// This is a free function because we need to use it before constructing an instance of [`Pdb`].
447pub fn read_dbi_stream_header<F: ReadAt>(msf: &Container<F>) -> anyhow::Result<DbiStreamHeader> {
448    let stream_reader = msf.get_stream_reader(Stream::DBI.into())?;
449    if !stream_reader.is_empty() {
450        let mut dbi_header = DbiStreamHeader::new_zeroed();
451        stream_reader.read_exact_at(dbi_header.as_mut_bytes(), 0)?;
452        Ok(dbi_header)
453    } else {
454        Ok(DbiStreamHeader::read_from_bytes(EMPTY_DBI_STREAM_HEADER.as_slice()).unwrap())
455    }
456}
457
458/// Reads the entire DBI Stream, validates the header, and then returns an object that
459/// can be used for further queries of the DBI Stream.
460///
461/// This is a free function because we need to use it before constructing an instance of [`Pdb`].
462pub fn read_dbi_stream<F: ReadAt>(
463    container: &Container<F>,
464) -> Result<DbiStream<Vec<u8>>, anyhow::Error> {
465    let mut dbi_stream_data = container.read_stream_to_vec(Stream::DBI.into())?;
466    if dbi_stream_data.is_empty() {
467        dbi_stream_data = EMPTY_DBI_STREAM_HEADER.to_vec();
468    }
469
470    DbiStream::parse(dbi_stream_data)
471}
472
473impl<F: ReadAt> crate::Pdb<F> {
474    /// Reads the header of the DBI stream. This does **not** validate the header.
475    pub fn read_dbi_stream_header(&self) -> anyhow::Result<DbiStreamHeader> {
476        read_dbi_stream_header(&self.container)
477    }
478
479    /// Reads the entire DBI Stream, validates the header, and then returns an object that
480    /// can be used for further queries of the DBI Stream.
481    pub fn read_dbi_stream(&self) -> Result<DbiStream<Vec<u8>>, anyhow::Error> {
482        read_dbi_stream(&self.container)
483    }
484
485    fn read_dbi_substream(&self, range: Range<usize>) -> anyhow::Result<Vec<u8>> {
486        let len = range.len();
487        let mut substream_data = vec![0; len];
488        let reader = self.container.get_stream_reader(Stream::DBI.into())?;
489        reader.read_exact_at(&mut substream_data, range.start as u64)?;
490        Ok(substream_data)
491    }
492
493    /// Reads the module substream data from the DBI stream.
494    ///
495    /// This function always reads the data from the file. It does not cache the data.
496    pub fn read_modules(&self) -> anyhow::Result<ModInfoSubstream<Vec<u8>>> {
497        let substream_data = self.read_dbi_substream(self.dbi_substreams.modules_bytes.clone())?;
498        Ok(ModInfoSubstream { substream_data })
499    }
500
501    /// Gets access to the DBI Modules Substream. This will read the DBI Modules Substream
502    /// on-demand, and will cache it.
503    pub fn modules(&self) -> anyhow::Result<&ModInfoSubstream<Vec<u8>>> {
504        get_or_init_err(&self.dbi_modules_cell, || self.read_modules())
505    }
506
507    /// Reads the DBI Sources Substream. This always reads the data, and does not cache it.
508    pub fn read_sources_data(&self) -> Result<Vec<u8>> {
509        self.read_dbi_substream(self.dbi_substreams.source_info.clone())
510    }
511
512    /// Gets access to the DBI Sources Substream data.
513    pub fn sources_data(&self) -> Result<&[u8]> {
514        let sources_data = get_or_init_err(&self.dbi_sources_cell, || self.read_sources_data())?;
515        Ok(sources_data)
516    }
517
518    /// Gets access to the DBI Sources Substream and parses the header.
519    pub fn sources(&self) -> Result<sources::DbiSourcesSubstream<'_>> {
520        let sources_data = self.sources_data()?;
521        sources::DbiSourcesSubstream::parse(sources_data)
522    }
523
524    /// Drops the cached DBI Sources Substream data, if any.
525    pub fn drop_sources(&mut self) {
526        self.dbi_sources_cell = Default::default();
527    }
528
529    /// Reads the contents of the DBI Section Contributions Substream. This function never caches
530    /// the data; it is always read unconditionally.
531    pub fn read_section_contributions(&self) -> Result<Vec<u8>> {
532        self.read_dbi_substream(self.dbi_substreams.section_contributions_bytes.clone())
533    }
534}
535
536/// Reads fields of the DBI Stream and validates them for consistency with the specification.
537pub fn validate_dbi_stream(stream_data: &[u8]) -> anyhow::Result<()> {
538    let dbi_stream = DbiStream::parse(stream_data)?;
539
540    // For now, the only validation that we do in this function is decoding the ModuleInfo records.
541    let num_modules: usize = dbi_stream.modules().iter().count();
542
543    let sources = DbiSourcesSubstream::parse(dbi_stream.source_info())?;
544    if sources.num_modules() != num_modules {
545        bail!("Number of modules found in Sources substream ({}) does not match number of Module Info structs found in Modules substream ({}).",
546            sources.num_modules(),
547            num_modules
548        );
549    }
550
551    Ok(())
552}