ms_pdb/
dbi.rs

1//! Provides access to the DBI Stream (Debug Information).
2//!
3//! The DBI Stream is a central data structure of the PDB. It contains many vital fields, and
4//! points to other streams that contain other important information. The DBI is stream 3.
5//!
6//! Briefly, the DBI contains these substreams:
7//!
8//! * Modules: This lists the modules (compilands / translation units) that compose an executable.
9//!   Each Module Info structure contains many important fields, including the stream number for
10//!   a Module Stream.
11//!
12//! * Section Contributions Substream
13//!
14//! * Section Map Substream
15//!
16//! * Sources Substream: This lists the source files that were inputs to all of the translation units.
17//!
18//! * Type Server Map Substream
19//!
20//! * Optional Debug Header Substream
21//!
22//! * Edit-and-Continue Substream
23//!
24//! The `Dbi` stream holds section contributions and the list of modules (compilands).
25//!
26//! * <https://llvm.org/docs/PDB/DbiStream.html>
27//! * <https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/langapi/include/pdb.h#L860>
28
29use crate::parser::{Parser, ParserError, ParserMut};
30use crate::Container;
31use crate::{get_or_init_err, Stream};
32use crate::{StreamIndexIsNilError, StreamIndexU16};
33use anyhow::{bail, Result};
34use std::mem::size_of;
35use std::ops::Range;
36use sync_file::ReadAt;
37use tracing::{error, warn};
38use zerocopy::{
39    FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U16, U32,
40};
41
42#[cfg(doc)]
43use crate::Pdb;
44
45pub mod modules;
46pub mod optional_dbg;
47pub mod section_contrib;
48pub mod section_map;
49pub mod sources;
50
51pub use modules::*;
52#[doc(inline)]
53pub use section_contrib::*;
54#[doc(inline)]
55pub use sources::*;
56
57/// The header of the DBI (Debug Information) stream.
58#[repr(C)]
59#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug, Clone)]
60#[allow(missing_docs)]
61pub struct DbiStreamHeader {
62    /// Always -1
63    pub signature: I32<LE>,
64
65    /// One of the `DBI_STREAM_VERSION_*` values; typically, `DBI_STREAM_VERSION_V110`.
66    pub version: U32<LE>,
67
68    /// The number of times this PDB has been modified. The value is set to 1 when a PDB is
69    /// first created. This value must match the same field within the PE header.
70    pub age: U32<LE>,
71
72    /// The index of the Global Symbol Index, which contains a name-to-symbol lookup table for
73    /// global symbols. The symbol records are not stored in this stream; they are stored in the
74    /// Global Symbol Stream.
75    pub global_symbol_index_stream: StreamIndexU16,
76
77    pub build_number: U16<LE>,
78
79    /// The index of the stream that contains the Public Symbol Index (GSI). This contains a
80    /// name-to-symbol map and an address-to-symbol map. See [`crate::globals::gsi`].
81    pub public_symbol_index_stream: StreamIndexU16,
82
83    /// The version of the MSPDB DLL which produced this DBI stream.
84    pub pdb_dll_version: U16<LE>,
85
86    /// The stream that contains the Global Symbol Stream. This contains symbol records, which can
87    /// be decoded using [`crate::syms::SymIter`].
88    pub global_symbol_stream: StreamIndexU16,
89
90    pub pdb_dll_rbld: U16<LE>,
91
92    // Substreams
93    pub mod_info_size: I32<LE>,
94    pub section_contribution_size: I32<LE>,
95    pub section_map_size: I32<LE>,
96    pub source_info_size: I32<LE>,
97    pub type_server_map_size: I32<LE>,
98    /// This field is _not_ a substream size. Not sure what it is.
99    pub mfc_type_server_index: U32<LE>,
100    pub optional_dbg_header_size: I32<LE>,
101    pub edit_and_continue_size: I32<LE>,
102
103    pub flags: U16<LE>,
104    pub machine: U16<LE>,
105    pub padding: U32<LE>,
106}
107
108/// Data for an empty DBI stream
109pub static EMPTY_DBI_STREAM_HEADER: [u8; DBI_STREAM_HEADER_LEN] = [
110    0xFF, 0xFF, 0xFF, 0xFF, // signature
111    0x77, 0x09, 0x31, 0x01, // version
112    0x01, 0x00, 0x00, 0x00, // age
113    0xFF, 0xFF, // global_stream_index
114    0x00, 0x00, // build_number
115    0xFF, 0xFF, // public_stream_index
116    0x00, 0x00, // pdb_dll_version
117    0xFF, 0xFF, // sym_record_stream
118    0x00, 0x00, // pdb_dll_rbld
119    0x00, 0x00, 0x00, 0x00, // mod_info_size
120    0x00, 0x00, 0x00, 0x00, // section_contribution_size
121    0x00, 0x00, 0x00, 0x00, // section_map_size
122    0x00, 0x00, 0x00, 0x00, // source_info_size
123    0x00, 0x00, 0x00, 0x00, // type_server_map_size
124    0x00, 0x00, 0x00, 0x00, // mfc_type_server_index
125    0x00, 0x00, 0x00, 0x00, // optional_dbg_header_size
126    0x00, 0x00, 0x00, 0x00, // edit_and_continue_size
127    0x00, 0x00, // flags
128    0x00, 0x00, // machine
129    0x00, 0x00, 0x00, 0x00, // padding
130];
131
/// The canned empty header must decode, and its Global Symbol Index stream must be nil.
#[test]
fn test_parse_empty_dbi_stream_header() {
    let empty_bytes: &[u8] = &EMPTY_DBI_STREAM_HEADER[..];
    let header = DbiStreamHeader::read_from_bytes(empty_bytes).unwrap();
    let gsi_stream = header.global_symbol_index_stream.get();
    assert!(gsi_stream.is_none());
}
137
138impl DbiStreamHeader {
139    /// Gets the stream index for the Global Symbol Stream.
140    pub fn sym_record_stream(&self) -> Result<u32, StreamIndexIsNilError> {
141        self.global_symbol_stream.get_err()
142    }
143
144    /// Gets the stream index for the Public Symbol Index.
145    pub fn public_stream_index(&self) -> Result<u32, StreamIndexIsNilError> {
146        self.public_symbol_index_stream.get_err()
147    }
148
149    /// Gets the stream index for the Global Symbol Index.
150    pub fn global_stream_index(&self) -> Result<u32, StreamIndexIsNilError> {
151        self.global_symbol_index_stream.get_err()
152    }
153
154    /// Byte range of the Modules substream.
155    pub fn modules_range(&self) -> anyhow::Result<Range<usize>> {
156        let start = DBI_STREAM_HEADER_LEN;
157        let size = self.mod_info_size.get() as usize;
158        Ok(start..start + size)
159    }
160
161    /// Byte range of the Modules substream.
162    pub fn sources_range(&self) -> anyhow::Result<Range<usize>> {
163        let start = DBI_STREAM_HEADER_LEN
164            + self.mod_info_size.get() as usize
165            + self.section_contribution_size.get() as usize
166            + self.section_map_size.get() as usize;
167        let size = self.source_info_size.get() as usize;
168        Ok(start..start + size)
169    }
170}
171
172static_assertions::const_assert_eq!(size_of::<DbiStreamHeader>(), DBI_STREAM_HEADER_LEN);
173const DBI_STREAM_HEADER_LEN: usize = 64;
174
// Known values of `DbiStreamHeader::version`, one per MSVC release.

/// DBI stream version written by MSVC 4.1.
pub const DBI_STREAM_VERSION_VC41: u32 = 930803;

/// DBI stream version written by MSVC 5.0.
pub const DBI_STREAM_VERSION_V50: u32 = 19960307;

/// DBI stream version written by MSVC 6.0.
pub const DBI_STREAM_VERSION_V60: u32 = 19970606;

/// DBI stream version written by MSVC 7.0.
pub const DBI_STREAM_VERSION_V70: u32 = 19990903;

/// DBI stream version written by MSVC 11.0.
pub const DBI_STREAM_VERSION_V110: u32 = 20091201;
185
186/// Holds or refers to the DBI stream.
187///
188/// The `StreamData` type parameter can be any type that can contain `[u8]`.
189///
190/// This type contains (or refers to) the _entire_ DBI stream, not just the header.
191#[derive(Clone)]
192pub struct DbiStream<StreamData = Vec<u8>>
193where
194    StreamData: AsRef<[u8]>,
195{
196    /// The contents of the stream.
197    pub stream_data: StreamData,
198
199    /// The byte ranges of the substreams.
200    pub substreams: DbiSubstreamRanges,
201}
202
// The DBI stream is a header followed by a fixed sequence of "substreams". The header records
// only the length of each substream, so the position of a substream is the header length plus
// the lengths of every substream that precedes it.
//
// For each `name, mut_name, size_field;` entry this macro generates:
//   * a `name` field in `DbiSubstreamRanges`,
//   * `name()` / `mut_name()` accessors on `DbiStream` returning the raw substream bytes,
//   * one step of `DbiSubstreamRanges::from_sizes`, which reads `size_field` from the header.
macro_rules! dbi_substreams {
    (
        $(
            $name:ident,
            $mut_name:ident,
            $size_field:ident ;
        )*
    ) => {
        /// Contains the byte ranges of the substreams within the DBI stream.
        #[derive(Clone, Debug, Default)]
        pub struct DbiSubstreamRanges {
            $(
                #[doc = concat!("The range of the ", stringify!($name), " substream.")]
                pub $name: Range<usize>,
            )*
        }

        impl<StreamData: AsRef<[u8]>> DbiStream<StreamData> {
            $(
                #[doc = concat!("The unparsed contents of the ", stringify!($name), " substream.")]
                pub fn $name(&self) -> &[u8] {
                    let range = self.substreams.$name.clone();
                    self.substream_data(range)
                }

                #[doc = concat!("The unparsed contents of the ", stringify!($name), " substream.")]
                pub fn $mut_name(&mut self) -> &mut [u8]
                where
                    StreamData: AsMut<[u8]>,
                {
                    let range = self.substreams.$name.clone();
                    self.substream_data_mut(range)
                }

            )*
        }

        impl DbiSubstreamRanges {
            /// Computes the byte range of every substream from the sizes stored in `sizes`.
            ///
            /// Fails if the stream is shorter than the header, if any size is negative, or
            /// if any substream would extend past `stream_len`. Trailing bytes after the
            /// last substream are only logged, not rejected.
            pub(crate) fn from_sizes(sizes: &DbiStreamHeader, stream_len: usize) -> anyhow::Result<Self> {
                // Running offset of the next substream; the first one follows the header.
                let mut pos: usize = DBI_STREAM_HEADER_LEN;
                if stream_len < pos {
                    bail!("DBI stream is too short; pos = {}, stream_len = {}", pos, stream_len);
                }

                $(
                    assert!(pos <= stream_len);
                    let raw_size: i32 = sizes.$size_field.get();
                    if raw_size < 0 {
                        bail!("Substream {} length in DBI header is invalid (is negative)", stringify!($size_field));
                    }

                    // `pos <= stream_len` holds here, so the subtraction cannot underflow.
                    let len = raw_size as usize;
                    if stream_len - pos < len {
                        bail!("Substream {} length in DBI header is invalid. It extends beyond the end of the stream.", stringify!($size_field));
                    }
                    let begin = pos;
                    pos = begin + len;

                    let $name = begin..pos;
                )*

                match pos.cmp(&stream_len) {
                    std::cmp::Ordering::Less => {
                        warn!(pos, stream_len, "Something is wrong with the code that finds the ranges of substreams. Expected pos to be equal to stream_len.");
                    }
                    std::cmp::Ordering::Greater => {
                        error!(pos, stream_len, "Something is very wrong with the DBI header. The sum of the subtream lengths (pos) exceeds the stream len.");
                    }
                    std::cmp::Ordering::Equal => {
                        // Substream sizes look good.
                    }
                }

                Ok(Self {
                    $( $name, )*
                })
            }
        }
    }
}
281
282dbi_substreams! {
283    // The order of these determines the order of the substream data in the stream.
284    modules_bytes, modules_bytes_mut, mod_info_size;
285    section_contributions_bytes, section_contributions_bytes_mut, section_contribution_size;
286    section_map_bytes, section_map_bytes_mut, section_map_size;
287    source_info, source_info_mut, source_info_size;
288    type_server_map, type_server_map_mut, type_server_map_size;
289    edit_and_continue, edit_and_continue_mut, edit_and_continue_size;
290    optional_debug_header_bytes, optional_debug_header_bytes_mut, optional_dbg_header_size;
291}
292
293impl<StreamData: AsRef<[u8]>> DbiStream<StreamData> {
294    /// Returns the DBI stream header.
295    pub fn header(&self) -> Result<&DbiStreamHeader> {
296        if let Ok((header, _)) = DbiStreamHeader::ref_from_prefix(self.stream_data.as_ref()) {
297            Ok(header)
298        } else {
299            bail!("The DBI stream is too small to contain a valid header.")
300        }
301    }
302
303    /// Provides mutable access to the DBI stream header.
304    pub fn header_mut(&mut self) -> Result<&mut DbiStreamHeader>
305    where
306        StreamData: AsMut<[u8]>,
307    {
308        if let Ok((header, _)) = DbiStreamHeader::mut_from_prefix(self.stream_data.as_mut()) {
309            Ok(header)
310        } else {
311            bail!("The DBI stream is too small to contain a valid header.")
312        }
313    }
314
315    fn substream_data(&self, range: Range<usize>) -> &[u8] {
316        &self.stream_data.as_ref()[range]
317    }
318
319    fn substream_data_mut(&mut self, range: Range<usize>) -> &mut [u8]
320    where
321        StreamData: AsMut<[u8]>,
322    {
323        &mut self.stream_data.as_mut()[range]
324    }
325
326    /// Reads the Module Information substream.
327    pub fn modules(&self) -> ModInfoSubstream<&[u8]> {
328        ModInfoSubstream {
329            substream_data: self.modules_bytes(),
330        }
331    }
332
333    /// Iterates the Module records in the Module Information Substream.
334    pub fn iter_modules(&self) -> IterModuleInfo<'_> {
335        IterModuleInfo::new(self.modules_bytes())
336    }
337
338    /// Iterates the Module records in the Module Information Substream, with mutable access.
339    pub fn iter_modules_mut(&mut self) -> IterModuleInfoMut<'_>
340    where
341        StreamData: AsMut<[u8]>,
342    {
343        IterModuleInfoMut::new(self.modules_bytes_mut())
344    }
345
346    /// Return a DbiStream over just a a reference
347    pub fn as_slice(&self) -> DbiStream<&[u8]> {
348        DbiStream {
349            stream_data: self.stream_data.as_ref(),
350            substreams: self.substreams.clone(),
351        }
352    }
353
354    /// Read the DBI Stream header and validate it.
355    pub fn parse(stream_data: StreamData) -> anyhow::Result<Self> {
356        let stream_bytes: &[u8] = stream_data.as_ref();
357
358        if stream_bytes.is_empty() {
359            return Ok(Self {
360                substreams: Default::default(),
361                stream_data,
362            });
363        }
364
365        let mut p = Parser::new(stream_bytes);
366        let dbi_header: &DbiStreamHeader = p.get()?;
367
368        let substreams = DbiSubstreamRanges::from_sizes(dbi_header, stream_bytes.len())?;
369
370        // We just computed the ranges for each of the substreams, and we verified that the end of
371        // the substreams is equal to the size of the entire stream. That implicitly validates all
372        // of the range checks for the substreams, so we don't need explicit / verbose checks.
373        // We can simply use normal range indexing.
374
375        Ok(Self {
376            stream_data,
377            substreams,
378        })
379    }
380
381    /// Parses the DBI Sources Substream section.
382    pub fn sources(&self) -> anyhow::Result<sources::DbiSourcesSubstream<'_>> {
383        DbiSourcesSubstream::parse(self.source_info())
384    }
385
386    /// Parses the header of the Section Contributions Substream and returns an object which can
387    /// query it.
388    pub fn section_contributions(
389        &self,
390    ) -> anyhow::Result<section_contrib::SectionContributionsSubstream<'_>> {
391        let substream_bytes = self.section_contributions_bytes();
392        section_contrib::SectionContributionsSubstream::parse(substream_bytes)
393    }
394
395    /// Parses the header of the Section Map Substream and returns an object which can query it.
396    pub fn section_map(&self) -> anyhow::Result<section_map::SectionMap<'_>> {
397        let section_map_bytes = self.section_map_bytes();
398        section_map::SectionMap::parse(section_map_bytes)
399    }
400
401    /// Parses the Optional Debug Header Substream and returns an object which can query it.
402    pub fn optional_debug_header(&self) -> anyhow::Result<optional_dbg::OptionalDebugHeader> {
403        optional_dbg::OptionalDebugHeader::parse(self.optional_debug_header_bytes())
404    }
405
406    /// Gets a mutable reference to the Optional Debug Header substream.
407    pub fn optional_debug_header_mut(&mut self) -> anyhow::Result<&mut [U16<LE>]>
408    where
409        StreamData: AsMut<[u8]>,
410    {
411        if self.substreams.optional_debug_header_bytes.is_empty() {
412            Ok(&mut [])
413        } else {
414            let substream_bytes =
415                &mut self.stream_data.as_mut()[self.substreams.optional_debug_header_bytes.clone()];
416
417            if let Ok(slice) = <[U16<LE>]>::mut_from_bytes(substream_bytes) {
418                Ok(slice)
419            } else {
420                bail!("The Optional Debug Header substream within the DBI stream is malformed (length is not valid).");
421            }
422        }
423    }
424}
425
426/// Reads the header of the DBI stream. This does **not** validate the header.
427///
428/// This is a free function because we need to use it before constructing an instance of [`Pdb`].
429pub fn read_dbi_stream_header<F: ReadAt>(msf: &Container<F>) -> anyhow::Result<DbiStreamHeader> {
430    let stream_reader = msf.get_stream_reader(Stream::DBI.into())?;
431    if !stream_reader.is_empty() {
432        let mut dbi_header = DbiStreamHeader::new_zeroed();
433        stream_reader.read_exact_at(dbi_header.as_mut_bytes(), 0)?;
434        Ok(dbi_header)
435    } else {
436        Ok(DbiStreamHeader::read_from_bytes(EMPTY_DBI_STREAM_HEADER.as_slice()).unwrap())
437    }
438}
439
440/// Reads the entire DBI Stream, validates the header, and then returns an object that
441/// can be used for further queries of the DBI Stream.
442///
443/// This is a free function because we need to use it before constructing an instance of [`Pdb`].
444pub fn read_dbi_stream<F: ReadAt>(
445    container: &Container<F>,
446) -> Result<DbiStream<Vec<u8>>, anyhow::Error> {
447    let mut dbi_stream_data = container.read_stream_to_vec(Stream::DBI.into())?;
448    if dbi_stream_data.is_empty() {
449        dbi_stream_data = EMPTY_DBI_STREAM_HEADER.to_vec();
450    }
451
452    DbiStream::parse(dbi_stream_data)
453}
454
455impl<F: ReadAt> crate::Pdb<F> {
456    /// Reads the header of the DBI stream. This does **not** validate the header.
457    pub fn read_dbi_stream_header(&self) -> anyhow::Result<DbiStreamHeader> {
458        read_dbi_stream_header(&self.container)
459    }
460
461    /// Reads the entire DBI Stream, validates the header, and then returns an object that
462    /// can be used for further queries of the DBI Stream.
463    pub fn read_dbi_stream(&self) -> Result<DbiStream<Vec<u8>>, anyhow::Error> {
464        read_dbi_stream(&self.container)
465    }
466
467    fn read_dbi_substream(&self, range: Range<usize>) -> anyhow::Result<Vec<u8>> {
468        let len = range.len();
469        let mut substream_data = vec![0; len];
470        let reader = self.container.get_stream_reader(Stream::DBI.into())?;
471        reader.read_exact_at(&mut substream_data, range.start as u64)?;
472        Ok(substream_data)
473    }
474
475    /// Reads the module substream data from the DBI stream.
476    ///
477    /// This function always reads the data from the file. It does not cache the data.
478    pub fn read_modules(&self) -> anyhow::Result<ModInfoSubstream<Vec<u8>>> {
479        let substream_data = self.read_dbi_substream(self.dbi_substreams.modules_bytes.clone())?;
480        Ok(ModInfoSubstream { substream_data })
481    }
482
483    /// Gets access to the DBI Modules Substream. This will read the DBI Modules Substream
484    /// on-demand, and will cache it.
485    pub fn modules(&self) -> anyhow::Result<&ModInfoSubstream<Vec<u8>>> {
486        get_or_init_err(&self.dbi_modules_cell, || self.read_modules())
487    }
488
489    /// Reads the DBI Sources Substream. This always reads the data, and does not cache it.
490    pub fn read_sources_data(&self) -> Result<Vec<u8>> {
491        self.read_dbi_substream(self.dbi_substreams.source_info.clone())
492    }
493
494    /// Gets access to the DBI Sources Substream data.
495    pub fn sources_data(&self) -> Result<&[u8]> {
496        let sources_data = get_or_init_err(&self.dbi_sources_cell, || self.read_sources_data())?;
497        Ok(sources_data)
498    }
499
500    /// Gets access to the DBI Sources Substream and parses the header.
501    pub fn sources(&self) -> Result<sources::DbiSourcesSubstream<'_>> {
502        let sources_data = self.sources_data()?;
503        sources::DbiSourcesSubstream::parse(sources_data)
504    }
505
506    /// Drops the cached DBI Sources Substream data, if any.
507    pub fn drop_sources(&mut self) {
508        self.dbi_sources_cell = Default::default();
509    }
510
511    /// Reads the contents of the DBI Section Contributions Substream. This function never caches
512    /// the data; it is always read unconditionally.
513    pub fn read_section_contributions(&self) -> Result<Vec<u8>> {
514        self.read_dbi_substream(self.dbi_substreams.section_contributions_bytes.clone())
515    }
516}
517
518/// Reads fields of the DBI Stream and validates them for consistency with the specification.
519pub fn validate_dbi_stream(stream_data: &[u8]) -> anyhow::Result<()> {
520    let dbi_stream = DbiStream::parse(stream_data)?;
521
522    // For now, the only validation that we do in this function is decoding the ModuleInfo records.
523    let num_modules: usize = dbi_stream.modules().iter().count();
524
525    let sources = DbiSourcesSubstream::parse(dbi_stream.source_info())?;
526    if sources.num_modules() != num_modules {
527        bail!("Number of modules found in Sources substream ({}) does not match number of Module Info structs found in Modules substream ({}).",
528            sources.num_modules(),
529            num_modules
530        );
531    }
532
533    Ok(())
534}