ms_pdb/
lib.rs

1//! Reads and writes Program Database (PDB) files.
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/index.html>
5//! * <https://github.com/microsoft/microsoft-pdb>
6
7#![forbid(unused_must_use)]
8#![forbid(unsafe_code)]
9#![warn(missing_docs)]
10#![allow(clippy::collapsible_if)]
11#![allow(clippy::single_match)]
12#![allow(clippy::manual_flatten)]
13#![allow(clippy::needless_lifetimes)]
14#![allow(clippy::needless_late_init)]
15
16pub mod container;
17pub mod dbi;
18pub mod globals;
19pub mod guid;
20pub mod hash;
21pub mod lines;
22pub mod modi;
23pub mod taster;
24pub use ::uuid::Uuid;
25pub use ms_pdb_msf as msf;
26pub use ms_pdb_msfz as msfz;
27mod embedded_sources;
28pub mod names;
29pub mod pdbi;
30mod stream_index;
31pub mod tpi;
32pub mod utils;
33pub mod writer;
34
35pub use bstr::BStr;
36pub use container::{Container, StreamReader};
37pub use ms_codeview::{self as codeview, syms, types};
38pub use msfz::StreamData;
39pub use stream_index::{Stream, StreamIndexIsNilError, StreamIndexU16, NIL_STREAM_INDEX};
40pub use sync_file::{RandomAccessFile, ReadAt, WriteAt};
41
42use anyhow::bail;
43use globals::gsi::GlobalSymbolIndex;
44use globals::gss::GlobalSymbolStream;
45use globals::psi::PublicSymbolIndex;
46use names::{NameIndex, NamesStream};
47use std::cell::OnceCell;
48use std::fmt::Debug;
49use std::fs::File;
50use std::path::Path;
51use syms::{Pub, Sym};
52use zerocopy::{FromZeros, IntoBytes};
53
// Test-only logging configuration. The builder below configures a compact `tracing` formatter
// that writes through the test writer, disables ANSI colors and timestamps, includes file names
// and line numbers, and allows events up to the DEBUG level.
//
// Presumably `static_init::dynamic` runs this initializer once when the test binary starts;
// verify against the `static_init` documentation.
//
// NOTE(review): the chain ends with `.finish()` and the resulting value is discarded. Nothing in
// this block installs the subscriber as the default, so confirm whether `.init()` (or
// `.try_init()`) was intended here.
#[cfg(test)]
#[static_init::dynamic]
static INIT_LOGGER: () = {
    tracing_subscriber::fmt()
        .with_ansi(false)
        .with_test_writer()
        .with_file(true)
        .with_line_number(true)
        .with_max_level(tracing::Level::DEBUG)
        .compact()
        .without_time()
        .finish();
};
67
/// Provides access to the contents of a PDB file.
///
/// A `Pdb` wraps a [`Container`] and caches the most commonly needed PDB data structures. The
/// DBI Stream Header and the PDBI Stream are read when the PDB is opened; most other structures
/// are loaded lazily the first time they are requested.
///
/// Most uses of this type are read-only. When a PDB is opened with one of the `modify*`
/// constructors (MSF container format only), the Named Streams table can be edited through
/// `named_streams_mut` and written back with `flush_all`.
pub struct Pdb<F = sync_file::RandomAccessFile> {
    /// The underlying container, which provides access to the streams stored in the file.
    container: Container<F>,

    /// The header of the DBI Stream. The DBI Stream contains many of the important data structures
    /// for PDB, or has pointers (stream indexes) for them. Nearly all programs that read PDBs
    /// need to read the DBI, so we always load the header.
    dbi_header: dbi::DbiStreamHeader,
    /// The byte ranges of the substreams within the DBI Stream. These are computed from
    /// `dbi_header` and the length of the DBI Stream when the PDB is opened. If the DBI Stream is
    /// empty, this holds the default (empty) ranges.
    dbi_substreams: dbi::DbiSubstreamRanges,

    /// The parsed PDB Information Stream (PDBI). This is read when the PDB is opened.
    pdbi: pdbi::PdbiStream,
    /// Cached contents of the Names Stream, loaded on first use.
    names: OnceCell<NamesStream<Vec<u8>>>,

    /// Cached header of the TPI Stream, loaded on first use.
    tpi_header: OnceCell<tpi::CachedTypeStreamHeader>,
    /// Cached header of the IPI Stream, loaded on first use.
    ipi_header: OnceCell<tpi::CachedTypeStreamHeader>,

    /// Cached contents of DBI Modules Substream.
    dbi_modules_cell: OnceCell<dbi::ModInfoSubstream<Vec<u8>>>,
    /// Cached contents of DBI Sources Substream.
    dbi_sources_cell: OnceCell<Vec<u8>>,

    /// Cached Global Symbol Stream (GSS), loaded on first use. See `gss()` and `gss_drop()`.
    gss: OnceCell<Box<GlobalSymbolStream>>,
    /// Cached Global Symbol Index (GSI), loaded on first use. See `gsi()` and `gsi_drop()`.
    gsi: OnceCell<Box<GlobalSymbolIndex>>,
    /// Cached Public Symbol Index (PSI), loaded on first use. See `psi()` and `psi_drop()`.
    psi: OnceCell<Box<PublicSymbolIndex>>,
}
96
/// Selects how `Pdb::from_file_access` opens the underlying container.
#[derive(Copy, Clone, Eq, PartialEq)]
enum AccessMode {
    /// Open the container for reading. Both the MSF (PDB) and MSFZ (PDZ) formats are accepted.
    Read,
    /// Open the container for modification. Only the MSF (PDB) format is accepted; opening an
    /// MSFZ file in this mode is an error.
    ReadWrite,
}
102
103impl<F: ReadAt> Pdb<F> {
104    /// Reads the header of a PDB file and provides access to the streams contained within the
105    /// PDB file. Allows read/write access, if using an MSF container format.
106    ///
107    /// This function reads the MSF File Header, which is the header for the entire file.
108    /// It also reads the stream directory, so it knows how to find each of the streams
109    /// and the pages of the streams.
110    fn from_file_access(file: F, access_mode: AccessMode) -> anyhow::Result<Box<Self>> {
111        use crate::taster::{what_flavor, Flavor};
112
113        let Some(flavor) = what_flavor(&file)? else {
114            bail!("The file is not a recognized PDB or PDZ format.");
115        };
116
117        let container = match (flavor, access_mode) {
118            (Flavor::PortablePdb, _) => bail!("Portable PDBs are not supported."),
119            (Flavor::Pdb, AccessMode::Read) => Container::Msf(msf::Msf::open_with_file(file)?),
120            (Flavor::Pdb, AccessMode::ReadWrite) => {
121                Container::Msf(msf::Msf::modify_with_file(file)?)
122            }
123            (Flavor::Pdz, AccessMode::Read) => Container::Msfz(msfz::Msfz::from_file(file)?),
124            (Flavor::Pdz, AccessMode::ReadWrite) => {
125                bail!("The MSFZ file format is read-only.")
126            }
127        };
128
129        let dbi_header = dbi::read_dbi_stream_header(&container)?;
130        let stream_len = container.stream_len(Stream::DBI.into());
131        let dbi_substreams = if stream_len != 0 {
132            dbi::DbiSubstreamRanges::from_sizes(&dbi_header, stream_len as usize)?
133        } else {
134            dbi::DbiSubstreamRanges::default()
135        };
136
137        let pdbi_stream_data = container.read_stream_to_vec(Stream::PDB.into())?;
138        let pdbi = pdbi::PdbiStream::parse(&pdbi_stream_data)?;
139
140        Ok(Box::new(Self {
141            container,
142            dbi_header,
143            dbi_substreams,
144            pdbi,
145            tpi_header: OnceCell::new(),
146            ipi_header: OnceCell::new(),
147            names: OnceCell::new(),
148            dbi_modules_cell: Default::default(),
149            dbi_sources_cell: Default::default(),
150            gss: OnceCell::new(),
151            gsi: OnceCell::new(),
152            psi: OnceCell::new(),
153        }))
154    }
155
    /// Gets access to the PDB Information Stream (PDBI).
    ///
    /// The PDBI is read and parsed when the PDB is opened, so this accessor only returns a
    /// reference to the already-parsed stream.
    pub fn pdbi(&self) -> &pdbi::PdbiStream {
        &self.pdbi
    }
162
    /// Gets access to the Named Streams table, which is stored in the PDBI Stream.
    pub fn named_streams(&self) -> &pdbi::NamedStreams {
        &self.pdbi.named_streams
    }
167
    /// Gets mutable access to the Named Streams table.
    ///
    /// Edits are held in memory. `flush_all` writes the PDBI Stream back to the container when
    /// the table's `modified` flag is set (presumably set by the table's mutating methods;
    /// verify against `pdbi::NamedStreams`).
    pub fn named_streams_mut(&mut self) -> &mut pdbi::NamedStreams {
        &mut self.pdbi.named_streams
    }
172
    /// Searches the Named Streams table for a stream with a given name.
    ///
    /// Returns the value stored in the table for `name` (the stream index), or `None` if the
    /// stream is not found. See `named_stream_err` for a variant that returns an error instead.
    pub fn named_stream(&self, name: &str) -> Option<u32> {
        self.pdbi.named_streams().get(name)
    }
178
179    /// Searches the Named Streams table for a stream with a given name.
180    /// Returns an error if the stream is not found.
181    pub fn named_stream_err(&self, name: &str) -> anyhow::Result<u32> {
182        if let Some(s) = self.pdbi.named_streams().get(name) {
183            Ok(s)
184        } else {
185            anyhow::bail!("There is no stream with the name {:?}.", name);
186        }
187    }
188
    /// The header of the DBI Stream.
    ///
    /// The header is read when the PDB is opened, so this accessor does not perform any I/O.
    pub fn dbi_header(&self) -> &dbi::DbiStreamHeader {
        &self.dbi_header
    }
193
    /// The byte ranges of the DBI substreams.
    ///
    /// The ranges are computed from the DBI Stream Header when the PDB is opened. If the DBI
    /// Stream is empty, these are the default (empty) ranges.
    pub fn dbi_substreams(&self) -> &dbi::DbiSubstreamRanges {
        &self.dbi_substreams
    }
198
    /// Gets the TPI Stream Header.
    ///
    /// This loads the TPI Stream Header on-demand. This does not load the rest of the TPI Stream.
    /// The result is cached, so later calls do not read from the file again.
    ///
    /// If the TPI Stream is zero-length, the cached value has `header: None`. An error is returned
    /// if the stream is non-empty but too short to contain a header.
    pub fn tpi_header(&self) -> anyhow::Result<&tpi::CachedTypeStreamHeader> {
        self.tpi_or_ipi_header(Stream::TPI, &self.tpi_header)
    }
205
    /// Gets the IPI Stream Header.
    ///
    /// This loads the IPI Stream Header on-demand. This does not load the rest of the IPI Stream.
    /// The result is cached, so later calls do not read from the file again.
    ///
    /// If the IPI Stream is zero-length, the cached value has `header: None`. An error is returned
    /// if the stream is non-empty but too short to contain a header.
    pub fn ipi_header(&self) -> anyhow::Result<&tpi::CachedTypeStreamHeader> {
        self.tpi_or_ipi_header(Stream::IPI, &self.ipi_header)
    }
212
213    fn tpi_or_ipi_header<'s>(
214        &'s self,
215        stream: Stream,
216        cell: &'s OnceCell<tpi::CachedTypeStreamHeader>,
217    ) -> anyhow::Result<&'s tpi::CachedTypeStreamHeader> {
218        get_or_init_err(cell, || {
219            let r = self.get_stream_reader(stream.into())?;
220            let mut header = tpi::TypeStreamHeader::new_zeroed();
221            let header_bytes = header.as_mut_bytes();
222            let bytes_read = r.read_at(header_bytes, 0)?;
223            if bytes_read == 0 {
224                // This stream is zero-length.
225                return Ok(tpi::CachedTypeStreamHeader { header: None });
226            }
227
228            if bytes_read < header_bytes.len() {
229                bail!(
230                    "The type stream (stream {}) does not contain enough data for a valid header.",
231                    stream
232                );
233            }
234
235            Ok(tpi::CachedTypeStreamHeader {
236                header: Some(header),
237            })
238        })
239    }
240
241    /// Gets the Names Stream
242    ///
243    /// This loads the Names Stream on-demand.
244    pub fn names(&self) -> anyhow::Result<&NamesStream<Vec<u8>>> {
245        get_or_init_err(&self.names, || {
246            if let Some(stream) = self.named_stream(names::NAMES_STREAM_NAME) {
247                let stream_data = self.read_stream_to_vec(stream)?;
248                Ok(NamesStream::parse(stream_data)?)
249            } else {
250                let stream_data = names::EMPTY_NAMES_STREAM_DATA.to_vec();
251                Ok(NamesStream::parse(stream_data)?)
252            }
253        })
254    }
255
256    /// Gets a name from the Names Stream.
257    pub fn get_name(&self, offset: NameIndex) -> anyhow::Result<&BStr> {
258        let names = self.names()?;
259        names.get_string(offset)
260    }
261
262    /// The binding key that associates this PDB with a given PE executable.
263    pub fn binding_key(&self) -> BindingKey {
264        let pdbi = self.pdbi();
265        pdbi.binding_key()
266    }
267
    /// Checks whether this PDB has a given feature enabled.
    ///
    /// The answer comes from the PDBI Stream.
    pub fn has_feature(&self, feature_code: pdbi::FeatureCode) -> bool {
        self.pdbi.has_feature(feature_code)
    }
272
    /// Indicates that this PDB was built using the "Mini PDB" option, i.e. `/DEBUG:FASTLINK`.
    ///
    /// This is equivalent to `has_feature(pdbi::FeatureCode::MINI_PDB)`.
    pub fn mini_pdb(&self) -> bool {
        self.has_feature(pdbi::FeatureCode::MINI_PDB)
    }
277
278    /// Gets a reference to the Global Symbol Stream (GSS). This loads the GSS on-demand.
279    #[inline]
280    pub fn gss(&self) -> anyhow::Result<&GlobalSymbolStream> {
281        if let Some(gss) = self.gss.get() {
282            Ok(gss)
283        } else {
284            self.gss_slow()
285        }
286    }
287
288    /// Gets a reference to the Global Symbol Stream (GSS). This loads the GSS on-demand.
289    #[inline(never)]
290    fn gss_slow(&self) -> anyhow::Result<&GlobalSymbolStream> {
291        let box_ref = get_or_init_err(&self.gss, || -> anyhow::Result<Box<GlobalSymbolStream>> {
292            Ok(Box::new(self.read_gss()?))
293        })?;
294        Ok(box_ref)
295    }
296
    /// If the GSS has been loaded by using the `gss()` function, then this method frees it.
    ///
    /// The cache is left empty, so a later call to `gss()` loads the GSS again.
    pub fn gss_drop(&mut self) {
        self.gss.take();
    }
301
302    /// Gets a reference to the Global Symbol Index (GSI). This loads the GSI on-demand.
303    #[inline(never)]
304    pub fn gsi(&self) -> anyhow::Result<&GlobalSymbolIndex> {
305        if let Some(gsi) = self.gsi.get() {
306            Ok(gsi)
307        } else {
308            self.gsi_slow()
309        }
310    }
311
312    #[inline(never)]
313    fn gsi_slow(&self) -> anyhow::Result<&GlobalSymbolIndex> {
314        let box_ref = get_or_init_err(&self.gsi, || -> anyhow::Result<Box<GlobalSymbolIndex>> {
315            Ok(Box::new(self.read_gsi()?))
316        })?;
317        Ok(box_ref)
318    }
319
    /// If the GSI has been loaded by using the `gsi()` function, then this method frees it.
    ///
    /// The cache is left empty, so a later call to `gsi()` loads the GSI again.
    pub fn gsi_drop(&mut self) {
        self.gsi.take();
    }
324
325    /// Gets a reference to the Public Symbol Index (PSI). This loads the PSI on-demand.
326    #[inline]
327    pub fn psi(&self) -> anyhow::Result<&PublicSymbolIndex> {
328        if let Some(psi) = self.psi.get() {
329            Ok(psi)
330        } else {
331            self.psi_slow()
332        }
333    }
334
335    #[inline(never)]
336    fn psi_slow(&self) -> anyhow::Result<&PublicSymbolIndex> {
337        let box_ref = get_or_init_err(&self.psi, || -> anyhow::Result<Box<PublicSymbolIndex>> {
338            Ok(Box::new(self.read_psi()?))
339        })?;
340        Ok(box_ref)
341    }
342
    /// If the PSI has been loaded by using the `psi()` function, then this method frees it.
    ///
    /// The cache is left empty, so a later call to `psi()` loads the PSI again.
    pub fn psi_drop(&mut self) {
        self.psi.take();
    }
347
348    /// Searches for an `S_PUB32` symbol by name.
349    pub fn find_public_by_name(&self, name: &BStr) -> anyhow::Result<Option<Pub<'_>>> {
350        let gss = self.gss()?;
351        let psi = self.psi()?;
352        psi.find_symbol_by_name(gss, name)
353    }
354
355    /// Searches for a global symbol symbol by name.
356    ///
357    /// This uses the Global Symbol Index (GSI). This index _does not_ contain `S_PUB32` records.
358    /// Use `find_public_by_name` to search for `S_PUB32` records.
359    pub fn find_global_by_name(&self, name: &'_ BStr) -> anyhow::Result<Option<Sym<'_>>> {
360        let gss = self.gss()?;
361        let gsi = self.gsi()?;
362        gsi.find_symbol(gss, name)
363    }
364
365    /// Writes any changes that have been buffered in memory to disk. However, this does not commit
366    /// the changes. It is still necessary to call the `commit()` method.
367    ///
368    /// The return value indicates whether any changes were written to disk. `Ok(true)` indicates
369    /// that some change were written to disk.  `Ok(false)` indicates that there were no buffered
370    /// changes and nothing has been written to disk.
371    pub fn flush_all(&mut self) -> anyhow::Result<bool>
372    where
373        F: WriteAt,
374    {
375        let mut any = false;
376
377        if self.pdbi.named_streams.modified {
378            let pdbi_data = self.pdbi.to_bytes()?;
379            let mut w = self.msf_mut_err()?.write_stream(Stream::PDB.into())?;
380            w.set_contents(&pdbi_data)?;
381            self.pdbi.named_streams.modified = false;
382            any = true;
383        }
384
385        Ok(any)
386    }
387
    /// Gets access to the underlying container.
    ///
    /// The same container is also reachable through the `Deref` implementation on `Pdb`.
    pub fn container(&self) -> &Container<F> {
        &self.container
    }
392}
393
/// Returns the value stored in `cell`, running the fallible initializer `f` if the cell is empty.
///
/// * If the cell already holds a value, `f` is not called.
/// * If `f` succeeds, its value is stored in the cell and a reference to the stored value is
///   returned. Should the cell have been filled while `f` was running, the existing value is kept
///   and the new one is discarded.
/// * If `f` fails, the error is returned and the cell is left empty, so a later call can retry.
fn get_or_init_err<T, E, F: FnOnce() -> Result<T, E>>(cell: &OnceCell<T>, f: F) -> Result<&T, E> {
    match cell.get() {
        Some(existing) => Ok(existing),
        None => {
            let fresh = f()?;
            Ok(cell.get_or_init(|| fresh))
        }
    }
}
407
408impl Pdb<RandomAccessFile> {
409    /// Opens a PDB file.
410    pub fn open(file_name: &Path) -> anyhow::Result<Box<Pdb<RandomAccessFile>>> {
411        let f = ms_pdb_msf::open_options_shared(File::options().read(true)).open(file_name)?;
412        let random_file = RandomAccessFile::from(f);
413        Self::from_file_access(random_file, AccessMode::Read)
414    }
415
416    /// Reads the header of a PDB file and provides access to the streams contained within the
417    /// PDB file.
418    ///
419    /// This function reads the MSF File Header, which is the header for the entire file.
420    /// It also reads the stream directory, so it knows how to find each of the streams
421    /// and the pages of the streams.
422    pub fn open_from_file(file: File) -> anyhow::Result<Box<Self>> {
423        let random_file = RandomAccessFile::from(file);
424        Self::from_file_access(random_file, AccessMode::Read)
425    }
426
427    /// Opens a PDB file for editing. The file must use the MSF container format.
428    pub fn modify(filename: &Path) -> anyhow::Result<Box<Pdb<sync_file::RandomAccessFile>>> {
429        let file = File::options().read(true).write(true).open(filename)?;
430        let random_file = sync_file::RandomAccessFile::from(file);
431        Self::from_file_access(random_file, AccessMode::ReadWrite)
432    }
433
434    /// Opens an existing PDB file for read/write access, given a file name.
435    ///
436    /// The file _must_ use the MSF container format. MSFZ is not supported for read/write access.
437    pub fn modify_from_file(file: File) -> anyhow::Result<Box<Self>> {
438        let random_file = RandomAccessFile::from(file);
439        Self::from_file_access(random_file, AccessMode::ReadWrite)
440    }
441}
442
impl<F: ReadAt> Pdb<F> {
    /// Reads the header of a PDB file and provides access to the streams contained within the
    /// PDB file. The PDB is opened for read-only access.
    ///
    /// This function reads the MSF File Header, which is the header for the entire file.
    /// It also reads the stream directory, so it knows how to find each of the streams
    /// and the pages of the streams.
    pub fn open_from_random_file(random_file: F) -> anyhow::Result<Box<Self>> {
        Self::from_file_access(random_file, AccessMode::Read)
    }

    /// Opens an existing PDB file for read/write access, given an already-opened random-access
    /// file.
    ///
    /// The file _must_ use the MSF container format. MSFZ is not supported for read/write access.
    pub fn modify_from_random_file(random_file: F) -> anyhow::Result<Box<Self>> {
        Self::from_file_access(random_file, AccessMode::ReadWrite)
    }
}
461
/// Dereferences a `Pdb` to its underlying `Container`, so that the container's methods can be
/// called directly on a `Pdb`.
impl<F> std::ops::Deref for Pdb<F> {
    type Target = Container<F>;

    fn deref(&self) -> &Self::Target {
        &self.container
    }
}
469
/// Gives mutable access to the underlying `Container` through a `&mut Pdb`.
impl<F> std::ops::DerefMut for Pdb<F> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.container
    }
}
475
/// This is the key used to associate a given PE executable (DLL or EXE) with a PDB.
/// All values come from the PDBI stream.
///
/// `Debug` is implemented by hand for this type; it prints the GUID followed by the age.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct BindingKey {
    /// The GUID. When MSVC tools are run in deterministic mode, this value is a hash of the PE
    /// image, rather than being assigned using an RNG.
    pub guid: uuid::Uuid,
    /// The age of the executable. This is incremented every time the DLL + PDB are modified.
    pub age: u32,
}
486
487impl Debug for BindingKey {
488    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
489        if self.age > 0x1000 {
490            write!(f, "{:?} age 0x{:x}", self.guid, self.age)
491        } else {
492            write!(f, "{:?} age {}", self.guid, self.age)
493        }
494    }
495}