ms_pdb/
lib.rs

1//! Reads and writes Program Database (PDB) files.
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/index.html>
5//! * <https://github.com/microsoft/microsoft-pdb>
6
7#![forbid(unused_must_use)]
8#![forbid(unsafe_code)]
9#![warn(missing_docs)]
10#![allow(clippy::collapsible_if)]
11#![allow(clippy::single_match)]
12#![allow(clippy::manual_flatten)]
13#![allow(clippy::needless_lifetimes)]
14#![allow(clippy::needless_late_init)]
15
16pub mod container;
17pub mod dbi;
18pub mod encoder;
19pub mod globals;
20pub mod guid;
21pub mod hash;
22pub mod lines;
23pub mod modi;
24pub mod taster;
25pub use ::uuid::Uuid;
26pub use ms_pdb_msf as msf;
27pub use ms_pdb_msfz as msfz;
28mod embedded_sources;
29pub mod names;
30pub mod parser;
31pub mod pdbi;
32mod stream_index;
33pub mod syms;
34pub mod tpi;
35pub mod types;
36pub mod utils;
37pub mod writer;
38
39pub use bstr::BStr;
40pub use container::{Container, StreamReader};
41pub use msfz::StreamData;
42pub use stream_index::{Stream, StreamIndexIsNilError, StreamIndexU16, NIL_STREAM_INDEX};
43pub use sync_file::{RandomAccessFile, ReadAt, WriteAt};
44
45use anyhow::bail;
46use globals::gsi::GlobalSymbolIndex;
47use globals::gss::GlobalSymbolStream;
48use globals::psi::PublicSymbolIndex;
49use names::{NameIndex, NamesStream};
50use std::cell::OnceCell;
51use std::fmt::Debug;
52use std::fs::File;
53use std::path::Path;
54use syms::{Pub, Sym};
55use zerocopy::{FromZeros, IntoBytes};
56
/// Installs a `tracing` subscriber for unit tests, so that events emitted by the code under
/// test are written to the test harness output.
///
/// The initializer runs once, when the test binary starts (via `static_init::dynamic`).
#[cfg(test)]
#[static_init::dynamic]
static INIT_LOGGER: () = {
    // `finish()` only *builds* the subscriber; the value was then dropped and nothing was ever
    // installed. `try_init()` installs the subscriber as the global default. The result is
    // deliberately ignored: if some other code already installed a global subscriber, we keep
    // that one rather than panicking during static initialization.
    let _ = tracing_subscriber::fmt()
        .with_ansi(false)
        .with_test_writer()
        .with_file(true)
        .with_line_number(true)
        .with_max_level(tracing::Level::DEBUG)
        .compact()
        .without_time()
        .try_init();
};
70
/// Provides access to the contents of a PDB file.
///
/// A `Pdb` is opened for read-only access by the `open*` constructors, or for read/write access
/// by the `modify*` constructors. Read/write access requires the MSF container format; files
/// that use the MSFZ container format can only be read.
///
/// The DBI Stream Header and the PDBI Stream are read when the file is opened. The other cached
/// data structures are loaded the first time they are requested.
pub struct Pdb<F = sync_file::RandomAccessFile> {
    /// The underlying container (MSF or MSFZ), which provides access to the streams.
    container: Container<F>,

    /// The header of the DBI Stream. The DBI Stream contains many of the important data structures
    /// for PDB, or has pointers (stream indexes) for them. Nearly all programs that read PDBs
    /// need to read the DBI, so we always load the header.
    dbi_header: dbi::DbiStreamHeader,
    /// The byte ranges of the DBI substreams. This is left at its default value when the DBI
    /// Stream is zero-length.
    dbi_substreams: dbi::DbiSubstreamRanges,

    /// The parsed PDB Information Stream (PDBI). Always loaded when the file is opened.
    pdbi: pdbi::PdbiStream,
    /// Cached contents of the Names Stream. Loaded on-demand by `names()`.
    names: OnceCell<NamesStream<Vec<u8>>>,

    /// Cached header of the TPI Stream. Loaded on-demand by `tpi_header()`.
    tpi_header: OnceCell<tpi::CachedTypeStreamHeader>,
    /// Cached header of the IPI Stream. Loaded on-demand by `ipi_header()`.
    ipi_header: OnceCell<tpi::CachedTypeStreamHeader>,

    /// Cached contents of DBI Modules Substream.
    dbi_modules_cell: OnceCell<dbi::ModInfoSubstream<Vec<u8>>>,
    /// Cached contents of DBI Sources Substream.
    dbi_sources_cell: OnceCell<Vec<u8>>,

    /// Cached Global Symbol Stream. Loaded on-demand by `gss()`; freed by `gss_drop()`.
    gss: OnceCell<Box<GlobalSymbolStream>>,
    /// Cached Global Symbol Index. Loaded on-demand by `gsi()`; freed by `gsi_drop()`.
    gsi: OnceCell<Box<GlobalSymbolIndex>>,
    /// Cached Public Symbol Index. Loaded on-demand by `psi()`; freed by `psi_drop()`.
    psi: OnceCell<Box<PublicSymbolIndex>>,
}
99
/// Selects how `Pdb::from_file_access` opens the underlying container.
#[derive(Copy, Clone, Eq, PartialEq)]
enum AccessMode {
    /// Open the file for reading only. Supported for both MSF and MSFZ containers.
    Read,
    /// Open the file for reading and writing. Only MSF containers support this mode.
    ReadWrite,
}
105
106impl<F: ReadAt> Pdb<F> {
107    /// Reads the header of a PDB file and provides access to the streams contained within the
108    /// PDB file. Allows read/write access, if using an MSF container format.
109    ///
110    /// This function reads the MSF File Header, which is the header for the entire file.
111    /// It also reads the stream directory, so it knows how to find each of the streams
112    /// and the pages of the streams.
113    fn from_file_access(file: F, access_mode: AccessMode) -> anyhow::Result<Box<Self>> {
114        use crate::taster::{what_flavor, Flavor};
115
116        let Some(flavor) = what_flavor(&file)? else {
117            bail!("The file is not a recognized PDB or PDZ format.");
118        };
119
120        let container = match (flavor, access_mode) {
121            (Flavor::PortablePdb, _) => bail!("Portable PDBs are not supported."),
122            (Flavor::Pdb, AccessMode::Read) => Container::Msf(msf::Msf::open_with_file(file)?),
123            (Flavor::Pdb, AccessMode::ReadWrite) => {
124                Container::Msf(msf::Msf::modify_with_file(file)?)
125            }
126            (Flavor::Pdz, AccessMode::Read) => Container::Msfz(msfz::Msfz::from_file(file)?),
127            (Flavor::Pdz, AccessMode::ReadWrite) => {
128                bail!("The MSFZ file format is read-only.")
129            }
130        };
131
132        let dbi_header = dbi::read_dbi_stream_header(&container)?;
133        let stream_len = container.stream_len(Stream::DBI.into());
134        let dbi_substreams = if stream_len != 0 {
135            dbi::DbiSubstreamRanges::from_sizes(&dbi_header, stream_len as usize)?
136        } else {
137            dbi::DbiSubstreamRanges::default()
138        };
139
140        let pdbi_stream_data = container.read_stream_to_vec(Stream::PDB.into())?;
141        let pdbi = pdbi::PdbiStream::parse(&pdbi_stream_data)?;
142
143        Ok(Box::new(Self {
144            container,
145            dbi_header,
146            dbi_substreams,
147            pdbi,
148            tpi_header: OnceCell::new(),
149            ipi_header: OnceCell::new(),
150            names: OnceCell::new(),
151            dbi_modules_cell: Default::default(),
152            dbi_sources_cell: Default::default(),
153            gss: OnceCell::new(),
154            gsi: OnceCell::new(),
155            psi: OnceCell::new(),
156        }))
157    }
158
    /// Gets access to the PDB Information Stream.
    ///
    /// The PDBI is read and parsed when the `Pdb` is opened, so this accessor only returns a
    /// reference to the already-loaded value.
    pub fn pdbi(&self) -> &pdbi::PdbiStream {
        &self.pdbi
    }
165
    /// Gets access to the Named Streams table, which is stored in the PDBI Stream.
    pub fn named_streams(&self) -> &pdbi::NamedStreams {
        &self.pdbi.named_streams
    }
170
    /// Gets mutable access to the Named Streams table.
    ///
    /// `flush_all()` writes the PDBI Stream back to the container when the table's `modified`
    /// flag is set.
    // NOTE(review): presumably the table sets `modified` itself when it is changed; confirm
    // against `pdbi::NamedStreams`.
    pub fn named_streams_mut(&mut self) -> &mut pdbi::NamedStreams {
        &mut self.pdbi.named_streams
    }
175
    /// Searches the Named Streams table for a stream with a given name.
    ///
    /// Returns the stream index if the name is present in the table.
    /// Returns `None` if the stream is not found.
    pub fn named_stream(&self, name: &str) -> Option<u32> {
        self.pdbi.named_streams().get(name)
    }
181
182    /// Searches the Named Streams table for a stream with a given name.
183    /// Returns an error if the stream is not found.
184    pub fn named_stream_err(&self, name: &str) -> anyhow::Result<u32> {
185        if let Some(s) = self.pdbi.named_streams().get(name) {
186            Ok(s)
187        } else {
188            anyhow::bail!("There is no stream with the name {:?}.", name);
189        }
190    }
191
    /// The header of the DBI Stream. The header is read when the `Pdb` is opened.
    pub fn dbi_header(&self) -> &dbi::DbiStreamHeader {
        &self.dbi_header
    }
196
    /// The byte ranges of the DBI substreams.
    ///
    /// The ranges are computed when the `Pdb` is opened. If the DBI Stream is zero-length, the
    /// ranges have their default value.
    pub fn dbi_substreams(&self) -> &dbi::DbiSubstreamRanges {
        &self.dbi_substreams
    }
201
    /// Gets the TPI Stream Header.
    ///
    /// This loads the TPI Stream Header on-demand. This does not load the rest of the TPI Stream.
    /// The header is cached after the first successful load. If the TPI Stream is zero-length,
    /// the cached value contains no header.
    pub fn tpi_header(&self) -> anyhow::Result<&tpi::CachedTypeStreamHeader> {
        self.tpi_or_ipi_header(Stream::TPI, &self.tpi_header)
    }
208
    /// Gets the IPI Stream Header.
    ///
    /// This loads the IPI Stream Header on-demand. This does not load the rest of the IPI Stream.
    /// The header is cached after the first successful load. If the IPI Stream is zero-length,
    /// the cached value contains no header.
    pub fn ipi_header(&self) -> anyhow::Result<&tpi::CachedTypeStreamHeader> {
        self.tpi_or_ipi_header(Stream::IPI, &self.ipi_header)
    }
215
216    fn tpi_or_ipi_header<'s>(
217        &'s self,
218        stream: Stream,
219        cell: &'s OnceCell<tpi::CachedTypeStreamHeader>,
220    ) -> anyhow::Result<&'s tpi::CachedTypeStreamHeader> {
221        get_or_init_err(cell, || {
222            let r = self.get_stream_reader(stream.into())?;
223            let mut header = tpi::TypeStreamHeader::new_zeroed();
224            let header_bytes = header.as_mut_bytes();
225            let bytes_read = r.read_at(header_bytes, 0)?;
226            if bytes_read == 0 {
227                // This stream is zero-length.
228                return Ok(tpi::CachedTypeStreamHeader { header: None });
229            }
230
231            if bytes_read < header_bytes.len() {
232                bail!(
233                    "The type stream (stream {}) does not contain enough data for a valid header.",
234                    stream
235                );
236            }
237
238            Ok(tpi::CachedTypeStreamHeader {
239                header: Some(header),
240            })
241        })
242    }
243
244    /// Gets the Names Stream
245    ///
246    /// This loads the Names Stream on-demand.
247    pub fn names(&self) -> anyhow::Result<&NamesStream<Vec<u8>>> {
248        get_or_init_err(&self.names, || {
249            if let Some(stream) = self.named_stream(names::NAMES_STREAM_NAME) {
250                let stream_data = self.read_stream_to_vec(stream)?;
251                Ok(NamesStream::parse(stream_data)?)
252            } else {
253                let stream_data = names::EMPTY_NAMES_STREAM_DATA.to_vec();
254                Ok(NamesStream::parse(stream_data)?)
255            }
256        })
257    }
258
259    /// Gets a name from the Names Stream.
260    pub fn get_name(&self, offset: NameIndex) -> anyhow::Result<&BStr> {
261        let names = self.names()?;
262        names.get_string(offset)
263    }
264
265    /// The binding key that associates this PDB with a given PE executable.
266    pub fn binding_key(&self) -> BindingKey {
267        let pdbi = self.pdbi();
268        pdbi.binding_key()
269    }
270
    /// Checks whether this PDB has a given feature enabled.
    ///
    /// The answer comes from the PDBI Stream.
    pub fn has_feature(&self, feature_code: pdbi::FeatureCode) -> bool {
        self.pdbi.has_feature(feature_code)
    }
275
    /// Indicates that this PDB was built using the "Mini PDB" option, i.e. `/DEBUG:FASTLINK`.
    ///
    /// This is equivalent to `has_feature(pdbi::FeatureCode::MINI_PDB)`.
    pub fn mini_pdb(&self) -> bool {
        self.has_feature(pdbi::FeatureCode::MINI_PDB)
    }
280
281    /// Gets a reference to the Global Symbol Stream (GSS). This loads the GSS on-demand.
282    #[inline]
283    pub fn gss(&self) -> anyhow::Result<&GlobalSymbolStream> {
284        if let Some(gss) = self.gss.get() {
285            Ok(gss)
286        } else {
287            self.gss_slow()
288        }
289    }
290
291    /// Gets a reference to the Global Symbol Stream (GSS). This loads the GSS on-demand.
292    #[inline(never)]
293    fn gss_slow(&self) -> anyhow::Result<&GlobalSymbolStream> {
294        let box_ref = get_or_init_err(&self.gss, || -> anyhow::Result<Box<GlobalSymbolStream>> {
295            Ok(Box::new(self.read_gss()?))
296        })?;
297        Ok(box_ref)
298    }
299
300    /// If the GSS has been loaded by using the `gss()` function, then this method frees it.
301    pub fn gss_drop(&mut self) {
302        self.gss.take();
303    }
304
305    /// Gets a reference to the Global Symbol Index (GSI). This loads the GSI on-demand.
306    #[inline(never)]
307    pub fn gsi(&self) -> anyhow::Result<&GlobalSymbolIndex> {
308        if let Some(gsi) = self.gsi.get() {
309            Ok(gsi)
310        } else {
311            self.gsi_slow()
312        }
313    }
314
315    #[inline(never)]
316    fn gsi_slow(&self) -> anyhow::Result<&GlobalSymbolIndex> {
317        let box_ref = get_or_init_err(&self.gsi, || -> anyhow::Result<Box<GlobalSymbolIndex>> {
318            Ok(Box::new(self.read_gsi()?))
319        })?;
320        Ok(box_ref)
321    }
322
323    /// If the GSI has been loaded by using the `gsi()` function, then this method frees it.
324    pub fn gsi_drop(&mut self) {
325        self.gsi.take();
326    }
327
328    /// Gets a reference to the Public Symbol Index (PSI). This loads the PSI on-demand.
329    #[inline]
330    pub fn psi(&self) -> anyhow::Result<&PublicSymbolIndex> {
331        if let Some(psi) = self.psi.get() {
332            Ok(psi)
333        } else {
334            self.psi_slow()
335        }
336    }
337
338    #[inline(never)]
339    fn psi_slow(&self) -> anyhow::Result<&PublicSymbolIndex> {
340        let box_ref = get_or_init_err(&self.psi, || -> anyhow::Result<Box<PublicSymbolIndex>> {
341            Ok(Box::new(self.read_psi()?))
342        })?;
343        Ok(box_ref)
344    }
345
346    /// If the PSI has been loaded by using the `psi()` function, then this method frees it.
347    pub fn psi_drop(&mut self) {
348        self.psi.take();
349    }
350
351    /// Searches for an `S_PUB32` symbol by name.
352    pub fn find_public_by_name(&self, name: &BStr) -> anyhow::Result<Option<Pub<'_>>> {
353        let gss = self.gss()?;
354        let psi = self.psi()?;
355        psi.find_symbol_by_name(gss, name)
356    }
357
358    /// Searches for a global symbol symbol by name.
359    ///
360    /// This uses the Global Symbol Index (GSI). This index _does not_ contain `S_PUB32` records.
361    /// Use `find_public_by_name` to search for `S_PUB32` records.
362    pub fn find_global_by_name(&self, name: &'_ BStr) -> anyhow::Result<Option<Sym<'_>>> {
363        let gss = self.gss()?;
364        let gsi = self.gsi()?;
365        gsi.find_symbol(gss, name)
366    }
367
368    /// Writes any changes that have been buffered in memory to disk. However, this does not commit
369    /// the changes. It is still necessary to call the `commit()` method.
370    ///
371    /// The return value indicates whether any changes were written to disk. `Ok(true)` indicates
372    /// that some change were written to disk.  `Ok(false)` indicates that there were no buffered
373    /// changes and nothing has been written to disk.
374    pub fn flush_all(&mut self) -> anyhow::Result<bool>
375    where
376        F: WriteAt,
377    {
378        let mut any = false;
379
380        if self.pdbi.named_streams.modified {
381            let pdbi_data = self.pdbi.to_bytes()?;
382            let mut w = self.msf_mut_err()?.write_stream(Stream::PDB.into())?;
383            w.set_contents(&pdbi_data)?;
384            self.pdbi.named_streams.modified = false;
385            any = true;
386        }
387
388        Ok(any)
389    }
390
    /// Gets access to the underlying container.
    ///
    /// `Pdb` also implements `Deref<Target = Container<F>>`, so container methods can be called
    /// directly on a `Pdb`.
    pub fn container(&self) -> &Container<F> {
        &self.container
    }
395}
396
/// Fallible variant of `OnceCell::get_or_init`.
///
/// If `cell` already holds a value, that value is returned and `f` is not called. Otherwise `f`
/// is called: on success its result is stored in `cell` and a reference to the stored value is
/// returned; on failure the error is returned and `cell` stays empty, so a later call can retry.
fn get_or_init_err<T, E, F: FnOnce() -> Result<T, E>>(cell: &OnceCell<T>, f: F) -> Result<&T, E> {
    match cell.get() {
        Some(existing) => Ok(existing),
        None => {
            let fresh = f()?;
            // If `f` itself filled the cell in the meantime, the existing value wins and
            // `fresh` is dropped.
            Ok(cell.get_or_init(move || fresh))
        }
    }
}
410
411impl Pdb<RandomAccessFile> {
412    /// Opens a PDB file.
413    pub fn open(file_name: &Path) -> anyhow::Result<Box<Pdb<RandomAccessFile>>> {
414        let f = File::open(file_name)?;
415        let random_file = RandomAccessFile::from(f);
416        Self::from_file_access(random_file, AccessMode::Read)
417    }
418
419    /// Reads the header of a PDB file and provides access to the streams contained within the
420    /// PDB file.
421    ///
422    /// This function reads the MSF File Header, which is the header for the entire file.
423    /// It also reads the stream directory, so it knows how to find each of the streams
424    /// and the pages of the streams.
425    pub fn open_from_file(file: File) -> anyhow::Result<Box<Self>> {
426        let random_file = RandomAccessFile::from(file);
427        Self::from_file_access(random_file, AccessMode::Read)
428    }
429
430    /// Opens a PDB file for editing. The file must use the MSF container format.
431    pub fn modify(filename: &Path) -> anyhow::Result<Box<Pdb<sync_file::RandomAccessFile>>> {
432        let file = File::options().read(true).write(true).open(filename)?;
433        let random_file = sync_file::RandomAccessFile::from(file);
434        Self::from_file_access(random_file, AccessMode::ReadWrite)
435    }
436
437    /// Opens an existing PDB file for read/write access, given a file name.
438    ///
439    /// The file _must_ use the MSF container format. MSFZ is not supported for read/write access.
440    pub fn modify_from_file(file: File) -> anyhow::Result<Box<Self>> {
441        let random_file = RandomAccessFile::from(file);
442        Self::from_file_access(random_file, AccessMode::ReadWrite)
443    }
444}
445
impl<F: ReadAt> Pdb<F> {
    /// Reads the header of a PDB file and provides read-only access to the streams contained
    /// within the PDB file. The file is accessed through the given `ReadAt` implementation.
    ///
    /// This function reads the MSF File Header, which is the header for the entire file.
    /// It also reads the stream directory, so it knows how to find each of the streams
    /// and the pages of the streams.
    pub fn open_from_random_file(random_file: F) -> anyhow::Result<Box<Self>> {
        Self::from_file_access(random_file, AccessMode::Read)
    }

    /// Opens an existing PDB file for read/write access, given a random-access file object.
    ///
    /// The file _must_ use the MSF container format. MSFZ is not supported for read/write access.
    pub fn modify_from_random_file(random_file: F) -> anyhow::Result<Box<Self>> {
        Self::from_file_access(random_file, AccessMode::ReadWrite)
    }
}
464
/// Lets a `Pdb` be used as a `Container<F>`, so that container methods (such as
/// `read_stream_to_vec`) can be called directly on a `Pdb`.
impl<F> std::ops::Deref for Pdb<F> {
    type Target = Container<F>;

    /// Returns the underlying container.
    fn deref(&self) -> &Self::Target {
        &self.container
    }
}
472
/// Gives mutable access to the underlying container through a `&mut Pdb`, so that the
/// container's `&mut self` methods can be called directly on a `Pdb`.
impl<F> std::ops::DerefMut for Pdb<F> {
    /// Returns the underlying container, mutably.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.container
    }
}
478
/// This is the key used to associate a given PE executable (DLL or EXE) with a PDB.
/// All values come from the PDBI stream.
///
/// The derived ordering compares `guid` first, then `age`.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct BindingKey {
    /// The GUID. When MSVC tools are run in deterministic mode, this value is a hash of the PE
    /// image, rather than being assigned using an RNG.
    pub guid: uuid::Uuid,
    /// The age of the executable. This is incremented every time the DLL + PDB are modified.
    pub age: u32,
}
489
490impl Debug for BindingKey {
491    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
492        if self.age > 0x1000 {
493            write!(f, "{:?} age 0x{:x}", self.guid, self.age)
494        } else {
495            write!(f, "{:?} age {}", self.guid, self.age)
496        }
497    }
498}