Skip to main content

ms_pdb/
lib.rs

1//! Reads and writes Program Database (PDB) files.
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/index.html>
5//! * <https://github.com/microsoft/microsoft-pdb>
6
7#![forbid(unused_must_use)]
8#![forbid(unsafe_code)]
9#![warn(missing_docs)]
10#![allow(clippy::collapsible_if)]
11#![allow(clippy::single_match)]
12#![allow(clippy::manual_flatten)]
13#![allow(clippy::needless_lifetimes)]
14#![allow(clippy::needless_late_init)]
15
16pub mod container;
17pub mod dbi;
18pub mod globals;
19pub mod guid;
20pub mod hash;
21pub mod lines;
22pub mod modi;
23pub mod taster;
24pub use ::uuid::Uuid;
25use ms_codeview::arch::Arch;
26use ms_codeview::syms::{SymIter, SymKind};
27use ms_coff::IMAGE_FILE_MACHINE;
28pub use ms_pdb_msf as msf;
29pub use ms_pdb_msfz as msfz;
30use tracing::warn;
31mod coff_groups;
32mod embedded_sources;
33pub mod names;
34pub mod pdbi;
35mod stream_index;
36pub mod tpi;
37pub mod utils;
38pub mod writer;
39
40pub use bstr::BStr;
41pub use coff_groups::{CoffGroup, CoffGroups};
42pub use container::{Container, StreamReader};
43pub use ms_codeview::{self as codeview, syms, types};
44pub use ms_coff::{self as coff, IMAGE_SECTION_HEADER};
45pub use msfz::StreamData;
46pub use stream_index::{NIL_STREAM_INDEX, Stream, StreamIndexIsNilError, StreamIndexU16};
47pub use sync_file::{RandomAccessFile, ReadAt, WriteAt};
48
49use anyhow::bail;
50use globals::gsi::GlobalSymbolIndex;
51use globals::gss::GlobalSymbolStream;
52use globals::psi::PublicSymbolIndex;
53use names::{NameIndex, NamesStream};
54use std::cell::OnceCell;
55use std::fmt::Debug;
56use std::fs::File;
57use std::path::Path;
58use syms::{Pub, Sym};
59use zerocopy::{FromZeros, IntoBytes};
60
61use crate::dbi::ModuleInfo;
62use crate::dbi::optional_dbg::OptionalDebugHeaders;
63
/// Installs a `tracing` subscriber for this crate's unit tests.
///
/// The subscriber writes through the test writer, without ANSI colors or timestamps, in the
/// compact format, at `DEBUG` level, and tags each event with its file and line number.
// NOTE(review): relies on `static_init::dynamic` running this initializer at program start.
#[cfg(test)]
#[static_init::dynamic]
static INIT_LOGGER: () = {
    // `finish()` only builds the subscriber and then drops it, so no logger was ever installed.
    // `try_init()` registers it as the global default. The result is deliberately ignored so
    // that an already-installed subscriber does not abort the test binary.
    let _ = tracing_subscriber::fmt()
        .with_ansi(false)
        .with_test_writer()
        .with_file(true)
        .with_line_number(true)
        .with_max_level(tracing::Level::DEBUG)
        .compact()
        .without_time()
        .try_init();
};
77
78/// Provides access to the contents of a PDB file.
79///
80/// A `Pdb` is opened for reading with `Pdb::open` or for editing with `Pdb::modify`. Editing
81/// requires the MSF container format; the MSFZ format is read-only.
82pub struct Pdb<F = sync_file::RandomAccessFile> {
    /// The container (MSF or MSFZ) that stores the streams of this PDB.
83    container: Container<F>,
84
85    /// The header of the DBI Stream. The DBI Stream contains many of the important data structures
86    /// for PDB, or has pointers (stream indexes) for them. Nearly all programs that read PDBs
87    /// need to read the DBI, so we always load the header.
88    dbi_header: dbi::DbiStreamHeader,
    /// The byte ranges of the DBI substreams. Left at the default value if the DBI Stream is empty.
89    dbi_substreams: dbi::DbiSubstreamRanges,
90
    /// The PDB Information Stream (PDBI), parsed when the file is opened.
91    pdbi: pdbi::PdbiStream,
92
    /// Data structures that are loaded on first use.
93    cached: PdbCached,
94}
95
/// Data structures that `Pdb` loads on first use. Each field is a `OnceCell`, so a value is
/// stored at most once until it is explicitly taken out again.
96#[derive(Default)]
97struct PdbCached {
    /// Cached Names Stream.
98    names: OnceCell<NamesStream<Vec<u8>>>,
99
    /// Cached header of the TPI Stream.
100    tpi_header: OnceCell<tpi::CachedTypeStreamHeader>,
    /// Cached header of the IPI Stream.
101    ipi_header: OnceCell<tpi::CachedTypeStreamHeader>,
102
103    /// Cached contents of DBI Modules Substream.
104    dbi_modules_cell: OnceCell<dbi::ModInfoSubstream<Vec<u8>>>,
105    /// Cached contents of DBI Sources Substream.
106    dbi_sources_cell: OnceCell<Vec<u8>>,
107
    /// Cached Global Symbol Stream (GSS).
108    gss: OnceCell<Box<GlobalSymbolStream>>,
    /// Cached Global Symbol Index (GSI).
109    gsi: OnceCell<Box<GlobalSymbolIndex>>,
    /// Cached Public Symbol Index (PSI).
110    psi: OnceCell<Box<PublicSymbolIndex>>,
111
    /// Cached list of COFF groups.
112    coff_groups: OnceCell<CoffGroups>,
    /// Cached `OptionalDebugHeaders`.
113    optional_dbg_streams: OnceCell<OptionalDebugHeaders>,
    /// Cached section headers.
114    section_headers: OnceCell<Box<[IMAGE_SECTION_HEADER]>>,
115}
116
/// Selects how `Pdb::from_file_access` opens the underlying file.
#[derive(Clone, Copy, PartialEq, Eq)]
enum AccessMode {
    /// Open the file for reading only.
    Read,
    /// Open the file for reading and writing. Only the MSF container format supports this.
    ReadWrite,
}
122
123impl<F: ReadAt> Pdb<F> {
124    /// Reads the header of a PDB file and provides access to the streams contained within the
125    /// PDB file. Allows read/write access, if using an MSF container format.
126    ///
127    /// This function reads the MSF File Header, which is the header for the entire file.
128    /// It also reads the stream directory, so it knows how to find each of the streams
129    /// and the pages of the streams.
130    fn from_file_access(file: F, access_mode: AccessMode) -> anyhow::Result<Box<Self>> {
131        use crate::taster::{Flavor, what_flavor};
132
133        let Some(flavor) = what_flavor(&file)? else {
134            bail!("The file is not a recognized PDB or PDZ format.");
135        };
136
137        let container = match (flavor, access_mode) {
138            (Flavor::PortablePdb, _) => bail!("Portable PDBs are not supported."),
139            (Flavor::Pdb, AccessMode::Read) => Container::Msf(msf::Msf::open_with_file(file)?),
140            (Flavor::Pdb, AccessMode::ReadWrite) => {
141                Container::Msf(msf::Msf::modify_with_file(file)?)
142            }
143            (Flavor::Pdz, AccessMode::Read) => Container::Msfz(msfz::Msfz::from_file(file)?),
144            (Flavor::Pdz, AccessMode::ReadWrite) => {
145                bail!("The MSFZ file format is read-only.")
146            }
147        };
148
149        let dbi_header = dbi::read_dbi_stream_header(&container)?;
150        let stream_len = container.stream_len(Stream::DBI.into());
151        let dbi_substreams = if stream_len != 0 {
152            dbi::DbiSubstreamRanges::from_sizes(&dbi_header, stream_len as usize)?
153        } else {
154            dbi::DbiSubstreamRanges::default()
155        };
156
157        let pdbi_stream_data = container.read_stream_to_vec(Stream::PDB.into())?;
158        let pdbi = pdbi::PdbiStream::parse(&pdbi_stream_data)?;
159
160        Ok(Box::new(Self {
161            container,
162            dbi_header,
163            dbi_substreams,
164            pdbi,
165            cached: Default::default(),
166        }))
167    }
168
169    /// Gets access to the PDB Information Stream (PDBI).
170    ///
171    /// The PDBI is parsed when the `Pdb` is opened, so this only returns a reference to it.
172    pub fn pdbi(&self) -> &pdbi::PdbiStream {
173        &self.pdbi
174    }
175
176    /// Returns a shared reference to the Named Streams table, which is stored in the PDBI.
177    pub fn named_streams(&self) -> &pdbi::NamedStreams {
178        &self.pdbi.named_streams
179    }
180
181    /// Returns a mutable reference to the Named Streams table, which is stored in the PDBI.
182    pub fn named_streams_mut(&mut self) -> &mut pdbi::NamedStreams {
183        &mut self.pdbi.named_streams
184    }
185
186    /// Looks up `name` in the Named Streams table and returns the index of the matching stream.
187    /// Returns `None` if no stream has that name.
188    pub fn named_stream(&self, name: &str) -> Option<u32> {
189        self.pdbi.named_streams().get(name)
190    }
191
192    /// Searches the Named Streams table for a stream with a given name.
193    /// Returns an error if the stream is not found.
194    pub fn named_stream_err(&self, name: &str) -> anyhow::Result<u32> {
195        if let Some(s) = self.pdbi.named_streams().get(name) {
196            Ok(s)
197        } else {
198            anyhow::bail!("There is no stream with the name {:?}.", name);
199        }
200    }
201
202    /// The header of the DBI Stream, which is read when the `Pdb` is opened.
203    pub fn dbi_header(&self) -> &dbi::DbiStreamHeader {
204        &self.dbi_header
205    }
206
207    /// The byte ranges of the DBI substreams, which are computed when the `Pdb` is opened.
208    pub fn dbi_substreams(&self) -> &dbi::DbiSubstreamRanges {
209        &self.dbi_substreams
210    }
211
212    /// Gets the TPI Stream Header.
213    ///
214    /// The header is loaded on first use and then cached. The rest of the TPI Stream is not read.
215    pub fn tpi_header(&self) -> anyhow::Result<&tpi::CachedTypeStreamHeader> {
216        self.tpi_or_ipi_header(Stream::TPI, &self.cached.tpi_header)
217    }
218
219    /// Gets the IPI Stream Header.
220    ///
221    /// The header is loaded on first use and then cached. The rest of the IPI Stream is not read.
222    pub fn ipi_header(&self) -> anyhow::Result<&tpi::CachedTypeStreamHeader> {
223        self.tpi_or_ipi_header(Stream::IPI, &self.cached.ipi_header)
224    }
225
226    fn tpi_or_ipi_header<'s>(
227        &'s self,
228        stream: Stream,
229        cell: &'s OnceCell<tpi::CachedTypeStreamHeader>,
230    ) -> anyhow::Result<&'s tpi::CachedTypeStreamHeader> {
231        get_or_init_err(cell, || {
232            let r = self.get_stream_reader(stream.into())?;
233            let mut header = tpi::TypeStreamHeader::new_zeroed();
234            let header_bytes = header.as_mut_bytes();
235            let bytes_read = r.read_at(header_bytes, 0)?;
236            if bytes_read == 0 {
237                // This stream is zero-length.
238                return Ok(tpi::CachedTypeStreamHeader { header: None });
239            }
240
241            if bytes_read < header_bytes.len() {
242                bail!(
243                    "The type stream (stream {}) does not contain enough data for a valid header.",
244                    stream
245                );
246            }
247
248            Ok(tpi::CachedTypeStreamHeader {
249                header: Some(header),
250            })
251        })
252    }
253
254    /// Gets the Names Stream
255    ///
256    /// This loads the Names Stream on-demand.
257    pub fn names(&self) -> anyhow::Result<&NamesStream<Vec<u8>>> {
258        get_or_init_err(&self.cached.names, || {
259            if let Some(stream) = self.named_stream(names::NAMES_STREAM_NAME) {
260                let stream_data = self.read_stream_to_vec(stream)?;
261                Ok(NamesStream::parse(stream_data)?)
262            } else {
263                let stream_data = names::EMPTY_NAMES_STREAM_DATA.to_vec();
264                Ok(NamesStream::parse(stream_data)?)
265            }
266        })
267    }
268
269    /// Gets a name from the Names Stream.
270    pub fn get_name(&self, offset: NameIndex) -> anyhow::Result<&BStr> {
271        let names = self.names()?;
272        names.get_string(offset)
273    }
274
275    /// The binding key that associates this PDB with a given PE executable.
276    pub fn binding_key(&self) -> BindingKey {
277        let pdbi = self.pdbi();
278        pdbi.binding_key()
279    }
280
281    /// Checks whether the PDBI reports that the feature `feature_code` is enabled for this PDB.
282    pub fn has_feature(&self, feature_code: pdbi::FeatureCode) -> bool {
283        self.pdbi.has_feature(feature_code)
284    }
285
286    /// Returns `true` if the `MINI_PDB` feature is set, which indicates that this PDB was built
    /// using the "Mini PDB" option, i.e. `/DEBUG:FASTLINK`.
287    pub fn mini_pdb(&self) -> bool {
288        self.has_feature(pdbi::FeatureCode::MINI_PDB)
289    }
290
291    /// Gets a reference to the Global Symbol Stream (GSS). This loads the GSS on-demand.
292    #[inline]
293    pub fn gss(&self) -> anyhow::Result<&GlobalSymbolStream> {
294        if let Some(gss) = self.cached.gss.get() {
295            Ok(gss)
296        } else {
297            self.gss_slow()
298        }
299    }
300
301    /// Gets a reference to the Global Symbol Stream (GSS). This loads the GSS on-demand.
302    #[inline(never)]
303    fn gss_slow(&self) -> anyhow::Result<&GlobalSymbolStream> {
304        let box_ref = get_or_init_err(
305            &self.cached.gss,
306            || -> anyhow::Result<Box<GlobalSymbolStream>> { Ok(Box::new(self.read_gss()?)) },
307        )?;
308        Ok(box_ref)
309    }
310
311    /// Discards the cached GSS, if `gss()` has loaded it. The next call to `gss()` loads it again.
312    pub fn gss_drop(&mut self) {
313        self.cached.gss.take();
314    }
315
316    /// Gets a reference to the Global Symbol Index (GSI). This loads the GSI on-demand.
317    #[inline(never)]
318    pub fn gsi(&self) -> anyhow::Result<&GlobalSymbolIndex> {
319        if let Some(gsi) = self.cached.gsi.get() {
320            Ok(gsi)
321        } else {
322            self.gsi_slow()
323        }
324    }
325
326    #[inline(never)]
327    fn gsi_slow(&self) -> anyhow::Result<&GlobalSymbolIndex> {
328        let box_ref = get_or_init_err(
329            &self.cached.gsi,
330            || -> anyhow::Result<Box<GlobalSymbolIndex>> { Ok(Box::new(self.read_gsi()?)) },
331        )?;
332        Ok(box_ref)
333    }
334
335    /// Discards the cached GSI, if `gsi()` has loaded it. The next call to `gsi()` loads it again.
336    pub fn gsi_drop(&mut self) {
337        self.cached.gsi.take();
338    }
339
340    /// Gets a reference to the Public Symbol Index (PSI). This loads the PSI on-demand.
341    #[inline]
342    pub fn psi(&self) -> anyhow::Result<&PublicSymbolIndex> {
343        if let Some(psi) = self.cached.psi.get() {
344            Ok(psi)
345        } else {
346            self.psi_slow()
347        }
348    }
349
350    #[inline(never)]
351    fn psi_slow(&self) -> anyhow::Result<&PublicSymbolIndex> {
352        let box_ref = get_or_init_err(
353            &self.cached.psi,
354            || -> anyhow::Result<Box<PublicSymbolIndex>> { Ok(Box::new(self.read_psi()?)) },
355        )?;
356        Ok(box_ref)
357    }
358
359    /// Discards the cached PSI, if `psi()` has loaded it. The next call to `psi()` loads it again.
360    pub fn psi_drop(&mut self) {
361        self.cached.psi.take();
362    }
363
364    /// Searches for an `S_PUB32` symbol by name.
365    pub fn find_public_by_name(&self, name: &BStr) -> anyhow::Result<Option<Pub<'_>>> {
366        let gss = self.gss()?;
367        let psi = self.psi()?;
368        psi.find_symbol_by_name(gss, name)
369    }
370
371    /// Searches for a global symbol symbol by name.
372    ///
373    /// This uses the Global Symbol Index (GSI). This index _does not_ contain `S_PUB32` records.
374    /// Use `find_public_by_name` to search for `S_PUB32` records.
375    pub fn find_global_by_name(&self, name: &'_ BStr) -> anyhow::Result<Option<Sym<'_>>> {
376        let gss = self.gss()?;
377        let gsi = self.gsi()?;
378        gsi.find_symbol(gss, name)
379    }
380
381    /// Writes any changes that have been buffered in memory to disk. However, this does not commit
382    /// the changes. It is still necessary to call the `commit()` method.
383    ///
384    /// The return value indicates whether any changes were written to disk. `Ok(true)` indicates
385    /// that some change were written to disk.  `Ok(false)` indicates that there were no buffered
386    /// changes and nothing has been written to disk.
387    pub fn flush_all(&mut self) -> anyhow::Result<bool>
388    where
389        F: WriteAt,
390    {
391        let mut any = false;
392
393        if self.pdbi.named_streams.modified {
394            let pdbi_data = self.pdbi.to_bytes()?;
395            let mut w = self.msf_mut_err()?.write_stream(Stream::PDB.into())?;
396            w.set_contents(&pdbi_data)?;
397            self.pdbi.named_streams.modified = false;
398            any = true;
399        }
400
401        Ok(any)
402    }
403
404    /// Returns a shared reference to the underlying container of this PDB.
405    pub fn container(&self) -> &Container<F> {
406        &self.container
407    }
408
409    /// Find the `"* Linker *"` module, which contains the S_COFFGROUP symbols.
410    ///
411    /// If the PDB does not contain a linker module then this returns `Err`.
412    pub fn linker_module(&self) -> anyhow::Result<ModuleInfo<'_>> {
413        if let Some(module) = self.linker_module_opt()? {
414            Ok(module)
415        } else {
416            bail!("This PDB does not contain a linker module.");
417        }
418    }
419
420    /// Find the `"* Linker *"` module, which contains the S_COFFGROUP symbols.
421    ///
422    /// If the PDB does not contain a linker module then this returns `Ok(None)`.
423    pub fn linker_module_opt(&self) -> anyhow::Result<Option<ModuleInfo<'_>>> {
424        let modules = self.modules()?;
425        for module in modules.iter() {
426            if module.module_name == LINKER_MODULE_NAME {
427                return Ok(Some(module));
428            }
429        }
430        Ok(None)
431    }
432
433    /// Gets the list of COFF groups defined in this binary. The list is read with
    /// `read_coff_groups` on first use and then cached.
434    pub fn coff_groups(&self) -> anyhow::Result<&CoffGroups> {
435        get_or_init_err(&self.cached.coff_groups, || self.read_coff_groups())
436    }
437
438    /// Reads (uncached) the list of COFF groups defined in this binary.
439    pub fn read_coff_groups(&self) -> anyhow::Result<CoffGroups> {
440        // S_COFFGROUP symbols are defined in the linker module.
441        let Some(linker_module) = self.linker_module_opt()? else {
442            return Ok(CoffGroups { vec: Vec::new() });
443        };
444
445        let Some(linker_module_stream) = linker_module.stream() else {
446            bail!("The linker module does not contain any symbols.");
447        };
448
449        let mut linker_module_syms: Vec<u8> = vec![0; linker_module.sym_size() as usize];
450        let sr = self.get_stream_reader(linker_module_stream)?;
451        sr.read_exact_at(&mut linker_module_syms, 0)?;
452
453        // Count the number of S_COFFGROUP symbols. We can use this to do a precise allocation.
454        let mut num_coff_groups: usize = 0;
455        for sym in SymIter::for_module_syms(&linker_module_syms) {
456            if sym.kind == SymKind::S_COFFGROUP {
457                num_coff_groups += 1;
458            }
459        }
460
461        let mut groups = Vec::with_capacity(num_coff_groups);
462
463        for sym in SymIter::for_module_syms(&linker_module_syms) {
464            if sym.kind == SymKind::S_COFFGROUP {
465                match sym.parse_as::<ms_codeview::syms::CoffGroup>() {
466                    Ok(group) => {
467                        groups.push(CoffGroup {
468                            name: group.name.to_string(),
469                            characteristics: group.fixed.characteristics.get(),
470                            offset_segment: group.fixed.off_seg,
471                            size: group.fixed.cb.get(),
472                        });
473                    }
474                    Err(_) => {
475                        warn!("failed to parse S_COFFGROUP symbol");
476                    }
477                }
478            }
479        }
480
481        groups.sort_unstable_by_key(|g| g.offset_segment);
482
483        Ok(CoffGroups { vec: groups })
484    }
485
486    /// Returns the target machine for this PE binary, taken from the DBI Stream header.
487    pub fn machine(&self) -> IMAGE_FILE_MACHINE {
488        IMAGE_FILE_MACHINE(self.dbi_header.machine.get())
489    }
490
491    /// Returns the target CPU architecture.
492    pub fn arch(&self) -> anyhow::Result<Arch> {
493        match self.machine() {
494            IMAGE_FILE_MACHINE::IMAGE_FILE_MACHINE_AMD64 => Ok(Arch::AMD64),
495            IMAGE_FILE_MACHINE::IMAGE_FILE_MACHINE_ARM64 => Ok(Arch::ARM64),
496            IMAGE_FILE_MACHINE::IMAGE_FILE_MACHINE_I386 => Ok(Arch::X86),
497            _ => bail!("target machine not supported"),
498        }
499    }
500}
501
/// Returns the value stored in `cell`, initializing it with `f` if the cell is empty.
///
/// `f` is only called when the cell is empty. If `f` fails, its error is returned and the cell
/// is left empty, so a later call runs the initializer again. If the cell was filled while `f`
/// was running, the value already in the cell wins and the new one is dropped.
fn get_or_init_err<T, E, F: FnOnce() -> Result<T, E>>(cell: &OnceCell<T>, f: F) -> Result<&T, E> {
    match cell.get() {
        Some(existing) => Ok(existing),
        None => {
            let value = f()?;
            Ok(cell.get_or_init(|| value))
        }
    }
}
515
516impl Pdb<RandomAccessFile> {
517    /// Opens a PDB file.
518    pub fn open(file_name: &Path) -> anyhow::Result<Box<Pdb<RandomAccessFile>>> {
519        let f = ms_pdb_msf::open_options_shared(File::options().read(true)).open(file_name)?;
520        let random_file = RandomAccessFile::from(f);
521        Self::from_file_access(random_file, AccessMode::Read)
522    }
523
524    /// Reads the header of a PDB file and provides access to the streams contained within the
525    /// PDB file.
526    ///
527    /// This function reads the MSF File Header, which is the header for the entire file.
528    /// It also reads the stream directory, so it knows how to find each of the streams
529    /// and the pages of the streams.
530    pub fn open_from_file(file: File) -> anyhow::Result<Box<Self>> {
531        let random_file = RandomAccessFile::from(file);
532        Self::from_file_access(random_file, AccessMode::Read)
533    }
534
535    /// Opens a PDB file for editing. The file must use the MSF container format.
536    pub fn modify(filename: &Path) -> anyhow::Result<Box<Pdb<sync_file::RandomAccessFile>>> {
537        let file = File::options().read(true).write(true).open(filename)?;
538        let random_file = sync_file::RandomAccessFile::from(file);
539        Self::from_file_access(random_file, AccessMode::ReadWrite)
540    }
541
542    /// Opens an existing PDB file for read/write access, given a file name.
543    ///
544    /// The file _must_ use the MSF container format. MSFZ is not supported for read/write access.
545    pub fn modify_from_file(file: File) -> anyhow::Result<Box<Self>> {
546        let random_file = RandomAccessFile::from(file);
547        Self::from_file_access(random_file, AccessMode::ReadWrite)
548    }
549}
550
551impl<F: ReadAt> Pdb<F> {
552    /// Opens a PDB for read-only access, given a file object that implements `ReadAt`.
553    ///
554    ///
555    /// This function reads the MSF File Header, which is the header for the entire file.
556    /// It also reads the stream directory, so it knows how to find each of the streams
557    /// and the pages of the streams.
558    pub fn open_from_random_file(random_file: F) -> anyhow::Result<Box<Self>> {
559        Self::from_file_access(random_file, AccessMode::Read)
560    }
561
562    /// Opens an existing PDB for read/write access, given a file object that implements `ReadAt`.
563    ///
564    /// The file _must_ use the MSF container format. MSFZ is not supported for read/write access.
565    pub fn modify_from_random_file(random_file: F) -> anyhow::Result<Box<Self>> {
566        Self::from_file_access(random_file, AccessMode::ReadWrite)
567    }
568}
569
570impl<F> std::ops::Deref for Pdb<F> {
571    type Target = Container<F>;
572
573    fn deref(&self) -> &Self::Target {
574        &self.container
575    }
576}
577
578impl<F> std::ops::DerefMut for Pdb<F> {
579    fn deref_mut(&mut self) -> &mut Self::Target {
580        &mut self.container
581    }
582}
583
584/// The key used to associate a given PE executable (DLL or EXE) with a PDB.
585/// All values come from the PDBI stream; see `Pdb::binding_key`.
///
/// The `Debug` implementation prints `age` in hexadecimal when it is greater than `0x1000`.
586#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
587pub struct BindingKey {
588    /// The GUID. When MSVC tools are run in deterministic mode, this value is a hash of the PE
589    /// image, rather than being assigned using an RNG.
590    pub guid: uuid::Uuid,
591    /// The age of the executable. This is incremented every time the DLL + PDB are modified.
592    pub age: u32,
593}
594
595impl Debug for BindingKey {
596    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
597        if self.age > 0x1000 {
598            write!(f, "{:?} age 0x{:x}", self.guid, self.age)
599        } else {
600            write!(f, "{:?} age {}", self.guid, self.age)
601        }
602    }
603}
604
605/// The name of the special "linker" module.
606///
607/// The linker module is created by the linker and is not an input to the linker. It contains
608/// special / well-known symbols, such as `S_COFFGROUP`.
///
/// `Pdb::linker_module_opt` compares module names against this value to locate the module.
609pub const LINKER_MODULE_NAME: &str = "* Linker *";