pdb/
pdb.rs

1// Copyright 2017 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use crate::common::*;
9use crate::dbi::{DBIExtraStreams, DBIHeader, DebugInformation, Module};
10use crate::framedata::FrameTable;
11use crate::modi::ModuleInfo;
12use crate::msf::{self, Msf, Stream};
13use crate::omap::{AddressMap, OMAPTable};
14use crate::pdbi::PDBInformation;
15use crate::pe::ImageSectionHeader;
16use crate::source::Source;
17use crate::strings::StringTable;
18use crate::symbol::SymbolTable;
19use crate::tpi::{IdInformation, TypeInformation};
20
// Some streams have a fixed stream index.
// http://llvm.org/docs/PDB/index.html

// PDB information stream, read by `PDB::pdb_information()`.
const PDB_STREAM: u32 = 1;
// Type information ("TPI") stream, read by `PDB::type_information()`.
const TPI_STREAM: u32 = 2;
// Debug information ("DBI") stream, read by `PDB::debug_information()` and `PDB::dbi_header()`.
const DBI_STREAM: u32 = 3;
// Id information ("IPI") stream, read by `PDB::id_information()`.
const IPI_STREAM: u32 = 4;
28
/// `PDB` provides access to the data within a PDB file.
///
/// A PDB file is internally a Multi-Stream File (MSF), composed of multiple independent
/// (and usually discontiguous) data streams on-disk. `PDB` provides lazy access to these data
/// structures, which means the `PDB` accessor methods usually cause disk accesses.
///
/// The debug information header and the extra stream indices are memoized after they have been
/// read once, so repeated lookups of dependent streams do not parse them again.
#[derive(Debug)]
pub struct PDB<'s, S> {
    /// `msf` provides access to the underlying data streams
    msf: Box<dyn Msf<'s, S> + 's>,

    /// Memoized `DBIHeader`, since it contains stream numbers we sometimes need.
    ///
    /// Filled in by `debug_information()` or, lazily, by the private `dbi_header()` helper.
    dbi_header: Option<DBIHeader>,

    /// Memoized `DBIExtraStreams`, since it too contains stream numbers we sometimes need.
    ///
    /// Filled in lazily by the private `extra_streams()` helper.
    dbi_extra_streams: Option<DBIExtraStreams>,
}
45
46impl<'s, S: Source<'s> + 's> PDB<'s, S> {
47    /// Create a new `PDB` for a `Source`.
48    ///
49    /// `open()` accesses enough of the source file to find the MSF stream table. This usually
50    /// involves reading the header, a block near the end of the file, and finally the stream table
51    /// itself. It does not access or validate any of the contents of the rest of the PDB.
52    ///
53    /// # Errors
54    ///
55    /// * `Error::UnimplementedFeature` if the PDB file predates ~2002
56    /// * `Error::UnrecognizedFileFormat` if the `Source` does not appear to be a PDB file
57    /// * `Error::IoError` if returned by the `Source`
58    /// * `Error::PageReferenceOutOfRange`, `Error::InvalidPageSize` if the PDB file seems corrupt
59    pub fn open(source: S) -> Result<PDB<'s, S>> {
60        Ok(PDB {
61            msf: msf::open_msf(source)?,
62            dbi_header: None,
63            dbi_extra_streams: None,
64        })
65    }
66
67    /// Retrieve the `PDBInformation` for this PDB.
68    ///
69    /// The `PDBInformation` object contains the GUID and age fields that can be used to verify
70    /// that a PDB file matches a binary, as well as the stream indicies of named PDB streams.
71    ///
72    /// # Errors
73    ///
74    /// * `Error::StreamNotFound` if the PDB somehow does not contain the PDB information stream
75    /// * `Error::IoError` if returned by the `Source`
76    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
77    pub fn pdb_information(&mut self) -> Result<PDBInformation<'s>> {
78        let stream = self.msf.get(PDB_STREAM, None)?;
79        PDBInformation::parse(stream)
80    }
81
82    /// Retrieve the `TypeInformation` for this PDB.
83    ///
84    /// The `TypeInformation` object owns a `SourceView` for the type information ("TPI") stream.
85    /// This is usually the single largest stream of the PDB file.
86    ///
87    /// # Errors
88    ///
89    /// * `Error::StreamNotFound` if the PDB does not contain the type information stream
90    /// * `Error::IoError` if returned by the `Source`
91    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
92    /// * `Error::InvalidTypeInformationHeader` if the type information stream header was not
93    ///   understood
94    pub fn type_information(&mut self) -> Result<TypeInformation<'s>> {
95        let stream = self.msf.get(TPI_STREAM, None)?;
96        TypeInformation::parse(stream)
97    }
98
99    /// Retrieve the `IdInformation` for this PDB.
100    ///
101    /// The `IdInformation` object owns a `SourceView` for the type information ("IPI") stream.
102    ///
103    /// # Errors
104    ///
105    /// * `Error::StreamNotFound` if the PDB does not contain the id information stream
106    /// * `Error::IoError` if returned by the `Source`
107    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
108    /// * `Error::InvalidTypeInformationHeader` if the id information stream header was not
109    ///   understood
110    pub fn id_information(&mut self) -> Result<IdInformation<'s>> {
111        let stream = self.msf.get(IPI_STREAM, None)?;
112        IdInformation::parse(stream)
113    }
114
115    /// Retrieve the `DebugInformation` for this PDB.
116    ///
117    /// The `DebugInformation` object owns a `SourceView` for the debug information ("DBI") stream.
118    ///
119    /// # Errors
120    ///
121    /// * `Error::StreamNotFound` if the PDB somehow does not contain a symbol records stream
122    /// * `Error::IoError` if returned by the `Source`
123    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
124    /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
125    pub fn debug_information(&mut self) -> Result<DebugInformation<'s>> {
126        let stream = self.msf.get(DBI_STREAM, None)?;
127        let debug_info = DebugInformation::parse(stream)?;
128
129        // Grab its header, since we need that for unrelated operations
130        self.dbi_header = Some(debug_info.header());
131        Ok(debug_info)
132    }
133
134    fn dbi_header(&mut self) -> Result<DBIHeader> {
135        // see if we've already got a header
136        if let Some(ref h) = self.dbi_header {
137            return Ok(*h);
138        }
139
140        // get just the first little bit of the DBI stream
141        let stream = self.msf.get(DBI_STREAM, Some(1024))?;
142        let header = DBIHeader::parse(stream)?;
143
144        self.dbi_header = Some(header);
145        Ok(header)
146    }
147
148    /// Retrieve the global symbol table for this PDB.
149    ///
150    /// The `SymbolTable` object owns a `SourceView` for the symbol records stream. This is usually
151    /// the second-largest stream of the PDB file.
152    ///
153    /// The debug information stream indicates which stream is the symbol records stream, so
154    /// `global_symbols()` accesses the debug information stream to read the header unless
155    /// `debug_information()` was called first.
156    ///
157    /// # Errors
158    ///
159    /// * `Error::StreamNotFound` if the PDB somehow does not contain a symbol records stream
160    /// * `Error::IoError` if returned by the `Source`
161    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
162    ///
163    /// If `debug_information()` was not already called, `global_symbols()` will additionally read
164    /// the debug information header, in which case it can also return:
165    ///
166    /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream
167    /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
168    pub fn global_symbols(&mut self) -> Result<SymbolTable<'s>> {
169        // the global symbol table is stored in a stream number described by the DBI header
170        // so, start by getting the DBI header
171        let dbi_header = self.dbi_header()?;
172
173        // open the appropriate stream, assuming that it is always present.
174        let stream = self
175            .raw_stream(dbi_header.symbol_records_stream)?
176            .ok_or(Error::GlobalSymbolsNotFound)?;
177
178        Ok(SymbolTable::new(stream))
179    }
180
181    /// Retrieve the module info stream for a specific `Module`.
182    ///
183    /// Some information for each module is stored in a separate stream per-module. `Module`s can be
184    /// retrieved from the `PDB` by first calling [`debug_information`](Self::debug_information) to
185    /// get the debug information stream, and then calling [`modules`](DebugInformation::modules) on
186    /// that.
187    ///
188    /// # Errors
189    ///
190    /// * `Error::StreamNotFound` if the PDB does not contain this module info stream
191    /// * `Error::IoError` if returned by the `Source`
192    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
193    /// * `Error::UnimplementedFeature` if the module information stream is an unsupported version
194    ///
195    /// # Example
196    ///
197    /// ```
198    /// # use pdb::FallibleIterator;
199    /// #
200    /// # fn test() -> pdb::Result<()> {
201    /// let file = std::fs::File::open("fixtures/self/foo.pdb")?;
202    /// let mut pdb = pdb::PDB::open(file)?;
203    /// let dbi = pdb.debug_information()?;
204    /// let mut modules = dbi.modules()?;
205    /// if let Some(module) = modules.next()? {
206    ///     println!("module name: {}, object file name: {}",
207    ///              module.module_name(), module.object_file_name());
208    ///     match pdb.module_info(&module)? {
209    ///         Some(info) => println!("contains {} symbols", info.symbols()?.count()?),
210    ///         None => println!("module information not available"),
211    ///     }
212    /// }
213    ///
214    /// # Ok(())
215    /// # }
216    /// ```
217    pub fn module_info<'m>(&mut self, module: &Module<'m>) -> Result<Option<ModuleInfo<'s>>> {
218        Ok(self
219            .raw_stream(module.info().stream)?
220            .map(|stream| ModuleInfo::parse(stream, module)))
221    }
222
223    /// Retrieve the executable's section headers, as stored inside this PDB.
224    ///
225    /// The debug information stream indicates which stream contains the section headers, so
226    /// `sections()` accesses the debug information stream to read the header unless
227    /// `debug_information()` was called first.
228    ///
229    /// # Errors
230    ///
231    /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers
232    /// * `Error::IoError` if returned by the `Source`
233    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
234    /// * `Error::UnexpectedEof` if the section headers are truncated mid-record
235    ///
236    /// If `debug_information()` was not already called, `sections()` will additionally read
237    /// the debug information header, in which case it can also return:
238    ///
239    /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream
240    /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
241    pub fn sections(&mut self) -> Result<Option<Vec<ImageSectionHeader>>> {
242        let index = self.extra_streams()?.section_headers;
243        let stream = match self.raw_stream(index)? {
244            Some(stream) => stream,
245            None => return Ok(None),
246        };
247
248        let mut buf = stream.parse_buffer();
249        let mut headers = Vec::with_capacity(buf.len() / 40);
250        while !buf.is_empty() {
251            headers.push(ImageSectionHeader::parse(&mut buf)?);
252        }
253
254        Ok(Some(headers))
255    }
256
257    /// Retrieve the global frame data table.
258    ///
259    /// This table describes the stack frame layout for functions from all modules in the PDB. Not
260    /// every function in the image file must have FPO information defined for it. Those functions
261    /// that do not have FPO information are assumed to have normal stack frames.
262    ///
263    /// If this PDB does not contain frame data, the returned table is empty.
264    ///
265    /// # Errors
266    ///
267    /// * `Error::StreamNotFound` if the PDB does not contain the referenced streams
268    /// * `Error::IoError` if returned by the `Source`
269    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
270    ///
271    /// # Example
272    ///
273    /// ```rust
274    /// # use pdb::{PDB, Rva, FallibleIterator};
275    /// #
276    /// # fn test() -> pdb::Result<()> {
277    /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?;
278    /// let mut pdb = PDB::open(source)?;
279    ///
280    /// // Read the tables once and reuse them
281    /// let address_map = pdb.address_map()?;
282    /// let frame_table = pdb.frame_table()?;
283    /// let mut frames = frame_table.iter();
284    ///
285    /// // Iterate frame data in internal RVA order
286    /// while let Some(frame) = frames.next()? {
287    ///     println!("{:#?}", frame);
288    /// }
289    /// # Ok(())
290    /// # }
291    /// # test().unwrap()
292    /// ```
293    pub fn frame_table(&mut self) -> Result<FrameTable<'s>> {
294        let extra = self.extra_streams()?;
295        let old_stream = self.raw_stream(extra.fpo)?;
296        let new_stream = self.raw_stream(extra.framedata)?;
297        FrameTable::parse(old_stream, new_stream)
298    }
299
300    pub(crate) fn original_sections(&mut self) -> Result<Option<Vec<ImageSectionHeader>>> {
301        let index = self.extra_streams()?.original_section_headers;
302        let stream = match self.raw_stream(index)? {
303            Some(stream) => stream,
304            None => return Ok(None),
305        };
306
307        let mut buf = stream.parse_buffer();
308        let mut headers = Vec::with_capacity(buf.len() / 40);
309        while !buf.is_empty() {
310            headers.push(ImageSectionHeader::parse(&mut buf)?);
311        }
312
313        Ok(Some(headers))
314    }
315
316    pub(crate) fn omap_from_src(&mut self) -> Result<Option<OMAPTable<'s>>> {
317        let index = self.extra_streams()?.omap_from_src;
318        match self.raw_stream(index)? {
319            Some(stream) => OMAPTable::parse(stream).map(Some),
320            None => Ok(None),
321        }
322    }
323
324    pub(crate) fn omap_to_src(&mut self) -> Result<Option<OMAPTable<'s>>> {
325        let index = self.extra_streams()?.omap_to_src;
326        match self.raw_stream(index)? {
327            Some(stream) => OMAPTable::parse(stream).map(Some),
328            None => Ok(None),
329        }
330    }
331
332    /// Build a map translating between different kinds of offsets and virtual addresses.
333    ///
334    /// For more information on address translation, see [`AddressMap`].
335    ///
336    /// This reads `omap_from_src` and either `original_sections` or `sections` from this PDB and
337    /// chooses internally which strategy to use for resolving RVAs. Consider to reuse this instance
338    /// for multiple translations.
339    ///
340    /// # Errors
341    ///
342    /// * `Error::OmapNotFound` if an OMAP is required for translation but missing
343    /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers
344    /// * `Error::IoError` if returned by the `Source`
345    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
346    /// * `Error::UnexpectedEof` if the section headers are truncated mid-record
347    ///
348    /// If `debug_information()` was not already called, `omap_table()` will additionally read the
349    /// debug information header, in which case it can also return:
350    ///
351    /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream
352    /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
353    ///
354    /// # Example
355    ///
356    /// ```rust
357    /// # use pdb::{Rva, FallibleIterator};
358    /// #
359    /// # fn test() -> pdb::Result<()> {
360    /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?;
361    /// let mut pdb = pdb::PDB::open(source)?;
362    ///
363    /// // Compute the address map once and reuse it
364    /// let address_map = pdb.address_map()?;
365    ///
366    /// # let symbol_table = pdb.global_symbols()?;
367    /// # let symbol = symbol_table.iter().next()?.unwrap();
368    /// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => {
369    /// // Obtain some section offset, eg from a symbol, and convert it
370    /// match pubsym.offset.to_rva(&address_map) {
371    ///     Some(rva) => {
372    ///         println!("symbol is at {}", rva);
373    /// #       assert_eq!(rva, Rva(26048));
374    ///     }
375    ///     None => {
376    ///         println!("symbol refers to eliminated code");
377    /// #       panic!("symbol should exist");
378    ///     }
379    /// }
380    /// # } _ => unreachable!() }
381    /// # Ok(())
382    /// # }
383    /// # test().unwrap()
384    /// ```
385    pub fn address_map(&mut self) -> Result<AddressMap<'s>> {
386        let sections = self.sections()?.unwrap_or_default();
387        Ok(match self.original_sections()? {
388            Some(original_sections) => {
389                let omap_from_src = self.omap_from_src()?.ok_or(Error::AddressMapNotFound)?;
390                let omap_to_src = self.omap_to_src()?.ok_or(Error::AddressMapNotFound)?;
391
392                AddressMap {
393                    original_sections,
394                    transformed_sections: Some(sections),
395                    original_to_transformed: Some(omap_from_src),
396                    transformed_to_original: Some(omap_to_src),
397                }
398            }
399            None => AddressMap {
400                original_sections: sections,
401                transformed_sections: None,
402                original_to_transformed: None,
403                transformed_to_original: None,
404            },
405        })
406    }
407
408    /// Retrieve the global string table of this PDB.
409    ///
410    /// Long strings, such as file names, are stored in a global deduplicated string table. They are
411    /// referred to by the [`StringRef`] type, which contains an offset into that table. Strings in
412    /// the table are stored as null-terminated C strings. Modern PDBs only store valid UTF-8 data
413    /// in the string table, but for older types a decoding might be necessary.
414    ///
415    /// The string table offers cheap zero-copy access to the underlying string data. It is
416    /// therefore cheap to build.
417    ///
418    /// # Example
419    ///
420    /// ```
421    /// # use pdb::{FallibleIterator, StringRef, PDB};
422    /// #
423    /// # fn test() -> pdb::Result<()> {
424    /// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
425    /// let mut pdb = PDB::open(file)?;
426    /// let strings = pdb.string_table()?;
427    ///
428    /// // obtain a string ref somehow
429    /// # let string_ref = StringRef(0);
430    /// let raw_string = strings.get(string_ref)?;
431    /// println!("{}", raw_string.to_string());
432    ///
433    /// // alternatively, use convenience methods
434    /// println!("{}", string_ref.to_string_lossy(&strings)?);
435    ///
436    /// # Ok(())
437    /// # }
438    /// ```
439    ///
440    /// # Errors
441    ///
442    /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers
443    /// * `Error::IoError` if returned by the `Source`
444    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
445    /// * `Error::UnexpectedEof` if the string table ends prematurely
446    pub fn string_table(&mut self) -> Result<StringTable<'s>> {
447        let stream = self.named_stream(b"/names")?;
448        StringTable::parse(stream)
449    }
450
451    /// Retrieve a stream by its index to read its contents as bytes.
452    ///
453    /// # Errors
454    ///
455    /// * `Error::StreamNotFound` if the PDB does not contain this stream
456    /// * `Error::IoError` if returned by the `Source`
457    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
458    ///
459    /// # Example
460    ///
461    /// ```
462    /// # fn test() -> pdb::Result<()> {
463    /// let file = std::fs::File::open("fixtures/self/foo.pdb")?;
464    /// let mut pdb = pdb::PDB::open(file)?;
465    /// // This is the index of the "mystream" stream that was added using pdbstr.exe.
466    /// let s = pdb.raw_stream(pdb::StreamIndex(208))?.expect("stream exists");
467    /// assert_eq!(s.as_slice(), b"hello world\n");
468    /// # Ok(())
469    /// # }
470    /// ```
471    pub fn raw_stream(&mut self, index: StreamIndex) -> Result<Option<Stream<'s>>> {
472        match index.msf_number() {
473            Some(number) => self.msf.get(number, None).map(Some),
474            None => Ok(None),
475        }
476    }
477
478    /// Retrieve a stream by its name, as declared in the PDB info stream.
479    ///
480    /// # Errors
481    ///
482    /// * `Error::StreamNameNotFound` if the PDB does not specify a stream with that name
483    /// * `Error::StreamNotFound` if the PDB does not contain the stream referred to
484    /// * `Error::IoError` if returned by the `Source`
485    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
486    pub fn named_stream(&mut self, name: &[u8]) -> Result<Stream<'s>> {
487        let info = self.pdb_information()?;
488        let names = info.stream_names()?;
489        for named_stream in &names {
490            if named_stream.name.as_bytes() == name {
491                return self
492                    .raw_stream(named_stream.stream_id)?
493                    .ok_or(Error::StreamNameNotFound);
494            }
495        }
496        Err(Error::StreamNameNotFound)
497    }
498
499    /// Loads the Optional Debug Header Stream, which contains offsets into extra streams.
500    ///
501    /// this stream is always returned, but its members are all optional depending on the data
502    /// present in the PDB.
503    ///
504    /// The optional header begins at offset 0 immediately after the EC Substream ends.
505    fn extra_streams(&mut self) -> Result<DBIExtraStreams> {
506        if let Some(extra) = self.dbi_extra_streams {
507            return Ok(extra);
508        }
509
510        // Parse and grab information on extra streams, since we might also need that
511        let debug_info = self.debug_information()?;
512        let extra = DBIExtraStreams::new(&debug_info)?;
513        self.dbi_extra_streams = Some(extra);
514
515        Ok(extra)
516    }
517}
518
impl StreamIndex {
    /// Load the raw data of this stream from the PDB.
    ///
    /// Returns `None` if this index is none. Otherwise, this will try to read the stream from the
    /// PDB, which might fail if the stream is missing.
    ///
    /// This is a convenience wrapper that forwards to `PDB::raw_stream` with this index.
    ///
    /// # Errors
    ///
    /// * `Error::StreamNotFound` if the PDB does not contain this stream
    /// * `Error::IoError` if returned by the `Source`
    /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
    pub fn get<'s, S>(self, pdb: &mut PDB<'s, S>) -> Result<Option<Stream<'s>>>
    where
        S: Source<'s> + 's,
    {
        // All the work, including the "none" index case, is handled by `raw_stream`.
        pdb.raw_stream(self)
    }
}