pdb/pdb.rs
1// Copyright 2017 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use crate::common::*;
9use crate::dbi::{DBIExtraStreams, DBIHeader, DebugInformation, Module};
10use crate::framedata::FrameTable;
11use crate::modi::ModuleInfo;
12use crate::msf::{self, Msf, Stream};
13use crate::omap::{AddressMap, OMAPTable};
14use crate::pdbi::PDBInformation;
15use crate::pe::ImageSectionHeader;
16use crate::source::Source;
17use crate::strings::StringTable;
18use crate::symbol::SymbolTable;
19use crate::tpi::{IdInformation, TypeInformation};
20
// Some streams have a fixed stream index.
// http://llvm.org/docs/PDB/index.html

/// Fixed MSF stream index of the PDB information stream (read by `pdb_information()`).
const PDB_STREAM: u32 = 1;
/// Fixed MSF stream index of the type information ("TPI") stream (read by `type_information()`).
const TPI_STREAM: u32 = 2;
/// Fixed MSF stream index of the debug information ("DBI") stream (read by
/// `debug_information()` and by the memoizing `dbi_header()` helper).
const DBI_STREAM: u32 = 3;
/// Fixed MSF stream index of the id information ("IPI") stream (read by `id_information()`).
const IPI_STREAM: u32 = 4;
28
/// `PDB` provides access to the data within a PDB file.
///
/// A PDB file is internally a Multi-Stream File (MSF), composed of multiple independent
/// (and usually discontiguous) data streams on-disk. `PDB` provides lazy access to these data
/// structures, which means the `PDB` accessor methods usually cause disk accesses.
///
/// `S` is the `Source` the file is read from; it is handed to the MSF layer when the `PDB` is
/// opened. Streams returned by the accessors carry the `'s` lifetime.
#[derive(Debug)]
pub struct PDB<'s, S> {
    /// `msf` provides access to the underlying data streams
    msf: Box<dyn Msf<'s, S> + 's>,

    /// Memoized `DBIHeader`, since it contains stream numbers we sometimes need.
    ///
    /// `None` until the header has been parsed for the first time.
    dbi_header: Option<DBIHeader>,

    /// Memoized `DBIExtraStreams`, since it too contains stream numbers we sometimes need.
    ///
    /// `None` until the extra stream indices have been parsed for the first time.
    dbi_extra_streams: Option<DBIExtraStreams>,
}
45
46impl<'s, S: Source<'s> + 's> PDB<'s, S> {
47 /// Create a new `PDB` for a `Source`.
48 ///
49 /// `open()` accesses enough of the source file to find the MSF stream table. This usually
50 /// involves reading the header, a block near the end of the file, and finally the stream table
51 /// itself. It does not access or validate any of the contents of the rest of the PDB.
52 ///
53 /// # Errors
54 ///
55 /// * `Error::UnimplementedFeature` if the PDB file predates ~2002
56 /// * `Error::UnrecognizedFileFormat` if the `Source` does not appear to be a PDB file
57 /// * `Error::IoError` if returned by the `Source`
58 /// * `Error::PageReferenceOutOfRange`, `Error::InvalidPageSize` if the PDB file seems corrupt
59 pub fn open(source: S) -> Result<PDB<'s, S>> {
60 Ok(PDB {
61 msf: msf::open_msf(source)?,
62 dbi_header: None,
63 dbi_extra_streams: None,
64 })
65 }
66
67 /// Retrieve the `PDBInformation` for this PDB.
68 ///
69 /// The `PDBInformation` object contains the GUID and age fields that can be used to verify
70 /// that a PDB file matches a binary, as well as the stream indicies of named PDB streams.
71 ///
72 /// # Errors
73 ///
74 /// * `Error::StreamNotFound` if the PDB somehow does not contain the PDB information stream
75 /// * `Error::IoError` if returned by the `Source`
76 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
77 pub fn pdb_information(&mut self) -> Result<PDBInformation<'s>> {
78 let stream = self.msf.get(PDB_STREAM, None)?;
79 PDBInformation::parse(stream)
80 }
81
82 /// Retrieve the `TypeInformation` for this PDB.
83 ///
84 /// The `TypeInformation` object owns a `SourceView` for the type information ("TPI") stream.
85 /// This is usually the single largest stream of the PDB file.
86 ///
87 /// # Errors
88 ///
89 /// * `Error::StreamNotFound` if the PDB does not contain the type information stream
90 /// * `Error::IoError` if returned by the `Source`
91 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
92 /// * `Error::InvalidTypeInformationHeader` if the type information stream header was not
93 /// understood
94 pub fn type_information(&mut self) -> Result<TypeInformation<'s>> {
95 let stream = self.msf.get(TPI_STREAM, None)?;
96 TypeInformation::parse(stream)
97 }
98
99 /// Retrieve the `IdInformation` for this PDB.
100 ///
101 /// The `IdInformation` object owns a `SourceView` for the type information ("IPI") stream.
102 ///
103 /// # Errors
104 ///
105 /// * `Error::StreamNotFound` if the PDB does not contain the id information stream
106 /// * `Error::IoError` if returned by the `Source`
107 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
108 /// * `Error::InvalidTypeInformationHeader` if the id information stream header was not
109 /// understood
110 pub fn id_information(&mut self) -> Result<IdInformation<'s>> {
111 let stream = self.msf.get(IPI_STREAM, None)?;
112 IdInformation::parse(stream)
113 }
114
115 /// Retrieve the `DebugInformation` for this PDB.
116 ///
117 /// The `DebugInformation` object owns a `SourceView` for the debug information ("DBI") stream.
118 ///
119 /// # Errors
120 ///
121 /// * `Error::StreamNotFound` if the PDB somehow does not contain a symbol records stream
122 /// * `Error::IoError` if returned by the `Source`
123 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
124 /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
125 pub fn debug_information(&mut self) -> Result<DebugInformation<'s>> {
126 let stream = self.msf.get(DBI_STREAM, None)?;
127 let debug_info = DebugInformation::parse(stream)?;
128
129 // Grab its header, since we need that for unrelated operations
130 self.dbi_header = Some(debug_info.header());
131 Ok(debug_info)
132 }
133
134 fn dbi_header(&mut self) -> Result<DBIHeader> {
135 // see if we've already got a header
136 if let Some(ref h) = self.dbi_header {
137 return Ok(*h);
138 }
139
140 // get just the first little bit of the DBI stream
141 let stream = self.msf.get(DBI_STREAM, Some(1024))?;
142 let header = DBIHeader::parse(stream)?;
143
144 self.dbi_header = Some(header);
145 Ok(header)
146 }
147
148 /// Retrieve the global symbol table for this PDB.
149 ///
150 /// The `SymbolTable` object owns a `SourceView` for the symbol records stream. This is usually
151 /// the second-largest stream of the PDB file.
152 ///
153 /// The debug information stream indicates which stream is the symbol records stream, so
154 /// `global_symbols()` accesses the debug information stream to read the header unless
155 /// `debug_information()` was called first.
156 ///
157 /// # Errors
158 ///
159 /// * `Error::StreamNotFound` if the PDB somehow does not contain a symbol records stream
160 /// * `Error::IoError` if returned by the `Source`
161 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
162 ///
163 /// If `debug_information()` was not already called, `global_symbols()` will additionally read
164 /// the debug information header, in which case it can also return:
165 ///
166 /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream
167 /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
168 pub fn global_symbols(&mut self) -> Result<SymbolTable<'s>> {
169 // the global symbol table is stored in a stream number described by the DBI header
170 // so, start by getting the DBI header
171 let dbi_header = self.dbi_header()?;
172
173 // open the appropriate stream, assuming that it is always present.
174 let stream = self
175 .raw_stream(dbi_header.symbol_records_stream)?
176 .ok_or(Error::GlobalSymbolsNotFound)?;
177
178 Ok(SymbolTable::new(stream))
179 }
180
181 /// Retrieve the module info stream for a specific `Module`.
182 ///
183 /// Some information for each module is stored in a separate stream per-module. `Module`s can be
184 /// retrieved from the `PDB` by first calling [`debug_information`](Self::debug_information) to
185 /// get the debug information stream, and then calling [`modules`](DebugInformation::modules) on
186 /// that.
187 ///
188 /// # Errors
189 ///
190 /// * `Error::StreamNotFound` if the PDB does not contain this module info stream
191 /// * `Error::IoError` if returned by the `Source`
192 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
193 /// * `Error::UnimplementedFeature` if the module information stream is an unsupported version
194 ///
195 /// # Example
196 ///
197 /// ```
198 /// # use pdb::FallibleIterator;
199 /// #
200 /// # fn test() -> pdb::Result<()> {
201 /// let file = std::fs::File::open("fixtures/self/foo.pdb")?;
202 /// let mut pdb = pdb::PDB::open(file)?;
203 /// let dbi = pdb.debug_information()?;
204 /// let mut modules = dbi.modules()?;
205 /// if let Some(module) = modules.next()? {
206 /// println!("module name: {}, object file name: {}",
207 /// module.module_name(), module.object_file_name());
208 /// match pdb.module_info(&module)? {
209 /// Some(info) => println!("contains {} symbols", info.symbols()?.count()?),
210 /// None => println!("module information not available"),
211 /// }
212 /// }
213 ///
214 /// # Ok(())
215 /// # }
216 /// ```
217 pub fn module_info<'m>(&mut self, module: &Module<'m>) -> Result<Option<ModuleInfo<'s>>> {
218 Ok(self
219 .raw_stream(module.info().stream)?
220 .map(|stream| ModuleInfo::parse(stream, module)))
221 }
222
223 /// Retrieve the executable's section headers, as stored inside this PDB.
224 ///
225 /// The debug information stream indicates which stream contains the section headers, so
226 /// `sections()` accesses the debug information stream to read the header unless
227 /// `debug_information()` was called first.
228 ///
229 /// # Errors
230 ///
231 /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers
232 /// * `Error::IoError` if returned by the `Source`
233 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
234 /// * `Error::UnexpectedEof` if the section headers are truncated mid-record
235 ///
236 /// If `debug_information()` was not already called, `sections()` will additionally read
237 /// the debug information header, in which case it can also return:
238 ///
239 /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream
240 /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
241 pub fn sections(&mut self) -> Result<Option<Vec<ImageSectionHeader>>> {
242 let index = self.extra_streams()?.section_headers;
243 let stream = match self.raw_stream(index)? {
244 Some(stream) => stream,
245 None => return Ok(None),
246 };
247
248 let mut buf = stream.parse_buffer();
249 let mut headers = Vec::with_capacity(buf.len() / 40);
250 while !buf.is_empty() {
251 headers.push(ImageSectionHeader::parse(&mut buf)?);
252 }
253
254 Ok(Some(headers))
255 }
256
257 /// Retrieve the global frame data table.
258 ///
259 /// This table describes the stack frame layout for functions from all modules in the PDB. Not
260 /// every function in the image file must have FPO information defined for it. Those functions
261 /// that do not have FPO information are assumed to have normal stack frames.
262 ///
263 /// If this PDB does not contain frame data, the returned table is empty.
264 ///
265 /// # Errors
266 ///
267 /// * `Error::StreamNotFound` if the PDB does not contain the referenced streams
268 /// * `Error::IoError` if returned by the `Source`
269 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
270 ///
271 /// # Example
272 ///
273 /// ```rust
274 /// # use pdb::{PDB, Rva, FallibleIterator};
275 /// #
276 /// # fn test() -> pdb::Result<()> {
277 /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?;
278 /// let mut pdb = PDB::open(source)?;
279 ///
280 /// // Read the tables once and reuse them
281 /// let address_map = pdb.address_map()?;
282 /// let frame_table = pdb.frame_table()?;
283 /// let mut frames = frame_table.iter();
284 ///
285 /// // Iterate frame data in internal RVA order
286 /// while let Some(frame) = frames.next()? {
287 /// println!("{:#?}", frame);
288 /// }
289 /// # Ok(())
290 /// # }
291 /// # test().unwrap()
292 /// ```
293 pub fn frame_table(&mut self) -> Result<FrameTable<'s>> {
294 let extra = self.extra_streams()?;
295 let old_stream = self.raw_stream(extra.fpo)?;
296 let new_stream = self.raw_stream(extra.framedata)?;
297 FrameTable::parse(old_stream, new_stream)
298 }
299
300 pub(crate) fn original_sections(&mut self) -> Result<Option<Vec<ImageSectionHeader>>> {
301 let index = self.extra_streams()?.original_section_headers;
302 let stream = match self.raw_stream(index)? {
303 Some(stream) => stream,
304 None => return Ok(None),
305 };
306
307 let mut buf = stream.parse_buffer();
308 let mut headers = Vec::with_capacity(buf.len() / 40);
309 while !buf.is_empty() {
310 headers.push(ImageSectionHeader::parse(&mut buf)?);
311 }
312
313 Ok(Some(headers))
314 }
315
316 pub(crate) fn omap_from_src(&mut self) -> Result<Option<OMAPTable<'s>>> {
317 let index = self.extra_streams()?.omap_from_src;
318 match self.raw_stream(index)? {
319 Some(stream) => OMAPTable::parse(stream).map(Some),
320 None => Ok(None),
321 }
322 }
323
324 pub(crate) fn omap_to_src(&mut self) -> Result<Option<OMAPTable<'s>>> {
325 let index = self.extra_streams()?.omap_to_src;
326 match self.raw_stream(index)? {
327 Some(stream) => OMAPTable::parse(stream).map(Some),
328 None => Ok(None),
329 }
330 }
331
332 /// Build a map translating between different kinds of offsets and virtual addresses.
333 ///
334 /// For more information on address translation, see [`AddressMap`].
335 ///
336 /// This reads `omap_from_src` and either `original_sections` or `sections` from this PDB and
337 /// chooses internally which strategy to use for resolving RVAs. Consider to reuse this instance
338 /// for multiple translations.
339 ///
340 /// # Errors
341 ///
342 /// * `Error::OmapNotFound` if an OMAP is required for translation but missing
343 /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers
344 /// * `Error::IoError` if returned by the `Source`
345 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
346 /// * `Error::UnexpectedEof` if the section headers are truncated mid-record
347 ///
348 /// If `debug_information()` was not already called, `omap_table()` will additionally read the
349 /// debug information header, in which case it can also return:
350 ///
351 /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream
352 /// * `Error::UnimplementedFeature` if the debug information header predates ~1995
353 ///
354 /// # Example
355 ///
356 /// ```rust
357 /// # use pdb::{Rva, FallibleIterator};
358 /// #
359 /// # fn test() -> pdb::Result<()> {
360 /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?;
361 /// let mut pdb = pdb::PDB::open(source)?;
362 ///
363 /// // Compute the address map once and reuse it
364 /// let address_map = pdb.address_map()?;
365 ///
366 /// # let symbol_table = pdb.global_symbols()?;
367 /// # let symbol = symbol_table.iter().next()?.unwrap();
368 /// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => {
369 /// // Obtain some section offset, eg from a symbol, and convert it
370 /// match pubsym.offset.to_rva(&address_map) {
371 /// Some(rva) => {
372 /// println!("symbol is at {}", rva);
373 /// # assert_eq!(rva, Rva(26048));
374 /// }
375 /// None => {
376 /// println!("symbol refers to eliminated code");
377 /// # panic!("symbol should exist");
378 /// }
379 /// }
380 /// # } _ => unreachable!() }
381 /// # Ok(())
382 /// # }
383 /// # test().unwrap()
384 /// ```
385 pub fn address_map(&mut self) -> Result<AddressMap<'s>> {
386 let sections = self.sections()?.unwrap_or_default();
387 Ok(match self.original_sections()? {
388 Some(original_sections) => {
389 let omap_from_src = self.omap_from_src()?.ok_or(Error::AddressMapNotFound)?;
390 let omap_to_src = self.omap_to_src()?.ok_or(Error::AddressMapNotFound)?;
391
392 AddressMap {
393 original_sections,
394 transformed_sections: Some(sections),
395 original_to_transformed: Some(omap_from_src),
396 transformed_to_original: Some(omap_to_src),
397 }
398 }
399 None => AddressMap {
400 original_sections: sections,
401 transformed_sections: None,
402 original_to_transformed: None,
403 transformed_to_original: None,
404 },
405 })
406 }
407
408 /// Retrieve the global string table of this PDB.
409 ///
410 /// Long strings, such as file names, are stored in a global deduplicated string table. They are
411 /// referred to by the [`StringRef`] type, which contains an offset into that table. Strings in
412 /// the table are stored as null-terminated C strings. Modern PDBs only store valid UTF-8 data
413 /// in the string table, but for older types a decoding might be necessary.
414 ///
415 /// The string table offers cheap zero-copy access to the underlying string data. It is
416 /// therefore cheap to build.
417 ///
418 /// # Example
419 ///
420 /// ```
421 /// # use pdb::{FallibleIterator, StringRef, PDB};
422 /// #
423 /// # fn test() -> pdb::Result<()> {
424 /// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
425 /// let mut pdb = PDB::open(file)?;
426 /// let strings = pdb.string_table()?;
427 ///
428 /// // obtain a string ref somehow
429 /// # let string_ref = StringRef(0);
430 /// let raw_string = strings.get(string_ref)?;
431 /// println!("{}", raw_string.to_string());
432 ///
433 /// // alternatively, use convenience methods
434 /// println!("{}", string_ref.to_string_lossy(&strings)?);
435 ///
436 /// # Ok(())
437 /// # }
438 /// ```
439 ///
440 /// # Errors
441 ///
442 /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers
443 /// * `Error::IoError` if returned by the `Source`
444 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
445 /// * `Error::UnexpectedEof` if the string table ends prematurely
446 pub fn string_table(&mut self) -> Result<StringTable<'s>> {
447 let stream = self.named_stream(b"/names")?;
448 StringTable::parse(stream)
449 }
450
451 /// Retrieve a stream by its index to read its contents as bytes.
452 ///
453 /// # Errors
454 ///
455 /// * `Error::StreamNotFound` if the PDB does not contain this stream
456 /// * `Error::IoError` if returned by the `Source`
457 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
458 ///
459 /// # Example
460 ///
461 /// ```
462 /// # fn test() -> pdb::Result<()> {
463 /// let file = std::fs::File::open("fixtures/self/foo.pdb")?;
464 /// let mut pdb = pdb::PDB::open(file)?;
465 /// // This is the index of the "mystream" stream that was added using pdbstr.exe.
466 /// let s = pdb.raw_stream(pdb::StreamIndex(208))?.expect("stream exists");
467 /// assert_eq!(s.as_slice(), b"hello world\n");
468 /// # Ok(())
469 /// # }
470 /// ```
471 pub fn raw_stream(&mut self, index: StreamIndex) -> Result<Option<Stream<'s>>> {
472 match index.msf_number() {
473 Some(number) => self.msf.get(number, None).map(Some),
474 None => Ok(None),
475 }
476 }
477
478 /// Retrieve a stream by its name, as declared in the PDB info stream.
479 ///
480 /// # Errors
481 ///
482 /// * `Error::StreamNameNotFound` if the PDB does not specify a stream with that name
483 /// * `Error::StreamNotFound` if the PDB does not contain the stream referred to
484 /// * `Error::IoError` if returned by the `Source`
485 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
486 pub fn named_stream(&mut self, name: &[u8]) -> Result<Stream<'s>> {
487 let info = self.pdb_information()?;
488 let names = info.stream_names()?;
489 for named_stream in &names {
490 if named_stream.name.as_bytes() == name {
491 return self
492 .raw_stream(named_stream.stream_id)?
493 .ok_or(Error::StreamNameNotFound);
494 }
495 }
496 Err(Error::StreamNameNotFound)
497 }
498
499 /// Loads the Optional Debug Header Stream, which contains offsets into extra streams.
500 ///
501 /// this stream is always returned, but its members are all optional depending on the data
502 /// present in the PDB.
503 ///
504 /// The optional header begins at offset 0 immediately after the EC Substream ends.
505 fn extra_streams(&mut self) -> Result<DBIExtraStreams> {
506 if let Some(extra) = self.dbi_extra_streams {
507 return Ok(extra);
508 }
509
510 // Parse and grab information on extra streams, since we might also need that
511 let debug_info = self.debug_information()?;
512 let extra = DBIExtraStreams::new(&debug_info)?;
513 self.dbi_extra_streams = Some(extra);
514
515 Ok(extra)
516 }
517}
518
519impl StreamIndex {
520 /// Load the raw data of this stream from the PDB.
521 ///
522 /// Returns `None` if this index is none. Otherwise, this will try to read the stream from the
523 /// PDB, which might fail if the stream is missing.
524 ///
525 /// # Errors
526 ///
527 /// * `Error::StreamNotFound` if the PDB does not contain this stream
528 /// * `Error::IoError` if returned by the `Source`
529 /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt
530 pub fn get<'s, S>(self, pdb: &mut PDB<'s, S>) -> Result<Option<Stream<'s>>>
531 where
532 S: Source<'s> + 's,
533 {
534 pdb.raw_stream(self)
535 }
536}