pdb/tpi/
mod.rs

1// Copyright 2017 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::fmt;
9use std::marker::PhantomData;
10use std::result;
11
12use crate::common::*;
13use crate::msf::Stream;
14use crate::FallibleIterator;
15
16pub(crate) mod constants;
17mod data;
18mod header;
19mod id;
20mod primitive;
21
22use self::header::*;
23use self::primitive::type_data_for_primitive;
24
25pub use self::data::*;
26pub use self::id::*;
27pub use self::primitive::{Indirection, PrimitiveKind, PrimitiveType};
28
29/// Zero-copy access to a PDB type or id stream.
30///
31/// PDBs store two kinds of related streams with an identical internal structure:
32///
33///  - [`TypeInformation`] (TPI stream) contains information on primitive types, classes and
34///    procedures, including their return type and arguments. Its contents are identified by
35///    [`TypeIndex`].
36///  - [`IdInformation`] (IPI stream) is a stricter version of the above stream that contains inline
37///    functions, build infos and source references. Its contents are identified by [`IdIndex`].
38///
39/// Items in these streams are stored by their index in ascending order. Symbols declared in
40/// [`ModuleInfo`](crate::ModuleInfo) can refer to items in both streams, as well as items to other
41/// items with one exception: `Type`s cannot refer to `Id`s. Also, the PDB format requires that
42/// items refer only to types with lower indexes. Thus, the stream of items forms a directed acyclic
43/// graph.
44///
45/// Both streams can iterate by their index using [`ItemInformation::iter`]. Additionally,
46/// [`ItemFinder`] is a secondary data structure to provide efficient backtracking for random
47/// access.
48///
49/// There are type definitions for both streams:
50///
51///  - `ItemInformation`: [`TypeInformation`] and [`IdInformation`]
52///  - [`ItemFinder`]: [`TypeFinder`] and [`IdFinder`]
53///  - [`ItemIndex`]: [`TypeIndex`] and [`IdIndex`]
54///  - [`ItemIter`]: [`TypeIter`] and [`IdIter`]
55///  - [`Item`]: [`Type`] and [`Id`]
56///
57/// # Examples
58///
59/// Iterating over the types while building a `TypeFinder`:
60///
61/// ```
62/// # use pdb::FallibleIterator;
63/// #
64/// # fn test() -> pdb::Result<usize> {
65/// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
66/// # let mut pdb = pdb::PDB::open(file)?;
67///
68/// let type_information = pdb.type_information()?;
69/// let mut type_finder = type_information.finder();
70///
71/// # let expected_count = type_information.len();
72/// # let mut count: usize = 0;
73/// let mut iter = type_information.iter();
74/// while let Some(typ) = iter.next()? {
75///     // build the type finder as we go
76///     type_finder.update(&iter);
77///
78///     // parse the type record
79///     match typ.parse() {
80///         Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => {
81///             // this Type describes a class-like type with fields
82///             println!("type {} is a class named {}", typ.index(), name);
83///
84///             // `fields` is a TypeIndex which refers to a FieldList
85///             // To find information about the fields, find and parse that Type
86///             match type_finder.find(fields)?.parse()? {
87///                 pdb::TypeData::FieldList(list) => {
88///                     // `fields` is a Vec<TypeData>
89///                     for field in list.fields {
90///                         if let pdb::TypeData::Member(member) = field {
91///                             // follow `member.field_type` as desired
92///                             println!("  - field {} at offset {:x}", member.name, member.offset);
93///                         } else {
94///                             // handle member functions, nested types, etc.
95///                         }
96///                     }
97///
98///                     if let Some(more_fields) = list.continuation {
99///                         // A FieldList can be split across multiple records
100///                         // TODO: follow `more_fields` and handle the next FieldList
101///                     }
102///                 }
103///                 _ => { }
104///             }
105///
106///         },
107///         Ok(_) => {
108///             // ignore everything that's not a class-like type
109///         },
110///         Err(pdb::Error::UnimplementedTypeKind(_)) => {
111///             // found an unhandled type record
112///             // this probably isn't fatal in most use cases
113///         },
114///         Err(e) => {
115///             // other error, probably is worth failing
116///             return Err(e);
117///         }
118///     }
119///     # count += 1;
120/// }
121///
122/// # assert_eq!(expected_count, count);
123/// # Ok(count)
124/// # }
125/// # assert!(test().expect("test") > 8000);
126/// ```
127#[derive(Debug)]
128pub struct ItemInformation<'s, I> {
129    stream: Stream<'s>,
130    header: Header,
131    _ph: PhantomData<&'s I>,
132}
133
134impl<'s, I> ItemInformation<'s, I>
135where
136    I: ItemIndex,
137{
138    /// Parses `TypeInformation` from raw stream data.
139    pub(crate) fn parse(stream: Stream<'s>) -> Result<Self> {
140        let mut buf = stream.parse_buffer();
141        let header = Header::parse(&mut buf)?;
142        let _ph = PhantomData;
143        Ok(Self {
144            stream,
145            header,
146            _ph,
147        })
148    }
149
150    /// Returns an iterator that can traverse the type table in sequential order.
151    pub fn iter(&self) -> ItemIter<'_, I> {
152        // get a parse buffer
153        let mut buf = self.stream.parse_buffer();
154
155        // drop the header
156        // this can't fail; we've already read this once
157        buf.take(self.header.header_size as usize)
158            .expect("dropping TPI header");
159
160        ItemIter {
161            buf,
162            index: self.header.minimum_index,
163            _ph: PhantomData,
164        }
165    }
166
167    /// Returns the number of items contained in this `ItemInformation`.
168    ///
169    /// Note that in the case of the type stream ([`TypeInformation`]) primitive types are not
170    /// stored in the PDB file. The number of distinct types reachable via this table will be higher
171    /// than `len()`.
172    pub fn len(&self) -> usize {
173        (self.header.maximum_index - self.header.minimum_index) as usize
174    }
175
176    /// Returns whether this `ItemInformation` contains any data.
177    pub fn is_empty(&self) -> bool {
178        self.len() == 0
179    }
180
181    /// Returns an `ItemFinder` with a default time-space tradeoff useful for access by
182    /// [`ItemIndex`].
183    ///
184    /// The `ItemFinder` is initially empty and must be populated by iterating. See the struct-level
185    /// docs for an example.
186    pub fn finder(&self) -> ItemFinder<'_, I> {
187        ItemFinder::new(self, 3)
188    }
189}
190
191/// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything
192/// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems
193/// like a reasonable thing to do.
194const PRIMITIVE_TYPE: &[u8] = b"\xff\xff";
195
196/// Represents an entry in the type or id stream.
197///
198/// An `Item` has been minimally processed and may not be correctly formed or even understood by
199/// this library. To avoid copying, `Items`s exist as references to data owned by the parent
200/// `ItemInformation`. Therefore, an `Item` may not outlive its parent.
201///
202/// The data held by items can be parsed:
203///
204///  - [`Type::parse`](Self::parse) returns [`TypeData`].
205///  - [`Id::parse`](Self::parse) returns [`IdData`].
206///
207/// Depending on the stream, this can either be a [`Type`] or [`Id`].
208#[derive(Copy, Clone, PartialEq)]
209pub struct Item<'t, I> {
210    index: I,
211    data: &'t [u8],
212}
213
214impl<'t, I> Item<'t, I>
215where
216    I: ItemIndex,
217{
218    /// Returns this item's index.
219    ///
220    /// Depending on the stream, either a [`TypeIndex`] or [`IdIndex`].
221    pub fn index(&self) -> I {
222        self.index
223    }
224
225    /// Returns the the binary data length in the on-disk format.
226    ///
227    /// Items are prefixed by a 16-bit length number, which is not included in this length.
228    pub fn len(&self) -> usize {
229        self.data.len()
230    }
231
232    /// Returns whether this items's data is empty.
233    ///
234    /// Items are prefixed by a 16-bit length number, which is not included in this operation.
235    pub fn is_empty(&self) -> bool {
236        self.data.is_empty()
237    }
238
239    /// Returns the identifier of the kind of data stored by this this `Item`.
240    ///
241    /// As a special case, if this is a primitive [`Type`], this function will return `0xffff`.
242    #[inline]
243    pub fn raw_kind(&self) -> u16 {
244        debug_assert!(self.data.len() >= 2);
245
246        // assemble a little-endian u16
247        u16::from(self.data[0]) | (u16::from(self.data[1]) << 8)
248    }
249}
250
251impl<'t, I> fmt::Debug for Item<'t, I>
252where
253    I: ItemIndex,
254{
255    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256        write!(
257            f,
258            "Type{{ kind: 0x{:04x} [{} bytes] }}",
259            self.raw_kind(),
260            self.data.len()
261        )
262    }
263}
264
265/// In-memory index for efficient random-access to [`Item`]s by index.
266///
267/// `ItemFinder` can be obtained via [`ItemInformation::finder`]. It starts out empty and must be
268/// populated by calling [`ItemFinder::update`] while iterating. There are two typedefs for easier
269/// use:
270///
271///  - [`TypeFinder`] for finding [`Type`]s in a [`TypeInformation`](crate::TypeInformation) (TPI stream).
272///  - [`IdFinder`] for finding [`Id`]s in a [`IdInformation`](crate::IdInformation) (IPI stream).
273///
274/// `ItemFinder` allocates all the memory it needs when it is first created. The footprint is
275/// directly proportional to the total number of types; see [`ItemInformation::len`].
276///
277/// # Time/space trade-off
278///
279/// The naïve approach is to store the position of each `Item` as they are covered in the stream.
280/// The cost is memory: namely one `u32` per `Item`.
281///
282/// Compare this approach to an `ItemFinder` that stores the position of every Nth item. Memory
283/// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2
284/// iterations per lookup. However, iteration is cheap sequential memory access, and spending less
285/// memory on `ItemFinder` means more of the data can fit in the cache, so this is likely a good
286/// trade-off for small-ish values of N.
287///
288/// `ItemFinder` is parameterized by `shift` which controls this trade-off as powers of two:
289///
290///   * If `shift` is 0, `ItemFinder` stores 4 bytes per `Item` and always performs direct lookups.
291///   * If `shift` is 1, `ItemFinder` stores 2 bytes per `Item` and averages 0.5 iterations per
292///     lookup.
293///   * If `shift` is 2, `ItemFinder` stores 1 byte per `Item` and averages 1.5 iterations per
294///     lookup.
295///   * If `shift` is 3, `ItemFinder` stores 4 bits per `Item` and averages 3.5 iterations per
296///     lookup.
297///   * If `shift` is 4, `ItemFinder` stores 2 bits per `Item` and averages 7.5 iterations per
298///     lookup.
299///   * If `shift` is 5, `ItemFinder` stores 1 bit per `Item` and averages 15.5 iterations per
300///     lookup.
301///
302/// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to
303/// item size, which varies, but typical numbers from a large program are:
304///
305///   * 24% of items are    12 bytes
306///   * 34% of items are <= 16 bytes
307///   * 84% of items are <= 32 bytes
308///
309/// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K items would require 1 MB or
310/// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only
311/// one or two 64-byte cache lines.
312#[derive(Debug)]
313pub struct ItemFinder<'t, I> {
314    buffer: ParseBuffer<'t>,
315    minimum_index: u32,
316    maximum_index: u32,
317    positions: Vec<u32>,
318    shift: u8,
319    _ph: PhantomData<&'t I>,
320}
321
322impl<'t, I> ItemFinder<'t, I>
323where
324    I: ItemIndex,
325{
326    fn new(info: &'t ItemInformation<'_, I>, shift: u8) -> Self {
327        // maximum index is the highest index + 1.
328        let count = info.header.maximum_index - info.header.minimum_index;
329
330        let round_base = (1 << shift) - 1;
331        let shifted_count = ((count + round_base) & !round_base) >> shift;
332        let mut positions = Vec::with_capacity(shifted_count as usize);
333
334        if shifted_count > 0 {
335            // add record zero, which is identical regardless of shift
336            positions.push(info.header.header_size);
337        }
338
339        Self {
340            buffer: info.stream.parse_buffer(),
341            minimum_index: info.header.minimum_index,
342            maximum_index: info.header.maximum_index,
343            positions,
344            shift,
345            _ph: PhantomData,
346        }
347    }
348
349    /// Given an index, find which position in the Vec we should jump to and how many times we
350    /// need to iterate to find the requested type.
351    ///
352    /// `shift` refers to the size of these bit shifts.
353    #[inline]
354    fn resolve(&self, type_index: u32) -> (usize, usize) {
355        let raw = type_index - self.minimum_index;
356        (
357            (raw >> self.shift) as usize,
358            (raw & ((1 << self.shift) - 1)) as usize,
359        )
360    }
361
362    /// Returns the highest index which is currently served by this `ItemFinder`.
363    ///
364    /// When iterating through the stream, you shouldn't need to consider this. Items only ever
365    /// reference lower indexes. However, when loading items referenced by the symbols stream, this
366    /// can be useful to check whether iteration is required.
367    #[inline]
368    pub fn max_index(&self) -> I {
369        I::from(match self.positions.len() {
370            0 => 0, // special case for an empty type index
371            len => (len << self.shift) as u32 + self.minimum_index - 1,
372        })
373    }
374
375    /// Update this `ItemFinder` based on the current position of a [`ItemIter`].
376    ///
377    /// Do this each time you call `.next()`. See documentation of [`ItemInformation`] for an
378    /// example.
379    #[inline]
380    pub fn update(&mut self, iterator: &ItemIter<'t, I>) {
381        let (vec_index, iteration_count) = self.resolve(iterator.index);
382        if iteration_count == 0 && vec_index == self.positions.len() {
383            let pos = iterator.buf.pos();
384            assert!(pos < u32::max_value() as usize);
385            self.positions.push(pos as u32);
386        }
387    }
388
389    /// Find an `Item` by its index.
390    ///
391    /// # Errors
392    ///
393    /// * `Error::TypeNotFound(index)` if you ask for an item that doesn't exist.
394    /// * `Error::TypeNotIndexed(index, max_index)` if you ask for an item that is known to exist
395    ///   but is not currently known by this `ItemFinder`.
396    pub fn find(&self, index: I) -> Result<Item<'t, I>> {
397        let index: u32 = index.into();
398        if index < self.minimum_index {
399            return Ok(Item {
400                index: I::from(index),
401                data: PRIMITIVE_TYPE,
402            });
403        } else if index > self.maximum_index {
404            return Err(Error::TypeNotFound(index));
405        }
406
407        // figure out where we'd find this
408        let (vec_index, iteration_count) = self.resolve(index);
409
410        if let Some(pos) = self.positions.get(vec_index) {
411            // hit
412            let mut buf = self.buffer.clone();
413
414            // jump forwards
415            buf.take(*pos as usize)?;
416
417            // skip some records
418            for _ in 0..iteration_count {
419                let length = buf.parse_u16()?;
420                buf.take(length as usize)?;
421            }
422
423            // read the type
424            let length = buf.parse_u16()?;
425
426            Ok(Item {
427                index: I::from(index),
428                data: buf.take(length as usize)?,
429            })
430        } else {
431            // miss
432            Err(Error::TypeNotIndexed(index, self.max_index().into()))
433        }
434    }
435}
436
437/// An iterator over items in [`TypeInformation`](crate::TypeInformation) or
438/// [`IdInformation`](crate::IdInformation).
439///
440/// The TPI and IPI streams are represented internally as a series of records, each of which have a
441/// length, a kind, and a type-specific field layout. Iteration performance is therefore similar to
442/// a linked list.
443#[derive(Debug)]
444pub struct ItemIter<'t, I> {
445    buf: ParseBuffer<'t>,
446    index: u32,
447    _ph: PhantomData<&'t I>,
448}
449
450impl<'t, I> FallibleIterator for ItemIter<'t, I>
451where
452    I: ItemIndex,
453{
454    type Item = Item<'t, I>;
455    type Error = Error;
456
457    fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
458        // see if we're at EOF
459        if self.buf.is_empty() {
460            return Ok(None);
461        }
462
463        // read the length of the next type
464        let length = self.buf.parse_u16()? as usize;
465
466        // validate
467        if length < 2 {
468            // this can't be correct
469            return Err(Error::TypeTooShort);
470        }
471
472        // grab the type itself
473        let type_buf = self.buf.take(length)?;
474        let index = self.index;
475
476        self.index += 1;
477
478        // Done
479        Ok(Some(Item {
480            index: I::from(index),
481            data: type_buf,
482        }))
483    }
484}
485
486/// Zero-copy access to the PDB type stream (TPI).
487///
488/// This stream exposes types, the variants of which are enumerated by [`TypeData`]. See
489/// [`ItemInformation`] for more information on accessing types.
490pub type TypeInformation<'s> = ItemInformation<'s, TypeIndex>;
491
492/// In-memory index for efficient random-access of [`Type`]s by index.
493///
494/// `TypeFinder` can be obtained via [`TypeInformation::finder`](ItemInformation::finder). See
495/// [`ItemFinder`] for more information.
496pub type TypeFinder<'t> = ItemFinder<'t, TypeIndex>;
497
498/// An iterator over [`Type`]s returned by [`TypeInformation::iter`](ItemInformation::iter).
499pub type TypeIter<'t> = ItemIter<'t, TypeIndex>;
500
501/// Information on a primitive type, class, or procedure.
502pub type Type<'t> = Item<'t, TypeIndex>;
503
504impl<'t> Item<'t, TypeIndex> {
505    /// Parse this `Type` into `TypeData`.
506    ///
507    /// # Errors
508    ///
509    /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this
510    ///   library
511    /// * `Error::UnexpectedEof` if the type record is malformed
512    pub fn parse(&self) -> Result<TypeData<'t>> {
513        if self.index < TypeIndex(0x1000) {
514            // Primitive type
515            type_data_for_primitive(self.index)
516        } else {
517            let mut buf = ParseBuffer::from(self.data);
518            parse_type_data(&mut buf)
519        }
520    }
521}
522
523/// Zero-copy access to the PDB type stream (TPI).
524///
525/// This stream exposes types, the variants of which are enumerated by [`IdData`]. See
526/// [`ItemInformation`] for more information on accessing types.
527pub type IdInformation<'s> = ItemInformation<'s, IdIndex>;
528
529/// In-memory index for efficient random-access of [`Id`]s by index.
530///
531/// `IdFinder` can be obtained via [`IdInformation::finder`](ItemInformation::finder). See
532/// [`ItemFinder`] for more information.
533pub type IdFinder<'t> = ItemFinder<'t, IdIndex>;
534
535/// An iterator over [`Id`]s returned by [`IdInformation::iter`](ItemInformation::iter).
536pub type IdIter<'t> = ItemIter<'t, IdIndex>;
537
538/// Information on an inline function, build infos or source references.
539pub type Id<'t> = Item<'t, IdIndex>;
540
541impl<'t> Item<'t, IdIndex> {
542    /// Parse this `Id` into `IdData`.
543    ///
544    /// # Errors
545    ///
546    /// * `Error::UnimplementedTypeKind(kind)` if the id record isn't currently understood by this
547    ///   library
548    /// * `Error::UnexpectedEof` if the id record is malformed
549    pub fn parse(&self) -> Result<IdData<'t>> {
550        ParseBuffer::from(self.data).parse()
551    }
552}