pdb/tpi/mod.rs
1// Copyright 2017 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::fmt;
9use std::marker::PhantomData;
10use std::result;
11
12use crate::common::*;
13use crate::msf::Stream;
14use crate::FallibleIterator;
15
16pub(crate) mod constants;
17mod data;
18mod header;
19mod id;
20mod primitive;
21
22use self::header::*;
23use self::primitive::type_data_for_primitive;
24
25pub use self::data::*;
26pub use self::id::*;
27pub use self::primitive::{Indirection, PrimitiveKind, PrimitiveType};
28
29/// Zero-copy access to a PDB type or id stream.
30///
31/// PDBs store two kinds of related streams with an identical internal structure:
32///
33/// - [`TypeInformation`] (TPI stream) contains information on primitive types, classes and
34/// procedures, including their return type and arguments. Its contents are identified by
35/// [`TypeIndex`].
36/// - [`IdInformation`] (IPI stream) is a stricter version of the above stream that contains inline
37/// functions, build infos and source references. Its contents are identified by [`IdIndex`].
38///
39/// Items in these streams are stored by their index in ascending order. Symbols declared in
40/// [`ModuleInfo`](crate::ModuleInfo) can refer to items in both streams, as well as items to other
41/// items with one exception: `Type`s cannot refer to `Id`s. Also, the PDB format requires that
42/// items refer only to types with lower indexes. Thus, the stream of items forms a directed acyclic
43/// graph.
44///
45/// Both streams can iterate by their index using [`ItemInformation::iter`]. Additionally,
46/// [`ItemFinder`] is a secondary data structure to provide efficient backtracking for random
47/// access.
48///
49/// There are type definitions for both streams:
50///
51/// - `ItemInformation`: [`TypeInformation`] and [`IdInformation`]
52/// - [`ItemFinder`]: [`TypeFinder`] and [`IdFinder`]
53/// - [`ItemIndex`]: [`TypeIndex`] and [`IdIndex`]
54/// - [`ItemIter`]: [`TypeIter`] and [`IdIter`]
55/// - [`Item`]: [`Type`] and [`Id`]
56///
57/// # Examples
58///
59/// Iterating over the types while building a `TypeFinder`:
60///
61/// ```
62/// # use pdb::FallibleIterator;
63/// #
64/// # fn test() -> pdb::Result<usize> {
65/// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
66/// # let mut pdb = pdb::PDB::open(file)?;
67///
68/// let type_information = pdb.type_information()?;
69/// let mut type_finder = type_information.finder();
70///
71/// # let expected_count = type_information.len();
72/// # let mut count: usize = 0;
73/// let mut iter = type_information.iter();
74/// while let Some(typ) = iter.next()? {
75/// // build the type finder as we go
76/// type_finder.update(&iter);
77///
78/// // parse the type record
79/// match typ.parse() {
80/// Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => {
81/// // this Type describes a class-like type with fields
82/// println!("type {} is a class named {}", typ.index(), name);
83///
84/// // `fields` is a TypeIndex which refers to a FieldList
85/// // To find information about the fields, find and parse that Type
86/// match type_finder.find(fields)?.parse()? {
87/// pdb::TypeData::FieldList(list) => {
88/// // `fields` is a Vec<TypeData>
89/// for field in list.fields {
90/// if let pdb::TypeData::Member(member) = field {
91/// // follow `member.field_type` as desired
92/// println!(" - field {} at offset {:x}", member.name, member.offset);
93/// } else {
94/// // handle member functions, nested types, etc.
95/// }
96/// }
97///
98/// if let Some(more_fields) = list.continuation {
99/// // A FieldList can be split across multiple records
100/// // TODO: follow `more_fields` and handle the next FieldList
101/// }
102/// }
103/// _ => { }
104/// }
105///
106/// },
107/// Ok(_) => {
108/// // ignore everything that's not a class-like type
109/// },
110/// Err(pdb::Error::UnimplementedTypeKind(_)) => {
111/// // found an unhandled type record
112/// // this probably isn't fatal in most use cases
113/// },
114/// Err(e) => {
115/// // other error, probably is worth failing
116/// return Err(e);
117/// }
118/// }
119/// # count += 1;
120/// }
121///
122/// # assert_eq!(expected_count, count);
123/// # Ok(count)
124/// # }
125/// # assert!(test().expect("test") > 8000);
126/// ```
127#[derive(Debug)]
128pub struct ItemInformation<'s, I> {
129 stream: Stream<'s>,
130 header: Header,
131 _ph: PhantomData<&'s I>,
132}
133
134impl<'s, I> ItemInformation<'s, I>
135where
136 I: ItemIndex,
137{
138 /// Parses `TypeInformation` from raw stream data.
139 pub(crate) fn parse(stream: Stream<'s>) -> Result<Self> {
140 let mut buf = stream.parse_buffer();
141 let header = Header::parse(&mut buf)?;
142 let _ph = PhantomData;
143 Ok(Self {
144 stream,
145 header,
146 _ph,
147 })
148 }
149
150 /// Returns an iterator that can traverse the type table in sequential order.
151 pub fn iter(&self) -> ItemIter<'_, I> {
152 // get a parse buffer
153 let mut buf = self.stream.parse_buffer();
154
155 // drop the header
156 // this can't fail; we've already read this once
157 buf.take(self.header.header_size as usize)
158 .expect("dropping TPI header");
159
160 ItemIter {
161 buf,
162 index: self.header.minimum_index,
163 _ph: PhantomData,
164 }
165 }
166
167 /// Returns the number of items contained in this `ItemInformation`.
168 ///
169 /// Note that in the case of the type stream ([`TypeInformation`]) primitive types are not
170 /// stored in the PDB file. The number of distinct types reachable via this table will be higher
171 /// than `len()`.
172 pub fn len(&self) -> usize {
173 (self.header.maximum_index - self.header.minimum_index) as usize
174 }
175
176 /// Returns whether this `ItemInformation` contains any data.
177 pub fn is_empty(&self) -> bool {
178 self.len() == 0
179 }
180
181 /// Returns an `ItemFinder` with a default time-space tradeoff useful for access by
182 /// [`ItemIndex`].
183 ///
184 /// The `ItemFinder` is initially empty and must be populated by iterating. See the struct-level
185 /// docs for an example.
186 pub fn finder(&self) -> ItemFinder<'_, I> {
187 ItemFinder::new(self, 3)
188 }
189}
190
191/// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything
192/// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems
193/// like a reasonable thing to do.
194const PRIMITIVE_TYPE: &[u8] = b"\xff\xff";
195
196/// Represents an entry in the type or id stream.
197///
198/// An `Item` has been minimally processed and may not be correctly formed or even understood by
199/// this library. To avoid copying, `Items`s exist as references to data owned by the parent
200/// `ItemInformation`. Therefore, an `Item` may not outlive its parent.
201///
202/// The data held by items can be parsed:
203///
204/// - [`Type::parse`](Self::parse) returns [`TypeData`].
205/// - [`Id::parse`](Self::parse) returns [`IdData`].
206///
207/// Depending on the stream, this can either be a [`Type`] or [`Id`].
208#[derive(Copy, Clone, PartialEq)]
209pub struct Item<'t, I> {
210 index: I,
211 data: &'t [u8],
212}
213
214impl<'t, I> Item<'t, I>
215where
216 I: ItemIndex,
217{
218 /// Returns this item's index.
219 ///
220 /// Depending on the stream, either a [`TypeIndex`] or [`IdIndex`].
221 pub fn index(&self) -> I {
222 self.index
223 }
224
225 /// Returns the the binary data length in the on-disk format.
226 ///
227 /// Items are prefixed by a 16-bit length number, which is not included in this length.
228 pub fn len(&self) -> usize {
229 self.data.len()
230 }
231
232 /// Returns whether this items's data is empty.
233 ///
234 /// Items are prefixed by a 16-bit length number, which is not included in this operation.
235 pub fn is_empty(&self) -> bool {
236 self.data.is_empty()
237 }
238
239 /// Returns the identifier of the kind of data stored by this this `Item`.
240 ///
241 /// As a special case, if this is a primitive [`Type`], this function will return `0xffff`.
242 #[inline]
243 pub fn raw_kind(&self) -> u16 {
244 debug_assert!(self.data.len() >= 2);
245
246 // assemble a little-endian u16
247 u16::from(self.data[0]) | (u16::from(self.data[1]) << 8)
248 }
249}
250
251impl<'t, I> fmt::Debug for Item<'t, I>
252where
253 I: ItemIndex,
254{
255 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256 write!(
257 f,
258 "Type{{ kind: 0x{:04x} [{} bytes] }}",
259 self.raw_kind(),
260 self.data.len()
261 )
262 }
263}
264
265/// In-memory index for efficient random-access to [`Item`]s by index.
266///
267/// `ItemFinder` can be obtained via [`ItemInformation::finder`]. It starts out empty and must be
268/// populated by calling [`ItemFinder::update`] while iterating. There are two typedefs for easier
269/// use:
270///
271/// - [`TypeFinder`] for finding [`Type`]s in a [`TypeInformation`](crate::TypeInformation) (TPI stream).
272/// - [`IdFinder`] for finding [`Id`]s in a [`IdInformation`](crate::IdInformation) (IPI stream).
273///
274/// `ItemFinder` allocates all the memory it needs when it is first created. The footprint is
275/// directly proportional to the total number of types; see [`ItemInformation::len`].
276///
277/// # Time/space trade-off
278///
279/// The naïve approach is to store the position of each `Item` as they are covered in the stream.
280/// The cost is memory: namely one `u32` per `Item`.
281///
282/// Compare this approach to an `ItemFinder` that stores the position of every Nth item. Memory
283/// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2
284/// iterations per lookup. However, iteration is cheap sequential memory access, and spending less
285/// memory on `ItemFinder` means more of the data can fit in the cache, so this is likely a good
286/// trade-off for small-ish values of N.
287///
288/// `ItemFinder` is parameterized by `shift` which controls this trade-off as powers of two:
289///
290/// * If `shift` is 0, `ItemFinder` stores 4 bytes per `Item` and always performs direct lookups.
291/// * If `shift` is 1, `ItemFinder` stores 2 bytes per `Item` and averages 0.5 iterations per
292/// lookup.
293/// * If `shift` is 2, `ItemFinder` stores 1 byte per `Item` and averages 1.5 iterations per
294/// lookup.
295/// * If `shift` is 3, `ItemFinder` stores 4 bits per `Item` and averages 3.5 iterations per
296/// lookup.
297/// * If `shift` is 4, `ItemFinder` stores 2 bits per `Item` and averages 7.5 iterations per
298/// lookup.
299/// * If `shift` is 5, `ItemFinder` stores 1 bit per `Item` and averages 15.5 iterations per
300/// lookup.
301///
302/// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to
303/// item size, which varies, but typical numbers from a large program are:
304///
305/// * 24% of items are 12 bytes
306/// * 34% of items are <= 16 bytes
307/// * 84% of items are <= 32 bytes
308///
309/// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K items would require 1 MB or
310/// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only
311/// one or two 64-byte cache lines.
312#[derive(Debug)]
313pub struct ItemFinder<'t, I> {
314 buffer: ParseBuffer<'t>,
315 minimum_index: u32,
316 maximum_index: u32,
317 positions: Vec<u32>,
318 shift: u8,
319 _ph: PhantomData<&'t I>,
320}
321
322impl<'t, I> ItemFinder<'t, I>
323where
324 I: ItemIndex,
325{
326 fn new(info: &'t ItemInformation<'_, I>, shift: u8) -> Self {
327 // maximum index is the highest index + 1.
328 let count = info.header.maximum_index - info.header.minimum_index;
329
330 let round_base = (1 << shift) - 1;
331 let shifted_count = ((count + round_base) & !round_base) >> shift;
332 let mut positions = Vec::with_capacity(shifted_count as usize);
333
334 if shifted_count > 0 {
335 // add record zero, which is identical regardless of shift
336 positions.push(info.header.header_size);
337 }
338
339 Self {
340 buffer: info.stream.parse_buffer(),
341 minimum_index: info.header.minimum_index,
342 maximum_index: info.header.maximum_index,
343 positions,
344 shift,
345 _ph: PhantomData,
346 }
347 }
348
349 /// Given an index, find which position in the Vec we should jump to and how many times we
350 /// need to iterate to find the requested type.
351 ///
352 /// `shift` refers to the size of these bit shifts.
353 #[inline]
354 fn resolve(&self, type_index: u32) -> (usize, usize) {
355 let raw = type_index - self.minimum_index;
356 (
357 (raw >> self.shift) as usize,
358 (raw & ((1 << self.shift) - 1)) as usize,
359 )
360 }
361
362 /// Returns the highest index which is currently served by this `ItemFinder`.
363 ///
364 /// When iterating through the stream, you shouldn't need to consider this. Items only ever
365 /// reference lower indexes. However, when loading items referenced by the symbols stream, this
366 /// can be useful to check whether iteration is required.
367 #[inline]
368 pub fn max_index(&self) -> I {
369 I::from(match self.positions.len() {
370 0 => 0, // special case for an empty type index
371 len => (len << self.shift) as u32 + self.minimum_index - 1,
372 })
373 }
374
375 /// Update this `ItemFinder` based on the current position of a [`ItemIter`].
376 ///
377 /// Do this each time you call `.next()`. See documentation of [`ItemInformation`] for an
378 /// example.
379 #[inline]
380 pub fn update(&mut self, iterator: &ItemIter<'t, I>) {
381 let (vec_index, iteration_count) = self.resolve(iterator.index);
382 if iteration_count == 0 && vec_index == self.positions.len() {
383 let pos = iterator.buf.pos();
384 assert!(pos < u32::max_value() as usize);
385 self.positions.push(pos as u32);
386 }
387 }
388
389 /// Find an `Item` by its index.
390 ///
391 /// # Errors
392 ///
393 /// * `Error::TypeNotFound(index)` if you ask for an item that doesn't exist.
394 /// * `Error::TypeNotIndexed(index, max_index)` if you ask for an item that is known to exist
395 /// but is not currently known by this `ItemFinder`.
396 pub fn find(&self, index: I) -> Result<Item<'t, I>> {
397 let index: u32 = index.into();
398 if index < self.minimum_index {
399 return Ok(Item {
400 index: I::from(index),
401 data: PRIMITIVE_TYPE,
402 });
403 } else if index > self.maximum_index {
404 return Err(Error::TypeNotFound(index));
405 }
406
407 // figure out where we'd find this
408 let (vec_index, iteration_count) = self.resolve(index);
409
410 if let Some(pos) = self.positions.get(vec_index) {
411 // hit
412 let mut buf = self.buffer.clone();
413
414 // jump forwards
415 buf.take(*pos as usize)?;
416
417 // skip some records
418 for _ in 0..iteration_count {
419 let length = buf.parse_u16()?;
420 buf.take(length as usize)?;
421 }
422
423 // read the type
424 let length = buf.parse_u16()?;
425
426 Ok(Item {
427 index: I::from(index),
428 data: buf.take(length as usize)?,
429 })
430 } else {
431 // miss
432 Err(Error::TypeNotIndexed(index, self.max_index().into()))
433 }
434 }
435}
436
437/// An iterator over items in [`TypeInformation`](crate::TypeInformation) or
438/// [`IdInformation`](crate::IdInformation).
439///
440/// The TPI and IPI streams are represented internally as a series of records, each of which have a
441/// length, a kind, and a type-specific field layout. Iteration performance is therefore similar to
442/// a linked list.
443#[derive(Debug)]
444pub struct ItemIter<'t, I> {
445 buf: ParseBuffer<'t>,
446 index: u32,
447 _ph: PhantomData<&'t I>,
448}
449
450impl<'t, I> FallibleIterator for ItemIter<'t, I>
451where
452 I: ItemIndex,
453{
454 type Item = Item<'t, I>;
455 type Error = Error;
456
457 fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
458 // see if we're at EOF
459 if self.buf.is_empty() {
460 return Ok(None);
461 }
462
463 // read the length of the next type
464 let length = self.buf.parse_u16()? as usize;
465
466 // validate
467 if length < 2 {
468 // this can't be correct
469 return Err(Error::TypeTooShort);
470 }
471
472 // grab the type itself
473 let type_buf = self.buf.take(length)?;
474 let index = self.index;
475
476 self.index += 1;
477
478 // Done
479 Ok(Some(Item {
480 index: I::from(index),
481 data: type_buf,
482 }))
483 }
484}
485
486/// Zero-copy access to the PDB type stream (TPI).
487///
488/// This stream exposes types, the variants of which are enumerated by [`TypeData`]. See
489/// [`ItemInformation`] for more information on accessing types.
490pub type TypeInformation<'s> = ItemInformation<'s, TypeIndex>;
491
492/// In-memory index for efficient random-access of [`Type`]s by index.
493///
494/// `TypeFinder` can be obtained via [`TypeInformation::finder`](ItemInformation::finder). See
495/// [`ItemFinder`] for more information.
496pub type TypeFinder<'t> = ItemFinder<'t, TypeIndex>;
497
498/// An iterator over [`Type`]s returned by [`TypeInformation::iter`](ItemInformation::iter).
499pub type TypeIter<'t> = ItemIter<'t, TypeIndex>;
500
501/// Information on a primitive type, class, or procedure.
502pub type Type<'t> = Item<'t, TypeIndex>;
503
504impl<'t> Item<'t, TypeIndex> {
505 /// Parse this `Type` into `TypeData`.
506 ///
507 /// # Errors
508 ///
509 /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this
510 /// library
511 /// * `Error::UnexpectedEof` if the type record is malformed
512 pub fn parse(&self) -> Result<TypeData<'t>> {
513 if self.index < TypeIndex(0x1000) {
514 // Primitive type
515 type_data_for_primitive(self.index)
516 } else {
517 let mut buf = ParseBuffer::from(self.data);
518 parse_type_data(&mut buf)
519 }
520 }
521}
522
523/// Zero-copy access to the PDB type stream (TPI).
524///
525/// This stream exposes types, the variants of which are enumerated by [`IdData`]. See
526/// [`ItemInformation`] for more information on accessing types.
527pub type IdInformation<'s> = ItemInformation<'s, IdIndex>;
528
529/// In-memory index for efficient random-access of [`Id`]s by index.
530///
531/// `IdFinder` can be obtained via [`IdInformation::finder`](ItemInformation::finder). See
532/// [`ItemFinder`] for more information.
533pub type IdFinder<'t> = ItemFinder<'t, IdIndex>;
534
535/// An iterator over [`Id`]s returned by [`IdInformation::iter`](ItemInformation::iter).
536pub type IdIter<'t> = ItemIter<'t, IdIndex>;
537
538/// Information on an inline function, build infos or source references.
539pub type Id<'t> = Item<'t, IdIndex>;
540
541impl<'t> Item<'t, IdIndex> {
542 /// Parse this `Id` into `IdData`.
543 ///
544 /// # Errors
545 ///
546 /// * `Error::UnimplementedTypeKind(kind)` if the id record isn't currently understood by this
547 /// library
548 /// * `Error::UnexpectedEof` if the id record is malformed
549 pub fn parse(&self) -> Result<IdData<'t>> {
550 ParseBuffer::from(self.data).parse()
551 }
552}