mozpdb/tpi/
mod.rs

1// Copyright 2017 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::fmt;
9use std::result;
10
11use common::*;
12use msf::Stream;
13use FallibleIterator;
14
15pub(crate) mod constants;
16mod data;
17mod header;
18mod primitive;
19
20use self::data::parse_type_data;
21use self::header::*;
22use self::primitive::type_data_for_primitive;
23
24pub use self::data::*;
25pub use self::primitive::{Indirection,PrimitiveKind,PrimitiveType};
26
27/// `TypeInformation` provides zero-copy access to a PDB type data stream.
28///
29/// PDB type information is stored as a stream of length-prefixed `Type` records, and thus the most
30/// fundamental operation supported by `TypeInformation` is to iterate over `Type`s.
31///
32/// Types are uniquely identified by `TypeIndex`, and types are stored within the PDB in ascending
33/// order of `TypeIndex`.
34///
35/// Many types refer to other types by `TypeIndex`, and these references may refer to other types
36/// forming a chain that's arbitrarily long. Fortunately, PDB format requires that types refer only
37/// to types with lower `TypeIndex`es; thus, the stream of types form a directed acyclic graph.
38///
39/// `TypeInformation` can iterate by `TypeIndex`, since that's essentially the only operation
40/// permitted by the data. `TypeFinder` is a secondary data structure to provide efficient
41/// backtracking.
42///
43/// # Examples
44///
45/// Iterating over the types while building a `TypeFinder`:
46///
47/// ```
48/// # use pdb::FallibleIterator;
49/// #
50/// # fn test() -> pdb::Result<usize> {
51/// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
52/// # let mut pdb = pdb::PDB::open(file)?;
53///
54/// let type_information = pdb.type_information()?;
55/// let mut type_finder = type_information.new_type_finder();
56///
57/// # let expected_count = type_information.len();
58/// # let mut count: usize = 0;
59/// let mut iter = type_information.iter();
60/// while let Some(typ) = iter.next()? {
61///     // build the type finder as we go
62///     type_finder.update(&iter);
63///
64///     // parse the type record
65///     match typ.parse() {
66///         Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => {
67///             // this Type describes a class-like type with fields
68///             println!("type {} is a class named {}", typ.type_index(), name);
69///
70///             // `fields` is a TypeIndex which refers to a FieldList
71///             // To find information about the fields, find and parse that Type
72///             match type_finder.find(fields)?.parse()? {
73///                 pdb::TypeData::FieldList(list) => {
74///                     // `fields` is a Vec<TypeData>
75///                     for field in list.fields {
76///                         if let pdb::TypeData::Member(member) = field {
77///                             // follow `member.field_type` as desired
78///                             println!("  - field {} at offset {:x}", member.name, member.offset);
79///                         } else {
80///                             // handle member functions, nested types, etc.
81///                         }
82///                     }
83///
84///                     if let Some(more_fields) = list.continuation {
85///                         // A FieldList can be split across multiple records
86///                         // TODO: follow `more_fields` and handle the next FieldList
87///                     }
88///                 }
89///                 _ => { }
90///             }
91///
92///         },
93///         Ok(_) => {
94///             // ignore everything that's not a class-like type
95///         },
96///         Err(pdb::Error::UnimplementedTypeKind(_)) => {
97///             // found an unhandled type record
98///             // this probably isn't fatal in most use cases
99///         },
100///         Err(e) => {
101///             // other error, probably is worth failing
102///             return Err(e);
103///         }
104///     }
105///     # count += 1;
106/// }
107///
108/// # assert_eq!(expected_count, count);
109/// # Ok(count)
110/// # }
111/// # assert!(test().expect("test") > 8000);
112/// ```
113#[derive(Debug)]
114pub struct TypeInformation<'t> {
115    stream: Stream<'t>,
116    header: Header,
117}
118
119impl<'t> TypeInformation<'t> {
120    /// Returns an iterator that can traverse the type table in sequential order.
121    pub fn iter(&self) -> TypeIter {
122        // get a parse buffer
123        let mut buf = self.stream.parse_buffer();
124
125        // drop the header
126        // this can't fail; we've already read this once
127        buf.take(self.header.header_size as usize).expect("dropping TPI header");
128
129        TypeIter{
130            buf: buf,
131            type_index: self.header.minimum_type_index,
132        }
133    }
134
135    /// Returns the number of types contained in this `TypeInformation`.
136    ///
137    /// Note that primitive types are not stored in the PDB file, so the number of distinct types
138    /// reachable via this `TypeInformation` will be higher than `len()`.
139    pub fn len(&self) -> usize {
140        (self.header.maximum_type_index - self.header.minimum_type_index) as usize
141    }
142
143    /// Returns a `TypeFinder` with a default time-space tradeoff.
144    ///
145    /// The `TypeFinder` is initially empty and must be populated by iterating.
146    pub fn new_type_finder(&self) -> TypeFinder {
147        new_type_finder(self, 3)
148    }
149}
150
151pub(crate) fn new_type_information(stream: Stream) -> Result<TypeInformation> {
152    let h;
153
154    {
155        let mut buf = stream.parse_buffer();
156        h = Header::parse(&mut buf)?;
157    }
158
159    Ok(TypeInformation{
160        stream: stream,
161        header: h,
162    })
163}
164
165/// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything
166/// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems
167/// like a reasonable thing to do.
168const PRIMITIVE_TYPE: &'static [u8] = b"\xff\xff";
169
170/// Represents a type from the type table. A `Type` has been minimally processed and may not be
171/// correctly formed or even understood by this library.
172///
173/// To avoid copying, `Type`s exist as references to data owned by the parent `TypeInformation`.
174/// Therefore, a `Type` may not outlive its parent.
175#[derive(Copy,Clone,PartialEq)]
176pub struct Type<'t>(TypeIndex, &'t [u8]);
177
178impl<'t> Type<'t> {
179    /// Returns this type's `TypeIndex`.
180    pub fn type_index(&self) -> TypeIndex {
181        self.0
182    }
183
184    /// Returns the length of this type's data in terms of bytes in the on-disk format.
185    ///
186    /// Types are prefixed by length, which is not included in this count.
187    pub fn len(&self) -> usize {
188        self.1.len()
189    }
190
191    /// Returns the kind of type identified by this `Type`.
192    ///
193    /// As a special case, if this `Type` is actually a primitive type, `raw_kind()` will return
194    /// `0xffff`.
195    #[inline]
196    pub fn raw_kind(&self) -> u16 {
197        debug_assert!(self.1.len() >= 2);
198
199        // assemble a little-endian u16
200        (self.1[0] as u16) | ((self.1[1] as u16) << 8)
201    }
202
203    /// Parse this Type into a TypeData.
204    ///
205    /// # Errors
206    ///
207    /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this
208    ///   library
209    /// * `Error::UnexpectedEof` if the type record is malformed
210    pub fn parse(&self) -> Result<TypeData<'t>> {
211        if self.0 < 0x1000 {
212            // Primitive type
213            type_data_for_primitive(self.0)
214        } else {
215            let mut buf = ParseBuffer::from(self.1);
216            parse_type_data(&mut buf)
217        }
218    }
219}
220
221impl<'t> fmt::Debug for Type<'t> {
222    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223        write!(f, "Type{{ kind: 0x{:4x} [{} bytes] }}", self.raw_kind(), self.1.len())
224    }
225}
226
227/// A `TypeFinder` is a secondary, in-memory data structure that permits efficiently finding types
228/// by `TypeIndex`. It starts out empty and must be populated by calling `update(&TypeIter)` while
229/// iterating.
230///
231/// `TypeFinder` allocates all the memory it needs when it is first created. The footprint is
232/// directly proportional to the total number of types; see `TypeInformation.len()`.
233///
234/// # Time/space trade-off
235///
236/// The naïve approach is to store the position of each `Type` as they are covered in the stream.
237/// The cost is memory: namely one `u32` per `Type`.
238///
239/// Compare this approach to a `TypeFinder` that stores the position of every Nth type. Memory
240/// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2
241/// iterations per lookup. However, iteration is cheap sequential memory access, and spending less
242/// memory on `TypeFinder` means more of the data can fit in the cache, so this is likely a good
243/// trade-off for small-ish values of N.
244///
245/// `TypeFinder` is parameterized by `shift` which controls this trade-off as powers of two:
246///
247///   * If `shift` is 0, `TypeFinder` stores 4 bytes per `Type` and always performs direct lookups.
248///   * If `shift` is 1, `TypeFinder` stores 2 bytes per `Type` and averages 0.5 iterations per lookup.
249///   * If `shift` is 2, `TypeFinder` stores 1 byte per `Type` and averages 1.5 iterations per lookup.
250///   * If `shift` is 3, `TypeFinder` stores 4 bits per `Type` and averages 3.5 iterations per lookup.
251///   * If `shift` is 4, `TypeFinder` stores 2 bits per `Type` and averages 7.5 iterations per lookup.
252///   * If `shift` is 5, `TypeFinder` stores 1 bit per `Type` and averages 15.5 iterations per lookup.
253///
254/// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to
255/// type size, which varies, but typical numbers from a large program are:
256///
257///   * 24% of types are    12 bytes
258///   * 34% of types are <= 16 bytes
259///   * 84% of types are <= 32 bytes
260///
261/// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K types would require 1 MB or
262/// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only
263/// one or two 64-byte cache lines.
264#[derive(Debug)]
265pub struct TypeFinder<'t> {
266    buffer: ParseBuffer<'t>,
267    minimum_type_index: TypeIndex,
268    maximum_type_index: TypeIndex,
269    positions: Vec<u32>,
270    shift: u8,
271}
272
273fn new_type_finder<'b, 't: 'b>(type_info: &'b TypeInformation<'t>, shift: u8) -> TypeFinder<'b> {
274    let count = type_info.header.maximum_type_index - type_info.header.minimum_type_index;
275    let shifted_count = (count >> shift) as usize;
276
277    let mut positions: Vec<u32> = Vec::with_capacity(shifted_count);
278
279    // add record zero, which is identical regardless of shift
280    positions.push(type_info.header.header_size);
281
282    TypeFinder{
283        buffer: type_info.stream.parse_buffer(),
284        minimum_type_index: type_info.header.minimum_type_index,
285        maximum_type_index: type_info.header.maximum_type_index,
286        positions: positions,
287        shift: shift,
288    }
289}
290
291impl<'t> TypeFinder<'t> {
292    /// Given a `TypeIndex`, find which position in the Vec we should jump to and how many times we
293    /// need to iterate to find the requested type.
294    ///
295    /// `shift` refers to the size of these bit shifts.
296    #[inline]
297    fn resolve(&self, type_index: TypeIndex) -> (usize, usize) {
298        let raw = type_index - self.minimum_type_index;
299        (
300            (raw >> self.shift) as usize,
301            (raw & ((1 << self.shift) - 1)) as usize
302        )
303    }
304
305    /// Returns the highest `TypeIndex` which is currently served by this `TypeFinder`.
306    ///
307    /// In general, you shouldn't need to consider this. Types always refer to types with lower
308    /// `TypeIndex`es, and either:
309    ///
310    ///  * You obtained a `Type` by iterating, in which case you should be calling `update()` as you
311    ///    iterate, and in which case all types it can reference are <= `max_indexed_type()`, or
312    ///  * You got a `Type` from this `TypeFinder`, in which case all types it can reference are
313    ///    still <= `max_indexed_type()`.
314    ///
315    #[inline]
316    pub fn max_indexed_type(&self) -> TypeIndex {
317        (self.positions.len() << self.shift) as TypeIndex + self.minimum_type_index - 1
318    }
319
320    /// Update this `TypeFinder` based on the current position of a `TypeIter`.
321    ///
322    /// Do this each time you call `.next()`.
323    #[inline]
324    pub fn update(&mut self, iterator: &TypeIter) {
325        let (vec_index, iteration_count) = self.resolve(iterator.type_index);
326        if iteration_count == 0 && vec_index == self.positions.len() {
327            let pos = iterator.buf.pos();
328            assert!(pos < u32::max_value() as usize);
329            self.positions.push(pos as u32);
330        }
331    }
332
333    /// Find a type by `TypeIndex`.
334    ///
335    /// # Errors
336    ///
337    /// * `Error::TypeNotFound(type_index)` if you ask for a type that doesn't exist
338    /// * `Error::TypeNotIndexed(type_index, max_indexed_type)` if you ask for a type that is known
339    ///   to exist but is not currently known by this `TypeFinder`.
340    pub fn find(&self, type_index: TypeIndex) -> Result<Type<'t>> {
341        if type_index < self.minimum_type_index {
342            return Ok(Type(type_index, PRIMITIVE_TYPE));
343        } else if type_index > self.maximum_type_index {
344            return Err(Error::TypeNotFound(type_index));
345        }
346
347        // figure out where we'd find this
348        let (vec_index, iteration_count) = self.resolve(type_index);
349
350        if let Some(pos) = self.positions.get(vec_index) {
351            // hit
352            let mut buf = self.buffer.clone();
353
354            // jump forwards
355            buf.take(*pos as usize)?;
356
357            // skip some records
358            for _ in 0..iteration_count {
359                let length = buf.parse_u16()?;
360                buf.take(length as usize)?;
361            }
362
363            // read the type
364            let length = buf.parse_u16()?;
365            Ok(Type(type_index, buf.take(length as usize)?))
366
367        } else {
368            // miss
369            Err(Error::TypeNotIndexed(type_index, self.max_indexed_type()))
370        }
371    }
372}
373
374/// A `TypeIter` iterates over a `TypeInformation`, producing `Types`s.
375///
376/// Type streams are represented internally as a series of records, each of which have a length, a
377/// type, and a type-specific field layout. Iteration performance is therefore similar to a linked
378/// list.
379#[derive(Debug)]
380pub struct TypeIter<'t> {
381    buf: ParseBuffer<'t>,
382    type_index: TypeIndex,
383}
384
385impl<'t> FallibleIterator for TypeIter<'t> {
386    type Item = Type<'t>;
387    type Error = Error;
388
389    fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
390        // see if we're at EOF
391        if self.buf.len() == 0 {
392            return Ok(None);
393        }
394
395        // read the length of the next type
396        let length = self.buf.parse_u16()? as usize;
397
398        // validate
399        if length < 2 {
400            // this can't be correct
401            return Err(Error::TypeTooShort);
402        }
403
404        // grab the type itself
405        let type_buf = self.buf.take(length)?;
406        let my_type_index = self.type_index;
407
408        self.type_index += 1;
409
410        // Done
411        Ok(Some(Type(my_type_index, type_buf)))
412    }
413}