mozpdb/tpi/mod.rs
1// Copyright 2017 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::fmt;
9use std::result;
10
11use common::*;
12use msf::Stream;
13use FallibleIterator;
14
15pub(crate) mod constants;
16mod data;
17mod header;
18mod primitive;
19
20use self::data::parse_type_data;
21use self::header::*;
22use self::primitive::type_data_for_primitive;
23
24pub use self::data::*;
25pub use self::primitive::{Indirection,PrimitiveKind,PrimitiveType};
26
27/// `TypeInformation` provides zero-copy access to a PDB type data stream.
28///
29/// PDB type information is stored as a stream of length-prefixed `Type` records, and thus the most
30/// fundamental operation supported by `TypeInformation` is to iterate over `Type`s.
31///
32/// Types are uniquely identified by `TypeIndex`, and types are stored within the PDB in ascending
33/// order of `TypeIndex`.
34///
35/// Many types refer to other types by `TypeIndex`, and these references may refer to other types
36/// forming a chain that's arbitrarily long. Fortunately, PDB format requires that types refer only
37/// to types with lower `TypeIndex`es; thus, the stream of types form a directed acyclic graph.
38///
39/// `TypeInformation` can iterate by `TypeIndex`, since that's essentially the only operation
40/// permitted by the data. `TypeFinder` is a secondary data structure to provide efficient
41/// backtracking.
42///
43/// # Examples
44///
45/// Iterating over the types while building a `TypeFinder`:
46///
47/// ```
48/// # use pdb::FallibleIterator;
49/// #
50/// # fn test() -> pdb::Result<usize> {
51/// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
52/// # let mut pdb = pdb::PDB::open(file)?;
53///
54/// let type_information = pdb.type_information()?;
55/// let mut type_finder = type_information.new_type_finder();
56///
57/// # let expected_count = type_information.len();
58/// # let mut count: usize = 0;
59/// let mut iter = type_information.iter();
60/// while let Some(typ) = iter.next()? {
61/// // build the type finder as we go
62/// type_finder.update(&iter);
63///
64/// // parse the type record
65/// match typ.parse() {
66/// Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => {
67/// // this Type describes a class-like type with fields
68/// println!("type {} is a class named {}", typ.type_index(), name);
69///
70/// // `fields` is a TypeIndex which refers to a FieldList
71/// // To find information about the fields, find and parse that Type
72/// match type_finder.find(fields)?.parse()? {
73/// pdb::TypeData::FieldList(list) => {
74/// // `fields` is a Vec<TypeData>
75/// for field in list.fields {
76/// if let pdb::TypeData::Member(member) = field {
77/// // follow `member.field_type` as desired
78/// println!(" - field {} at offset {:x}", member.name, member.offset);
79/// } else {
80/// // handle member functions, nested types, etc.
81/// }
82/// }
83///
84/// if let Some(more_fields) = list.continuation {
85/// // A FieldList can be split across multiple records
86/// // TODO: follow `more_fields` and handle the next FieldList
87/// }
88/// }
89/// _ => { }
90/// }
91///
92/// },
93/// Ok(_) => {
94/// // ignore everything that's not a class-like type
95/// },
96/// Err(pdb::Error::UnimplementedTypeKind(_)) => {
97/// // found an unhandled type record
98/// // this probably isn't fatal in most use cases
99/// },
100/// Err(e) => {
101/// // other error, probably is worth failing
102/// return Err(e);
103/// }
104/// }
105/// # count += 1;
106/// }
107///
108/// # assert_eq!(expected_count, count);
109/// # Ok(count)
110/// # }
111/// # assert!(test().expect("test") > 8000);
112/// ```
113#[derive(Debug)]
114pub struct TypeInformation<'t> {
115 stream: Stream<'t>,
116 header: Header,
117}
118
119impl<'t> TypeInformation<'t> {
120 /// Returns an iterator that can traverse the type table in sequential order.
121 pub fn iter(&self) -> TypeIter {
122 // get a parse buffer
123 let mut buf = self.stream.parse_buffer();
124
125 // drop the header
126 // this can't fail; we've already read this once
127 buf.take(self.header.header_size as usize).expect("dropping TPI header");
128
129 TypeIter{
130 buf: buf,
131 type_index: self.header.minimum_type_index,
132 }
133 }
134
135 /// Returns the number of types contained in this `TypeInformation`.
136 ///
137 /// Note that primitive types are not stored in the PDB file, so the number of distinct types
138 /// reachable via this `TypeInformation` will be higher than `len()`.
139 pub fn len(&self) -> usize {
140 (self.header.maximum_type_index - self.header.minimum_type_index) as usize
141 }
142
143 /// Returns a `TypeFinder` with a default time-space tradeoff.
144 ///
145 /// The `TypeFinder` is initially empty and must be populated by iterating.
146 pub fn new_type_finder(&self) -> TypeFinder {
147 new_type_finder(self, 3)
148 }
149}
150
151pub(crate) fn new_type_information(stream: Stream) -> Result<TypeInformation> {
152 let h;
153
154 {
155 let mut buf = stream.parse_buffer();
156 h = Header::parse(&mut buf)?;
157 }
158
159 Ok(TypeInformation{
160 stream: stream,
161 header: h,
162 })
163}
164
165/// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything
166/// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems
167/// like a reasonable thing to do.
168const PRIMITIVE_TYPE: &'static [u8] = b"\xff\xff";
169
170/// Represents a type from the type table. A `Type` has been minimally processed and may not be
171/// correctly formed or even understood by this library.
172///
173/// To avoid copying, `Type`s exist as references to data owned by the parent `TypeInformation`.
174/// Therefore, a `Type` may not outlive its parent.
175#[derive(Copy,Clone,PartialEq)]
176pub struct Type<'t>(TypeIndex, &'t [u8]);
177
178impl<'t> Type<'t> {
179 /// Returns this type's `TypeIndex`.
180 pub fn type_index(&self) -> TypeIndex {
181 self.0
182 }
183
184 /// Returns the length of this type's data in terms of bytes in the on-disk format.
185 ///
186 /// Types are prefixed by length, which is not included in this count.
187 pub fn len(&self) -> usize {
188 self.1.len()
189 }
190
191 /// Returns the kind of type identified by this `Type`.
192 ///
193 /// As a special case, if this `Type` is actually a primitive type, `raw_kind()` will return
194 /// `0xffff`.
195 #[inline]
196 pub fn raw_kind(&self) -> u16 {
197 debug_assert!(self.1.len() >= 2);
198
199 // assemble a little-endian u16
200 (self.1[0] as u16) | ((self.1[1] as u16) << 8)
201 }
202
203 /// Parse this Type into a TypeData.
204 ///
205 /// # Errors
206 ///
207 /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this
208 /// library
209 /// * `Error::UnexpectedEof` if the type record is malformed
210 pub fn parse(&self) -> Result<TypeData<'t>> {
211 if self.0 < 0x1000 {
212 // Primitive type
213 type_data_for_primitive(self.0)
214 } else {
215 let mut buf = ParseBuffer::from(self.1);
216 parse_type_data(&mut buf)
217 }
218 }
219}
220
221impl<'t> fmt::Debug for Type<'t> {
222 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223 write!(f, "Type{{ kind: 0x{:4x} [{} bytes] }}", self.raw_kind(), self.1.len())
224 }
225}
226
227/// A `TypeFinder` is a secondary, in-memory data structure that permits efficiently finding types
228/// by `TypeIndex`. It starts out empty and must be populated by calling `update(&TypeIter)` while
229/// iterating.
230///
231/// `TypeFinder` allocates all the memory it needs when it is first created. The footprint is
232/// directly proportional to the total number of types; see `TypeInformation.len()`.
233///
234/// # Time/space trade-off
235///
236/// The naïve approach is to store the position of each `Type` as they are covered in the stream.
237/// The cost is memory: namely one `u32` per `Type`.
238///
239/// Compare this approach to a `TypeFinder` that stores the position of every Nth type. Memory
240/// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2
241/// iterations per lookup. However, iteration is cheap sequential memory access, and spending less
242/// memory on `TypeFinder` means more of the data can fit in the cache, so this is likely a good
243/// trade-off for small-ish values of N.
244///
245/// `TypeFinder` is parameterized by `shift` which controls this trade-off as powers of two:
246///
247/// * If `shift` is 0, `TypeFinder` stores 4 bytes per `Type` and always performs direct lookups.
248/// * If `shift` is 1, `TypeFinder` stores 2 bytes per `Type` and averages 0.5 iterations per lookup.
249/// * If `shift` is 2, `TypeFinder` stores 1 byte per `Type` and averages 1.5 iterations per lookup.
250/// * If `shift` is 3, `TypeFinder` stores 4 bits per `Type` and averages 3.5 iterations per lookup.
251/// * If `shift` is 4, `TypeFinder` stores 2 bits per `Type` and averages 7.5 iterations per lookup.
252/// * If `shift` is 5, `TypeFinder` stores 1 bit per `Type` and averages 15.5 iterations per lookup.
253///
254/// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to
255/// type size, which varies, but typical numbers from a large program are:
256///
257/// * 24% of types are 12 bytes
258/// * 34% of types are <= 16 bytes
259/// * 84% of types are <= 32 bytes
260///
261/// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K types would require 1 MB or
262/// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only
263/// one or two 64-byte cache lines.
264#[derive(Debug)]
265pub struct TypeFinder<'t> {
266 buffer: ParseBuffer<'t>,
267 minimum_type_index: TypeIndex,
268 maximum_type_index: TypeIndex,
269 positions: Vec<u32>,
270 shift: u8,
271}
272
273fn new_type_finder<'b, 't: 'b>(type_info: &'b TypeInformation<'t>, shift: u8) -> TypeFinder<'b> {
274 let count = type_info.header.maximum_type_index - type_info.header.minimum_type_index;
275 let shifted_count = (count >> shift) as usize;
276
277 let mut positions: Vec<u32> = Vec::with_capacity(shifted_count);
278
279 // add record zero, which is identical regardless of shift
280 positions.push(type_info.header.header_size);
281
282 TypeFinder{
283 buffer: type_info.stream.parse_buffer(),
284 minimum_type_index: type_info.header.minimum_type_index,
285 maximum_type_index: type_info.header.maximum_type_index,
286 positions: positions,
287 shift: shift,
288 }
289}
290
291impl<'t> TypeFinder<'t> {
292 /// Given a `TypeIndex`, find which position in the Vec we should jump to and how many times we
293 /// need to iterate to find the requested type.
294 ///
295 /// `shift` refers to the size of these bit shifts.
296 #[inline]
297 fn resolve(&self, type_index: TypeIndex) -> (usize, usize) {
298 let raw = type_index - self.minimum_type_index;
299 (
300 (raw >> self.shift) as usize,
301 (raw & ((1 << self.shift) - 1)) as usize
302 )
303 }
304
305 /// Returns the highest `TypeIndex` which is currently served by this `TypeFinder`.
306 ///
307 /// In general, you shouldn't need to consider this. Types always refer to types with lower
308 /// `TypeIndex`es, and either:
309 ///
310 /// * You obtained a `Type` by iterating, in which case you should be calling `update()` as you
311 /// iterate, and in which case all types it can reference are <= `max_indexed_type()`, or
312 /// * You got a `Type` from this `TypeFinder`, in which case all types it can reference are
313 /// still <= `max_indexed_type()`.
314 ///
315 #[inline]
316 pub fn max_indexed_type(&self) -> TypeIndex {
317 (self.positions.len() << self.shift) as TypeIndex + self.minimum_type_index - 1
318 }
319
320 /// Update this `TypeFinder` based on the current position of a `TypeIter`.
321 ///
322 /// Do this each time you call `.next()`.
323 #[inline]
324 pub fn update(&mut self, iterator: &TypeIter) {
325 let (vec_index, iteration_count) = self.resolve(iterator.type_index);
326 if iteration_count == 0 && vec_index == self.positions.len() {
327 let pos = iterator.buf.pos();
328 assert!(pos < u32::max_value() as usize);
329 self.positions.push(pos as u32);
330 }
331 }
332
333 /// Find a type by `TypeIndex`.
334 ///
335 /// # Errors
336 ///
337 /// * `Error::TypeNotFound(type_index)` if you ask for a type that doesn't exist
338 /// * `Error::TypeNotIndexed(type_index, max_indexed_type)` if you ask for a type that is known
339 /// to exist but is not currently known by this `TypeFinder`.
340 pub fn find(&self, type_index: TypeIndex) -> Result<Type<'t>> {
341 if type_index < self.minimum_type_index {
342 return Ok(Type(type_index, PRIMITIVE_TYPE));
343 } else if type_index > self.maximum_type_index {
344 return Err(Error::TypeNotFound(type_index));
345 }
346
347 // figure out where we'd find this
348 let (vec_index, iteration_count) = self.resolve(type_index);
349
350 if let Some(pos) = self.positions.get(vec_index) {
351 // hit
352 let mut buf = self.buffer.clone();
353
354 // jump forwards
355 buf.take(*pos as usize)?;
356
357 // skip some records
358 for _ in 0..iteration_count {
359 let length = buf.parse_u16()?;
360 buf.take(length as usize)?;
361 }
362
363 // read the type
364 let length = buf.parse_u16()?;
365 Ok(Type(type_index, buf.take(length as usize)?))
366
367 } else {
368 // miss
369 Err(Error::TypeNotIndexed(type_index, self.max_indexed_type()))
370 }
371 }
372}
373
374/// A `TypeIter` iterates over a `TypeInformation`, producing `Types`s.
375///
376/// Type streams are represented internally as a series of records, each of which have a length, a
377/// type, and a type-specific field layout. Iteration performance is therefore similar to a linked
378/// list.
379#[derive(Debug)]
380pub struct TypeIter<'t> {
381 buf: ParseBuffer<'t>,
382 type_index: TypeIndex,
383}
384
385impl<'t> FallibleIterator for TypeIter<'t> {
386 type Item = Type<'t>;
387 type Error = Error;
388
389 fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
390 // see if we're at EOF
391 if self.buf.len() == 0 {
392 return Ok(None);
393 }
394
395 // read the length of the next type
396 let length = self.buf.parse_u16()? as usize;
397
398 // validate
399 if length < 2 {
400 // this can't be correct
401 return Err(Error::TypeTooShort);
402 }
403
404 // grab the type itself
405 let type_buf = self.buf.take(length)?;
406 let my_type_index = self.type_index;
407
408 self.type_index += 1;
409
410 // Done
411 Ok(Some(Type(my_type_index, type_buf)))
412 }
413}