1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
// Copyright 2017 pdb Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use std::fmt;
use std::result;

use common::*;
use msf::Stream;
use FallibleIterator;

mod constants;
mod data;
mod header;
mod primitive;

use self::data::parse_type_data;
use self::header::*;
use self::primitive::type_data_for_primitive;

pub use self::data::{TypeData,ClassKind,EnumValue,FieldAttributes,FunctionAttributes,MethodListEntry,TypeProperties};
pub use self::primitive::{Indirection,PrimitiveType};

/// `TypeInformation` provides zero-copy access to a PDB type data stream.
///
/// PDB type information is stored as a stream of length-prefixed `Type` records, and thus the most
/// fundamental operation supported by `TypeInformation` is to iterate over `Type`s.
///
/// Types are uniquely identified by `TypeIndex`, and types are stored within the PDB in ascending
/// order of `TypeIndex`.
///
/// Many types refer to other types by `TypeIndex`, and these references may refer to other types
/// forming a chain that's arbitrarily long. Fortunately, PDB format requires that types refer only
/// to types with lower `TypeIndex`es; thus, the stream of types form a directed acyclic graph.
///
/// `TypeInformation` can iterate by `TypeIndex`, since that's essentially the only operation
/// permitted by the data. `TypeFinder` is a secondary data structure to provide efficient
/// backtracking.
///
/// # Examples
///
/// Iterating over the types while building a `TypeFinder`:
///
/// ```
/// # use pdb::FallibleIterator;
/// #
/// # fn test() -> pdb::Result<usize> {
/// # let file = std::fs::File::open("fixtures/self/foo.pdb")?;
/// # let mut pdb = pdb::PDB::open(file)?;
///
/// let type_information = pdb.type_information()?;
/// let mut type_finder = type_information.new_type_finder();
///
/// # let expected_count = type_information.len();
/// # let mut count: usize = 0;
/// let mut iter = type_information.iter();
/// while let Some(typ) = iter.next()? {
///     // build the type finder as we go
///     type_finder.update(&iter);
///
///     // parse the type record
///     match typ.parse() {
///         Ok(pdb::TypeData::Class{name, properties, fields: Some(fields), ..}) => {
///             // this Type describes a class-like type with fields
///             println!("type {} is a class named {}", typ.type_index(), name);
///
///             // `fields` is a TypeIndex which refers to a FieldList
///             // To find information about the fields, find and parse that Type
///             match type_finder.find(fields)?.parse()? {
///                 pdb::TypeData::FieldList{ fields, continuation } => {
///                     // `fields` is a Vec<TypeData>
///                     for field in fields {
///                         if let pdb::TypeData::Member { offset, name, field_type, .. } = field {
///                             // follow `field_type` as desired
///                             println!("  - field {} at offset {:x}", name, offset);
///                         } else {
///                             // handle member functions, nested types, etc.
///                         }
///                     }
///
///                     if let Some(more_fields) = continuation {
///                         // A FieldList can be split across multiple records
///                         // TODO: follow `more_fields` and handle the next FieldList
///                     }
///                 }
///                 _ => { }
///             }
///
///         },
///         Ok(_) => {
///             // ignore everything that's not a class-like type
///         },
///         Err(pdb::Error::UnimplementedTypeKind(_)) => {
///             // found an unhandled type record
///             // this probably isn't fatal in most use cases
///         },
///         Err(e) => {
///             // other error, probably is worth failing
///             return Err(e);
///         }
///     }
///     # count += 1;
/// }
///
/// # assert_eq!(expected_count, count);
/// # Ok(count)
/// # }
/// # assert!(test().expect("test") > 8000);
/// ```
#[derive(Debug)]
pub struct TypeInformation<'t> {
    stream: Stream<'t>,
    header: Header,
}

impl<'t> TypeInformation<'t> {
    /// Returns an iterator that can traverse the type table in sequential order.
    pub fn iter(&self) -> TypeIter {
        // get a parse buffer
        let mut buf = self.stream.parse_buffer();

        // drop the header
        // this can't fail; we've already read this once
        buf.take(self.header.header_size as usize).expect("dropping TPI header");

        TypeIter{
            buf: buf,
            type_index: self.header.minimum_type_index,
        }
    }

    /// Returns the number of types contained in this `TypeInformation`.
    ///
    /// Note that primitive types are not stored in the PDB file, so the number of distinct types
    /// reachable via this `TypeInformation` will be higher than `len()`.
    pub fn len(&self) -> usize {
        (self.header.maximum_type_index - self.header.minimum_type_index) as usize
    }

    /// Returns a `TypeFinder` with a default time-space tradeoff.
    ///
    /// The `TypeFinder` is initially empty and must be populated by iterating.
    pub fn new_type_finder(&self) -> TypeFinder {
        new_type_finder(self, 3)
    }
}

pub fn new_type_information(stream: Stream) -> Result<TypeInformation> {
    let h;

    {
        let mut buf = stream.parse_buffer();
        h = Header::parse(&mut buf)?;
    }

    Ok(TypeInformation{
        stream: stream,
        header: h,
    })
}

/// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything
/// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems
/// like a reasonable thing to do.
const PRIMITIVE_TYPE: &'static [u8] = b"\xff\xff";

/// Represents a type from the type table. A `Type` has been minimally processed and may not be
/// correctly formed or even understood by this library.
///
/// To avoid copying, `Type`s exist as references to data owned by the parent `TypeInformation`.
/// Therefore, a `Type` may not outlive its parent.
#[derive(Copy,Clone,PartialEq)]
pub struct Type<'t>(TypeIndex, &'t [u8]);

impl<'t> Type<'t> {
    /// Returns this type's `TypeIndex`.
    pub fn type_index(&self) -> TypeIndex {
        self.0
    }

    /// Returns the length of this type's data in terms of bytes in the on-disk format.
    ///
    /// Types are prefixed by length, which is not included in this count.
    pub fn len(&self) -> usize {
        self.1.len()
    }

    /// Returns the kind of type identified by this `Type`.
    ///
    /// As a special case, if this `Type` is actually a primitive type, `raw_kind()` will return
    /// `0xffff`.
    #[inline]
    pub fn raw_kind(&self) -> u16 {
        debug_assert!(self.1.len() >= 2);

        // assemble a little-endian u16
        (self.1[0] as u16) | ((self.1[1] as u16) << 8)
    }

    /// Parse this Type into a TypeData.
    ///
    /// # Errors
    ///
    /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this
    ///   library
    /// * `Error::UnexpectedEof` if the type record is malformed
    pub fn parse(&self) -> Result<TypeData<'t>> {
        if self.0 < 0x1000 {
            // Primitive type
            type_data_for_primitive(self.0)
        } else {
            let mut buf = ParseBuffer::from(self.1);
            parse_type_data(&mut buf)
        }
    }
}

impl<'t> fmt::Debug for Type<'t> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Type{{ kind: 0x{:4x} [{} bytes] }}", self.raw_kind(), self.1.len())
    }
}

/// A `TypeFinder` is a secondary, in-memory data structure that permits efficiently finding types
/// by `TypeIndex`. It starts out empty and must be populated by calling `update(&TypeIter)` while
/// iterating.
///
/// `TypeFinder` allocates all the memory it needs when it is first created. The footprint is
/// directly proportional to the total number of types; see `TypeInformation.len()`.
///
/// # Time/space trade-off
///
/// The naïve approach is to store the position of each `Type` as they are covered in the stream.
/// The cost is memory: namely one `u32` per `Type`.
///
/// Compare this approach to a `TypeFinder` that stores the position of every Nth type. Memory
/// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2
/// iterations per lookup. However, iteration is cheap sequential memory access, and spending less
/// memory on `TypeFinder` means more of the data can fit in the cache, so this is likely a good
/// trade-off for small-ish values of N.
///
/// `TypeFinder` is parameterized by `shift` which controls this trade-off as powers of two:
///
///   * If `shift` is 0, `TypeFinder` stores 4 bytes per `Type` and always performs direct lookups.
///   * If `shift` is 1, `TypeFinder` stores 2 bytes per `Type` and averages 0.5 iterations per lookup.
///   * If `shift` is 2, `TypeFinder` stores 1 byte per `Type` and averages 1.5 iterations per lookup.
///   * If `shift` is 3, `TypeFinder` stores 4 bits per `Type` and averages 3.5 iterations per lookup.
///   * If `shift` is 4, `TypeFinder` stores 2 bits per `Type` and averages 7.5 iterations per lookup.
///   * If `shift` is 5, `TypeFinder` stores 1 bit per `Type` and averages 15.5 iterations per lookup.
///
/// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to
/// type size, which varies, but typical numbers from a large program are:
///
///   * 24% of types are    12 bytes
///   * 34% of types are <= 16 bytes
///   * 84% of types are <= 32 bytes
///
/// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K types would require 1 MB or
/// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only
/// one or two 64-byte cache lines.
#[derive(Debug)]
pub struct TypeFinder<'t> {
    buffer: ParseBuffer<'t>,
    minimum_type_index: TypeIndex,
    maximum_type_index: TypeIndex,
    positions: Vec<u32>,
    shift: u8,
}

fn new_type_finder<'b, 't: 'b>(type_info: &'b TypeInformation<'t>, shift: u8) -> TypeFinder<'b> {
    let count = type_info.header.maximum_type_index - type_info.header.minimum_type_index;
    let shifted_count = (count >> shift) as usize;

    let mut positions: Vec<u32> = Vec::with_capacity(shifted_count);

    // add record zero, which is identical regardless of shift
    positions.push(type_info.header.header_size);

    TypeFinder{
        buffer: type_info.stream.parse_buffer(),
        minimum_type_index: type_info.header.minimum_type_index,
        maximum_type_index: type_info.header.maximum_type_index,
        positions: positions,
        shift: shift,
    }
}

impl<'t> TypeFinder<'t> {
    /// Given a `TypeIndex`, find which position in the Vec we should jump to and how many times we
    /// need to iterate to find the requested type.
    ///
    /// `shift` refers to the size of these bit shifts.
    #[inline]
    fn resolve(&self, type_index: TypeIndex) -> (usize, usize) {
        let raw = type_index - self.minimum_type_index;
        (
            (raw >> self.shift) as usize,
            (raw & ((1 << self.shift) - 1)) as usize
        )
    }

    /// Returns the highest `TypeIndex` which is currently served by this `TypeFinder`.
    ///
    /// In general, you shouldn't need to consider this. Types always refer to types with lower
    /// `TypeIndex`es, and either:
    ///
    ///  * You obtained a `Type` by iterating, in which case you should be calling `update()` as you
    ///    iterate, and in which case all types it can reference are <= `max_indexed_type()`, or
    ///  * You got a `Type` from this `TypeFinder`, in which case all types it can reference are
    ///    still <= `max_indexed_type()`.
    ///
    #[inline]
    pub fn max_indexed_type(&self) -> TypeIndex {
        (self.positions.len() << self.shift) as TypeIndex + self.minimum_type_index - 1
    }

    /// Update this `TypeFinder` based on the current position of a `TypeIter`.
    ///
    /// Do this each time you call `.next()`.
    #[inline]
    pub fn update(&mut self, iterator: &TypeIter) {
        let (vec_index, iteration_count) = self.resolve(iterator.type_index);
        if iteration_count == 0 && vec_index == self.positions.len() {
            let pos = iterator.buf.pos();
            assert!(pos < u32::max_value() as usize);
            self.positions.push(pos as u32);
        }
    }

    /// Find a type by `TypeIndex`.
    ///
    /// # Errors
    ///
    /// * `Error::TypeNotFound(type_index)` if you ask for a type that doesn't exist
    /// * `Error::TypeNotIndexed(type_index, max_indexed_type)` if you ask for a type that is known
    ///   to exist but is not currently known by this `TypeFinder`.
    pub fn find(&self, type_index: TypeIndex) -> Result<Type<'t>> {
        if type_index < self.minimum_type_index {
            return Ok(Type(type_index, PRIMITIVE_TYPE));
        } else if type_index > self.maximum_type_index {
            return Err(Error::TypeNotFound(type_index));
        }

        // figure out where we'd find this
        let (vec_index, iteration_count) = self.resolve(type_index);

        if let Some(pos) = self.positions.get(vec_index) {
            // hit
            let mut buf = self.buffer.clone();

            // jump forwards
            buf.take(*pos as usize)?;

            // skip some records
            for _ in 0..iteration_count {
                let length = buf.parse_u16()?;
                buf.take(length as usize)?;
            }

            // read the type
            let length = buf.parse_u16()?;
            Ok(Type(type_index, buf.take(length as usize)?))

        } else {
            // miss
            Err(Error::TypeNotIndexed(type_index, self.max_indexed_type()))
        }
    }
}

/// A `TypeIter` iterates over a `TypeInformation`, producing `Types`s.
///
/// Type streams are represented internally as a series of records, each of which have a length, a
/// type, and a type-specific field layout. Iteration performance is therefore similar to a linked
/// list.
#[derive(Debug)]
pub struct TypeIter<'t> {
    buf: ParseBuffer<'t>,
    type_index: TypeIndex,
}

impl<'t> FallibleIterator for TypeIter<'t> {
    type Item = Type<'t>;
    type Error = Error;

    fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
        // see if we're at EOF
        if self.buf.len() == 0 {
            return Ok(None);
        }

        // read the length of the next type
        let length = self.buf.parse_u16()? as usize;

        // validate
        if length < 2 {
            // this can't be correct
            return Err(Error::TypeTooShort);
        }

        // grab the type itself
        let type_buf = self.buf.take(length)?;
        let my_type_index = self.type_index;

        self.type_index += 1;

        // Done
        Ok(Some(Type(my_type_index, type_buf)))
    }
}