1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::fmt; use std::result; use common::*; use msf::Stream; use FallibleIterator; pub(crate) mod constants; mod data; mod header; mod primitive; use self::data::parse_type_data; use self::header::*; use self::primitive::type_data_for_primitive; pub use self::data::*; pub use self::primitive::{Indirection,PrimitiveKind,PrimitiveType}; /// `TypeInformation` provides zero-copy access to a PDB type data stream. /// /// PDB type information is stored as a stream of length-prefixed `Type` records, and thus the most /// fundamental operation supported by `TypeInformation` is to iterate over `Type`s. /// /// Types are uniquely identified by `TypeIndex`, and types are stored within the PDB in ascending /// order of `TypeIndex`. /// /// Many types refer to other types by `TypeIndex`, and these references may refer to other types /// forming a chain that's arbitrarily long. Fortunately, PDB format requires that types refer only /// to types with lower `TypeIndex`es; thus, the stream of types form a directed acyclic graph. /// /// `TypeInformation` can iterate by `TypeIndex`, since that's essentially the only operation /// permitted by the data. `TypeFinder` is a secondary data structure to provide efficient /// backtracking. /// /// # Examples /// /// Iterating over the types while building a `TypeFinder`: /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result<usize> { /// # let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// # let mut pdb = pdb::PDB::open(file)?; /// /// let type_information = pdb.type_information()?; /// let mut type_finder = type_information.new_type_finder(); /// /// # let expected_count = type_information.len(); /// # let mut count: usize = 0; /// let mut iter = type_information.iter(); /// while let Some(typ) = iter.next()? { /// // build the type finder as we go /// type_finder.update(&iter); /// /// // parse the type record /// match typ.parse() { /// Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => { /// // this Type describes a class-like type with fields /// println!("type {} is a class named {}", typ.type_index(), name); /// /// // `fields` is a TypeIndex which refers to a FieldList /// // To find information about the fields, find and parse that Type /// match type_finder.find(fields)?.parse()? { /// pdb::TypeData::FieldList(list) => { /// // `fields` is a Vec<TypeData> /// for field in list.fields { /// if let pdb::TypeData::Member(member) = field { /// // follow `member.field_type` as desired /// println!(" - field {} at offset {:x}", member.name, member.offset); /// } else { /// // handle member functions, nested types, etc. /// } /// } /// /// if let Some(more_fields) = list.continuation { /// // A FieldList can be split across multiple records /// // TODO: follow `more_fields` and handle the next FieldList /// } /// } /// _ => { } /// } /// /// }, /// Ok(_) => { /// // ignore everything that's not a class-like type /// }, /// Err(pdb::Error::UnimplementedTypeKind(_)) => { /// // found an unhandled type record /// // this probably isn't fatal in most use cases /// }, /// Err(e) => { /// // other error, probably is worth failing /// return Err(e); /// } /// } /// # count += 1; /// } /// /// # assert_eq!(expected_count, count); /// # Ok(count) /// # } /// # assert!(test().expect("test") > 8000); /// ``` #[derive(Debug)] pub struct TypeInformation<'t> { stream: Stream<'t>, header: Header, } impl<'t> TypeInformation<'t> { /// Returns an iterator that can traverse the type table in sequential order. pub fn iter(&self) -> TypeIter { // get a parse buffer let mut buf = self.stream.parse_buffer(); // drop the header // this can't fail; we've already read this once buf.take(self.header.header_size as usize).expect("dropping TPI header"); TypeIter{ buf: buf, type_index: self.header.minimum_type_index, } } /// Returns the number of types contained in this `TypeInformation`. /// /// Note that primitive types are not stored in the PDB file, so the number of distinct types /// reachable via this `TypeInformation` will be higher than `len()`. pub fn len(&self) -> usize { (self.header.maximum_type_index - self.header.minimum_type_index) as usize } /// Returns a `TypeFinder` with a default time-space tradeoff. /// /// The `TypeFinder` is initially empty and must be populated by iterating. pub fn new_type_finder(&self) -> TypeFinder { new_type_finder(self, 3) } } pub(crate) fn new_type_information(stream: Stream) -> Result<TypeInformation> { let h; { let mut buf = stream.parse_buffer(); h = Header::parse(&mut buf)?; } Ok(TypeInformation{ stream: stream, header: h, }) } /// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything /// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems /// like a reasonable thing to do. const PRIMITIVE_TYPE: &'static [u8] = b"\xff\xff"; /// Represents a type from the type table. A `Type` has been minimally processed and may not be /// correctly formed or even understood by this library. /// /// To avoid copying, `Type`s exist as references to data owned by the parent `TypeInformation`. /// Therefore, a `Type` may not outlive its parent. #[derive(Copy,Clone,PartialEq)] pub struct Type<'t>(TypeIndex, &'t [u8]); impl<'t> Type<'t> { /// Returns this type's `TypeIndex`. pub fn type_index(&self) -> TypeIndex { self.0 } /// Returns the length of this type's data in terms of bytes in the on-disk format. /// /// Types are prefixed by length, which is not included in this count. pub fn len(&self) -> usize { self.1.len() } /// Returns the kind of type identified by this `Type`. /// /// As a special case, if this `Type` is actually a primitive type, `raw_kind()` will return /// `0xffff`. #[inline] pub fn raw_kind(&self) -> u16 { debug_assert!(self.1.len() >= 2); // assemble a little-endian u16 (self.1[0] as u16) | ((self.1[1] as u16) << 8) } /// Parse this Type into a TypeData. /// /// # Errors /// /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this /// library /// * `Error::UnexpectedEof` if the type record is malformed pub fn parse(&self) -> Result<TypeData<'t>> { if self.0 < 0x1000 { // Primitive type type_data_for_primitive(self.0) } else { let mut buf = ParseBuffer::from(self.1); parse_type_data(&mut buf) } } } impl<'t> fmt::Debug for Type<'t> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Type{{ kind: 0x{:4x} [{} bytes] }}", self.raw_kind(), self.1.len()) } } /// A `TypeFinder` is a secondary, in-memory data structure that permits efficiently finding types /// by `TypeIndex`. It starts out empty and must be populated by calling `update(&TypeIter)` while /// iterating. /// /// `TypeFinder` allocates all the memory it needs when it is first created. The footprint is /// directly proportional to the total number of types; see `TypeInformation.len()`. /// /// # Time/space trade-off /// /// The naïve approach is to store the position of each `Type` as they are covered in the stream. /// The cost is memory: namely one `u32` per `Type`. /// /// Compare this approach to a `TypeFinder` that stores the position of every Nth type. Memory /// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2 /// iterations per lookup. However, iteration is cheap sequential memory access, and spending less /// memory on `TypeFinder` means more of the data can fit in the cache, so this is likely a good /// trade-off for small-ish values of N. /// /// `TypeFinder` is parameterized by `shift` which controls this trade-off as powers of two: /// /// * If `shift` is 0, `TypeFinder` stores 4 bytes per `Type` and always performs direct lookups. /// * If `shift` is 1, `TypeFinder` stores 2 bytes per `Type` and averages 0.5 iterations per lookup. /// * If `shift` is 2, `TypeFinder` stores 1 byte per `Type` and averages 1.5 iterations per lookup. /// * If `shift` is 3, `TypeFinder` stores 4 bits per `Type` and averages 3.5 iterations per lookup. /// * If `shift` is 4, `TypeFinder` stores 2 bits per `Type` and averages 7.5 iterations per lookup. /// * If `shift` is 5, `TypeFinder` stores 1 bit per `Type` and averages 15.5 iterations per lookup. /// /// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to /// type size, which varies, but typical numbers from a large program are: /// /// * 24% of types are 12 bytes /// * 34% of types are <= 16 bytes /// * 84% of types are <= 32 bytes /// /// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K types would require 1 MB or /// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only /// one or two 64-byte cache lines. #[derive(Debug)] pub struct TypeFinder<'t> { buffer: ParseBuffer<'t>, minimum_type_index: TypeIndex, maximum_type_index: TypeIndex, positions: Vec<u32>, shift: u8, } fn new_type_finder<'b, 't: 'b>(type_info: &'b TypeInformation<'t>, shift: u8) -> TypeFinder<'b> { let count = type_info.header.maximum_type_index - type_info.header.minimum_type_index; let shifted_count = (count >> shift) as usize; let mut positions: Vec<u32> = Vec::with_capacity(shifted_count); // add record zero, which is identical regardless of shift positions.push(type_info.header.header_size); TypeFinder{ buffer: type_info.stream.parse_buffer(), minimum_type_index: type_info.header.minimum_type_index, maximum_type_index: type_info.header.maximum_type_index, positions: positions, shift: shift, } } impl<'t> TypeFinder<'t> { /// Given a `TypeIndex`, find which position in the Vec we should jump to and how many times we /// need to iterate to find the requested type. /// /// `shift` refers to the size of these bit shifts. #[inline] fn resolve(&self, type_index: TypeIndex) -> (usize, usize) { let raw = type_index - self.minimum_type_index; ( (raw >> self.shift) as usize, (raw & ((1 << self.shift) - 1)) as usize ) } /// Returns the highest `TypeIndex` which is currently served by this `TypeFinder`. /// /// In general, you shouldn't need to consider this. Types always refer to types with lower /// `TypeIndex`es, and either: /// /// * You obtained a `Type` by iterating, in which case you should be calling `update()` as you /// iterate, and in which case all types it can reference are <= `max_indexed_type()`, or /// * You got a `Type` from this `TypeFinder`, in which case all types it can reference are /// still <= `max_indexed_type()`. /// #[inline] pub fn max_indexed_type(&self) -> TypeIndex { (self.positions.len() << self.shift) as TypeIndex + self.minimum_type_index - 1 } /// Update this `TypeFinder` based on the current position of a `TypeIter`. /// /// Do this each time you call `.next()`. #[inline] pub fn update(&mut self, iterator: &TypeIter) { let (vec_index, iteration_count) = self.resolve(iterator.type_index); if iteration_count == 0 && vec_index == self.positions.len() { let pos = iterator.buf.pos(); assert!(pos < u32::max_value() as usize); self.positions.push(pos as u32); } } /// Find a type by `TypeIndex`. /// /// # Errors /// /// * `Error::TypeNotFound(type_index)` if you ask for a type that doesn't exist /// * `Error::TypeNotIndexed(type_index, max_indexed_type)` if you ask for a type that is known /// to exist but is not currently known by this `TypeFinder`. pub fn find(&self, type_index: TypeIndex) -> Result<Type<'t>> { if type_index < self.minimum_type_index { return Ok(Type(type_index, PRIMITIVE_TYPE)); } else if type_index > self.maximum_type_index { return Err(Error::TypeNotFound(type_index)); } // figure out where we'd find this let (vec_index, iteration_count) = self.resolve(type_index); if let Some(pos) = self.positions.get(vec_index) { // hit let mut buf = self.buffer.clone(); // jump forwards buf.take(*pos as usize)?; // skip some records for _ in 0..iteration_count { let length = buf.parse_u16()?; buf.take(length as usize)?; } // read the type let length = buf.parse_u16()?; Ok(Type(type_index, buf.take(length as usize)?)) } else { // miss Err(Error::TypeNotIndexed(type_index, self.max_indexed_type())) } } } /// A `TypeIter` iterates over a `TypeInformation`, producing `Types`s. /// /// Type streams are represented internally as a series of records, each of which have a length, a /// type, and a type-specific field layout. Iteration performance is therefore similar to a linked /// list. #[derive(Debug)] pub struct TypeIter<'t> { buf: ParseBuffer<'t>, type_index: TypeIndex, } impl<'t> FallibleIterator for TypeIter<'t> { type Item = Type<'t>; type Error = Error; fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> { // see if we're at EOF if self.buf.len() == 0 { return Ok(None); } // read the length of the next type let length = self.buf.parse_u16()? as usize; // validate if length < 2 { // this can't be correct return Err(Error::TypeTooShort); } // grab the type itself let type_buf = self.buf.take(length)?; let my_type_index = self.type_index; self.type_index += 1; // Done Ok(Some(Type(my_type_index, type_buf))) } }