1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::fmt; use std::marker::PhantomData; use std::result; use crate::common::*; use crate::msf::Stream; use crate::FallibleIterator; pub(crate) mod constants; mod data; mod header; mod id; mod primitive; use self::header::*; use self::primitive::type_data_for_primitive; pub use self::data::*; pub use self::id::*; pub use self::primitive::{Indirection, PrimitiveKind, PrimitiveType}; /// Zero-copy access to a PDB type or id stream. /// /// PDBs store two kinds of related streams with an identical internal structure: /// /// - [`TypeInformation`] (TPI stream) contains information on primitive types, classes and /// procedures, including their return type and arguments. Its contents are identified by /// [`TypeIndex`]. /// - [`IdInformation`] (IPI stream) is a stricter version of the above stream that contains inline /// functions, build infos and source references. Its contents are identified by [`IdIndex`]. /// /// Items in these streams are stored by their index in ascending order. Symbols declared in /// [`ModuleInfo`](crate::ModuleInfo) can refer to items in both streams, as well as items to other /// items with one exception: `Type`s cannot refer to `Id`s. Also, the PDB format requires that /// items refer only to types with lower indexes. Thus, the stream of items forms a directed acyclic /// graph. /// /// Both streams can iterate by their index using [`ItemInformation::iter`]. Additionally, /// [`ItemFinder`] is a secondary data structure to provide efficient backtracking for random /// access. /// /// There are type definitions for both streams: /// /// - `ItemInformation`: [`TypeInformation`] and [`IdInformation`] /// - [`ItemFinder`]: [`TypeFinder`] and [`IdFinder`] /// - [`ItemIndex`]: [`TypeIndex`] and [`IdIndex`] /// - [`ItemIter`]: [`TypeIter`] and [`IdIter`] /// - [`Item`]: [`Type`] and [`Id`] /// /// # Examples /// /// Iterating over the types while building a `TypeFinder`: /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result<usize> { /// # let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// # let mut pdb = pdb::PDB::open(file)?; /// /// let type_information = pdb.type_information()?; /// let mut type_finder = type_information.finder(); /// /// # let expected_count = type_information.len(); /// # let mut count: usize = 0; /// let mut iter = type_information.iter(); /// while let Some(typ) = iter.next()? { /// // build the type finder as we go /// type_finder.update(&iter); /// /// // parse the type record /// match typ.parse() { /// Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => { /// // this Type describes a class-like type with fields /// println!("type {} is a class named {}", typ.index(), name); /// /// // `fields` is a TypeIndex which refers to a FieldList /// // To find information about the fields, find and parse that Type /// match type_finder.find(fields)?.parse()? { /// pdb::TypeData::FieldList(list) => { /// // `fields` is a Vec<TypeData> /// for field in list.fields { /// if let pdb::TypeData::Member(member) = field { /// // follow `member.field_type` as desired /// println!(" - field {} at offset {:x}", member.name, member.offset); /// } else { /// // handle member functions, nested types, etc. /// } /// } /// /// if let Some(more_fields) = list.continuation { /// // A FieldList can be split across multiple records /// // TODO: follow `more_fields` and handle the next FieldList /// } /// } /// _ => { } /// } /// /// }, /// Ok(_) => { /// // ignore everything that's not a class-like type /// }, /// Err(pdb::Error::UnimplementedTypeKind(_)) => { /// // found an unhandled type record /// // this probably isn't fatal in most use cases /// }, /// Err(e) => { /// // other error, probably is worth failing /// return Err(e); /// } /// } /// # count += 1; /// } /// /// # assert_eq!(expected_count, count); /// # Ok(count) /// # } /// # assert!(test().expect("test") > 8000); /// ``` #[derive(Debug)] pub struct ItemInformation<'s, I> { stream: Stream<'s>, header: Header, _ph: PhantomData<&'s I>, } impl<'s, I> ItemInformation<'s, I> where I: ItemIndex, { /// Parses `TypeInformation` from raw stream data. pub(crate) fn parse(stream: Stream<'s>) -> Result<Self> { let mut buf = stream.parse_buffer(); let header = Header::parse(&mut buf)?; let _ph = PhantomData; Ok(Self { stream, header, _ph, }) } /// Returns an iterator that can traverse the type table in sequential order. pub fn iter(&self) -> ItemIter<'_, I> { // get a parse buffer let mut buf = self.stream.parse_buffer(); // drop the header // this can't fail; we've already read this once buf.take(self.header.header_size as usize) .expect("dropping TPI header"); ItemIter { buf, index: self.header.minimum_index, _ph: PhantomData, } } /// Returns the number of items contained in this `ItemInformation`. /// /// Note that in the case of the type stream ([`TypeInformation`]) primitive types are not /// stored in the PDB file. The number of distinct types reachable via this table will be higher /// than `len()`. pub fn len(&self) -> usize { (self.header.maximum_index - self.header.minimum_index) as usize } /// Returns whether this `ItemInformation` contains any data. pub fn is_empty(&self) -> bool { self.len() == 0 } /// Returns an `ItemFinder` with a default time-space tradeoff useful for access by /// [`ItemIndex`]. /// /// The `ItemFinder` is initially empty and must be populated by iterating. See the struct-level /// docs for an example. pub fn finder(&self) -> ItemFinder<'_, I> { ItemFinder::new(self, 3) } } /// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything /// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems /// like a reasonable thing to do. const PRIMITIVE_TYPE: &[u8] = b"\xff\xff"; /// Represents an entry in the type or id stream. /// /// An `Item` has been minimally processed and may not be correctly formed or even understood by /// this library. To avoid copying, `Items`s exist as references to data owned by the parent /// `ItemInformation`. Therefore, an `Item` may not outlive its parent. /// /// The data held by items can be parsed: /// /// - [`Type::parse`](Self::parse) returns [`TypeData`]. /// - [`Id::parse`](Self::parse) returns [`IdData`]. /// /// Depending on the stream, this can either be a [`Type`] or [`Id`]. #[derive(Copy, Clone, PartialEq)] pub struct Item<'t, I> { index: I, data: &'t [u8], } impl<'t, I> Item<'t, I> where I: ItemIndex, { /// Returns this item's index. /// /// Depending on the stream, either a [`TypeIndex`] or [`IdIndex`]. pub fn index(&self) -> I { self.index } /// Returns the the binary data length in the on-disk format. /// /// Items are prefixed by a 16-bit length number, which is not included in this length. pub fn len(&self) -> usize { self.data.len() } /// Returns whether this items's data is empty. /// /// Items are prefixed by a 16-bit length number, which is not included in this operation. pub fn is_empty(&self) -> bool { self.data.is_empty() } /// Returns the identifier of the kind of data stored by this this `Item`. /// /// As a special case, if this is a primitive [`Type`], this function will return `0xffff`. #[inline] pub fn raw_kind(&self) -> u16 { debug_assert!(self.data.len() >= 2); // assemble a little-endian u16 u16::from(self.data[0]) | (u16::from(self.data[1]) << 8) } } impl<'t, I> fmt::Debug for Item<'t, I> where I: ItemIndex, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "Type{{ kind: 0x{:04x} [{} bytes] }}", self.raw_kind(), self.data.len() ) } } /// In-memory index for efficient random-access to [`Item`]s by index. /// /// `ItemFinder` can be obtained via [`ItemInformation::finder`]. It starts out empty and must be /// populated by calling [`ItemFinder::update`] while iterating. There are two typedefs for easier /// use: /// /// - [`TypeFinder`] for finding [`Type`]s in a [`TypeInformation`](crate::TypeInformation) (TPI stream). /// - [`IdFinder`] for finding [`Id`]s in a [`IdInformation`](crate::IdInformation) (IPI stream). /// /// `ItemFinder` allocates all the memory it needs when it is first created. The footprint is /// directly proportional to the total number of types; see [`ItemInformation::len`]. /// /// # Time/space trade-off /// /// The naïve approach is to store the position of each `Item` as they are covered in the stream. /// The cost is memory: namely one `u32` per `Item`. /// /// Compare this approach to an `ItemFinder` that stores the position of every Nth item. Memory /// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2 /// iterations per lookup. However, iteration is cheap sequential memory access, and spending less /// memory on `ItemFinder` means more of the data can fit in the cache, so this is likely a good /// trade-off for small-ish values of N. /// /// `ItemFinder` is parameterized by `shift` which controls this trade-off as powers of two: /// /// * If `shift` is 0, `ItemFinder` stores 4 bytes per `Item` and always performs direct lookups. /// * If `shift` is 1, `ItemFinder` stores 2 bytes per `Item` and averages 0.5 iterations per /// lookup. /// * If `shift` is 2, `ItemFinder` stores 1 byte per `Item` and averages 1.5 iterations per /// lookup. /// * If `shift` is 3, `ItemFinder` stores 4 bits per `Item` and averages 3.5 iterations per /// lookup. /// * If `shift` is 4, `ItemFinder` stores 2 bits per `Item` and averages 7.5 iterations per /// lookup. /// * If `shift` is 5, `ItemFinder` stores 1 bit per `Item` and averages 15.5 iterations per /// lookup. /// /// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to /// item size, which varies, but typical numbers from a large program are: /// /// * 24% of items are 12 bytes /// * 34% of items are <= 16 bytes /// * 84% of items are <= 32 bytes /// /// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K items would require 1 MB or /// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only /// one or two 64-byte cache lines. #[derive(Debug)] pub struct ItemFinder<'t, I> { buffer: ParseBuffer<'t>, minimum_index: u32, maximum_index: u32, positions: Vec<u32>, shift: u8, _ph: PhantomData<&'t I>, } impl<'t, I> ItemFinder<'t, I> where I: ItemIndex, { fn new(info: &'t ItemInformation<'_, I>, shift: u8) -> Self { // maximum index is the highest index + 1. let count = info.header.maximum_index - info.header.minimum_index; let round_base = (1 << shift) - 1; let shifted_count = ((count + round_base) & !round_base) >> shift; let mut positions = Vec::with_capacity(shifted_count as usize); if shifted_count > 0 { // add record zero, which is identical regardless of shift positions.push(info.header.header_size); } Self { buffer: info.stream.parse_buffer(), minimum_index: info.header.minimum_index, maximum_index: info.header.maximum_index, positions, shift, _ph: PhantomData, } } /// Given an index, find which position in the Vec we should jump to and how many times we /// need to iterate to find the requested type. /// /// `shift` refers to the size of these bit shifts. #[inline] fn resolve(&self, type_index: u32) -> (usize, usize) { let raw = type_index - self.minimum_index; ( (raw >> self.shift) as usize, (raw & ((1 << self.shift) - 1)) as usize, ) } /// Returns the highest index which is currently served by this `ItemFinder`. /// /// When iterating through the stream, you shouldn't need to consider this. Items only ever /// reference lower indexes. However, when loading items referenced by the symbols stream, this /// can be useful to check whether iteration is required. #[inline] pub fn max_index(&self) -> I { I::from(match self.positions.len() { 0 => 0, // special case for an empty type index len => (len << self.shift) as u32 + self.minimum_index - 1, }) } /// Update this `ItemFinder` based on the current position of a [`ItemIter`]. /// /// Do this each time you call `.next()`. See documentation of [`ItemInformation`] for an /// example. #[inline] pub fn update(&mut self, iterator: &ItemIter<'t, I>) { let (vec_index, iteration_count) = self.resolve(iterator.index); if iteration_count == 0 && vec_index == self.positions.len() { let pos = iterator.buf.pos(); assert!(pos < u32::max_value() as usize); self.positions.push(pos as u32); } } /// Find an `Item` by its index. /// /// # Errors /// /// * `Error::TypeNotFound(index)` if you ask for an item that doesn't exist. /// * `Error::TypeNotIndexed(index, max_index)` if you ask for an item that is known to exist /// but is not currently known by this `ItemFinder`. pub fn find(&self, index: I) -> Result<Item<'t, I>> { let index: u32 = index.into(); if index < self.minimum_index { return Ok(Item { index: I::from(index), data: PRIMITIVE_TYPE, }); } else if index > self.maximum_index { return Err(Error::TypeNotFound(index)); } // figure out where we'd find this let (vec_index, iteration_count) = self.resolve(index); if let Some(pos) = self.positions.get(vec_index) { // hit let mut buf = self.buffer.clone(); // jump forwards buf.take(*pos as usize)?; // skip some records for _ in 0..iteration_count { let length = buf.parse_u16()?; buf.take(length as usize)?; } // read the type let length = buf.parse_u16()?; Ok(Item { index: I::from(index), data: buf.take(length as usize)?, }) } else { // miss Err(Error::TypeNotIndexed(index, self.max_index().into())) } } } /// An iterator over items in [`TypeInformation`](crate::TypeInformation) or /// [`IdInformation`](crate::IdInformation). /// /// The TPI and IPI streams are represented internally as a series of records, each of which have a /// length, a kind, and a type-specific field layout. Iteration performance is therefore similar to /// a linked list. #[derive(Debug)] pub struct ItemIter<'t, I> { buf: ParseBuffer<'t>, index: u32, _ph: PhantomData<&'t I>, } impl<'t, I> FallibleIterator for ItemIter<'t, I> where I: ItemIndex, { type Item = Item<'t, I>; type Error = Error; fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> { // see if we're at EOF if self.buf.is_empty() { return Ok(None); } // read the length of the next type let length = self.buf.parse_u16()? as usize; // validate if length < 2 { // this can't be correct return Err(Error::TypeTooShort); } // grab the type itself let type_buf = self.buf.take(length)?; let index = self.index; self.index += 1; // Done Ok(Some(Item { index: I::from(index), data: type_buf, })) } } /// Zero-copy access to the PDB type stream (TPI). /// /// This stream exposes types, the variants of which are enumerated by [`TypeData`]. See /// [`ItemInformation`] for more information on accessing types. pub type TypeInformation<'s> = ItemInformation<'s, TypeIndex>; /// In-memory index for efficient random-access of [`Type`]s by index. /// /// `TypeFinder` can be obtained via [`TypeInformation::finder`](ItemInformation::finder). See /// [`ItemFinder`] for more information. pub type TypeFinder<'t> = ItemFinder<'t, TypeIndex>; /// An iterator over [`Type`]s returned by [`TypeInformation::iter`](ItemInformation::iter). pub type TypeIter<'t> = ItemIter<'t, TypeIndex>; /// Information on a primitive type, class, or procedure. pub type Type<'t> = Item<'t, TypeIndex>; impl<'t> Item<'t, TypeIndex> { /// Parse this `Type` into `TypeData`. /// /// # Errors /// /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this /// library /// * `Error::UnexpectedEof` if the type record is malformed pub fn parse(&self) -> Result<TypeData<'t>> { if self.index < TypeIndex(0x1000) { // Primitive type type_data_for_primitive(self.index) } else { let mut buf = ParseBuffer::from(self.data); parse_type_data(&mut buf) } } } /// Zero-copy access to the PDB type stream (TPI). /// /// This stream exposes types, the variants of which are enumerated by [`IdData`]. See /// [`ItemInformation`] for more information on accessing types. pub type IdInformation<'s> = ItemInformation<'s, IdIndex>; /// In-memory index for efficient random-access of [`Id`]s by index. /// /// `IdFinder` can be obtained via [`IdInformation::finder`](ItemInformation::finder). See /// [`ItemFinder`] for more information. pub type IdFinder<'t> = ItemFinder<'t, IdIndex>; /// An iterator over [`Id`]s returned by [`IdInformation::iter`](ItemInformation::iter). pub type IdIter<'t> = ItemIter<'t, IdIndex>; /// Information on an inline function, build infos or source references. pub type Id<'t> = Item<'t, IdIndex>; impl<'t> Item<'t, IdIndex> { /// Parse this `Id` into `IdData`. /// /// # Errors /// /// * `Error::UnimplementedTypeKind(kind)` if the id record isn't currently understood by this /// library /// * `Error::UnexpectedEof` if the id record is malformed pub fn parse(&self) -> Result<IdData<'t>> { ParseBuffer::from(self.data).parse() } }