ms_pdb/tpi.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
//! Type Information Stream (TPI)
//!
//! Layout of a Type Stream:
//!
//! * `TypeStreamHeader` - specifies lots of important parameters
//! * Type Record Data
//!
//! Each Type Stream may also have an associated Type Hash Stream. The Type Hash Stream contains
//! indexing information that helps find records within the main Type Stream. The Type Stream
//! Header specifies several parameters that are needed for finding and decoding the Type Hash
//! Stream.
//!
//! The Type Hash Stream contains:
//!
//! * Hash Value Buffer: Contains a list of hash values, one for each Type Record in the
//! Type Stream.
//!
//! The offset and size of the Hash Value Buffer is specified in the `TypeStreamHeader`, in the
//! `hash_value_buffer_offset` and `hash_value_buffer_length` fields, respectively.
//!
//! It should be assumed that there are either 0 hash values, or a number equal to the number of
//! type records in the TPI stream (`type_index_end - type_end_begin`). Thus, if
//! `hash_value_buffer_length` is not equal to `(type_index_end - type_end_begin) * hash_key_size`
//! we can consider the PDB malformed.
//!
//! * Type Index Offset Buffer - A list of pairs of `u32` values where the first is a Type Index
//! and the second is the offset within Type Record Data of the type with this index.
//! This enables a binary search to find a given Type Index record.
//!
//! The offset and size of the Type Index Offset Buffer is specified in the `TypeStreamHeader`,
//! in the `index_offset_buffer_offset` and `index_offset_buffer_length` fields, respectively.
//!
//! * Hash Adjustment Buffer - A hash table whose keys are the hash values in the hash value
//! buffer and whose values are type indices.
//!
//! The offset and size of the Type Index Offset BUffer is specified in the `TypeStreamHeader`,
//! in the `index_offset_buffer_offset` and `index_offset_buffer_length` fields, respectively.
//!
pub mod hash;
use super::*;
use crate::parser::Parser;
use crate::types::fields::{Field, IterFields};
use crate::types::{build_types_starts, TypeData, TypeIndex, TypeIndexLe, TypeRecord, TypesIter};
use anyhow::bail;
use std::fmt::Debug;
use std::mem::size_of;
use std::ops::Range;
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U32};
/// The header of the TPI stream.
#[allow(missing_docs)]
#[derive(Clone, Eq, PartialEq, IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
#[repr(C)]
pub struct TypeStreamHeader {
pub version: U32<LE>,
pub header_size: U32<LE>,
pub type_index_begin: TypeIndexLe,
pub type_index_end: TypeIndexLe,
/// The number of bytes of type record data following the `TypeStreamHeader`.
pub type_record_bytes: U32<LE>,
pub hash_stream_index: StreamIndexU16,
pub hash_aux_stream_index: StreamIndexU16,
/// The size of each hash key in the Hash Value Substream. For the current version of TPI,
/// this value should always be 4.
pub hash_key_size: U32<LE>,
/// The number of hash buckets. This is used when calculating the record hashes. Each hash
/// is computed, and then it is divided by num_hash_buckets and the remainder becomes the
/// final hash.
///
/// If `hash_value_buffer_length` is non-zero, then `num_hash_buckets` must also be non-zero.
pub num_hash_buckets: U32<LE>,
pub hash_value_buffer_offset: I32<LE>,
pub hash_value_buffer_length: U32<LE>,
pub index_offset_buffer_offset: I32<LE>,
pub index_offset_buffer_length: U32<LE>,
pub hash_adj_buffer_offset: I32<LE>,
pub hash_adj_buffer_length: U32<LE>,
}
impl TypeStreamHeader {
/// Makes an empty one
pub fn empty() -> Self {
Self {
version: Default::default(),
header_size: U32::new(size_of::<TypeStreamHeader>() as u32),
type_index_begin: TypeIndexLe(U32::new(TypeIndex::MIN_BEGIN.0)),
type_index_end: TypeIndexLe(U32::new(TypeIndex::MIN_BEGIN.0)),
type_record_bytes: Default::default(),
hash_stream_index: StreamIndexU16::NIL,
hash_aux_stream_index: StreamIndexU16::NIL,
hash_key_size: Default::default(),
num_hash_buckets: Default::default(),
hash_value_buffer_offset: Default::default(),
hash_value_buffer_length: Default::default(),
index_offset_buffer_offset: Default::default(),
index_offset_buffer_length: Default::default(),
hash_adj_buffer_offset: Default::default(),
hash_adj_buffer_length: Default::default(),
}
}
}
/// The size of the `TpiStreamHeader` structure.
pub const TPI_STREAM_HEADER_LEN: usize = size_of::<TypeStreamHeader>();
/// The expected value of `TypeStreamHeader::version`.
pub const TYPE_STREAM_VERSION_2004: u32 = 20040203;
/// Contains a TPI Stream or IPI Stream.
pub struct TypeStream<StreamData>
where
StreamData: AsRef<[u8]>,
{
/// The stream data. This contains the entire type stream, including header and type records.
pub stream_data: StreamData,
type_index_begin: TypeIndex,
type_index_end: TypeIndex,
/// A starts vector for type record offsets. This is created on-demand, since many users of
/// `TypeStream` do not need this.
record_starts: OnceCell<Vec<u32>>,
}
/// Distinguishes the TPI and IPI streams.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum TypeStreamKind {
/// The primary type stream
TPI,
/// The ID stream
IPI,
}
impl TypeStreamKind {
/// Get the stream index. Fortunately, the stream indexes are fixed.
pub fn stream(self) -> Stream {
match self {
Self::IPI => Stream::IPI,
Self::TPI => Stream::TPI,
}
}
}
/// Represents an entry in the Hash Index Offset Substream.
#[repr(C)]
#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
pub struct HashIndexPair {
/// The type index at the start of this range.
pub type_index: TypeIndexLe,
/// The offset within the Type Records Substream (not the entire Type Stream) where this
/// record begins.
pub offset: U32<LE>,
}
impl<StreamData> TypeStream<StreamData>
where
StreamData: AsRef<[u8]>,
{
/// Gets a reference to the stream header
pub fn header(&self) -> Option<&TypeStreamHeader> {
let stream_data: &[u8] = self.stream_data.as_ref();
let (header, _) = TypeStreamHeader::ref_from_prefix(stream_data).ok()?;
Some(header)
}
/// Returns the version of the stream, or `TYPE_STREAM_VERSION_2004` if this is an empty stream.
pub fn version(&self) -> u32 {
if let Some(header) = self.header() {
header.version.get()
} else {
TYPE_STREAM_VERSION_2004
}
}
/// Returns the stream index of the related hash stream, if any.
pub fn hash_stream(&self) -> Option<u32> {
self.header()?.hash_stream_index.get()
}
/// Checks whether this is a degenerate empty stream.
pub fn is_empty(&self) -> bool {
self.stream_data.as_ref().is_empty()
}
/// Gets a mutable reference to the stream header
pub fn header_mut(&mut self) -> Option<&mut TypeStreamHeader>
where
StreamData: AsMut<[u8]>,
{
let (header, _) = TypeStreamHeader::mut_from_prefix(self.stream_data.as_mut()).ok()?;
Some(header)
}
/// The type index of the first type record.
pub fn type_index_begin(&self) -> TypeIndex {
self.type_index_begin
}
/// The type index of the last type record, plus 1.
pub fn type_index_end(&self) -> TypeIndex {
self.type_index_end
}
/// The number of types defined in the type stream.
pub fn num_types(&self) -> u32 {
self.type_index_end.0 - self.type_index_begin.0
}
/// Gets the byte offset within the stream of the record data.
pub fn records_offset(&self) -> usize {
if let Some(header) = self.header() {
header.header_size.get() as usize
} else {
0
}
}
/// Returns the encoded type records found in the TPI or IPI stream.
///
/// The type records immediately follow the type stream. The length is given by the
/// header field type_record_bytes. The values in the header were validated in
/// read_tpi_or_ipi_stream(), so we do not need to check them again, here.
pub fn type_records_bytes(&self) -> &[u8] {
let records_range = self.type_records_range();
if records_range.is_empty() {
&[]
} else {
&self.stream_data.as_ref()[records_range]
}
}
/// Returns the encoded type records found in the type stream.
pub fn type_records_bytes_mut(&mut self) -> &mut [u8]
where
StreamData: AsMut<[u8]>,
{
let records_range = self.type_records_range();
if records_range.is_empty() {
&mut []
} else {
&mut self.stream_data.as_mut()[records_range]
}
}
/// Returns the byte range of the encoded type records found in the type stream.
pub fn type_records_range(&self) -> std::ops::Range<usize> {
if let Some(header) = self.header() {
let size = header.type_record_bytes.get();
if size == 0 {
return 0..0;
}
let type_records_start = header.header_size.get();
let type_records_end = type_records_start + size;
type_records_start as usize..type_records_end as usize
} else {
0..0
}
}
/// Iterates the types contained within this type stream.
pub fn iter_type_records(&self) -> TypesIter<'_> {
TypesIter::new(self.type_records_bytes())
}
/// Parses the header of a Type Stream and validates it.
pub fn parse(stream_index: Stream, stream_data: StreamData) -> anyhow::Result<Self> {
let stream_bytes: &[u8] = stream_data.as_ref();
if stream_bytes.is_empty() {
return Ok(Self {
stream_data,
type_index_begin: TypeIndex::MIN_BEGIN,
type_index_end: TypeIndex::MIN_BEGIN,
record_starts: OnceCell::new(),
});
}
let mut p = Parser::new(stream_bytes);
let tpi_stream_header: TypeStreamHeader = p.copy()?;
let type_index_begin = tpi_stream_header.type_index_begin.get();
let type_index_end = tpi_stream_header.type_index_end.get();
if type_index_end < type_index_begin {
bail!(
"Type stream (stream {stream_index}) has invalid values in header. \
The type_index_begin field is greater than the type_index_end field."
);
}
if type_index_begin < TypeIndex::MIN_BEGIN {
bail!(
"The Type Stream has an invalid value for type_index_begin ({type_index_begin:?}). \
It is less than the minimum required value ({}).",
TypeIndex::MIN_BEGIN.0
);
}
let type_data_start = tpi_stream_header.header_size.get();
if type_data_start < TPI_STREAM_HEADER_LEN as u32 {
bail!(
"Type stream (stream {stream_index}) has invalid values in header. \
The header_size field is smaller than the definition of the actual header."
);
}
let type_data_end = type_data_start + tpi_stream_header.type_record_bytes.get();
if type_data_end > stream_bytes.len() as u32 {
bail!(
"Type stream (stream {stream_index}) has invalid values in header. \
The header_size and type_record_bytes fields exceed the size of the stream."
);
}
Ok(TypeStream {
stream_data,
type_index_begin,
type_index_end,
record_starts: OnceCell::new(),
})
}
/// Builds a "starts" table that gives the starting location of each type record.
pub fn build_types_starts(&self) -> TypeIndexMap {
let starts =
crate::types::build_types_starts(self.num_types() as usize, self.type_records_bytes());
TypeIndexMap {
type_index_begin: self.type_index_begin,
type_index_end: self.type_index_end,
starts,
}
}
/// Creates a new `TypeStream` that referenced the stream data of this `TypeStream`.
/// This is typically used for temporarily creating a `TypeStream<&[u8]>` from a
/// `TypeStream<Vec<u8>>`.
pub fn to_ref(&self) -> TypeStream<&[u8]> {
TypeStream {
stream_data: self.stream_data.as_ref(),
type_index_begin: self.type_index_begin,
type_index_end: self.type_index_end,
record_starts: OnceCell::new(),
}
}
/// Gets the "starts" vector for the byte offsets of the records in this `TypeStream`.
///
/// This function will create the starts vector on-demand.
pub fn record_starts(&self) -> &[u32] {
self.record_starts.get_or_init(|| {
let type_records = self.type_records_bytes();
build_types_starts(self.num_types() as usize, type_records)
})
}
/// Returns `true` if `type_index` refers to a primitive type.
pub fn is_primitive(&self, type_index: TypeIndex) -> bool {
type_index < self.type_index_begin
}
/// Retrieves the type record identified by `type_index`.
///
/// This should only be used for non-primitive `TypeIndex` values. If this is called with a
/// primitive `TypeIndex` then it will return `Err`.
pub fn record(&self, type_index: TypeIndex) -> anyhow::Result<TypeRecord<'_>> {
let Some(relative_type_index) = type_index.0.checked_sub(self.type_index_begin.0) else {
bail!("The given TypeIndex is a primitive type index, not a type record.");
};
let starts = self.record_starts();
let Some(&record_start) = starts.get(relative_type_index as usize) else {
bail!("The given TypeIndex is out of bounds (exceeds maximum allowed TypeIndex)");
};
let all_type_records = self.type_records_bytes();
let Some(this_type_record_slice) = all_type_records.get(record_start as usize..) else {
// This should never happen, but let's be cautious.
bail!("Internal error: record offset is out of range.");
};
let mut iter = TypesIter::new(this_type_record_slice);
if let Some(record) = iter.next() {
Ok(record)
} else {
bail!("Failed to decode type record");
}
}
/// Iterate the fields of an `LF_STRUCTURE`, `LF_CLASS`, `LF_ENUM`, etc. This correctly
/// iterates across chains of `LF_FIELDLIST`.
pub fn iter_fields(&self, field_list: TypeIndex) -> IterFieldChain<'_, StreamData> {
// We initialize `fields` to an empty iterator so that the first iteration of
// IterFieldChain::next() will find no records and will then check next_field_list.
IterFieldChain {
type_stream: self,
next_field_list: if field_list.0 != 0 {
Some(field_list)
} else {
None
},
fields: IterFields { bytes: &[] },
}
}
}
/// Iterator state for `iter_fields`
pub struct IterFieldChain<'a, StreamData>
where
StreamData: AsRef<[u8]>,
{
/// The current `LF_FIELDLIST` record that we are decoding.
fields: IterFields<'a>,
/// Allows us to read `LF_FIELDLIST` records.
type_stream: &'a TypeStream<StreamData>,
/// The pointer to the next `LF_FIELDLIST` that we will decode.
next_field_list: Option<TypeIndex>,
}
impl<'a, StreamData> Iterator for IterFieldChain<'a, StreamData>
where
StreamData: AsRef<[u8]>,
{
type Item = Field<'a>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if let Some(field) = self.fields.next() {
if let Field::Index(index) = &field {
// The full field list is split across more than one LF_FIELDLIST record.
// Store the link to the next field list and do not return this item to the caller.
self.next_field_list = Some(*index);
continue;
}
return Some(field);
}
// We have run out of fields in the current LF_FIELDLIST record.
// See if there is a pointer to another LF_FIELDLIST.
let next_field_list = self.next_field_list.take()?;
let next_record = self.type_stream.record(next_field_list).ok()?;
match next_record.parse().ok()? {
TypeData::FieldList(fl) => {
// Restart iteration on the new field list.
self.fields = fl.iter();
}
_ => {
// Wrong record type!
return None;
}
}
}
}
}
impl<F: ReadAt> crate::Pdb<F> {
/// Reads the TPI stream.
pub fn read_type_stream(&self) -> anyhow::Result<TypeStream<Vec<u8>>> {
self.read_tpi_or_ipi_stream(Stream::TPI)
}
/// Reads the IPI stream.
pub fn read_ipi_stream(&self) -> anyhow::Result<TypeStream<Vec<u8>>> {
self.read_tpi_or_ipi_stream(Stream::IPI)
}
/// Reads the TPI or IPI stream.
pub fn read_tpi_or_ipi_stream(
&self,
stream_index: Stream,
) -> anyhow::Result<TypeStream<Vec<u8>>> {
let stream_data = self.read_stream_to_vec(stream_index.into())?;
TypeStream::parse(stream_index, stream_data)
}
}
/// Maps `TypeIndex` values to the byte range of records within a type stream.
pub struct TypeIndexMap {
/// Copied from type stream header.
pub type_index_begin: TypeIndex,
/// Copied from type stream header.
pub type_index_end: TypeIndex,
/// Contains a "starts" vector for the byte offsets of each type record.
///
/// This vector has an additional value at the end, which gives the size in bytes of the
/// type stream.
pub starts: Vec<u32>,
}
impl TypeIndexMap {
/// Tests whether a `TypeIndex` is a primitive type.
pub fn is_primitive(&self, ti: TypeIndex) -> bool {
ti < self.type_index_begin
}
/// Given a `TypeIndex`, returns the byte range within a `TypeStream` where that record
/// is stored.
///
/// If `ti` is a primitive type then this function will return `Err`. The caller should
/// use the `is_primitive` method to check whether a `TypeIndex` is a primitive type.
pub fn record_range(&self, ti: TypeIndex) -> anyhow::Result<Range<usize>> {
let Some(i) = ti.0.checked_sub(self.type_index_begin.0) else {
bail!("The TypeIndex is a primitive type, not a type record.");
};
if let Some(w) = self.starts.get(i as usize..i as usize + 2) {
Ok(w[0] as usize..w[1] as usize)
} else {
bail!("The TypeIndex is out of range.");
}
}
}
/// Represents the cached state of a Type Stream header.
pub struct CachedTypeStreamHeader {
pub(crate) header: Option<TypeStreamHeader>,
}
impl CachedTypeStreamHeader {
/// Gets direct access to the type stream header, if any.
pub fn header(&self) -> Option<&TypeStreamHeader> {
self.header.as_ref()
}
/// Gets the beginning of the type index space, or `TypeIndex::MIN_BEGIN` if this type stream
/// does not contain any data.
pub fn type_index_begin(&self) -> TypeIndex {
if let Some(h) = &self.header {
h.type_index_begin.get()
} else {
TypeIndex::MIN_BEGIN
}
}
/// Gets the end of the type index space, or `TypeIndex::MIN_BEGIN` if this type stream does
/// not contain any data.
pub fn type_index_end(&self) -> TypeIndex {
if let Some(h) = &self.header {
h.type_index_end.get()
} else {
TypeIndex::MIN_BEGIN
}
}
}