ms_pdb/
tpi.rs

1//! Type Information Stream (TPI)
2//!
3//! Layout of a Type Stream:
4//!
5//! * `TypeStreamHeader` - specifies lots of important parameters
6//! * Type Record Data
7//!
8//! Each Type Stream may also have an associated Type Hash Stream. The Type Hash Stream contains
9//! indexing information that helps find records within the main Type Stream. The Type Stream
10//! Header specifies several parameters that are needed for finding and decoding the Type Hash
11//! Stream.
12//!
13//! The Type Hash Stream contains:
14//!
15//! * Hash Value Buffer: Contains a list of hash values, one for each Type Record in the
16//!   Type Stream.
17//!
18//!   The offset and size of the Hash Value Buffer is specified in the `TypeStreamHeader`, in the
19//!   `hash_value_buffer_offset` and `hash_value_buffer_length` fields, respectively.
20//!
//!   It should be assumed that there are either 0 hash values, or a number equal to the number of
//!   type records in the TPI stream (`type_index_end - type_index_begin`). Thus, if
//!   `hash_value_buffer_length` is neither 0 nor equal to
//!   `(type_index_end - type_index_begin) * hash_key_size`, we can consider the PDB malformed.
25//!
26//! * Type Index Offset Buffer - A list of pairs of `u32` values where the first is a Type Index
27//!   and the second is the offset within Type Record Data of the type with this index.
28//!   This enables a binary search to find a given Type Index record.
29//!
30//!   The offset and size of the Type Index Offset Buffer is specified in the `TypeStreamHeader`,
31//!   in the `index_offset_buffer_offset` and `index_offset_buffer_length` fields, respectively.
32//!
33//! * Hash Adjustment Buffer - A hash table whose keys are the hash values in the hash value
34//!   buffer and whose values are type indices.
35//!
//!   The offset and size of the Hash Adjustment Buffer is specified in the `TypeStreamHeader`,
//!   in the `hash_adj_buffer_offset` and `hash_adj_buffer_length` fields, respectively.
38
39pub mod hash;
40
41use super::*;
42use crate::parser::Parser;
43use crate::types::fields::{Field, IterFields};
44use crate::types::{build_types_starts, TypeData, TypeIndex, TypeIndexLe, TypeRecord, TypesIter};
45use anyhow::bail;
46use std::fmt::Debug;
47use std::mem::size_of;
48use std::ops::Range;
49use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U32};
50
51/// The header of the TPI stream.
52#[allow(missing_docs)]
53#[derive(Clone, Eq, PartialEq, IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
54#[repr(C)]
55pub struct TypeStreamHeader {
56    pub version: U32<LE>,
57    pub header_size: U32<LE>,
58    pub type_index_begin: TypeIndexLe,
59    pub type_index_end: TypeIndexLe,
60    /// The number of bytes of type record data following the `TypeStreamHeader`.
61    pub type_record_bytes: U32<LE>,
62
63    pub hash_stream_index: StreamIndexU16,
64    pub hash_aux_stream_index: StreamIndexU16,
65
66    /// The size of each hash key in the Hash Value Substream. For the current version of TPI,
67    /// this value should always be 4.
68    pub hash_key_size: U32<LE>,
69    /// The number of hash buckets. This is used when calculating the record hashes. Each hash
70    /// is computed, and then it is divided by num_hash_buckets and the remainder becomes the
71    /// final hash.
72    ///
73    /// If `hash_value_buffer_length` is non-zero, then `num_hash_buckets` must also be non-zero.
74    pub num_hash_buckets: U32<LE>,
75    pub hash_value_buffer_offset: I32<LE>,
76    pub hash_value_buffer_length: U32<LE>,
77
78    pub index_offset_buffer_offset: I32<LE>,
79    pub index_offset_buffer_length: U32<LE>,
80
81    pub hash_adj_buffer_offset: I32<LE>,
82    pub hash_adj_buffer_length: U32<LE>,
83}
84
85impl TypeStreamHeader {
86    /// Makes an empty one
87    pub fn empty() -> Self {
88        Self {
89            version: Default::default(),
90            header_size: U32::new(size_of::<TypeStreamHeader>() as u32),
91            type_index_begin: TypeIndexLe(U32::new(TypeIndex::MIN_BEGIN.0)),
92            type_index_end: TypeIndexLe(U32::new(TypeIndex::MIN_BEGIN.0)),
93            type_record_bytes: Default::default(),
94            hash_stream_index: StreamIndexU16::NIL,
95            hash_aux_stream_index: StreamIndexU16::NIL,
96            hash_key_size: Default::default(),
97            num_hash_buckets: Default::default(),
98            hash_value_buffer_offset: Default::default(),
99            hash_value_buffer_length: Default::default(),
100            index_offset_buffer_offset: Default::default(),
101            index_offset_buffer_length: Default::default(),
102            hash_adj_buffer_offset: Default::default(),
103            hash_adj_buffer_length: Default::default(),
104        }
105    }
106}
107
/// The size in bytes of the `TypeStreamHeader` structure.
109pub const TPI_STREAM_HEADER_LEN: usize = size_of::<TypeStreamHeader>();
110
111/// The expected value of `TypeStreamHeader::version`.
112pub const TYPE_STREAM_VERSION_2004: u32 = 20040203;
113
114/// Contains a TPI Stream or IPI Stream.
115pub struct TypeStream<StreamData>
116where
117    StreamData: AsRef<[u8]>,
118{
119    /// The stream data. This contains the entire type stream, including header and type records.
120    pub stream_data: StreamData,
121
122    type_index_begin: TypeIndex,
123    type_index_end: TypeIndex,
124
125    /// A starts vector for type record offsets. This is created on-demand, since many users of
126    /// `TypeStream` do not need this.
127    record_starts: OnceCell<Vec<u32>>,
128}
129
/// Selects one of the two type streams: TPI or IPI.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TypeStreamKind {
    /// The TPI stream, which is the primary type stream.
    TPI,
    /// The IPI stream, which is the ID stream.
    IPI,
}
138
139impl TypeStreamKind {
140    /// Get the stream index. Fortunately, the stream indexes are fixed.
141    pub fn stream(self) -> Stream {
142        match self {
143            Self::IPI => Stream::IPI,
144            Self::TPI => Stream::TPI,
145        }
146    }
147}
148
149/// Represents an entry in the Hash Index Offset Substream.
150#[repr(C)]
151#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
152pub struct HashIndexPair {
153    /// The type index at the start of this range.
154    pub type_index: TypeIndexLe,
155    /// The offset within the Type Records Substream (not the entire Type Stream) where this
156    /// record begins.
157    pub offset: U32<LE>,
158}
159
160impl<StreamData> TypeStream<StreamData>
161where
162    StreamData: AsRef<[u8]>,
163{
164    /// Gets a reference to the stream header
165    pub fn header(&self) -> Option<&TypeStreamHeader> {
166        let stream_data: &[u8] = self.stream_data.as_ref();
167        let (header, _) = TypeStreamHeader::ref_from_prefix(stream_data).ok()?;
168        Some(header)
169    }
170
171    /// Returns the version of the stream, or `TYPE_STREAM_VERSION_2004` if this is an empty stream.
172    pub fn version(&self) -> u32 {
173        if let Some(header) = self.header() {
174            header.version.get()
175        } else {
176            TYPE_STREAM_VERSION_2004
177        }
178    }
179
180    /// Returns the stream index of the related hash stream, if any.
181    pub fn hash_stream(&self) -> Option<u32> {
182        self.header()?.hash_stream_index.get()
183    }
184
185    /// Checks whether this is a degenerate empty stream.
186    pub fn is_empty(&self) -> bool {
187        self.stream_data.as_ref().is_empty()
188    }
189
190    /// Gets a mutable reference to the stream header
191    pub fn header_mut(&mut self) -> Option<&mut TypeStreamHeader>
192    where
193        StreamData: AsMut<[u8]>,
194    {
195        let (header, _) = TypeStreamHeader::mut_from_prefix(self.stream_data.as_mut()).ok()?;
196        Some(header)
197    }
198
199    /// The type index of the first type record.
200    pub fn type_index_begin(&self) -> TypeIndex {
201        self.type_index_begin
202    }
203
204    /// The type index of the last type record, plus 1.
205    pub fn type_index_end(&self) -> TypeIndex {
206        self.type_index_end
207    }
208
209    /// The number of types defined in the type stream.
210    pub fn num_types(&self) -> u32 {
211        self.type_index_end.0 - self.type_index_begin.0
212    }
213
214    /// Gets the byte offset within the stream of the record data.
215    pub fn records_offset(&self) -> usize {
216        if let Some(header) = self.header() {
217            header.header_size.get() as usize
218        } else {
219            0
220        }
221    }
222
223    /// Returns the encoded type records found in the TPI or IPI stream.
224    ///
225    /// The type records immediately follow the type stream. The length is given by the
226    /// header field type_record_bytes. The values in the header were validated in
227    /// read_tpi_or_ipi_stream(), so we do not need to check them again, here.
228    pub fn type_records_bytes(&self) -> &[u8] {
229        let records_range = self.type_records_range();
230        if records_range.is_empty() {
231            &[]
232        } else {
233            &self.stream_data.as_ref()[records_range]
234        }
235    }
236
237    /// Returns the encoded type records found in the type stream.
238    pub fn type_records_bytes_mut(&mut self) -> &mut [u8]
239    where
240        StreamData: AsMut<[u8]>,
241    {
242        let records_range = self.type_records_range();
243        if records_range.is_empty() {
244            &mut []
245        } else {
246            &mut self.stream_data.as_mut()[records_range]
247        }
248    }
249
250    /// Returns the byte range of the encoded type records found in the type stream.
251    pub fn type_records_range(&self) -> std::ops::Range<usize> {
252        if let Some(header) = self.header() {
253            let size = header.type_record_bytes.get();
254            if size == 0 {
255                return 0..0;
256            }
257            let type_records_start = header.header_size.get();
258            let type_records_end = type_records_start + size;
259            type_records_start as usize..type_records_end as usize
260        } else {
261            0..0
262        }
263    }
264
265    /// Iterates the types contained within this type stream.
266    pub fn iter_type_records(&self) -> TypesIter<'_> {
267        TypesIter::new(self.type_records_bytes())
268    }
269
270    /// Parses the header of a Type Stream and validates it.
271    pub fn parse(stream_index: Stream, stream_data: StreamData) -> anyhow::Result<Self> {
272        let stream_bytes: &[u8] = stream_data.as_ref();
273
274        if stream_bytes.is_empty() {
275            return Ok(Self {
276                stream_data,
277                type_index_begin: TypeIndex::MIN_BEGIN,
278                type_index_end: TypeIndex::MIN_BEGIN,
279                record_starts: OnceCell::new(),
280            });
281        }
282
283        let mut p = Parser::new(stream_bytes);
284        let tpi_stream_header: TypeStreamHeader = p.copy()?;
285
286        let type_index_begin = tpi_stream_header.type_index_begin.get();
287        let type_index_end = tpi_stream_header.type_index_end.get();
288        if type_index_end < type_index_begin {
289            bail!(
290                "Type stream (stream {stream_index}) has invalid values in header.  \
291                 The type_index_begin field is greater than the type_index_end field."
292            );
293        }
294
295        if type_index_begin < TypeIndex::MIN_BEGIN {
296            bail!(
297                "The Type Stream has an invalid value for type_index_begin ({type_index_begin:?}). \
298                 It is less than the minimum required value ({}).",
299                TypeIndex::MIN_BEGIN.0
300            );
301        }
302
303        let type_data_start = tpi_stream_header.header_size.get();
304        if type_data_start < TPI_STREAM_HEADER_LEN as u32 {
305            bail!(
306                "Type stream (stream {stream_index}) has invalid values in header.  \
307                 The header_size field is smaller than the definition of the actual header."
308            );
309        }
310
311        let type_data_end = type_data_start + tpi_stream_header.type_record_bytes.get();
312        if type_data_end > stream_bytes.len() as u32 {
313            bail!(
314                "Type stream (stream {stream_index}) has invalid values in header.  \
315                   The header_size and type_record_bytes fields exceed the size of the stream."
316            );
317        }
318
319        Ok(TypeStream {
320            stream_data,
321            type_index_begin,
322            type_index_end,
323            record_starts: OnceCell::new(),
324        })
325    }
326
327    /// Builds a "starts" table that gives the starting location of each type record.
328    pub fn build_types_starts(&self) -> TypeIndexMap {
329        let starts =
330            crate::types::build_types_starts(self.num_types() as usize, self.type_records_bytes());
331
332        TypeIndexMap {
333            type_index_begin: self.type_index_begin,
334            type_index_end: self.type_index_end,
335            starts,
336        }
337    }
338
339    /// Creates a new `TypeStream` that referenced the stream data of this `TypeStream`.
340    /// This is typically used for temporarily creating a `TypeStream<&[u8]>` from a
341    /// `TypeStream<Vec<u8>>`.
342    pub fn to_ref(&self) -> TypeStream<&[u8]> {
343        TypeStream {
344            stream_data: self.stream_data.as_ref(),
345            type_index_begin: self.type_index_begin,
346            type_index_end: self.type_index_end,
347            record_starts: OnceCell::new(),
348        }
349    }
350
351    /// Gets the "starts" vector for the byte offsets of the records in this `TypeStream`.
352    ///
353    /// This function will create the starts vector on-demand.
354    pub fn record_starts(&self) -> &[u32] {
355        self.record_starts.get_or_init(|| {
356            let type_records = self.type_records_bytes();
357            build_types_starts(self.num_types() as usize, type_records)
358        })
359    }
360
361    /// Returns `true` if `type_index` refers to a primitive type.
362    pub fn is_primitive(&self, type_index: TypeIndex) -> bool {
363        type_index < self.type_index_begin
364    }
365
366    /// Retrieves the type record identified by `type_index`.
367    ///
368    /// This should only be used for non-primitive `TypeIndex` values. If this is called with a
369    /// primitive `TypeIndex` then it will return `Err`.
370    pub fn record(&self, type_index: TypeIndex) -> anyhow::Result<TypeRecord<'_>> {
371        let Some(relative_type_index) = type_index.0.checked_sub(self.type_index_begin.0) else {
372            bail!("The given TypeIndex is a primitive type index, not a type record.");
373        };
374
375        let starts = self.record_starts();
376        let Some(&record_start) = starts.get(relative_type_index as usize) else {
377            bail!("The given TypeIndex is out of bounds (exceeds maximum allowed TypeIndex)");
378        };
379
380        let all_type_records = self.type_records_bytes();
381        let Some(this_type_record_slice) = all_type_records.get(record_start as usize..) else {
382            // This should never happen, but let's be cautious.
383            bail!("Internal error: record offset is out of range.");
384        };
385
386        let mut iter = TypesIter::new(this_type_record_slice);
387        if let Some(record) = iter.next() {
388            Ok(record)
389        } else {
390            bail!("Failed to decode type record");
391        }
392    }
393
394    /// Iterate the fields of an `LF_STRUCTURE`, `LF_CLASS`, `LF_ENUM`, etc. This correctly
395    /// iterates across chains of `LF_FIELDLIST`.
396    pub fn iter_fields(&self, field_list: TypeIndex) -> IterFieldChain<'_, StreamData> {
397        // We initialize `fields` to an empty iterator so that the first iteration of
398        // IterFieldChain::next() will find no records and will then check next_field_list.
399        IterFieldChain {
400            type_stream: self,
401            next_field_list: if field_list.0 != 0 {
402                Some(field_list)
403            } else {
404                None
405            },
406            fields: IterFields { bytes: &[] },
407        }
408    }
409}
410
411/// Iterator state for `iter_fields`
412pub struct IterFieldChain<'a, StreamData>
413where
414    StreamData: AsRef<[u8]>,
415{
416    /// The current `LF_FIELDLIST` record that we are decoding.
417    fields: IterFields<'a>,
418
419    /// Allows us to read `LF_FIELDLIST` records.
420    type_stream: &'a TypeStream<StreamData>,
421
422    /// The pointer to the next `LF_FIELDLIST` that we will decode.
423    next_field_list: Option<TypeIndex>,
424}
425
426impl<'a, StreamData> Iterator for IterFieldChain<'a, StreamData>
427where
428    StreamData: AsRef<[u8]>,
429{
430    type Item = Field<'a>;
431
432    fn next(&mut self) -> Option<Self::Item> {
433        loop {
434            if let Some(field) = self.fields.next() {
435                if let Field::Index(index) = &field {
436                    // The full field list is split across more than one LF_FIELDLIST record.
437                    // Store the link to the next field list and do not return this item to the caller.
438                    self.next_field_list = Some(*index);
439                    continue;
440                }
441
442                return Some(field);
443            }
444
445            // We have run out of fields in the current LF_FIELDLIST record.
446            // See if there is a pointer to another LF_FIELDLIST.
447            let next_field_list = self.next_field_list.take()?;
448            let next_record = self.type_stream.record(next_field_list).ok()?;
449            match next_record.parse().ok()? {
450                TypeData::FieldList(fl) => {
451                    // Restart iteration on the new field list.
452                    self.fields = fl.iter();
453                }
454                _ => {
455                    // Wrong record type!
456                    return None;
457                }
458            }
459        }
460    }
461}
462
463impl<F: ReadAt> crate::Pdb<F> {
464    /// Reads the TPI stream.
465    pub fn read_type_stream(&self) -> anyhow::Result<TypeStream<Vec<u8>>> {
466        self.read_tpi_or_ipi_stream(Stream::TPI)
467    }
468
469    /// Reads the IPI stream.
470    pub fn read_ipi_stream(&self) -> anyhow::Result<TypeStream<Vec<u8>>> {
471        self.read_tpi_or_ipi_stream(Stream::IPI)
472    }
473
474    /// Reads the TPI or IPI stream.
475    pub fn read_tpi_or_ipi_stream(
476        &self,
477        stream_index: Stream,
478    ) -> anyhow::Result<TypeStream<Vec<u8>>> {
479        let stream_data = self.read_stream_to_vec(stream_index.into())?;
480        TypeStream::parse(stream_index, stream_data)
481    }
482}
483
484/// Maps `TypeIndex` values to the byte range of records within a type stream.
485pub struct TypeIndexMap {
486    /// Copied from type stream header.
487    pub type_index_begin: TypeIndex,
488
489    /// Copied from type stream header.
490    pub type_index_end: TypeIndex,
491
492    /// Contains a "starts" vector for the byte offsets of each type record.
493    ///
494    /// This vector has an additional value at the end, which gives the size in bytes of the
495    /// type stream.
496    pub starts: Vec<u32>,
497}
498
499impl TypeIndexMap {
500    /// Tests whether a `TypeIndex` is a primitive type.
501    pub fn is_primitive(&self, ti: TypeIndex) -> bool {
502        ti < self.type_index_begin
503    }
504
505    /// Given a `TypeIndex`, returns the byte range within a `TypeStream` where that record
506    /// is stored.
507    ///
508    /// If `ti` is a primitive type then this function will return `Err`. The caller should
509    /// use the `is_primitive` method to check whether a `TypeIndex` is a primitive type.
510    pub fn record_range(&self, ti: TypeIndex) -> anyhow::Result<Range<usize>> {
511        let Some(i) = ti.0.checked_sub(self.type_index_begin.0) else {
512            bail!("The TypeIndex is a primitive type, not a type record.");
513        };
514
515        if let Some(w) = self.starts.get(i as usize..i as usize + 2) {
516            Ok(w[0] as usize..w[1] as usize)
517        } else {
518            bail!("The TypeIndex is out of range.");
519        }
520    }
521}
522
523/// Represents the cached state of a Type Stream header.
524pub struct CachedTypeStreamHeader {
525    pub(crate) header: Option<TypeStreamHeader>,
526}
527
528impl CachedTypeStreamHeader {
529    /// Gets direct access to the type stream header, if any.
530    pub fn header(&self) -> Option<&TypeStreamHeader> {
531        self.header.as_ref()
532    }
533
534    /// Gets the beginning of the type index space, or `TypeIndex::MIN_BEGIN` if this type stream
535    /// does not contain any data.
536    pub fn type_index_begin(&self) -> TypeIndex {
537        if let Some(h) = &self.header {
538            h.type_index_begin.get()
539        } else {
540            TypeIndex::MIN_BEGIN
541        }
542    }
543
544    /// Gets the end of the type index space, or `TypeIndex::MIN_BEGIN` if this type stream does
545    /// not contain any data.
546    pub fn type_index_end(&self) -> TypeIndex {
547        if let Some(h) = &self.header {
548            h.type_index_end.get()
549        } else {
550            TypeIndex::MIN_BEGIN
551        }
552    }
553}