ms_pdb/
tpi.rs

1//! Type Information Stream (TPI)
2//!
3//! Layout of a Type Stream:
4//!
5//! * `TypeStreamHeader` - specifies lots of important parameters
6//! * Type Record Data
7//!
8//! Each Type Stream may also have an associated Type Hash Stream. The Type Hash Stream contains
9//! indexing information that helps find records within the main Type Stream. The Type Stream
10//! Header specifies several parameters that are needed for finding and decoding the Type Hash
11//! Stream.
12//!
13//! The Type Hash Stream contains:
14//!
15//! * Hash Value Buffer: Contains a list of hash values, one for each Type Record in the
16//!   Type Stream.
17//!
18//!   The offset and size of the Hash Value Buffer is specified in the `TypeStreamHeader`, in the
19//!   `hash_value_buffer_offset` and `hash_value_buffer_length` fields, respectively.
20//!
//!   It should be assumed that there are either 0 hash values, or a number equal to the number of
//!   type records in the TPI stream (`type_index_end - type_index_begin`). Thus, if
//!   `hash_value_buffer_length` is neither zero nor equal to
//!   `(type_index_end - type_index_begin) * hash_key_size`, we can consider the PDB malformed.
25//!
26//! * Type Index Offset Buffer - A list of pairs of `u32` values where the first is a Type Index
27//!   and the second is the offset within Type Record Data of the type with this index.
28//!   This enables a binary search to find a given Type Index record.
29//!
30//!   The offset and size of the Type Index Offset Buffer is specified in the `TypeStreamHeader`,
31//!   in the `index_offset_buffer_offset` and `index_offset_buffer_length` fields, respectively.
32//!
33//! * Hash Adjustment Buffer - A hash table whose keys are the hash values in the hash value
34//!   buffer and whose values are type indices.
35//!
//!   The offset and size of the Hash Adjustment Buffer is specified in the `TypeStreamHeader`,
//!   in the `hash_adj_buffer_offset` and `hash_adj_buffer_length` fields, respectively.
38//!
39
40pub mod hash;
41
42use super::*;
43use crate::parser::Parser;
44use crate::types::fields::{Field, IterFields};
45use crate::types::{build_types_starts, TypeData, TypeIndex, TypeIndexLe, TypeRecord, TypesIter};
46use anyhow::bail;
47use std::fmt::Debug;
48use std::mem::size_of;
49use std::ops::Range;
50use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U32};
51
52/// The header of the TPI stream.
53#[allow(missing_docs)]
54#[derive(Clone, Eq, PartialEq, IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
55#[repr(C)]
56pub struct TypeStreamHeader {
57    pub version: U32<LE>,
58    pub header_size: U32<LE>,
59    pub type_index_begin: TypeIndexLe,
60    pub type_index_end: TypeIndexLe,
61    /// The number of bytes of type record data following the `TypeStreamHeader`.
62    pub type_record_bytes: U32<LE>,
63
64    pub hash_stream_index: StreamIndexU16,
65    pub hash_aux_stream_index: StreamIndexU16,
66
67    /// The size of each hash key in the Hash Value Substream. For the current version of TPI,
68    /// this value should always be 4.
69    pub hash_key_size: U32<LE>,
70    /// The number of hash buckets. This is used when calculating the record hashes. Each hash
71    /// is computed, and then it is divided by num_hash_buckets and the remainder becomes the
72    /// final hash.
73    ///
74    /// If `hash_value_buffer_length` is non-zero, then `num_hash_buckets` must also be non-zero.
75    pub num_hash_buckets: U32<LE>,
76    pub hash_value_buffer_offset: I32<LE>,
77    pub hash_value_buffer_length: U32<LE>,
78
79    pub index_offset_buffer_offset: I32<LE>,
80    pub index_offset_buffer_length: U32<LE>,
81
82    pub hash_adj_buffer_offset: I32<LE>,
83    pub hash_adj_buffer_length: U32<LE>,
84}
85
86impl TypeStreamHeader {
87    /// Makes an empty one
88    pub fn empty() -> Self {
89        Self {
90            version: Default::default(),
91            header_size: U32::new(size_of::<TypeStreamHeader>() as u32),
92            type_index_begin: TypeIndexLe(U32::new(TypeIndex::MIN_BEGIN.0)),
93            type_index_end: TypeIndexLe(U32::new(TypeIndex::MIN_BEGIN.0)),
94            type_record_bytes: Default::default(),
95            hash_stream_index: StreamIndexU16::NIL,
96            hash_aux_stream_index: StreamIndexU16::NIL,
97            hash_key_size: Default::default(),
98            num_hash_buckets: Default::default(),
99            hash_value_buffer_offset: Default::default(),
100            hash_value_buffer_length: Default::default(),
101            index_offset_buffer_offset: Default::default(),
102            index_offset_buffer_length: Default::default(),
103            hash_adj_buffer_offset: Default::default(),
104            hash_adj_buffer_length: Default::default(),
105        }
106    }
107}
108
109/// The size of the `TpiStreamHeader` structure.
110pub const TPI_STREAM_HEADER_LEN: usize = size_of::<TypeStreamHeader>();
111
/// The value that `TypeStreamHeader::version` is expected to hold (decimal `20040203`).
pub const TYPE_STREAM_VERSION_2004: u32 = 20_040_203;
114
115/// Contains a TPI Stream or IPI Stream.
116pub struct TypeStream<StreamData>
117where
118    StreamData: AsRef<[u8]>,
119{
120    /// The stream data. This contains the entire type stream, including header and type records.
121    pub stream_data: StreamData,
122
123    type_index_begin: TypeIndex,
124    type_index_end: TypeIndex,
125
126    /// A starts vector for type record offsets. This is created on-demand, since many users of
127    /// `TypeStream` do not need this.
128    record_starts: OnceCell<Vec<u32>>,
129}
130
/// Selects which of the two type streams (TPI or IPI) is meant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TypeStreamKind {
    /// The primary type stream.
    TPI,
    /// The ID stream.
    IPI,
}
139
140impl TypeStreamKind {
141    /// Get the stream index. Fortunately, the stream indexes are fixed.
142    pub fn stream(self) -> Stream {
143        match self {
144            Self::IPI => Stream::IPI,
145            Self::TPI => Stream::TPI,
146        }
147    }
148}
149
150/// Represents an entry in the Hash Index Offset Substream.
151#[repr(C)]
152#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
153pub struct HashIndexPair {
154    /// The type index at the start of this range.
155    pub type_index: TypeIndexLe,
156    /// The offset within the Type Records Substream (not the entire Type Stream) where this
157    /// record begins.
158    pub offset: U32<LE>,
159}
160
161impl<StreamData> TypeStream<StreamData>
162where
163    StreamData: AsRef<[u8]>,
164{
165    /// Gets a reference to the stream header
166    pub fn header(&self) -> Option<&TypeStreamHeader> {
167        let stream_data: &[u8] = self.stream_data.as_ref();
168        let (header, _) = TypeStreamHeader::ref_from_prefix(stream_data).ok()?;
169        Some(header)
170    }
171
172    /// Returns the version of the stream, or `TYPE_STREAM_VERSION_2004` if this is an empty stream.
173    pub fn version(&self) -> u32 {
174        if let Some(header) = self.header() {
175            header.version.get()
176        } else {
177            TYPE_STREAM_VERSION_2004
178        }
179    }
180
181    /// Returns the stream index of the related hash stream, if any.
182    pub fn hash_stream(&self) -> Option<u32> {
183        self.header()?.hash_stream_index.get()
184    }
185
186    /// Checks whether this is a degenerate empty stream.
187    pub fn is_empty(&self) -> bool {
188        self.stream_data.as_ref().is_empty()
189    }
190
191    /// Gets a mutable reference to the stream header
192    pub fn header_mut(&mut self) -> Option<&mut TypeStreamHeader>
193    where
194        StreamData: AsMut<[u8]>,
195    {
196        let (header, _) = TypeStreamHeader::mut_from_prefix(self.stream_data.as_mut()).ok()?;
197        Some(header)
198    }
199
200    /// The type index of the first type record.
201    pub fn type_index_begin(&self) -> TypeIndex {
202        self.type_index_begin
203    }
204
205    /// The type index of the last type record, plus 1.
206    pub fn type_index_end(&self) -> TypeIndex {
207        self.type_index_end
208    }
209
210    /// The number of types defined in the type stream.
211    pub fn num_types(&self) -> u32 {
212        self.type_index_end.0 - self.type_index_begin.0
213    }
214
215    /// Gets the byte offset within the stream of the record data.
216    pub fn records_offset(&self) -> usize {
217        if let Some(header) = self.header() {
218            header.header_size.get() as usize
219        } else {
220            0
221        }
222    }
223
224    /// Returns the encoded type records found in the TPI or IPI stream.
225    ///
226    /// The type records immediately follow the type stream. The length is given by the
227    /// header field type_record_bytes. The values in the header were validated in
228    /// read_tpi_or_ipi_stream(), so we do not need to check them again, here.
229    pub fn type_records_bytes(&self) -> &[u8] {
230        let records_range = self.type_records_range();
231        if records_range.is_empty() {
232            &[]
233        } else {
234            &self.stream_data.as_ref()[records_range]
235        }
236    }
237
238    /// Returns the encoded type records found in the type stream.
239    pub fn type_records_bytes_mut(&mut self) -> &mut [u8]
240    where
241        StreamData: AsMut<[u8]>,
242    {
243        let records_range = self.type_records_range();
244        if records_range.is_empty() {
245            &mut []
246        } else {
247            &mut self.stream_data.as_mut()[records_range]
248        }
249    }
250
251    /// Returns the byte range of the encoded type records found in the type stream.
252    pub fn type_records_range(&self) -> std::ops::Range<usize> {
253        if let Some(header) = self.header() {
254            let size = header.type_record_bytes.get();
255            if size == 0 {
256                return 0..0;
257            }
258            let type_records_start = header.header_size.get();
259            let type_records_end = type_records_start + size;
260            type_records_start as usize..type_records_end as usize
261        } else {
262            0..0
263        }
264    }
265
266    /// Iterates the types contained within this type stream.
267    pub fn iter_type_records(&self) -> TypesIter<'_> {
268        TypesIter::new(self.type_records_bytes())
269    }
270
271    /// Parses the header of a Type Stream and validates it.
272    pub fn parse(stream_index: Stream, stream_data: StreamData) -> anyhow::Result<Self> {
273        let stream_bytes: &[u8] = stream_data.as_ref();
274
275        if stream_bytes.is_empty() {
276            return Ok(Self {
277                stream_data,
278                type_index_begin: TypeIndex::MIN_BEGIN,
279                type_index_end: TypeIndex::MIN_BEGIN,
280                record_starts: OnceCell::new(),
281            });
282        }
283
284        let mut p = Parser::new(stream_bytes);
285        let tpi_stream_header: TypeStreamHeader = p.copy()?;
286
287        let type_index_begin = tpi_stream_header.type_index_begin.get();
288        let type_index_end = tpi_stream_header.type_index_end.get();
289        if type_index_end < type_index_begin {
290            bail!(
291                "Type stream (stream {stream_index}) has invalid values in header.  \
292                 The type_index_begin field is greater than the type_index_end field."
293            );
294        }
295
296        if type_index_begin < TypeIndex::MIN_BEGIN {
297            bail!(
298                "The Type Stream has an invalid value for type_index_begin ({type_index_begin:?}). \
299                 It is less than the minimum required value ({}).",
300                TypeIndex::MIN_BEGIN.0
301            );
302        }
303
304        let type_data_start = tpi_stream_header.header_size.get();
305        if type_data_start < TPI_STREAM_HEADER_LEN as u32 {
306            bail!(
307                "Type stream (stream {stream_index}) has invalid values in header.  \
308                 The header_size field is smaller than the definition of the actual header."
309            );
310        }
311
312        let type_data_end = type_data_start + tpi_stream_header.type_record_bytes.get();
313        if type_data_end > stream_bytes.len() as u32 {
314            bail!(
315                "Type stream (stream {stream_index}) has invalid values in header.  \
316                   The header_size and type_record_bytes fields exceed the size of the stream."
317            );
318        }
319
320        Ok(TypeStream {
321            stream_data,
322            type_index_begin,
323            type_index_end,
324            record_starts: OnceCell::new(),
325        })
326    }
327
328    /// Builds a "starts" table that gives the starting location of each type record.
329    pub fn build_types_starts(&self) -> TypeIndexMap {
330        let starts =
331            crate::types::build_types_starts(self.num_types() as usize, self.type_records_bytes());
332
333        TypeIndexMap {
334            type_index_begin: self.type_index_begin,
335            type_index_end: self.type_index_end,
336            starts,
337        }
338    }
339
340    /// Creates a new `TypeStream` that referenced the stream data of this `TypeStream`.
341    /// This is typically used for temporarily creating a `TypeStream<&[u8]>` from a
342    /// `TypeStream<Vec<u8>>`.
343    pub fn to_ref(&self) -> TypeStream<&[u8]> {
344        TypeStream {
345            stream_data: self.stream_data.as_ref(),
346            type_index_begin: self.type_index_begin,
347            type_index_end: self.type_index_end,
348            record_starts: OnceCell::new(),
349        }
350    }
351
352    /// Gets the "starts" vector for the byte offsets of the records in this `TypeStream`.
353    ///
354    /// This function will create the starts vector on-demand.
355    pub fn record_starts(&self) -> &[u32] {
356        self.record_starts.get_or_init(|| {
357            let type_records = self.type_records_bytes();
358            build_types_starts(self.num_types() as usize, type_records)
359        })
360    }
361
362    /// Returns `true` if `type_index` refers to a primitive type.
363    pub fn is_primitive(&self, type_index: TypeIndex) -> bool {
364        type_index < self.type_index_begin
365    }
366
367    /// Retrieves the type record identified by `type_index`.
368    ///
369    /// This should only be used for non-primitive `TypeIndex` values. If this is called with a
370    /// primitive `TypeIndex` then it will return `Err`.
371    pub fn record(&self, type_index: TypeIndex) -> anyhow::Result<TypeRecord<'_>> {
372        let Some(relative_type_index) = type_index.0.checked_sub(self.type_index_begin.0) else {
373            bail!("The given TypeIndex is a primitive type index, not a type record.");
374        };
375
376        let starts = self.record_starts();
377        let Some(&record_start) = starts.get(relative_type_index as usize) else {
378            bail!("The given TypeIndex is out of bounds (exceeds maximum allowed TypeIndex)");
379        };
380
381        let all_type_records = self.type_records_bytes();
382        let Some(this_type_record_slice) = all_type_records.get(record_start as usize..) else {
383            // This should never happen, but let's be cautious.
384            bail!("Internal error: record offset is out of range.");
385        };
386
387        let mut iter = TypesIter::new(this_type_record_slice);
388        if let Some(record) = iter.next() {
389            Ok(record)
390        } else {
391            bail!("Failed to decode type record");
392        }
393    }
394
395    /// Iterate the fields of an `LF_STRUCTURE`, `LF_CLASS`, `LF_ENUM`, etc. This correctly
396    /// iterates across chains of `LF_FIELDLIST`.
397    pub fn iter_fields(&self, field_list: TypeIndex) -> IterFieldChain<'_, StreamData> {
398        // We initialize `fields` to an empty iterator so that the first iteration of
399        // IterFieldChain::next() will find no records and will then check next_field_list.
400        IterFieldChain {
401            type_stream: self,
402            next_field_list: if field_list.0 != 0 {
403                Some(field_list)
404            } else {
405                None
406            },
407            fields: IterFields { bytes: &[] },
408        }
409    }
410}
411
412/// Iterator state for `iter_fields`
413pub struct IterFieldChain<'a, StreamData>
414where
415    StreamData: AsRef<[u8]>,
416{
417    /// The current `LF_FIELDLIST` record that we are decoding.
418    fields: IterFields<'a>,
419
420    /// Allows us to read `LF_FIELDLIST` records.
421    type_stream: &'a TypeStream<StreamData>,
422
423    /// The pointer to the next `LF_FIELDLIST` that we will decode.
424    next_field_list: Option<TypeIndex>,
425}
426
427impl<'a, StreamData> Iterator for IterFieldChain<'a, StreamData>
428where
429    StreamData: AsRef<[u8]>,
430{
431    type Item = Field<'a>;
432
433    fn next(&mut self) -> Option<Self::Item> {
434        loop {
435            if let Some(field) = self.fields.next() {
436                if let Field::Index(index) = &field {
437                    // The full field list is split across more than one LF_FIELDLIST record.
438                    // Store the link to the next field list and do not return this item to the caller.
439                    self.next_field_list = Some(*index);
440                    continue;
441                }
442
443                return Some(field);
444            }
445
446            // We have run out of fields in the current LF_FIELDLIST record.
447            // See if there is a pointer to another LF_FIELDLIST.
448            let next_field_list = self.next_field_list.take()?;
449            let next_record = self.type_stream.record(next_field_list).ok()?;
450            match next_record.parse().ok()? {
451                TypeData::FieldList(fl) => {
452                    // Restart iteration on the new field list.
453                    self.fields = fl.iter();
454                }
455                _ => {
456                    // Wrong record type!
457                    return None;
458                }
459            }
460        }
461    }
462}
463
464impl<F: ReadAt> crate::Pdb<F> {
465    /// Reads the TPI stream.
466    pub fn read_type_stream(&self) -> anyhow::Result<TypeStream<Vec<u8>>> {
467        self.read_tpi_or_ipi_stream(Stream::TPI)
468    }
469
470    /// Reads the IPI stream.
471    pub fn read_ipi_stream(&self) -> anyhow::Result<TypeStream<Vec<u8>>> {
472        self.read_tpi_or_ipi_stream(Stream::IPI)
473    }
474
475    /// Reads the TPI or IPI stream.
476    pub fn read_tpi_or_ipi_stream(
477        &self,
478        stream_index: Stream,
479    ) -> anyhow::Result<TypeStream<Vec<u8>>> {
480        let stream_data = self.read_stream_to_vec(stream_index.into())?;
481        TypeStream::parse(stream_index, stream_data)
482    }
483}
484
485/// Maps `TypeIndex` values to the byte range of records within a type stream.
486pub struct TypeIndexMap {
487    /// Copied from type stream header.
488    pub type_index_begin: TypeIndex,
489
490    /// Copied from type stream header.
491    pub type_index_end: TypeIndex,
492
493    /// Contains a "starts" vector for the byte offsets of each type record.
494    ///
495    /// This vector has an additional value at the end, which gives the size in bytes of the
496    /// type stream.
497    pub starts: Vec<u32>,
498}
499
500impl TypeIndexMap {
501    /// Tests whether a `TypeIndex` is a primitive type.
502    pub fn is_primitive(&self, ti: TypeIndex) -> bool {
503        ti < self.type_index_begin
504    }
505
506    /// Given a `TypeIndex`, returns the byte range within a `TypeStream` where that record
507    /// is stored.
508    ///
509    /// If `ti` is a primitive type then this function will return `Err`. The caller should
510    /// use the `is_primitive` method to check whether a `TypeIndex` is a primitive type.
511    pub fn record_range(&self, ti: TypeIndex) -> anyhow::Result<Range<usize>> {
512        let Some(i) = ti.0.checked_sub(self.type_index_begin.0) else {
513            bail!("The TypeIndex is a primitive type, not a type record.");
514        };
515
516        if let Some(w) = self.starts.get(i as usize..i as usize + 2) {
517            Ok(w[0] as usize..w[1] as usize)
518        } else {
519            bail!("The TypeIndex is out of range.");
520        }
521    }
522}
523
524/// Represents the cached state of a Type Stream header.
525pub struct CachedTypeStreamHeader {
526    pub(crate) header: Option<TypeStreamHeader>,
527}
528
529impl CachedTypeStreamHeader {
530    /// Gets direct access to the type stream header, if any.
531    pub fn header(&self) -> Option<&TypeStreamHeader> {
532        self.header.as_ref()
533    }
534
535    /// Gets the beginning of the type index space, or `TypeIndex::MIN_BEGIN` if this type stream
536    /// does not contain any data.
537    pub fn type_index_begin(&self) -> TypeIndex {
538        if let Some(h) = &self.header {
539            h.type_index_begin.get()
540        } else {
541            TypeIndex::MIN_BEGIN
542        }
543    }
544
545    /// Gets the end of the type index space, or `TypeIndex::MIN_BEGIN` if this type stream does
546    /// not contain any data.
547    pub fn type_index_end(&self) -> TypeIndex {
548        if let Some(h) = &self.header {
549            h.type_index_end.get()
550        } else {
551            TypeIndex::MIN_BEGIN
552        }
553    }
554}