ms_pdb/
pdbi.rs

1//! PDB Info Stream (aka the PDB Stream)
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/PdbStream.html>
5
6#[cfg(test)]
7mod tests;
8
9use std::collections::BTreeMap;
10
11use super::*;
12use crate::encoder::Encoder;
13use crate::guid::GuidLe;
14use crate::parser::Parser;
15use anyhow::bail;
16use bitvec::prelude::{BitSlice, Lsb0};
17use bstr::ByteSlice;
18use tracing::{trace, trace_span, warn};
19use uuid::Uuid;
20use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, LE, U32};
21
/// Contains the PDB Information Stream.
///
/// This implementation reads all of the data from the PDBI Stream and converts it to in-memory
/// data structures. This is not typical for most of the data within the PDB. We do this because
/// the PDBI is fairly small, is needed for reading most PDBs, and will often need to be edited
/// for generating or rebuilding PDBs.
#[allow(missing_docs)]
#[derive(Clone)]
pub struct PdbiStream {
    /// The `signature` field of the stream header. This type stores it and writes it back
    /// without interpreting it.
    pub signature: u32,
    /// The PDBI version number from the stream header. This decides whether the stream
    /// contains a unique ID.
    pub version: u32,
    /// The 'age' value of the PDB, from the stream header.
    pub age: u32,
    /// The unique ID (GUID) of the PDB. This is `None` when the PDBI version is too old to
    /// store a unique ID.
    pub unique_id: Option<Uuid>,
    /// The named streams table, which maps stream names to stream indexes.
    pub named_streams: NamedStreams,
    /// The feature codes listed at the end of the stream, in the order they were read.
    pub features: Vec<FeatureCode>,
}
38
39impl PdbiStream {
40    /// Parses the stream.
41    pub fn parse(stream_data: &[u8]) -> anyhow::Result<Self> {
42        let mut p = Parser::new(stream_data);
43
44        let header: &PdbiStreamHeader = p.get()?;
45        let version = header.version.get();
46
47        // Older PDBs (pre-VC7, i.e. before 2000) do not contain a GUID.
48        let unique_id = if pdbi_has_unique_id(version) {
49            // Check that the stream data is large enough to contain the unique ID.
50            // We use slices, below, relying on bounds checking here.
51            Some(p.get::<GuidLe>()?.get())
52        } else {
53            None
54        };
55
56        let named_streams = NamedStreams::parse(&mut p)?;
57
58        // The last part of the PDBI stream is a list of "features". Features are u32 values, and
59        // the feature values are defined as constants. If a feature is present in this list, then
60        // that feature is enabled.
61        let mut features: Vec<FeatureCode> = Vec::with_capacity(p.len() / 4);
62        while p.len() >= 4 {
63            let feature = FeatureCode(p.u32()?);
64            features.push(feature);
65        }
66
67        Ok(Self {
68            signature: header.signature.get(),
69            version,
70            age: header.age.get(),
71            unique_id,
72            named_streams,
73            features,
74        })
75    }
76
77    /// Serializes this to a stream.
78    pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
79        let mut out = Vec::new();
80
81        let mut e = Encoder::new(&mut out);
82
83        let header = PdbiStreamHeader {
84            signature: U32::new(self.signature),
85            version: U32::new(self.version),
86            age: U32::new(self.age),
87        };
88
89        e.t(&header);
90        if pdbi_has_unique_id(self.version) {
91            if let Some(unique_id) = &self.unique_id {
92                e.uuid(unique_id);
93            } else {
94                bail!("The PDBI version requires a unique ID, but none has been provided.");
95            }
96        } else if self.unique_id.is_some() {
97            warn!("PDBI version is too old to have a unique ID, but this PdbiStream has a unique ID. It will be ignored.");
98        }
99
100        self.named_streams.to_bytes(&mut e);
101
102        // Write the features.
103        for &feature in self.features.iter() {
104            e.u32(feature.0);
105        }
106
107        Ok(out)
108    }
109
110    /// Gets the 'age' value of the PDB. This links the PDB with the executable; a PDB must have
111    /// the same age as its related executable.
112    pub fn age(&self) -> u32 {
113        self.age
114    }
115
116    /// Version from the PDBI header, e.g. [`PDBI_VERSION_VC110`].
117    pub fn version(&self) -> u32 {
118        self.version
119    }
120
121    /// The binding key that associates this PDB with a given PE executable.
122    pub fn binding_key(&self) -> BindingKey {
123        BindingKey {
124            guid: self.unique_id.unwrap_or(Uuid::nil()),
125            age: self.age,
126        }
127    }
128
129    /// Provides access to the named streams table.
130    pub fn named_streams(&self) -> &NamedStreams {
131        &self.named_streams
132    }
133
134    /// Provides mutable access to the named streams table.
135    pub fn named_streams_mut(&mut self) -> &mut NamedStreams {
136        &mut self.named_streams
137    }
138
139    /// Checks whether this PDB has a given feature enabled.
140    pub fn has_feature(&self, feature_code: FeatureCode) -> bool {
141        self.features.iter().any(|f| *f == feature_code)
142    }
143}
144
// Known values of the `version` field of the PDBI stream header. The values look like dates
// and are only ever compared numerically by this module.

/// PDBI version `VC2`.
pub const PDBI_VERSION_VC2: u32 = 19941610;
/// PDBI version `VC4`.
pub const PDBI_VERSION_VC4: u32 = 19950623;
/// PDBI version `VC41`.
pub const PDBI_VERSION_VC41: u32 = 19950814;
/// PDBI version `VC50`.
pub const PDBI_VERSION_VC50: u32 = 19960307;
/// PDBI version `VC98`.
pub const PDBI_VERSION_VC98: u32 = 19970604;
/// PDBI version of the deprecated VC70 implementation. This is the newest version that does
/// _not_ store a unique ID in the PDBI stream.
pub const PDBI_VERSION_VC70_DEPRECATED: u32 = 19990604;
/// PDBI version `VC70`. This is the first version that has a unique ID.
pub const PDBI_VERSION_VC70: u32 = 20000404;
/// PDBI version `VC80`.
pub const PDBI_VERSION_VC80: u32 = 20030901;
/// PDBI version `VC110`.
pub const PDBI_VERSION_VC110: u32 = 20091201;
/// PDBI version `VC140`.
pub const PDBI_VERSION_VC140: u32 = 20140508;

/// Returns `true` if a PDBI stream with the given `version` stores a unique ID after its
/// header, i.e. if `version` is newer than [`PDBI_VERSION_VC70_DEPRECATED`].
fn pdbi_has_unique_id(version: u32) -> bool {
    version > PDBI_VERSION_VC70_DEPRECATED
}
169
/// The header of the PDB Info stream.
///
/// This is the fixed-size part at the very start of the stream. All fields are little-endian
/// `u32` values, and the struct is `repr(C)`, so the field order here is the on-disk order.
#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
#[repr(C)]
#[allow(missing_docs)]
pub struct PdbiStreamHeader {
    /// The PDBI version number, e.g. [`PDBI_VERSION_VC70`]. It decides whether a unique ID
    /// follows this header.
    pub version: U32<LE>,
    /// The signature field. This module reads it and writes it back without interpreting it.
    pub signature: U32<LE>,
    /// The 'age' value of the PDB.
    pub age: U32<LE>,
    // This is only present if the version number is higher than impvVC70Dep.
    // pub unique_id: GuidLe,
}
181
/// The fixed-size fields at the start of a serialized hash table. The trailing comments in the
/// struct list the variable-size data that follows these fields.
///
/// NOTE(review): the code visible in this part of the file does not use this struct;
/// `NamedStreams::parse` reads the corresponding `u32` values one at a time instead.
#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
#[repr(C)]
#[allow(missing_docs)]
pub struct HashTableHeader {
    /// Presumably the number of entries stored in the table (the "cardinality" that
    /// `NamedStreams::parse` reads first) -- TODO confirm.
    pub size: U32<LE>,
    /// Presumably the number of hash slots in the table -- TODO confirm.
    pub capacity: U32<LE>,
    // present bit vector
    // deleted bit vector
    // (key, value) pairs
}
192
/// A single `(key, value)` pair in the serialized named streams hash table.
#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
#[repr(C)]
#[allow(missing_docs)]
pub struct HashEntry {
    /// Byte offset of the NUL-terminated stream name within the names data.
    pub key: U32<LE>,
    /// The stream index that the name refers to.
    pub value: U32<LE>,
}
200
/// Provides access to the Named Streams Table.
///
/// The table maps stream names to stream indexes. Each name appears at most once.
#[derive(Default, Clone)]
pub struct NamedStreams {
    /// If true, the named streams set has been modified since it was loaded.
    ///
    /// `insert` (when it adds a mapping) and `clear` set this flag; parsing leaves it `false`.
    pub(crate) modified: bool,

    /// Stores the mapping from stream name to stream index.
    ///
    /// We use `BTreeMap` so that the names are ordered.
    map: BTreeMap<String, u32>,
}
212
213impl NamedStreams {
214    /// Iterates the named streams.
215    pub fn iter(&self) -> impl Iterator<Item = (&String, &u32)> {
216        self.map.iter()
217    }
218
219    /// Searches the list of named strings for `name`. If found, returns the stream index.
220    ///
221    /// This does _not_ use a hash function. It just sequentially searches.
222    /// This uses a case-sensitive comparison.
223    pub fn get(&self, name: &str) -> Option<u32> {
224        self.map.get(name).copied()
225    }
226
227    /// Searches the list of named strings for `name`. If found, returns the stream index.
228    /// If not found, returns a descriptive error.
229    ///
230    /// This does _not_ use a hash function. It just sequentially searches.
231    /// This uses a case-sensitive comparison.
232    pub fn get_err(&self, name: &str) -> anyhow::Result<u32> {
233        if let Some(&stream) = self.map.get(name) {
234            Ok(stream)
235        } else {
236            bail!("Failed to find a named stream {:?}", name);
237        }
238    }
239
240    /// Parses a `NamedStreams` table.
241    pub fn parse(p: &mut Parser) -> anyhow::Result<Self> {
242        let names_size = p.u32()?;
243        let names_data = p.bytes(names_size as usize)?;
244
245        // This is the "cdr" (cardinality) field in pdb.cpp.
246        let name_count = p.u32()?;
247        let _name_hash_size = p.u32()?;
248
249        let present_u32_count = p.u32()?;
250        let present_mask = p.bytes(present_u32_count as usize * 4)?;
251        let present_num_items: u32 = present_mask.iter().map(|&b| b.count_ones()).sum();
252
253        let deleted_u32_count = p.u32()?;
254        let deleted_mask = p.bytes(deleted_u32_count as usize * 4)?;
255        let _deleted_num_items: u32 = deleted_mask.iter().map(|&b| b.count_ones()).sum();
256
257        if present_num_items != name_count {
258            bail!("The PDBI name table contains inconsistent values.  Name count is {}, but present bitmap count is {}.",
259                name_count, present_num_items);
260        }
261
262        let items: &[HashEntry] = p.slice(name_count as usize)?;
263
264        let mut names: BTreeMap<String, u32> = BTreeMap::new();
265
266        for item in items.iter() {
267            let key = item.key.get();
268            let stream = item.value.get();
269            // Key is a byte offset into names_data.
270            // Value is a stream index.
271
272            let mut kp = Parser::new(names_data);
273            kp.skip(key as usize)?;
274            let name = kp.strz()?.to_str_lossy();
275
276            if let Some(existing_stream) = names.get(&*name) {
277                warn!("The PDBI contains more than one stream with the same name {:?}: stream {} vs stream {}",
278                name, existing_stream, stream);
279                continue;
280            }
281
282            names.insert(name.to_string(), stream);
283        }
284
285        // Parse the "number of NameIndex" values at the end (niMac).
286        let num_name_index = p.u32()?;
287        if num_name_index != 0 {
288            warn!("The Named Streams table contains a non-zero value for the 'niMac' field. This is not supported");
289        }
290
291        Ok(Self {
292            modified: false,
293            map: names,
294        })
295    }
296
297    /// Inserts a new named stream.
298    ///
299    /// Returns `true` if the mapping was inserted.
300    ///
301    /// Returns `false` if there was already a mapping with the given name. In this case, the
302    /// named stream table is not modified.
303    pub fn insert(&mut self, name: &str, value: u32) -> bool {
304        if self.map.contains_key(name) {
305            false
306        } else {
307            self.modified = true;
308            self.map.insert(name.to_string(), value);
309            true
310        }
311    }
312
313    /// Removes all entries from the named stream map.
314    pub fn clear(&mut self) {
315        self.modified = true;
316        self.map.clear();
317    }
318
319    /// Encode this table to a byte stream
320    pub fn to_bytes(&self, e: &mut Encoder) {
321        let _span = trace_span!("NamedStreams::to_bytes").entered();
322
323        // Sort the names in the table, so that we have a deterministic order.
324        let mut sorted_names: Vec<(&String, u32)> = Vec::with_capacity(self.map.len());
325        for (name, stream) in self.map.iter() {
326            sorted_names.push((name, *stream));
327        }
328        sorted_names.sort_unstable();
329        let num_names = sorted_names.len();
330
331        // Find the size of the string data table and find the position of every string in that
332        // table. We have to do this after sorting the strings.
333        let mut strings_len: usize = 0;
334        let name_offsets: Vec<u32> = sorted_names
335            .iter()
336            .map(|(name, _)| {
337                let this_pos = strings_len;
338                strings_len += name.len() + 1;
339                this_pos as u32
340            })
341            .collect();
342
343        // Write the string data. This is prefixed by the length of the string data.
344        e.u32(strings_len as u32);
345        for &(name, _) in sorted_names.iter() {
346            e.strz(BStr::new(name));
347        }
348
349        // We are going to encode this hash table using the format defined by PDBI.  This format
350        // is a hash table that uses linear probing.  We choose a load factor of 2x, then hash all
351        // the items and place them in the table.
352        //
353        // Choose a hash size that is larger than our list of names.
354        let hash_size = if sorted_names.is_empty() {
355            10
356        } else {
357            sorted_names.len() * 2
358        };
359
360        // Find the size of the "present" and "deleted" bitmaps. These bitmaps have the same size.
361        let bitmap_size_u32s = (hash_size + 31) / 32;
362        let mut present_bitmap_bytes: Vec<u8> = vec![0; bitmap_size_u32s * 4];
363        let present_bitmap: &mut BitSlice<u8, Lsb0> =
364            BitSlice::from_slice_mut(present_bitmap_bytes.as_mut_slice());
365
366        // hash_slots contains (string_index, stream)
367        let mut hash_slots: Vec<Option<(u32, u32)>> = Vec::new();
368        hash_slots.resize_with(hash_size, Default::default);
369
370        trace!(num_names, hash_size);
371
372        // Assign all strings to hash slots.
373        for (i, &(name, stream)) in sorted_names.iter().enumerate() {
374            let name_offset = name_offsets[i];
375            let h = crate::hash::hash_mod_u16(name.as_bytes(), 0xffff_ffff) as usize % hash_size;
376            let mut slot = h;
377            loop {
378                if hash_slots[slot].is_none() {
379                    hash_slots[slot] = Some((name_offset, stream));
380                    present_bitmap.set(slot, true);
381                    trace!(
382                        assigned_name = name,
383                        hash = h,
384                        slot = slot,
385                        name_offset,
386                        stream
387                    );
388                    break;
389                }
390                slot += 1;
391                assert_ne!(
392                    slot, h,
393                    "linear probing should not wrap around to starting slot"
394                );
395                if slot == hash_slots.len() {
396                    slot = 0;
397                }
398            }
399        }
400
401        // Write the "cardinality" (number of elements in the table) field.
402        e.u32(num_names as u32);
403
404        // Write the number of hashes field.
405        e.u32(hash_size as u32);
406
407        // Write the "present" bitmap.
408        e.u32(bitmap_size_u32s as u32);
409        e.bytes(&present_bitmap_bytes);
410
411        // Write the "deleted" bitmap.
412        e.u32(bitmap_size_u32s as u32);
413        for _ in 0..bitmap_size_u32s {
414            e.u32(0);
415        }
416
417        // Write the entries from the hash table that are present.
418        for slot in hash_slots.iter() {
419            if let Some(slot) = slot {
420                e.u32(slot.0);
421                e.u32(slot.1);
422            }
423        }
424
425        // Write the "number of NameIndex values" (niMac).
426        e.u32(0);
427    }
428}
429
/// Identifies an optional PDB feature. A feature is enabled for a given PDB when its `u32`
/// code is listed in that PDB's feature list.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash, Ord, PartialOrd)]
pub struct FeatureCode(pub u32);

impl FeatureCode {
    /// Indicates that this PDB is a "mini PDB", produced by using the `/DEBUG:FASTLINK` parameter.
    ///
    /// The code is the ASCII text `MINI`, read as a little-endian `u32` (`0x494E494D`).
    ///
    /// See: <https://learn.microsoft.com/en-us/cpp/build/reference/debug-generate-debug-info?view=msvc-170>
    pub const MINI_PDB: Self = Self(u32::from_le_bytes(*b"MINI"));
}