ms_pdb/
pdbi.rs

1//! PDB Info Stream (aka the PDB Stream)
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/PdbStream.html>
5
6#[cfg(test)]
7mod tests;
8
9use std::collections::BTreeMap;
10
11use super::*;
12use crate::guid::GuidLe;
13use anyhow::bail;
14use bitvec::prelude::{BitSlice, Lsb0};
15use bstr::ByteSlice;
16use ms_codeview::encoder::Encoder;
17use ms_codeview::parser::Parser;
18use tracing::{trace, trace_span, warn};
19use uuid::Uuid;
20use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, LE, U32, Unaligned};
21
22/// Contains the PDB Information Stream.
23///
24/// This implementation reads all of the data from the PDBI Stream and converts it to in-memory
25/// data structures. This is not typical for most of the data within the PDB. We do this because
26/// the PDBI is fairly small, is needed for reading most PDBs, and will often need to be edited
27/// for generating or rebuilding PDBs.
28#[allow(missing_docs)]
29#[derive(Clone)]
30pub struct PdbiStream {
31    pub signature: u32,
32    pub version: u32,
33    pub age: u32,
34    pub unique_id: Option<Uuid>,
35    pub named_streams: NamedStreams,
36    pub features: Vec<FeatureCode>,
37}
38
39impl PdbiStream {
40    /// Parses the stream.
41    pub fn parse(stream_data: &[u8]) -> anyhow::Result<Self> {
42        let mut p = Parser::new(stream_data);
43
44        let header: &PdbiStreamHeader = p.get()?;
45        let version = header.version.get();
46
47        // Older PDBs (pre-VC7, i.e. before 2000) do not contain a GUID.
48        let unique_id = if pdbi_has_unique_id(version) {
49            // Check that the stream data is large enough to contain the unique ID.
50            // We use slices, below, relying on bounds checking here.
51            Some(p.get::<GuidLe>()?.get())
52        } else {
53            None
54        };
55
56        let named_streams = NamedStreams::parse(&mut p)?;
57
58        // The last part of the PDBI stream is a list of "features". Features are u32 values, and
59        // the feature values are defined as constants. If a feature is present in this list, then
60        // that feature is enabled.
61        let mut features: Vec<FeatureCode> = Vec::with_capacity(p.len() / 4);
62        while p.len() >= 4 {
63            let feature = FeatureCode(p.u32()?);
64            features.push(feature);
65        }
66
67        Ok(Self {
68            signature: header.signature.get(),
69            version,
70            age: header.age.get(),
71            unique_id,
72            named_streams,
73            features,
74        })
75    }
76
77    /// Serializes this to a stream.
78    pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
79        let mut out = Vec::new();
80
81        let mut e = Encoder::new(&mut out);
82
83        let header = PdbiStreamHeader {
84            signature: U32::new(self.signature),
85            version: U32::new(self.version),
86            age: U32::new(self.age),
87        };
88
89        e.t(&header);
90        if pdbi_has_unique_id(self.version) {
91            if let Some(unique_id) = &self.unique_id {
92                e.uuid(unique_id);
93            } else {
94                bail!("The PDBI version requires a unique ID, but none has been provided.");
95            }
96        } else if self.unique_id.is_some() {
97            warn!(
98                "PDBI version is too old to have a unique ID, but this PdbiStream has a unique ID. It will be ignored."
99            );
100        }
101
102        self.named_streams.to_bytes(&mut e);
103
104        // Write the features.
105        for &feature in self.features.iter() {
106            e.u32(feature.0);
107        }
108
109        Ok(out)
110    }
111
112    /// Gets the 'age' value of the PDB. This links the PDB with the executable; a PDB must have
113    /// the same age as its related executable.
114    pub fn age(&self) -> u32 {
115        self.age
116    }
117
118    /// Version from the PDBI header, e.g. [`PDBI_VERSION_VC110`].
119    pub fn version(&self) -> u32 {
120        self.version
121    }
122
123    /// The binding key that associates this PDB with a given PE executable.
124    pub fn binding_key(&self) -> BindingKey {
125        BindingKey {
126            guid: self.unique_id.unwrap_or(Uuid::nil()),
127            age: self.age,
128        }
129    }
130
131    /// Provides access to the named streams table.
132    pub fn named_streams(&self) -> &NamedStreams {
133        &self.named_streams
134    }
135
136    /// Provides mutable access to the named streams table.
137    pub fn named_streams_mut(&mut self) -> &mut NamedStreams {
138        &mut self.named_streams
139    }
140
141    /// Checks whether this PDB has a given feature enabled.
142    pub fn has_feature(&self, feature_code: FeatureCode) -> bool {
143        self.features.contains(&feature_code)
144    }
145}
146
/// PDBI stream version `VC2`.
pub const PDBI_VERSION_VC2: u32 = 19941610;
/// PDBI stream version `VC4`.
pub const PDBI_VERSION_VC4: u32 = 19950623;
/// PDBI stream version `VC41`.
pub const PDBI_VERSION_VC41: u32 = 19950814;
/// PDBI stream version `VC50`.
pub const PDBI_VERSION_VC50: u32 = 19960307;
/// PDBI stream version `VC98`.
pub const PDBI_VERSION_VC98: u32 = 19970604;
/// PDBI stream version of the deprecated VC70 implementation.
///
/// Only versions greater than this value contain a unique ID.
pub const PDBI_VERSION_VC70_DEPRECATED: u32 = 19990604;
/// PDBI stream version `VC70`. This is the first version that has a unique ID.
pub const PDBI_VERSION_VC70: u32 = 20000404;
/// PDBI stream version `VC80`.
pub const PDBI_VERSION_VC80: u32 = 20030901;
/// PDBI stream version `VC110`.
pub const PDBI_VERSION_VC110: u32 = 20091201;
/// PDBI stream version `VC140`.
pub const PDBI_VERSION_VC140: u32 = 20140508;
167
168fn pdbi_has_unique_id(version: u32) -> bool {
169    version > PDBI_VERSION_VC70_DEPRECATED
170}
171
172/// The header of the PDB Info stream.
173#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
174#[repr(C)]
175#[allow(missing_docs)]
176pub struct PdbiStreamHeader {
177    pub version: U32<LE>,
178    pub signature: U32<LE>,
179    pub age: U32<LE>,
180    // This is only present if the version number is higher than impvVC70Dep.
181    // pub unique_id: GuidLe,
182}
183
184#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
185#[repr(C)]
186#[allow(missing_docs)]
187pub struct HashTableHeader {
188    pub size: U32<LE>,
189    pub capacity: U32<LE>,
190    // present bit vector
191    // deleted bit vector
192    // (key, value) pairs
193}
194
195#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Debug)]
196#[repr(C)]
197#[allow(missing_docs)]
198pub struct HashEntry {
199    pub key: U32<LE>,
200    pub value: U32<LE>,
201}
202
/// Provides access to the Named Streams Table.
///
/// The table maps stream names to stream indexes.
#[derive(Default, Clone, Debug)]
pub struct NamedStreams {
    /// If true, the named streams set has been modified since it was loaded.
    pub(crate) modified: bool,

    /// Stores the mapping from stream name to stream index.
    ///
    /// We use `BTreeMap` so that the names are ordered.
    map: BTreeMap<String, u32>,
}
214
215impl NamedStreams {
216    /// Iterates the named streams.
217    pub fn iter(&self) -> impl Iterator<Item = (&String, &u32)> {
218        self.map.iter()
219    }
220
221    /// Searches the list of named strings for `name`. If found, returns the stream index.
222    ///
223    /// This does _not_ use a hash function. It just sequentially searches.
224    /// This uses a case-sensitive comparison.
225    pub fn get(&self, name: &str) -> Option<u32> {
226        self.map.get(name).copied()
227    }
228
229    /// Searches the list of named strings for `name`. If found, returns the stream index.
230    /// If not found, returns a descriptive error.
231    ///
232    /// This does _not_ use a hash function. It just sequentially searches.
233    /// This uses a case-sensitive comparison.
234    pub fn get_err(&self, name: &str) -> anyhow::Result<u32> {
235        if let Some(&stream) = self.map.get(name) {
236            Ok(stream)
237        } else {
238            bail!("Failed to find a named stream {:?}", name);
239        }
240    }
241
242    /// Parses a `NamedStreams` table.
243    pub fn parse(p: &mut Parser) -> anyhow::Result<Self> {
244        let names_size = p.u32()?;
245        let names_data = p.bytes(names_size as usize)?;
246
247        // This is the "cdr" (cardinality) field in pdb.cpp.
248        let name_count = p.u32()?;
249        let _name_hash_size = p.u32()?;
250
251        let present_u32_count = p.u32()?;
252        let present_mask = p.bytes(present_u32_count as usize * 4)?;
253        let present_num_items: u32 = present_mask.iter().map(|&b| b.count_ones()).sum();
254
255        let deleted_u32_count = p.u32()?;
256        let deleted_mask = p.bytes(deleted_u32_count as usize * 4)?;
257        let _deleted_num_items: u32 = deleted_mask.iter().map(|&b| b.count_ones()).sum();
258
259        if present_num_items != name_count {
260            bail!(
261                "The PDBI name table contains inconsistent values.  Name count is {}, but present bitmap count is {}.",
262                name_count,
263                present_num_items
264            );
265        }
266
267        let items: &[HashEntry] = p.slice(name_count as usize)?;
268
269        let mut names: BTreeMap<String, u32> = BTreeMap::new();
270
271        for item in items.iter() {
272            let key = item.key.get();
273            let stream = item.value.get();
274            // Key is a byte offset into names_data.
275            // Value is a stream index.
276
277            let mut kp = Parser::new(names_data);
278            kp.skip(key as usize)?;
279            let name = kp.strz()?.to_str_lossy();
280
281            if let Some(existing_stream) = names.get(&*name) {
282                warn!(
283                    "The PDBI contains more than one stream with the same name {:?}: stream {} vs stream {}",
284                    name, existing_stream, stream
285                );
286                continue;
287            }
288
289            names.insert(name.to_string(), stream);
290        }
291
292        // Parse the "number of NameIndex" values at the end (niMac).
293        let num_name_index = p.u32()?;
294        if num_name_index != 0 {
295            warn!(
296                "The Named Streams table contains a non-zero value for the 'niMac' field. This is not supported"
297            );
298        }
299
300        Ok(Self {
301            modified: false,
302            map: names,
303        })
304    }
305
306    /// Inserts a new named stream.
307    ///
308    /// Returns `true` if the mapping was inserted.
309    ///
310    /// Returns `false` if there was already a mapping with the given name. In this case, the
311    /// named stream table is not modified.
312    pub fn insert(&mut self, name: &str, value: u32) -> bool {
313        if self.map.contains_key(name) {
314            false
315        } else {
316            self.modified = true;
317            self.map.insert(name.to_string(), value);
318            true
319        }
320    }
321
322    /// Removes all entries from the named stream map.
323    pub fn clear(&mut self) {
324        self.modified = true;
325        self.map.clear();
326    }
327
328    /// Encode this table to a byte stream
329    pub fn to_bytes(&self, e: &mut Encoder) {
330        let _span = trace_span!("NamedStreams::to_bytes").entered();
331
332        // Sort the names in the table, so that we have a deterministic order.
333        let mut sorted_names: Vec<(&String, u32)> = Vec::with_capacity(self.map.len());
334        for (name, stream) in self.map.iter() {
335            sorted_names.push((name, *stream));
336        }
337        sorted_names.sort_unstable();
338        let num_names = sorted_names.len();
339
340        // Find the size of the string data table and find the position of every string in that
341        // table. We have to do this after sorting the strings.
342        let mut strings_len: usize = 0;
343        let name_offsets: Vec<u32> = sorted_names
344            .iter()
345            .map(|(name, _)| {
346                let this_pos = strings_len;
347                strings_len += name.len() + 1;
348                this_pos as u32
349            })
350            .collect();
351
352        // Write the string data. This is prefixed by the length of the string data.
353        e.u32(strings_len as u32);
354        for &(name, _) in sorted_names.iter() {
355            e.strz(BStr::new(name));
356        }
357
358        // We are going to encode this hash table using the format defined by PDBI.  This format
359        // is a hash table that uses linear probing.  We choose a load factor of 2x, then hash all
360        // the items and place them in the table.
361        //
362        // Choose a hash size that is larger than our list of names.
363        let hash_size = if sorted_names.is_empty() {
364            10
365        } else {
366            sorted_names.len() * 2
367        };
368
369        // Find the size of the "present" and "deleted" bitmaps. These bitmaps have the same size.
370        let bitmap_size_u32s = hash_size.div_ceil(32);
371        let mut present_bitmap_bytes: Vec<u8> = vec![0; bitmap_size_u32s * 4];
372        let present_bitmap: &mut BitSlice<u8, Lsb0> =
373            BitSlice::from_slice_mut(present_bitmap_bytes.as_mut_slice());
374
375        // hash_slots contains (string_index, stream)
376        let mut hash_slots: Vec<Option<(u32, u32)>> = Vec::new();
377        hash_slots.resize_with(hash_size, Default::default);
378
379        trace!(num_names, hash_size);
380
381        // Assign all strings to hash slots.
382        for (i, &(name, stream)) in sorted_names.iter().enumerate() {
383            let name_offset = name_offsets[i];
384            let h = crate::hash::hash_mod_u16(name.as_bytes(), 0xffff_ffff) as usize % hash_size;
385            let mut slot = h;
386            loop {
387                if hash_slots[slot].is_none() {
388                    hash_slots[slot] = Some((name_offset, stream));
389                    present_bitmap.set(slot, true);
390                    trace!(
391                        assigned_name = name,
392                        hash = h,
393                        slot = slot,
394                        name_offset,
395                        stream
396                    );
397                    break;
398                }
399                slot += 1;
400                assert_ne!(
401                    slot, h,
402                    "linear probing should not wrap around to starting slot"
403                );
404                if slot == hash_slots.len() {
405                    slot = 0;
406                }
407            }
408        }
409
410        // Write the "cardinality" (number of elements in the table) field.
411        e.u32(num_names as u32);
412
413        // Write the number of hashes field.
414        e.u32(hash_size as u32);
415
416        // Write the "present" bitmap.
417        e.u32(bitmap_size_u32s as u32);
418        e.bytes(&present_bitmap_bytes);
419
420        // Write the "deleted" bitmap.
421        e.u32(bitmap_size_u32s as u32);
422        for _ in 0..bitmap_size_u32s {
423            e.u32(0);
424        }
425
426        // Write the entries from the hash table that are present.
427        for slot in hash_slots.iter() {
428            if let Some(slot) = slot {
429                e.u32(slot.0);
430                e.u32(slot.1);
431            }
432        }
433
434        // Write the "number of NameIndex values" (niMac).
435        e.u32(0);
436    }
437}
438
/// Identifies an optional PDB feature.
///
/// A feature code is a plain `u32` value; a PDB has the feature enabled when the code is listed
/// in its PDBI stream.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FeatureCode(pub u32);
442
443impl FeatureCode {
444    /// Indicates that this PDB is a "mini PDB", produced by using the `/DEBUG:FASTLINK` parameter.
445    ///
446    /// See: <https://learn.microsoft.com/en-us/cpp/build/reference/debug-generate-debug-info?view=msvc-170>
447    pub const MINI_PDB: FeatureCode = FeatureCode(0x494E494D);
448}