ms_pdb/
globals.rs

1//! Global Symbols
2//!
3//! This module contains code for reading the public / global symbol streams. This is a
4//! moderately-complicated set of data structures, and requires reading several streams and
5//! correlating data between them.
6//!
7//! Global symbols are stored in several streams. The stream indexes are stored in the DBI
8//! stream header; the stream indexes are not fixed.
9
10pub mod gsi;
11pub mod gss;
12pub mod name_table;
13pub mod psi;
14
15#[cfg(test)]
16mod tests;
17
18use crate::parser::{Parse, ParserError};
19use crate::syms::{self, Constant, OffsetSegment, Pub, SymIter, SymKind};
20use crate::utils::iter::IteratorWithRangesExt;
21use crate::ReadAt;
22use anyhow::Context;
23use bstr::BStr;
24use std::collections::HashMap;
25use tracing::{debug, warn};
26
27#[cfg(doc)]
28use crate::dbi::DbiStreamHeader;
29
30impl<F: ReadAt> crate::Pdb<F> {
31    /// Reads the Global Symbol Stream (GSS). This stream contains global symbol records.
32    ///
33    /// This function does not validate the contents of the stream.
34    pub fn read_gss(&self) -> anyhow::Result<gss::GlobalSymbolStream> {
35        if let Some(gss_stream) = self.dbi_header.global_symbol_stream.get() {
36            let stream_data = self.read_stream_to_vec(gss_stream)?;
37            Ok(gss::GlobalSymbolStream { stream_data })
38        } else {
39            Ok(gss::GlobalSymbolStream::empty())
40        }
41    }
42
43    /// Reads the Global Symbol Index (GSI). This stream contains a name-to-symbol lookup table.
44    /// It indexes many global symbols, such as `S_GPROCREF`, `S_CONSTANT`, etc.
45    pub fn read_gsi(&self) -> anyhow::Result<gsi::GlobalSymbolIndex> {
46        if let Some(gsi_stream) = self.dbi_header.global_symbol_index_stream.get() {
47            let num_buckets = self.num_buckets_for_name_table();
48            let gsi_stream_data = self.read_stream_to_vec(gsi_stream)?;
49            gsi::GlobalSymbolIndex::parse(num_buckets, gsi_stream_data)
50        } else {
51            Ok(gsi::GlobalSymbolIndex::empty())
52        }
53    }
54
55    /// Returns the number of buckets to use in `NameTable`, for use by the GSI and PSI.
56    pub(crate) fn num_buckets_for_name_table(&self) -> usize {
57        let minimal_dbg_info = self.mini_pdb();
58        name_table::get_v1_default_bucket(minimal_dbg_info)
59    }
60
61    /// Reads the Public Symbol Index.
62    pub fn read_psi(&self) -> anyhow::Result<psi::PublicSymbolIndex> {
63        if let Ok(psi_stream) = self.dbi_header.public_stream_index() {
64            let num_buckets = self.num_buckets_for_name_table();
65            let public_stream_data = self.read_stream_to_vec(psi_stream)?;
66            psi::PublicSymbolIndex::parse(num_buckets, public_stream_data)
67        } else {
68            Ok(psi::PublicSymbolIndex::empty())
69        }
70    }
71}
72
73/// If `kind` is a global symbol that should be indexed in the GSI or PSI, then this returns the
74/// name of that global symbol (within `Some`).
75///
76/// A "global symbol" in this context is any symbol that can appear in the Global Symbol Stream
77/// and be indexed in the Global Symbol Index or Public Symbol Index. The list of global symbols:
78///
79/// * `S_PUB32`
80/// * `S_CONSTANT`
81/// * `S_PROCREF`
82/// * `S_LPROCREF`
83/// * `S_DATAREF`
84/// * `S_ANNOTATIONREF`
85/// * `S_UDT`
86/// * `S_LDATA32`
87/// * `S_GDATA32`
88/// * `S_LTHREAD32`
89/// * `S_GTHREAD32`
90pub fn get_global_symbol_name(kind: SymKind, data: &[u8]) -> Result<Option<&BStr>, ParserError> {
91    match kind {
92        SymKind::S_PUB32 => {
93            let pub_data = Pub::parse(data)?;
94            Ok(Some(pub_data.name))
95        }
96
97        SymKind::S_CONSTANT => {
98            let constant_record = Constant::parse(data)?;
99            Ok(Some(constant_record.name))
100        }
101
102        // These symbols have the same structure.
103        SymKind::S_PROCREF
104        | SymKind::S_LPROCREF
105        | SymKind::S_DATAREF
106        | SymKind::S_ANNOTATIONREF => {
107            let ref_sym = syms::RefSym2::parse(data)?;
108            Ok(Some(ref_sym.name))
109        }
110
111        SymKind::S_UDT => {
112            let udt_data = syms::Udt::parse(data)?;
113            Ok(Some(udt_data.name))
114        }
115
116        SymKind::S_LDATA32 | SymKind::S_GDATA32 | SymKind::S_LMANDATA | SymKind::S_GMANDATA => {
117            let data = syms::Data::parse(data)?;
118            Ok(Some(data.name))
119        }
120
121        SymKind::S_LTHREAD32 | SymKind::S_GTHREAD32 => {
122            let thread_storage = syms::ThreadStorageData::parse(data)?;
123            Ok(Some(thread_storage.name))
124        }
125
126        SymKind::S_LMANPROC | SymKind::S_GMANPROC => {
127            let man_proc = syms::ManProcSym::parse(data)?;
128            Ok(Some(man_proc.name))
129        }
130
131        // TODO
132        SymKind::S_TOKENREF => Ok(None),
133
134        _ => Ok(None),
135    }
136}
137
/// Output of `build_global_symbols_index`: the serialized contents of the two rebuilt index
/// streams.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuildGlobalSymbolsIndexesOutput {
    /// The new contents of the Global Symbol Index (GSI) stream.
    pub global_symbol_index_stream_data: Vec<u8>,
    /// The new contents of the Public Symbol Index (PSI) stream.
    pub public_symbol_index_stream_data: Vec<u8>,
}
145
146/// Reads a Global Symbol Stream and constructs a new Global Symbol Index (GSI) and
147/// Public Symbol Index (PSI).
148pub fn build_global_symbols_index(
149    symbol_records: &[u8],
150    num_buckets: usize,
151) -> anyhow::Result<BuildGlobalSymbolsIndexesOutput> {
152    debug!("Rebuilding Global Symbol Index (GSI) and Public Symbol Index (PSI)");
153
154    let mut public_hash_records = name_table::NameTableBuilder::new(num_buckets);
155    let mut global_hash_records = name_table::NameTableBuilder::new(num_buckets);
156
157    // contains (byte offset in symbol stream, SegmentOffset)
158    let mut public_addr_map: Vec<(u32, OffsetSegment)> = Vec::new();
159
160    let mut unrecognized_symbols: HashMap<SymKind, u32> = HashMap::new();
161
162    for (sym_range, sym) in SymIter::new(symbol_records).with_ranges() {
163        let sym_offset = sym_range.start;
164
165        // If the symbol is S_PUB32, then add an entry to both public_hash_records and
166        // global_hash_records.
167        if sym.kind == SymKind::S_PUB32 {
168            let pub_data =
169                Pub::parse(sym.data).with_context(|| "failed to parse S_PUB32 record")?;
170            public_hash_records.push(pub_data.name, (sym_offset + 1) as i32);
171            public_addr_map.push((sym_offset as u32, pub_data.offset_segment()));
172            continue;
173        }
174
175        if matches!(sym.kind, SymKind::S_TOKENREF | SymKind::S_DATAREF) {
176            continue;
177        }
178
179        if let Some(sym_name) = get_global_symbol_name(sym.kind, sym.data)? {
180            global_hash_records.push(sym_name, (sym_offset + 1) as i32);
181        } else {
182            *unrecognized_symbols.entry(sym.kind).or_default() += 1;
183        }
184    }
185
186    if !unrecognized_symbols.is_empty() {
187        warn!(
188            "Number of unrecognized symbol types found in Global Symbol Stream: {}",
189            unrecognized_symbols.len()
190        );
191        let mut sorted_unrecognized: Vec<(SymKind, u32)> =
192            unrecognized_symbols.iter().map(|(&k, &v)| (k, v)).collect();
193        sorted_unrecognized.sort_unstable_by_key(|(k, _)| *k);
194        for (kind, count) in sorted_unrecognized.iter() {
195            warn!(
196                "    {count:6} - [{raw_kind:04x}] {kind:?}",
197                raw_kind = kind.0
198            );
199        }
200    }
201
202    psi::sort_address_records(&mut public_addr_map);
203
204    debug!("Building Global Symbol Index (GSI)");
205    let global_symbol_stream_data = gsi::build_gsi(&mut global_hash_records);
206
207    debug!("Building Public Symbol Index (PSI)");
208    let public_symbol_stream_data = psi::build_psi(&mut public_hash_records, &public_addr_map);
209
210    Ok(BuildGlobalSymbolsIndexesOutput {
211        global_symbol_index_stream_data: global_symbol_stream_data,
212        public_symbol_index_stream_data: public_symbol_stream_data,
213    })
214}