Skip to main content

ms_pdb/globals/
psi.rs

1//! Public Symbol Index
2//!
3//! The Public Symbol Index (PSI) provides several look-up tables that accelerate finding
4//! information in the Global Symbol Stream. The PSI indexes only `S_PUB32` symbols in the GSS; all
5//! other symbol kinds are indexed in the GSI.
6//!
7//! The PSI does not have a fixed stream number. The DBI Stream Header contains the stream number
8//! of the PSI.
9
10use super::gss::*;
11use super::name_table::*;
12use crate::syms::{OffsetSegment, Pub};
13use crate::utils::is_aligned_4;
14use anyhow::{Context, bail};
15use bstr::BStr;
16use ms_codeview::parser::{Parse, Parser, ParserMut};
17use std::mem::size_of;
18use tracing::{debug, error, info};
19use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, LE, U16, U32, Unaligned};
20
21/// The header of the GSI stream.
22///
23/// See `PSGSIHDR` in `microsoft-pdb/PDB/dbi/gsi.h`.
24#[repr(C)]
25#[derive(IntoBytes, FromBytes, Unaligned, Immutable, KnownLayout, Clone, Debug)]
26#[allow(missing_docs)]
27pub struct PsiStreamHeader {
28    /// Length in bytes of the symbol hash table.  This region immediately follows PSGSIHDR.
29    pub name_table_size: U32<LE>,
30
31    /// Length in bytes of the address map.  This region immediately follows the symbol hash.
32    pub addr_table_size: U32<LE>,
33
34    /// The number of thunk records.
35    pub num_thunks: U32<LE>,
36    /// Size in bytes of each thunk record.
37    pub thunk_size: U32<LE>,
38    pub thunk_table_section: U16<LE>,
39    pub padding: U16<LE>,
40    pub thunk_table_offset: U32<LE>,
41    pub num_sections: U32<LE>,
42}
43static_assertions::const_assert_eq!(core::mem::size_of::<PsiStreamHeader>(), 28);
44
45/// Contains the Public Symbol Index
46///
47/// The Public Symbol Index (PSI) contains a name-to-symbol lookup table and an address-to-symbol
48/// lookup table.
49pub struct PublicSymbolIndex {
50    /// Allows name-to-symbol look for `S_PUB32` symbols.
51    name_table: NameTable,
52
53    /// Each entry in this table is a byte offset of one `S_PUB32` symbol in the GSS.
54    /// All of the values are sorted by `(segment, offset)`, which allows binary search.
55    addr_map: Vec<u32>,
56}
57
58impl PublicSymbolIndex {
59    /// Parses the PSI from stream data. The caller must specify `num_buckets` because that value
60    /// is not stored within the stream.
61    pub fn parse(num_buckets: usize, public_stream_data: Vec<u8>) -> anyhow::Result<Self> {
62        let mut p = Parser::new(&public_stream_data);
63        if p.is_empty() {
64            return Ok(Self::empty());
65        }
66
67        let psgsi_header: &PsiStreamHeader = p.get()?;
68        debug!("PsiStreamHeader: {:#?}", psgsi_header);
69
70        let sym_hash_size = psgsi_header.name_table_size.get() as usize;
71        let addr_map_size = psgsi_header.addr_table_size.get() as usize;
72
73        debug!("Size of symbol hash table: {} bytes", sym_hash_size);
74        debug!("Size of address map: {} bytes", addr_map_size);
75
76        let sym_hash_bytes = p
77            .bytes(sym_hash_size)
78            .with_context(|| "Failed to locate symbol hash table within Publics stream")?;
79        let addr_map_bytes = p
80            .bytes(addr_map_size)
81            .with_context(|| "Failed to locate address map within Publics stream")?;
82
83        let name_table =
84            NameTable::parse(num_buckets, size_of::<PsiStreamHeader>(), sym_hash_bytes)?;
85
86        // Load the address map. The address map is an array of u32 values, each of which is an
87        // offset into the global symbol stream. I'm _guessing_ that the array is sorted by
88        // [segment:offset].
89        let addr_map: Vec<u32>;
90        {
91            let num_addrs = addr_map_bytes.len() / 4;
92            info!("Number of entries in address map: {}", num_addrs);
93
94            let mut addr_parser = Parser::new(addr_map_bytes);
95            let addr_map_u32_slice: &[U32<LE>] = addr_parser.slice(num_addrs)?;
96
97            addr_map = addr_map_u32_slice.iter().map(|i| i.get()).collect();
98        }
99
100        Ok(PublicSymbolIndex {
101            name_table,
102            addr_map,
103        })
104    }
105
106    /// Constructs an empty instance of the PSI.
107    pub fn empty() -> Self {
108        Self {
109            addr_map: vec![],
110            name_table: NameTable::empty(),
111        }
112    }
113
114    /// Check invariants for the PSI. This requires having access to the GSS, since the PSI
115    /// points into the GSS.
116    pub fn check_consistency(&self, gss: &GlobalSymbolStream) -> anyhow::Result<()> {
117        // Verify that all entries in the address map are in non-decreasing order.
118        let mut prev_sym: Option<Pub<'_>> = None;
119        let mut num_bad_order: u32 = 0;
120        for &offset in self.addr_map.iter() {
121            let sym = gss.get_pub32_at(offset)?;
122            if let Some(prev_sym) = &prev_sym {
123                if prev_sym.offset_segment() > sym.offset_segment() {
124                    if num_bad_order < 20 {
125                        error!("found addr map in bad order");
126                    }
127                    num_bad_order += 1;
128                }
129            }
130            prev_sym = Some(sym);
131        }
132
133        if num_bad_order != 0 {
134            bail!(
135                "Found {} address map entries that were out of order.",
136                num_bad_order
137            );
138        }
139        info!("All address map entries are correctly ordered.");
140
141        Ok(())
142    }
143
144    /// Gets direct access to the name-to-symbol table.
145    pub fn names(&self) -> &NameTable {
146        &self.name_table
147    }
148
149    /// Searches for an `S_PUB32` symbol by name.
150    pub fn find_symbol_by_name<'a>(
151        &self,
152        gss: &'a GlobalSymbolStream,
153        name: &BStr,
154    ) -> anyhow::Result<Option<Pub<'a>>> {
155        if let Some(sym) = self.name_table.find_symbol(gss, name)? {
156            Ok(Some(Pub::parse(sym.data)?))
157        } else {
158            Ok(None)
159        }
160    }
161
162    /// Searches for an `S_PUB32` symbol by address.
163    pub fn find_symbol_by_addr<'a>(
164        &self,
165        gss: &'a GlobalSymbolStream,
166        segment: u16,
167        offset: u32,
168    ) -> anyhow::Result<Option<(Pub<'a>, u32)>> {
169        use std::cmp::Ordering;
170
171        let addr_map = self.addr_map.as_slice();
172
173        let mut items = addr_map;
174        while !items.is_empty() {
175            let mid_index = items.len() / 2;
176            let mid_rec = gss.get_pub32_at(items[mid_index])?;
177            let mid_segment = mid_rec.fixed.offset_segment.segment();
178            let mid_offset = mid_rec.fixed.offset_segment.offset();
179
180            match segment.cmp(&mid_segment) {
181                Ordering::Less => {
182                    // info!("segment is less, moving low");
183                    items = &items[..mid_index];
184                    continue;
185                }
186                Ordering::Greater => {
187                    // info!("segment is greater, moving high");
188                    items = &items[mid_index + 1..];
189                    continue;
190                }
191                Ordering::Equal => {}
192            }
193
194            // Same segment. Compare the offsets.
195
196            if offset < mid_offset {
197                items = &items[..mid_index];
198                continue;
199            }
200
201            if offset == mid_offset {
202                // Bullseye!
203                return Ok(Some((mid_rec, 0)));
204            }
205
206            // The address we are looking for is higher than the address of the symbol that we are
207            // currently looking at.
208            // TODO: Implement best-so-far search.
209            items = &items[mid_index + 1..];
210            continue;
211        }
212
213        Ok(None)
214    }
215}
216
217/// Sorts an address map slice.
218#[inline(never)]
219pub fn sort_address_records(addr_map: &mut [(u32, OffsetSegment)]) {
220    addr_map.sort_unstable_by_key(|(record_offset, os)| (*os, *record_offset));
221}
222
223/// Builds the Public Symbol Index (PSI).
224///
225/// The PSI contains both a name-to-symbol table and an address-to-symbol table.
226pub fn build_psi(
227    sorted_hash_records: &mut NameTableBuilder,
228    sorted_addr_map: &[(u32, OffsetSegment)],
229) -> Vec<u8> {
230    assert_eq!(sorted_hash_records.num_names(), sorted_addr_map.len());
231
232    debug!(
233        "Number of entries in address table: {n} 0x{n:x}",
234        n = sorted_addr_map.len()
235    );
236    debug!(
237        "Size in bytes of address table: {s} 0x{s:x}",
238        s = sorted_addr_map.len() * 4
239    );
240
241    let name_table_info = sorted_hash_records.prepare();
242    let addr_map_size_bytes = sorted_addr_map.len() * size_of::<i32>();
243
244    let stream_size_bytes =
245        size_of::<PsiStreamHeader>() + name_table_info.table_size_bytes + addr_map_size_bytes;
246
247    let mut stream_data: Vec<u8> = vec![0; stream_size_bytes];
248    let mut p = ParserMut::new(&mut stream_data);
249
250    let stream_header = PsiStreamHeader {
251        name_table_size: U32::new(name_table_info.table_size_bytes as u32),
252        addr_table_size: U32::new(addr_map_size_bytes as u32),
253        num_thunks: U32::new(0), // TODO
254        thunk_size: U32::new(0), // TODO
255        padding: U16::new(0),
256        thunk_table_section: U16::new(0), // TODO
257        thunk_table_offset: U32::new(0),  // TODO
258        num_sections: U32::new(0),        // TODO
259    };
260    *p.get_mut::<PsiStreamHeader>().unwrap() = stream_header;
261
262    let name_table_bytes = p.bytes_mut(name_table_info.table_size_bytes).unwrap();
263    sorted_hash_records.encode(&name_table_info, name_table_bytes);
264
265    let addr_map_bytes = p.bytes_mut(addr_map_size_bytes).unwrap();
266    let addr_map_output = <[U32<LE>]>::mut_from_bytes(addr_map_bytes).unwrap();
267    // Write the address map. This converts from the array that we used for sorting, which contains
268    // the symbol record byte offset and the segment:offset, to just the symbol record byte offset.
269    {
270        for (from, to) in sorted_addr_map.iter().zip(addr_map_output.iter_mut()) {
271            *to = U32::new(from.0);
272        }
273    }
274
275    assert!(p.is_empty());
276
277    // Make it easy to understand the output.
278    {
279        let mut pos = 0;
280        let mut region = |name: &str, len: usize| {
281            debug!("    {pos:08x} +{len:08x} : {name}");
282            pos += len;
283        };
284        debug!("PSI Stream layout:");
285        region("PSI Stream Header", size_of::<PsiStreamHeader>());
286        region("Name Table - Header", size_of::<NameTableHeader>());
287        region(
288            "Name Table - Hash Records",
289            sorted_hash_records.num_names() * size_of::<HashRecord>(),
290        );
291        region(
292            "Name Table - Buckets Bitmap",
293            nonempty_bitmap_size_bytes(sorted_hash_records.num_buckets()),
294        );
295        region(
296            "Name Table - Buckets",
297            name_table_info.num_nonempty_buckets * 4,
298        );
299        region("Address Table", addr_map_size_bytes);
300        region("(end)", 0);
301        assert_eq!(pos, stream_data.len());
302    }
303
304    assert!(is_aligned_4(stream_data.len()));
305
306    stream_data
307}