ms_pdb/globals/
name_table.rs

1//! Name-to-Symbol Lookup Table
2
3#[cfg(test)]
4mod tests;
5
6use super::gss::GlobalSymbolStream;
7use crate::parser::{Parser, ParserMut};
8use crate::syms::Sym;
9use anyhow::bail;
10use bitvec::prelude::{BitSlice, Lsb0};
11use bstr::BStr;
12use std::mem::size_of;
13use tracing::{debug, debug_span, error, info, trace, warn};
14use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U32};
15
/// This is the size used for calculating hash indices. It was the size of the in-memory form
/// of the hash records, on 32-bit machines. It is not the same as the length of the hash records
/// that are stored on disk (which is 8; see `HashRecord`).
pub const HASH_RECORD_CALC_LEN: i32 = 12;

/// GSI hash implementation version for V70. Before V70 there was no version field.
/// Encoded as a base constant plus a date stamp (1999-08-10).
pub const GSI_HASH_SC_IMPV_V70: u32 = 0xeffe0000 + 19990810;

/// Signature value for `NameTableHeader::signature`.
pub const GSI_HASH_HEADER_SIGNATURE: u32 = 0xffff_ffff;
/// The current value to use (implementation version) for the GSI Hash.
pub const GSI_HASH_HEADER_VERSION: u32 = GSI_HASH_SC_IMPV_V70;
29
/// This header is present at the start of the Name Table.
///
/// Immediately after this header follows the hash records, whose length is given by
/// `hash_records_size`. This section contains an array of [`HashRecord`] structs.
///
/// Immediately after the hash records is the hash buckets, whose length is given by
/// `buckets_size`.
///
/// Called `GSIHashHdr` in C++.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug)]
#[repr(C)]
pub struct NameTableHeader {
    /// Constant which identifies this as a `NameTableHeader`. This value (0xffff_ffff) was chosen
    /// because the previous version of the `NameTable` did not use this header, and this
    /// signature value would never have occurred in the same position in the previous version.
    pub signature: U32<LE>,

    /// Version of the name hash table. Expected to be [`GSI_HASH_HEADER_VERSION`].
    pub version: U32<LE>,

    /// Size in bytes of the hash records. This should always be a multiple of
    /// `size_of::<HashRecord>()`, not `HASH_RECORD_CALC_LEN`.
    pub hash_records_size: U32<LE>,

    /// Size in bytes of the hash buckets (bitmap plus non-empty bucket offsets).
    pub buckets_size: U32<LE>,
}
56
/// An entry in the GSI hash table. This is in the `hash_records_size` region.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug, Clone)]
#[repr(C)]
pub struct HashRecord {
    /// The byte offset of a symbol record within the Global Symbol Stream, plus 1.
    /// It should point to the beginning of a global symbol record, e.g. `S_PUB32`.
    ///
    /// The value should never be negative or zero.
    pub offset: I32<LE>,

    /// This appears to be a reference-count field, used for the in-memory representation.
    /// On disk, the values are nearly always 1.
    pub c_ref: I32<LE>,
}
71
/// Contains a Name Table, as used in the GSI and PSI.
pub struct NameTable {
    /// Each value in this vector is an index into `hash_records`.
    /// There is an extra index at the end, to make range calculations easier.
    /// This is a "starts" vector that points into `hash_records`, so its length is
    /// always `num_buckets + 1`.
    ///
    /// On disk, the buckets contain a list of offsets into an array of `HROffsetCalc`
    /// structures, each of which is 12 bytes long. So all of the on-disk offsets should be
    /// a multiple of 12.  (Entries can also be -1, meaning there are no entries in that bucket.)
    ///
    /// But here's where it gets weird!  The value 12 was the size of a Win32 (32-bit) structure
    /// that contained memory pointers.  It is not the size of the structure that gets written
    /// to disk.  _That_ structure is 8 bytes!  So when you read these values, you must first
    /// divide them by 12, then multiply by 8, to get the actual byte offset into the on-disk
    /// data structure.  (`expand_buckets` performs that conversion when building this vector.)
    hash_buckets: Vec<u32>,

    /// Each record in this table corresponds to one symbol record (e.g. `S_PUB32`). It contains
    /// the offset in bytes into the global symbol stream, plus 1.  The value 0 is reserved.
    hash_records: Vec<HashRecord>,
}
93
impl NameTable {
    /// Parses a `NameTable`. For the GSI, the entire GSI stream contains only a `NameTable`.
    /// For the PSI, the PSI stream begins with a `PsiStreamHeader`, followed by a `NameTable`.
    ///
    /// `num_buckets` is the expected number of hash buckets (e.g. from
    /// `get_v1_default_bucket`). `stream_offset` is the position of `sym_hash_bytes`
    /// within the containing stream; it is used only for diagnostic output.
    ///
    /// Returns an error if the header does not match the "small buckets" format, or if the
    /// data is truncated or malformed.
    pub fn parse(
        num_buckets: usize,
        stream_offset: usize,
        sym_hash_bytes: &[u8],
    ) -> anyhow::Result<Self> {
        // Read the hash table from the stream data. The hash table may be in one of two forms:
        // "large" or "small".  The "large" format was the original format. The "small" format was
        // added later.
        //
        // The hash records are the same for the small and large hash table formats. They are
        // different in how the hash buckets are stored.

        let _span = debug_span!("NameTable::parse").entered();

        // Total position of the end of the input, in stream coordinates. Subtracting the
        // parser's remaining length from this yields the current stream offset.
        let original_len = stream_offset + sym_hash_bytes.len();

        // See gsi.cpp, GSI1::readHash()
        let hash_records: Vec<HashRecord>;
        let hash_buckets: Vec<u32>;
        {
            let mut p = Parser::new(sym_hash_bytes);
            let stream_offset_table_header = original_len - p.len();
            let hash_header: &NameTableHeader = p.get()?;

            if hash_header.signature.get() == GSI_HASH_HEADER_SIGNATURE
                && hash_header.version.get() == GSI_HASH_HEADER_VERSION
            {
                debug!(
                    hash_records_size = hash_header.hash_records_size.get(),
                    buckets_size = hash_header.buckets_size.get(),
                    "Hash table format: small buckets"
                );

                let hash_records_size = hash_header.hash_records_size.get() as usize;
                let buckets_size = hash_header.buckets_size.get() as usize;

                // Record the stream offset of each region before consuming it, for diagnostics.
                let stream_offset_hash_records = original_len - p.len();
                let hash_records_bytes = p.bytes(hash_records_size)?;
                let stream_offset_hash_buckets = original_len - p.len();
                let buckets_bytes = p.bytes(buckets_size)?;
                let stream_offset_end = original_len - p.len();

                // We expect that the size of the hash table is equal to the size of the header +
                // cb_hash_records + cb_buckets.
                if !p.is_empty() {
                    warn!("Found extra bytes in hash table, len = {}", p.len());
                }

                if hash_records_size % size_of::<HashRecord>() != 0 {
                    warn!("GSI/PSI name table contains hash table with a length that is not a multiple of the hash record size.");
                }
                let num_hash_records = hash_records_size / size_of::<HashRecord>();
                let hash_records_slice: &[HashRecord] =
                    Parser::new(hash_records_bytes).slice(num_hash_records)?;

                debug!(num_hash_records, "Number of records in name table");

                hash_records = hash_records_slice.to_vec();

                debug!(num_buckets, "Number of hash buckets in name table");

                debug!("[........] Stream offsets:");
                debug!("[{:08x}] : NameTableHeader", stream_offset_table_header);
                debug!("[{:08x}] : hash records", stream_offset_hash_records);
                debug!("[{:08x}] : hash buckets", stream_offset_hash_buckets);
                debug!("[{:08x}] : (end)", stream_offset_end);

                // Decompress the bucket bitmap + offsets into a dense "starts" vector.
                hash_buckets = expand_buckets(
                    buckets_bytes,
                    num_buckets,
                    num_hash_records,
                    stream_offset_hash_buckets,
                )?;
            } else {
                // We did not find a GsiHashHeader, so this is an old-style hash.
                error!("Hash table format: old-style normal buckets");
                bail!("Old-style hash table is not supported");
            }
        }

        Ok(Self {
            hash_buckets,
            hash_records,
        })
    }

    /// Constructs an empty instance.
    ///
    /// `hash_buckets` still contains a single trailing 0 because it is a "starts" table;
    /// a table with N buckets has N + 1 entries.
    pub fn empty() -> Self {
        Self {
            hash_buckets: vec![0],
            hash_records: vec![],
        }
    }

    /// Check all of the hash records in the name table and verify that the hash of the name for
    /// this record matches the hash bucket that the hash record is in.
    ///
    /// Problems are logged (via `error!`) rather than returned; this function returns `Ok(())`
    /// even when mismatches are found.
    pub fn check_hashes(&self, global_symbols: &GlobalSymbolStream) -> anyhow::Result<()> {
        // Verify that hash buckets point to symbol records, and that the hashes of the symbol
        // names in those symbol records matches the hash code for this bucket.

        let mut num_hashes_incorrect: u32 = 0;

        // `hash_buckets` is a starts table, so each adjacent pair forms one bucket's range.
        for (bucket_index, hash_index_window) in self.hash_buckets.windows(2).enumerate() {
            trace!(
                "Checking hash bucket #{bucket_index}, hash records at {} .. {}",
                hash_index_window[0],
                hash_index_window[1]
            );
            let Some(bucket_hash_records) = self
                .hash_records
                .get(hash_index_window[0] as usize..hash_index_window[1] as usize)
            else {
                error!("hash record range is invalid");
                continue;
            };

            for hash_record in bucket_hash_records.iter() {
                let hash_record_offset = hash_record.offset.get();

                // The hash record offset should always be positive.
                if hash_record_offset <= 0 {
                    continue;
                }

                // Remove the +1 bias to get the true offset within the GSS.
                let record_offset_in_symbol_stream = hash_record_offset - 1;
                let pub_sym =
                    match global_symbols.get_pub32_at(record_offset_in_symbol_stream as u32) {
                        Err(e) => {
                            error!("{e}");
                            continue;
                        }
                        Ok(s) => s,
                    };

                // The modulus is num_buckets, i.e. the starts-table length minus 1.
                let name_hash =
                    crate::hash::hash_mod_u32(pub_sym.name, self.hash_buckets.len() as u32 - 1);

                if name_hash != bucket_index as u32 {
                    // Cap the per-record error output at 50 lines to avoid flooding the log.
                    if num_hashes_incorrect < 50 {
                        error!(
                            "bucket #{} has symbol {} with wrong hash code {}",
                            bucket_index, pub_sym.name, name_hash
                        );
                    }
                    num_hashes_incorrect += 1;
                }
            }
        }

        if num_hashes_incorrect != 0 {
            error!("Found {num_hashes_incorrect} hash records with the wrong hash value");
        } else {
            info!("All name hashes are correct.");
        }

        Ok(())
    }

    /// Gets the hash records for a specific bucket index. The caller is responsible for using
    /// a valid bucket index.
    ///
    /// Panics if `bucket + 1` is out of range of the buckets table, or if the stored record
    /// range is out of range of `hash_records`.
    pub fn hash_records_for_bucket(&self, bucket: usize) -> &[HashRecord] {
        let start = self.hash_buckets[bucket] as usize;
        let end = self.hash_buckets[bucket + 1] as usize;
        &self.hash_records[start..end]
    }

    /// Gets the hash records that might contain `name`.
    pub fn hash_records_for_name<'a>(&'a self, name: &BStr) -> &'a [HashRecord] {
        // If hash_buckets is empty (i.e. has a length of 1, because it is a "starts" table),
        // then the table is empty and the hash modulus is zero. We need to avoid dividing by zero.
        if self.hash_buckets.len() <= 1 {
            return &[];
        }

        let name_hash = crate::hash::hash_mod_u32(name, self.hash_buckets.len() as u32 - 1);
        self.hash_records_for_bucket(name_hash as usize)
    }

    /// Searches for `name` in the Name Table. The caller must provide access to the GSS.
    ///
    /// Name comparison is ASCII case-insensitive. Returns `Ok(None)` if no matching symbol
    /// is found in the candidate bucket.
    pub fn find_symbol<'a>(
        &self,
        gss: &'a GlobalSymbolStream,
        name: &BStr,
    ) -> anyhow::Result<Option<Sym<'a>>> {
        let bucket_entries = self.hash_records_for_name(name);
        for entry in bucket_entries.iter() {
            let entry_offset = entry.offset.get();
            if entry_offset <= 0 {
                warn!("found invalid hash record; entry.offset <= 0");
                continue;
            }

            // Offsets carry a +1 bias; remove it before reading the GSS.
            let sym = gss.get_sym_at(entry_offset as u32 - 1)?;

            if let Some(sym_name) = super::get_global_symbol_name(sym.kind, sym.data)? {
                if sym_name.eq_ignore_ascii_case(name) {
                    return Ok(Some(sym));
                }
            }
        }

        Ok(None)
    }

    /// Iterates the names stored within a `NameTable`.
    pub fn iter<'i, 'a: 'i>(&'a self, gss: &'a GlobalSymbolStream) -> NameTableIter<'i> {
        NameTableIter {
            hash_record_iter: self.hash_records.iter(),
            gss,
        }
    }
}
309
/// Iterator state for iterating the names within a `NameTable`.
pub struct NameTableIter<'a> {
    /// Iterates the table's hash records in storage order.
    hash_record_iter: std::slice::Iter<'a, HashRecord>,
    /// The Global Symbol Stream used to resolve record offsets to symbols.
    gss: &'a GlobalSymbolStream,
}
315
316impl<'a> Iterator for NameTableIter<'a> {
317    type Item = Sym<'a>;
318
319    fn next(&mut self) -> Option<Self::Item> {
320        loop {
321            let hash_record = self.hash_record_iter.next()?;
322
323            let sym_offset = hash_record.offset.get();
324            if sym_offset <= 0 {
325                // Whatever, just ignore it.
326                continue;
327            }
328
329            if let Ok(sym) = self.gss.get_sym_at((sym_offset - 1) as u32) {
330                return Some(sym);
331            } else {
332                error!("failed to decode symbol in GSS at offset {sym_offset}");
333                return None;
334            }
335        }
336    }
337}
338
/// Gets the default number of buckets to use.
///
/// The values are hard-coded. 0x1000 is used for normal PDBs and 0x3ffff is used for "mini PDBs".
/// Mini PDBs are produced using the `/DEBUG:FASTLINK` linker option.
pub fn get_v1_default_bucket(minimal_dbg_info: bool) -> usize {
    match minimal_dbg_info {
        true => 0x3ffff,  // mini ("fastlink") PDB
        false => 0x1000,  // normal PDB
    }
}
350
/// Expands a compressed bucket. Returns a vector of offsets.
///
/// The input contains a bitmap, followed by an array of offsets. The bitmap determines how many
/// items there are in the array of offsets. The length of the bitmap is specified by
/// `num_buckets` (rounded up to whole `u32` words; see `nonempty_bitmap_size_bytes`).
///
/// This function returns a vector that contains hash indices. It always has
/// `num_buckets + 1` entries, so the hash records for a given hash bucket can be found as:
///
/// ```text
/// let buckets = expand_buckets(...)?;
/// let bucket_index = 10;
/// let hash_records: &[HashRecord] = &hash_records[buckets[bucket_index]..buckets[bucket_index + 1]];
/// ```
fn expand_buckets(
    buckets_bytes: &[u8],
    num_buckets: usize,
    num_hash_records: usize,
    stream_offset: usize, // offset of this data structure in stream; for diagnostics only
) -> anyhow::Result<Vec<u32>> {
    let _span = debug_span!("expand_buckets").entered();

    trace!(num_buckets, num_hash_records, "expanding buckets");

    let original_len = stream_offset + buckets_bytes.len();
    let mut p = Parser::new(buckets_bytes);

    // One extra entry at the end, so the output forms a "starts" table.
    let output_len = num_buckets + 1;

    // Read the non-empty-bucket bitmap. Bit i set means bucket i has at least one record.
    let bitmap_len_in_bytes = nonempty_bitmap_size_bytes(num_buckets);
    let bitmap_bytes = p.bytes(bitmap_len_in_bytes)?;
    let bv: &BitSlice<u8, Lsb0> = BitSlice::from_slice(bitmap_bytes);
    trace!(bitmap_bytes, bitmap_len_in_bytes);

    // Count the number of 1 bits set in the non-empty bucket bitmap.
    // Use min(num_buckets) so that we ignore any extra bits in the bitmap.
    let num_nonempty_buckets = bv.count_ones().min(num_buckets);
    trace!(num_nonempty_buckets);

    // After the bitmap comes one offset per non-empty bucket.
    let nonempty_pointers_stream_offset = original_len - p.len();
    let nonempty_pointers: &[I32<LE>] = p.slice(num_nonempty_buckets)?;

    trace!(
        nonempty_pointers_stream_offset,
        non_empty_pointers = nonempty_pointers.as_bytes(),
        "non-empty pointers"
    );

    let mut nonempty_pointers_iter = nonempty_pointers.iter();

    let mut hash_buckets = Vec::with_capacity(output_len);
    for bucket_index in bv.iter_ones() {
        // Be careful to avoid processing 1 bits in the bitmap. It is possible for the bitmap
        // to contain more bits than there are buckets, due to bit alignment.
        if bucket_index >= num_buckets {
            break;
        }

        // The unwrap() cannot fail because we computed the slice length (num_nonempty_buckets)
        // from the number of 1 bits in the non-empty mask (bv).
        let offset_x12 = nonempty_pointers_iter.next().unwrap().get();
        if offset_x12 < 0 {
            bail!("found a negative offset in hash buckets");
        }
        if offset_x12 % HASH_RECORD_CALC_LEN != 0 {
            bail!("hash record offset {offset_x12} is not a multiple of 12 (as required)");
        }
        // On-disk offsets are scaled by the 12-byte in-memory record size; convert to a
        // record index by dividing by 12. (See the `NameTable::hash_buckets` docs.)
        let offset = (offset_x12 / HASH_RECORD_CALC_LEN) as u32;

        // It would be strange for offset to be equal to num_hash_records because that would
        // imply an empty "non-empty" hash bucket.
        if offset as usize >= num_hash_records {
            bail!("hash record offset {offset_x12} is beyond range of hash records table");
        }

        // Record offsets must be non-decreasing. They should actually be strictly increasing,
        // but we tolerate repeated values.
        if let Some(&prev_offset) = hash_buckets.last() {
            if offset < prev_offset {
                bail!("hash record offset {offset} is less than previous offset {prev_offset}");
            }
        } else if offset != 0 {
            bail!("First hash record offset should be zero, but instead it is: 0x{offset:x}");
        }

        // Add offsets for previous buckets, which were all empty. Empty buckets repeat the
        // next bucket's start offset, producing zero-length ranges.
        if hash_buckets.len() < bucket_index {
            trace!("    bucket: 0x{:08x} .. 0x{bucket_index:08x} : range is empty, pushing offset: 0x{offset:8x} {offset:10}", hash_buckets.len());
            hash_buckets.resize(bucket_index, offset);
        }
        trace!(
            "    bucket: 0x{b:08x} --> offset: 0x{offset:08x}",
            b = hash_buckets.len()
        );
        hash_buckets.push(offset);
        assert!(hash_buckets.len() <= num_buckets);
    }

    // Fill in the offsets for the remaining empty buckets (if any), and push an extra offset for
    // the end of the hash records array.
    assert!(hash_buckets.len() <= num_buckets);
    hash_buckets.resize(num_buckets + 1, num_hash_records as u32);

    trace!(
        "hash bucket offsets: {:?}",
        &hash_buckets[..hash_buckets.len().min(100)]
    );

    if !nonempty_pointers_iter.as_slice().is_empty() {
        warn!(
            num_extra_bytes = p.len(),
            rest = p.peek_rest(),
            "Compressed hash buckets table contains extra byte(s)"
        );
    }

    if tracing::event_enabled!(tracing::Level::TRACE) {
        trace!("Non-empty buckets: (within expand_buckets)");
        for i in 0..hash_buckets.len() - 1 {
            let start = hash_buckets[i];
            let end = hash_buckets[i + 1];
            if start != end {
                trace!(i, start, end);
            }
        }
    }

    Ok(hash_buckets)
}
479
/// Used when rebuilding a Name Table.
///
/// The order of the fields is significant because it is used for sorting: entries sort
/// first by `hash`, then by `symbol_offset` (via the derived `Ord`).
#[derive(Eq, PartialEq, PartialOrd, Ord)]
pub struct HashEntry {
    /// computed hash code (already reduced modulo the bucket count)
    pub hash: u32,
    /// Symbol offset within the GSS. This value has a bias of +1. It will never be 0.
    pub symbol_offset: i32,
}
490
491/// Scan hash_records and figure out how many hash buckets are _not_ empty. Because hash_records
492/// is sorted by hash, we can do a single scan through it and find all of the "edges" (places where
493/// the `hash` value changes).
494pub fn count_nonempty_buckets(sorted_hash_records: &[HashEntry]) -> usize {
495    iter_nonempty_buckets(sorted_hash_records).count()
496}
497
/// Compute the size in bytes of the bitmap of non-empty buckets.
///
/// The bitmap holds one bit per bucket plus one extra bit, rounded up to a whole number of
/// `u32` words (4 bytes each).
pub fn nonempty_bitmap_size_bytes(num_buckets: usize) -> usize {
    let num_bits = num_buckets + 1;
    let num_words = num_bits.div_ceil(32);
    num_words * 4
}
503
504/// Compute the size in bytes of the name hash table. This includes the header.
505pub fn compute_hash_table_size_bytes(
506    num_hash_records: usize,
507    num_buckets: usize,
508    num_nonempty_buckets: usize,
509) -> usize {
510    size_of::<NameTableHeader>()
511        + num_hash_records * size_of::<HashRecord>()
512        + nonempty_bitmap_size_bytes(num_buckets)
513        + num_nonempty_buckets * size_of::<i32>()
514}
515
/// Output of `NameTableBuilder::prepare`.
pub struct NameTableInfo {
    /// The number of buckets to use. This is an input parameter for the table building code, but
    /// it is preserved here to simplify control flow.
    pub num_buckets: usize,
    /// Number of non-empty buckets.  Always less than or equal to `num_buckets`.
    pub num_nonempty_buckets: usize,
    /// Size of the entire table, in bytes. This is the required length of the `output`
    /// buffer passed to `NameTableBuilder::encode`.
    pub table_size_bytes: usize,
}
526
impl NameTableBuilder {
    /// Sorts the accumulated hash records, then computes the number of non-empty hash buckets
    /// and the total size of the Name Table in bytes.
    ///
    /// The returned [`NameTableInfo`] must be passed to [`NameTableBuilder::encode`].
    pub fn prepare(&mut self) -> NameTableInfo {
        // Sorting is required so that records with the same hash are contiguous; both the
        // bucket counting below and `encode` depend on that.
        self.sort();

        let num_nonempty_buckets = count_nonempty_buckets(&self.hash_records);
        let table_size_bytes = compute_hash_table_size_bytes(
            self.hash_records.len(),
            self.num_buckets,
            num_nonempty_buckets,
        );
        NameTableInfo {
            num_buckets: self.num_buckets,
            num_nonempty_buckets,
            table_size_bytes,
        }
    }

    /// The number of names inserted into this builder.
    pub fn num_names(&self) -> usize {
        self.hash_records.len()
    }

    /// Writes the hash records, the hash header, and the buckets to output. The output size must
    /// be equal to the expected size (`prepared_info.table_size_bytes`); a mismatch will cause
    /// a panic in the internal `unwrap()`/`assert!()` calls.
    pub fn encode(&self, prepared_info: &NameTableInfo, output: &mut [u8]) {
        debug!(
            "Number of symbols found (in name table): {n} 0x{n:x}",
            n = self.hash_records.len()
        );
        debug!(
            "Size in bytes of hash records (in name table): {s} 0x{s:x}",
            s = self.hash_records.len() * 8,
        );

        let compressed_bitvec_size_bytes = nonempty_bitmap_size_bytes(self.num_buckets);
        let num_nonempty_buckets = prepared_info.num_nonempty_buckets;
        let hash_buckets_size_bytes =
            compressed_bitvec_size_bytes + num_nonempty_buckets * size_of::<i32>();
        debug!(
            "hash_buckets_size_bytes = 0x{0:x} {0}",
            hash_buckets_size_bytes
        );

        // Build the name-to-symbol hash map.
        // The structure of the name-to-symbol hash map is:
        //
        //      GsiHashHeader (fixed size)
        //      [HashRecord; num_hash_records]
        //      u32-aligned bitmap of num_buckets + 1 length (in bits), indicating whether a given bucket is non-empty.
        //      [i32; num_non_empty_buckets]
        {
            let mut hash_output_cursor = ParserMut::new(output);

            // Write the hash header.
            *hash_output_cursor.get_mut().unwrap() = NameTableHeader {
                signature: U32::new(GSI_HASH_HEADER_SIGNATURE),
                version: U32::new(GSI_HASH_HEADER_VERSION),
                buckets_size: U32::new(hash_buckets_size_bytes as u32),
                hash_records_size: U32::new(
                    (self.hash_records.len() * size_of::<HashRecord>()) as u32,
                ),
            };

            // Write the hash records. On-disk records always use c_ref = 1.
            {
                let output_slice: &mut [HashRecord] = hash_output_cursor
                    .slice_mut(self.hash_records.len())
                    .unwrap();
                for (from, to) in self.hash_records.iter().zip(output_slice.iter_mut()) {
                    *to = HashRecord {
                        offset: I32::new(from.symbol_offset),
                        c_ref: I32::new(1),
                    };
                }
            }

            // Set all bits in the presence bitmap (one bit per non-empty bucket).
            {
                let sym_hash_bitvec_bytes = hash_output_cursor
                    .bytes_mut(compressed_bitvec_size_bytes)
                    .unwrap();
                let bv: &mut BitSlice<u8, Lsb0> = BitSlice::from_slice_mut(sym_hash_bitvec_bytes);

                for (_record_index, bucket_hashes) in iter_nonempty_buckets(&self.hash_records) {
                    // All records in the bucket share the same hash; use the first.
                    let hash = bucket_hashes[0].hash;
                    bv.set(hash as usize, true);
                }

                assert_eq!(bv.count_ones(), num_nonempty_buckets);
            }

            // Write the hash record offsets of each of the non-empty hash buckets.
            // These values are non-decreasing. The end of each hash bucket is the beginning of the
            // next hash bucket.  Offsets are scaled by the legacy 12-byte in-memory record size
            // (see `HASH_RECORD_CALC_LEN`).
            {
                let mut num_nonempty_written = 0;
                let output_offsets_slice: &mut [U32<LE>] =
                    hash_output_cursor.slice_mut(num_nonempty_buckets).unwrap();
                let mut output_iter = output_offsets_slice.iter_mut();

                for (record_index, _hashes) in iter_nonempty_buckets(&self.hash_records) {
                    *output_iter.next().unwrap() = U32::new((record_index as u32) * 12);
                    num_nonempty_written += 1;
                }

                assert_eq!(num_nonempty_written, num_nonempty_buckets);
                assert!(output_iter.as_slice().is_empty());
            }

            // The caller-provided buffer must be exactly consumed.
            assert!(hash_output_cursor.is_empty());
        }
    }
}
642
643/// Sorts hash records
644#[inline(never)]
645pub fn sort_hash_records(hash_records: &mut [HashEntry]) {
646    // This fully determines the order, since we are comparing all fields.
647    hash_records.sort_unstable();
648}
649
650/// Iterates non-empty buckets. Each item is `(record_index, bucket)`, where `record_index` is
651/// the index within `hashes` (the input of this function) where `bucket` begins.  The iterated
652/// `bucket` value will always contain values (will not be empty).
653fn iter_nonempty_buckets(hashes: &[HashEntry]) -> IterNonEmptyBuckets<'_> {
654    IterNonEmptyBuckets {
655        record_index: 0,
656        hashes,
657    }
658}
659
/// State for [`iter_nonempty_buckets`].
struct IterNonEmptyBuckets<'a> {
    /// Index (into the original input slice) of the first record in `hashes`.
    record_index: usize,
    /// The remaining, not-yet-iterated suffix of the sorted hash records.
    hashes: &'a [HashEntry],
}
664
665impl<'a> Iterator for IterNonEmptyBuckets<'a> {
666    // (index into hash records, slice of hash records in bucket)
667    type Item = (usize, &'a [HashEntry]);
668
669    fn next(&mut self) -> Option<Self::Item> {
670        if self.hashes.is_empty() {
671            return None;
672        }
673
674        let record_index = self.record_index;
675
676        let hash = self.hashes[0].hash;
677        let mut end = 1;
678        while end < self.hashes.len() && self.hashes[end].hash == hash {
679            end += 1;
680        }
681
682        let (lo, hi) = self.hashes.split_at(end);
683        self.record_index += end;
684        self.hashes = hi;
685        Some((record_index, lo))
686    }
687}
688
/// This type constructs a new name table, for the GSI/PSI.
///
/// Example:
///
/// ```
/// # use ms_pdb::globals::name_table::NameTableBuilder;
/// # use bstr::BStr;
/// let mut builder = NameTableBuilder::new(0x1000);
/// builder.push("hello".into(), 1);
/// builder.push("world".into(), 2);
/// let prepared_info = builder.prepare();
/// let mut encoded_bytes: Vec<u8> = vec![0; prepared_info.table_size_bytes];
/// builder.encode(&prepared_info, &mut encoded_bytes);
/// ```
pub struct NameTableBuilder {
    /// Number of hash buckets; fixed at construction time. Always non-zero.
    num_buckets: usize,
    /// Accumulated entries; sorted by `prepare` before encoding.
    hash_records: Vec<HashEntry>,
}
707
708impl NameTableBuilder {
709    /// Starts building a new name table.  Specify the number of hash buckets to use.
710    pub fn new(num_buckets: usize) -> Self {
711        assert!(num_buckets > 0);
712        Self {
713            num_buckets,
714            hash_records: Vec::new(),
715        }
716    }
717
718    /// The number of hash buckets.
719    pub fn num_buckets(&self) -> usize {
720        self.num_buckets
721    }
722
723    /// Adds a new string entry to the builder. Specify the hash of the string and its symbol
724    /// offset. The hash _must_ already have been computed and the remainder taken using
725    /// `num_buckets()`.
726    pub fn push_hash(&mut self, hash: u32, symbol_offset: i32) {
727        assert!((hash as usize) < self.num_buckets);
728        self.hash_records.push(HashEntry {
729            hash,
730            symbol_offset,
731        });
732    }
733
734    /// Hashes a string and adds it to the table. The contents of the string are not retained.
735    pub fn push(&mut self, name: &BStr, symbol_offset: i32) {
736        let name_hash = crate::hash::hash_mod_u32(name, self.num_buckets as u32);
737        self.push_hash(name_hash, symbol_offset);
738    }
739
740    fn sort(&mut self) {
741        sort_hash_records(&mut self.hash_records);
742    }
743}