ms_pdb/globals/name_table.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743
//! Name-to-Symbol Lookup Table
#[cfg(test)]
mod tests;
use super::gss::GlobalSymbolStream;
use crate::parser::{Parser, ParserMut};
use crate::syms::Sym;
use anyhow::bail;
use bitvec::prelude::{BitSlice, Lsb0};
use bstr::BStr;
use std::mem::size_of;
use tracing::{debug, debug_span, error, info, trace, warn};
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U32};
/// This is the size used for calculating hash indices. It was the size of the in-memory form
/// of the hash records, on 32-bit machines. It is not the same as the length of the hash records
/// that are stored on disk (which is 8).
pub const HASH_RECORD_CALC_LEN: i32 = 12;
// section contribution structure
/// section contribution version, before V70 there was no section version
pub const GSI_HASH_SC_IMPV_V70: u32 = 0xeffe0000 + 19990810;
/// Signature value for `NameTableHeader::ver_signature`.
pub const GSI_HASH_HEADER_SIGNATURE: u32 = 0xffff_ffff;
/// The current value to use (implementation version) for the GSI Hash.
pub const GSI_HASH_HEADER_VERSION: u32 = GSI_HASH_SC_IMPV_V70;
/// This header is present at the start of the Name Table.
///
/// Immediately after this header follows the hash records, whose length is given by
/// `cb_hash_records`. This section contains an array of [`HashRecord`] structs.
///
/// Immediately after the hash records is the hash buckets, whose length is given by `cb_buckets`.
///
/// Called `GSIHashHdr` in C++.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug)]
#[repr(C)]
pub struct NameTableHeader {
/// Constant which identifies this as a `NameTableHeader`. This value (0xffff_ffff) was chosen
/// because the previous version of the `NameTable` did not use this header, and this
/// signature value would never have occurred in the same position in the previous version.
pub signature: U32<LE>,
/// Version of the name hash table.
pub version: U32<LE>,
/// Size in bytes of the hash records. This should always be a multiple of
/// `size_of::<HashRecord>()`, not `HASH_RECORD_CALC_LEN`.
pub hash_records_size: U32<LE>,
/// Size in bytes of the hash buckets.
pub buckets_size: U32<LE>,
}
/// An entry in the GSI hash table. This is in the `cb_hash_records` region.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug, Clone)]
#[repr(C)]
pub struct HashRecord {
/// The byte offset of a symbol record within the Global Symbol Stream, plus 1.
/// It should point to the beginning of a global symbol record, e.g. `S_PUB32`.
///
/// The value should never be negative or zero.
pub offset: I32<LE>,
/// This appears to be a reference-count field, use for the in-memory representation.
/// On disk, the values are nearly always 1.
pub c_ref: I32<LE>,
}
/// Contains a Name Table, as used in the GSI and PSI.
pub struct NameTable {
/// Each value in this vector is an index into `hash_records`.
/// There is an extra index at the end, to make range calculations easier.
/// This is a "starts" vector that points into `hash_records`.
///
/// `hash_buckets` contains a list of offsets into an array of `HROffsetCalc`
/// structures, each of which is 12 bytes long. So all of the offsets should be a multiple
/// of 12. (Entries can also be -1, meaning there are no entries in that bucket.)
///
/// But here's where it gets weird! The value 12 was the size of a Win32 (32-bit) structure
/// that contained memory pointers. It is not the size of the structure that gets written
/// to disk. _That_ structure is 8 bytes! So when you read these values, you must first
/// divide them by 12, then multiply by 8, to get the actual byte offset into the on-disk
/// data structure.
hash_buckets: Vec<u32>,
/// Each record in this table corresponds to one symbol record (e.g. `S_PUB32`). It contains
/// the offset in bytes into the global symbol stream, plus 1. The value 0 is reserved.
hash_records: Vec<HashRecord>,
}
impl NameTable {
/// Parses a `NameTable`. For the GSI, the entire GSI stream contains only a `NameTable`.
/// For the `PSI, the PSI stream begins with a `PsiStreamHeader`, followed by a `NameTable`.
pub fn parse(
num_buckets: usize,
stream_offset: usize,
sym_hash_bytes: &[u8],
) -> anyhow::Result<Self> {
// Read the hash table from the stream data. The hash table may be in one of two forms:
// "large" or "small". The "large" format was the original format. The "small" format was
// added later.
//
// The hash records are the same for the small and large hash table formats. They are
// different in how the hash buckets are stored.
let _span = debug_span!("NameTable::parse").entered();
let original_len = stream_offset + sym_hash_bytes.len();
// See gsi.cpp, GSI1::readHash()
let hash_records: Vec<HashRecord>;
let hash_buckets: Vec<u32>;
{
let mut p = Parser::new(sym_hash_bytes);
let stream_offset_table_header = original_len - p.len();
let hash_header: &NameTableHeader = p.get()?;
if hash_header.signature.get() == GSI_HASH_HEADER_SIGNATURE
&& hash_header.version.get() == GSI_HASH_HEADER_VERSION
{
debug!(
hash_records_size = hash_header.hash_records_size.get(),
buckets_size = hash_header.buckets_size.get(),
"Hash table format: small buckets"
);
let hash_records_size = hash_header.hash_records_size.get() as usize;
let buckets_size = hash_header.buckets_size.get() as usize;
let stream_offset_hash_records = original_len - p.len();
let hash_records_bytes = p.bytes(hash_records_size)?;
let stream_offset_hash_buckets = original_len - p.len();
let buckets_bytes = p.bytes(buckets_size)?;
let stream_offset_end = original_len - p.len();
// We expect that the size of the hash table is equal to the size of the header +
// cb_hash_records + cb_buckets.
if !p.is_empty() {
warn!("Found extra bytes in hash table, len = {}", p.len());
}
if hash_records_size % size_of::<HashRecord>() != 0 {
warn!("GSI/PSI name table contains hash table with a length that is not a multiple of the hash record size.");
}
let num_hash_records = hash_records_size / size_of::<HashRecord>();
let hash_records_slice: &[HashRecord] =
Parser::new(hash_records_bytes).slice(num_hash_records)?;
debug!(num_hash_records, "Number of records in name table");
hash_records = hash_records_slice.to_vec();
debug!(num_buckets, "Number of hash buckets in name table");
debug!("[........] Stream offsets:");
debug!("[{:08x}] : NameTableHeader", stream_offset_table_header);
debug!("[{:08x}] : hash records", stream_offset_hash_records);
debug!("[{:08x}] : hash buckets", stream_offset_hash_buckets);
debug!("[{:08x}] : (end)", stream_offset_end);
hash_buckets = expand_buckets(
buckets_bytes,
num_buckets,
num_hash_records,
stream_offset_hash_buckets,
)?;
} else {
// We did not find a GsiHashHeader, so this is an old-style hash.
error!("Hash table format: old-style normal buckets");
bail!("Old-style hash table is not supported");
}
}
Ok(Self {
hash_buckets,
hash_records,
})
}
/// Constructs an empty instance
pub fn empty() -> Self {
Self {
hash_buckets: vec![0],
hash_records: vec![],
}
}
/// Check all of the hash records in the name table and verify that the hash of the name for
/// this record matches the hash bucket that the hash record is in.
pub fn check_hashes(&self, global_symbols: &GlobalSymbolStream) -> anyhow::Result<()> {
// Verify that hash buckets point to symbol records, and that the hashes of the symbol
// names in those symbol records matches the hash code for this bucket.
let mut num_hashes_incorrect: u32 = 0;
for (bucket_index, hash_index_window) in self.hash_buckets.windows(2).enumerate() {
trace!(
"Checking hash bucket #{bucket_index}, hash records at {} .. {}",
hash_index_window[0],
hash_index_window[1]
);
let Some(bucket_hash_records) = self
.hash_records
.get(hash_index_window[0] as usize..hash_index_window[1] as usize)
else {
error!("hash record range is invalid");
continue;
};
for hash_record in bucket_hash_records.iter() {
let hash_record_offset = hash_record.offset.get();
// The hash record offset should always be positive.
if hash_record_offset <= 0 {
continue;
}
let record_offset_in_symbol_stream = hash_record_offset - 1;
let pub_sym =
match global_symbols.get_pub32_at(record_offset_in_symbol_stream as u32) {
Err(e) => {
error!("{e}");
continue;
}
Ok(s) => s,
};
let name_hash =
crate::hash::hash_mod_u32(pub_sym.name, self.hash_buckets.len() as u32 - 1);
if name_hash != bucket_index as u32 {
if num_hashes_incorrect < 50 {
error!(
"bucket #{} has symbol {} with wrong hash code {}",
bucket_index, pub_sym.name, name_hash
);
}
num_hashes_incorrect += 1;
}
}
}
if num_hashes_incorrect != 0 {
error!("Found {num_hashes_incorrect} hash records with the wrong hash value");
} else {
info!("All name hashes are correct.");
}
Ok(())
}
/// Gets the hash records for a specific bucket index. The caller is responsible for using
/// a valid bucket index.
pub fn hash_records_for_bucket(&self, bucket: usize) -> &[HashRecord] {
let start = self.hash_buckets[bucket] as usize;
let end = self.hash_buckets[bucket + 1] as usize;
&self.hash_records[start..end]
}
/// Gets the hash records that might contain `name`.
pub fn hash_records_for_name<'a>(&'a self, name: &BStr) -> &'a [HashRecord] {
// If hash_buckets is empty (i.e. has a length of 1, because it is a "starts" table),
// then the table is empty and the hash modulus is zero. We need to avoid dividing by zero.
if self.hash_buckets.len() <= 1 {
return &[];
}
let name_hash = crate::hash::hash_mod_u32(name, self.hash_buckets.len() as u32 - 1);
self.hash_records_for_bucket(name_hash as usize)
}
/// Searches for `name` in the Name Table. The caller must provide access to the GSS.
pub fn find_symbol<'a>(
&self,
gss: &'a GlobalSymbolStream,
name: &BStr,
) -> anyhow::Result<Option<Sym<'a>>> {
let bucket_entries = self.hash_records_for_name(name);
for entry in bucket_entries.iter() {
let entry_offset = entry.offset.get();
if entry_offset <= 0 {
warn!("found invalid hash record; entry.offset <= 0");
continue;
}
let sym = gss.get_sym_at(entry_offset as u32 - 1)?;
if let Some(sym_name) = super::get_global_symbol_name(sym.kind, sym.data)? {
if sym_name.eq_ignore_ascii_case(name) {
return Ok(Some(sym));
}
}
}
Ok(None)
}
/// Iterates the names stored within a `NameTable`.
pub fn iter<'i, 'a: 'i>(&'a self, gss: &'a GlobalSymbolStream) -> NameTableIter<'i> {
NameTableIter {
hash_record_iter: self.hash_records.iter(),
gss,
}
}
}
/// Iterator state for iterating the names within a `NameTable`.
pub struct NameTableIter<'a> {
hash_record_iter: std::slice::Iter<'a, HashRecord>,
gss: &'a GlobalSymbolStream,
}
impl<'a> Iterator for NameTableIter<'a> {
type Item = Sym<'a>;
fn next(&mut self) -> Option<Self::Item> {
loop {
let hash_record = self.hash_record_iter.next()?;
let sym_offset = hash_record.offset.get();
if sym_offset <= 0 {
// Whatever, just ignore it.
continue;
}
if let Ok(sym) = self.gss.get_sym_at((sym_offset - 1) as u32) {
return Some(sym);
} else {
error!("failed to decode symbol in GSS at offset {sym_offset}");
return None;
}
}
}
}
/// Gets the default number of buckets to use.
///
/// The values are hard-coded. 0x1000 is used for normal PDBs and 0x3ffff is used for "mini PDBs".
/// Mini PDBs are produced using the `/DEBUG:FASTLINK` linker option.
pub fn get_v1_default_bucket(minimal_dbg_info: bool) -> usize {
if minimal_dbg_info {
0x3ffff
} else {
0x1000
}
}
/// Expands a compressed bucket. Returns a vector of offsets.
///
/// The input contains a bitmap, followed by an array of offsets. The bitmap determines how many
/// items there are in the array of offsets. The length of the bitmap is specified by num_buckets.
///
/// This function returns a vector that contains hash indices. The hash records for a given hash
/// bucket can be found as:
///
/// ```text
/// let buckets = expand_buckets(...)?;
/// let bucket_index = 10;
/// let hash_records: &[HashRecord] = &hash_records[buckets[bucket_index]..buckets[bucket_index + 1]];
/// ```
fn expand_buckets(
buckets_bytes: &[u8],
num_buckets: usize,
num_hash_records: usize,
stream_offset: usize, // offset of this data structure in stream; for diagnostics only
) -> anyhow::Result<Vec<u32>> {
let _span = debug_span!("expand_buckets").entered();
trace!(num_buckets, num_hash_records, "expanding buckets");
let original_len = stream_offset + buckets_bytes.len();
let mut p = Parser::new(buckets_bytes);
let output_len = num_buckets + 1;
let bitmap_len_in_bytes = nonempty_bitmap_size_bytes(num_buckets);
let bitmap_bytes = p.bytes(bitmap_len_in_bytes)?;
let bv: &BitSlice<u8, Lsb0> = BitSlice::from_slice(bitmap_bytes);
trace!(bitmap_bytes, bitmap_len_in_bytes);
// Count the number of 1 bits set in the non-empty bucket bitmap.
// Use min(num_buckets) so that we ignore any extra bits in the bitmap.
let num_nonempty_buckets = bv.count_ones().min(num_buckets);
trace!(num_nonempty_buckets);
let nonempty_pointers_stream_offset = original_len - p.len();
let nonempty_pointers: &[I32<LE>] = p.slice(num_nonempty_buckets)?;
trace!(
nonempty_pointers_stream_offset,
non_empty_pointers = nonempty_pointers.as_bytes(),
"non-empty pointers"
);
let mut nonempty_pointers_iter = nonempty_pointers.iter();
let mut hash_buckets = Vec::with_capacity(output_len);
for bucket_index in bv.iter_ones() {
// Be careful to avoid processing 1 bits in the bitmap. It is possible for the bitmap
// to contain more bits than there are buckets, due to bit alignment.
if bucket_index >= num_buckets {
break;
}
// The unwrap() cannot fail because we computed the slice length (num_nonempty_buckets)
// from the number of 1 bits in the non-empty mask (bv).
let offset_x12 = nonempty_pointers_iter.next().unwrap().get();
if offset_x12 < 0 {
bail!("found a negative offset in hash buckets");
}
if offset_x12 % HASH_RECORD_CALC_LEN != 0 {
bail!("hash record offset {offset_x12} is not a multiple of 12 (as required)");
}
let offset = (offset_x12 / HASH_RECORD_CALC_LEN) as u32;
// It would be strange for offset to be equal to num_hash_records because that would
// imply an empty "non-empty" hash bucket.
if offset as usize >= num_hash_records {
bail!("hash record offset {offset_x12} is beyond range of hash records table");
}
// Record offsets must be non-decreasing. They should actually be strictly increasing,
// but we tolerate repeated values.
if let Some(&prev_offset) = hash_buckets.last() {
if offset < prev_offset {
bail!("hash record offset {offset} is less than previous offset {prev_offset}");
}
} else if offset != 0 {
bail!("First hash record offset should be zero, but instead it is: 0x{offset:x}");
}
// Add offsets for previous buckets, which were all empty.
if hash_buckets.len() < bucket_index {
trace!(" bucket: 0x{:08x} .. 0x{bucket_index:08x} : range is empty, pushing offset: 0x{offset:8x} {offset:10}", hash_buckets.len());
hash_buckets.resize(bucket_index, offset);
}
trace!(
" bucket: 0x{b:08x} --> offset: 0x{offset:08x}",
b = hash_buckets.len()
);
hash_buckets.push(offset);
assert!(hash_buckets.len() <= num_buckets);
}
// Fill in the offsets for the remaining empty buckets (if any), and push an extra offset for
// the end of the hash records array.
assert!(hash_buckets.len() <= num_buckets);
hash_buckets.resize(num_buckets + 1, num_hash_records as u32);
trace!(
"hash bucket offsets: {:?}",
&hash_buckets[..hash_buckets.len().min(100)]
);
if !nonempty_pointers_iter.as_slice().is_empty() {
warn!(
num_extra_bytes = p.len(),
rest = p.peek_rest(),
"Compressed hash buckets table contains extra byte(s)"
);
}
if tracing::event_enabled!(tracing::Level::TRACE) {
trace!("Non-empty buckets: (within expand_buckets)");
for i in 0..hash_buckets.len() - 1 {
let start = hash_buckets[i];
let end = hash_buckets[i + 1];
if start != end {
trace!(i, start, end);
}
}
}
Ok(hash_buckets)
}
/// Used when rebuilding a Name Table
///
/// The order of the fields is significant because it is used for sorting.
#[derive(Eq, PartialEq, PartialOrd, Ord)]
pub struct HashEntry {
/// computed hash code
pub hash: u32,
/// Symbol offset within the GSS. This value has a bias of +1. It will never be 0.
pub symbol_offset: i32,
}
/// Scan hash_records and figure out how many hash buckets are _not_ empty. Because hash_records
/// is sorted by hash, we can do a single scan through it and find all of the "edges" (places where
/// the `hash` value changes).
pub fn count_nonempty_buckets(sorted_hash_records: &[HashEntry]) -> usize {
iter_nonempty_buckets(sorted_hash_records).count()
}
/// Compute the size in bytes of the bitmap of non-empty buckets.
pub fn nonempty_bitmap_size_bytes(num_buckets: usize) -> usize {
let compressed_bitvec_size_u32s = (num_buckets + 1 + 31) / 32;
compressed_bitvec_size_u32s * 4
}
/// Compute the size in bytes of the name hash table. This includes the header.
pub fn compute_hash_table_size_bytes(
num_hash_records: usize,
num_buckets: usize,
num_nonempty_buckets: usize,
) -> usize {
size_of::<NameTableHeader>()
+ num_hash_records * size_of::<HashRecord>()
+ nonempty_bitmap_size_bytes(num_buckets)
+ num_nonempty_buckets * size_of::<i32>()
}
/// Output of `build_name_table_prepare`
pub struct NameTableInfo {
/// The number of buckets to use. This is an input parameter for the table building code, but
/// it is preserved here to simplify control flow.
pub num_buckets: usize,
/// Number of non-empty buckets. Always less than or equal to `num_buckets.
pub num_nonempty_buckets: usize,
/// Size of the entire table, in bytes.
pub table_size_bytes: usize,
}
impl NameTableBuilder {
/// Reads a set of sorted hash records and computes the number of non-empty hash buckets and the
/// total size of the Name Table in bytes.
pub fn prepare(&mut self) -> NameTableInfo {
self.sort();
let num_nonempty_buckets = count_nonempty_buckets(&self.hash_records);
let table_size_bytes = compute_hash_table_size_bytes(
self.hash_records.len(),
self.num_buckets,
num_nonempty_buckets,
);
NameTableInfo {
num_buckets: self.num_buckets,
num_nonempty_buckets,
table_size_bytes,
}
}
/// The number of names inserted into this builder.
pub fn num_names(&self) -> usize {
self.hash_records.len()
}
/// Writes the hash records, the hash header, and the buckets to output. The output size must be
/// equal to the expected size.
pub fn encode(&self, prepared_info: &NameTableInfo, output: &mut [u8]) {
debug!(
"Number of symbols found (in name table): {n} 0x{n:x}",
n = self.hash_records.len()
);
debug!(
"Size in bytes of hash records (in name table): {s} 0x{s:x}",
s = self.hash_records.len() * 8,
);
let compressed_bitvec_size_bytes = nonempty_bitmap_size_bytes(self.num_buckets);
let num_nonempty_buckets = prepared_info.num_nonempty_buckets;
let hash_buckets_size_bytes =
compressed_bitvec_size_bytes + num_nonempty_buckets * size_of::<i32>();
debug!(
"hash_buckets_size_bytes = 0x{0:x} {0}",
hash_buckets_size_bytes
);
// Build the name-to-symbol hash map.
// The structure of the name-to-symbol hash map is:
//
// GsiHashHeader (fixed size)
// [HashRecord; num_hash_records]
// u32-aligned bitmap of num_buckets + 1 length (in bits), indicating whether a given bucket is non-empty.
// [i32; num_non_empty_buckets]
{
let mut hash_output_cursor = ParserMut::new(output);
// Write the hash header.
*hash_output_cursor.get_mut().unwrap() = NameTableHeader {
signature: U32::new(GSI_HASH_HEADER_SIGNATURE),
version: U32::new(GSI_HASH_HEADER_VERSION),
buckets_size: U32::new(hash_buckets_size_bytes as u32),
hash_records_size: U32::new(
(self.hash_records.len() * size_of::<HashRecord>()) as u32,
),
};
// Write the hash records.
{
let output_slice: &mut [HashRecord] = hash_output_cursor
.slice_mut(self.hash_records.len())
.unwrap();
for (from, to) in self.hash_records.iter().zip(output_slice.iter_mut()) {
*to = HashRecord {
offset: I32::new(from.symbol_offset),
c_ref: I32::new(1),
};
}
}
// Set all bits in the presence bitmap.
{
let sym_hash_bitvec_bytes = hash_output_cursor
.bytes_mut(compressed_bitvec_size_bytes)
.unwrap();
let bv: &mut BitSlice<u8, Lsb0> = BitSlice::from_slice_mut(sym_hash_bitvec_bytes);
for (_record_index, bucket_hashes) in iter_nonempty_buckets(&self.hash_records) {
let hash = bucket_hashes[0].hash;
bv.set(hash as usize, true);
}
assert_eq!(bv.count_ones(), num_nonempty_buckets);
}
// Write the hash record offsets of each of the non-empty hash buckets.
// These values are non-decreasing. The end of each hash bucket is the beginning of the
// next hash bucket.
{
let mut num_nonempty_written = 0;
let output_offsets_slice: &mut [U32<LE>] =
hash_output_cursor.slice_mut(num_nonempty_buckets).unwrap();
let mut output_iter = output_offsets_slice.iter_mut();
for (record_index, _hashes) in iter_nonempty_buckets(&self.hash_records) {
*output_iter.next().unwrap() = U32::new((record_index as u32) * 12);
num_nonempty_written += 1;
}
assert_eq!(num_nonempty_written, num_nonempty_buckets);
assert!(output_iter.as_slice().is_empty());
}
assert!(hash_output_cursor.is_empty());
}
}
}
/// Sorts hash records
#[inline(never)]
pub fn sort_hash_records(hash_records: &mut [HashEntry]) {
// This fully determines the order, since we are comparing all fields.
hash_records.sort_unstable();
}
/// Iterates non-empty buckets. Each item is `(record_index, bucket)`, where `record_index` is
/// the index within `hashes` (the input of this function) where `bucket` begins. The iterated
/// `bucket` value will always contain values (will not be empty).
fn iter_nonempty_buckets(hashes: &[HashEntry]) -> IterNonEmptyBuckets<'_> {
IterNonEmptyBuckets {
record_index: 0,
hashes,
}
}
struct IterNonEmptyBuckets<'a> {
record_index: usize,
hashes: &'a [HashEntry],
}
impl<'a> Iterator for IterNonEmptyBuckets<'a> {
// (index into hash records, slice of hash records in bucket)
type Item = (usize, &'a [HashEntry]);
fn next(&mut self) -> Option<Self::Item> {
if self.hashes.is_empty() {
return None;
}
let record_index = self.record_index;
let hash = self.hashes[0].hash;
let mut end = 1;
while end < self.hashes.len() && self.hashes[end].hash == hash {
end += 1;
}
let (lo, hi) = self.hashes.split_at(end);
self.record_index += end;
self.hashes = hi;
Some((record_index, lo))
}
}
/// This type constructs a new name table, for the GSI/PSI.
///
/// Example:
///
/// ```
/// # use ms_pdb::globals::name_table::NameTableBuilder;
/// # use bstr::BStr;
/// let mut builder = NameTableBuilder::new(0x1000);
/// builder.push("hello".into(), 1);
/// builder.push("world".into(), 2);
/// let prepared_info = builder.prepare();
/// let mut encoded_bytes: Vec<u8> = vec![0; prepared_info.table_size_bytes];
/// builder.encode(&prepared_info, &mut encoded_bytes);
/// ```
pub struct NameTableBuilder {
num_buckets: usize,
hash_records: Vec<HashEntry>,
}
impl NameTableBuilder {
/// Starts building a new name table. Specify the number of hash buckets to use.
pub fn new(num_buckets: usize) -> Self {
assert!(num_buckets > 0);
Self {
num_buckets,
hash_records: Vec::new(),
}
}
/// The number of hash buckets.
pub fn num_buckets(&self) -> usize {
self.num_buckets
}
/// Adds a new string entry to the builder. Specify the hash of the string and its symbol
/// offset. The hash _must_ already have been computed and the remainder taken using
/// `num_buckets()`.
pub fn push_hash(&mut self, hash: u32, symbol_offset: i32) {
assert!((hash as usize) < self.num_buckets);
self.hash_records.push(HashEntry {
hash,
symbol_offset,
});
}
/// Hashes a string and adds it to the table. The contents of the string are not retained.
pub fn push(&mut self, name: &BStr, symbol_offset: i32) {
let name_hash = crate::hash::hash_mod_u32(name, self.num_buckets as u32);
self.push_hash(name_hash, symbol_offset);
}
fn sort(&mut self) {
sort_hash_records(&mut self.hash_records);
}
}