1#[cfg(test)]
4mod tests;
5
6use super::gss::GlobalSymbolStream;
7use crate::syms::Sym;
8use anyhow::bail;
9use bitvec::prelude::{BitSlice, Lsb0};
10use bstr::BStr;
11use ms_codeview::parser::{Parser, ParserMut};
12use std::mem::size_of;
13use tracing::{debug, debug_span, error, info, trace, warn};
14use zerocopy::{FromBytes, I32, Immutable, IntoBytes, KnownLayout, LE, U32, Unaligned};
15
/// Size in bytes of a hash record as used for offset arithmetic in the
/// serialized bucket table. NOTE(review): this is 12, not
/// `size_of::<HashRecord>()` (8); the offsets stored in the compressed bucket
/// table are validated/divided by this value (see `expand_buckets`), and
/// writers multiply by it (see `encode`). Presumably this matches the larger
/// in-memory record layout of the original MSVC implementation — confirm
/// against the on-disk format documentation.
pub const HASH_RECORD_CALC_LEN: i32 = 12;

/// Version constant identifying the V7.0 GSI hash table format.
pub const GSI_HASH_SC_IMPV_V70: u32 = 0xeffe0000 + 19990810;

/// Expected value of [`NameTableHeader::signature`] for the supported format.
pub const GSI_HASH_HEADER_SIGNATURE: u32 = 0xffff_ffff;
/// Expected value of [`NameTableHeader::version`] for the supported format.
pub const GSI_HASH_HEADER_VERSION: u32 = GSI_HASH_SC_IMPV_V70;
/// On-disk header of a GSI/PSI name (hash) table.
///
/// Followed in the stream by `hash_records_size` bytes of [`HashRecord`]s and
/// then `buckets_size` bytes of compressed bucket data.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug)]
#[repr(C)]
pub struct NameTableHeader {
    /// Must equal [`GSI_HASH_HEADER_SIGNATURE`] (`0xffff_ffff`).
    pub signature: U32<LE>,

    /// Must equal [`GSI_HASH_HEADER_VERSION`].
    pub version: U32<LE>,

    /// Size in bytes of the hash records region that follows this header.
    pub hash_records_size: U32<LE>,

    /// Size in bytes of the compressed hash buckets region that follows the
    /// hash records.
    pub buckets_size: U32<LE>,
}
56
/// One serialized hash record, pointing at a symbol in the global symbol
/// stream.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug, Clone)]
#[repr(C)]
pub struct HashRecord {
    /// Biased offset of the symbol record in the symbol stream: the stored
    /// value is the real offset plus 1, so values `<= 0` mark empty/invalid
    /// records (readers in this file subtract 1 before use).
    pub offset: I32<LE>,

    /// Reference count. NOTE(review): the writer in this file always emits 1.
    pub c_ref: I32<LE>,
}
71
/// Decoded (expanded) form of a GSI/PSI name table.
pub struct NameTable {
    /// Start offsets into `hash_records`, one per bucket, plus a final
    /// sentinel entry equal to `hash_records.len()`. Bucket `i` owns
    /// `hash_records[hash_buckets[i] as usize..hash_buckets[i + 1] as usize]`.
    hash_buckets: Vec<u32>,

    /// All hash records, grouped by bucket in bucket order.
    hash_records: Vec<HashRecord>,
}
93
impl NameTable {
    /// Parses the serialized GSI/PSI name table contained in `sym_hash_bytes`.
    ///
    /// * `num_buckets` — expected number of hash buckets; this is not stored
    ///   in the serialized header, so the caller must supply it.
    /// * `stream_offset` — offset of `sym_hash_bytes` within its containing
    ///   stream; used only so debug logs can report true stream offsets.
    ///
    /// Only the V7.0 "small buckets" format (matching signature and version)
    /// is supported; anything else fails with an error.
    pub fn parse(
        num_buckets: usize,
        stream_offset: usize,
        sym_hash_bytes: &[u8],
    ) -> anyhow::Result<Self> {
        let _span = debug_span!("NameTable::parse").entered();

        // Used to convert "bytes remaining in parser" back into an absolute
        // stream offset for logging.
        let original_len = stream_offset + sym_hash_bytes.len();

        let hash_records: Vec<HashRecord>;
        let hash_buckets: Vec<u32>;
        {
            let mut p = Parser::new(sym_hash_bytes);
            let stream_offset_table_header = original_len - p.len();
            let hash_header: &NameTableHeader = p.get()?;

            if hash_header.signature.get() == GSI_HASH_HEADER_SIGNATURE
                && hash_header.version.get() == GSI_HASH_HEADER_VERSION
            {
                debug!(
                    hash_records_size = hash_header.hash_records_size.get(),
                    buckets_size = hash_header.buckets_size.get(),
                    "Hash table format: small buckets"
                );

                let hash_records_size = hash_header.hash_records_size.get() as usize;
                let buckets_size = hash_header.buckets_size.get() as usize;

                // Slice out the two regions described by the header, recording
                // where each one starts for the offset dump below.
                let stream_offset_hash_records = original_len - p.len();
                let hash_records_bytes = p.bytes(hash_records_size)?;
                let stream_offset_hash_buckets = original_len - p.len();
                let buckets_bytes = p.bytes(buckets_size)?;
                let stream_offset_end = original_len - p.len();

                // Trailing bytes are tolerated (warn only), since the header
                // fully describes the regions we actually read.
                if !p.is_empty() {
                    warn!("Found extra bytes in hash table, len = {}", p.len());
                }

                // A ragged record region is tolerated; the trailing partial
                // record (if any) is simply ignored by the integer division.
                if hash_records_size % size_of::<HashRecord>() != 0 {
                    warn!(
                        "GSI/PSI name table contains hash table with a length that is not a multiple of the hash record size."
                    );
                }
                let num_hash_records = hash_records_size / size_of::<HashRecord>();
                let hash_records_slice: &[HashRecord] =
                    Parser::new(hash_records_bytes).slice(num_hash_records)?;

                debug!(num_hash_records, "Number of records in name table");

                hash_records = hash_records_slice.to_vec();

                debug!(num_buckets, "Number of hash buckets in name table");

                debug!("[........] Stream offsets:");
                debug!("[{:08x}] : NameTableHeader", stream_offset_table_header);
                debug!("[{:08x}] : hash records", stream_offset_hash_records);
                debug!("[{:08x}] : hash buckets", stream_offset_hash_buckets);
                debug!("[{:08x}] : (end)", stream_offset_end);

                // Decompress the bucket table into `num_buckets + 1` dense
                // start offsets (with the sentinel at the end).
                hash_buckets = expand_buckets(
                    buckets_bytes,
                    num_buckets,
                    num_hash_records,
                    stream_offset_hash_buckets,
                )?;
            } else {
                error!("Hash table format: old-style normal buckets");
                bail!("Old-style hash table is not supported");
            }
        }

        Ok(Self {
            hash_buckets,
            hash_records,
        })
    }

    /// Returns an empty name table: no records and a single sentinel bucket
    /// offset, preserving the `len() == num_buckets + 1` invariant for zero
    /// buckets.
    pub fn empty() -> Self {
        Self {
            hash_buckets: vec![0],
            hash_records: vec![],
        }
    }

    /// Diagnostic: verifies that every `S_PUB32` symbol referenced by a hash
    /// record actually hashes into the bucket that contains the record.
    ///
    /// Mismatches are logged (detail for at most the first 50) but this never
    /// fails; it always returns `Ok(())`.
    pub fn check_hashes(&self, global_symbols: &GlobalSymbolStream) -> anyhow::Result<()> {
        let mut num_hashes_incorrect: u32 = 0;

        // Each adjacent pair of bucket offsets delimits one bucket's records.
        for (bucket_index, hash_index_window) in self.hash_buckets.windows(2).enumerate() {
            trace!(
                "Checking hash bucket #{bucket_index}, hash records at {} .. {}",
                hash_index_window[0], hash_index_window[1]
            );
            let Some(bucket_hash_records) = self
                .hash_records
                .get(hash_index_window[0] as usize..hash_index_window[1] as usize)
            else {
                error!("hash record range is invalid");
                continue;
            };

            for hash_record in bucket_hash_records.iter() {
                let hash_record_offset = hash_record.offset.get();

                // Offsets are biased by +1; non-positive values mark
                // empty/invalid records.
                if hash_record_offset <= 0 {
                    continue;
                }

                let record_offset_in_symbol_stream = hash_record_offset - 1;
                let pub_sym =
                    match global_symbols.get_pub32_at(record_offset_in_symbol_stream as u32) {
                        Err(e) => {
                            error!("{e}");
                            continue;
                        }
                        Ok(s) => s,
                    };

                // Modulus is the bucket count (hash_buckets has one extra
                // sentinel entry).
                let name_hash =
                    crate::hash::hash_mod_u32(pub_sym.name, self.hash_buckets.len() as u32 - 1);

                if name_hash != bucket_index as u32 {
                    // Cap detailed output at 50 records to keep logs readable.
                    if num_hashes_incorrect < 50 {
                        error!(
                            "bucket #{} has symbol {} with wrong hash code {}",
                            bucket_index, pub_sym.name, name_hash
                        );
                    }
                    num_hashes_incorrect += 1;
                }
            }
        }

        if num_hashes_incorrect != 0 {
            error!("Found {num_hashes_incorrect} hash records with the wrong hash value");
        } else {
            info!("All name hashes are correct.");
        }

        Ok(())
    }

    /// Returns the hash records belonging to `bucket`.
    ///
    /// Panics if `bucket + 1` is out of range of the bucket table or the
    /// stored offsets are out of range of the records (valid tables built by
    /// `parse` satisfy both).
    pub fn hash_records_for_bucket(&self, bucket: usize) -> &[HashRecord] {
        let start = self.hash_buckets[bucket] as usize;
        let end = self.hash_buckets[bucket + 1] as usize;
        &self.hash_records[start..end]
    }

    /// Returns the hash records in the bucket that `name` hashes into
    /// (candidates only; callers must still compare names).
    pub fn hash_records_for_name<'a>(&'a self, name: &BStr) -> &'a [HashRecord] {
        // A table with only the sentinel entry has zero buckets.
        if self.hash_buckets.len() <= 1 {
            return &[];
        }

        let name_hash = crate::hash::hash_mod_u32(name, self.hash_buckets.len() as u32 - 1);
        self.hash_records_for_bucket(name_hash as usize)
    }

    /// Looks up `name` and returns the first matching symbol from `gss`,
    /// or `None` if no record in the bucket matches.
    ///
    /// Note: the comparison is ASCII case-insensitive.
    pub fn find_symbol<'a>(
        &self,
        gss: &'a GlobalSymbolStream,
        name: &BStr,
    ) -> anyhow::Result<Option<Sym<'a>>> {
        let bucket_entries = self.hash_records_for_name(name);
        for entry in bucket_entries.iter() {
            let entry_offset = entry.offset.get();
            if entry_offset <= 0 {
                warn!("found invalid hash record; entry.offset <= 0");
                continue;
            }

            // Undo the +1 bias before indexing the symbol stream.
            let sym = gss.get_sym_at(entry_offset as u32 - 1)?;

            if let Some(sym_name) = super::get_global_symbol_name(sym.kind, sym.data)? {
                if sym_name.eq_ignore_ascii_case(name) {
                    return Ok(Some(sym));
                }
            }
        }

        Ok(None)
    }

    /// Iterates all symbols referenced by this table's hash records, decoding
    /// each from `gss`; invalid records are skipped.
    pub fn iter<'i, 'a: 'i>(&'a self, gss: &'a GlobalSymbolStream) -> NameTableIter<'i> {
        NameTableIter {
            hash_record_iter: self.hash_records.iter(),
            gss,
        }
    }
}
310
/// Iterator over the symbols referenced by a [`NameTable`]'s hash records;
/// created by [`NameTable::iter`].
pub struct NameTableIter<'a> {
    /// Remaining hash records to visit, in bucket order.
    hash_record_iter: std::slice::Iter<'a, HashRecord>,
    /// Symbol stream from which each record's symbol is decoded.
    gss: &'a GlobalSymbolStream,
}
316
317impl<'a> Iterator for NameTableIter<'a> {
318 type Item = Sym<'a>;
319
320 fn next(&mut self) -> Option<Self::Item> {
321 loop {
322 let hash_record = self.hash_record_iter.next()?;
323
324 let sym_offset = hash_record.offset.get();
325 if sym_offset <= 0 {
326 continue;
328 }
329
330 if let Ok(sym) = self.gss.get_sym_at((sym_offset - 1) as u32) {
331 return Some(sym);
332 } else {
333 error!("failed to decode symbol in GSS at offset {sym_offset}");
334 return None;
335 }
336 }
337 }
338}
339
/// Returns the default number of hash buckets for V1-era tables.
///
/// Tables built for "minimal debug info" use a much larger bucket count.
pub fn get_v1_default_bucket(minimal_dbg_info: bool) -> usize {
    match minimal_dbg_info {
        true => 0x3ffff,
        false => 0x1000,
    }
}
347
/// Expands the compressed (V7.0 "small buckets") bucket table into a dense
/// table of `num_buckets + 1` record-start offsets.
///
/// Serialized form: a presence bitmap (one bit per bucket, see
/// [`nonempty_bitmap_size_bytes`]) followed by one byte offset per non-empty
/// bucket. Each offset must be a non-negative multiple of
/// [`HASH_RECORD_CALC_LEN`] (12) and is converted to a record index. Empty
/// buckets are filled with the next non-empty bucket's start offset, and a
/// final sentinel equal to `num_hash_records` is appended, so bucket `i`
/// spans `result[i]..result[i + 1]`.
///
/// `stream_offset` is only used to report absolute stream offsets in logs.
fn expand_buckets(
    buckets_bytes: &[u8],
    num_buckets: usize,
    num_hash_records: usize,
    stream_offset: usize,
) -> anyhow::Result<Vec<u32>> {
    let _span = debug_span!("expand_buckets").entered();

    trace!(num_buckets, num_hash_records, "expanding buckets");

    let original_len = stream_offset + buckets_bytes.len();
    let mut p = Parser::new(buckets_bytes);

    // One entry per bucket plus the trailing sentinel.
    let output_len = num_buckets + 1;

    let bitmap_len_in_bytes = nonempty_bitmap_size_bytes(num_buckets);
    let bitmap_bytes = p.bytes(bitmap_len_in_bytes)?;
    let bv: &BitSlice<u8, Lsb0> = BitSlice::from_slice(bitmap_bytes);
    trace!(bitmap_bytes, bitmap_len_in_bytes);

    // Clamp to num_buckets so bits set beyond the bucket range (in the
    // rounded-up bitmap tail) cannot make us over-read the pointer array.
    let num_nonempty_buckets = bv.count_ones().min(num_buckets);
    trace!(num_nonempty_buckets);

    let nonempty_pointers_stream_offset = original_len - p.len();
    let nonempty_pointers: &[I32<LE>] = p.slice(num_nonempty_buckets)?;

    trace!(
        nonempty_pointers_stream_offset,
        non_empty_pointers = nonempty_pointers.as_bytes(),
        "non-empty pointers"
    );

    let mut nonempty_pointers_iter = nonempty_pointers.iter();

    let mut hash_buckets = Vec::with_capacity(output_len);
    // Walk the set bits; each corresponds to the next pointer in order.
    for bucket_index in bv.iter_ones() {
        // Ignore padding bits at/after num_buckets.
        if bucket_index >= num_buckets {
            break;
        }

        // Safe to unwrap: at most `num_nonempty_buckets` set bits below
        // num_buckets, and exactly that many pointers were sliced above.
        let offset_x12 = nonempty_pointers_iter.next().unwrap().get();
        if offset_x12 < 0 {
            bail!("found a negative offset in hash buckets");
        }
        if offset_x12 % HASH_RECORD_CALC_LEN != 0 {
            bail!("hash record offset {offset_x12} is not a multiple of 12 (as required)");
        }
        // Convert byte offset (in 12-byte units) to a record index.
        let offset = (offset_x12 / HASH_RECORD_CALC_LEN) as u32;

        if offset as usize >= num_hash_records {
            bail!("hash record offset {offset_x12} is beyond range of hash records table");
        }

        // Offsets must be non-decreasing, and the first one must be zero,
        // for the interval representation (start[i]..start[i+1]) to be valid.
        if let Some(&prev_offset) = hash_buckets.last() {
            if offset < prev_offset {
                bail!("hash record offset {offset} is less than previous offset {prev_offset}");
            }
        } else if offset != 0 {
            bail!("First hash record offset should be zero, but instead it is: 0x{offset:x}");
        }

        // Fill any skipped (empty) buckets with this bucket's start offset,
        // giving them zero-length ranges.
        if hash_buckets.len() < bucket_index {
            trace!(
                " bucket: 0x{:08x} .. 0x{bucket_index:08x} : range is empty, pushing offset: 0x{offset:8x} {offset:10}",
                hash_buckets.len()
            );
            hash_buckets.resize(bucket_index, offset);
        }
        trace!(
            " bucket: 0x{b:08x} --> offset: 0x{offset:08x}",
            b = hash_buckets.len()
        );
        hash_buckets.push(offset);
        assert!(hash_buckets.len() <= num_buckets);
    }

    assert!(hash_buckets.len() <= num_buckets);
    // Fill trailing empty buckets and append the sentinel entry.
    hash_buckets.resize(num_buckets + 1, num_hash_records as u32);

    trace!(
        "hash bucket offsets: {:?}",
        &hash_buckets[..hash_buckets.len().min(100)]
    );

    // Unconsumed pointers mean the bitmap had fewer usable bits than the
    // table provided entries for; tolerated with a warning.
    if !nonempty_pointers_iter.as_slice().is_empty() {
        warn!(
            num_extra_bytes = p.len(),
            rest = p.peek_rest(),
            "Compressed hash buckets table contains extra byte(s)"
        );
    }

    if tracing::event_enabled!(tracing::Level::TRACE) {
        trace!("Non-empty buckets: (within expand_buckets)");
        for i in 0..hash_buckets.len() - 1 {
            let start = hash_buckets[i];
            let end = hash_buckets[i + 1];
            if start != end {
                trace!(i, start, end);
            }
        }
    }

    Ok(hash_buckets)
}
479
/// In-memory (builder-side) hash record.
///
/// The derived ordering sorts by `hash` first, then `symbol_offset`, which is
/// the order required before grouping records into buckets.
#[derive(Eq, PartialEq, PartialOrd, Ord)]
pub struct HashEntry {
    /// Bucket index this record belongs to (already reduced modulo the
    /// bucket count).
    pub hash: u32,
    /// Biased symbol-stream offset to store in the serialized record.
    pub symbol_offset: i32,
}
490
491pub fn count_nonempty_buckets(sorted_hash_records: &[HashEntry]) -> usize {
495 iter_nonempty_buckets(sorted_hash_records).count()
496}
497
/// Size in bytes of the non-empty-bucket presence bitmap: one bit per bucket
/// plus one trailing bit, rounded up to whole 32-bit words.
pub fn nonempty_bitmap_size_bytes(num_buckets: usize) -> usize {
    let num_words = (num_buckets + 1).div_ceil(u32::BITS as usize);
    num_words * size_of::<u32>()
}
503
504pub fn compute_hash_table_size_bytes(
506 num_hash_records: usize,
507 num_buckets: usize,
508 num_nonempty_buckets: usize,
509) -> usize {
510 size_of::<NameTableHeader>()
511 + num_hash_records * size_of::<HashRecord>()
512 + nonempty_bitmap_size_bytes(num_buckets)
513 + num_nonempty_buckets * size_of::<i32>()
514}
515
/// Sizing information produced by [`NameTableBuilder::prepare`] and consumed
/// by [`NameTableBuilder::encode`].
pub struct NameTableInfo {
    /// Total number of hash buckets.
    pub num_buckets: usize,
    /// Number of buckets that contain at least one record.
    pub num_nonempty_buckets: usize,
    /// Total serialized size of the table, in bytes.
    pub table_size_bytes: usize,
}
526
527impl NameTableBuilder {
528 pub fn prepare(&mut self) -> NameTableInfo {
531 self.sort();
532
533 let num_nonempty_buckets = count_nonempty_buckets(&self.hash_records);
534 let table_size_bytes = compute_hash_table_size_bytes(
535 self.hash_records.len(),
536 self.num_buckets,
537 num_nonempty_buckets,
538 );
539 NameTableInfo {
540 num_buckets: self.num_buckets,
541 num_nonempty_buckets,
542 table_size_bytes,
543 }
544 }
545
546 pub fn num_names(&self) -> usize {
548 self.hash_records.len()
549 }
550
551 pub fn encode(&self, prepared_info: &NameTableInfo, output: &mut [u8]) {
554 debug!(
555 "Number of symbols found (in name table): {n} 0x{n:x}",
556 n = self.hash_records.len()
557 );
558 debug!(
559 "Size in bytes of hash records (in name table): {s} 0x{s:x}",
560 s = self.hash_records.len() * 8,
561 );
562
563 let compressed_bitvec_size_bytes = nonempty_bitmap_size_bytes(self.num_buckets);
564 let num_nonempty_buckets = prepared_info.num_nonempty_buckets;
565 let hash_buckets_size_bytes =
566 compressed_bitvec_size_bytes + num_nonempty_buckets * size_of::<i32>();
567 debug!(
568 "hash_buckets_size_bytes = 0x{0:x} {0}",
569 hash_buckets_size_bytes
570 );
571
572 {
580 let mut hash_output_cursor = ParserMut::new(output);
581
582 *hash_output_cursor.get_mut().unwrap() = NameTableHeader {
584 signature: U32::new(GSI_HASH_HEADER_SIGNATURE),
585 version: U32::new(GSI_HASH_HEADER_VERSION),
586 buckets_size: U32::new(hash_buckets_size_bytes as u32),
587 hash_records_size: U32::new(
588 (self.hash_records.len() * size_of::<HashRecord>()) as u32,
589 ),
590 };
591
592 {
594 let output_slice: &mut [HashRecord] = hash_output_cursor
595 .slice_mut(self.hash_records.len())
596 .unwrap();
597 for (from, to) in self.hash_records.iter().zip(output_slice.iter_mut()) {
598 *to = HashRecord {
599 offset: I32::new(from.symbol_offset),
600 c_ref: I32::new(1),
601 };
602 }
603 }
604
605 {
607 let sym_hash_bitvec_bytes = hash_output_cursor
608 .bytes_mut(compressed_bitvec_size_bytes)
609 .unwrap();
610 let bv: &mut BitSlice<u8, Lsb0> = BitSlice::from_slice_mut(sym_hash_bitvec_bytes);
611
612 for (_record_index, bucket_hashes) in iter_nonempty_buckets(&self.hash_records) {
613 let hash = bucket_hashes[0].hash;
614 bv.set(hash as usize, true);
615 }
616
617 assert_eq!(bv.count_ones(), num_nonempty_buckets);
618 }
619
620 {
624 let mut num_nonempty_written = 0;
625 let output_offsets_slice: &mut [U32<LE>] =
626 hash_output_cursor.slice_mut(num_nonempty_buckets).unwrap();
627 let mut output_iter = output_offsets_slice.iter_mut();
628
629 for (record_index, _hashes) in iter_nonempty_buckets(&self.hash_records) {
630 *output_iter.next().unwrap() = U32::new((record_index as u32) * 12);
631 num_nonempty_written += 1;
632 }
633
634 assert_eq!(num_nonempty_written, num_nonempty_buckets);
635 assert!(output_iter.as_slice().is_empty());
636 }
637
638 assert!(hash_output_cursor.is_empty());
639 }
640 }
641}
642
643#[inline(never)]
645pub fn sort_hash_records(hash_records: &mut [HashEntry]) {
646 hash_records.sort_unstable();
648}
649
650fn iter_nonempty_buckets(hashes: &[HashEntry]) -> IterNonEmptyBuckets<'_> {
654 IterNonEmptyBuckets {
655 record_index: 0,
656 hashes,
657 }
658}
659
/// Iterator state for [`iter_nonempty_buckets`]: walks a sorted record slice
/// one equal-hash run at a time.
struct IterNonEmptyBuckets<'a> {
    /// Index (within the original slice) of the first record in `hashes`.
    record_index: usize,
    /// Remaining records, sorted by hash.
    hashes: &'a [HashEntry],
}
664
665impl<'a> Iterator for IterNonEmptyBuckets<'a> {
666 type Item = (usize, &'a [HashEntry]);
668
669 fn next(&mut self) -> Option<Self::Item> {
670 if self.hashes.is_empty() {
671 return None;
672 }
673
674 let record_index = self.record_index;
675
676 let hash = self.hashes[0].hash;
677 let mut end = 1;
678 while end < self.hashes.len() && self.hashes[end].hash == hash {
679 end += 1;
680 }
681
682 let (lo, hi) = self.hashes.split_at(end);
683 self.record_index += end;
684 self.hashes = hi;
685 Some((record_index, lo))
686 }
687}
688
/// Builds a serialized GSI/PSI name table: accumulate records with
/// `push`/`push_hash`, then call `prepare` followed by `encode`.
pub struct NameTableBuilder {
    /// Number of hash buckets; fixed at construction and always > 0.
    num_buckets: usize,
    /// Accumulated records; sorted in place by `prepare`.
    hash_records: Vec<HashEntry>,
}
707
708impl NameTableBuilder {
709 pub fn new(num_buckets: usize) -> Self {
711 assert!(num_buckets > 0);
712 Self {
713 num_buckets,
714 hash_records: Vec::new(),
715 }
716 }
717
718 pub fn num_buckets(&self) -> usize {
720 self.num_buckets
721 }
722
723 pub fn push_hash(&mut self, hash: u32, symbol_offset: i32) {
727 assert!((hash as usize) < self.num_buckets);
728 self.hash_records.push(HashEntry {
729 hash,
730 symbol_offset,
731 });
732 }
733
734 pub fn push(&mut self, name: &BStr, symbol_offset: i32) {
736 let name_hash = crate::hash::hash_mod_u32(name, self.num_buckets as u32);
737 self.push_hash(name_hash, symbol_offset);
738 }
739
740 fn sort(&mut self) {
741 sort_hash_records(&mut self.hash_records);
742 }
743}