1#[cfg(test)]
4mod tests;
5
6use super::gss::GlobalSymbolStream;
7use crate::parser::{Parser, ParserMut};
8use crate::syms::Sym;
9use anyhow::bail;
10use bitvec::prelude::{BitSlice, Lsb0};
11use bstr::BStr;
12use std::mem::size_of;
13use tracing::{debug, debug_span, error, info, trace, warn};
14use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I32, LE, U32};
15
/// Divisor used to convert on-disk bucket pointer byte offsets into hash
/// record indices. Bucket pointers are stored in units of 12 bytes even
/// though the serialized [`HashRecord`] is only 8 bytes (see
/// `expand_buckets`, which divides by this, and `encode`, which multiplies).
pub const HASH_RECORD_CALC_LEN: i32 = 12;

/// GSI hash table version constant for the V7.0 "small buckets" format:
/// `0xeffe0000` plus a date stamp (1999-08-10).
pub const GSI_HASH_SC_IMPV_V70: u32 = 0xeffe0000 + 19990810;

/// Signature value at the start of a V7.0 GSI hash table stream.
pub const GSI_HASH_HEADER_SIGNATURE: u32 = 0xffff_ffff;

/// Version value expected alongside [`GSI_HASH_HEADER_SIGNATURE`].
pub const GSI_HASH_HEADER_VERSION: u32 = GSI_HASH_SC_IMPV_V70;
29
/// On-disk header of a GSI/PSI name (hash) table stream in the V7.0
/// "small buckets" format. All fields are little-endian.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug)]
#[repr(C)]
pub struct NameTableHeader {
    /// Expected to be [`GSI_HASH_HEADER_SIGNATURE`] (`0xffff_ffff`).
    pub signature: U32<LE>,

    /// Expected to be [`GSI_HASH_HEADER_VERSION`].
    pub version: U32<LE>,

    /// Size in bytes of the hash records region that follows this header.
    pub hash_records_size: U32<LE>,

    /// Size in bytes of the compressed hash buckets region that follows
    /// the hash records.
    pub buckets_size: U32<LE>,
}
56
/// One serialized hash record in the name table.
#[derive(IntoBytes, FromBytes, Unaligned, KnownLayout, Immutable, Debug, Clone)]
#[repr(C)]
pub struct HashRecord {
    /// Offset of the symbol in the symbol stream, biased by +1.
    /// Readers in this module treat values <= 0 as invalid/empty and
    /// subtract 1 before indexing the symbol stream.
    pub offset: I32<LE>,

    /// Reference count; the writer in this module always stores 1.
    pub c_ref: I32<LE>,
}
71
/// A parsed GSI/PSI name table: expanded hash buckets plus the hash records
/// they index.
pub struct NameTable {
    /// Dense bucket table with `num_buckets + 1` entries. Entry `i` is the
    /// starting index into `hash_records` for bucket `i`; the final entry is
    /// the total record count, so bucket `i` spans
    /// `hash_buckets[i]..hash_buckets[i + 1]`.
    hash_buckets: Vec<u32>,

    /// All hash records, grouped by bucket in bucket order.
    hash_records: Vec<HashRecord>,
}
93
impl NameTable {
    /// Parses a serialized GSI/PSI name table in the V7.0 "small buckets"
    /// format.
    ///
    /// * `num_buckets` - number of hash buckets the table was built with;
    ///   the expanded bucket vector gets `num_buckets + 1` entries.
    /// * `stream_offset` - offset of `sym_hash_bytes` within its containing
    ///   stream; used only to report absolute offsets in debug logging.
    /// * `sym_hash_bytes` - the raw bytes: header, hash records, then the
    ///   compressed bucket table.
    ///
    /// Fails if the header does not carry the V7.0 signature/version (the
    /// old-style format is rejected), if the data is truncated, or if the
    /// bucket table fails validation in `expand_buckets`.
    pub fn parse(
        num_buckets: usize,
        stream_offset: usize,
        sym_hash_bytes: &[u8],
    ) -> anyhow::Result<Self> {
        let _span = debug_span!("NameTable::parse").entered();

        // `original_len - p.len()` recovers the absolute stream offset of
        // the parser's current position (diagnostics only).
        let original_len = stream_offset + sym_hash_bytes.len();

        let hash_records: Vec<HashRecord>;
        let hash_buckets: Vec<u32>;
        {
            let mut p = Parser::new(sym_hash_bytes);
            let stream_offset_table_header = original_len - p.len();
            let hash_header: &NameTableHeader = p.get()?;

            if hash_header.signature.get() == GSI_HASH_HEADER_SIGNATURE
                && hash_header.version.get() == GSI_HASH_HEADER_VERSION
            {
                debug!(
                    hash_records_size = hash_header.hash_records_size.get(),
                    buckets_size = hash_header.buckets_size.get(),
                    "Hash table format: small buckets"
                );

                let hash_records_size = hash_header.hash_records_size.get() as usize;
                let buckets_size = hash_header.buckets_size.get() as usize;

                // The two regions follow the header back-to-back: hash
                // records first, then the compressed bucket table.
                let stream_offset_hash_records = original_len - p.len();
                let hash_records_bytes = p.bytes(hash_records_size)?;
                let stream_offset_hash_buckets = original_len - p.len();
                let buckets_bytes = p.bytes(buckets_size)?;
                let stream_offset_end = original_len - p.len();

                // Trailing bytes are tolerated but flagged.
                if !p.is_empty() {
                    warn!("Found extra bytes in hash table, len = {}", p.len());
                }

                if hash_records_size % size_of::<HashRecord>() != 0 {
                    warn!("GSI/PSI name table contains hash table with a length that is not a multiple of the hash record size.");
                }
                // Integer division: a ragged tail (warned above) is dropped.
                let num_hash_records = hash_records_size / size_of::<HashRecord>();
                let hash_records_slice: &[HashRecord] =
                    Parser::new(hash_records_bytes).slice(num_hash_records)?;

                debug!(num_hash_records, "Number of records in name table");

                hash_records = hash_records_slice.to_vec();

                debug!(num_buckets, "Number of hash buckets in name table");

                debug!("[........] Stream offsets:");
                debug!("[{:08x}] : NameTableHeader", stream_offset_table_header);
                debug!("[{:08x}] : hash records", stream_offset_hash_records);
                debug!("[{:08x}] : hash buckets", stream_offset_hash_buckets);
                debug!("[{:08x}] : (end)", stream_offset_end);

                // Decompress the bitmap + non-empty-pointer representation
                // into a dense `num_buckets + 1` start-offset table.
                hash_buckets = expand_buckets(
                    buckets_bytes,
                    num_buckets,
                    num_hash_records,
                    stream_offset_hash_buckets,
                )?;
            } else {
                error!("Hash table format: old-style normal buckets");
                bail!("Old-style hash table is not supported");
            }
        }

        Ok(Self {
            hash_buckets,
            hash_records,
        })
    }

    /// Returns an empty name table: a single sentinel bucket entry and no
    /// records.
    pub fn empty() -> Self {
        Self {
            hash_buckets: vec![0],
            hash_records: vec![],
        }
    }

    /// Verifies that each hash record's symbol name actually hashes to the
    /// bucket that contains it.
    ///
    /// Problems are logged (only the first 50 mismatches individually);
    /// the function always returns `Ok(())` — lookup and range errors are
    /// logged and skipped, not propagated.
    pub fn check_hashes(&self, global_symbols: &GlobalSymbolStream) -> anyhow::Result<()> {
        let mut num_hashes_incorrect: u32 = 0;

        // Adjacent pairs of bucket start offsets delimit each bucket's
        // records (last entry is the end sentinel).
        for (bucket_index, hash_index_window) in self.hash_buckets.windows(2).enumerate() {
            trace!(
                "Checking hash bucket #{bucket_index}, hash records at {} .. {}",
                hash_index_window[0],
                hash_index_window[1]
            );
            let Some(bucket_hash_records) = self
                .hash_records
                .get(hash_index_window[0] as usize..hash_index_window[1] as usize)
            else {
                error!("hash record range is invalid");
                continue;
            };

            for hash_record in bucket_hash_records.iter() {
                let hash_record_offset = hash_record.offset.get();

                // Offsets are stored with a +1 bias; non-positive means
                // empty/invalid.
                if hash_record_offset <= 0 {
                    continue;
                }

                let record_offset_in_symbol_stream = hash_record_offset - 1;
                let pub_sym =
                    match global_symbols.get_pub32_at(record_offset_in_symbol_stream as u32) {
                        Err(e) => {
                            error!("{e}");
                            continue;
                        }
                        Ok(s) => s,
                    };

                // `hash_buckets.len() - 1` is the real bucket count (the
                // last entry is the end sentinel).
                let name_hash =
                    crate::hash::hash_mod_u32(pub_sym.name, self.hash_buckets.len() as u32 - 1);

                if name_hash != bucket_index as u32 {
                    // Cap per-symbol logging at 50 to avoid log spam.
                    if num_hashes_incorrect < 50 {
                        error!(
                            "bucket #{} has symbol {} with wrong hash code {}",
                            bucket_index, pub_sym.name, name_hash
                        );
                    }
                    num_hashes_incorrect += 1;
                }
            }
        }

        if num_hashes_incorrect != 0 {
            error!("Found {num_hashes_incorrect} hash records with the wrong hash value");
        } else {
            info!("All name hashes are correct.");
        }

        Ok(())
    }

    /// Returns the hash records belonging to `bucket`.
    ///
    /// Panics if `bucket + 1` is out of range of the bucket table or if the
    /// stored record range is invalid for `hash_records`.
    pub fn hash_records_for_bucket(&self, bucket: usize) -> &[HashRecord] {
        let start = self.hash_buckets[bucket] as usize;
        let end = self.hash_buckets[bucket + 1] as usize;
        &self.hash_records[start..end]
    }

    /// Hashes `name` and returns the hash records of its bucket, or an empty
    /// slice when the table has no buckets.
    pub fn hash_records_for_name<'a>(&'a self, name: &BStr) -> &'a [HashRecord] {
        // A usable table needs at least one bucket plus the end sentinel.
        if self.hash_buckets.len() <= 1 {
            return &[];
        }

        let name_hash = crate::hash::hash_mod_u32(name, self.hash_buckets.len() as u32 - 1);
        self.hash_records_for_bucket(name_hash as usize)
    }

    /// Looks up `name` and returns its symbol from `gss`, if any.
    ///
    /// The name comparison is ASCII case-insensitive. Returns `Ok(None)`
    /// when no matching symbol exists; hash records with non-positive
    /// offsets are skipped with a warning.
    pub fn find_symbol<'a>(
        &self,
        gss: &'a GlobalSymbolStream,
        name: &BStr,
    ) -> anyhow::Result<Option<Sym<'a>>> {
        let bucket_entries = self.hash_records_for_name(name);
        for entry in bucket_entries.iter() {
            let entry_offset = entry.offset.get();
            if entry_offset <= 0 {
                warn!("found invalid hash record; entry.offset <= 0");
                continue;
            }

            // Undo the +1 offset bias before reading the symbol stream.
            let sym = gss.get_sym_at(entry_offset as u32 - 1)?;

            if let Some(sym_name) = super::get_global_symbol_name(sym.kind, sym.data)? {
                if sym_name.eq_ignore_ascii_case(name) {
                    return Ok(Some(sym));
                }
            }
        }

        Ok(None)
    }

    /// Iterates over the symbols referenced by this table's hash records,
    /// resolving each record against `gss`.
    pub fn iter<'i, 'a: 'i>(&'a self, gss: &'a GlobalSymbolStream) -> NameTableIter<'i> {
        NameTableIter {
            hash_record_iter: self.hash_records.iter(),
            gss,
        }
    }
}
309
/// Iterator over the symbols referenced by a [`NameTable`]'s hash records.
///
/// Created by [`NameTable::iter`].
pub struct NameTableIter<'a> {
    /// Remaining hash records to visit.
    hash_record_iter: std::slice::Iter<'a, HashRecord>,
    /// Symbol stream used to resolve record offsets into symbols.
    gss: &'a GlobalSymbolStream,
}
315
316impl<'a> Iterator for NameTableIter<'a> {
317 type Item = Sym<'a>;
318
319 fn next(&mut self) -> Option<Self::Item> {
320 loop {
321 let hash_record = self.hash_record_iter.next()?;
322
323 let sym_offset = hash_record.offset.get();
324 if sym_offset <= 0 {
325 continue;
327 }
328
329 if let Ok(sym) = self.gss.get_sym_at((sym_offset - 1) as u32) {
330 return Some(sym);
331 } else {
332 error!("failed to decode symbol in GSS at offset {sym_offset}");
333 return None;
334 }
335 }
336 }
337}
338
/// Returns the default number of hash buckets for a V1-era GSI table.
///
/// Minimal debug info uses a much larger bucket count (`0x3ffff`) than the
/// classic default (`0x1000`).
pub fn get_v1_default_bucket(minimal_dbg_info: bool) -> usize {
    match minimal_dbg_info {
        true => 0x3ffff,
        false => 0x1000,
    }
}
350
/// Expands the compressed bucket table (a non-empty bitmap followed by one
/// record offset per non-empty bucket) into a dense vector of
/// `num_buckets + 1` start indices into the hash record table.
///
/// The final entry is always `num_hash_records`, so bucket `i` spans
/// `result[i]..result[i + 1]`. Empty buckets inherit the next non-empty
/// bucket's start offset (giving them a zero-length range).
///
/// Fails if a stored offset is negative, not a multiple of 12
/// ([`HASH_RECORD_CALC_LEN`]), out of range of the record table, or not
/// monotonically non-decreasing (with the first required to be zero).
fn expand_buckets(
    buckets_bytes: &[u8],
    num_buckets: usize,
    num_hash_records: usize,
    stream_offset: usize,
) -> anyhow::Result<Vec<u32>> {
    let _span = debug_span!("expand_buckets").entered();

    trace!(num_buckets, num_hash_records, "expanding buckets");

    // `original_len - p.len()` recovers the absolute stream offset of the
    // parser's current position (diagnostics only).
    let original_len = stream_offset + buckets_bytes.len();
    let mut p = Parser::new(buckets_bytes);

    let output_len = num_buckets + 1;

    // Leading bitmap: bit `i` set means bucket `i` is non-empty.
    let bitmap_len_in_bytes = nonempty_bitmap_size_bytes(num_buckets);
    let bitmap_bytes = p.bytes(bitmap_len_in_bytes)?;
    let bv: &BitSlice<u8, Lsb0> = BitSlice::from_slice(bitmap_bytes);
    trace!(bitmap_bytes, bitmap_len_in_bytes);

    // Clamp in case the bitmap has stray bits set beyond `num_buckets`.
    let num_nonempty_buckets = bv.count_ones().min(num_buckets);
    trace!(num_nonempty_buckets);

    // One little-endian i32 offset follows per non-empty bucket.
    let nonempty_pointers_stream_offset = original_len - p.len();
    let nonempty_pointers: &[I32<LE>] = p.slice(num_nonempty_buckets)?;

    trace!(
        nonempty_pointers_stream_offset,
        non_empty_pointers = nonempty_pointers.as_bytes(),
        "non-empty pointers"
    );

    let mut nonempty_pointers_iter = nonempty_pointers.iter();

    let mut hash_buckets = Vec::with_capacity(output_len);
    for bucket_index in bv.iter_ones() {
        // Ignore stray bits past the declared bucket count.
        if bucket_index >= num_buckets {
            break;
        }

        // Offsets are stored pre-multiplied by 12 (HASH_RECORD_CALC_LEN),
        // not by the 8-byte serialized record size.
        let offset_x12 = nonempty_pointers_iter.next().unwrap().get();
        if offset_x12 < 0 {
            bail!("found a negative offset in hash buckets");
        }
        if offset_x12 % HASH_RECORD_CALC_LEN != 0 {
            bail!("hash record offset {offset_x12} is not a multiple of 12 (as required)");
        }
        let offset = (offset_x12 / HASH_RECORD_CALC_LEN) as u32;

        if offset as usize >= num_hash_records {
            bail!("hash record offset {offset_x12} is beyond range of hash records table");
        }

        // Start offsets must be non-decreasing, and the very first one must
        // be zero.
        if let Some(&prev_offset) = hash_buckets.last() {
            if offset < prev_offset {
                bail!("hash record offset {offset} is less than previous offset {prev_offset}");
            }
        } else if offset != 0 {
            bail!("First hash record offset should be zero, but instead it is: 0x{offset:x}");
        }

        // Fill any skipped (empty) buckets with this bucket's start offset,
        // giving them zero-length ranges.
        if hash_buckets.len() < bucket_index {
            trace!(" bucket: 0x{:08x} .. 0x{bucket_index:08x} : range is empty, pushing offset: 0x{offset:8x} {offset:10}", hash_buckets.len());
            hash_buckets.resize(bucket_index, offset);
        }
        trace!(
            " bucket: 0x{b:08x} --> offset: 0x{offset:08x}",
            b = hash_buckets.len()
        );
        hash_buckets.push(offset);
        assert!(hash_buckets.len() <= num_buckets);
    }

    assert!(hash_buckets.len() <= num_buckets);
    // Trailing empty buckets plus the end sentinel all point at the end of
    // the record table.
    hash_buckets.resize(num_buckets + 1, num_hash_records as u32);

    trace!(
        "hash bucket offsets: {:?}",
        &hash_buckets[..hash_buckets.len().min(100)]
    );

    if !nonempty_pointers_iter.as_slice().is_empty() {
        warn!(
            num_extra_bytes = p.len(),
            rest = p.peek_rest(),
            "Compressed hash buckets table contains extra byte(s)"
        );
    }

    if tracing::event_enabled!(tracing::Level::TRACE) {
        trace!("Non-empty buckets: (within expand_buckets)");
        for i in 0..hash_buckets.len() - 1 {
            let start = hash_buckets[i];
            let end = hash_buckets[i + 1];
            if start != end {
                trace!(i, start, end);
            }
        }
    }

    Ok(hash_buckets)
}
479
/// A `(hash, symbol offset)` pair accumulated by [`NameTableBuilder`].
///
/// The derived ordering sorts by `hash` first, then `symbol_offset`, which
/// is the order the bucket-grouping helpers in this module rely on.
#[derive(Eq, PartialEq, PartialOrd, Ord)]
pub struct HashEntry {
    // Bucket index this entry hashes to (must be < the bucket count).
    pub hash: u32,
    // Value written verbatim into `HashRecord::offset`; readers in this
    // module treat that field as 1-biased, so callers presumably pass the
    // already-biased offset — TODO confirm against callers.
    pub symbol_offset: i32,
}
490
491pub fn count_nonempty_buckets(sorted_hash_records: &[HashEntry]) -> usize {
495 iter_nonempty_buckets(sorted_hash_records).count()
496}
497
/// Size in bytes of the "non-empty bucket" bitmap for `num_buckets` buckets.
///
/// The bitmap covers `num_buckets + 1` bits and is stored as a whole number
/// of 4-byte words.
pub fn nonempty_bitmap_size_bytes(num_buckets: usize) -> usize {
    let num_bits = num_buckets + 1;
    let num_words = (num_bits + 31) / 32;
    num_words * 4
}
503
504pub fn compute_hash_table_size_bytes(
506 num_hash_records: usize,
507 num_buckets: usize,
508 num_nonempty_buckets: usize,
509) -> usize {
510 size_of::<NameTableHeader>()
511 + num_hash_records * size_of::<HashRecord>()
512 + nonempty_bitmap_size_bytes(num_buckets)
513 + num_nonempty_buckets * size_of::<i32>()
514}
515
/// Layout information computed by [`NameTableBuilder::prepare`].
pub struct NameTableInfo {
    /// Total number of hash buckets.
    pub num_buckets: usize,
    /// Number of buckets that contain at least one record.
    pub num_nonempty_buckets: usize,
    /// Exact size in bytes of the encoded table; `encode` asserts the
    /// output buffer is fully consumed.
    pub table_size_bytes: usize,
}
526
impl NameTableBuilder {
    /// Sorts the accumulated records and computes the table's layout.
    ///
    /// Must be called before [`NameTableBuilder::encode`]; the returned
    /// [`NameTableInfo`] carries the exact output size `encode` expects.
    pub fn prepare(&mut self) -> NameTableInfo {
        self.sort();

        let num_nonempty_buckets = count_nonempty_buckets(&self.hash_records);
        let table_size_bytes = compute_hash_table_size_bytes(
            self.hash_records.len(),
            self.num_buckets,
            num_nonempty_buckets,
        );
        NameTableInfo {
            num_buckets: self.num_buckets,
            num_nonempty_buckets,
            table_size_bytes,
        }
    }

    /// Number of name records pushed so far.
    pub fn num_names(&self) -> usize {
        self.hash_records.len()
    }

    /// Serializes the name table into `output`.
    ///
    /// `prepared_info` must come from a prior [`NameTableBuilder::prepare`]
    /// call, and `output` must be exactly `prepared_info.table_size_bytes`
    /// long (the cursor is asserted empty at the end).
    pub fn encode(&self, prepared_info: &NameTableInfo, output: &mut [u8]) {
        debug!(
            "Number of symbols found (in name table): {n} 0x{n:x}",
            n = self.hash_records.len()
        );
        debug!(
            "Size in bytes of hash records (in name table): {s} 0x{s:x}",
            s = self.hash_records.len() * 8,
        );

        let compressed_bitvec_size_bytes = nonempty_bitmap_size_bytes(self.num_buckets);
        let num_nonempty_buckets = prepared_info.num_nonempty_buckets;
        let hash_buckets_size_bytes =
            compressed_bitvec_size_bytes + num_nonempty_buckets * size_of::<i32>();
        debug!(
            "hash_buckets_size_bytes = 0x{0:x} {0}",
            hash_buckets_size_bytes
        );

        {
            let mut hash_output_cursor = ParserMut::new(output);

            // Header: signature/version plus the byte sizes of the two
            // regions that follow.
            *hash_output_cursor.get_mut().unwrap() = NameTableHeader {
                signature: U32::new(GSI_HASH_HEADER_SIGNATURE),
                version: U32::new(GSI_HASH_HEADER_VERSION),
                buckets_size: U32::new(hash_buckets_size_bytes as u32),
                hash_records_size: U32::new(
                    (self.hash_records.len() * size_of::<HashRecord>()) as u32,
                ),
            };

            // Hash records: one per pushed name, with a fixed c_ref of 1.
            {
                let output_slice: &mut [HashRecord] = hash_output_cursor
                    .slice_mut(self.hash_records.len())
                    .unwrap();
                for (from, to) in self.hash_records.iter().zip(output_slice.iter_mut()) {
                    *to = HashRecord {
                        offset: I32::new(from.symbol_offset),
                        c_ref: I32::new(1),
                    };
                }
            }

            // Bitmap: one bit per bucket, set when the bucket has records.
            {
                let sym_hash_bitvec_bytes = hash_output_cursor
                    .bytes_mut(compressed_bitvec_size_bytes)
                    .unwrap();
                let bv: &mut BitSlice<u8, Lsb0> = BitSlice::from_slice_mut(sym_hash_bitvec_bytes);

                for (_record_index, bucket_hashes) in iter_nonempty_buckets(&self.hash_records) {
                    let hash = bucket_hashes[0].hash;
                    bv.set(hash as usize, true);
                }

                assert_eq!(bv.count_ones(), num_nonempty_buckets);
            }

            // Bucket pointers: starting record index of each non-empty
            // bucket, scaled by 12 to match the divisor (HASH_RECORD_CALC_LEN)
            // used by `expand_buckets` when reading.
            {
                let mut num_nonempty_written = 0;
                let output_offsets_slice: &mut [U32<LE>] =
                    hash_output_cursor.slice_mut(num_nonempty_buckets).unwrap();
                let mut output_iter = output_offsets_slice.iter_mut();

                for (record_index, _hashes) in iter_nonempty_buckets(&self.hash_records) {
                    *output_iter.next().unwrap() = U32::new((record_index as u32) * 12);
                    num_nonempty_written += 1;
                }

                assert_eq!(num_nonempty_written, num_nonempty_buckets);
                assert!(output_iter.as_slice().is_empty());
            }

            assert!(hash_output_cursor.is_empty());
        }
    }
}
642
643#[inline(never)]
645pub fn sort_hash_records(hash_records: &mut [HashEntry]) {
646 hash_records.sort_unstable();
648}
649
650fn iter_nonempty_buckets(hashes: &[HashEntry]) -> IterNonEmptyBuckets<'_> {
654 IterNonEmptyBuckets {
655 record_index: 0,
656 hashes,
657 }
658}
659
/// Iterator yielding `(first_record_index, records)` for each run of
/// [`HashEntry`] values sharing the same hash. See `iter_nonempty_buckets`.
struct IterNonEmptyBuckets<'a> {
    /// Index (within the original slice) of the first record in `hashes`.
    record_index: usize,
    /// Records not yet grouped.
    hashes: &'a [HashEntry],
}
664
665impl<'a> Iterator for IterNonEmptyBuckets<'a> {
666 type Item = (usize, &'a [HashEntry]);
668
669 fn next(&mut self) -> Option<Self::Item> {
670 if self.hashes.is_empty() {
671 return None;
672 }
673
674 let record_index = self.record_index;
675
676 let hash = self.hashes[0].hash;
677 let mut end = 1;
678 while end < self.hashes.len() && self.hashes[end].hash == hash {
679 end += 1;
680 }
681
682 let (lo, hi) = self.hashes.split_at(end);
683 self.record_index += end;
684 self.hashes = hi;
685 Some((record_index, lo))
686 }
687}
688
/// Builder for serializing a GSI/PSI name table.
///
/// Push entries with `push`/`push_hash`, then call `prepare` followed by
/// `encode`.
pub struct NameTableBuilder {
    /// Number of hash buckets the table will use (must be non-zero).
    num_buckets: usize,
    /// Accumulated entries; sorted by `prepare` before encoding.
    hash_records: Vec<HashEntry>,
}
707
708impl NameTableBuilder {
709 pub fn new(num_buckets: usize) -> Self {
711 assert!(num_buckets > 0);
712 Self {
713 num_buckets,
714 hash_records: Vec::new(),
715 }
716 }
717
718 pub fn num_buckets(&self) -> usize {
720 self.num_buckets
721 }
722
723 pub fn push_hash(&mut self, hash: u32, symbol_offset: i32) {
727 assert!((hash as usize) < self.num_buckets);
728 self.hash_records.push(HashEntry {
729 hash,
730 symbol_offset,
731 });
732 }
733
734 pub fn push(&mut self, name: &BStr, symbol_offset: i32) {
736 let name_hash = crate::hash::hash_mod_u32(name, self.num_buckets as u32);
737 self.push_hash(name_hash, symbol_offset);
738 }
739
740 fn sort(&mut self) {
741 sort_hash_records(&mut self.hash_records);
742 }
743}