1#[cfg(test)]
39mod tests;
40
41use crate::parser::{Parser, ParserMut};
42use crate::utils::align_4;
43use crate::utils::iter::{HasRestLen, IteratorWithRangesExt};
44use crate::ReadAt;
45use anyhow::bail;
46use bstr::BStr;
47use std::ops::Range;
48use tracing::{debug, trace, trace_span, warn};
49use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, LE, U32};
50
51pub const NAMES_STREAM_NAME: &str = "/names";
54
55#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash, Ord, PartialOrd)]
60pub struct NameIndex(pub u32);
61
62impl std::fmt::Display for NameIndex {
63 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64 std::fmt::Display::fmt(&self.0, f)
65 }
66}
67
68#[test]
69fn name_index_display() {
70 assert_eq!(format!("{}", NameIndex(42)), "42");
71}
72
73#[derive(
75 Copy, Clone, Eq, PartialEq, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned,
76)]
77#[repr(transparent)]
78pub struct NameIndexLe(pub U32<LE>);
79
80impl NameIndexLe {
81 #[inline(always)]
83 pub fn get(self) -> NameIndex {
84 NameIndex(self.0.get())
85 }
86}
87
88pub const NAMES_STREAM_SIGNATURE: u32 = 0xEFFE_EFFE;
90
91pub const NAMES_STREAM_VERSION_V1: u32 = 1;
93
94#[repr(C)]
96#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
97pub struct NamesStreamHeader {
98 pub signature: U32<LE>,
100 pub version: U32<LE>,
102 pub strings_size: U32<LE>,
104}
105
106pub static EMPTY_NAMES_STREAM_DATA: &[u8] = &[
108 0xFE, 0xEF, 0xFE, 0xEF, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];
116
117#[test]
118fn parse_empty_names_stream() {
119 let names = NamesStream::parse(EMPTY_NAMES_STREAM_DATA).unwrap();
120 assert_eq!(names.num_strings, 0);
121 assert_eq!(names.num_hashes, 1);
122}
123
124pub const NAMES_STREAM_HEADER_LEN: usize = 12;
126
127pub struct NamesStream<StreamData>
129where
130 StreamData: AsRef<[u8]>,
131{
132 pub stream_data: StreamData,
134
135 pub strings_size: usize,
137
138 pub num_hashes: usize,
140
141 pub hashes_offset: usize,
144
145 pub num_strings: usize,
148}
149
150impl<StreamData> NamesStream<StreamData>
151where
152 StreamData: AsRef<[u8]>,
153{
154 pub fn parse(stream_data: StreamData) -> anyhow::Result<Self> {
159 let stream_data_slice: &[u8] = stream_data.as_ref();
160 let mut p = Parser::new(stream_data_slice);
161 let header: &NamesStreamHeader = p.get()?;
162
163 if header.signature.get() != NAMES_STREAM_SIGNATURE {
164 bail!(
165 "The `/names` stream has an invalid signature: 0x{:08x}.",
166 header.signature.get()
167 );
168 }
169
170 if header.version.get() != NAMES_STREAM_VERSION_V1 {
171 bail!(
172 "The `/names` stream is using an unsupported version: {}.",
173 header.version.get()
174 );
175 }
176
177 let strings_size = header.strings_size.get() as usize;
178 let _string_data = p.bytes(strings_size)?;
179
180 let num_hashes = p.u32()? as usize;
183
184 let hashes_offset = stream_data_slice.len() - p.len();
185 let _hashed_names: &[U32<LE>] = p.slice(num_hashes)?;
186
187 let num_strings = p.u32()? as usize;
189
190 Ok(Self {
191 stream_data,
192 strings_size,
193 num_hashes,
194 hashes_offset,
195 num_strings,
196 })
197 }
198
199 pub fn strings_range(&self) -> Range<usize> {
201 NAMES_STREAM_HEADER_LEN..NAMES_STREAM_HEADER_LEN + self.strings_size
202 }
203
204 pub fn strings_bytes(&self) -> &[u8] {
206 &self.stream_data.as_ref()[self.strings_range()]
207 }
208
209 pub fn hashes(&self) -> &[U32<LE>] {
212 let stream_data = self.stream_data.as_ref();
213 <[U32<LE>]>::ref_from_prefix_with_elems(&stream_data[self.hashes_offset..], self.num_hashes)
214 .unwrap()
215 .0
216 }
217
218 pub fn get_string(&self, offset: NameIndex) -> anyhow::Result<&BStr> {
220 let strings_bytes = self.strings_bytes();
221 if let Some(s_bytes) = strings_bytes.get(offset.0 as usize..) {
222 let mut p = Parser::new(s_bytes);
223 let s = p.strz()?;
224 trace!("found string at {offset:?} : {s:?}");
225 Ok(s)
226 } else {
227 bail!("String offset {offset:?} is invalid (out of range)");
228 }
229 }
230
231 pub fn iter(&self) -> IterNames<'_> {
239 IterNames {
240 rest: self.strings_bytes(),
241 }
242 }
243
244 pub fn rebuild(&self) -> (NameIndexMapping, Vec<u8>) {
250 let _span = trace_span!("NamesStream::rebuild").entered();
251
252 let old_stream_data: &[u8] = self.stream_data.as_ref();
253 let old_string_data = self.strings_bytes();
255
256 if old_string_data.is_empty() {
260 return (
261 NameIndexMapping { table: Vec::new() },
262 old_stream_data.to_vec(),
263 );
264 }
265
266 let num_strings = self.iter().filter(|s| !s.is_empty()).count();
268 debug!("Number of strings found: {num_strings}");
269
270 let mut strings: Vec<(Range<usize>, &BStr)> = Vec::with_capacity(num_strings);
272 strings.extend(self.iter().with_ranges().filter(|(_, s)| !s.is_empty()));
273
274 strings.sort_unstable_by_key(|i| i.1);
276 strings.dedup_by_key(|i| i.1);
277
278 let num_unique_strings = strings.len();
279 if num_unique_strings != num_strings {
280 debug!(
281 "Removed {} duplicate strings.",
282 num_strings - num_unique_strings
283 );
284 } else {
285 debug!("Did not find duplicate strings.");
286 }
287
288 let new_strings_len_unaligned = 1 + strings.iter().map(|(_, s)| s.len() + 1).sum::<usize>();
291 let new_strings_len = align_4(new_strings_len_unaligned);
292
293 let num_hashes = num_unique_strings * 6 / 4;
295 assert!(num_hashes >= num_unique_strings);
296 debug!(
297 "Using {} hashes for {} strings with linear probing.",
298 num_hashes, num_unique_strings
299 );
300
301 let new_hash_size_bytes = 4 + num_hashes * 4 + 4; let new_stream_data_len = NAMES_STREAM_HEADER_LEN + new_strings_len + new_hash_size_bytes;
306 debug!(
307 "Old name stream size (strings only): {}",
308 old_string_data.len()
309 );
310 debug!("New name stream size (strings only): {}", new_strings_len);
311
312 let mut new_stream_data: Vec<u8> = vec![0; new_stream_data_len];
313 let mut p = ParserMut::new(&mut new_stream_data);
314 *p.get_mut().unwrap() = NamesStreamHeader {
315 signature: U32::new(NAMES_STREAM_SIGNATURE),
316 version: U32::new(NAMES_STREAM_VERSION_V1),
317 strings_size: U32::new(new_strings_len as u32),
318 };
319
320 let mut remapping_table: Vec<(NameIndex, NameIndex)> = Vec::with_capacity(num_strings + 1);
322 remapping_table.push((NameIndex(0), NameIndex(0)));
324 {
325 let new_strings_data_with_alignment = p.bytes_mut(new_strings_len).unwrap();
326 let out_bytes = &mut new_strings_data_with_alignment[..new_strings_len_unaligned];
327 let out_bytes_len = out_bytes.len();
328 let mut out_iter = out_bytes;
329
330 out_iter[0] = 0;
332 out_iter = &mut out_iter[1..];
333
334 for (old_range, s) in strings.iter() {
335 let old_ni = NameIndex(old_range.start as u32);
336 let new_ni = NameIndex((out_bytes_len - out_iter.len()) as u32);
337 remapping_table.push((old_ni, new_ni));
338 let sb: &[u8] = s;
339
340 trace!(
341 "string: old_ni: 0x{old_ni:08x}, new_ni: 0x{new_ni:08x}, old_range: {:08x}..{:08x} s: {:?}",
342 old_range.start,
343 old_range.end,
344 s,
345 old_ni = old_ni.0,
346 new_ni = new_ni.0,
347 );
348
349 out_iter[..sb.len()].copy_from_slice(sb);
350 out_iter = &mut out_iter[sb.len() + 1..]; }
352
353 assert!(out_iter.is_empty());
354 remapping_table.sort_unstable_by_key(|&(old, _)| old);
355 }
356
357 let stream_offset_num_hashes = new_stream_data_len - p.len();
362 *p.get_mut::<U32<LE>>().unwrap() = U32::new(num_hashes as u32);
363 let stream_offset_hash_table = new_stream_data_len - p.len();
364
365 {
366 debug!("Building hash table, num_hashes = {}", num_hashes);
367 let hash_table: &mut [U32<LE>] = p.slice_mut(num_hashes).unwrap();
368 let mut new_ni: u32 = 1; for &(_, sb) in strings.iter() {
370 let h = crate::hash::hash_mod_u32(sb, num_hashes as u32);
371 trace!("ni {:08x}, hash {:08x}, {:?}", new_ni, h, sb);
372
373 let mut hi = h;
374 let mut wrapped = false;
375 loop {
376 let slot = &mut hash_table[hi as usize];
377 if slot.get() == 0 {
378 *slot = U32::new(new_ni);
379 break;
380 }
381 hi += 1;
382 if hi as usize == hash_table.len() {
383 hi = 0;
384 assert!(!wrapped, "should not wrap around the table more than once");
385 wrapped = true;
386 }
387 }
388
389 new_ni += (sb.len() + 1) as u32;
390 }
391 }
392
393 let stream_offset_num_strings = new_stream_data_len - p.len();
394 *p.get_mut::<U32<LE>>().unwrap() = U32::new(strings.len() as u32);
395
396 assert!(p.is_empty());
397
398 debug!("Stream offsets:");
399 debug!(
400 " [{:08x}] - Names Stream header",
401 NAMES_STREAM_HEADER_LEN
402 );
403 debug!(" [{:08x}] - string data", NAMES_STREAM_HEADER_LEN);
404 debug!(
405 " [{:08x}] - hash table header (num_hashes)",
406 stream_offset_num_hashes
407 );
408 debug!(
409 " [{:08x}] - hash table, size in bytes = {}",
410 stream_offset_hash_table,
411 num_hashes * 4
412 );
413 debug!(
414 " [{:08x}] - num_strings field",
415 stream_offset_num_strings
416 );
417 debug!(" [{:08x}] - (end)", new_stream_data_len);
418
419 (
420 NameIndexMapping {
421 table: remapping_table,
422 },
423 new_stream_data,
424 )
425 }
426}
427
428#[derive(Default)]
430pub struct NameIndexMapping {
431 pub table: Vec<(NameIndex, NameIndex)>,
435}
436
437impl NameIndexMapping {
438 pub fn map_old_to_new(&self, name: NameIndex) -> anyhow::Result<NameIndex> {
440 if name.0 == 0 {
442 return Ok(name);
443 }
444
445 let table = self.table.as_slice();
446 match table.binary_search_by_key(&name, |(old, _)| *old) {
447 Ok(i) => Ok(table[i].1),
448 Err(_) => bail!("The NameIndex value 0x{:x} cannot be remapped because it was not present in the old Names stream.", name.0),
449 }
450 }
451}
452
453#[allow(dead_code)]
468fn find_collision_ranges(hashes: &[U32<LE>], i: usize) -> (Range<usize>, Range<usize>) {
469 assert!(i < hashes.len());
470 assert!(hashes[i].get() != 0);
471
472 let mut start = i;
473 while start > 0 && hashes[start - 1].get() != 0 {
474 start -= 1;
475 }
476
477 let mut end = i + 1;
478 while end < hashes.len() && hashes[end].get() != 0 {
479 end += 1;
480 }
481
482 if start == 0 {
483 if end == hashes.len() {
486 return (start..end, 0..0);
487 }
488
489 let mut r2_start = hashes.len();
490 while r2_start > 0 && hashes[r2_start - 1].get() != 0 {
491 r2_start -= 1;
492 assert!(r2_start > end); }
494 if r2_start != hashes.len() {
495 (start..end, r2_start..hashes.len())
496 } else {
497 (start..end, 0..0)
498 }
499 } else if end == hashes.len() {
500 let mut r2_end = 0;
503 while r2_end < hashes.len() && hashes[r2_end].get() != 0 {
504 assert!(r2_end < start); r2_end += 1;
506 }
507
508 (start..end, 0..r2_end)
509 } else {
510 (start..end, 0..0)
511 }
512}
513
514#[test]
515fn test_find_collision_range() {
516 const EMPTY: U32<LE> = U32::from_bytes([0; 4]);
517 const BUSY: U32<LE> = U32::from_bytes([0xff; 4]);
518
519 let hashes_full: Vec<U32<LE>> = vec![BUSY, BUSY, BUSY, BUSY, BUSY];
520 assert_eq!(find_collision_ranges(&hashes_full, 0), (0..5, 0..0));
521 assert_eq!(find_collision_ranges(&hashes_full, 2), (0..5, 0..0));
522
523 {
524 let hashes_2 = vec![
525 BUSY, EMPTY, BUSY, EMPTY, EMPTY, BUSY, BUSY, ];
533 assert_eq!(find_collision_ranges(&hashes_2, 0), (0..1, 5..7));
534 assert_eq!(find_collision_ranges(&hashes_2, 2), (2..3, 0..0));
535 assert_eq!(find_collision_ranges(&hashes_2, 5), (5..7, 0..1));
536 }
537
538 {
539 let hashes_3 = vec![
540 BUSY, EMPTY, BUSY, EMPTY, EMPTY, BUSY, EMPTY, ];
548 assert_eq!(find_collision_ranges(&hashes_3, 0), (0..1, 0..0));
549 assert_eq!(find_collision_ranges(&hashes_3, 2), (2..3, 0..0));
550 assert_eq!(find_collision_ranges(&hashes_3, 5), (5..6, 0..0));
551 }
552 {
553 let hashes_4 = vec![
554 EMPTY, EMPTY, BUSY, EMPTY, EMPTY, BUSY, BUSY, ];
562 assert_eq!(find_collision_ranges(&hashes_4, 2), (2..3, 0..0));
563 assert_eq!(find_collision_ranges(&hashes_4, 5), (5..7, 0..0));
564 assert_eq!(find_collision_ranges(&hashes_4, 6), (5..7, 0..0));
565 }
566}
567
568pub struct IterNames<'a> {
570 rest: &'a [u8],
571}
572
573impl<'a> HasRestLen for IterNames<'a> {
574 fn rest_len(&self) -> usize {
575 self.rest.len()
576 }
577}
578
579impl<'a> Iterator for IterNames<'a> {
580 type Item = &'a BStr;
581
582 fn next(&mut self) -> Option<Self::Item> {
583 if self.rest.is_empty() {
584 return None;
585 }
586
587 let mut p = Parser::new(self.rest);
588 let Ok(s) = p.strz() else {
589 warn!(
590 rest_len = self.rest.len(),
591 "Found malformed string in /names stream"
592 );
593 return None;
594 };
595
596 self.rest = p.into_rest();
597 Some(s)
598 }
599}
600
601impl NamesStream<Vec<u8>> {
602 pub fn load_and_parse<F: ReadAt>(
604 pdb: &crate::msf::Msf<F>,
605 named_streams: &crate::pdbi::NamedStreams,
606 ) -> anyhow::Result<Self> {
607 let named_stream_index = named_streams.get_err(NAMES_STREAM_NAME)?;
608 let named_stream_data = pdb.read_stream_to_vec(named_stream_index)?;
609 Self::parse(named_stream_data)
610 }
611}