#[cfg(test)]
mod tests;
use crate::ReadAt;
use crate::utils::align_4;
use anyhow::bail;
use bstr::BStr;
use ms_codeview::parser::{Parser, ParserMut};
use ms_codeview::{HasRestLen, IteratorWithRangesExt};
use std::ops::Range;
use tracing::{debug, trace, trace_span, warn};
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, LE, U32, Unaligned};
pub const NAMES_STREAM_NAME: &str = "/names";
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash, Ord, PartialOrd)]
pub struct NameIndex(pub u32);
impl std::fmt::Display for NameIndex {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
#[test]
fn name_index_display() {
assert_eq!(format!("{}", NameIndex(42)), "42");
}
#[derive(
Copy, Clone, Eq, PartialEq, Debug, IntoBytes, Immutable, KnownLayout, FromBytes, Unaligned,
)]
#[repr(transparent)]
pub struct NameIndexLe(pub U32<LE>);
impl NameIndexLe {
#[inline(always)]
pub fn get(self) -> NameIndex {
NameIndex(self.0.get())
}
}
pub const NAMES_STREAM_SIGNATURE: u32 = 0xEFFE_EFFE;
pub const NAMES_STREAM_VERSION_V1: u32 = 1;
#[repr(C)]
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
pub struct NamesStreamHeader {
pub signature: U32<LE>,
pub version: U32<LE>,
pub strings_size: U32<LE>,
}
pub static EMPTY_NAMES_STREAM_DATA: &[u8] = &[
0xFE, 0xEF, 0xFE, 0xEF, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];
#[test]
fn parse_empty_names_stream() {
let names = NamesStream::parse(EMPTY_NAMES_STREAM_DATA).unwrap();
assert_eq!(names.num_strings, 0);
assert_eq!(names.num_hashes, 1);
}
pub const NAMES_STREAM_HEADER_LEN: usize = 12;
pub struct NamesStream<StreamData>
where
StreamData: AsRef<[u8]>,
{
pub stream_data: StreamData,
pub strings_size: usize,
pub num_hashes: usize,
pub hashes_offset: usize,
pub num_strings: usize,
}
impl<StreamData> NamesStream<StreamData>
where
StreamData: AsRef<[u8]>,
{
pub fn parse(stream_data: StreamData) -> anyhow::Result<Self> {
let stream_data_slice: &[u8] = stream_data.as_ref();
let mut p = Parser::new(stream_data_slice);
let header: &NamesStreamHeader = p.get()?;
if header.signature.get() != NAMES_STREAM_SIGNATURE {
bail!(
"The `/names` stream has an invalid signature: 0x{:08x}.",
header.signature.get()
);
}
if header.version.get() != NAMES_STREAM_VERSION_V1 {
bail!(
"The `/names` stream is using an unsupported version: {}.",
header.version.get()
);
}
let strings_size = header.strings_size.get() as usize;
let _string_data = p.bytes(strings_size)?;
let num_hashes = p.u32()? as usize;
let hashes_offset = stream_data_slice.len() - p.len();
let _hashed_names: &[U32<LE>] = p.slice(num_hashes)?;
let num_strings = p.u32()? as usize;
Ok(Self {
stream_data,
strings_size,
num_hashes,
hashes_offset,
num_strings,
})
}
pub fn strings_range(&self) -> Range<usize> {
NAMES_STREAM_HEADER_LEN..NAMES_STREAM_HEADER_LEN + self.strings_size
}
pub fn strings_bytes(&self) -> &[u8] {
&self.stream_data.as_ref()[self.strings_range()]
}
pub fn hashes(&self) -> &[U32<LE>] {
let stream_data = self.stream_data.as_ref();
<[U32<LE>]>::ref_from_prefix_with_elems(&stream_data[self.hashes_offset..], self.num_hashes)
.unwrap()
.0
}
pub fn get_string(&self, offset: NameIndex) -> anyhow::Result<&BStr> {
let strings_bytes = self.strings_bytes();
if let Some(s_bytes) = strings_bytes.get(offset.0 as usize..) {
let mut p = Parser::new(s_bytes);
let s = p.strz()?;
trace!("found string at {offset:?} : {s:?}");
Ok(s)
} else {
bail!("String offset {offset:?} is invalid (out of range)");
}
}
pub fn iter(&self) -> IterNames<'_> {
IterNames {
rest: self.strings_bytes(),
}
}
pub fn rebuild(&self) -> (NameIndexMapping, Vec<u8>) {
let _span = trace_span!("NamesStream::rebuild").entered();
let old_stream_data: &[u8] = self.stream_data.as_ref();
let old_string_data = self.strings_bytes();
if old_string_data.is_empty() {
return (
NameIndexMapping { table: Vec::new() },
old_stream_data.to_vec(),
);
}
let num_strings = self.iter().filter(|s| !s.is_empty()).count();
debug!("Number of strings found: {num_strings}");
let mut strings: Vec<(Range<usize>, &BStr)> = Vec::with_capacity(num_strings);
strings.extend(self.iter().with_ranges().filter(|(_, s)| !s.is_empty()));
strings.sort_unstable_by_key(|i| i.1);
strings.dedup_by_key(|i| i.1);
let num_unique_strings = strings.len();
if num_unique_strings != num_strings {
debug!(
"Removed {} duplicate strings.",
num_strings - num_unique_strings
);
} else {
debug!("Did not find duplicate strings.");
}
let new_strings_len_unaligned = 1 + strings.iter().map(|(_, s)| s.len() + 1).sum::<usize>();
let new_strings_len = align_4(new_strings_len_unaligned);
let num_hashes = num_unique_strings * 6 / 4;
assert!(num_hashes >= num_unique_strings);
debug!(
"Using {} hashes for {} strings with linear probing.",
num_hashes, num_unique_strings
);
let new_hash_size_bytes = 4 + num_hashes * 4 + 4;
let new_stream_data_len = NAMES_STREAM_HEADER_LEN + new_strings_len + new_hash_size_bytes;
debug!(
"Old name stream size (strings only): {}",
old_string_data.len()
);
debug!("New name stream size (strings only): {}", new_strings_len);
let mut new_stream_data: Vec<u8> = vec![0; new_stream_data_len];
let mut p = ParserMut::new(&mut new_stream_data);
*p.get_mut().unwrap() = NamesStreamHeader {
signature: U32::new(NAMES_STREAM_SIGNATURE),
version: U32::new(NAMES_STREAM_VERSION_V1),
strings_size: U32::new(new_strings_len as u32),
};
let mut remapping_table: Vec<(NameIndex, NameIndex)> = Vec::with_capacity(num_strings + 1);
remapping_table.push((NameIndex(0), NameIndex(0)));
{
let new_strings_data_with_alignment = p.bytes_mut(new_strings_len).unwrap();
let out_bytes = &mut new_strings_data_with_alignment[..new_strings_len_unaligned];
let out_bytes_len = out_bytes.len();
let mut out_iter = out_bytes;
out_iter[0] = 0;
out_iter = &mut out_iter[1..];
for (old_range, s) in strings.iter() {
let old_ni = NameIndex(old_range.start as u32);
let new_ni = NameIndex((out_bytes_len - out_iter.len()) as u32);
remapping_table.push((old_ni, new_ni));
let sb: &[u8] = s;
trace!(
"string: old_ni: 0x{old_ni:08x}, new_ni: 0x{new_ni:08x}, old_range: {:08x}..{:08x} s: {:?}",
old_range.start,
old_range.end,
s,
old_ni = old_ni.0,
new_ni = new_ni.0,
);
out_iter[..sb.len()].copy_from_slice(sb);
out_iter = &mut out_iter[sb.len() + 1..]; }
assert!(out_iter.is_empty());
remapping_table.sort_unstable_by_key(|&(old, _)| old);
}
let stream_offset_num_hashes = new_stream_data_len - p.len();
*p.get_mut::<U32<LE>>().unwrap() = U32::new(num_hashes as u32);
let stream_offset_hash_table = new_stream_data_len - p.len();
{
debug!("Building hash table, num_hashes = {}", num_hashes);
let hash_table: &mut [U32<LE>] = p.slice_mut(num_hashes).unwrap();
let mut new_ni: u32 = 1; for &(_, sb) in strings.iter() {
let h = crate::hash::hash_mod_u32(sb, num_hashes as u32);
trace!("ni {:08x}, hash {:08x}, {:?}", new_ni, h, sb);
let mut hi = h;
let mut wrapped = false;
loop {
let slot = &mut hash_table[hi as usize];
if slot.get() == 0 {
*slot = U32::new(new_ni);
break;
}
hi += 1;
if hi as usize == hash_table.len() {
hi = 0;
assert!(!wrapped, "should not wrap around the table more than once");
wrapped = true;
}
}
new_ni += (sb.len() + 1) as u32;
}
}
let stream_offset_num_strings = new_stream_data_len - p.len();
*p.get_mut::<U32<LE>>().unwrap() = U32::new(strings.len() as u32);
assert!(p.is_empty());
debug!("Stream offsets:");
debug!(
" [{:08x}] - Names Stream header",
NAMES_STREAM_HEADER_LEN
);
debug!(" [{:08x}] - string data", NAMES_STREAM_HEADER_LEN);
debug!(
" [{:08x}] - hash table header (num_hashes)",
stream_offset_num_hashes
);
debug!(
" [{:08x}] - hash table, size in bytes = {}",
stream_offset_hash_table,
num_hashes * 4
);
debug!(
" [{:08x}] - num_strings field",
stream_offset_num_strings
);
debug!(" [{:08x}] - (end)", new_stream_data_len);
(
NameIndexMapping {
table: remapping_table,
},
new_stream_data,
)
}
}
#[derive(Default)]
pub struct NameIndexMapping {
pub table: Vec<(NameIndex, NameIndex)>,
}
impl NameIndexMapping {
pub fn map_old_to_new(&self, name: NameIndex) -> anyhow::Result<NameIndex> {
if name.0 == 0 {
return Ok(name);
}
let table = self.table.as_slice();
match table.binary_search_by_key(&name, |(old, _)| *old) {
Ok(i) => Ok(table[i].1),
Err(_) => bail!(
"The NameIndex value 0x{:x} cannot be remapped because it was not present in the old Names stream.",
name.0
),
}
}
}
#[allow(dead_code)]
fn find_collision_ranges(hashes: &[U32<LE>], i: usize) -> (Range<usize>, Range<usize>) {
assert!(i < hashes.len());
assert!(hashes[i].get() != 0);
let mut start = i;
while start > 0 && hashes[start - 1].get() != 0 {
start -= 1;
}
let mut end = i + 1;
while end < hashes.len() && hashes[end].get() != 0 {
end += 1;
}
if start == 0 {
if end == hashes.len() {
return (start..end, 0..0);
}
let mut r2_start = hashes.len();
while r2_start > 0 && hashes[r2_start - 1].get() != 0 {
r2_start -= 1;
assert!(r2_start > end); }
if r2_start != hashes.len() {
(start..end, r2_start..hashes.len())
} else {
(start..end, 0..0)
}
} else if end == hashes.len() {
let mut r2_end = 0;
while r2_end < hashes.len() && hashes[r2_end].get() != 0 {
assert!(r2_end < start); r2_end += 1;
}
(start..end, 0..r2_end)
} else {
(start..end, 0..0)
}
}
#[test]
fn test_find_collision_range() {
const EMPTY: U32<LE> = U32::from_bytes([0; 4]);
const BUSY: U32<LE> = U32::from_bytes([0xff; 4]);
let hashes_full: Vec<U32<LE>> = vec![BUSY, BUSY, BUSY, BUSY, BUSY];
assert_eq!(find_collision_ranges(&hashes_full, 0), (0..5, 0..0));
assert_eq!(find_collision_ranges(&hashes_full, 2), (0..5, 0..0));
{
let hashes_2 = vec![
BUSY, EMPTY, BUSY, EMPTY, EMPTY, BUSY, BUSY, ];
assert_eq!(find_collision_ranges(&hashes_2, 0), (0..1, 5..7));
assert_eq!(find_collision_ranges(&hashes_2, 2), (2..3, 0..0));
assert_eq!(find_collision_ranges(&hashes_2, 5), (5..7, 0..1));
}
{
let hashes_3 = vec![
BUSY, EMPTY, BUSY, EMPTY, EMPTY, BUSY, EMPTY, ];
assert_eq!(find_collision_ranges(&hashes_3, 0), (0..1, 0..0));
assert_eq!(find_collision_ranges(&hashes_3, 2), (2..3, 0..0));
assert_eq!(find_collision_ranges(&hashes_3, 5), (5..6, 0..0));
}
{
let hashes_4 = vec![
EMPTY, EMPTY, BUSY, EMPTY, EMPTY, BUSY, BUSY, ];
assert_eq!(find_collision_ranges(&hashes_4, 2), (2..3, 0..0));
assert_eq!(find_collision_ranges(&hashes_4, 5), (5..7, 0..0));
assert_eq!(find_collision_ranges(&hashes_4, 6), (5..7, 0..0));
}
}
pub struct IterNames<'a> {
rest: &'a [u8],
}
impl<'a> HasRestLen for IterNames<'a> {
fn rest_len(&self) -> usize {
self.rest.len()
}
}
impl<'a> Iterator for IterNames<'a> {
type Item = &'a BStr;
fn next(&mut self) -> Option<Self::Item> {
if self.rest.is_empty() {
return None;
}
let mut p = Parser::new(self.rest);
let Ok(s) = p.strz() else {
warn!(
rest_len = self.rest.len(),
"Found malformed string in /names stream"
);
return None;
};
self.rest = p.into_rest();
Some(s)
}
}
impl NamesStream<Vec<u8>> {
pub fn load_and_parse<F: ReadAt>(
pdb: &crate::msf::Msf<F>,
named_streams: &crate::pdbi::NamedStreams,
) -> anyhow::Result<Self> {
let named_stream_index = named_streams.get_err(NAMES_STREAM_NAME)?;
let named_stream_data = pdb.read_stream_to_vec(named_stream_index)?;
Self::parse(named_stream_data)
}
}