use std::collections::{HashMap, HashSet};
use rustc_hash::FxHashMap;
use crate::{
cilassembly::{
changes::HeapChanges,
writer::{output::Output, signatures::remap_signature_tokens},
},
metadata::streams::{Blob, Guid, Strings, UserStrings},
utils::{compressed_uint_size, hash_blob, hash_string, to_u32, write_compressed_uint},
Error, Result,
};
/// Outcome of laying out (and optionally writing) a single metadata heap.
#[derive(Debug, Default)]
pub struct StreamResult {
    /// Size in bytes of the heap as laid out. It is set even when no output
    /// sink was supplied and nothing was physically written.
    pub bytes_written: u64,
    /// Old location -> new location, recorded only for entries whose location
    /// changed. Keys and values are heap offsets, except for the GUID heap
    /// where they are 1-based indices.
    pub remapping: HashMap<u32, u32>,
}

impl StreamResult {
    /// Creates a result with zero bytes written and an empty remapping.
    pub fn new() -> Self {
        Self::default()
    }
}
/// Rebuilds the strings heap and writes it into `output` starting at `start_offset`.
///
/// Thin wrapper around `process_strings_heap` that supplies a real output sink;
/// all layout logic lives there.
///
/// # Errors
///
/// Propagates whatever `process_strings_heap` returns.
pub fn stream_strings_heap(
    output: &mut Output,
    start_offset: u64,
    source_data: &[u8],
    changes: &HeapChanges<String>,
    referenced_offsets: &HashSet<u32>,
) -> Result<StreamResult> {
    let sink = Some(output);
    process_strings_heap(sink, start_offset, source_data, changes, referenced_offsets)
}
/// Dry-run of the strings heap layout: computes size and remapping without writing.
///
/// Runs `process_strings_heap` with no output sink, a start offset of 0 and an
/// empty set of referenced offsets.
///
/// # Errors
///
/// Propagates whatever `process_strings_heap` returns.
pub fn compute_strings_heap_offsets(
    source_data: &[u8],
    changes: &HeapChanges<String>,
) -> Result<StreamResult> {
    let no_references: HashSet<u32> = HashSet::new();
    process_strings_heap(None, 0, source_data, changes, &no_references)
}
/// Lays out the strings heap and, when `output` is `Some`, writes it at `start_offset`.
///
/// With `output == None` nothing is written, but positions, `bytes_written` and the
/// remapping are computed by exactly the same code path.
///
/// Layout produced (outside the verbatim fast path):
/// 1. A single zero byte at offset 0 (the empty string).
/// 2. Every string parsed from `source_data` that is not removed, with its
///    modification applied if one exists, deduplicated by content hash.
/// 3. Every appended string that is not removed, deduplicated against everything
///    emitted so far; its change reference is resolved to the final offset.
///
/// For each source string that is emitted, every offset in `referenced_offsets`
/// that points strictly inside that string (after its first byte, up to and
/// including its terminator) is remapped to the same relative position in the
/// relocated string.
///
/// If `Strings::from(source_data)` fails, the source contributes no strings.
///
/// # Errors
///
/// Returns `Error::LayoutFailed` when a source offset or heap position does not fit
/// in `u32`, and propagates errors from `to_u32` and from writing to `output`.
fn process_strings_heap(
    mut output: Option<&mut Output>,
    start_offset: u64,
    source_data: &[u8],
    changes: &HeapChanges<String>,
    referenced_offsets: &HashSet<u32>,
) -> Result<StreamResult> {
    let mut result = StreamResult::new();

    // Fast path: a replacement with no additions, modifications or removals is
    // emitted byte-for-byte and produces no remapping.
    if changes.has_replacement()
        && changes.additions_count() == 0
        && changes.modifications_count() == 0
        && changes.removals_count() == 0
    {
        if let Some(out) = output.as_mut() {
            out.write_at(start_offset, source_data)?;
        }
        result.bytes_written = source_data.len() as u64;
        return Ok(result);
    }

    // Sorted snapshot of the referenced offsets. Each string then finds the
    // references that fall inside it with two binary searches instead of
    // scanning the whole set once per string.
    let mut sorted_refs: Vec<u32> = referenced_offsets.iter().copied().collect();
    sorted_refs.sort_unstable();

    // Offset 0 always holds the empty string.
    let mut pos: u64 = 1;
    // Deduplication is keyed on the 64-bit content hash only.
    let mut dedup_map: FxHashMap<u64, u32> = FxHashMap::default();
    dedup_map.insert(hash_string(""), 0);
    if let Some(out) = output.as_mut() {
        out.write_at(start_offset, &[0u8])?;
    }

    if let Ok(strings) = Strings::from(source_data) {
        for (old_offset, original_str) in strings.iter() {
            let old_offset_u32 = u32::try_from(old_offset).map_err(|_| {
                Error::LayoutFailed(format!("String offset {old_offset} exceeds u32 range"))
            })?;
            if changes.is_removed(old_offset_u32) {
                continue;
            }

            let final_str = changes
                .get_modification(old_offset_u32)
                .map_or(original_str, |s| s.as_str());
            let content_hash = hash_string(final_str);

            // Content already emitted: point the old offset at the existing copy.
            if let Some(&existing_offset) = dedup_map.get(&content_hash) {
                if old_offset_u32 != existing_offset {
                    result.remapping.insert(old_offset_u32, existing_offset);
                }
                continue;
            }

            let new_offset = u32::try_from(pos).map_err(|_| {
                Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range"))
            })?;
            let str_bytes = final_str.as_bytes();
            let entry_size = str_bytes.len() as u64 + 1;
            if let Some(out) = output.as_mut() {
                out.write_at(start_offset + pos, str_bytes)?;
                out.write_at(start_offset + pos + str_bytes.len() as u64, &[0u8])?;
            }
            pos += entry_size;
            dedup_map.insert(content_hash, new_offset);
            if old_offset_u32 != new_offset {
                result.remapping.insert(old_offset_u32, new_offset);
            }

            // Exclusive end of the entry (string bytes plus terminator). Saturating
            // arithmetic keeps a pathological offset/length pair from overflowing.
            // NOTE(review): the length used here is that of the final (possibly
            // modified) string, not of the original entry - confirm this is intended.
            let str_end = old_offset_u32
                .saturating_add(to_u32(str_bytes.len())?)
                .saturating_add(1);

            // References strictly inside (old_offset, str_end) keep their distance
            // from the start of the string.
            let first = sorted_refs.partition_point(|&r| r <= old_offset_u32);
            let last = sorted_refs.partition_point(|&r| r < str_end);
            if let Some(inner_refs) = sorted_refs.get(first..last) {
                for &ref_offset in inner_refs {
                    let substring_delta = ref_offset - old_offset_u32;
                    let new_substring_offset = new_offset + substring_delta;
                    result.remapping.insert(ref_offset, new_substring_offset);
                }
            }
        }
    }

    for (new_string, change_ref) in changes.appended_iter() {
        if changes.is_ref_removed(change_ref) {
            continue;
        }

        let placeholder = change_ref.placeholder();
        let final_str = changes
            .get_modification(placeholder)
            .map_or(new_string.as_str(), |s| s.as_str());
        let content_hash = hash_string(final_str);
        if let Some(&existing_offset) = dedup_map.get(&content_hash) {
            change_ref.resolve_to_offset(existing_offset);
            continue;
        }

        // Skip positions that are also keys of the remapping. Presumably this keeps a
        // freshly assigned offset from being mistaken for a remapped old offset later;
        // the skipped bytes are not written by this function.
        #[allow(clippy::cast_possible_truncation)]
        while result.remapping.contains_key(&(pos as u32)) {
            pos += 1;
        }

        let new_offset = u32::try_from(pos)
            .map_err(|_| Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range")))?;
        let str_bytes = final_str.as_bytes();
        let entry_size = str_bytes.len() as u64 + 1;
        if let Some(out) = output.as_mut() {
            out.write_at(start_offset + pos, str_bytes)?;
            out.write_at(start_offset + pos + str_bytes.len() as u64, &[0u8])?;
        }
        pos += entry_size;
        dedup_map.insert(content_hash, new_offset);
        change_ref.resolve_to_offset(new_offset);
    }

    result.bytes_written = pos;
    Ok(result)
}
/// Rebuilds the blob heap and writes it into `output` starting at `start_offset`.
///
/// Thin wrapper around `process_blob_heap` that supplies a real output sink and
/// forwards the two token remapping tables unchanged.
///
/// # Errors
///
/// Propagates whatever `process_blob_heap` returns.
pub fn stream_blob_heap(
    output: &mut Output,
    start_offset: u64,
    source_data: &[u8],
    changes: &HeapChanges<Vec<u8>>,
    typedef_remap: &HashMap<u32, u32>,
    typeref_remap: &HashMap<u32, u32>,
) -> Result<StreamResult> {
    let sink = Some(output);
    process_blob_heap(
        sink,
        start_offset,
        source_data,
        changes,
        typedef_remap,
        typeref_remap,
    )
}
/// Dry-run of the blob heap layout: computes size and remapping without writing.
///
/// Runs `process_blob_heap` with no output sink, a start offset of 0 and empty
/// token remapping tables, so no blob content is rewritten.
///
/// # Errors
///
/// Propagates whatever `process_blob_heap` returns.
pub fn compute_blob_heap_offsets(
    source_data: &[u8],
    changes: &HeapChanges<Vec<u8>>,
) -> Result<StreamResult> {
    let no_token_remap: HashMap<u32, u32> = HashMap::new();
    process_blob_heap(
        None,
        0,
        source_data,
        changes,
        &no_token_remap,
        &no_token_remap,
    )
}
fn process_blob_heap(
mut output: Option<&mut Output>,
start_offset: u64,
source_data: &[u8],
changes: &HeapChanges<Vec<u8>>,
typedef_remap: &HashMap<u32, u32>,
typeref_remap: &HashMap<u32, u32>,
) -> Result<StreamResult> {
let mut result = StreamResult::new();
let mut pos: u64 = 1;
let mut dedup_map: FxHashMap<u64, u32> = FxHashMap::default();
dedup_map.insert(hash_blob(&[]), 0);
if let Some(out) = output.as_mut() {
out.write_at(start_offset, &[0u8])?;
}
if let Ok(blobs) = Blob::from(source_data) {
for (old_offset, original_blob) in blobs.iter() {
let old_offset_u32 = u32::try_from(old_offset).map_err(|_| {
Error::LayoutFailed(format!("Blob offset {old_offset} exceeds u32 range"))
})?;
if changes.is_removed(old_offset_u32) {
continue;
}
let base_blob: &[u8] = changes
.get_modification(old_offset_u32)
.map_or(original_blob, Vec::as_slice);
let remapped_blob: Option<Vec<u8>> = if (!typedef_remap.is_empty()
|| !typeref_remap.is_empty())
&& !base_blob.is_empty()
{
remap_signature_tokens(base_blob, typedef_remap, typeref_remap)
.ok()
.flatten()
} else {
None
};
let final_blob: &[u8] = remapped_blob.as_deref().unwrap_or(base_blob);
let content_hash = hash_blob(final_blob);
if let Some(&existing_offset) = dedup_map.get(&content_hash) {
if existing_offset == 0 && old_offset_u32 != 0 && final_blob.is_empty() {
let new_offset = u32::try_from(pos).map_err(|_| {
Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range"))
})?;
if let Some(out) = output.as_mut() {
out.write_at(start_offset + pos, &[0u8])?;
}
pos += 1;
if old_offset_u32 != new_offset {
result.remapping.insert(old_offset_u32, new_offset);
}
continue;
}
if old_offset_u32 != existing_offset {
result.remapping.insert(old_offset_u32, existing_offset);
}
continue;
}
let new_offset = u32::try_from(pos).map_err(|_| {
Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range"))
})?;
let len_size = compressed_uint_size(final_blob.len());
let entry_size = len_size + final_blob.len() as u64;
if let Some(out) = output.as_mut() {
let blob_len_u32 = u32::try_from(final_blob.len()).map_err(|_| {
Error::LayoutFailed(format!(
"Blob length {} exceeds u32 range",
final_blob.len()
))
})?;
let mut len_bytes = Vec::with_capacity(4);
write_compressed_uint(blob_len_u32, &mut len_bytes);
let write_pos = start_offset + pos;
out.write_at(write_pos, &len_bytes)?;
out.write_at(write_pos + len_bytes.len() as u64, final_blob)?;
}
pos += entry_size;
dedup_map.insert(content_hash, new_offset);
if old_offset_u32 != new_offset {
result.remapping.insert(old_offset_u32, new_offset);
}
}
}
for (new_blob, change_ref) in changes.appended_iter() {
if changes.is_ref_removed(change_ref) {
continue;
}
let placeholder = change_ref.placeholder();
let base_blob: &[u8] = changes
.get_modification(placeholder)
.map_or(new_blob.as_slice(), Vec::as_slice);
let remapped_blob: Option<Vec<u8>> =
if (!typedef_remap.is_empty() || !typeref_remap.is_empty()) && !base_blob.is_empty() {
remap_signature_tokens(base_blob, typedef_remap, typeref_remap)
.ok()
.flatten()
} else {
None
};
let final_blob: &[u8] = remapped_blob.as_deref().unwrap_or(base_blob);
let content_hash = hash_blob(final_blob);
if let Some(&existing_offset) = dedup_map.get(&content_hash) {
change_ref.resolve_to_offset(existing_offset);
continue;
}
#[allow(clippy::cast_possible_truncation)]
while result.remapping.contains_key(&(pos as u32)) {
pos += 1;
}
let new_offset = u32::try_from(pos)
.map_err(|_| Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range")))?;
let len_size = compressed_uint_size(final_blob.len());
let entry_size = len_size + final_blob.len() as u64;
if let Some(out) = output.as_mut() {
let blob_len_u32 = u32::try_from(final_blob.len()).map_err(|_| {
Error::LayoutFailed(format!(
"Blob length {} exceeds u32 range",
final_blob.len()
))
})?;
let mut len_bytes = Vec::with_capacity(4);
write_compressed_uint(blob_len_u32, &mut len_bytes);
out.write_at(start_offset + pos, &len_bytes)?;
out.write_at(start_offset + pos + len_bytes.len() as u64, final_blob)?;
}
pos += entry_size;
dedup_map.insert(content_hash, new_offset);
change_ref.resolve_to_offset(new_offset);
}
result.bytes_written = pos;
Ok(result)
}
/// Rebuilds the GUID heap and writes it into `output` starting at `start_offset`.
///
/// Thin wrapper around `process_guid_heap` that supplies a real output sink.
///
/// # Errors
///
/// Propagates whatever `process_guid_heap` returns.
pub fn stream_guid_heap(
    output: &mut Output,
    start_offset: u64,
    source_data: &[u8],
    changes: &HeapChanges<[u8; 16]>,
) -> Result<StreamResult> {
    let sink = Some(output);
    process_guid_heap(sink, start_offset, source_data, changes)
}
pub fn compute_guid_heap_offsets(
source_data: &[u8],
changes: &HeapChanges<[u8; 16]>,
) -> Result<StreamResult> {
process_guid_heap(None, 0, source_data, changes)
}
/// Lays out the GUID heap and, when `output` is `Some`, writes it at `start_offset`.
///
/// GUIDs are 16 bytes each and are addressed by 1-based index: the remapping in the
/// returned result maps old indices to new indices, and appended GUIDs have their
/// change reference resolved to an index rather than a byte offset. Removal and
/// modification lookups on `changes`, however, are keyed by the byte offset
/// `(index - 1) * 16`.
///
/// Identical GUIDs are emitted once. If `Guid::from(source_data)` fails, the source
/// contributes no GUIDs.
///
/// # Errors
///
/// Returns `Error::LayoutFailed` when a source index does not fit in `u32`, and
/// propagates write errors.
fn process_guid_heap(
    mut output: Option<&mut Output>,
    start_offset: u64,
    source_data: &[u8],
    changes: &HeapChanges<[u8; 16]>,
) -> Result<StreamResult> {
    let mut outcome = StreamResult::new();
    let mut cursor: u64 = 0;

    // Fast path: a replacement with no additions, modifications or removals is
    // emitted byte-for-byte and produces no remapping.
    let verbatim_copy = changes.has_replacement()
        && changes.additions_count() == 0
        && changes.modifications_count() == 0
        && changes.removals_count() == 0;
    if verbatim_copy {
        if let Some(sink) = output.as_deref_mut() {
            sink.write_at(start_offset, source_data)?;
        }
        outcome.bytes_written = source_data.len() as u64;
        return Ok(outcome);
    }

    // GUID bytes -> index already assigned to them in the new heap.
    let mut index_of: FxHashMap<[u8; 16], u32> = FxHashMap::default();
    let mut next_index: u32 = 1;

    if let Ok(guids) = Guid::from(source_data) {
        for (old_index, guid) in guids.iter() {
            let old_idx = u32::try_from(old_index).map_err(|_| {
                Error::LayoutFailed(format!("GUID index {old_index} exceeds u32 range"))
            })?;

            // Change tracking is keyed by byte offset, not by index.
            let byte_offset = old_idx.saturating_sub(1) * 16;
            if changes.is_removed(byte_offset) {
                continue;
            }

            let bytes: [u8; 16] = match changes.get_modification(byte_offset) {
                Some(replacement) => *replacement,
                None => guid.to_bytes(),
            };

            let assigned = match index_of.get(&bytes) {
                Some(&existing) => existing,
                None => {
                    if let Some(sink) = output.as_deref_mut() {
                        sink.write_at(start_offset + cursor, &bytes)?;
                    }
                    cursor += 16;
                    index_of.insert(bytes, next_index);
                    let fresh = next_index;
                    next_index += 1;
                    fresh
                }
            };
            if old_idx != assigned {
                outcome.remapping.insert(old_idx, assigned);
            }
        }
    }

    for (new_guid, change_ref) in changes.appended_iter() {
        if changes.is_ref_removed(change_ref) {
            continue;
        }

        let bytes = changes
            .get_modification(change_ref.placeholder())
            .unwrap_or(new_guid);

        let assigned = match index_of.get(bytes) {
            Some(&existing) => existing,
            None => {
                if let Some(sink) = output.as_deref_mut() {
                    sink.write_at(start_offset + cursor, bytes)?;
                }
                cursor += 16;
                index_of.insert(*bytes, next_index);
                let fresh = next_index;
                next_index += 1;
                fresh
            }
        };
        change_ref.resolve_to_offset(assigned);
    }

    outcome.bytes_written = cursor;
    Ok(outcome)
}
/// Rebuilds the user-string heap and writes it into `output` starting at `start_offset`.
///
/// Thin wrapper around `process_userstring_heap` that supplies a real output sink.
///
/// # Errors
///
/// Propagates whatever `process_userstring_heap` returns.
pub fn stream_userstring_heap(
    output: &mut Output,
    start_offset: u64,
    source_data: &[u8],
    changes: &HeapChanges<String>,
) -> Result<StreamResult> {
    let sink = Some(output);
    process_userstring_heap(sink, start_offset, source_data, changes)
}
pub fn compute_userstring_heap_offsets(
source_data: &[u8],
changes: &HeapChanges<String>,
) -> Result<StreamResult> {
process_userstring_heap(None, 0, source_data, changes)
}
fn process_userstring_heap(
mut output: Option<&mut Output>,
start_offset: u64,
source_data: &[u8],
changes: &HeapChanges<String>,
) -> Result<StreamResult> {
let mut result = StreamResult::new();
if changes.has_replacement()
&& changes.additions_count() == 0
&& changes.modifications_count() == 0
&& changes.removals_count() == 0
{
if let Some(out) = output.as_mut() {
out.write_at(start_offset, source_data)?;
}
result.bytes_written = source_data.len() as u64;
return Ok(result);
}
let mut pos: u64 = 1;
let mut dedup_map: FxHashMap<u64, u32> = FxHashMap::default();
dedup_map.insert(hash_string(""), 0);
if let Some(out) = output.as_mut() {
out.write_at(start_offset, &[0u8])?;
}
if let Ok(userstrings) = UserStrings::from(source_data) {
for (old_offset, original_str) in userstrings.iter() {
let old_offset_u32 = u32::try_from(old_offset).map_err(|_| {
Error::LayoutFailed(format!("UserString offset {old_offset} exceeds u32 range"))
})?;
if changes.is_removed(old_offset_u32) {
continue;
}
let original_utf8 = original_str.to_string_lossy();
let final_str = changes
.get_modification(old_offset_u32)
.map_or(original_utf8.as_ref(), |s| s.as_str());
let content_hash = hash_string(final_str);
if let Some(&existing_offset) = dedup_map.get(&content_hash) {
if old_offset_u32 != existing_offset {
result.remapping.insert(old_offset_u32, existing_offset);
}
continue;
}
let new_offset = u32::try_from(pos).map_err(|_| {
Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range"))
})?;
let entry_size = userstring_entry_size(final_str);
if let Some(out) = output.as_mut() {
write_userstring_entry(out, start_offset + pos, final_str)?;
}
pos += entry_size;
dedup_map.insert(content_hash, new_offset);
if old_offset_u32 != new_offset {
result.remapping.insert(old_offset_u32, new_offset);
}
}
}
for (new_string, change_ref) in changes.appended_iter() {
if changes.is_ref_removed(change_ref) {
continue;
}
let placeholder = change_ref.placeholder();
let final_str = changes
.get_modification(placeholder)
.map_or(new_string.as_str(), |s| s.as_str());
let content_hash = hash_string(final_str);
if let Some(&existing_offset) = dedup_map.get(&content_hash) {
change_ref.resolve_to_offset(existing_offset);
continue;
}
let new_offset = u32::try_from(pos)
.map_err(|_| Error::LayoutFailed(format!("Heap position {pos} exceeds u32 range")))?;
let entry_size = userstring_entry_size(final_str);
if let Some(out) = output.as_mut() {
write_userstring_entry(out, start_offset + pos, final_str)?;
}
pos += entry_size;
dedup_map.insert(content_hash, new_offset);
change_ref.resolve_to_offset(new_offset);
}
result.bytes_written = pos;
Ok(result)
}
/// Number of bytes `s` occupies as a user-string heap entry.
///
/// The entry consists of a compressed length prefix followed by the payload, where
/// the payload is the UTF-16 encoding of `s` (two bytes per code unit) plus one
/// trailing byte. This mirrors what `write_userstring_entry` emits.
fn userstring_entry_size(s: &str) -> u64 {
    let code_units = s.encode_utf16().count();
    // UTF-16 bytes plus the single terminal byte.
    let payload_len = code_units * 2 + 1;
    let prefix_len = compressed_uint_size(payload_len);
    prefix_len + payload_len as u64
}
/// Writes `s` as one user-string heap entry at `pos`.
///
/// The entry is laid out as:
/// 1. the compressed length of the payload (UTF-16 bytes + 1),
/// 2. the UTF-16LE encoding of `s`,
/// 3. one terminal flag byte.
///
/// The terminal byte follows ECMA-335 II.24.2.4: it is 1 when any UTF-16 code unit
/// needs handling beyond plain 8-bit text, else 0. A unit qualifies when it is one of
/// 0x01-0x08, 0x0E-0x1F, 0x27, 0x2D, or is `>= 0x7F`. The `>= 0x7F` test covers 0x7F
/// and every unit with a non-zero top byte; it also flags 0x80-0xFF, which is the
/// conservative choice and matches what this function flagged before.
///
/// # Errors
///
/// Returns `Error::LayoutFailed` when the payload length does not fit in `u32`, and
/// propagates write errors from `output`.
fn write_userstring_entry(output: &mut Output, pos: u64, s: &str) -> Result<()> {
    /// Whether a single UTF-16 code unit forces the terminal flag byte to 1.
    fn needs_special_handling(unit: u16) -> bool {
        unit >= 0x7F || matches!(unit, 0x01..=0x08 | 0x0E..=0x1F | 0x27 | 0x2D)
    }

    let utf16_bytes: Vec<u8> = s.encode_utf16().flat_map(u16::to_le_bytes).collect();

    // Payload length counts the UTF-16 bytes plus the terminal byte.
    let total_len = utf16_bytes.len() + 1;
    let total_len_u32 = u32::try_from(total_len).map_err(|_| {
        Error::LayoutFailed(format!("UserString length {total_len} exceeds u32 range"))
    })?;
    let mut len_bytes = Vec::with_capacity(4);
    write_compressed_uint(total_len_u32, &mut len_bytes);

    // The flag is decided per code unit, not per byte: looking at individual bytes
    // would miss units such as U+0100 (bytes 0x00, 0x01) and the special ASCII values.
    let terminal = u8::from(s.encode_utf16().any(needs_special_handling));

    let payload_pos = pos + len_bytes.len() as u64;
    output.write_at(pos, &len_bytes)?;
    output.write_at(payload_pos, &utf16_bytes)?;
    output.write_at(payload_pos + utf16_bytes.len() as u64, &[terminal])?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Streams `source` through `stream_strings_heap` with no pending changes and no
    /// referenced offsets, into a fresh 4096-byte temp-file-backed output at offset 0.
    fn stream_unchanged_strings(source: &[u8]) -> StreamResult {
        let backing_file = NamedTempFile::new().unwrap();
        let mut sink = Output::create(backing_file.path(), 4096).unwrap();
        let no_changes = HeapChanges::<String>::new_strings();
        let no_refs: HashSet<u32> = HashSet::new();
        stream_strings_heap(&mut sink, 0, source, &no_changes, &no_refs).unwrap()
    }

    /// A heap holding only the leading zero byte stays one byte long and needs no remapping.
    #[test]
    fn test_stream_strings_heap_empty() {
        let result = stream_unchanged_strings(&[0u8]);

        assert_eq!(result.bytes_written, 1);
        assert!(result.remapping.is_empty());
    }

    /// Two distinct strings are both kept, so the heap size is unchanged.
    #[test]
    fn test_stream_strings_heap_with_source() {
        // "\0Hello\0World\0"
        let source = [
            0x00, b'H', b'e', b'l', b'l', b'o', 0x00, b'W', b'o', b'r', b'l', b'd', 0x00,
        ];

        let result = stream_unchanged_strings(&source);

        assert_eq!(result.bytes_written, 13);
    }

    /// A repeated string is emitted once and the second offset is remapped to the first.
    #[test]
    fn test_stream_strings_heap_deduplication() {
        // "\0Hello\0Hello\0"
        let source = [
            0x00, b'H', b'e', b'l', b'l', b'o', 0x00, b'H', b'e', b'l', b'l', b'o', 0x00,
        ];

        let result = stream_unchanged_strings(&source);

        assert_eq!(result.bytes_written, 7);
        assert_eq!(result.remapping.get(&7), Some(&1));
    }
}