use std::collections::HashSet;
use strum::IntoEnumIterator;
use crate::{
cilassembly::{changes::ChangeRef, CilAssembly, Operation, TableModifications},
dispatch_table_type,
metadata::{
tablefields::{get_heap_fields, HeapFieldDescriptor, HeapType},
tables::{RowWritable, TableDataOwned, TableId, TableInfoRef},
},
utils::{calculate_table_row_size, read_le_at_dyn},
Result,
};
/// Per-heap counters of entries that were marked for removal during compaction.
///
/// A default-constructed value has every counter at zero.
#[derive(Debug, Default)]
pub struct CompactionStats {
    /// Number of string heap entries marked for removal.
    pub strings: usize,
    /// Number of blob heap entries marked for removal.
    pub blobs: usize,
    /// Number of GUID heap entries marked for removal.
    pub guids: usize,
}

impl CompactionStats {
    /// Returns `true` when at least one of the three counters is non-zero.
    #[must_use]
    pub fn has_removals(&self) -> bool {
        [self.strings, self.blobs, self.guids]
            .iter()
            .any(|&count| count != 0)
    }

    /// Returns the sum of the string, blob and GUID counters.
    #[must_use]
    pub fn total_removed(&self) -> usize {
        [self.strings, self.blobs, self.guids].iter().sum()
    }
}
/// Marks every string, blob and GUID heap entry that no table row references for removal.
///
/// The set of referenced heap indices is gathered with
/// `collect_referenced_heap_entries`; the referenced string offsets are also
/// copied into `referenced_string_offsets` on the assembly's change set.
/// Entry 0 of each heap is never removed.
///
/// A string entry is kept when *any* referenced offset falls inside
/// `[offset, offset + len + 1)`, so a reference that points into the middle
/// of an entry keeps the whole entry alive. Blob and GUID entries are kept
/// only when their exact index is referenced.
///
/// # Returns
///
/// A [`CompactionStats`] counting the entries that were successfully handed
/// to `string_remove`, `blob_remove` and `guid_remove`.
///
/// # Errors
///
/// Returns the first error produced by one of the removal calls. Removals
/// issued before the failing call are not rolled back by this function.
pub fn mark_unreferenced_heap_entries(assembly: &mut CilAssembly) -> Result<CompactionStats> {
    let (ref_strings, ref_blobs, ref_guids) = collect_referenced_heap_entries(assembly);

    assembly
        .changes_mut()
        .referenced_string_offsets
        .clone_from(&ref_strings);

    // Sorted copy of the referenced string offsets: lets each heap entry be
    // checked with a binary search instead of a linear scan over the whole set.
    let mut sorted_string_refs: Vec<u32> = ref_strings.iter().copied().collect();
    sorted_string_refs.sort_unstable();

    // The view borrows the assembly immutably, so all candidates are collected
    // inside this scope before any mutating removal call is made.
    let (unreferenced_strings, unreferenced_blobs, unreferenced_guids) = {
        let view = assembly.view();

        let strings: Vec<u32> = if let Some(heap) = view.strings() {
            heap.iter()
                .filter_map(|(offset, content)| {
                    #[allow(clippy::cast_possible_truncation)]
                    let start = offset as u32;
                    if start == 0 {
                        return None;
                    }
                    // One past the terminating byte; saturating so an entry at
                    // the very end of the u32 range cannot overflow.
                    #[allow(clippy::cast_possible_truncation)]
                    let end = start
                        .saturating_add(content.len() as u32)
                        .saturating_add(1);
                    // The smallest referenced offset >= start decides whether
                    // anything points into [start, end).
                    let first_candidate =
                        sorted_string_refs.partition_point(|&ref_off| ref_off < start);
                    let has_reference = matches!(
                        sorted_string_refs.get(first_candidate),
                        Some(&ref_off) if ref_off < end
                    );
                    if has_reference {
                        None
                    } else {
                        Some(start)
                    }
                })
                .collect()
        } else {
            Vec::new()
        };

        let blobs: Vec<u32> = if let Some(heap) = view.blobs() {
            heap.iter()
                .filter_map(|(offset, _)| {
                    #[allow(clippy::cast_possible_truncation)]
                    let offset_u32 = offset as u32;
                    if offset_u32 > 0 && !ref_blobs.contains(&offset_u32) {
                        Some(offset_u32)
                    } else {
                        None
                    }
                })
                .collect()
        } else {
            Vec::new()
        };

        let guids: Vec<u32> = if let Some(heap) = view.guids() {
            heap.iter()
                .filter_map(|(index, _)| {
                    #[allow(clippy::cast_possible_truncation)]
                    let index_u32 = index as u32;
                    if index_u32 > 0 && !ref_guids.contains(&index_u32) {
                        Some(index_u32)
                    } else {
                        None
                    }
                })
                .collect()
        } else {
            Vec::new()
        };

        (strings, blobs, guids)
    };

    let mut stats = CompactionStats::default();
    for offset in unreferenced_strings {
        assembly.string_remove(offset)?;
        stats.strings += 1;
    }
    for offset in unreferenced_blobs {
        assembly.blob_remove(offset)?;
        stats.blobs += 1;
    }
    for index in unreferenced_guids {
        assembly.guid_remove(index)?;
        stats.guids += 1;
    }
    Ok(stats)
}
/// Gathers every heap index referenced from the assembly's metadata table rows.
///
/// Returns `(strings, blobs, guids)`. Index 0 is always present in the string
/// and blob sets. If the view exposes no tables, only those seeds are returned.
///
/// For each table that has heap-referencing fields, the rows that are scanned
/// depend on the pending modifications recorded for that table:
///
/// * `Replaced` – only the replacement rows are scanned.
/// * `Sparse` – original rows that are neither deleted nor updated are
///   scanned, followed by the row data of every `Update` and `Insert`
///   operation (`Delete` operations carry no row data).
/// * no modifications – every original row is scanned.
///
/// Each row is serialized with `row_write` into a scratch buffer and the heap
/// fields are then read back from that buffer.
fn collect_referenced_heap_entries(
    assembly: &CilAssembly,
) -> (HashSet<u32>, HashSet<u32>, HashSet<u32>) {
    let mut ref_strings: HashSet<u32> = HashSet::new();
    let mut ref_blobs: HashSet<u32> = HashSet::new();
    let mut ref_guids: HashSet<u32> = HashSet::new();
    // Entry 0 of the string and blob heaps is always treated as referenced.
    ref_strings.insert(0);
    ref_blobs.insert(0);

    let view = assembly.view();
    let Some(tables) = view.tables() else {
        return (ref_strings, ref_blobs, ref_guids);
    };
    let table_info = &tables.info;
    let changes = assembly.changes();

    for table_id in TableId::iter() {
        let heap_fields = get_heap_fields(table_id, table_info);
        if heap_fields.is_empty() {
            // Nothing in this table can point into a heap.
            continue;
        }
        let row_count = tables.table_row_count(table_id);
        let row_size = calculate_table_row_size(table_id, table_info) as usize;

        match changes.table_changes.get(&table_id) {
            Some(TableModifications::Replaced(replacement_rows)) => {
                scan_table_data_owned_rows(
                    replacement_rows,
                    table_id,
                    table_info,
                    &heap_fields,
                    &mut ref_strings,
                    &mut ref_blobs,
                    &mut ref_guids,
                );
            }
            Some(TableModifications::Sparse {
                operations,
                deleted_rows,
                ..
            }) => {
                // Original rows with a pending update are skipped below; their
                // replacement data is picked up from the operations instead.
                let updated_rids: HashSet<u32> = operations
                    .iter()
                    .filter_map(|op| match &op.operation {
                        Operation::Update(rid, _) => Some(*rid),
                        _ => None,
                    })
                    .collect();

                dispatch_table_type!(table_id, |RawType| {
                    if let Some(table) = tables.table::<RawType>() {
                        let mut row_buffer = vec![0u8; row_size];
                        for rid in 1..=row_count {
                            if deleted_rows.contains(&rid) || updated_rids.contains(&rid) {
                                continue;
                            }
                            let Some(row) = table.get(rid) else {
                                continue;
                            };
                            let mut offset = 0;
                            // NOTE(review): a row that fails to serialize is skipped,
                            // so its heap references are not recorded.
                            if row
                                .row_write(&mut row_buffer, &mut offset, rid, table_info)
                                .is_err()
                            {
                                continue;
                            }
                            extract_heap_refs_from_row(
                                &row_buffer,
                                &heap_fields,
                                &mut ref_strings,
                                &mut ref_blobs,
                                &mut ref_guids,
                            );
                        }
                    }
                });

                // One scratch buffer for all operations instead of a fresh
                // allocation per operation.
                let mut op_buffer = vec![0u8; row_size];
                for op in operations {
                    let row_data = match &op.operation {
                        Operation::Update(_, data) | Operation::Insert(_, data) => data,
                        Operation::Delete(_) => continue,
                    };
                    if row_data.table_id() != table_id {
                        continue;
                    }
                    // Start from an all-zero buffer, exactly as a freshly
                    // allocated one would be.
                    op_buffer.fill(0);
                    let mut offset = 0;
                    if row_data
                        .row_write(&mut op_buffer, &mut offset, 0, table_info)
                        .is_ok()
                    {
                        extract_heap_refs_from_row(
                            &op_buffer,
                            &heap_fields,
                            &mut ref_strings,
                            &mut ref_blobs,
                            &mut ref_guids,
                        );
                    }
                }
            }
            None => {
                if row_count == 0 {
                    continue;
                }
                dispatch_table_type!(table_id, |RawType| {
                    if let Some(table) = tables.table::<RawType>() {
                        let mut row_buffer = vec![0u8; row_size];
                        for rid in 1..=row_count {
                            let Some(row) = table.get(rid) else {
                                continue;
                            };
                            let mut offset = 0;
                            // NOTE(review): a row that fails to serialize is skipped,
                            // so its heap references are not recorded.
                            if row
                                .row_write(&mut row_buffer, &mut offset, rid, table_info)
                                .is_err()
                            {
                                continue;
                            }
                            extract_heap_refs_from_row(
                                &row_buffer,
                                &heap_fields,
                                &mut ref_strings,
                                &mut ref_blobs,
                                &mut ref_guids,
                            );
                        }
                    }
                });
            }
        }
    }

    (ref_strings, ref_blobs, ref_guids)
}
/// Records the heap references found in a slice of owned table rows.
///
/// Rows whose `table_id()` differs from `table_id` are ignored. Every other
/// row is serialized with `row_write` into a reusable scratch buffer, using
/// its 1-based position in `rows` as the row id, and the heap fields described
/// by `heap_fields` are then added to the matching `ref_*` set. Rows that fail
/// to serialize are skipped.
fn scan_table_data_owned_rows(
    rows: &[TableDataOwned],
    table_id: TableId,
    table_info: &TableInfoRef,
    heap_fields: &[HeapFieldDescriptor],
    ref_strings: &mut HashSet<u32>,
    ref_blobs: &mut HashSet<u32>,
    ref_guids: &mut HashSet<u32>,
) {
    let mut scratch = vec![0u8; calculate_table_row_size(table_id, table_info) as usize];

    let matching_rows = rows
        .iter()
        .enumerate()
        .filter(|(_, candidate)| candidate.table_id() == table_id);

    for (position, candidate) in matching_rows {
        // Row ids are 1-based; the position counts all rows, including skipped ones.
        #[allow(clippy::cast_possible_truncation)]
        let rid = (position + 1) as u32;
        let mut cursor = 0;
        let written = candidate.row_write(&mut scratch, &mut cursor, rid, table_info);
        if written.is_err() {
            continue;
        }
        extract_heap_refs_from_row(&scratch, heap_fields, ref_strings, ref_blobs, ref_guids);
    }
}
/// Reads each heap field out of a serialized row and records its value.
///
/// For every descriptor in `heap_fields`, the value at `offset` is read from
/// `row_buffer` as a little-endian integer (the wide form when the field size
/// is 4) and inserted into the set matching the descriptor's heap type.
///
/// A field is ignored when it would extend past the end of `row_buffer`, when
/// the read fails, when the value is 0, or when `ChangeRef::is_placeholder`
/// reports the value as a placeholder.
fn extract_heap_refs_from_row(
    row_buffer: &[u8],
    heap_fields: &[HeapFieldDescriptor],
    ref_strings: &mut HashSet<u32>,
    ref_blobs: &mut HashSet<u32>,
    ref_guids: &mut HashSet<u32>,
) {
    for descriptor in heap_fields {
        let field_end = descriptor.offset + descriptor.size;
        if field_end > row_buffer.len() {
            continue;
        }

        let wide = descriptor.size == 4;
        let mut cursor = descriptor.offset;
        let value = match read_le_at_dyn(row_buffer, &mut cursor, wide) {
            Ok(value) => value,
            Err(_) => continue,
        };
        if value == 0 || ChangeRef::is_placeholder(value) {
            continue;
        }

        // Pick the destination set once, then insert.
        let target = match descriptor.heap_type {
            HeapType::String => &mut *ref_strings,
            HeapType::Blob => &mut *ref_blobs,
            HeapType::Guid => &mut *ref_guids,
        };
        target.insert(value);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // A default-constructed value has all counters at zero.
    #[test]
    fn test_compaction_stats_default() {
        let empty = CompactionStats::default();
        assert_eq!(empty.total_removed(), 0);
        assert!(!empty.has_removals());
    }

    // Non-zero counters are reported and summed across all three heaps.
    #[test]
    fn test_compaction_stats_with_removals() {
        let populated = CompactionStats {
            guids: 1,
            blobs: 3,
            strings: 5,
        };
        assert_eq!(populated.total_removed(), 9);
        assert!(populated.has_removals());
    }
}