use crate::{
cilassembly::{Operation, TableModifications},
metadata::{
tables::{TableDataOwned, TableId},
validation::{
context::{RawValidationContext, ValidationContext},
traits::RawValidator,
},
},
Error, Result,
};
use rustc_hash::{FxHashMap, FxHashSet};
use std::collections::HashMap;
/// Raw-stage validator that checks the integrity of pending assembly modifications.
///
/// The type is a stateless unit struct; the individual checks are implemented as
/// associated functions and are driven from its [`RawValidator`] implementation.
pub struct RawChangeIntegrityValidator;
impl RawChangeIntegrityValidator {
/// Creates a new validator instance.
///
/// The validator carries no state, so this simply returns the unit value.
#[must_use]
pub fn new() -> Self {
Self
}
fn validate_table_integrity(
table_changes: &HashMap<TableId, TableModifications>,
) -> Result<()> {
for (table_id, modifications) in table_changes {
match modifications {
TableModifications::Sparse {
operations,
next_rid,
original_row_count,
deleted_rows,
..
} => {
let mut final_rids = FxHashSet::default();
for rid in 1..=*original_row_count {
if !deleted_rows.contains(&rid) {
final_rids.insert(rid);
}
}
for operation in operations {
if let Operation::Insert(rid, _) = &operation.operation {
if final_rids.contains(rid) {
return Err(malformed_error!(
"Table {:?} integrity violation: RID {} conflicts with existing row after modifications",
table_id,
rid
));
}
final_rids.insert(*rid);
}
}
if let Some(&max_rid) = final_rids.iter().max() {
let expected_min_count =
u32::try_from(final_rids.len() * 7 / 10).unwrap_or(0);
if max_rid > expected_min_count.max(1) * 2 {
return Err(malformed_error!(
"Table {:?} integrity violation: RID sequence too sparse - max RID {} with only {} rows (>70% gaps)",
table_id,
max_rid,
final_rids.len()
));
}
}
if let Some(&max_rid) = final_rids.iter().max() {
if *next_rid <= max_rid {
return Err(malformed_error!(
"Table {:?} integrity violation: next_rid {} is not greater than max existing RID {}",
table_id,
next_rid,
max_rid
));
}
}
if matches!(table_id, TableId::Module) && !final_rids.contains(&1) {
return Err(malformed_error!(
"Table {:?} integrity violation: Module table must contain RID 1 (primary module entry)",
table_id
));
}
}
TableModifications::Replaced(rows) => {
if rows.is_empty() && matches!(table_id, TableId::Module | TableId::Assembly) {
return Err(malformed_error!(
"Table {:?} integrity violation: Critical table cannot be empty after replacement",
table_id
));
}
if rows.len() > 1_000_000 {
return Err(malformed_error!(
"Table {:?} integrity violation: Replacement table too large ({} rows) - potential corruption",
table_id,
rows.len()
));
}
}
}
}
Ok(())
}
/// Rejects change sets that add an excessive number of heap entries.
///
/// The limits are 100,000 string additions, 50,000 blob additions, 10,000 GUID
/// additions and 50,000 user-string additions. A context without changes
/// passes trivially.
///
/// # Errors
///
/// Returns a malformed-data error naming the first heap whose addition count
/// exceeds its limit.
fn validate_heap_integrity(context: &RawValidationContext) -> Result<()> {
    let Some(changes) = context.changes() else {
        return Ok(());
    };

    let string_additions = changes.string_heap_changes.additions_count();
    if string_additions > 100_000 {
        return Err(malformed_error!(
            "String heap integrity violation: Too many string additions ({}) - potential memory exhaustion",
            string_additions
        ));
    }

    let blob_additions = changes.blob_heap_changes.additions_count();
    if blob_additions > 50_000 {
        return Err(malformed_error!(
            "Blob heap integrity violation: Too many blob additions ({}) - potential memory exhaustion",
            blob_additions
        ));
    }

    let guid_additions = changes.guid_heap_changes.additions_count();
    if guid_additions > 10_000 {
        return Err(malformed_error!(
            "GUID heap integrity violation: Too many GUID additions ({}) - potential memory exhaustion",
            guid_additions
        ));
    }

    let userstring_additions = changes.userstring_heap_changes.additions_count();
    if userstring_additions > 50_000 {
        return Err(malformed_error!(
            "User string heap integrity violation: Too many user string additions ({}) - potential memory exhaustion",
            userstring_additions
        ));
    }

    Ok(())
}
/// Checks cross-table ownership relationships after all changes are applied.
///
/// The final RID set of every modified table is reconstructed first. When the
/// `TypeDef` table is part of the change set:
///
/// * a modified `Field` table must not contain rows if no TypeDef remains, and
///   its rows must satisfy [`Self::validate_field_ownership_ranges`];
/// * a modified `MethodDef` table must not contain rows if no TypeDef remains.
///
/// # Errors
///
/// Returns a malformed-data error for the first violated relationship.
fn validate_reference_integrity(
    table_changes: &HashMap<TableId, TableModifications>,
) -> Result<()> {
    let final_table_rids: FxHashMap<TableId, FxHashSet<u32>> = table_changes
        .iter()
        .map(|(table_id, modifications)| {
            let rids: FxHashSet<u32> = match modifications {
                TableModifications::Sparse {
                    operations,
                    original_row_count,
                    deleted_rows,
                    ..
                } => {
                    let surviving =
                        (1..=*original_row_count).filter(|rid| !deleted_rows.contains(rid));
                    let inserted = operations.iter().filter_map(|entry| {
                        if let Operation::Insert(rid, _) = &entry.operation {
                            Some(*rid)
                        } else {
                            None
                        }
                    });
                    surviving.chain(inserted).collect()
                }
                // A replaced table is numbered densely from 1.
                TableModifications::Replaced(rows) => {
                    (1..=u32::try_from(rows.len()).unwrap_or(u32::MAX)).collect()
                }
            };
            (*table_id, rids)
        })
        .collect();

    // Every check below needs the TypeDef table to be part of the change set.
    let Some(typedef_rids) = final_table_rids.get(&TableId::TypeDef) else {
        return Ok(());
    };

    if let Some(field_rids) = final_table_rids.get(&TableId::Field) {
        if typedef_rids.is_empty() && !field_rids.is_empty() {
            return Err(malformed_error!(
                "Reference integrity violation: Fields exist but no TypeDef entries - orphaned fields detected"
            ));
        }
        Self::validate_field_ownership_ranges(typedef_rids, field_rids, table_changes)?;
    }

    if let Some(method_rids) = final_table_rids.get(&TableId::MethodDef) {
        if typedef_rids.is_empty() && !method_rids.is_empty() {
            return Err(malformed_error!(
                "Reference integrity violation: Methods exist but no TypeDef entries - orphaned methods detected"
            ));
        }
    }

    Ok(())
}
/// Detects conflicting or suspicious operation sequences in sparse tables.
///
/// For each sparse modification set, adjacent operations must have
/// non-decreasing timestamps, and the set must not contain more than 10,000
/// operations. Replaced tables are not inspected.
///
/// # Errors
///
/// Returns a malformed-data error for the first out-of-order pair or for the
/// first table that exceeds the operation limit.
fn validate_change_conflicts(
    table_changes: &HashMap<TableId, TableModifications>,
) -> Result<()> {
    for (table_id, modifications) in table_changes {
        let TableModifications::Sparse { operations, .. } = modifications else {
            continue;
        };

        // First adjacent pair whose earlier entry carries the later timestamp.
        let out_of_order = operations
            .windows(2)
            .find(|pair| pair[0].timestamp > pair[1].timestamp);
        if let Some(pair) = out_of_order {
            return Err(malformed_error!(
                "Change conflict detected: Operations for table {:?} not in chronological order - {} > {}",
                table_id,
                pair[0].timestamp,
                pair[1].timestamp
            ));
        }

        let total_operations = operations.len();
        if total_operations > 10_000 {
            return Err(malformed_error!(
                "Change conflict detected: Table {:?} has excessive operations ({}) - potential conflict storm",
                table_id,
                total_operations
            ));
        }
    }

    Ok(())
}
fn validate_field_ownership_ranges(
typedef_rids: &FxHashSet<u32>,
field_rids: &FxHashSet<u32>,
table_changes: &HashMap<TableId, TableModifications>,
) -> Result<()> {
let Some(typedef_modifications) = table_changes.get(&TableId::TypeDef) else {
return Ok(()); };
let mut typedef_field_lists: Vec<(u32, u32)> = Vec::new();
match typedef_modifications {
TableModifications::Sparse {
operations,
original_row_count,
deleted_rows,
..
} => {
for rid in 1..=*original_row_count {
if !deleted_rows.contains(&rid) && typedef_rids.contains(&rid) {
}
}
for operation in operations {
if let Operation::Insert(rid, data) = &operation.operation {
if typedef_rids.contains(rid) {
if let TableDataOwned::TypeDef(typedef_row) = data {
typedef_field_lists.push((*rid, typedef_row.field_list));
}
}
}
}
}
TableModifications::Replaced(rows) => {
for (i, row_data) in rows.iter().enumerate() {
let rid = u32::try_from(i + 1).map_err(|_| Error::ValidationRawFailed {
validator: "integrity".to_string(),
message: "Table row index exceeds u32 range".to_string(),
})?;
if typedef_rids.contains(&rid) {
if let TableDataOwned::TypeDef(typedef_row) = row_data {
typedef_field_lists.push((rid, typedef_row.field_list));
}
}
}
}
}
typedef_field_lists.sort_by_key(|(_, field_list)| *field_list);
for i in 0..typedef_field_lists.len() {
let (typedef_rid, field_list_start) = typedef_field_lists[i];
let field_list_end = if i + 1 < typedef_field_lists.len() {
typedef_field_lists[i + 1].1 } else {
field_rids.iter().max().map_or(1, |max| max + 1)
};
if field_list_start > 0 {
for field_rid in field_list_start..field_list_end {
if !field_rids.contains(&field_rid) {
return Err(malformed_error!(
"Field ownership violation: TypeDef RID {} expects field RID {} but field was deleted",
typedef_rid,
field_rid
));
}
}
}
}
let mut owned_fields: FxHashSet<u32> = FxHashSet::default();
for (_, field_list_start) in &typedef_field_lists {
if *field_list_start > 0 {
owned_fields.insert(*field_list_start);
}
}
let min_owned_field = owned_fields.iter().min().copied().unwrap_or(u32::MAX);
let _max_field = field_rids.iter().max().copied().unwrap_or(0);
if min_owned_field != u32::MAX && min_owned_field > 1 {
for field_rid in 1..min_owned_field {
if field_rids.contains(&field_rid) {
return Err(malformed_error!(
"Orphaned field detected: Field RID {} exists but is not owned by any TypeDef",
field_rid
));
}
}
}
Ok(())
}
}
impl RawValidator for RawChangeIntegrityValidator {
    /// Runs the table, heap, reference and conflict checks, in that order,
    /// stopping at the first failure. A context without changes passes.
    fn validate_raw(&self, context: &RawValidationContext) -> Result<()> {
        let Some(changes) = context.changes() else {
            return Ok(());
        };
        let tables = &changes.table_changes;

        Self::validate_table_integrity(tables)?;
        Self::validate_heap_integrity(context)?;
        Self::validate_reference_integrity(tables)?;
        Self::validate_change_conflicts(tables)
    }

    /// Name under which this validator is reported.
    fn name(&self) -> &'static str {
        "RawChangeIntegrityValidator"
    }

    /// Priority value of this validator.
    fn priority(&self) -> u32 {
        100
    }

    /// Runs only when structural validation is enabled and the context is a
    /// modification validation.
    fn should_run(&self, context: &RawValidationContext) -> bool {
        let config = context.config();
        config.enable_structural_validation && context.is_modification_validation()
    }
}
/// `Default` delegates to [`RawChangeIntegrityValidator::new`].
impl Default for RawChangeIntegrityValidator {
/// Returns the same value as `new()`.
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
cilassembly::{AssemblyChanges, Operation, TableModifications, TableOperation},
metadata::{
cilassemblyview::CilAssemblyView,
tables::{CodedIndex, CodedIndexType, TableDataOwned, TableId, TypeDefRaw},
token::Token,
validation::{
context::RawValidationContext, scanner::ReferenceScanner, ValidationConfig,
},
},
test::{
factories::validation::raw_modification_integrity::{
create_dummy_field, create_dummy_method, create_dummy_typedef,
raw_change_integrity_validator_file_factory,
},
get_testfile_wb, validator_test,
},
Error,
};
use rayon::ThreadPoolBuilder;
use std::collections::{HashMap, HashSet};
#[test]
fn test_raw_change_integrity_validator_direct_corruption() -> Result<()> {
    // Sparse modification set with empty `inserted_rows` / `change_refs`.
    fn sparse(
        operations: Vec<TableOperation>,
        next_rid: u32,
        original_row_count: u32,
        deleted_rows: HashSet<u32>,
    ) -> TableModifications {
        TableModifications::Sparse {
            operations,
            next_rid,
            original_row_count,
            deleted_rows,
            inserted_rows: HashSet::new(),
            change_refs: HashMap::new(),
        }
    }

    // Timestamped insert of a dummy TypeDef row at `rid`.
    fn typedef_insert(rid: u32, timestamp: u64) -> Result<TableOperation> {
        let row = create_dummy_typedef(rid)?;
        Ok(TableOperation::new_with_timestamp(
            Operation::Insert(rid, TableDataOwned::TypeDef(row)),
            timestamp,
        ))
    }

    // Change set that touches exactly one table.
    fn single_table(table_id: TableId, modifications: TableModifications) -> AssemblyChanges {
        let mut changes = AssemblyChanges::new();
        changes.table_changes.insert(table_id, modifications);
        changes
    }

    let validator = RawChangeIntegrityValidator::new();
    let rejects = |changes: AssemblyChanges| {
        test_validator_with_corrupted_changes(&validator, changes).is_err()
    };

    // Insert at RID 1 while original row 1 still exists.
    {
        let operations = vec![typedef_insert(1, 1000)?];
        let changes = single_table(TableId::TypeDef, sparse(operations, 2, 1, HashSet::new()));
        assert!(rejects(changes));
    }

    // Insert at RID 100 next to a single original row.
    {
        let operations = vec![typedef_insert(100, 1000)?];
        let changes = single_table(TableId::TypeDef, sparse(operations, 101, 1, HashSet::new()));
        assert!(rejects(changes));
    }

    // `next_rid` (5) equal to the inserted RID (5).
    {
        let operations = vec![typedef_insert(5, 1000)?];
        let changes = single_table(TableId::TypeDef, sparse(operations, 5, 1, HashSet::new()));
        assert!(rejects(changes));
    }

    // Module table with its only row (RID 1) deleted.
    {
        let deleted_rows = HashSet::from([1]);
        let changes = single_table(TableId::Module, sparse(Vec::new(), 2, 1, deleted_rows));
        assert!(rejects(changes));
    }

    // Module table replaced with an empty table.
    {
        let changes = single_table(TableId::Module, TableModifications::Replaced(Vec::new()));
        assert!(rejects(changes));
    }

    // TypeDef table replaced with 1,000,001 rows.
    {
        let huge_table = (0..1_000_001)
            .map(|_| create_dummy_typedef(1).map(TableDataOwned::TypeDef))
            .collect::<Result<Vec<_>>>()?;
        let changes = single_table(TableId::TypeDef, TableModifications::Replaced(huge_table));
        assert!(rejects(changes));
    }

    // A Field row exists while the TypeDef table is replaced with nothing.
    {
        let mut changes =
            single_table(TableId::TypeDef, TableModifications::Replaced(Vec::new()));
        let field_insert = TableOperation::new_with_timestamp(
            Operation::Insert(1, TableDataOwned::Field(create_dummy_field(1)?)),
            1000,
        );
        changes
            .table_changes
            .insert(TableId::Field, sparse(vec![field_insert], 2, 0, HashSet::new()));
        assert!(rejects(changes));
    }

    // A MethodDef row exists while the TypeDef table is replaced with nothing.
    {
        let mut changes =
            single_table(TableId::TypeDef, TableModifications::Replaced(Vec::new()));
        let method_insert = TableOperation::new_with_timestamp(
            Operation::Insert(1, TableDataOwned::MethodDef(create_dummy_method(1)?)),
            1000,
        );
        changes.table_changes.insert(
            TableId::MethodDef,
            sparse(vec![method_insert], 2, 0, HashSet::new()),
        );
        assert!(rejects(changes));
    }

    // Operations whose timestamps go backwards (2000 then 1000).
    {
        let operations = vec![typedef_insert(2, 2000)?, typedef_insert(3, 1000)?];
        let changes = single_table(TableId::TypeDef, sparse(operations, 4, 1, HashSet::new()));
        assert!(rejects(changes));
    }

    // 10,001 operations on one table.
    {
        let operations = (0..10_001u32)
            .map(|i| typedef_insert(i + 2, 1000 + u64::from(i)))
            .collect::<Result<Vec<_>>>()?;
        let changes = single_table(
            TableId::TypeDef,
            sparse(operations, 10_003, 1, HashSet::new()),
        );
        assert!(rejects(changes));
    }

    // TypeDef whose `field_list` (100) points into an empty Field table. This
    // case is not asserted; a warning is printed if the validator accepts it.
    {
        let typedef_operation = TableOperation::new(Operation::Insert(
            1,
            TableDataOwned::TypeDef(TypeDefRaw {
                rid: 1,
                token: Token::new(1 | 0x0200_0000),
                offset: 0,
                flags: 0x00100001,
                type_name: 0,
                type_namespace: 0,
                extends: CodedIndex::new(TableId::TypeRef, 1, CodedIndexType::TypeDefOrRef),
                field_list: 100,
                method_list: 1,
            }),
        ));
        let mut changes = single_table(
            TableId::TypeDef,
            sparse(vec![typedef_operation], 2, 0, HashSet::new()),
        );
        changes
            .table_changes
            .insert(TableId::Field, sparse(vec![], 1, 0, HashSet::new()));

        if !rejects(changes) {
            println!(
                "WARNING: Field ownership validation did not detect the expected violation"
            );
        }
    }

    println!("All RawChangeIntegrityValidator corruption tests passed successfully!");
    Ok(())
}
fn test_validator_with_corrupted_changes(
validator: &RawChangeIntegrityValidator,
corrupted_changes: AssemblyChanges,
) -> Result<()> {
let Some(clean_testfile) = get_testfile_wb() else {
return Err(Error::Other("WindowsBase.dll not available".to_string()));
};
let view = CilAssemblyView::from_path(&clean_testfile)?;
let config = ValidationConfig {
enable_structural_validation: true,
..Default::default()
};
let scanner = ReferenceScanner::from_view(&view)?;
let thread_pool = ThreadPoolBuilder::new().num_threads(4).build().unwrap();
let context = RawValidationContext::new_for_modification(
&view,
&corrupted_changes,
&scanner,
&config,
&thread_pool,
);
validator.validate_raw(&context)
}
/// Drives the validator through the shared `validator_test` harness using the
/// file factory for this validator; failures are expected to be "Malformed".
#[test]
fn test_raw_change_integrity_validator() -> Result<()> {
    let config = ValidationConfig {
        enable_structural_validation: true,
        ..Default::default()
    };
    let validator = RawChangeIntegrityValidator::default();

    validator_test(
        raw_change_integrity_validator_file_factory,
        "RawChangeIntegrityValidator",
        "Malformed",
        config,
        |context| validator.validate_raw(context),
    )
}
}