use std::collections::{hash_map::Entry, BTreeMap, BTreeSet, HashMap, HashSet};
use crate::{
cilassembly::{
compute_entry_points, find_unreferenced_types, CilAssembly, CleanupRequest, GeneratorConfig,
},
compiler::{EventKind, ProxyDevirtualizationPass},
deobfuscation::{
context::AnalysisContext, engine::DeobfuscationEngine, renamer, techniques::Detections,
},
metadata::{
tables::{
AssemblyRaw, FieldRaw, ModuleRaw, NestedClassRaw, TableDataOwned, TableId, TypeDefRaw,
},
token::Token,
typesystem::wellknown,
validation::ValidationConfig,
},
CilObject, Result,
};
pub(crate) fn build_cleanup_request(
engine: &DeobfuscationEngine,
ctx: &AnalysisContext,
detections: &Detections,
assembly: &CilObject,
ssa_call_graph: &BTreeMap<Token, BTreeSet<Token>>,
) -> CleanupRequest {
let registry = engine.technique_registry();
let mut request = detections.merged_cleanup();
for tech in registry.sorted_techniques(detections) {
if !detections.is_detected(tech.id()) {
continue;
}
let detection = detections.get(tech.id()).unwrap();
if let Some(tech_cleanup) = tech.cleanup(detection) {
request.merge(&tech_cleanup);
}
}
let removable = ctx.decryptors.removable_decryptors();
for token in ctx.decryptors.registered_tokens() {
if !removable.contains(&token) {
request.protect_token(token);
}
}
for token in removable {
request.add_method(token);
}
sweep_dead_module_methods(assembly, &mut request, ssa_call_graph, ctx);
mark_orphaned_proxy_stubs(assembly, &request, ssa_call_graph, ctx);
sweep_inlined_unreferenced_methods(assembly, &mut request, ssa_call_graph, ctx);
let unreferenced_types = find_unreferenced_types(assembly, ssa_call_graph, &request);
for type_token in unreferenced_types {
request.add_type(type_token);
}
let aggressive = ctx.config.cleanup.remove_unused_methods;
if aggressive {
let entry_points = compute_entry_points(assembly, aggressive);
for token in ctx.dead_methods.iter() {
let token = *token;
if !entry_points.contains(&token) {
request.add_method(token);
}
}
}
remove_dispensable_types(assembly, &mut request, ctx);
if ctx.config.cleanup.remove_orphan_metadata {
let neutralized = ctx.neutralized_tokens.iter().map(|t| *t);
request.add_rewrite_orphaned_tokens(neutralized);
}
sweep_empty_module_cctor(assembly, &mut request, ctx);
request
}
/// Applies a cleanup request to `assembly` and regenerates the resulting [`CilObject`].
///
/// When `cleanup_request` is `None`, an empty request is built from the cleanup settings in
/// `ctx.config`. If there is nothing to delete, no section to exclude, renaming is disabled
/// and `needs_metadata_repair` reports no problem, the input assembly is returned untouched.
///
/// Otherwise the function records the planned removals as events, collects rename entries
/// (if enabled), runs the metadata repairs, applies the request and the renames, and
/// rebuilds the assembly with the excluded sections left out.
///
/// # Errors
///
/// Propagates failures from `renamer::renames_collect`, `renamer::renames_apply` and
/// `into_cilobject_with`.
pub fn execute_cleanup(
    assembly: CilObject,
    cleanup_request: Option<CleanupRequest>,
    ctx: &AnalysisContext,
) -> Result<CilObject> {
    let cleanup_config = &ctx.config.cleanup;
    let mut request = match cleanup_request {
        Some(existing) => existing,
        None => CleanupRequest::with_settings(
            cleanup_config.remove_orphan_metadata,
            cleanup_config.remove_empty_types,
        ),
    };
    let rename_obfuscated = cleanup_config.rename_obfuscated_names;

    // Fast path: skip the regeneration entirely when no work is pending.
    let has_work = request.has_deletions()
        || !request.excluded_sections().is_empty()
        || rename_obfuscated
        || needs_metadata_repair(&assembly);
    if !has_work {
        return Ok(assembly);
    }

    let types_count = request.types_len();
    let methods_count = request.methods_len();
    let fields_count = request.fields_len();
    if types_count > 0 || methods_count > 0 || fields_count > 0 {
        log::info!("Cleanup: {types_count} types, {methods_count} methods, {fields_count} fields");
    }

    for section_name in request.excluded_sections() {
        ctx.events
            .record(EventKind::ArtifactRemoved)
            .message(format!("Removing artifact section: {section_name}"));
    }
    log_cleanup_request(&request, &assembly, ctx);

    // Rename entries are collected from the `CilObject` before it is consumed below.
    let rename_entries = rename_obfuscated
        .then(|| renamer::renames_collect(&assembly, cleanup_config.smart_rename.as_ref()))
        .transpose()?;

    let mut cil_assembly = assembly.into_assembly();
    repair_duplicate_assembly_rows(&mut cil_assembly, ctx);
    repair_duplicate_module_rows(&mut cil_assembly, ctx);
    repair_invalid_module_guids(&mut cil_assembly, ctx);
    let duplicate_types = repair_duplicate_typedef_rows(&mut cil_assembly, ctx);
    repair_global_field_visibility(&mut cil_assembly, ctx);

    let excluded_sections: HashSet<String> = request.excluded_sections().clone();

    // Duplicate TypeDefs that still own members go through the regular type cleanup.
    duplicate_types.into_iter().for_each(|token| {
        request.add_type(token);
    });
    cil_assembly.add_cleanup(&request);

    if let Some(entries) = rename_entries {
        let count = renamer::renames_apply(&mut cil_assembly, entries)?;
        if count > 0 {
            ctx.events
                .record(EventKind::ArtifactRemoved)
                .message(format!(
                    "Renamed {count} obfuscated names to simple identifiers"
                ));
        }
    }

    let generator_config = GeneratorConfig::default().with_excluded_sections(excluded_sections);
    cil_assembly.into_cilobject_with(ValidationConfig::analysis(), generator_config)
}
/// Records one `ArtifactRemoved` event for every type, method, field and custom attribute
/// scheduled in `request`.
///
/// Types are reported by name when the type registry of `assembly` knows the token, and by
/// TypeDef RID otherwise.
fn log_cleanup_request(request: &CleanupRequest, assembly: &CilObject, ctx: &AnalysisContext) {
    for type_token in request.types() {
        let message = match assembly.types().get(type_token) {
            Some(cil_type) => format!(
                "Removing type: {} (0x{:08X})",
                cil_type.name,
                type_token.value()
            ),
            None => format!("Removing type: TypeDef RID {}", type_token.row()),
        };
        ctx.events
            .record(EventKind::ArtifactRemoved)
            .message(message);
    }

    request.methods().for_each(|method_token| {
        ctx.events
            .record(EventKind::ArtifactRemoved)
            .method(*method_token)
            .message("Removing method");
    });

    request.fields().for_each(|field_token| {
        ctx.events
            .record(EventKind::ArtifactRemoved)
            .message(format!("Removing field 0x{:08X}", field_token.value()));
    });

    request.attributes().for_each(|attr_token| {
        ctx.events
            .record(EventKind::ArtifactRemoved)
            .message(format!(
                "Removing custom attribute 0x{:08X}",
                attr_token.value()
            ));
    });
}
/// Removes every Assembly table row after the first one.
///
/// Does nothing when the table has at most one row. Rows are removed from the highest RID
/// downwards. A failed removal is logged as a warning and does not count as repaired; the
/// info log and the `ArtifactRemoved` event report only the rows that were actually removed
/// and are skipped entirely when no removal succeeded.
fn repair_duplicate_assembly_rows(cil_assembly: &mut CilAssembly, ctx: &AnalysisContext) {
    let row_count = cil_assembly.original_table_row_count(TableId::Assembly);
    if row_count <= 1 {
        return;
    }

    // Count successes instead of assuming `row_count - 1`, so the report stays truthful
    // when a removal fails.
    let mut duplicates = 0u32;
    for rid in (2..=row_count).rev() {
        match cil_assembly.table_row_remove(TableId::Assembly, rid) {
            Ok(_) => duplicates += 1,
            Err(e) => log::warn!("Failed to remove duplicate Assembly row {rid}: {e}"),
        }
    }
    if duplicates == 0 {
        return;
    }

    log::info!(
        "Repaired Assembly table: removed {duplicates} duplicate row(s) (ECMA-335 §22.2 violation)"
    );
    ctx.events
        .record(EventKind::ArtifactRemoved)
        .message(format!(
            "Repaired Assembly table: removed {duplicates} duplicate row(s)"
        ));
}
/// Removes every Module table row after the first one.
///
/// Does nothing when the table has at most one row. Rows are removed from the highest RID
/// downwards. A failed removal is logged as a warning and does not count as repaired; the
/// info log and the `ArtifactRemoved` event report only the rows that were actually removed
/// and are skipped entirely when no removal succeeded.
fn repair_duplicate_module_rows(cil_assembly: &mut CilAssembly, ctx: &AnalysisContext) {
    let row_count = cil_assembly.original_table_row_count(TableId::Module);
    if row_count <= 1 {
        return;
    }

    // Count successes instead of assuming `row_count - 1`, so the report stays truthful
    // when a removal fails.
    let mut duplicates = 0u32;
    for rid in (2..=row_count).rev() {
        match cil_assembly.table_row_remove(TableId::Module, rid) {
            Ok(_) => duplicates += 1,
            Err(e) => log::warn!("Failed to remove duplicate Module row {rid}: {e}"),
        }
    }
    if duplicates == 0 {
        return;
    }

    log::info!(
        "Repaired Module table: removed {duplicates} duplicate row(s) (ECMA-335 §22.30 violation)"
    );
    ctx.events
        .record(EventKind::ArtifactRemoved)
        .message(format!(
            "Repaired Module table: removed {duplicates} duplicate row(s)"
        ));
}
/// Clears out-of-range `encid` / `encbaseid` GUID indices on Module row 1.
///
/// The number of available GUIDs is derived from the GUID heap size (16 bytes per entry;
/// zero when there is no heap). An index is considered bad when it is non-zero and greater
/// than that count; bad indices are rewritten to 0, valid ones are left as they are.
/// Returns without changes when the tables, the Module table or row 1 are missing, or when
/// both indices are in range. A failed row update is logged as a warning and nothing is
/// reported as repaired.
fn repair_invalid_module_guids(cil_assembly: &mut CilAssembly, ctx: &AnalysisContext) {
    let guid_count: u32 = cil_assembly
        .view()
        .guids()
        .map_or(0, |g| (g.data().len() / 16) as u32);
    let Some(tables) = cil_assembly.view().tables() else {
        return;
    };
    let Some(module_table) = tables.table::<ModuleRaw>() else {
        return;
    };
    let Some(row) = module_table.get(1) else {
        return;
    };

    let bad_encid = row.encid != 0 && row.encid > guid_count;
    let bad_encbaseid = row.encbaseid != 0 && row.encbaseid > guid_count;
    if !bad_encid && !bad_encbaseid {
        return;
    }

    let original_encid = row.encid;
    let original_encbaseid = row.encbaseid;
    // Only the offending index is zeroed; a valid one keeps its value.
    let new_encid = if bad_encid { 0 } else { original_encid };
    let new_encbaseid = if bad_encbaseid { 0 } else { original_encbaseid };

    let fixed = ModuleRaw {
        rid: row.rid,
        token: row.token,
        offset: row.offset,
        generation: row.generation,
        name: row.name,
        mvid: row.mvid,
        encid: new_encid,
        encbaseid: new_encbaseid,
    };
    if let Err(e) = cil_assembly.table_row_update(TableId::Module, 1, TableDataOwned::Module(fixed))
    {
        log::warn!("Failed to repair Module row 1 GUID indices: {e}");
        return;
    }

    // Report the value actually written so a "kept" index is not shown as "→ 0".
    log::info!(
        "Repaired Module row 1 GUIDs: encid {original_encid} → {new_encid} ({}), encbaseid {original_encbaseid} → {new_encbaseid} ({}); heap has {guid_count} GUID(s)",
        if bad_encid { "fixed" } else { "kept" },
        if bad_encbaseid { "fixed" } else { "kept" },
    );
    ctx.events
        .record(EventKind::ArtifactRemoved)
        .message(format!(
            "Repaired Module row 1: cleared out-of-bounds ENC GUID indices \
             (encid={original_encid}, encbaseid={original_encbaseid})"
        ));
}
/// Reports whether `assembly` has metadata defects that the repair steps would fix.
///
/// Returns `true` when the Module table has more than one row, when Module row 1 carries a
/// non-zero `encid` / `encbaseid` beyond the number of GUIDs in the GUID heap, or when the
/// Assembly table has more than one row. Returns `false` when the assembly has no tables.
fn needs_metadata_repair(assembly: &CilObject) -> bool {
    let Some(tables) = assembly.tables() else {
        return false;
    };

    let module_needs_repair = tables.table::<ModuleRaw>().is_some_and(|t| {
        if t.row_count > 1 {
            return true;
        }
        t.get(1).is_some_and(|row| {
            // 16 bytes per GUID heap entry; no heap means no valid index.
            let guid_count: u32 = assembly.guids().map_or(0, |g| (g.data().len() / 16) as u32);
            let encid_out_of_range = row.encid != 0 && row.encid > guid_count;
            let encbaseid_out_of_range = row.encbaseid != 0 && row.encbaseid > guid_count;
            encid_out_of_range || encbaseid_out_of_range
        })
    });
    if module_needs_repair {
        return true;
    }

    tables
        .table::<AssemblyRaw>()
        .is_some_and(|t| t.row_count > 1)
}
/// Detects TypeDef rows that repeat an earlier row's (name, namespace, enclosing class) key.
///
/// The first row with a given key is kept. A later duplicate whose method and field ranges
/// are both empty is removed directly, together with any NestedClass row that lists it as
/// the nested class. A duplicate that still owns methods or fields is not touched here;
/// its TypeDef token is returned so the caller can schedule it for regular cleanup.
///
/// Returns an empty vector when the tables or the string heap are unavailable, or when no
/// duplicate was found.
fn repair_duplicate_typedef_rows(
    cil_assembly: &mut CilAssembly,
    ctx: &AnalysisContext,
) -> Vec<Token> {
    let (Some(tables), Some(strings)) = (
        cil_assembly.view().tables(),
        cil_assembly.view().strings(),
    ) else {
        return Vec::new();
    };

    // (nested class RID, enclosing class RID, NestedClass row RID)
    let nested_class_entries: Vec<(u32, u32, u32)> = match tables.table::<NestedClassRaw>() {
        Some(rows) => rows
            .into_iter()
            .map(|r| (r.nested_class, r.enclosing_class, r.rid))
            .collect(),
        None => Vec::new(),
    };
    let nested_class_map: HashMap<u32, u32> = nested_class_entries
        .iter()
        .map(|&(nested, enclosing, _)| (nested, enclosing))
        .collect();

    let typedef_rows: Vec<TypeDefRaw> = match tables.table::<TypeDefRaw>() {
        Some(t) => t.into_iter().collect(),
        None => Vec::new(),
    };
    let method_count = cil_assembly.original_table_row_count(TableId::MethodDef);
    let field_count = cil_assembly.original_table_row_count(TableId::Field);

    let mut seen: HashMap<(String, String, u32), u32> = HashMap::new();
    let mut duplicates_to_remove: Vec<u32> = Vec::new();
    let mut duplicates_for_cleanup: Vec<Token> = Vec::new();

    for row in &typedef_rows {
        let name = strings
            .get(row.type_name as usize)
            .unwrap_or_default()
            .to_string();
        let namespace = strings
            .get(row.type_namespace as usize)
            .unwrap_or_default()
            .to_string();
        // 0 stands for "not nested".
        let enclosing = nested_class_map.get(&row.rid).copied().unwrap_or(0);

        match seen.entry((name, namespace, enclosing)) {
            Entry::Vacant(slot) => {
                slot.insert(row.rid);
            }
            Entry::Occupied(_) => {
                // A row's member ranges end where the next row's begin, or one past the
                // end of the member table when there is no next row.
                let successor = typedef_rows.iter().find(|t| t.rid == row.rid + 1);
                let method_end = successor.map_or(method_count + 1, |t| t.method_list);
                let field_end = successor.map_or(field_count + 1, |t| t.field_list);

                let owns_nothing = method_end <= row.method_list && field_end <= row.field_list;
                if owns_nothing {
                    duplicates_to_remove.push(row.rid);
                } else {
                    duplicates_for_cleanup.push(Token::from_parts(TableId::TypeDef, row.rid));
                }
            }
        }
    }

    let removed_count = duplicates_to_remove.len();
    let cleanup_count = duplicates_for_cleanup.len();
    if removed_count == 0 && cleanup_count == 0 {
        return Vec::new();
    }

    if removed_count > 0 {
        let dup_set: HashSet<u32> = duplicates_to_remove.iter().copied().collect();
        for &(nested, _, nc_rid) in &nested_class_entries {
            if !dup_set.contains(&nested) {
                continue;
            }
            if let Err(e) = cil_assembly.table_row_remove(TableId::NestedClass, nc_rid) {
                log::warn!("Failed to remove NestedClass row {nc_rid}: {e}");
            }
        }
        // Removal happens in reverse discovery order.
        for rid in duplicates_to_remove.iter().rev() {
            if let Err(e) = cil_assembly.table_row_remove(TableId::TypeDef, *rid) {
                log::warn!("Failed to remove duplicate TypeDef row {rid}: {e}");
            }
        }
    }

    let total = removed_count + cleanup_count;
    log::info!(
        "Repaired TypeDef table: {removed_count} removed, {cleanup_count} scheduled for cleanup ({total} total duplicates, ECMA-335 §22.37)"
    );
    ctx.events
        .record(EventKind::ArtifactRemoved)
        .message(format!(
            "Repaired TypeDef table: {removed_count} removed, {cleanup_count} scheduled for cleanup ({total} duplicates)"
        ));

    duplicates_for_cleanup
}
/// Resets invalid access flags on the fields owned by the first TypeDef row (`<Module>`).
///
/// The field range runs from the first TypeDef's `field_list` up to (not including) the
/// second TypeDef's `field_list`, or to the end of the Field table when there is only one
/// TypeDef. For every field in that range whose access bits (`flags & 0x0007`) are not one
/// of `0x0000`, `0x0001` or `0x0006`, the access bits are replaced with `0x0001`; all other
/// flag bits are preserved. In ECMA-335 `FieldAttributes` these values are
/// CompilerControlled, Private and Public respectively.
///
/// Failed updates are logged as warnings; the info log and the event are emitted only when
/// at least one field was repaired.
fn repair_global_field_visibility(cil_assembly: &mut CilAssembly, ctx: &AnalysisContext) {
    let Some(tables) = cil_assembly.view().tables() else {
        return;
    };
    let Some(typedefs) = tables.table::<TypeDefRaw>() else {
        return;
    };
    let typedef_rows: Vec<TypeDefRaw> = typedefs.into_iter().collect();
    let Some(module_type) = typedef_rows.first() else {
        return;
    };

    // RIDs are 1-based. Malformed metadata may carry a `field_list` of 0, which would make
    // `rid - 1` below underflow (a panic in debug builds), so the range starts at RID 1.
    let field_start = module_type.field_list.max(1);
    let field_end = match typedef_rows.get(1) {
        Some(next_type) => next_type.field_list,
        None => cil_assembly.original_table_row_count(TableId::Field) + 1,
    };
    if field_start >= field_end {
        return;
    }

    let Some(fields_table) = tables.table::<FieldRaw>() else {
        return;
    };
    let fields: Vec<FieldRaw> = fields_table.into_iter().collect();

    let mut repaired = 0;
    for rid in field_start..field_end {
        // Stop at the first RID past the end of the Field table.
        let Some(field) = fields.get((rid - 1) as usize) else {
            break;
        };
        let access = field.flags & 0x0007;
        if matches!(access, 0x0000 | 0x0001 | 0x0006) {
            continue;
        }

        let mut fixed = field.clone();
        fixed.flags = (fixed.flags & !0x0007) | 0x0001;
        match cil_assembly.table_row_update(TableId::Field, rid, TableDataOwned::Field(fixed)) {
            Ok(_) => repaired += 1,
            Err(e) => log::warn!("Failed to repair <Module> field {rid} visibility: {e}"),
        }
    }

    if repaired > 0 {
        log::info!(
            "Repaired {repaired} <Module> field(s) with invalid visibility (ECMA-335 §22.15)"
        );
        ctx.events
            .record(EventKind::ArtifactRemoved)
            .message(format!(
                "Repaired {repaired} <Module> field(s) with invalid visibility"
            ));
    }
}
/// Schedules `<Module>` methods for deletion once nothing surviving depends on them.
///
/// Starting from the methods already in `request` plus all methods of types already in
/// `request`, the module type's methods are scanned repeatedly until no new method is found:
///
/// * a `.cctor` is scheduled when it has at least one callee in `ssa_call_graph` and all of
///   its callees are deleted;
/// * any other method is scheduled when it has callers and all of them are deleted, or when
///   it has no callers and `ctx.was_inlined` reports it as inlined.
///
/// Does nothing when the assembly has no module type.
fn sweep_dead_module_methods(
    assembly: &CilObject,
    request: &mut CleanupRequest,
    ssa_call_graph: &BTreeMap<Token, BTreeSet<Token>>,
    ctx: &AnalysisContext,
) {
    let Some(module_type) = assembly.types().module_type() else {
        return;
    };

    // Seed: explicitly deleted methods plus every method of an explicitly deleted type.
    let mut deleted_methods: HashSet<Token> = request.methods().copied().collect();
    let type_registry = assembly.types();
    for type_token in request.types() {
        if let Some(cil_type) = type_registry.get(type_token) {
            deleted_methods.extend(cil_type.methods().map(|m| m.token));
        }
    }

    // Reverse call graph: callee -> set of callers.
    let mut callers_of: HashMap<Token, HashSet<Token>> = HashMap::new();
    for (caller, callees) in ssa_call_graph {
        for callee in callees {
            callers_of.entry(*callee).or_default().insert(*caller);
        }
    }

    // Each round can orphan further methods, so iterate to a fixed point.
    loop {
        let mut newly_dead = Vec::new();
        for method in module_type.methods() {
            if deleted_methods.contains(&method.token) {
                continue;
            }

            let is_dead = if method.is_cctor() {
                ssa_call_graph.get(&method.token).is_some_and(|callees| {
                    !callees.is_empty() && callees.iter().all(|c| deleted_methods.contains(c))
                })
            } else {
                let live_index = callers_of
                    .get(&method.token)
                    .filter(|callers| !callers.is_empty());
                match live_index {
                    Some(callers) => callers.iter().all(|c| deleted_methods.contains(c)),
                    None => ctx.was_inlined(method.token),
                }
            };

            if is_dead {
                newly_dead.push(method.token);
            }
        }

        if newly_dead.is_empty() {
            break;
        }
        for token in newly_dead {
            deleted_methods.insert(token);
            request.add_method(token);
        }
    }
}
/// Marks uncalled proxy-shaped methods as devirtualized in `ctx`.
///
/// A method from `ctx.ssa_functions` is considered when its token is in table `0x06`, it is
/// not scheduled for deletion, no surviving caller in `ssa_call_graph` calls it, it is not
/// already marked inlined or devirtualized, it is not the assembly entry point, and it is
/// not named `.ctor` / `.cctor`. Such a method is marked via `ctx.mark_devirtualized` when
/// `ProxyDevirtualizationPass::detect_proxy_pattern` matches its SSA form.
///
/// `request` is only read here; nothing is scheduled directly.
fn mark_orphaned_proxy_stubs(
    assembly: &CilObject,
    request: &CleanupRequest,
    ssa_call_graph: &BTreeMap<Token, BTreeSet<Token>>,
    ctx: &AnalysisContext,
) {
    let deleted_methods: HashSet<Token> = request.methods().copied().collect();

    // Every callee reachable from a caller that survives cleanup.
    let has_caller: HashSet<Token> = ssa_call_graph
        .iter()
        .filter(|(caller, _)| !deleted_methods.contains(*caller))
        .flat_map(|(_, callees)| callees.iter().copied())
        .collect();

    let entry_token = assembly.cor20header().entry_point_token;

    for entry in ctx.ssa_functions.iter() {
        let token = *entry.key();

        let is_candidate = token.table() == 0x06
            && !deleted_methods.contains(&token)
            && !has_caller.contains(&token)
            && !ctx.was_inlined(token)
            && !ctx.was_devirtualized(token)
            && token.value() != entry_token;
        if !is_candidate {
            continue;
        }

        let is_constructor = assembly
            .resolve_method_name(token)
            .is_some_and(|name| name == ".ctor" || name == ".cctor");
        if is_constructor {
            continue;
        }

        // A method without accessible SSA is treated as "not a proxy".
        let is_proxy = ctx
            .with_ssa(token, |ssa| {
                ProxyDevirtualizationPass::detect_proxy_pattern(ssa).is_some()
            })
            .unwrap_or(false);
        if is_proxy {
            ctx.mark_devirtualized(token);
        }
    }
}
/// Schedules inlined or devirtualized methods outside `<Module>` that no surviving method
/// calls.
///
/// Callers already scheduled for deletion are ignored when building the caller index.
/// Constructors (`.ctor`), static constructors (`.cctor`), already-deleted methods and all
/// methods of the module type are skipped.
fn sweep_inlined_unreferenced_methods(
    assembly: &CilObject,
    request: &mut CleanupRequest,
    ssa_call_graph: &BTreeMap<Token, BTreeSet<Token>>,
    ctx: &AnalysisContext,
) {
    let deleted_methods: HashSet<Token> = request.methods().copied().collect();

    // callee -> surviving callers
    let mut callers_of: HashMap<Token, HashSet<Token>> = HashMap::new();
    let surviving_edges = ssa_call_graph
        .iter()
        .filter(|(caller, _)| !deleted_methods.contains(*caller));
    for (caller, callees) in surviving_edges {
        for callee in callees {
            callers_of.entry(*callee).or_default().insert(*caller);
        }
    }

    let registry = assembly.types();
    for type_entry in registry.iter() {
        let cil_type = type_entry.value();
        if cil_type.is_module_type() {
            continue;
        }

        for method in cil_type.methods() {
            let exempt =
                deleted_methods.contains(&method.token) || method.is_cctor() || method.is_ctor();
            if exempt {
                continue;
            }

            let still_called = callers_of
                .get(&method.token)
                .is_some_and(|callers| !callers.is_empty());
            if !still_called
                && (ctx.was_inlined(method.token) || ctx.was_devirtualized(method.token))
            {
                request.add_method(method.token);
            }
        }
    }
}
fn remove_dispensable_types(
assembly: &CilObject,
request: &mut CleanupRequest,
ctx: &AnalysisContext,
) {
let registry = assembly.types();
for type_entry in registry.iter() {
let token: Token = *type_entry.key();
if token.table() != 0x02 {
continue;
}
if request.types().any(|t| *t == token) {
continue;
}
let cil_type = type_entry.value();
let methods: Vec<_> = cil_type
.methods()
.map(|m| (m.token, m.is_cctor()))
.collect();
if methods.is_empty() {
continue;
}
let all_dispensable = methods
.iter()
.all(|(m, is_cctor)| *is_cctor || ctx.was_inlined(*m) || ctx.is_dead(*m));
let has_dispensable_methods = methods
.iter()
.any(|(m, is_cctor)| !is_cctor && (ctx.was_inlined(*m) || ctx.is_dead(*m)));
if all_dispensable && has_dispensable_methods {
request.add_type(token);
}
}
}
/// Schedules `<Module>` methods whose SSA form has at most one instruction.
///
/// Despite the name this is not limited to the static constructor: a method qualifies when
/// it is the `.cctor` or when it is listed in `ctx.dead_methods`. Methods whose weak
/// reference can no longer be upgraded, or that have no entry in `ctx.ssa_functions`, are
/// skipped. Does nothing when the assembly has no module type.
fn sweep_empty_module_cctor(
    assembly: &CilObject,
    request: &mut CleanupRequest,
    ctx: &AnalysisContext,
) {
    let Some(module_type) = assembly.types().module_type() else {
        return;
    };

    for (_, method_ref) in module_type.methods.iter() {
        let Some(method) = method_ref.upgrade() else {
            continue;
        };
        let Some(ssa_func) = ctx.ssa_functions.get(&method.token) else {
            continue;
        };
        if ssa_func.instruction_count() > 1 {
            continue;
        }

        let is_cctor = method.name == wellknown::members::CCTOR;
        let is_dead = ctx.dead_methods.contains(&method.token);
        if is_cctor || is_dead {
            log::debug!(
                "Sweep: empty module method 0x{:08X} ({}) with {} instructions",
                method.token.value(),
                method.name,
                ssa_func.instruction_count()
            );
            request.add_method(method.token);
        }
    }
}
/// Creates an empty [`CleanupRequest`] configured from `ctx.config.cleanup`.
///
/// Returns `None` when `any_enabled()` reports that no cleanup option is enabled.
pub(crate) fn create_cleanup_request(ctx: &AnalysisContext) -> Option<CleanupRequest> {
    let cleanup_config = &ctx.config.cleanup;
    cleanup_config.any_enabled().then(|| {
        CleanupRequest::with_settings(
            cleanup_config.remove_orphan_metadata,
            cleanup_config.remove_empty_types,
        )
    })
}
#[cfg(test)]
mod tests {
use crate::{
cilassembly::CleanupRequest,
deobfuscation::utils::{is_obfuscated_name, is_special_name},
metadata::token::Token,
};
// The builder methods chain, and each added token is reflected in the matching counter.
#[test]
fn test_cleanup_request_builder() {
let mut request = CleanupRequest::new();
request
.add_type(Token::new(0x02000001))
.add_method(Token::new(0x06000001))
.add_field(Token::new(0x04000001));
assert!(request.has_deletions());
assert_eq!(request.types_len(), 1);
assert_eq!(request.methods_len(), 1);
assert_eq!(request.fields_len(), 1);
}
// Ordinary identifiers are accepted; names containing U+200B or U+200D are flagged.
#[test]
fn test_is_obfuscated_name() {
assert!(!is_obfuscated_name("MyClass"));
assert!(!is_obfuscated_name("Main"));
assert!(is_obfuscated_name("\u{200B}test"));
assert!(is_obfuscated_name("te\u{200D}st"));
}
// Names containing spaces are flagged as obfuscated, even when every word looks valid.
#[test]
fn test_is_obfuscated_name_spaces() {
assert!(is_obfuscated_name(
"Translate Start <FixedUpdate>b__4_0.get_Syntax"
));
assert!(is_obfuscated_name(
"get_Syntax get_AllowedCaller get_RebindActionMap"
));
assert!(is_obfuscated_name("A B"));
assert!(!is_obfuscated_name("ValidName"));
assert!(!is_obfuscated_name("get_Value"));
}
// Constructors, `<Module>` and accessor-style names are special; a plain method name is not.
#[test]
fn test_is_special_name() {
assert!(is_special_name(".ctor"));
assert!(is_special_name(".cctor"));
assert!(is_special_name("<Module>"));
assert!(is_special_name("get_Value"));
assert!(!is_special_name("MyMethod"));
}
// A special-looking prefix followed by extra space-separated text is not special, while
// the two space-containing generic parameter placeholder names are.
#[test]
fn test_is_special_name_rejects_spaces() {
assert!(!is_special_name("get_Syntax get_AllowedCaller"));
assert!(!is_special_name("set_Value some_other_word"));
assert!(!is_special_name(".ctor with spaces"));
assert!(!is_special_name("<Module> extra"));
assert!(is_special_name("<Generic Parameter>"));
assert!(is_special_name("<Generic Method Parameter>"));
assert!(is_special_name("get_Value"));
assert!(is_special_name("set_Item"));
assert!(is_special_name("add_Click"));
assert!(is_special_name("remove_Changed"));
}
}