use std::collections::{HashMap, HashSet};

use crate::{
    assembly::Operand,
    cilassembly::CleanupRequest,
    deobfuscation::{
        cleanup::is_entry_point, context::AnalysisContext, findings::DeobfuscationFindings,
    },
    metadata::{method::Method, signatures::TypeSignature, tables::TableId, token::Token},
    prelude::{CilTypeRef, FlowType},
    CilObject,
};
fn build_cleanup_request(
findings: &DeobfuscationFindings,
assembly: &CilObject,
ctx: &AnalysisContext,
) -> CleanupRequest {
let mut request = CleanupRequest::with_settings(
ctx.config.cleanup.remove_orphan_metadata,
ctx.config.cleanup.remove_empty_types,
);
let aggressive = ctx.config.cleanup.remove_unused_methods;
if ctx.config.cleanup.remove_decryptors {
for token in ctx.decryptors.removable_decryptors() {
if !is_entry_point(assembly, token, aggressive) {
request.add_method(token);
}
}
}
if ctx.config.cleanup.remove_protection_methods {
for (_, token) in &findings.anti_tamper_methods {
if !is_entry_point(assembly, *token, aggressive) {
request.add_method(*token);
}
}
for (_, token) in &findings.anti_debug_methods {
if !is_entry_point(assembly, *token, aggressive) {
request.add_method(*token);
}
}
for (_, token) in &findings.anti_dump_methods {
if !is_entry_point(assembly, *token, aggressive) {
request.add_method(*token);
}
}
for (_, token) in &findings.resource_handler_methods {
if !is_entry_point(assembly, *token, aggressive) {
request.add_method(*token);
}
}
for (_, token) in &findings.decryptor_methods {
let fully_decrypted = ctx.decryptors.is_fully_decrypted(*token);
if fully_decrypted && !is_entry_point(assembly, *token, aggressive) {
request.add_method(*token);
}
}
for (_, native_helper) in &findings.native_helpers {
if !is_entry_point(assembly, native_helper.token, aggressive) {
request.add_method(native_helper.token);
}
}
for (_, token) in &findings.proxy_methods {
if !is_entry_point(assembly, *token, aggressive) {
request.add_method(*token);
}
}
}
for (_, type_token) in &findings.obfuscator_type_tokens {
request.add_type(*type_token);
}
for (_, type_token) in &findings.constant_data_types {
request.add_type(*type_token);
}
for (_, field_token) in &findings.constant_data_fields {
request.add_field(*field_token);
}
if ctx.config.cleanup.remove_protection_methods {
for (_, field_token) in &findings.infrastructure_fields {
request.add_field(*field_token);
}
}
if ctx.config.cleanup.remove_protection_methods {
for (_, type_token) in &findings.protection_infrastructure_types {
request.add_type(*type_token);
}
}
if let Some(ref provider) = findings.statemachine_provider {
let semantics = provider.semantics();
if let Some(type_token) = semantics.type_token {
request.add_type(type_token);
}
if let Some(init_token) = semantics.init_method {
request.add_method(init_token);
}
if let Some(update_token) = semantics.update_method {
request.add_method(update_token);
}
}
for (methodspec, base_method) in ctx.decryptors.all_methodspec_mappings() {
if request.is_deleted(base_method) {
request.add_methodspec(methodspec);
}
}
if ctx.config.cleanup.remove_protection_methods {
let helper_methods = find_dead_helper_methods(assembly, &request);
for method_token in helper_methods {
if !is_entry_point(assembly, method_token, aggressive) {
request.add_method(method_token);
}
}
}
request
}
/// Builds the cleanup request for an analysed assembly, if there is anything to do.
///
/// Returns `None` when `ctx.config.cleanup.any_enabled()` is false, or when
/// the assembled request ends up with no deletions and no excluded sections.
/// When `remove_artifact_sections` is set, every section name recorded in
/// `findings.artifact_sections` is added to the request's exclusion list.
pub fn build_request(
    assembly: &CilObject,
    ctx: &AnalysisContext,
    findings: &DeobfuscationFindings,
) -> Option<CleanupRequest> {
    let cleanup_config = &ctx.config.cleanup;
    if !cleanup_config.any_enabled() {
        return None;
    }

    let mut request = build_cleanup_request(findings, assembly, ctx);

    if cleanup_config.remove_artifact_sections {
        for (_, section_name) in &findings.artifact_sections {
            request.exclude_section(section_name.clone());
        }
    }

    // An empty request is reported as "nothing to do".
    let has_work = request.has_deletions() || !request.excluded_sections().is_empty();
    has_work.then_some(request)
}
/// Returns `true` when `method` has exactly one parameter and both that
/// parameter and the return value are `byte[]` (see [`is_byte_array_type`]).
fn is_byte_array_transform_signature(method: &Method) -> bool {
    let signature = &method.signature;
    if signature.params.len() != 1 {
        return false;
    }
    is_byte_array_type(&signature.return_type.base)
        && is_byte_array_type(&signature.params[0].base)
}
/// Returns `true` when `sig` is a single-dimension array (`SzArray`) whose
/// element type is `U1`.
fn is_byte_array_type(sig: &TypeSignature) -> bool {
    matches!(sig, TypeSignature::SzArray(inner) if matches!(*inner.base, TypeSignature::U1))
}
/// Heuristically decides whether `method` touches decompression-related types.
///
/// The method matches when any of the following holds:
///
/// * a call-flow instruction targets a `MemberRef` whose declaring type name
///   contains one of `DECOMPRESSION_TYPES`;
/// * a call-flow instruction targets a `MemberRef` whose member name contains
///   `Read`, `Decompress` or `Inflate` and whose declaring type name contains
///   `Stream`;
/// * a call-flow instruction targets a `MethodDef` whose declaring type name
///   contains one of `DECOMPRESSION_TYPES`;
/// * the `Debug` rendering of any local variable's type contains one of
///   `DECOMPRESSION_TYPES`.
///
/// All comparisons are plain substring checks.
fn uses_decompression_types(assembly: &CilObject, method: &Method) -> bool {
    const DECOMPRESSION_TYPES: &[&str] = &[
        "MemoryStream",
        "DeflateStream",
        "GZipStream",
        "BinaryReader",
        "Lzma",
        "LzmaDecoder",
        "SevenZip",
    ];

    /// `true` when `name` contains any entry of `DECOMPRESSION_TYPES`.
    fn names_decompression_type(name: &str) -> bool {
        DECOMPRESSION_TYPES
            .iter()
            .any(|marker| name.contains(marker))
    }

    let calls_decompression_api = method.instructions().any(|instr| {
        if instr.flow_type != FlowType::Call {
            return false;
        }
        let Operand::Token(token) = &instr.operand else {
            return false;
        };

        if token.is_table(TableId::MemberRef) {
            if let Some(member) = assembly.member_ref(token) {
                let type_name = member.declaredby.name().unwrap_or_default();
                if names_decompression_type(&type_name) {
                    return true;
                }
                // Stream-like type combined with a read/decompress-style member name.
                let has_stream_verb = ["Read", "Decompress", "Inflate"]
                    .iter()
                    .any(|verb| member.name.contains(verb));
                if has_stream_verb && type_name.contains("Stream") {
                    return true;
                }
            }
        }

        if token.is_table(TableId::MethodDef) {
            if let Some(target) = assembly.method(token) {
                if let Some(owner) = target.declaring_type_rc() {
                    if names_decompression_type(&owner.name) {
                        return true;
                    }
                }
            }
        }

        false
    });

    // Locals are only inspected when no call matched.
    calls_decompression_api
        || method
            .local_vars
            .iter()
            .any(|(_, local)| names_decompression_type(&format!("{:?}", local.base)))
}
/// Heuristically detects a method that spins up a thread around a delegate.
///
/// Returns `true` when the body references a `MemberRef` to `Thread::.ctor`
/// (via a call-flow or `newobj` instruction) and additionally either
///
/// * references a `MemberRef` to `ParameterizedThreadStart::.ctor`, or
/// * contains an `ldftn` whose `MethodDef` target is declared in a type that
///   `request` already marks as deleted.
///
/// Type names are compared by simple name only (`Thread`,
/// `ParameterizedThreadStart`), as returned by `declaredby.name()`.
fn creates_thread_with_delegate(
    assembly: &CilObject,
    method: &Method,
    request: &CleanupRequest,
) -> bool {
    let mut has_thread_ctor = false;
    let mut has_parameterized_thread_start = false;
    let mut references_removed_type = false;

    for instr in method.instructions() {
        let is_call_like = instr.flow_type == FlowType::Call || instr.mnemonic == "newobj";
        let is_ldftn = instr.mnemonic == "ldftn";

        // `ldftn` must be let through explicitly: it is neither `newobj` nor
        // (presumably) a call-flow instruction, so filtering on call-like
        // instructions alone would make the function-pointer check below
        // unreachable.
        if !is_call_like && !is_ldftn {
            continue;
        }
        let Operand::Token(token) = &instr.operand else {
            continue;
        };

        if is_call_like && token.is_table(TableId::MemberRef) {
            if let Some(member) = assembly.member_ref(token) {
                let type_name = member.declaredby.name().unwrap_or_default();
                if type_name == "Thread" && member.name == ".ctor" {
                    has_thread_ctor = true;
                }
                if type_name == "ParameterizedThreadStart" && member.name == ".ctor" {
                    has_parameterized_thread_start = true;
                }
            }
        }

        if is_ldftn && token.is_table(TableId::MethodDef) {
            if let Some(target) = assembly.method(token) {
                if let Some(owner) = target.declaring_type_rc() {
                    // The delegate target lives in a type that is going away.
                    if request.is_deleted(owner.token) {
                        references_removed_type = true;
                    }
                }
            }
        }
    }

    has_thread_ctor && (has_parameterized_thread_start || references_removed_type)
}
fn find_dead_helper_methods(assembly: &CilObject, request: &CleanupRequest) -> HashSet<Token> {
let mut dead_helpers: HashSet<Token> = HashSet::new();
let cctor_token = assembly.methods().iter().find_map(|entry| {
let method = entry.value();
if method.is_cctor() {
if let Some(owner) = method.declaring_type_rc() {
if owner.name == "<Module>" {
return Some(method.token);
}
}
}
None
});
for method_entry in assembly.methods() {
let method = method_entry.value();
let method_token = method.token;
if request.is_deleted(method_token) {
continue;
}
if let Some(owner_type) = method.declaring_type_rc() {
if request.is_deleted(owner_type.token) {
continue;
}
}
let mut called_by_removed = false;
let mut called_by_cctor_only = false;
let mut called_by_non_removed = false;
let mut call_count = 0;
for other_method_entry in assembly.methods() {
let other_method = other_method_entry.value();
let caller_token = other_method.token;
let calls_target = other_method.instructions().any(|instr| {
if let Operand::Token(t) = &instr.operand {
t == &method_token
} else {
false
}
});
if calls_target {
call_count += 1;
if request.is_deleted(caller_token) {
called_by_removed = true;
} else if Some(caller_token) == cctor_token {
called_by_cctor_only = true;
} else {
called_by_non_removed = true;
break; }
}
}
if called_by_removed && !called_by_non_removed && !called_by_cctor_only {
dead_helpers.insert(method_token);
continue;
}
if called_by_cctor_only && !called_by_non_removed && call_count > 0 {
let is_in_module = method
.declaring_type
.get()
.and_then(CilTypeRef::upgrade)
.is_some_and(|owner| owner.name == "<Module>");
let is_void_no_params = method.signature.return_type.base == TypeSignature::Void
&& method.signature.params.is_empty();
let has_many_locals = method.local_vars.count() >= 10;
if is_in_module && is_void_no_params && has_many_locals {
dead_helpers.insert(method_token);
continue;
}
let is_byte_array_helper = is_in_module && is_byte_array_transform_signature(method);
if is_byte_array_helper {
dead_helpers.insert(method_token);
continue;
}
if is_in_module && uses_decompression_types(assembly, method) {
dead_helpers.insert(method_token);
continue;
}
}
if (called_by_removed || called_by_cctor_only) && !called_by_non_removed && call_count > 0 {
let is_in_module = method
.declaring_type
.get()
.and_then(CilTypeRef::upgrade)
.is_some_and(|owner| owner.name == "<Module>");
if is_in_module && uses_decompression_types(assembly, method) {
dead_helpers.insert(method_token);
}
}
}
for method_entry in assembly.methods() {
let method = method_entry.value();
let method_token = method.token;
if request.is_deleted(method_token) || dead_helpers.contains(&method_token) {
continue;
}
let is_in_module = method
.declaring_type
.get()
.and_then(CilTypeRef::upgrade)
.is_some_and(|owner| owner.name == "<Module>");
if !is_in_module {
continue;
}
let creates_protection_objects = creates_thread_with_delegate(assembly, method, request);
if creates_protection_objects {
let has_live_caller = assembly.methods().iter().any(|other_entry| {
let other = other_entry.value();
let caller_token = other.token;
if request.is_deleted(caller_token)
|| dead_helpers.contains(&caller_token)
|| Some(caller_token) == cctor_token
{
return false;
}
other.instructions().any(|instr| {
if let Operand::Token(t) = &instr.operand {
t == &method_token
} else {
false
}
})
});
if !has_live_caller {
dead_helpers.insert(method_token);
}
}
}
loop {
let mut new_dead: HashSet<Token> = HashSet::new();
for method_entry in assembly.methods() {
let method = method_entry.value();
let method_token = method.token;
if request.is_deleted(method_token) || dead_helpers.contains(&method_token) {
continue;
}
let mut called_by_dead_only = false;
let mut called_by_live = false;
for other_method_entry in assembly.methods() {
let other_method = other_method_entry.value();
let caller_token = other_method.token;
let calls_target = other_method.instructions().any(|instr| {
if let Operand::Token(t) = &instr.operand {
t == &method_token
} else {
false
}
});
if calls_target {
if request.is_deleted(caller_token) || dead_helpers.contains(&caller_token) {
called_by_dead_only = true;
} else {
called_by_live = true;
break;
}
}
}
if called_by_dead_only && !called_by_live {
new_dead.insert(method_token);
}
}
if new_dead.is_empty() {
break;
}
dead_helpers.extend(new_dead);
}
dead_helpers
}
#[cfg(test)]
mod tests {
    use crate::{
        deobfuscation::{DeobfuscationEngine, EngineConfig},
        metadata::validation::ValidationConfig,
        CilObject,
    };

    /// Runs the default deobfuscation engine over a sample binary and checks
    /// that the output reloads under production validation, keeps its module,
    /// assembly metadata and entry point, does not gain types, carries no
    /// `Confuser`-named types, and has no all-digit section names left if the
    /// original had any. The test is skipped when the sample file is absent.
    #[test]
    fn test_cleanup_full_pipeline() {
        /// Section names this test treats as artifacts: non-empty, digits only.
        fn is_numeric_section(name: &str) -> bool {
            !name.is_empty() && name.chars().all(|c| c.is_ascii_digit())
        }

        const SAMPLE: &str = "tests/samples/packers/confuserex/mkaring_normal.exe";
        if !std::path::Path::new(SAMPLE).exists() {
            eprintln!("Skipping test: sample not found at {}", SAMPLE);
            return;
        }

        // Baseline facts about the untouched sample.
        let original = CilObject::from_path_with_validation(SAMPLE, ValidationConfig::analysis())
            .expect("Original should load");
        let original_type_count = original.types().len();
        let original_sections: Vec<String> = original
            .file()
            .sections()
            .iter()
            .map(|s| s.name.clone())
            .collect();

        let mut engine = DeobfuscationEngine::new(EngineConfig::default());
        let (deobfuscated, result) = engine
            .process_file(SAMPLE)
            .expect("Deobfuscation should succeed");

        let stats = result.stats();
        assert!(
            stats.methods_transformed > 0 || stats.constants_folded > 0,
            "Deobfuscation should have made some changes"
        );

        // Round-trip the produced bytes through the stricter validation profile.
        let output_bytes = deobfuscated.file().data().to_vec();
        let reloaded =
            CilObject::from_mem_with_validation(output_bytes, ValidationConfig::production());
        assert!(
            reloaded.is_ok(),
            "Deobfuscated assembly should pass production validation: {:?}",
            reloaded.err()
        );
        let reloaded = reloaded.unwrap();

        assert!(reloaded.module().is_some(), "Assembly should have a module");
        assert!(
            reloaded.assembly().is_some(),
            "Assembly should have assembly metadata"
        );

        let entry_token = reloaded.cor20header().entry_point_token;
        assert!(entry_token != 0, "Entry point should still exist");

        let deobfuscated_type_count = reloaded.types().len();
        assert!(
            deobfuscated_type_count <= original_type_count,
            "Should have same or fewer types after cleanup: {} vs {}",
            deobfuscated_type_count,
            original_type_count
        );

        let has_confuser_types = deobfuscated.types().iter().any(|t| {
            let type_info = t.value();
            ["Confuser", "ConfusedBy"]
                .iter()
                .any(|marker| type_info.name.contains(marker))
                || type_info.namespace.contains("Confuser")
        });
        assert!(
            !has_confuser_types,
            "ConfuserEx marker types should be removed after cleanup"
        );

        // Only meaningful when the sample actually had numeric sections.
        let original_had_artifacts = original_sections
            .iter()
            .any(|name| is_numeric_section(name));
        if original_had_artifacts {
            let deobfuscated_sections: Vec<String> = deobfuscated
                .file()
                .sections()
                .iter()
                .map(|s| s.name.clone())
                .collect();
            let remaining_artifacts: Vec<&String> = deobfuscated_sections
                .iter()
                .filter(|name| is_numeric_section(name))
                .collect();
            assert!(
                remaining_artifacts.is_empty(),
                "Artifact sections should be removed, but found: {:?}",
                remaining_artifacts
            );
        }
    }
}