mod detection;
mod dispatcher;
mod reconstruction;
mod statevar;
mod tracer;
pub use reconstruction::{apply_patch_plan, extract_patch_plan};
pub use tracer::{trace_method_tree, TraceTree};
use std::sync::Arc;
use dashmap::DashSet;
use crate::{
analysis::SsaFunction,
compiler::{CompilerContext, SsaPass},
deobfuscation::context::AnalysisContext,
metadata::{token::Token, typesystem::PointerSize},
CilObject, Result,
};
/// Attempts to undo control-flow flattening in `ssa` and returns the patched copy.
///
/// The function traces `ssa` with `trace_method_tree`, derives a patch plan from the
/// resulting tree with `extract_patch_plan`, and applies that plan to a clone of
/// `ssa`. The input function itself is never modified.
///
/// # Arguments
///
/// * `ssa` - The function to analyze.
/// * `config` - Settings forwarded to `trace_method_tree`.
/// * `assembly` - Optional assembly forwarded to `trace_method_tree`.
///
/// # Returns
///
/// `Some(patched)` when the trace contains a dispatcher, a patch plan could be
/// extracted, and that plan reports a nonzero `state_transitions_removed`;
/// `None` in every other case.
pub fn unflatten_with_tree(
    ssa: &SsaFunction,
    config: &UnflattenConfig,
    assembly: Option<&CilObject>,
) -> Option<SsaFunction> {
    let trace = trace_method_tree(ssa, config, assembly);

    // No dispatcher in the trace means no plan is extracted at all; a plan that
    // removes no state transitions is discarded as well.
    let patch_plan = trace
        .dispatcher
        .as_ref()
        .and_then(|_| extract_patch_plan(&trace))
        .filter(|candidate| candidate.state_transitions_removed != 0)?;

    let mut rewritten = ssa.clone();
    // The value reported by `apply_patch_plan` is deliberately not inspected.
    let _outcome = apply_patch_plan(&mut rewritten, &patch_plan);
    Some(rewritten)
}
/// Tunable settings for the unflattening routines in this module.
///
/// `unflatten_with_tree` hands a reference to this struct to `trace_method_tree`.
/// How each limit is enforced is decided inside the submodules and is not visible
/// from this file, so the per-field notes below are hedged where they rely on the
/// field name alone.
#[derive(Debug, Clone)]
pub struct UnflattenConfig {
/// Presumably an upper bound on the number of dispatcher states handled — confirm against the tracer.
pub max_states: usize,
/// Presumably toggles a solver-based analysis step — confirm against the tracer.
pub enable_solver: bool,
/// Solver time budget; the `_ms` suffix suggests milliseconds — confirm where it is consumed.
pub solver_timeout_ms: u64,
/// Presumably the lowest detection confidence that is still accepted — confirm against the detector.
pub min_confidence: f64,
/// Presumably a depth limit for expression evaluation — confirm against the evaluator.
pub max_eval_depth: usize,
/// Presumably a cap on how often blocks may be visited while tracing — confirm against the tracer.
pub max_block_visits: usize,
/// Presumably a depth limit for the trace tree — confirm against the tracer.
pub max_tree_depth: usize,
/// Pointer size used during analysis. `CffReconstructionPass::run_on_method`
/// overwrites this on its per-call copy using the PE file's `is_64bit` flag.
pub pointer_size: PointerSize,
}
impl Default for UnflattenConfig {
fn default() -> Self {
Self {
max_states: 1000,
enable_solver: true,
solver_timeout_ms: 100,
min_confidence: 0.6,
max_eval_depth: 30,
max_block_visits: 10000,
max_tree_depth: 100,
pointer_size: PointerSize::Bit32,
}
}
}
impl UnflattenConfig {
#[must_use]
pub fn confuserex() -> Self {
Self {
max_states: 500,
enable_solver: false, solver_timeout_ms: 50,
min_confidence: 0.5,
max_eval_depth: 25,
max_block_visits: 5000,
max_tree_depth: 75,
pointer_size: PointerSize::Bit32,
}
}
#[must_use]
pub fn aggressive() -> Self {
Self {
max_states: 2000,
enable_solver: true,
solver_timeout_ms: 200,
min_confidence: 0.4,
max_eval_depth: 50,
max_block_visits: 20000,
max_tree_depth: 150,
pointer_size: PointerSize::Bit32,
}
}
}
/// SSA pass that recovers original control flow from flattened state machine patterns.
///
/// The two token sets are reference-counted so that they can be shared with an
/// `AnalysisContext` (see `CffReconstructionPass::new`).
pub struct CffReconstructionPass {
/// Settings cloned at the start of every `run_on_method` call.
config: UnflattenConfig,
/// Tokens of methods this pass has already rewritten; `run_on_method` returns
/// `Ok(false)` immediately for any token found here.
unflattened_dispatchers: Arc<DashSet<Token>>,
/// Tokens of methods recorded on every successful rewrite.
dispatchers: Arc<DashSet<Token>>,
}
impl Default for CffReconstructionPass {
fn default() -> Self {
Self {
config: UnflattenConfig::default(),
unflattened_dispatchers: Arc::new(DashSet::new()),
dispatchers: Arc::new(DashSet::new()),
}
}
}
impl CffReconstructionPass {
#[must_use]
pub fn new(ctx: &AnalysisContext, config: UnflattenConfig) -> Self {
Self {
config,
unflattened_dispatchers: Arc::clone(&ctx.unflattened_dispatchers),
dispatchers: Arc::clone(&ctx.dispatchers),
}
}
#[must_use]
pub fn with_defaults() -> Self {
Self::default()
}
}
impl SsaPass for CffReconstructionPass {
fn name(&self) -> &'static str {
"cff-reconstruction"
}
fn description(&self) -> &'static str {
"Recovers original control flow from flattened state machine patterns"
}
fn run_on_method(
&self,
ssa: &mut SsaFunction,
method_token: Token,
ctx: &CompilerContext,
assembly: &Arc<CilObject>,
) -> Result<bool> {
if self.unflattened_dispatchers.contains(&method_token) {
return Ok(false);
}
let mut config = self.config.clone();
config.pointer_size = PointerSize::from_pe(assembly.file().pe().is_64bit);
match unflatten_with_tree(ssa, &config, Some(assembly.as_ref())) {
Some(mut patched) => {
patched.rebuild_ssa();
*ssa = patched;
self.unflattened_dispatchers.insert(method_token);
self.dispatchers.insert(method_token);
ctx.no_inline.insert(method_token);
ctx.clear_known_values(method_token);
Ok(true)
}
None => Ok(false),
}
}
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, LazyLock};
use crate::{
analysis::{
ControlFlowGraph, PhiNode, PhiOperand, SsaBlock, SsaConverter, SsaFunction,
SsaInstruction, SsaOp, SsaVarId, VariableOrigin,
},
assembly::{decode_blocks, InstructionAssembler},
deobfuscation::passes::unflattening::{detection::CffDetector, dispatcher::Dispatcher},
deobfuscation::{DeobfuscationEngine, EngineConfig},
metadata::token::Token,
CilObject,
};
/// Builds a small synthetic flattened function together with the matching
/// [`Dispatcher`] description.
///
/// Layout of the five blocks:
/// * block 0 jumps to block 1,
/// * block 1 holds a phi for the state variable, copies it into the switch
///   variable and switches to blocks 2 and 3 (default: block 4),
/// * blocks 2 and 3 jump back to block 1,
/// * block 4 returns.
fn create_dispatcher_ssa() -> (SsaFunction, Dispatcher) {
    let mut func = SsaFunction::new(0, 1);
    let state_var = SsaVarId::new();
    let switch_var = SsaVarId::new();

    // Produces a block whose only instruction is an unconditional jump.
    let jump_only_block = |id, target| {
        let mut block = SsaBlock::new(id);
        block.add_instruction(SsaInstruction::synthetic(SsaOp::Jump { target }));
        block
    };

    // Entry block: falls straight into the dispatcher.
    func.add_block(jump_only_block(0, 1));

    // Dispatcher block: phi -> copy -> switch.
    let mut dispatcher_block = SsaBlock::new(1);
    let mut state_phi = PhiNode::new(state_var, VariableOrigin::Local(0));
    state_phi.add_operand(PhiOperand::new(state_var, 0));
    dispatcher_block.add_phi(state_phi);
    dispatcher_block.add_instruction(SsaInstruction::synthetic(SsaOp::Copy {
        dest: switch_var,
        src: state_var,
    }));
    dispatcher_block.add_instruction(SsaInstruction::synthetic(SsaOp::Switch {
        value: switch_var,
        targets: vec![2, 3],
        default: 4,
    }));
    func.add_block(dispatcher_block);

    // Case blocks: each one loops back to the dispatcher.
    for case_id in 2..=3 {
        func.add_block(jump_only_block(case_id, 1));
    }

    // Exit block: plain return without a value.
    let mut exit_block = SsaBlock::new(4);
    exit_block.add_instruction(SsaInstruction::synthetic(SsaOp::Return { value: None }));
    func.add_block(exit_block);

    let dispatcher = Dispatcher::new(1, switch_var, vec![2, 3], 4)
        .with_state_phi(state_var)
        .with_confidence(1.0);

    (func, dispatcher)
}
/// The detector must pick block 1 (the switch block that the case blocks jump
/// back to) of the synthetic function built by `create_dispatcher_ssa`.
#[test]
fn test_detect_dispatcher_finds_switch_with_back_edge() {
    let (ssa, _) = create_dispatcher_ssa();
    let mut detector = CffDetector::new(&ssa);

    // A single `expect` replaces the `is_some()` + `unwrap()` pair and states
    // what was expected when the detector comes back empty.
    let candidate = detector
        .detect_best()
        .expect("detector should report a dispatcher candidate for the synthetic CFF function");

    assert_eq!(candidate.block, 1, "dispatcher should be located in block 1");
}
/// Finishes `assembler`, decodes the emitted bytecode into basic blocks and
/// builds a control flow graph from them.
///
/// The whole byte buffer is decoded (its length is passed as the size bound).
/// The literal `0` and `0x1000` arguments are forwarded to `decode_blocks`
/// unchanged; their exact meaning is defined by that function.
///
/// # Errors
///
/// Propagates any error from `InstructionAssembler::finish`, `decode_blocks`
/// or `ControlFlowGraph::from_basic_blocks`.
fn build_cfg(assembler: InstructionAssembler) -> crate::Result<ControlFlowGraph<'static>> {
    // Only the raw bytes are needed; the other two tuple members are ignored.
    let (code, _, _) = assembler.finish()?;
    let code_len = code.len();
    let basic_blocks = decode_blocks(&code, 0, 0x1000, Some(code_len))?;
    ControlFlowGraph::from_basic_blocks(basic_blocks)
}
/// Converts `cfg` into SSA form by forwarding to `SsaConverter::build`.
///
/// # Arguments
///
/// * `cfg` - Control flow graph to convert.
/// * `num_args` - Forwarded unchanged as the argument count.
/// * `num_locals` - Forwarded unchanged as the local-variable count.
///
/// # Errors
///
/// Returns whatever error `SsaConverter::build` reports.
fn build_ssa(
cfg: &ControlFlowGraph<'_>,
num_args: usize,
num_locals: usize,
) -> crate::Result<SsaFunction> {
// The fourth argument is always `None` here; its meaning is defined by `SsaConverter::build`.
SsaConverter::build(cfg, num_args, num_locals, None)
}
/// Returns `true` as soon as any instruction in any block of `ssa` is an
/// `SsaOp::Switch`, and `false` when there is none.
fn has_switch(ssa: &SsaFunction) -> bool {
    ssa.blocks()
        .iter()
        .flat_map(|block| block.instructions().iter())
        .any(|instr| matches!(instr.op(), SsaOp::Switch { .. }))
}
/// Counts the `SsaOp::Switch` instructions across all blocks of `ssa`.
fn count_switches(ssa: &SsaFunction) -> usize {
    ssa.blocks()
        .iter()
        .flat_map(|block| block.instructions().iter())
        .filter(|instr| matches!(instr.op(), SsaOp::Switch { .. }))
        .count()
}
/// Runs a default-configured `DeobfuscationEngine` over `ssa` in place.
///
/// The engine needs an assembly, so a sample file is loaded once (lazily) and
/// shared by every caller. The method is always presented to the engine under
/// token `0x06000001`.
///
/// # Errors
///
/// Propagates any error from `DeobfuscationEngine::process_ssa`.
///
/// # Panics
///
/// Panics on first use if the sample assembly cannot be loaded.
fn run_full_deobfuscation(ssa: &mut SsaFunction) -> crate::Result<()> {
    static SAMPLE_ASSEMBLY: LazyLock<Arc<CilObject>> = LazyLock::new(|| {
        let loaded = CilObject::from_path("tests/samples/crafted_2.exe")
            .expect("Failed to load test assembly");
        Arc::new(loaded)
    });

    let mut engine = DeobfuscationEngine::new(EngineConfig::default());
    let method_token = Token::new(0x06000001);

    // The value returned by the engine is not needed; only errors are propagated.
    engine.process_ssa(&SAMPLE_ASSEMBLY, ssa, method_token)?;
    Ok(())
}
/// End-to-end check on a minimal hand-assembled flattened method: a three-way
/// dispatcher switch with three case blocks must contain no switch after
/// `run_full_deobfuscation`.
#[test]
fn test_unflatten_simple_cff_pattern() -> crate::Result<()> {
let mut asm = InstructionAssembler::new();
asm
// Push the initial selector value (0) and enter the dispatcher.
.ldc_i4(0)? .br("dispatcher")?
.label("dispatcher")?
// Keep a copy of the incoming value in local 2, then XOR it with 0.
.dup()? .stloc_s(2)? .ldc_i4(0)? .xor()?
// Unsigned remainder by 3 selects one of the three switch targets.
.ldc_i4(3)? .rem_un()?
.switch(&["case0", "case1", "case2"])?
// Switch fall-through leaves via `exit`.
.br("exit")? .label("case0")?
// case0: local 0 = 1, then push 1 as the next selector value.
.ldc_i4(1)?
.stloc_0()?
.ldc_i4(1)? .br("dispatcher")?
.label("case1")?
// case1: local 0 = 2, then push 2 as the next selector value.
.ldc_i4(2)?
.stloc_0()?
.ldc_i4(2)? .br("dispatcher")?
.label("case2")?
// case2: local 0 = 3, then leave without re-entering the dispatcher.
.ldc_i4(3)?
.stloc_0()?
.br("exit")?
.label("exit")?
.ret()?;
let cfg = build_cfg(asm)?;
// No arguments, three locals.
let mut ssa = build_ssa(&cfg, 0, 3)?;
assert!(has_switch(&ssa), "Should have switch before unflattening");
let initial_switch_count = count_switches(&ssa);
assert!(initial_switch_count > 0, "Should have at least one switch");
run_full_deobfuscation(&mut ssa)?;
let final_switch_count = count_switches(&ssa);
assert_eq!(
final_switch_count, 0,
"CFF switch should be eliminated after deobfuscation"
);
Ok(())
}
/// End-to-end check on a hand-assembled flattened loop with an XOR-encoded
/// selector and seven switch targets: the single switch must be gone after
/// `run_full_deobfuscation`.
///
/// NOTE(review): the name suggests the IL mirrors a "DemoLoop" sample method
/// exactly — confirm the origin of the constants.
#[test]
fn test_unflatten_demoloop_exact() -> crate::Result<()> {
let mut asm = InstructionAssembler::new();
asm
// local 0 = 0 before the state machine starts.
.ldc_i4_0()?
.stloc_0()?
.label("init_state")?
// Initial encoded selector value.
.ldc_i4(-781784372_i32)?
.label("dispatcher_entry")?
// Dispatcher: XOR the incoming value with a constant, keep a copy in
// local 2, then switch on the unsigned remainder by 7.
.ldc_i4(-576502913_i32)?
.xor()?
.dup()?
.stloc_s(2)?
.ldc_i4(7)?
.rem_un()?
.switch(&[
"case0",
"case1",
"case2",
"case3",
"exit",
"init_state",
"case6",
])?
// Switch fall-through leaves via `exit`.
.br_s("exit")?
.label("case0")?
// case0: local 1 += 1; next value = (local 2 * const) ^ const.
.ldloc_1()?
.ldc_i4_1()?
.add()?
.stloc_1()?
.ldloc_s(2)?
.ldc_i4(1975223132_i32)?
.mul()?
.ldc_i4(483589312_i32)?
.xor()?
.br_s("dispatcher_entry")?
.label("case3")?
// case3: local 0 += local 1; next value is a plain constant.
.ldloc_0()?
.ldloc_1()?
.add()?
.stloc_0()?
.ldc_i4(-730624750_i32)?
.br_s("dispatcher_entry")?
.label("case1")?
// case1: local 1 = 1; next value = (local 2 * const) ^ const.
.ldc_i4_1()?
.stloc_1()?
.ldloc_s(2)?
.ldc_i4(-1381170983_i32)?
.mul()?
.ldc_i4(-1625566633_i32)?
.xor()?
.br_s("dispatcher_entry")?
.label("case6")?
// case6: compare local 1 with argument 1 and pick one of two constants.
// Each arm duplicates its constant and the merge point pops one copy,
// so exactly one value reaches the dispatcher.
.ldloc_1()?
.ldarg_1()?
.bgt_s("case6_false")?
.ldc_i4(-654475495_i32)?
.dup()?
.br_s("case6_merge")?
.label("case6_false")?
.ldc_i4(-1309138752_i32)?
.dup()?
.label("case6_merge")?
.pop()?
.br_s("dispatcher_entry")?
.label("case2")?
// case2: no work besides a nop; next value = (local 2 * const) ^ const.
.nop()? .ldloc_s(2)?
.ldc_i4(-1166059892_i32)?
.mul()?
.ldc_i4(-15245519_i32)?
.xor()?
.br("dispatcher_entry")?
.label("exit")?
.ret()?;
let cfg = build_cfg(asm)?;
// Two arguments, three locals.
let mut ssa = build_ssa(&cfg, 2, 3)?;
assert!(has_switch(&ssa), "Should have switch before unflattening");
let initial_switch_count = count_switches(&ssa);
assert_eq!(initial_switch_count, 1, "Should have exactly 1 CFF switch");
run_full_deobfuscation(&mut ssa)?;
let final_switch_count = count_switches(&ssa);
assert_eq!(
final_switch_count, 0,
"CFF switch should be eliminated after deobfuscation"
);
Ok(())
}
/// End-to-end check on a hand-assembled flattened if/else chain with an
/// XOR-encoded selector and twelve switch targets: the single switch must be
/// gone after `run_full_deobfuscation`.
///
/// NOTE(review): the name suggests the IL mirrors a "DemoIfElse" sample method
/// exactly — confirm the origin of the constants.
#[test]
fn test_unflatten_demoifelse_exact() -> crate::Result<()> {
let mut asm = InstructionAssembler::new();
asm
// Pre-dispatcher check: argument 1 >= 0 jumps straight to `check_zero`,
// otherwise execution falls into `init_state`.
.ldarg_1()?
.ldc_i4_0()?
.bge_s("check_zero")?
.label("init_state")?
// Initial encoded selector value.
.ldc_i4(-1433646196_i32)?
.label("dispatcher_entry")?
// Dispatcher: XOR the incoming value with a constant, keep a copy in
// local 1, then switch on the unsigned remainder by 12.
.ldc_i4(-2107303682_i32)?
.xor()?
.dup()?
.stloc_1()?
.ldc_i4(12)?
.rem_un()?
.switch(&[
"output",
"check_zero",
"negative",
"zero",
"medium",
"exit_state",
"large",
"small",
"done",
"check_small",
"init_state",
"check_medium",
])?
// Switch fall-through leaves via `done`.
.br("done")?
.label("negative")?
// negative: local 0 = 1; next value = (local 1 * const) ^ const.
.ldc_i4(1)? .stloc_0()?
.ldloc_1()?
.ldc_i4(1606139385_i32)?
.mul()?
.ldc_i4(-134536511_i32)?
.xor()?
.br_s("dispatcher_entry")?
.label("check_zero")?
// check_zero: pick one of two constants depending on whether argument 1
// is nonzero. Each arm duplicates its constant and the merge point pops
// one copy, so exactly one value reaches the dispatcher.
.ldarg_1()?
.brtrue_s("check_zero_nonzero")?
.ldc_i4(-61014635_i32)?
.dup()?
.br_s("check_zero_merge")?
.label("check_zero_nonzero")?
.ldc_i4(-1737504033_i32)?
.dup()?
.label("check_zero_merge")?
.pop()?
.br_s("dispatcher_entry")?
.label("medium")?
// medium: local 0 = 3; next value = (local 1 * const) ^ const.
.ldc_i4(3)? .stloc_0()?
.ldloc_1()?
.ldc_i4(-1112056451_i32)?
.mul()?
.ldc_i4(-957501578_i32)?
.xor()?
.br("dispatcher_entry")?
.label("small")?
// small: local 0 = 4; next value = (local 1 * const) ^ const.
.ldc_i4(4)? .stloc_0()?
.ldloc_1()?
.ldc_i4(1822600615_i32)?
.mul()?
.ldc_i4(972313123_i32)?
.xor()?
.br("dispatcher_entry")?
.label("check_small")?
// check_small: argument 1 < 10 selects between two constants
// (same dup/pop merge shape as `check_zero`).
.ldarg_1()?
.ldc_i4(10)?
.blt_s("check_small_true")?
.ldc_i4(-6576863_i32)?
.dup()?
.br_s("check_small_merge")?
.label("check_small_true")?
.ldc_i4(-904275411_i32)?
.dup()?
.label("check_small_merge")?
.pop()?
.br("dispatcher_entry")?
.label("check_medium")?
// check_medium: argument 1 >= 100 selects between two constants
// (same dup/pop merge shape as `check_zero`).
.ldarg_1()?
.ldc_i4(100)?
.bge_s("check_medium_large")?
.ldc_i4(-1847017906_i32)?
.dup()?
.br_s("check_medium_merge")?
.label("check_medium_large")?
.ldc_i4(-1069863352_i32)?
.dup()?
.label("check_medium_merge")?
.pop()?
.br("dispatcher_entry")?
.label("output")?
// output: no work besides a nop; next value is a plain constant.
.nop()? .ldc_i4(-1184376018_i32)?
.br("dispatcher_entry")?
.label("zero")?
// zero: local 0 = 2; next value = (local 1 * const) ^ const.
.ldc_i4(2)? .stloc_0()?
.ldloc_1()?
.ldc_i4(-1658452976_i32)?
.mul()?
.ldc_i4(-1485570506_i32)?
.xor()?
.br("dispatcher_entry")?
.label("exit_state")?
// exit_state: no store; next value = (local 1 * const) ^ const.
.ldloc_1()?
.ldc_i4(-1339746023_i32)?
.mul()?
.ldc_i4(-1565321453_i32)?
.xor()?
.br("dispatcher_entry")?
.label("large")?
// large: local 0 = 5; next value is a plain constant.
.ldc_i4(5)? .stloc_0()?
.ldc_i4(-546964346_i32)?
.br("dispatcher_entry")?
.label("done")?
.ret()?;
let cfg = build_cfg(asm)?;
// Two arguments, two locals.
let mut ssa = build_ssa(&cfg, 2, 2)?;
assert!(has_switch(&ssa), "Should have switch before unflattening");
let initial_switch_count = count_switches(&ssa);
assert_eq!(initial_switch_count, 1, "Should have exactly 1 CFF switch");
run_full_deobfuscation(&mut ssa)?;
let final_switch_count = count_switches(&ssa);
assert_eq!(
final_switch_count, 0,
"CFF switch should be eliminated after deobfuscation"
);
Ok(())
}
/// End-to-end check on a hand-assembled method that contains both a regular
/// switch on argument 1 and a flattening dispatcher switch with fourteen
/// targets: after `run_full_deobfuscation` exactly one switch must remain.
///
/// NOTE(review): the name suggests the IL mirrors a "DemoSwitch" sample method
/// exactly — confirm the origin of the constants.
#[test]
fn test_unflatten_demoswitch_exact() -> crate::Result<()> {
let mut asm = InstructionAssembler::new();
asm
// First switch: driven directly by argument 1, five targets. Its
// fall-through continues into `init_state`.
.ldarg_1()?
.switch(&["nothing", "start", "process", "stop", "reset"])?
.label("init_state")?
// Initial encoded selector value.
.ldc_i4(1109748230_i32)?
.label("dispatcher_entry")?
// Dispatcher: XOR the incoming value with a constant, keep a copy in
// local 1, then switch on the unsigned remainder by 14.
.ldc_i4(1399323750_i32)?
.xor()?
.dup()?
.stloc_1()?
.ldc_i4(14)?
.rem_un()?
.switch(&[
"init_state",
"state1",
"process",
"state3",
"state4",
"reset",
"start",
"stop",
"state8",
"done",
"output",
"nothing",
"state12",
"unknown",
])?
// Dispatcher fall-through leaves via `done`.
.br("done")?
.label("stop")?
// stop: local 0 = 4; next value is a plain constant.
.ldc_i4(4)? .stloc_0()?
.ldc_i4(1874087721_i32)?
.br_s("dispatcher_entry")?
.label("state3")?
// state3: no store; next value = (local 1 * const) ^ const.
.ldloc_1()?
.ldc_i4(2035784935_i32)?
.mul()?
.ldc_i4(1232406471_i32)?
.xor()?
.br_s("dispatcher_entry")?
.label("nothing")?
// nothing: local 0 = 0; next value is a plain constant.
.ldc_i4(0)? .stloc_0()?
.ldc_i4(1654581703_i32)?
.br_s("dispatcher_entry")?
.label("output")?
// output: no work besides a nop; next value is a plain constant.
.nop()? .ldc_i4(1061947317_i32)?
.br("dispatcher_entry")?
.label("unknown")?
// unknown: local 0 = 6; next value is a plain constant.
.ldc_i4(6)? .stloc_0()?
.ldc_i4(827576000_i32)?
.br("dispatcher_entry")?
.label("state12")?
// state12: no store; next value = (local 1 * const) ^ const.
.ldloc_1()?
.ldc_i4(1665283153_i32)?
.mul()?
.ldc_i4(-1691273355_i32)?
.xor()?
.br("dispatcher_entry")?
.label("state1")?
// state1: no store; next value = (local 1 * const) ^ const.
.ldloc_1()?
.ldc_i4(2085043702_i32)?
.mul()?
.ldc_i4(964678200_i32)?
.xor()?
.br("dispatcher_entry")?
.label("state4")?
// state4: no store; next value = (local 1 * const) ^ const.
.ldloc_1()?
.ldc_i4(355033724_i32)?
.mul()?
.ldc_i4(-844819946_i32)?
.xor()?
.br("dispatcher_entry")?
.label("process")?
// process: local 0 = 2; next value is a plain constant.
.ldc_i4(2)? .stloc_0()?
.ldc_i4(1649866638_i32)?
.br("dispatcher_entry")?
.label("state8")?
// state8: no store; next value = (local 1 * const) ^ const.
.ldloc_1()?
.ldc_i4(1697020441_i32)?
.mul()?
.ldc_i4(464976312_i32)?
.xor()?
.br("dispatcher_entry")?
.label("reset")?
// reset: local 0 = 5; next value is a plain constant.
.ldc_i4(5)? .stloc_0()?
.ldc_i4(588169916_i32)?
.br("dispatcher_entry")?
.label("start")?
// start: local 0 = 1; pushes the same constant as `process`.
.ldc_i4(1)? .stloc_0()?
.ldc_i4(1649866638_i32)?
.br("dispatcher_entry")?
.label("done")?
.ret()?;
let cfg = build_cfg(asm)?;
// Two arguments, two locals.
let mut ssa = build_ssa(&cfg, 2, 2)?;
assert!(has_switch(&ssa), "Should have switch before unflattening");
let initial_switch_count = count_switches(&ssa);
assert_eq!(
initial_switch_count, 2,
"Should have exactly 2 switches (user + CFF)"
);
run_full_deobfuscation(&mut ssa)?;
let final_switch_count = count_switches(&ssa);
// Only the dispatcher switch may disappear; the switch on argument 1 must survive.
assert_eq!(
final_switch_count, 1,
"User's original switch should be preserved, only CFF switch eliminated"
);
Ok(())
}
}