use std::collections::{HashMap, HashSet};
use crate::{
analysis::{
cff_taint_config, ConstValue, PhiTaintMode, SsaBlock, SsaEvaluator, SsaFunction,
SsaInstruction, SsaOp, SsaVarId, SsaVariable, TaintAnalysis, TaintConfig,
},
deobfuscation::passes::unflattening::{detection::CffDetector, UnflattenConfig},
metadata::{token::Token, typesystem::PointerSize},
CilObject,
};
#[derive(Debug, Clone)]
pub struct TracedDispatcher {
pub block: usize,
pub switch_var: SsaVarId,
pub targets: Vec<usize>,
pub default: usize,
pub state_var: Option<SsaVarId>,
}
#[derive(Debug, Clone)]
pub enum StopReason {
Terminator,
MaxVisitsExceeded,
UnknownControlFlow { block: usize },
InfiniteLoop { block: usize },
}
#[derive(Debug, Clone)]
pub struct InstructionWithValues {
pub instruction: SsaInstruction,
pub block_idx: usize,
pub input_values: HashMap<SsaVarId, i64>,
pub output_value: Option<i64>,
}
#[derive(Debug, Clone)]
pub struct TraceTree {
pub root: TraceNode,
pub dispatcher: Option<TracedDispatcher>,
pub state_tainted: HashSet<SsaVarId>,
pub stats: TraceStats,
}
#[derive(Debug, Clone, Default)]
pub struct TraceStats {
pub node_count: usize,
pub user_branch_count: usize,
pub state_transition_count: usize,
pub max_depth: usize,
pub exit_count: usize,
}
#[derive(Debug, Clone)]
pub struct TraceNode {
pub id: usize,
pub start_block: usize,
pub instructions: Vec<InstructionWithValues>,
pub blocks_visited: Vec<usize>,
pub terminator: TraceTerminator,
}
#[derive(Debug, Clone)]
pub enum TraceTerminator {
Exit {
block: usize,
},
StateTransition {
from_state: i64,
to_state: i64,
target_block: usize,
continues: Box<TraceNode>,
},
UserBranch {
block: usize,
condition: SsaVarId,
true_branch: Box<TraceNode>,
false_branch: Box<TraceNode>,
},
UserSwitch {
block: usize,
value: SsaVarId,
cases: Vec<(i64, Box<TraceNode>)>,
default: Box<TraceNode>,
},
Stopped { reason: StopReason },
LoopBack {
target_block: usize,
state: i64,
},
}
impl TraceTree {
#[must_use]
pub fn new(root: TraceNode) -> Self {
Self {
root,
dispatcher: None,
state_tainted: HashSet::new(),
stats: TraceStats::default(),
}
}
#[must_use]
pub fn is_state_tainted(&self, var: SsaVarId) -> bool {
self.state_tainted.contains(&var)
}
pub fn mark_tainted(&mut self, var: SsaVarId) {
self.state_tainted.insert(var);
}
}
impl TraceNode {
pub fn new(id: usize, start_block: usize) -> Self {
Self {
id,
start_block,
instructions: Vec::new(),
blocks_visited: vec![start_block],
terminator: TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow { block: start_block },
},
}
}
pub fn add_instruction(&mut self, instr: InstructionWithValues) {
self.instructions.push(instr);
}
pub fn visit_block(&mut self, block: usize) {
self.blocks_visited.push(block);
}
pub fn set_terminator(&mut self, terminator: TraceTerminator) {
self.terminator = terminator;
}
pub fn is_exit(&self) -> bool {
matches!(self.terminator, TraceTerminator::Exit { .. })
}
pub fn is_user_branch(&self) -> bool {
matches!(
self.terminator,
TraceTerminator::UserBranch { .. } | TraceTerminator::UserSwitch { .. }
)
}
}
struct TreeTraceContext<'a> {
ssa: &'a SsaFunction,
evaluator: SsaEvaluator<'a>,
assembly: Option<&'a CilObject>,
dispatcher: Option<TracedDispatcher>,
state_tainted: HashSet<SsaVarId>,
next_node_id: usize,
total_visits: usize,
visited_states: HashSet<(usize, i64)>, max_block_visits: usize,
max_tree_depth: usize,
}
impl<'a> TreeTraceContext<'a> {
fn new(
ssa: &'a SsaFunction,
config: &UnflattenConfig,
assembly: Option<&'a CilObject>,
) -> Self {
Self {
ssa,
evaluator: SsaEvaluator::new(ssa, config.pointer_size),
assembly,
dispatcher: None,
state_tainted: HashSet::new(),
next_node_id: 0,
total_visits: 0,
visited_states: HashSet::new(),
max_block_visits: config.max_block_visits,
max_tree_depth: config.max_tree_depth,
}
}
fn with_dispatcher(
ssa: &'a SsaFunction,
dispatcher: TracedDispatcher,
config: &UnflattenConfig,
assembly: Option<&'a CilObject>,
) -> Self {
let mut ctx = Self::new(ssa, config, assembly);
if let Some(state_var) = dispatcher.state_var {
let state_origin = ssa.variable(state_var).map(SsaVariable::origin);
let taint_config = cff_taint_config(ssa, dispatcher.block, state_origin);
let mut taint = TaintAnalysis::new(taint_config);
taint.add_tainted_var(state_var);
taint.propagate(ssa);
ctx.state_tainted.clone_from(taint.tainted_variables());
}
ctx.dispatcher = Some(dispatcher);
ctx
}
fn next_id(&mut self) -> usize {
let id = self.next_node_id;
self.next_node_id += 1;
id
}
fn is_tainted(&self, var: SsaVarId) -> bool {
self.state_tainted.contains(&var)
}
fn any_tainted(&self, vars: &[SsaVarId]) -> bool {
vars.iter().any(|v| self.is_tainted(*v))
}
fn taint(&mut self, var: SsaVarId) {
self.state_tainted.insert(var);
}
fn current_state(&self) -> Option<i64> {
self.dispatcher
.as_ref()
.and_then(|d| d.state_var)
.and_then(|v| self.evaluator.get_concrete(v))
.and_then(ConstValue::as_i64)
}
fn is_visited(&self, block: usize, state: i64) -> bool {
self.visited_states.contains(&(block, state))
}
fn mark_visited(&mut self, block: usize, state: i64) {
self.visited_states.insert((block, state));
}
fn snapshot_evaluator(&self) -> SsaEvaluator<'a> {
self.evaluator.clone()
}
fn restore_evaluator(&mut self, snapshot: SsaEvaluator<'a>) {
self.evaluator = snapshot;
}
}
pub fn trace_method_tree(
ssa: &SsaFunction,
config: &UnflattenConfig,
assembly: Option<&CilObject>,
) -> TraceTree {
let mut detector = CffDetector::new(ssa);
let dispatcher = detector.detect_best().map(|d| TracedDispatcher {
block: d.block,
switch_var: d.switch_var,
targets: d.cases.clone(),
default: d.default,
state_var: d.state_phi,
});
let mut ctx = match dispatcher {
Some(d) => TreeTraceContext::with_dispatcher(ssa, d, config, assembly),
None => TreeTraceContext::new(ssa, config, assembly),
};
let root = trace_from_block(&mut ctx, 0, 0);
propagate_taint_forward(ssa, &mut ctx.state_tainted);
let mut tree = TraceTree::new(root);
tree.dispatcher = ctx.dispatcher;
tree.state_tainted = ctx.state_tainted;
compute_tree_stats(&tree.root, &mut tree.stats, 0);
tree
}
fn propagate_taint_forward(ssa: &SsaFunction, tainted: &mut HashSet<SsaVarId>) {
let config = TaintConfig {
forward: true,
backward: false,
phi_mode: PhiTaintMode::NoPropagation,
max_iterations: 100,
};
let mut taint = TaintAnalysis::new(config);
taint.add_tainted_vars(tainted.iter().copied());
taint.propagate(ssa);
*tainted = taint.tainted_variables().clone();
}
fn resolve_call_result(
assembly: &CilObject,
method_token: Token,
concrete_args: &[ConstValue],
pointer_size: PointerSize,
) -> Option<ConstValue> {
let method = assembly.method(&method_token)?;
let callee_ssa = method.ssa(assembly)?;
let mut eval = SsaEvaluator::new(&callee_ssa, pointer_size);
for (var, value) in callee_ssa.argument_variables().zip(concrete_args) {
eval.set_concrete(var.id(), value.clone());
}
let trace = eval.execute(0, None, 50);
if !trace.is_complete() {
return None;
}
let last_block_idx = trace.last_block()?;
let last_block = callee_ssa.block(last_block_idx)?;
for instr in last_block.instructions() {
if let SsaOp::Return {
value: Some(ret_var),
} = instr.op()
{
return eval.get_concrete(*ret_var).cloned();
}
}
None
}
fn trace_from_block(ctx: &mut TreeTraceContext<'_>, block_idx: usize, depth: usize) -> TraceNode {
let mut node = TraceNode::new(ctx.next_id(), block_idx);
if depth > ctx.max_tree_depth {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::MaxVisitsExceeded,
});
return node;
}
ctx.total_visits += 1;
if ctx.total_visits > ctx.max_block_visits {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::MaxVisitsExceeded,
});
return node;
}
if let Some(state) = ctx.current_state() {
if ctx.is_visited(block_idx, state) {
node.set_terminator(TraceTerminator::LoopBack {
target_block: block_idx,
state,
});
return node;
}
ctx.mark_visited(block_idx, state);
}
let mut current_block = block_idx;
loop {
let Some(block) = ctx.ssa.block(current_block) else {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow {
block: current_block,
},
});
return node;
};
if node.blocks_visited.len() > 1 {
let prev = node.blocks_visited[node.blocks_visited.len() - 2];
ctx.evaluator.set_predecessor(Some(prev));
}
ctx.evaluator.evaluate_phis(current_block);
for instr in block.instructions() {
let step = trace_instruction_tree(ctx, instr, current_block);
node.add_instruction(step);
}
match handle_terminator_tree(ctx, block, current_block, &mut node, depth) {
TerminatorResult::Continue(next) => {
node.visit_block(next);
current_block = next;
}
TerminatorResult::Done => return node,
}
}
}
enum TerminatorResult {
Continue(usize),
Done,
}
fn handle_terminator_tree(
ctx: &mut TreeTraceContext<'_>,
block: &SsaBlock,
block_idx: usize,
node: &mut TraceNode,
depth: usize,
) -> TerminatorResult {
let Some(terminator) = block.instructions().last() else {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow { block: block_idx },
});
return TerminatorResult::Done;
};
match terminator.op() {
SsaOp::Jump { target } | SsaOp::Leave { target } => {
TerminatorResult::Continue(*target)
}
SsaOp::Branch {
condition,
true_target,
false_target,
} => {
let is_tainted = ctx.is_tainted(*condition);
if is_tainted {
match ctx
.evaluator
.get_concrete(*condition)
.and_then(ConstValue::as_i64)
{
Some(v) if v != 0 => TerminatorResult::Continue(*true_target),
Some(_) => TerminatorResult::Continue(*false_target),
None => {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow { block: block_idx },
});
TerminatorResult::Done
}
}
} else {
let snapshot = ctx.snapshot_evaluator();
ctx.evaluator.set_predecessor(Some(block_idx));
let true_node = trace_from_block(ctx, *true_target, depth + 1);
ctx.restore_evaluator(snapshot);
ctx.evaluator.set_predecessor(Some(block_idx));
let false_node = trace_from_block(ctx, *false_target, depth + 1);
node.set_terminator(TraceTerminator::UserBranch {
block: block_idx,
condition: *condition,
true_branch: Box::new(true_node),
false_branch: Box::new(false_node),
});
TerminatorResult::Done
}
}
SsaOp::BranchCmp {
left,
right,
true_target,
false_target,
..
} => {
let is_tainted = ctx.is_tainted(*left) || ctx.is_tainted(*right);
if is_tainted {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow { block: block_idx },
});
TerminatorResult::Done
} else {
let snapshot = ctx.snapshot_evaluator();
ctx.evaluator.set_predecessor(Some(block_idx));
let true_node = trace_from_block(ctx, *true_target, depth + 1);
ctx.restore_evaluator(snapshot);
ctx.evaluator.set_predecessor(Some(block_idx));
let false_node = trace_from_block(ctx, *false_target, depth + 1);
node.set_terminator(TraceTerminator::UserBranch {
block: block_idx,
condition: *left, true_branch: Box::new(true_node),
false_branch: Box::new(false_node),
});
TerminatorResult::Done
}
}
SsaOp::Switch {
value,
targets,
default,
} => {
let is_dispatcher = ctx
.dispatcher
.as_ref()
.is_some_and(|d| d.block == block_idx);
if is_dispatcher || ctx.is_tainted(*value) {
if let Some(idx) = ctx
.evaluator
.get_concrete(*value)
.and_then(ConstValue::as_u64)
{
#[allow(clippy::cast_possible_truncation)]
let idx_usize = idx as usize;
let target = if idx_usize < targets.len() {
targets[idx_usize]
} else {
*default
};
let from_state = ctx.current_state().unwrap_or(0);
ctx.evaluator.set_predecessor(Some(block_idx));
let continues = trace_from_block(ctx, target, depth + 1);
let to_state = ctx.current_state().unwrap_or(0);
node.set_terminator(TraceTerminator::StateTransition {
from_state,
to_state,
target_block: target,
continues: Box::new(continues),
});
TerminatorResult::Done
} else {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow { block: block_idx },
});
TerminatorResult::Done
}
} else {
let snapshot = ctx.snapshot_evaluator();
let mut cases = Vec::new();
for (i, &target) in targets.iter().enumerate() {
ctx.restore_evaluator(snapshot.clone());
ctx.evaluator.set_predecessor(Some(block_idx));
let case_node = trace_from_block(ctx, target, depth + 1);
#[allow(clippy::cast_possible_wrap)]
let case_value = i as i64;
cases.push((case_value, Box::new(case_node)));
}
ctx.restore_evaluator(snapshot);
ctx.evaluator.set_predecessor(Some(block_idx));
let default_node = trace_from_block(ctx, *default, depth + 1);
node.set_terminator(TraceTerminator::UserSwitch {
block: block_idx,
value: *value,
cases,
default: Box::new(default_node),
});
TerminatorResult::Done
}
}
SsaOp::Return { .. } | SsaOp::Throw { .. } => {
node.set_terminator(TraceTerminator::Exit { block: block_idx });
TerminatorResult::Done
}
_ => {
node.set_terminator(TraceTerminator::Stopped {
reason: StopReason::UnknownControlFlow { block: block_idx },
});
TerminatorResult::Done
}
}
}
fn trace_instruction_tree(
ctx: &mut TreeTraceContext<'_>,
instr: &SsaInstruction,
block_idx: usize,
) -> InstructionWithValues {
let input_values: HashMap<SsaVarId, i64> = instr
.uses()
.iter()
.filter_map(|&var| {
ctx.evaluator
.get_concrete(var)
.and_then(ConstValue::as_i64)
.map(|v| (var, v))
})
.collect();
if ctx.any_tainted(&instr.uses()) {
if let Some(def) = instr.def() {
ctx.taint(def);
}
}
ctx.evaluator.evaluate_op(instr.op());
if let SsaOp::Call {
dest: Some(dest),
method,
args,
} = instr.op()
{
if let Some(assembly) = ctx.assembly {
let concrete_args: Option<Vec<ConstValue>> = args
.iter()
.map(|&a| ctx.evaluator.get_concrete(a).cloned())
.collect();
if let Some(concrete_args) = concrete_args {
if let Some(result) = resolve_call_result(
assembly,
method.token(),
&concrete_args,
ctx.evaluator.pointer_size(),
) {
ctx.evaluator.set_concrete(*dest, result);
}
}
}
}
let output_value = instr
.def()
.and_then(|d| ctx.evaluator.get_concrete(d))
.and_then(ConstValue::as_i64);
InstructionWithValues {
instruction: instr.clone(),
block_idx,
input_values,
output_value,
}
}
fn compute_tree_stats(node: &TraceNode, stats: &mut TraceStats, depth: usize) {
stats.node_count += 1;
stats.max_depth = stats.max_depth.max(depth);
match &node.terminator {
TraceTerminator::Exit { .. } => {
stats.exit_count += 1;
}
TraceTerminator::StateTransition { continues, .. } => {
stats.state_transition_count += 1;
compute_tree_stats(continues, stats, depth + 1);
}
TraceTerminator::UserBranch {
true_branch,
false_branch,
..
} => {
stats.user_branch_count += 1;
compute_tree_stats(true_branch, stats, depth + 1);
compute_tree_stats(false_branch, stats, depth + 1);
}
TraceTerminator::UserSwitch { cases, default, .. } => {
stats.user_branch_count += 1;
for (_, case_node) in cases {
compute_tree_stats(case_node, stats, depth + 1);
}
compute_tree_stats(default, stats, depth + 1);
}
TraceTerminator::Stopped { .. } | TraceTerminator::LoopBack { .. } => {}
}
}
#[cfg(test)]
mod tests {
use crate::{
analysis::{
ConstValue, PhiNode, PhiOperand, SsaBlock, SsaFunction, SsaInstruction, SsaOp,
SsaVarId, VariableOrigin,
},
deobfuscation::passes::unflattening::{
tracer::{trace_method_tree, TraceNode, TraceTerminator},
UnflattenConfig,
},
};
fn create_simple_cff() -> SsaFunction {
let mut ssa = SsaFunction::new(0, 1);
let state_var = SsaVarId::new();
let const_var = SsaVarId::new();
let mut b0 = SsaBlock::new(0);
b0.add_instruction(SsaInstruction::synthetic(SsaOp::Const {
dest: const_var,
value: ConstValue::I32(0),
}));
b0.add_instruction(SsaInstruction::synthetic(SsaOp::Jump { target: 1 }));
ssa.add_block(b0);
let mut b1 = SsaBlock::new(1);
let mut phi = PhiNode::new(state_var, VariableOrigin::Local(0));
phi.add_operand(PhiOperand::new(const_var, 0));
b1.add_phi(phi);
b1.add_instruction(SsaInstruction::synthetic(SsaOp::Switch {
value: state_var,
targets: vec![2, 3, 4],
default: 5,
}));
ssa.add_block(b1);
for i in 2..=4 {
let mut b = SsaBlock::new(i);
b.add_instruction(SsaInstruction::synthetic(SsaOp::Jump { target: 1 }));
ssa.add_block(b);
}
let mut b5 = SsaBlock::new(5);
b5.add_instruction(SsaInstruction::synthetic(SsaOp::Return { value: None }));
ssa.add_block(b5);
ssa
}
#[test]
fn test_tree_trace_simple_cff() {
let ssa = create_simple_cff();
let config = UnflattenConfig::default();
let tree = trace_method_tree(&ssa, &config, None);
assert!(tree.dispatcher.is_some(), "Should detect dispatcher");
let dispatcher = tree.dispatcher.as_ref().unwrap();
assert_eq!(dispatcher.block, 1);
assert!(!tree.state_tainted.is_empty(), "Should have tainted vars");
println!("Tree stats: {:?}", tree.stats);
assert!(tree.stats.node_count >= 1, "Should have at least one node");
}
fn create_cff_with_user_branch() -> SsaFunction {
let mut ssa = SsaFunction::new(1, 1); let state_var = SsaVarId::new();
let init_state = SsaVarId::new(); let const_one = SsaVarId::new();
let arg0 = SsaVarId::new();
let user_zero = SsaVarId::new(); let cmp_result = SsaVarId::new();
let mut b0 = SsaBlock::new(0);
b0.add_instruction(SsaInstruction::synthetic(SsaOp::Const {
dest: init_state,
value: ConstValue::I32(0),
}));
b0.add_instruction(SsaInstruction::synthetic(SsaOp::Const {
dest: arg0,
value: ConstValue::I32(42), }));
b0.add_instruction(SsaInstruction::synthetic(SsaOp::Jump { target: 1 }));
ssa.add_block(b0);
let mut b1 = SsaBlock::new(1);
let mut phi = PhiNode::new(state_var, VariableOrigin::Local(0));
phi.add_operand(PhiOperand::new(init_state, 0)); phi.add_operand(PhiOperand::new(const_one, 3)); phi.add_operand(PhiOperand::new(const_one, 4)); b1.add_phi(phi);
b1.add_instruction(SsaInstruction::synthetic(SsaOp::Switch {
value: state_var,
targets: vec![2, 5], default: 6,
}));
ssa.add_block(b1);
let mut b2 = SsaBlock::new(2);
b2.add_instruction(SsaInstruction::synthetic(SsaOp::Const {
dest: const_one,
value: ConstValue::I32(1),
}));
b2.add_instruction(SsaInstruction::synthetic(SsaOp::Const {
dest: user_zero,
value: ConstValue::I32(0),
}));
b2.add_instruction(SsaInstruction::synthetic(SsaOp::Cgt {
dest: cmp_result,
left: arg0,
right: user_zero,
unsigned: false,
}));
b2.add_instruction(SsaInstruction::synthetic(SsaOp::Branch {
condition: cmp_result,
true_target: 3, false_target: 4, }));
ssa.add_block(b2);
let mut b3a = SsaBlock::new(3);
b3a.add_instruction(SsaInstruction::synthetic(SsaOp::Jump { target: 1 }));
ssa.add_block(b3a);
let mut b3b = SsaBlock::new(4);
b3b.add_instruction(SsaInstruction::synthetic(SsaOp::Jump { target: 1 }));
ssa.add_block(b3b);
let mut b5 = SsaBlock::new(5);
b5.add_instruction(SsaInstruction::synthetic(SsaOp::Return { value: None }));
ssa.add_block(b5);
let mut b6 = SsaBlock::new(6);
b6.add_instruction(SsaInstruction::synthetic(SsaOp::Return { value: None }));
ssa.add_block(b6);
ssa
}
#[test]
fn test_tree_trace_with_user_branch() {
let ssa = create_cff_with_user_branch();
let config = UnflattenConfig::default();
let tree = trace_method_tree(&ssa, &config, None);
println!("=== Tree Trace with User Branch ===");
println!("Dispatcher: {:?}", tree.dispatcher);
println!("Tainted vars: {:?}", tree.state_tainted);
println!("Stats: {:?}", tree.stats);
assert!(
tree.stats.user_branch_count > 0,
"Should have forked at branches"
);
assert!(tree.stats.exit_count > 0, "Should have exit points");
println!("User branch count: {}", tree.stats.user_branch_count);
println!("Exit count: {}", tree.stats.exit_count);
fn find_user_branch(node: &TraceNode, depth: usize) -> bool {
if depth > 200 {
return false; }
match &node.terminator {
TraceTerminator::UserBranch { .. } => {
true
}
TraceTerminator::StateTransition { continues, .. } => {
find_user_branch(continues, depth + 1)
}
TraceTerminator::UserSwitch { cases, default, .. } => {
cases.iter().any(|(_, n)| find_user_branch(n, depth + 1))
|| find_user_branch(default, depth + 1)
}
_ => false,
}
}
println!(
"Stats confirm {} user branches were created",
tree.stats.user_branch_count
);
assert!(
tree.stats.user_branch_count > 0,
"Stats must show user branches"
);
}
}