use std::collections::{HashSet, VecDeque};
use rayon::prelude::*;
use crate::{
analysis::{ControlFlow, SsaEvaluator, SsaFunction, SsaOp, SsaVarId, VariableOrigin},
deobfuscation::passes::unflattening::{
dispatcher::{analyze_switch_dispatcher, Dispatcher, DispatcherInfo},
statevar::{identify_state_variable, StateVariable},
UnflattenConfig,
},
utils::{
graph::{
algorithms::{compute_dominators, DominatorTree},
GraphBase, NodeId, Successors,
},
BitSet,
},
};
#[derive(Debug, Clone)]
pub struct EntryPoint {
pub block: usize,
pub initial_state: Option<i64>,
pub condition: Option<EntryCondition>,
pub state_var: Option<SsaVarId>,
}
#[derive(Debug, Clone)]
pub enum EntryCondition {
Compare {
var: SsaVarId,
value: i64,
is_equal: bool,
},
SwitchCase(i64),
Boolean {
var: SsaVarId,
when_true: bool,
},
Argument {
index: u16,
expected: Option<i64>,
},
}
impl EntryPoint {
#[must_use]
pub fn new(block: usize) -> Self {
Self {
block,
initial_state: None,
condition: None,
state_var: None,
}
}
#[must_use]
pub fn with_state(block: usize, initial_state: i64) -> Self {
Self {
block,
initial_state: Some(initial_state),
condition: None,
state_var: None,
}
}
pub fn with_condition(mut self, condition: EntryCondition) -> Self {
self.condition = Some(condition);
self
}
pub fn with_state_var(mut self, var: SsaVarId) -> Self {
self.state_var = Some(var);
self
}
#[must_use]
pub fn is_unconditional(&self) -> bool {
self.condition.is_none()
}
#[must_use]
pub fn has_known_state(&self) -> bool {
self.initial_state.is_some()
}
}
struct SsaGraphAdapter<'a> {
ssa: &'a SsaFunction,
}
impl<'a> SsaGraphAdapter<'a> {
fn new(ssa: &'a SsaFunction) -> Self {
Self { ssa }
}
}
impl GraphBase for SsaGraphAdapter<'_> {
fn node_count(&self) -> usize {
self.ssa.block_count()
}
fn node_ids(&self) -> impl Iterator<Item = NodeId> {
(0..self.ssa.block_count()).map(NodeId::new)
}
}
impl Successors for SsaGraphAdapter<'_> {
fn successors(&self, node: NodeId) -> impl Iterator<Item = NodeId> {
self.ssa
.block_successors(node.index())
.into_iter()
.map(NodeId::new)
}
}
#[derive(Debug, Clone)]
pub struct CffPattern {
pub dispatcher_block: usize,
pub dispatcher: DispatcherInfo,
pub state_var: Option<StateVariable>,
pub case_blocks: BitSet,
pub entry_block: Option<usize>,
pub entry_points: Vec<EntryPoint>,
pub exit_blocks: BitSet,
pub confidence: f64,
}
impl CffPattern {
#[must_use]
pub fn case_count(&self) -> usize {
self.dispatcher.case_count()
}
#[must_use]
pub fn is_confuserex_style(&self) -> bool {
matches!(&self.dispatcher, DispatcherInfo::Switch { transform, .. }
if transform.modulo_divisor().is_some())
}
#[must_use]
pub fn has_multiple_entries(&self) -> bool {
self.entry_points.len() > 1
}
#[must_use]
pub fn entry_count(&self) -> usize {
self.entry_points.len()
}
#[must_use]
pub fn primary_entry(&self) -> Option<&EntryPoint> {
self.entry_points.first()
}
#[must_use]
pub fn entries_with_states(&self) -> Vec<&EntryPoint> {
self.entry_points
.iter()
.filter(|e| e.has_known_state())
.collect()
}
#[must_use]
pub fn initial_states(&self) -> Vec<i64> {
self.entry_points
.iter()
.filter_map(|e| e.initial_state)
.collect()
}
}
pub struct CffDetector<'a> {
ssa: &'a SsaFunction,
config: UnflattenConfig,
dom_tree: Option<DominatorTree>,
}
impl<'a> CffDetector<'a> {
#[must_use]
pub fn new(ssa: &'a SsaFunction) -> Self {
Self {
ssa,
config: UnflattenConfig::default(),
dom_tree: None,
}
}
#[must_use]
pub fn with_config(ssa: &'a SsaFunction, config: &UnflattenConfig) -> Self {
Self {
ssa,
config: config.clone(),
dom_tree: None,
}
}
fn get_dom_tree(&mut self) -> &DominatorTree {
let ssa = self.ssa;
self.dom_tree.get_or_insert_with(|| {
let adapter = SsaGraphAdapter::new(ssa);
compute_dominators(&adapter, NodeId::new(0))
})
}
pub fn detect_best(&mut self) -> Option<Dispatcher> {
let pattern = self.detect()?;
Self::pattern_to_dispatcher(&pattern)
}
pub fn detect_all_dispatchers(&mut self) -> Vec<Dispatcher> {
let patterns = self.detect_all();
patterns
.iter()
.filter_map(Self::pattern_to_dispatcher)
.collect()
}
fn detect_all(&mut self) -> Vec<CffPattern> {
if self.ssa.block_count() < 4 {
return Vec::new();
}
let candidates = self.find_dispatcher_candidates();
if candidates.is_empty() {
return Vec::new();
}
let _ = self.get_dom_tree();
let dom_tree = self.dom_tree.as_ref().unwrap();
let mut patterns: Vec<CffPattern> = candidates
.into_par_iter()
.filter_map(|block_idx| self.analyze_candidate_with_dom(block_idx, dom_tree))
.collect();
patterns.sort_by(|a, b| {
b.confidence
.partial_cmp(&a.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
});
patterns
}
fn pattern_to_dispatcher(pattern: &CffPattern) -> Option<Dispatcher> {
match &pattern.dispatcher {
DispatcherInfo::Switch {
block,
switch_var,
cases,
default,
transform,
} => {
let mut dispatcher = Dispatcher::new(*block, *switch_var, cases.clone(), *default);
if let Some(ref state_var) = pattern.state_var {
if let Some(phi_var) = state_var.dispatcher_var {
dispatcher = dispatcher.with_state_phi(phi_var);
}
}
dispatcher = dispatcher
.with_transform(transform.clone())
.with_confidence(pattern.confidence);
if let Some(state) = pattern.primary_entry().and_then(|e| e.initial_state) {
dispatcher = dispatcher.with_initial_state(state);
}
Some(dispatcher)
}
DispatcherInfo::IfElseChain {
head_block,
state_var,
comparisons,
default,
} => {
let cases: Vec<usize> = comparisons.iter().map(|(_, target)| *target).collect();
let default_block = default.unwrap_or(*head_block);
let mut dispatcher = Dispatcher::new(*head_block, *state_var, cases, default_block);
if let Some(ref state_var_info) = pattern.state_var {
if let Some(phi_var) = state_var_info.dispatcher_var {
dispatcher = dispatcher.with_state_phi(phi_var);
}
}
dispatcher = dispatcher.with_confidence(pattern.confidence);
Some(dispatcher)
}
DispatcherInfo::ComputedJump {
block,
target_var,
jump_table,
..
} => {
let default_block = jump_table.first().copied().unwrap_or(0);
let mut dispatcher =
Dispatcher::new(*block, *target_var, jump_table.clone(), default_block);
if let Some(ref state_var_info) = pattern.state_var {
if let Some(phi_var) = state_var_info.dispatcher_var {
dispatcher = dispatcher.with_state_phi(phi_var);
}
}
dispatcher = dispatcher.with_confidence(pattern.confidence);
Some(dispatcher)
}
}
}
pub fn detect(&mut self) -> Option<CffPattern> {
if self.ssa.block_count() < 4 {
return None;
}
let candidates = self.find_dispatcher_candidates();
if candidates.is_empty() {
return None;
}
let _ = self.get_dom_tree();
let dom_tree = self.dom_tree.as_ref().unwrap();
let mut best_pattern: Option<CffPattern> = None;
let mut best_score = 0.0;
for block_idx in candidates {
if let Some(pattern) = self.analyze_candidate_with_dom(block_idx, dom_tree) {
if pattern.confidence > best_score {
best_score = pattern.confidence;
best_pattern = Some(pattern);
}
}
}
best_pattern
}
fn find_dispatcher_candidates(&self) -> Vec<usize> {
(0..self.ssa.block_count())
.into_par_iter()
.filter(|&block_idx| self.is_dispatcher_candidate(block_idx))
.collect()
}
fn is_argument_derived(&self, mut var: SsaVarId) -> bool {
let mut visited: HashSet<SsaVarId> = HashSet::new();
loop {
if !visited.insert(var) {
return false;
}
let Some(variable) = self.ssa.variable(var) else {
return false;
};
if matches!(variable.origin(), VariableOrigin::Argument(_)) {
return true;
}
match self.ssa.get_definition(var) {
Some(SsaOp::Copy { src, .. }) => var = *src,
_ => return false,
}
}
}
fn is_dispatcher_candidate(&self, block_idx: usize) -> bool {
let Some(block) = self.ssa.block(block_idx) else {
return false;
};
if block.instructions().is_empty() {
return false;
}
let has_switch = block
.instructions()
.iter()
.any(|instr| matches!(instr.op(), SsaOp::Switch { .. }));
if has_switch {
let switch_on_argument = block.instructions().iter().any(|instr| {
let SsaOp::Switch { value, .. } = instr.op() else {
return false;
};
self.is_argument_derived(*value)
});
if switch_on_argument {
return false;
}
let pred_count = self.ssa.block_predecessors(block_idx).len();
let has_self_loop = block
.instructions()
.iter()
.any(|i| i.op().successors().contains(&block_idx));
let effective_preds = pred_count + usize::from(has_self_loop);
return effective_preds >= 2;
}
let has_branch = block
.instructions()
.iter()
.any(|instr| matches!(instr.op(), SsaOp::Branch { .. }));
if has_branch {
let pred_count = self.ssa.block_predecessors(block_idx).len();
return pred_count >= 3;
}
false
}
fn analyze_candidate_with_dom(
&self,
block_idx: usize,
dom_tree: &DominatorTree,
) -> Option<CffPattern> {
let dispatcher = analyze_switch_dispatcher(self.ssa, block_idx)?;
let state_var = identify_state_variable(self.ssa, block_idx, dispatcher.dispatch_var());
let mut case_blocks = BitSet::new(self.ssa.block_count());
for target in dispatcher.all_targets() {
case_blocks.insert(target);
}
let exit_blocks = self.find_exit_blocks(block_idx, &case_blocks);
let entry_block = self.find_entry_block(block_idx);
let entry_points = self.find_entry_points(block_idx, &case_blocks, state_var.as_ref());
let confidence = self.compute_confidence(
block_idx,
&dispatcher,
state_var.as_ref(),
&case_blocks,
&exit_blocks,
dom_tree,
);
Some(CffPattern {
dispatcher_block: block_idx,
dispatcher,
state_var,
case_blocks,
entry_block,
entry_points,
exit_blocks,
confidence,
})
}
fn find_exit_blocks(&self, dispatcher_block: usize, case_blocks: &BitSet) -> BitSet {
let mut exits = BitSet::new(self.ssa.block_count());
for case_block in case_blocks.iter() {
for succ in self.ssa.block_successors(case_block) {
if succ != dispatcher_block && !case_blocks.contains(succ) {
exits.insert(succ);
}
}
}
exits
}
fn find_entry_block(&self, dispatcher_block: usize) -> Option<usize> {
let preds = self.ssa.block_predecessors(dispatcher_block);
for pred in preds {
if pred == 0 {
return Some(pred);
}
}
if dispatcher_block == 0 {
return None;
}
None
}
fn find_entry_points(
&self,
dispatcher_block: usize,
case_blocks: &BitSet,
state_var: Option<&StateVariable>,
) -> Vec<EntryPoint> {
let mut entries = Vec::new();
let preds = self.ssa.block_predecessors(dispatcher_block);
let entry_blocks: Vec<usize> = preds
.iter()
.filter(|&&pred| !case_blocks.contains(pred))
.copied()
.collect();
if entry_blocks.is_empty() {
let entry = EntryPoint::new(dispatcher_block);
entries.push(entry);
return entries;
}
for &entry_block in &entry_blocks {
let mut entry = EntryPoint::new(entry_block);
if let Some(initial) =
self.extract_initial_state(entry_block, dispatcher_block, state_var)
{
entry.initial_state = Some(initial);
}
if entry_blocks.len() > 1 {
if let Some(condition) = self.extract_entry_condition(entry_block, &entry_blocks) {
entry.condition = Some(condition);
}
}
if let Some(sv) = state_var {
if let Some(ssa_var) = sv.var.as_ssa_var() {
entry.state_var = Some(ssa_var);
}
}
entries.push(entry);
}
entries
}
fn extract_initial_state(
&self,
block_idx: usize,
dispatcher_block: usize,
state_var: Option<&StateVariable>,
) -> Option<i64> {
let block = self.ssa.block(block_idx)?;
if let Some(sv) = state_var {
if let Some(ssa_var) = sv.var.as_ssa_var() {
for instr in block.instructions() {
match instr.op() {
SsaOp::Const { dest, value } if *dest == ssa_var => {
return value.as_i64();
}
SsaOp::Copy { dest, src } if *dest == ssa_var => {
if let Some(SsaOp::Const { value, .. }) = self.ssa.get_definition(*src)
{
return value.as_i64();
}
}
_ => {}
}
}
}
if let Some(dispatcher_var) = sv.dispatcher_var {
if let Some(disp_block) = self.ssa.block(dispatcher_block) {
for phi in disp_block.phi_nodes() {
if phi.result() == dispatcher_var {
if let Some(op) = phi
.operands()
.iter()
.find(|op| op.predecessor() == block_idx)
{
if let Some(val) = self.trace_to_constant(op.value(), 20) {
return Some(val);
}
}
}
}
}
if let Some(val) =
self.evaluate_entry_to_dispatcher(dispatcher_block, dispatcher_var)
{
return Some(val);
}
}
}
None
}
fn trace_to_constant(&self, var: SsaVarId, max_depth: usize) -> Option<i64> {
let mut stack: Vec<(SsaVarId, usize)> = Vec::new();
let mut visited: HashSet<SsaVarId> = HashSet::new();
stack.push((var, max_depth));
while let Some((v, depth)) = stack.pop() {
if depth == 0 || !visited.insert(v) {
continue;
}
if let Some(def) = self.ssa.get_definition(v) {
match def {
SsaOp::Const { value, .. } => {
if let Some(i) = value.as_i64() {
return Some(i);
}
}
SsaOp::Copy { src, .. } => {
stack.push((*src, depth - 1));
}
_ => {}
}
continue;
}
let Some(variable) = self.ssa.variable(v) else {
continue;
};
let def_site = variable.def_site();
if def_site.instruction.is_some() {
continue; }
let Some(block) = self.ssa.block(def_site.block) else {
continue;
};
for phi in block.phi_nodes() {
if phi.result() == v {
for op in phi.operands().iter().rev() {
stack.push((op.value(), depth - 1));
}
break;
}
}
}
None
}
fn evaluate_entry_to_dispatcher(
&self,
dispatcher_block: usize,
dispatcher_var: SsaVarId,
) -> Option<i64> {
let mut evaluator = SsaEvaluator::new(self.ssa, self.config.pointer_size);
let mut current = 0usize;
let mut visited = BitSet::new(self.ssa.block_count());
for _ in 0..30 {
if visited.contains(current) {
break;
}
visited.insert(current);
evaluator.evaluate_block(current);
if current == dispatcher_block {
return evaluator
.get_concrete(dispatcher_var)
.and_then(|v| v.as_i64());
}
match evaluator.next_block(current) {
ControlFlow::Continue(next) if !visited.contains(next) => {
evaluator.set_predecessor(Some(current));
current = next;
}
_ => {
let Some(block) = self.ssa.block(current) else {
break;
};
let succs = block.successors();
if let Some(&next) = succs.iter().find(|&&s| !visited.contains(s)) {
evaluator.set_predecessor(Some(current));
current = next;
} else {
break;
}
}
}
}
None
}
fn extract_entry_condition(
&self,
entry_block: usize,
_all_entries: &[usize],
) -> Option<EntryCondition> {
let preds = self.ssa.block_predecessors(entry_block);
for pred in preds {
let Some(pred_block) = self.ssa.block(pred) else {
continue;
};
for instr in pred_block.instructions() {
if let SsaOp::Branch {
condition,
true_target,
false_target,
} = instr.op()
{
let is_true_branch = *true_target == entry_block;
let is_false_branch = *false_target == entry_block;
if is_true_branch || is_false_branch {
if let Some(SsaOp::Ceq { left, right, .. }) =
self.ssa.get_definition(*condition)
{
if let Some(SsaOp::Const { value, .. }) =
self.ssa.get_definition(*right)
{
if let Some(val) = value.as_i64() {
return Some(EntryCondition::Compare {
var: *left,
value: val,
is_equal: is_true_branch,
});
}
}
}
return Some(EntryCondition::Boolean {
var: *condition,
when_true: is_true_branch,
});
}
}
}
}
None
}
fn compute_confidence(
&self,
dispatcher_block: usize,
dispatcher: &DispatcherInfo,
state_var: Option<&StateVariable>,
case_blocks: &BitSet,
exit_blocks: &BitSet,
dom_tree: &DominatorTree,
) -> f64 {
let ssa = self.ssa;
let mut score = 0.0;
let case_count = case_blocks.count();
let w = &self.config.confidence_weights;
if case_count >= 3 {
score += w.case_count_base;
}
if case_count >= 5 {
score += w.case_count_bonus;
}
if case_count >= 10 {
score += w.case_count_bonus;
}
if let Some(sv) = state_var {
if sv.dispatcher_var.is_some() {
score += w.state_variable;
}
if sv.def_count() >= case_count.saturating_sub(1) {
score += w.state_update_coverage;
}
}
let pred_count = ssa.block_predecessors(dispatcher_block).len();
if pred_count >= case_count / 2 {
score += w.predecessor_ratio;
}
let dispatcher_node = NodeId::new(dispatcher_block);
let back_edge_count = case_blocks
.iter()
.filter(|&b| {
can_reach_dispatcher(
ssa,
b,
dispatcher_block,
dispatcher_node,
dom_tree,
self.config.max_backedge_depth,
)
})
.count();
#[allow(clippy::cast_precision_loss)]
let back_edge_ratio = back_edge_count as f64 / case_count.max(1) as f64;
score += back_edge_ratio * w.back_edge_ratio;
if !exit_blocks.is_empty() {
score += w.exit_blocks;
}
if matches!(dispatcher, DispatcherInfo::Switch { .. }) {
score += w.switch_bonus;
}
let transform = dispatcher.transform();
if transform.modulo_divisor().is_some() {
score += w.modulo_bonus;
}
let dominated_count = case_blocks
.iter()
.filter(|&case_block| {
case_block == dispatcher_block
|| dom_tree.dominates(dispatcher_node, NodeId::new(case_block))
})
.count();
#[allow(clippy::cast_precision_loss)]
let dominance_ratio = dominated_count as f64 / case_blocks.count().max(1) as f64;
score += dominance_ratio * w.dominance_ratio;
let total_blocks = ssa.block_count();
if total_blocks > 0 {
let dominated_total = (0..total_blocks)
.filter(|&b| {
b == dispatcher_block || dom_tree.dominates(dispatcher_node, NodeId::new(b))
})
.count();
#[allow(clippy::cast_precision_loss)]
let coverage = dominated_total as f64 / total_blocks as f64;
score += coverage * w.method_coverage;
}
score.min(1.0)
}
}
fn can_reach_dispatcher(
ssa: &SsaFunction,
from: usize,
dispatcher_block: usize,
dispatcher_node: NodeId,
dom_tree: &DominatorTree,
max_depth: usize,
) -> bool {
if ssa.block_successors(from).contains(&dispatcher_block) {
return true;
}
let mut queue = VecDeque::new();
let mut visited = BitSet::new(ssa.block_count());
for succ in ssa.block_successors(from) {
if succ != from {
queue.push_back((succ, 1));
}
}
visited.insert(from);
while let Some((block, depth)) = queue.pop_front() {
if block == dispatcher_block {
return true;
}
if depth >= max_depth || !visited.insert(block) {
continue;
}
if !dom_tree.dominates(dispatcher_node, NodeId::new(block)) {
continue;
}
for succ in ssa.block_successors(block) {
if !visited.contains(succ) {
queue.push_back((succ, depth + 1));
}
}
}
false
}
#[cfg(test)]
mod tests {
use crate::{
analysis::SsaVarId,
deobfuscation::passes::unflattening::dispatcher::{DispatcherInfo, StateTransform},
utils::BitSet,
};
use super::{CffPattern, EntryCondition, EntryPoint};
#[test]
fn test_cff_pattern_case_count() {
let pattern = CffPattern {
dispatcher_block: 0,
dispatcher: DispatcherInfo::Switch {
block: 0,
switch_var: SsaVarId::from_index(0),
cases: vec![1, 2, 3, 4, 5],
default: 6,
transform: StateTransform::Modulo(5),
},
state_var: None,
case_blocks: {
let mut cb = BitSet::new(7);
for i in [1, 2, 3, 4, 5, 6] {
cb.insert(i);
}
cb
},
entry_block: None,
entry_points: vec![EntryPoint::with_state(0, 42)],
exit_blocks: BitSet::new(7),
confidence: 0.8,
};
assert_eq!(pattern.case_count(), 5);
assert!(pattern.is_confuserex_style());
assert_eq!(pattern.entry_count(), 1);
assert!(!pattern.has_multiple_entries());
}
#[test]
fn test_entry_point() {
let entry = EntryPoint::new(0);
assert!(entry.is_unconditional());
assert!(!entry.has_known_state());
let entry_with_state = EntryPoint::with_state(1, 100);
assert!(entry_with_state.has_known_state());
assert_eq!(entry_with_state.initial_state, Some(100));
let var = SsaVarId::from_index(0);
let entry_with_condition = EntryPoint::new(2).with_condition(EntryCondition::Boolean {
var,
when_true: true,
});
assert!(!entry_with_condition.is_unconditional());
}
#[test]
fn test_cff_pattern_multiple_entries() {
let pattern = CffPattern {
dispatcher_block: 1,
dispatcher: DispatcherInfo::Switch {
block: 1,
switch_var: SsaVarId::from_index(0),
cases: vec![2, 3],
default: 4,
transform: StateTransform::Identity,
},
state_var: None,
case_blocks: {
let mut cb = BitSet::new(7);
for i in [2, 3, 4] {
cb.insert(i);
}
cb
},
entry_block: None,
entry_points: vec![EntryPoint::with_state(0, 10), EntryPoint::with_state(5, 20)],
exit_blocks: {
let mut eb = BitSet::new(7);
eb.insert(6);
eb
},
confidence: 0.7,
};
assert!(pattern.has_multiple_entries());
assert_eq!(pattern.entry_count(), 2);
assert_eq!(pattern.initial_states(), vec![10, 20]);
let primary = pattern.primary_entry().unwrap();
assert_eq!(primary.block, 0);
assert_eq!(primary.initial_state, Some(10));
}
}