use std::collections::{hash_map::Entry, HashMap, HashSet, VecDeque};
use dashmap::DashSet;
use crate::{
analysis::{
ConstValue, PhiTaintMode, SsaFunction, SsaOp, SsaVarId, TaintAnalysis, TaintConfig,
},
assembly::Operand,
compiler::CompilerContext,
deobfuscation::{
CfgInfo, StateMachineCallSite, StateMachineProvider, StateMachineSemantics,
StateSlotOperation, StateUpdateCall,
},
metadata::{
method::MethodModifiers, signatures::TypeSignature, tables::TableId, token::Token,
typesystem::CilType,
},
prelude::FlowType,
utils::graph::NodeId,
CilObject,
};
/// A call to a decryptor method discovered while scanning an assembly.
///
/// Instances are produced by [`find_call_sites`]; one entry is recorded per
/// matching call instruction, so the same `caller` may appear several times.
#[derive(Debug, Clone)]
pub struct DetectedCallSite {
    /// Token of the method whose body contains the decryptor call.
    pub caller: Token,
    /// `true` when the decryptor argument was classified as being derived
    /// through the state machine (XOR with a constant, possibly flowing
    /// through another call) rather than passed directly.
    pub uses_statemachine: bool,
}
impl DetectedCallSite {
    /// Creates a call site whose argument does not go through the state machine.
    fn direct(caller: Token) -> Self {
        Self::tagged(caller, false)
    }

    /// Creates a call site whose argument is derived through the state machine.
    fn statemachine(caller: Token) -> Self {
        Self::tagged(caller, true)
    }

    /// Shared constructor behind [`Self::direct`] and [`Self::statemachine`].
    fn tagged(caller: Token, uses_statemachine: bool) -> Self {
        Self {
            caller,
            uses_statemachine,
        }
    }
}
/// State machine provider for the ConfuserEx `CFGCtx` pattern.
///
/// Pairs the detected [`StateMachineSemantics`] with the set of methods the
/// provider should be applied to.
#[derive(Debug)]
pub struct ConfuserExStateMachine {
    /// Detected semantics (init/update method tokens, slot operations, masks).
    semantics: StateMachineSemantics,
    /// Tokens of the methods this provider applies to.
    methods: DashSet<Token>,
}
impl ConfuserExStateMachine {
pub fn new(semantics: StateMachineSemantics, methods: impl IntoIterator<Item = Token>) -> Self {
let method_set = DashSet::new();
for method in methods {
method_set.insert(method);
}
Self {
semantics,
methods: method_set,
}
}
}
impl StateMachineProvider for ConfuserExStateMachine {
fn name(&self) -> &'static str {
"ConfuserEx CFGCtx"
}
fn semantics(&self) -> &StateMachineSemantics {
&self.semantics
}
fn applies_to_method(&self, method: Token) -> bool {
self.methods.contains(&method)
}
fn methods(&self) -> Vec<Token> {
self.methods.iter().map(|r| *r).collect()
}
fn find_initializations(
&self,
ssa: &SsaFunction,
ctx: &CompilerContext,
method_token: Token,
_assembly: &CilObject,
) -> Vec<(usize, usize, u32)> {
let mut seeds = Vec::new();
let Some(init_method_token) = self.semantics.init_method else {
return seeds;
};
for (block_idx, block) in ssa.iter_blocks() {
for (instr_idx, instr) in block.instructions().iter().enumerate() {
match instr.op() {
SsaOp::Call { method, args, .. }
if method.token() == init_method_token && args.len() >= 2 =>
{
let seed_var = args[1];
if let Some(ConstValue::I32(seed)) =
self.trace_to_constant(seed_var, ssa, ctx, method_token)
{
#[allow(clippy::cast_sign_loss)]
seeds.push((block_idx, instr_idx, seed as u32));
}
}
SsaOp::NewObj { ctor, args, .. }
if ctor.token() == init_method_token && args.len() == 1 =>
{
if let Some(ConstValue::I32(seed)) =
self.trace_to_constant(args[0], ssa, ctx, method_token)
{
#[allow(clippy::cast_sign_loss)]
seeds.push((block_idx, instr_idx, seed as u32));
}
}
_ => {}
}
}
}
seeds
}
fn find_state_updates(&self, ssa: &SsaFunction) -> Vec<StateUpdateCall> {
let mut updates = Vec::new();
let Some(update_method_token) = self.semantics.update_method else {
return updates;
};
for (block_idx, block) in ssa.iter_blocks() {
for (instr_idx, instr) in block.instructions().iter().enumerate() {
if let SsaOp::Call { method, args, dest } | SsaOp::CallVirt { method, args, dest } =
instr.op()
{
if method.token() == update_method_token {
if args.len() >= 3 {
if let Some(dest) = dest {
updates.push(StateUpdateCall {
block_idx,
instr_idx,
dest: *dest,
flag_var: args[1],
increment_var: args[2],
});
}
}
}
}
}
}
updates
}
fn find_decryptor_call_sites(
&self,
ssa: &SsaFunction,
state_updates: &[StateUpdateCall],
decryptor_tokens: &HashSet<Token>,
assembly: &CilObject,
) -> Vec<StateMachineCallSite> {
let mut call_sites = Vec::new();
let mut next_info_map: HashMap<SsaVarId, usize> = HashMap::new();
for (idx, update) in state_updates.iter().enumerate() {
next_info_map.insert(update.dest, idx);
}
for (block_idx, block) in ssa.iter_blocks() {
for (instr_idx, instr) in block.instructions().iter().enumerate() {
let (call_target, args, dest) = match instr.op() {
SsaOp::Call { method, args, dest } | SsaOp::CallVirt { method, args, dest } => {
(method.token(), args, *dest)
}
_ => continue,
};
let Some(dest) = dest else { continue };
let resolved_target =
resolve_method_spec_to_def(assembly, call_target).unwrap_or(call_target);
if !decryptor_tokens.contains(&resolved_target) {
continue;
}
if args.len() != 1 {
continue;
}
let arg_def = ssa.get_definition(args[0]);
let xor_def = match arg_def {
Some(SsaOp::Xor { .. }) => arg_def,
Some(SsaOp::Conv { operand, .. }) => ssa.get_definition(*operand),
_ => None,
};
let Some(SsaOp::Xor { left, right, .. }) = xor_def else {
continue;
};
let (state_var, encoded_var, feeding_idx) =
if let Some(&idx) = next_info_map.get(left) {
(*left, *right, idx)
} else if let Some(&idx) = next_info_map.get(right) {
(*right, *left, idx)
} else {
continue;
};
call_sites.push(StateMachineCallSite {
block_idx,
instr_idx,
dest,
decryptor: resolved_target,
call_target,
state_var,
encoded_var,
feeding_update_idx: feeding_idx,
});
}
}
call_sites
}
fn collect_updates_for_call(
&self,
call_site: &StateMachineCallSite,
all_updates: &[StateUpdateCall],
cfg_info: &CfgInfo<'_>,
seed_block: Option<usize>,
) -> Vec<usize> {
let feeding_update = &all_updates[call_site.feeding_update_idx];
let target_block = feeding_update.block_idx;
if target_block >= cfg_info.node_count {
return Vec::new();
}
let mut updates_by_block: HashMap<usize, Vec<usize>> = HashMap::new();
for (idx, update) in all_updates.iter().enumerate() {
updates_by_block
.entry(update.block_idx)
.or_default()
.push(idx);
}
for indices in updates_by_block.values_mut() {
indices.sort_by_key(|&idx| all_updates[idx].instr_idx);
}
let path = find_path_to_block(cfg_info, target_block);
let block_position: HashMap<usize, usize> = path
.iter()
.enumerate()
.map(|(pos, &block)| (block, pos))
.collect();
let seed_path_pos = seed_block.and_then(|sb| block_position.get(&sb).copied());
let mut relevant_updates: Vec<usize> = Vec::new();
for (&block_idx, update_indices) in &updates_by_block {
let Some(&pos) = block_position.get(&block_idx) else {
continue;
};
if let Some(seed_pos) = seed_path_pos {
if pos < seed_pos {
continue;
}
}
if block_idx == target_block {
for &idx in update_indices {
if all_updates[idx].instr_idx < feeding_update.instr_idx {
relevant_updates.push(idx);
}
}
} else {
relevant_updates.extend(update_indices.iter().copied());
}
}
relevant_updates.sort_by_key(|&idx| {
let update = &all_updates[idx];
let pos = block_position
.get(&update.block_idx)
.copied()
.unwrap_or(usize::MAX);
(pos, update.instr_idx)
});
relevant_updates
}
}
/// Computes a block path from the CFG entry to `target`.
///
/// The primary strategy is a breadth-first search that walks predecessor
/// edges backwards from `target` until the entry block is reached, recording
/// for every visited block the next block on the way to `target`. The path is
/// then read off starting at the entry.
///
/// If the entry cannot be reached that way, the function falls back to
/// following immediate dominators from `target` upwards. That chain stops at
/// the entry, at a block index outside the CFG, or as soon as no new dominator
/// is found, so the fallback path ends at `target` but may not start at the
/// entry.
///
/// Returns `[entry]` when `target` is the entry block.
fn find_path_to_block(cfg_info: &CfgInfo<'_>, target: usize) -> Vec<usize> {
    let entry = cfg_info.entry.index();
    if target == entry {
        return vec![entry];
    }

    // next_hop[b] = successor of `b` on the discovered route towards `target`.
    // `target` itself is marked with the `usize::MAX` sentinel.
    let mut next_hop: HashMap<usize, usize> = HashMap::new();
    next_hop.insert(target, usize::MAX);
    let mut queue = VecDeque::new();
    queue.push_back(target);
    let mut reached_entry = false;
    while let Some(block) = queue.pop_front() {
        if block == entry {
            reached_entry = true;
            break;
        }
        if block >= cfg_info.predecessors.len() {
            continue;
        }
        for &pred in &cfg_info.predecessors[block] {
            if let Entry::Vacant(slot) = next_hop.entry(pred) {
                slot.insert(block);
                queue.push_back(pred);
            }
        }
    }

    if reached_entry {
        let mut path = vec![entry];
        let mut current = entry;
        while current != target {
            match next_hop.get(&current) {
                Some(&next) if next != usize::MAX => {
                    path.push(next);
                    current = next;
                }
                _ => break,
            }
        }
        return path;
    }

    // Fallback: climb the dominator tree from `target` towards the entry.
    let mut path = vec![target];
    let mut current = target;
    let mut visited = HashSet::new();
    visited.insert(target);
    while current != entry {
        if current >= cfg_info.node_count {
            break;
        }
        match cfg_info.dom_tree.immediate_dominator(NodeId::new(current)) {
            // `insert` returning false means a cycle; stop instead of looping.
            Some(idom) if visited.insert(idom.index()) => {
                path.push(idom.index());
                current = idom.index();
            }
            _ => break,
        }
    }
    path.reverse();
    path
}
/// Searches `assembly` for a type matching the CFGCtx state machine shape.
///
/// Types nested in the module type are tried first. If none matches, every
/// type whose name is empty or contains non-ASCII characters is tried; types
/// with a non-empty, pure-ASCII name are skipped in this second pass.
///
/// Returns the semantics of the first matching type, or `None` if no type
/// matches.
pub fn detect_cfgctx_semantics(assembly: &CilObject) -> Option<StateMachineSemantics> {
    if let Some(module_type) = assembly.types().module_type() {
        for (_, nested_ref) in module_type.nested_types.iter() {
            // A dead weak reference only invalidates this one candidate; it
            // must not abort the remaining candidates or the fallback scan.
            let Some(nested_type) = nested_ref.upgrade() else {
                continue;
            };
            if let Some(semantics) = try_detect_cfgctx_from_type(assembly, &nested_type) {
                return Some(semantics);
            }
        }
    }
    for type_entry in assembly.types().iter() {
        let cil_type = type_entry.value();
        if cil_type.name.is_ascii() && !cil_type.name.is_empty() {
            continue;
        }
        if let Some(semantics) = try_detect_cfgctx_from_type(assembly, cil_type) {
            return Some(semantics);
        }
    }
    None
}
fn try_detect_cfgctx_from_type(
assembly: &CilObject,
cil_type: &CilType,
) -> Option<StateMachineSemantics> {
if !cil_type.is_value_type() {
return None;
}
if cil_type.fields.count() != 4 {
return None;
}
let mut ctor_token: Option<Token> = None;
let mut next_token: Option<Token> = None;
let mut multiplier: Option<u32> = None;
for method in &cil_type.query_methods() {
if method.is_ctor() && ctor_token.is_none() {
if let Ok(ssa) = method.ssa(assembly) {
if let Some(mult) = extract_multiplier_from_ssa(&ssa) {
multiplier = Some(mult);
ctor_token = Some(method.token);
}
}
} else if next_token.is_none() {
let sig = &method.signature;
if sig.params.len() == 2 && method.name != ".ctor" {
if let Ok(ssa) = method.ssa(assembly) {
let mut has_switch = false;
let mut has_stfld = false;
let mut has_ldfld = false;
for (_, block) in ssa.iter_blocks() {
for instr in block.instructions() {
match instr.op() {
SsaOp::Switch { .. } => has_switch = true,
SsaOp::StoreField { .. } => has_stfld = true,
SsaOp::LoadField { .. } => has_ldfld = true,
_ => {}
}
}
}
if has_switch && has_stfld && has_ldfld {
next_token = Some(method.token);
}
}
}
}
}
let (Some(init_method), Some(update_method), Some(mult)) = (ctor_token, next_token, multiplier)
else {
return None;
};
let field_tokens: Vec<Token> = (0..cil_type.fields.count())
.filter_map(|i| cil_type.fields.get(i))
.map(|f| f.token)
.collect();
let slot_ops = extract_slot_operations(assembly, update_method, &field_tokens)?;
Some(StateMachineSemantics {
type_token: Some(cil_type.token),
init_method: Some(init_method),
update_method: Some(update_method),
slot_count: 4,
slot_ops,
init_ops: vec![
StateSlotOperation::mul(),
StateSlotOperation::mul(),
StateSlotOperation::mul(),
StateSlotOperation::mul(),
],
init_constant: Some(u64::from(mult)),
explicit_flag_bit: 7,
update_slot_mask: 0x03,
get_slot_mask: 0x03,
get_slot_shift: 2,
})
}
/// Extracts the multiplier constant from a function's SSA.
///
/// Scans every `Mul`/`MulOvf` instruction in block order and returns the
/// first operand (left is checked before right) that is an `i32` constant
/// with a magnitude greater than `0x1000`. The value is returned
/// reinterpreted as `u32`.
///
/// Returns `None` when no multiplication has such an operand.
fn extract_multiplier_from_ssa(ssa: &SsaFunction) -> Option<u32> {
    for (_, block) in ssa.iter_blocks() {
        for instr in block.instructions() {
            let (left, right) = match instr.op() {
                SsaOp::Mul { left, right, .. } | SsaOp::MulOvf { left, right, .. } => {
                    (*left, *right)
                }
                _ => continue,
            };
            for operand in [left, right] {
                if let Some(mult) = get_i32_constant(ssa, operand) {
                    // `unsigned_abs` is total, whereas `abs` overflows for
                    // `i32::MIN` (panic in debug builds, negative wrap in
                    // release). The threshold also rules out zero.
                    if mult.unsigned_abs() > 0x1000 {
                        // Bit-for-bit reinterpretation of the signed constant is intended.
                        #[allow(clippy::cast_sign_loss)]
                        return Some(mult as u32);
                    }
                }
            }
        }
    }
    None
}
/// Determines the arithmetic operation applied to each state slot by the
/// update method `next_method`.
///
/// Every field store in the method whose target is one of `field_tokens` is
/// mapped to the position of that field in `field_tokens`, and the stored
/// value is traced back to an arithmetic operation. When a slot is stored
/// more than once, the first store in block order that could be traced wins.
///
/// Returns the operations ordered by slot index, or `None` when the method or
/// its SSA is unavailable or when exactly the slots `0..4` were not all
/// resolved.
fn extract_slot_operations(
    assembly: &CilObject,
    next_method: Token,
    field_tokens: &[Token],
) -> Option<Vec<StateSlotOperation>> {
    let update_method = assembly.method(&next_method)?;
    let ssa = update_method.ssa(assembly).ok()?;

    let mut per_slot: Vec<(usize, StateSlotOperation)> = Vec::new();
    for (_, block) in ssa.iter_blocks() {
        for instr in block.instructions() {
            let SsaOp::StoreField { field, value, .. } = instr.op() else {
                continue;
            };
            let stored_field = field.token();
            let Some(slot) = field_tokens
                .iter()
                .position(|candidate| *candidate == stored_field)
            else {
                continue;
            };
            if let Some(op) = trace_to_arithmetic_op(&ssa, *value) {
                per_slot.push((slot, op));
            }
        }
    }

    // The stable sort keeps discovery order within a slot, so the dedup
    // retains the first operation found for each slot.
    per_slot.sort_by_key(|(slot, _)| *slot);
    per_slot.dedup_by_key(|(slot, _)| *slot);

    let covers_all_slots =
        per_slot.len() == 4 && per_slot.iter().map(|(slot, _)| *slot).eq(0..4);
    covers_all_slots.then(|| per_slot.into_iter().map(|(_, op)| op).collect())
}
/// Traces `start_var` backwards to the first arithmetic or bitwise operation
/// that produces it.
///
/// Conversions are looked through. For any other non-arithmetic definition,
/// phi nodes whose result is the variable are followed via their first
/// operand only. Each variable is visited at most once.
///
/// Returns `None` when no Xor/Add/Sub/Mul/And/Or definition is reached.
///
/// NOTE(review): variables for which `get_definition` returns `None` are
/// skipped before the phi lookup runs. If phi results have no instruction
/// definition, the phi branch below is never taken for them - confirm against
/// `SsaFunction::get_definition`.
fn trace_to_arithmetic_op(ssa: &SsaFunction, start_var: SsaVarId) -> Option<StateSlotOperation> {
    let mut pending = vec![start_var];
    let mut seen: HashSet<SsaVarId> = HashSet::new();

    while let Some(var) = pending.pop() {
        if !seen.insert(var) {
            continue;
        }
        let Some(def) = ssa.get_definition(var) else {
            continue;
        };
        let operation = match def {
            SsaOp::Xor { .. } => StateSlotOperation::xor(),
            SsaOp::Add { .. } | SsaOp::AddOvf { .. } => StateSlotOperation::add(),
            SsaOp::Sub { .. } | SsaOp::SubOvf { .. } => StateSlotOperation::sub(),
            SsaOp::Mul { .. } | SsaOp::MulOvf { .. } => StateSlotOperation::mul(),
            SsaOp::And { .. } => StateSlotOperation::and(),
            SsaOp::Or { .. } => StateSlotOperation::or(),
            SsaOp::Conv { operand, .. } => {
                pending.push(*operand);
                continue;
            }
            _ => {
                for (_, block) in ssa.iter_blocks() {
                    for phi in block.phi_nodes() {
                        if phi.result() != var {
                            continue;
                        }
                        if let Some(first) = phi.operands().first() {
                            pending.push(first.value());
                        }
                    }
                }
                continue;
            }
        };
        return Some(operation);
    }
    None
}
/// Locates the method that initializes the encrypted constants data.
///
/// Requires both a module type and a module static constructor; returns
/// `None` if either is missing. Two strategies are tried in order:
///
/// 1. Among the MethodDef targets called from the module static constructor,
///    pick the first one that is static, returns void, takes no parameters,
///    is declared in the module type, and contains both a `newarr`/`newobj`
///    and a `stsfld` instruction.
/// 2. Otherwise, pick the first static, void, parameterless method of the
///    module type (other than the static constructor) that calls a method or
///    member reference whose name contains `Decompress` or `LZMA`.
pub fn find_constants_initializer(assembly: &CilObject) -> Option<Token> {
    let module_type = assembly.types().module_type()?;
    let cctor_token = assembly.types().module_cctor()?;
    let cctor = assembly.method(&cctor_token)?;

    // Strategy 1: inspect methods called directly from the module .cctor.
    let mut called_from_cctor: Vec<Token> = Vec::new();
    for instr in cctor.instructions() {
        if instr.flow_type != FlowType::Call {
            continue;
        }
        let Operand::Token(target) = &instr.operand else {
            continue;
        };
        if target.is_table(TableId::MethodDef) {
            called_from_cctor.push(*target);
        }
    }

    for candidate in called_from_cctor {
        let Some(method) = assembly.method(&candidate) else {
            continue;
        };
        if !method.is_static() {
            continue;
        }
        let sig = &method.signature;
        if sig.return_type.base != TypeSignature::Void || !sig.params.is_empty() {
            continue;
        }
        if !method
            .declaring_type_rc()
            .is_some_and(|t| t.is_module_type())
        {
            continue;
        }
        let mut allocates = false;
        let mut stores_static_field = false;
        for instr in method.instructions() {
            match instr.mnemonic {
                "newarr" | "newobj" => allocates = true,
                "stsfld" => stores_static_field = true,
                _ => {}
            }
        }
        if allocates && stores_static_field {
            return Some(candidate);
        }
    }

    // Strategy 2: look for a module-type method that calls a decompressor.
    for method in &module_type.query_methods() {
        if method.is_cctor() {
            continue;
        }
        if !method.flags_modifiers.contains(MethodModifiers::STATIC) {
            continue;
        }
        let sig = &method.signature;
        if sig.return_type.base != TypeSignature::Void || !sig.params.is_empty() {
            continue;
        }
        for instr in method.instructions() {
            if instr.flow_type != FlowType::Call {
                continue;
            }
            let Operand::Token(call_target) = &instr.operand else {
                continue;
            };
            let calls_decompressor = assembly.method(call_target).is_some_and(|callee| {
                callee.name.contains("Decompress") || callee.name.contains("LZMA")
            }) || assembly.member_ref(call_target).is_some_and(|memberref| {
                memberref.name.contains("Decompress") || memberref.name.contains("LZMA")
            });
            if calls_decompressor {
                return Some(method.token);
            }
        }
    }
    None
}
/// Finds every call to one of `decryptor_tokens` across all methods of `assembly`.
///
/// The decryptor methods themselves and methods whose SSA cannot be built are
/// skipped. Call targets are resolved from MethodSpec to MethodDef where
/// applicable before matching. Calls without arguments are ignored.
///
/// Each matching call is classified from the data flow of its first argument:
/// - a direct constant, or an argument that could not be classified, yields a
///   direct call site;
/// - an argument XOR-ed with a constant, or flowing through a call with a
///   known constant, yields a state machine call site;
/// - an argument flowing through a call without any known constant is not
///   recorded.
///
/// One entry is produced per matching call instruction, in scan order.
pub fn find_call_sites(assembly: &CilObject, decryptor_tokens: &[Token]) -> Vec<DetectedCallSite> {
    let decryptor_set: HashSet<_> = decryptor_tokens.iter().copied().collect();
    // The scan is sequential, so a plain `Vec` is sufficient here.
    let mut call_sites: Vec<DetectedCallSite> = Vec::new();
    for method_entry in assembly.methods() {
        let method = method_entry.value();
        if decryptor_set.contains(&method.token) {
            continue;
        }
        let Ok(ssa) = method.ssa(assembly) else {
            continue;
        };
        for (_, block) in ssa.iter_blocks() {
            for instr in block.instructions() {
                let (call_target, args) = match instr.op() {
                    SsaOp::Call {
                        method: m, args, ..
                    }
                    | SsaOp::CallVirt {
                        method: m, args, ..
                    } => (m.token(), args),
                    _ => continue,
                };
                let resolved_target =
                    resolve_method_spec_to_def(assembly, call_target).unwrap_or(call_target);
                if !decryptor_set.contains(&resolved_target) {
                    continue;
                }
                if args.is_empty() {
                    continue;
                }
                let arg_var = args[0];
                let site = match analyze_argument_dataflow(&ssa, arg_var) {
                    ArgumentAnalysis::DirectConstant(_) | ArgumentAnalysis::Unknown => {
                        DetectedCallSite::direct(method.token)
                    }
                    ArgumentAnalysis::XorWithConstant(_)
                    | ArgumentAnalysis::FlowsThroughCall { constant: Some(_) } => {
                        DetectedCallSite::statemachine(method.token)
                    }
                    // A call result without any constant gives nothing to work with.
                    ArgumentAnalysis::FlowsThroughCall { constant: None } => continue,
                };
                call_sites.push(site);
            }
        }
    }
    call_sites
}
/// Resolves a MethodSpec token to the MethodDef token it instantiates.
///
/// Returns `None` when `token` is not in table `0x2B` (the MethodSpec table
/// in ECMA-335 metadata), when the MethodSpec cannot be looked up, when its
/// method has no token, or when that token is not a MethodDef.
fn resolve_method_spec_to_def(assembly: &CilObject, token: Token) -> Option<Token> {
    if token.table() != 0x2B {
        return None;
    }
    let spec = assembly.method_spec(&token)?;
    let underlying = spec.method.token()?;
    underlying
        .is_table(TableId::MethodDef)
        .then_some(underlying)
}
/// Classification of how a decryptor call's argument is computed, as
/// produced by `analyze_argument_dataflow`.
enum ArgumentAnalysis {
    /// The argument itself is an `i32` constant (the payload).
    DirectConstant(i32),
    /// The argument's backward data flow contains an XOR with the given
    /// constant and no call.
    XorWithConstant(i32),
    /// The argument's backward data flow contains a call; `constant` is an
    /// XOR constant found in that flow, if any.
    FlowsThroughCall { constant: Option<i32> },
    /// None of the other patterns were found.
    Unknown,
}
/// Classifies how `arg_var` is computed inside `ssa`.
///
/// A variable that is itself an `i32` constant is reported as
/// `DirectConstant`. Otherwise a backward taint analysis (all phi operands
/// tainted, at most 50 iterations) is seeded with `arg_var`, and the
/// definitions of all tainted variables are inspected for XORs with a
/// constant operand and for calls.
///
/// When several XOR constants are present, which one is reported depends on
/// the iteration order of the tainted variable set.
fn analyze_argument_dataflow(ssa: &SsaFunction, arg_var: SsaVarId) -> ArgumentAnalysis {
    if let Some(key) = get_i32_constant(ssa, arg_var) {
        return ArgumentAnalysis::DirectConstant(key);
    }

    let mut taint = TaintAnalysis::new(TaintConfig {
        forward: false,
        backward: true,
        phi_mode: PhiTaintMode::TaintAllOperands,
        max_iterations: 50,
    });
    taint.add_tainted_var(arg_var);
    taint.propagate(ssa);

    let mut xor_constant = None;
    let mut constant_at_call = None;
    let mut saw_call = false;
    for var in taint.tainted_variables() {
        match ssa.get_definition(*var) {
            Some(SsaOp::Xor { left, right, .. }) => {
                // Prefer the left operand; keep the previous value if neither
                // side is a constant.
                let found =
                    get_i32_constant(ssa, *left).or_else(|| get_i32_constant(ssa, *right));
                if found.is_some() {
                    xor_constant = found;
                }
            }
            Some(SsaOp::Call { .. } | SsaOp::CallVirt { .. }) => {
                saw_call = true;
                if xor_constant.is_some() {
                    constant_at_call = xor_constant;
                }
            }
            _ => {}
        }
    }

    match (saw_call, xor_constant) {
        (true, _) => ArgumentAnalysis::FlowsThroughCall {
            constant: constant_at_call.or(xor_constant),
        },
        (false, Some(constant)) => ArgumentAnalysis::XorWithConstant(constant),
        (false, None) => ArgumentAnalysis::Unknown,
    }
}
/// Returns the value of `var` as an `i32` if it is a known constant.
///
/// The constant is taken from the SSA's per-variable constant information
/// when available, otherwise from a `Const` definition of the variable.
/// Values not directly representable as `i32` but available as `u64` are
/// truncated/wrapped into `i32`.
#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
fn get_i32_constant(ssa: &SsaFunction, var: SsaVarId) -> Option<i32> {
    let value = match ssa.get_var_constant(var) {
        Some(known) => known,
        None => match ssa.get_definition(var) {
            Some(SsaOp::Const { value, .. }) => value,
            _ => return None,
        },
    };
    match value.as_i32() {
        Some(exact) => Some(exact),
        None => value.as_u64().map(|wide| wide as i32),
    }
}
#[cfg(test)]
mod tests {
    use crate::{
        deobfuscation::techniques::confuserex::statemachine::{
            detect_cfgctx_semantics, find_constants_initializer,
        },
        test::helpers::load_sample,
    };

    /// A sample protected with constants in CFG mode must expose a complete
    /// CFGCtx description.
    #[test]
    fn test_detect_cfgctx_on_cfg_sample() {
        let assembly =
            load_sample("tests/samples/packers/confuserex/1.6.0/mkaring_constants_cfg.exe");
        let Some(semantics) = detect_cfgctx_semantics(&assembly) else {
            panic!("CFG mode sample should have detectable CFGCtx semantics");
        };
        assert_eq!(semantics.slot_count, 4, "CFGCtx should have 4 slots");
        assert!(
            semantics.init_method.is_some(),
            "Should detect init method (constructor)"
        );
        assert!(
            semantics.update_method.is_some(),
            "Should detect update method (Next)"
        );
        assert!(semantics.type_token.is_some(), "Should detect type token");
    }

    /// The unprotected sample must not be mistaken for a CFGCtx carrier.
    #[test]
    fn test_detect_cfgctx_on_original() {
        let assembly = load_sample("tests/samples/packers/confuserex/1.6.0/original.exe");
        assert!(
            detect_cfgctx_semantics(&assembly).is_none(),
            "Original sample should not have CFGCtx semantics"
        );
    }

    /// A sample with constants protection must have a constants initializer.
    #[test]
    fn test_find_constants_initializer_on_constants_sample() {
        let assembly = load_sample("tests/samples/packers/confuserex/1.6.0/mkaring_constants.exe");
        assert!(
            find_constants_initializer(&assembly).is_some(),
            "Constants sample should have an initializer method"
        );
    }

    /// The unprotected sample must not report a constants initializer.
    #[test]
    fn test_find_constants_initializer_on_original() {
        let assembly = load_sample("tests/samples/packers/confuserex/1.6.0/original.exe");
        assert!(
            find_constants_initializer(&assembly).is_none(),
            "Original sample should not have a constants initializer"
        );
    }
}