use std::collections::HashSet;
use crate::analysis::{FieldRef, SsaFunction, SsaType, SsaVarId};
/// Identifies the storage that a state variable refers to.
///
/// Each variant names a different kind of storage. The `as_*` and `is_*`
/// helpers implemented on this type report which variant is held.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum StateVarRef {
/// A local variable, identified by its index.
Local(u16),
/// An SSA variable, identified by its id.
SsaVar(SsaVarId),
/// A field, identified by a field reference.
Field(FieldRef),
/// A single element of an array.
ArrayElement {
/// SSA variable that identifies the array.
array: SsaVarId,
/// How the index of the accessed element is expressed.
index_pattern: IndexPattern,
},
}
/// Describes how the index of an array element access is expressed.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum IndexPattern {
/// The index is given by an SSA variable.
Variable(SsaVarId),
/// The index is a fixed constant.
Constant(i32),
/// The index is derived from a base SSA variable by some operation.
Computed {
/// SSA variable the computation starts from.
base: SsaVarId,
/// Textual description of the operation (the unit tests use
/// `"xor 0x1234"`).
/// NOTE(review): nothing in this file parses the string - confirm
/// whether any consumer relies on a specific format.
operation: String,
},
}
impl StateVarRef {
#[must_use]
pub fn local(index: u16) -> Self {
Self::Local(index)
}
#[must_use]
pub fn ssa_var(var: SsaVarId) -> Self {
Self::SsaVar(var)
}
#[must_use]
pub fn field(field: FieldRef) -> Self {
Self::Field(field)
}
#[must_use]
pub fn array_element(array: SsaVarId, index_pattern: IndexPattern) -> Self {
Self::ArrayElement {
array,
index_pattern,
}
}
#[must_use]
pub fn as_local(&self) -> Option<u16> {
match self {
Self::Local(idx) => Some(*idx),
_ => None,
}
}
#[must_use]
pub fn as_ssa_var(&self) -> Option<SsaVarId> {
match self {
Self::SsaVar(var) => Some(*var),
_ => None,
}
}
#[must_use]
pub fn as_array_element(&self) -> Option<(SsaVarId, &IndexPattern)> {
match self {
Self::ArrayElement {
array,
index_pattern,
} => Some((*array, index_pattern)),
_ => None,
}
}
#[must_use]
pub fn is_local(&self) -> bool {
matches!(self, Self::Local(_))
}
#[must_use]
pub fn is_ssa_var(&self) -> bool {
matches!(self, Self::SsaVar(_))
}
#[must_use]
pub fn is_field(&self) -> bool {
matches!(self, Self::Field(_))
}
#[must_use]
pub fn is_array_element(&self) -> bool {
matches!(self, Self::ArrayElement { .. })
}
}
/// A position expressed as a block index plus an instruction index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Location {
    /// Index of the block.
    pub block: usize,
    /// Index of the instruction; callers in this file pass a position inside
    /// `block`.
    pub instruction: usize,
}

impl Location {
    /// Creates a location from a block index and an instruction index.
    ///
    /// This is a `const fn`, so it can also be used in constant contexts.
    #[must_use]
    pub const fn new(block: usize, instruction: usize) -> Self {
        Location { block, instruction }
    }
}
/// A state variable together with the locations where it is defined and used.
#[derive(Debug, Clone)]
pub struct StateVariable {
/// The storage this state variable refers to.
pub var: StateVarRef,
/// SSA variable associated with the dispatcher, if one has been recorded.
pub dispatcher_var: Option<SsaVarId>,
/// Locations where the variable is defined. `add_def_site` never inserts
/// the same location twice.
pub def_sites: Vec<Location>,
/// Locations where the variable is used. `add_use_site` never inserts the
/// same location twice.
pub use_sites: Vec<Location>,
/// Confidence score; `StateVariable::new` initialises it to `0.0`.
pub confidence: f64,
}
impl StateVariable {
    /// Creates a state variable for `var` with no dispatcher variable, no
    /// recorded definition or use sites, and a confidence of `0.0`.
    #[must_use]
    pub fn new(var: StateVarRef) -> Self {
        Self {
            var,
            dispatcher_var: None,
            def_sites: Vec::new(),
            use_sites: Vec::new(),
            confidence: 0.0,
        }
    }

    /// Creates a state variable backed by the local at `local_index`.
    #[must_use]
    pub fn from_local(local_index: u16) -> Self {
        Self::new(StateVarRef::Local(local_index))
    }

    /// Creates a state variable backed by the SSA variable `var`.
    #[must_use]
    pub fn from_ssa_var(var: SsaVarId) -> Self {
        Self::new(StateVarRef::SsaVar(var))
    }

    /// Builder-style setter: consumes `self` and returns it with
    /// `dispatcher_var` set to `Some(var)`.
    ///
    /// Marked `#[must_use]` like the other constructors: because `self` is
    /// taken by value, discarding the result would silently drop the whole
    /// state variable.
    #[must_use]
    pub fn with_dispatcher_var(mut self, var: SsaVarId) -> Self {
        self.dispatcher_var = Some(var);
        self
    }

    /// Records `location` as a definition site unless it is already present.
    pub fn add_def_site(&mut self, location: Location) {
        if !self.def_sites.contains(&location) {
            self.def_sites.push(location);
        }
    }

    /// Records `location` as a use site unless it is already present.
    pub fn add_use_site(&mut self, location: Location) {
        if !self.use_sites.contains(&location) {
            self.use_sites.push(location);
        }
    }

    /// Number of recorded definition sites.
    #[must_use]
    pub fn def_count(&self) -> usize {
        self.def_sites.len()
    }

    /// Number of recorded use sites.
    #[must_use]
    pub fn use_count(&self) -> usize {
        self.use_sites.len()
    }

    /// Returns `true` if at least one definition site lies in `block`.
    #[must_use]
    pub fn is_defined_in(&self, block: usize) -> bool {
        self.def_sites.iter().any(|loc| loc.block == block)
    }

    /// Returns `true` if at least one use site lies in `block`.
    #[must_use]
    pub fn is_used_in(&self, block: usize) -> bool {
        self.use_sites.iter().any(|loc| loc.block == block)
    }

    /// Set of block indices that contain at least one definition site.
    #[must_use]
    pub fn def_blocks(&self) -> HashSet<usize> {
        self.def_sites.iter().map(|loc| loc.block).collect()
    }

    /// Set of block indices that contain at least one use site.
    #[must_use]
    pub fn use_blocks(&self) -> HashSet<usize> {
        self.use_sites.iter().map(|loc| loc.block).collect()
    }
}
/// Builds the [`StateVariable`] that feeds the switch in a dispatcher block.
///
/// # Arguments
///
/// * `ssa` - the function being analysed.
/// * `dispatcher_block` - index of the dispatcher block.
/// * `switch_var` - SSA variable passed to `find_state_phi` as the starting
///   point of the search.
///
/// # Returns
///
/// `None` when `dispatcher_block` does not exist in `ssa` or when
/// `find_state_phi` finds no phi variable. Otherwise a state variable whose
/// `var` and `dispatcher_var` are both the phi variable, with:
///
/// * one definition site per operand of every phi node in the dispatcher
///   block whose result is that phi variable, and
/// * a single use site at the last instruction of the dispatcher block.
pub fn identify_state_variable(
    ssa: &SsaFunction,
    dispatcher_block: usize,
    switch_var: SsaVarId,
) -> Option<StateVariable> {
    // Resolved once and reused below; `?` already guarantees the block exists,
    // so no second lookup is needed.
    let block = ssa.block(dispatcher_block)?;
    let phi_var = find_state_phi(ssa, dispatcher_block, switch_var)?;

    let mut state_var = StateVariable::from_ssa_var(phi_var);
    state_var.dispatcher_var = Some(phi_var);

    for phi in block.phi_nodes() {
        if phi.result() != phi_var {
            continue;
        }
        for operand in phi.operands() {
            // Only the predecessor block is taken from the operand; the
            // instruction index is always recorded as 0.
            state_var.add_def_site(Location::new(operand.predecessor(), 0));
        }
    }

    // The use site is the last instruction of the dispatcher block;
    // `saturating_sub` keeps the index at 0 for an empty block.
    state_var.add_use_site(Location::new(
        dispatcher_block,
        block.instruction_count().saturating_sub(1),
    ));

    state_var.confidence = compute_state_var_confidence(ssa, &state_var);
    Some(state_var)
}
/// Looks up the phi variable for `switch_var` at the dispatcher block.
///
/// This is a thin wrapper: it forwards `switch_var` and
/// `Some(dispatcher_block)` to `SsaFunction::trace_to_phi` and returns the
/// result unchanged.
/// NOTE(review): the tracing rules themselves live in `trace_to_phi`, which
/// is not visible here - confirm there what `None` means.
fn find_state_phi(
    ssa: &SsaFunction,
    dispatcher_block: usize,
    switch_var: SsaVarId,
) -> Option<SsaVarId> {
    let dispatcher = Some(dispatcher_block);
    ssa.trace_to_phi(switch_var, dispatcher)
}
/// Scores how likely `state_var` is to be a real state variable.
///
/// The score is the sum of the weights of every signal that holds, capped at
/// `1.0`:
///
/// | signal                                                        | weight |
/// |---------------------------------------------------------------|--------|
/// | at least 3 definition sites                                   | 0.3    |
/// | at least 5 definition sites                                   | 0.2    |
/// | a dispatcher variable is recorded                             | 0.3    |
/// | the variable is an SSA variable typed `I32`/`U32`/`I64`/`U64` | 0.2    |
///
/// The last signal does not hold when `state_var.var` is not an SSA variable
/// or when `ssa` has no entry for it.
fn compute_state_var_confidence(ssa: &SsaFunction, state_var: &StateVariable) -> f64 {
    let def_count = state_var.def_count();

    let integer_typed = match state_var
        .var
        .as_ssa_var()
        .and_then(|var_id| ssa.variable(var_id))
    {
        Some(var) => matches!(
            var.var_type(),
            SsaType::I32 | SsaType::U32 | SsaType::I64 | SsaType::U64
        ),
        None => false,
    };

    // Kept in this order and folded from 0.0 so the floating-point additions
    // happen in a fixed sequence.
    let signals: [(bool, f64); 4] = [
        (def_count >= 3, 0.3),
        (def_count >= 5, 0.2),
        (state_var.dispatcher_var.is_some(), 0.3),
        (integer_typed, 0.2),
    ];

    let score = signals.iter().fold(0.0_f64, |total, &(holds, weight)| {
        if holds {
            total + weight
        } else {
            total
        }
    });

    score.min(1.0)
}
/// Unit tests for the plain data types in this module.
///
/// The SSA-driven entry points are not exercised here because they need a
/// fully built `SsaFunction`.
#[cfg(test)]
mod tests {
    use crate::{
        analysis::SsaVarId,
        deobfuscation::passes::unflattening::statevar::{
            IndexPattern, Location, StateVarRef, StateVariable,
        },
    };

    #[test]
    fn test_state_var_ref_local() {
        let var_ref = StateVarRef::local(5);
        assert_eq!(var_ref.as_local(), Some(5));
        assert!(var_ref.as_ssa_var().is_none());
        assert!(var_ref.is_local());
        assert!(!var_ref.is_array_element());
    }

    #[test]
    fn test_state_var_ref_ssa() {
        let var_id = SsaVarId::new();
        let var_ref = StateVarRef::ssa_var(var_id);
        assert_eq!(var_ref.as_ssa_var(), Some(var_id));
        assert!(var_ref.as_local().is_none());
        assert!(var_ref.is_ssa_var());
    }

    #[test]
    fn test_state_var_ref_array_element() {
        let array_var = SsaVarId::new();
        let index_var = SsaVarId::new();
        let var_ref = StateVarRef::array_element(array_var, IndexPattern::Variable(index_var));
        assert!(var_ref.is_array_element());
        assert!(!var_ref.is_local());
        let (arr, pattern) = var_ref.as_array_element().unwrap();
        assert_eq!(arr, array_var);
        assert!(matches!(pattern, IndexPattern::Variable(v) if *v == index_var));
    }

    #[test]
    fn test_index_pattern_variants() {
        let var = SsaVarId::new();
        let pattern1 = IndexPattern::Variable(var);
        assert!(matches!(pattern1, IndexPattern::Variable(_)));
        let pattern2 = IndexPattern::Constant(42);
        assert!(matches!(pattern2, IndexPattern::Constant(42)));
        let pattern3 = IndexPattern::Computed {
            base: var,
            operation: "xor 0x1234".to_string(),
        };
        assert!(matches!(pattern3, IndexPattern::Computed { .. }));
    }

    #[test]
    fn test_state_variable_def_use() {
        let mut state_var = StateVariable::from_local(0);
        state_var.add_def_site(Location::new(1, 5));
        state_var.add_def_site(Location::new(2, 3));
        state_var.add_use_site(Location::new(0, 10));
        assert_eq!(state_var.def_count(), 2);
        assert_eq!(state_var.use_count(), 1);
        assert!(state_var.is_defined_in(1));
        assert!(state_var.is_defined_in(2));
        assert!(!state_var.is_defined_in(0));
        assert!(state_var.is_used_in(0));
    }

    #[test]
    fn test_state_variable_blocks() {
        let mut state_var = StateVariable::from_local(0);
        state_var.add_def_site(Location::new(1, 0));
        state_var.add_def_site(Location::new(2, 0));
        state_var.add_def_site(Location::new(1, 5));
        let def_blocks = state_var.def_blocks();
        assert_eq!(def_blocks.len(), 2);
        assert!(def_blocks.contains(&1));
        assert!(def_blocks.contains(&2));
    }

    #[test]
    fn test_location() {
        let loc = Location::new(5, 10);
        assert_eq!(loc.block, 5);
        assert_eq!(loc.instruction, 10);
    }

    // Adding the same location twice must not create a duplicate entry.
    #[test]
    fn test_state_variable_sites_are_deduplicated() {
        let mut state_var = StateVariable::from_local(0);
        state_var.add_def_site(Location::new(1, 5));
        state_var.add_def_site(Location::new(1, 5));
        state_var.add_use_site(Location::new(0, 10));
        state_var.add_use_site(Location::new(0, 10));
        assert_eq!(state_var.def_count(), 1);
        assert_eq!(state_var.use_count(), 1);
    }

    #[test]
    fn test_state_variable_use_blocks() {
        let mut state_var = StateVariable::from_local(0);
        state_var.add_use_site(Location::new(3, 0));
        state_var.add_use_site(Location::new(3, 7));
        state_var.add_use_site(Location::new(4, 1));
        let use_blocks = state_var.use_blocks();
        assert_eq!(use_blocks.len(), 2);
        assert!(use_blocks.contains(&3));
        assert!(use_blocks.contains(&4));
        assert!(!state_var.is_used_in(0));
    }

    #[test]
    fn test_state_variable_with_dispatcher_var() {
        let var_id = SsaVarId::new();
        let dispatcher = SsaVarId::new();
        let state_var = StateVariable::from_ssa_var(var_id).with_dispatcher_var(dispatcher);
        assert_eq!(state_var.var.as_ssa_var(), Some(var_id));
        assert_eq!(state_var.dispatcher_var, Some(dispatcher));
    }

    #[test]
    fn test_state_variable_new_defaults() {
        let state_var = StateVariable::new(StateVarRef::local(3));
        assert_eq!(state_var.var, StateVarRef::Local(3));
        assert!(state_var.dispatcher_var.is_none());
        assert_eq!(state_var.def_count(), 0);
        assert_eq!(state_var.use_count(), 0);
        assert!(state_var.def_blocks().is_empty());
        assert!(state_var.use_blocks().is_empty());
        assert!(state_var.confidence.abs() < f64::EPSILON);
    }
}