mod algebraic;
mod callgraph;
mod cfg;
mod dataflow;
mod defuse;
mod range;
mod ssa;
mod taint;
#[cfg(feature = "x86")]
mod x86;
pub use crate::utils::graph::NodeId;
pub use algebraic::{simplify_op, SimplifyResult};
pub use callgraph::{
CallGraph, CallGraphNode, CallGraphStats, CallResolver, CallSite, CallTarget, CallType,
ResolverStats,
};
pub use cfg::{
BlockRole, BlockSemantics, CfgEdge, CfgEdgeKind, ControlFlowGraph, InductionUpdateKind,
InductionVar, LoopAnalyzer, LoopExit, LoopForest, LoopInfo, LoopSemantics, LoopType,
SemanticAnalyzer, SsaLoopAnalysis,
};
pub use dataflow::{
AnalysisResults, ConstantPropagation, DataFlowAnalysis, DataFlowCfg, DataFlowSolver, Direction,
JoinSemiLattice, Lattice, LiveVariables, LivenessResult, MeetSemiLattice, ReachingDefinitions,
ReachingDefsResult, ScalarValue, SccpResult,
};
pub use defuse::{DefUseIndex, Location};
pub use range::{IntervalRange, ValueRange};
#[cfg(feature = "z3")]
pub use ssa::Z3Solver;
pub use ssa::{
analyze_alias, AbstractValue, AliasResult, ArrayIndex, BinaryOpInfo, BinaryOpKind, CmpKind,
ComputedOp, ComputedValue, ConstEvaluator, ConstValue, Constraint, ControlFlow, DefSite,
DispatcherPattern, EvaluatorConfig, ExecutionTrace, FieldRef, FnPtrSig, MemoryDefSite,
MemoryLocation, MemoryOp, MemoryPhi, MemoryPhiOperand, MemorySsa, MemorySsaStats, MemoryState,
MemoryVersion, MethodPurity, MethodRef, PathConstraint, PatternDetector, PhiAnalyzer, PhiNode,
PhiOperand, ReturnInfo, SigRef, SimulationResult, SourceBlock, SsaBlock, SsaBlockBuilder,
SsaCfg, SsaConverter, SsaEvaluator, SsaExceptionHandler, SsaFunction, SsaFunctionBuilder,
SsaFunctionContext, SsaInstruction, SsaOp, SsaType, SsaVarId, SsaVariable, StackSimulator,
SymbolicEvaluator, SymbolicExpr, SymbolicOp, TypeClass, TypeContext, TypeRef, UnaryOpInfo,
UnaryOpKind, UseSite, ValueResolver, VariableOrigin,
};
pub use taint::{
cff_taint_config, find_blocks_jumping_to, find_token_dependencies, PhiTaintMode, TaintAnalysis,
TaintConfig, TaintStats, TokenTaintBuilder,
};
#[cfg(feature = "x86")]
pub use x86::{
x86_decode_all, x86_decode_single, x86_decode_traversal, x86_detect_epilogue,
x86_detect_prologue, x86_native_body_size, X86BasicBlock, X86Condition, X86DecodedInstruction,
X86EdgeKind, X86EpilogueInfo, X86Function, X86Instruction, X86Memory, X86Operand,
X86PrologueInfo, X86PrologueKind, X86Register, X86ToSsaTranslator, X86TraversalDecodeResult,
};
#[cfg(test)]
mod tests {
    use crate::{
        analysis::{CfgEdgeKind, ControlFlowGraph, SsaConverter, SsaFunction, VariableOrigin},
        assembly::{decode_blocks, InstructionAssembler},
        utils::graph::NodeId,
    };

    /// Assembles the instruction stream and builds a CFG from the decoded
    /// basic blocks (method body decoded at offset 0, RVA 0x1000).
    fn build_cfg(assembler: InstructionAssembler) -> ControlFlowGraph<'static> {
        let (bytecode, _max_stack, _) = assembler.finish().expect("Failed to assemble bytecode");
        let blocks =
            decode_blocks(&bytecode, 0, 0x1000, Some(bytecode.len())).expect("Failed to decode");
        ControlFlowGraph::from_basic_blocks(blocks).expect("Failed to build CFG")
    }

    /// Runs SSA construction over `cfg` with the given argument/local counts.
    fn build_ssa(cfg: &ControlFlowGraph<'_>, num_args: usize, num_locals: usize) -> SsaFunction {
        SsaConverter::build(cfg, num_args, num_locals, None).expect("SSA construction failed")
    }

    /// Structural sanity checks shared by every test: initial argument/local
    /// variables exist, def sites are well-formed, and phi operands agree
    /// with CFG predecessors.
    fn assert_ssa_valid(ssa: &SsaFunction, cfg: &ControlFlowGraph<'_>) {
        assert_has_arguments(ssa, ssa.num_args());
        assert_has_locals(ssa, ssa.num_locals());
        assert_single_assignment(ssa);
        assert_phi_operands_valid(ssa, cfg);
    }

    /// Checks that exactly `expected_args` version-0 argument variables were
    /// seeded, with origins `Argument(0..n)` in order.
    fn assert_has_arguments(ssa: &SsaFunction, expected_args: usize) {
        let arg_vars: Vec<_> = ssa.argument_variables().collect();
        assert_eq!(
            arg_vars.len(),
            expected_args,
            "Expected {} argument variables (v0), found {}",
            expected_args,
            arg_vars.len()
        );
        for (i, var) in arg_vars.iter().enumerate() {
            assert_eq!(
                var.origin(),
                VariableOrigin::Argument(i as u16),
                "Argument {} has wrong origin: {:?}",
                i,
                var.origin()
            );
            assert_eq!(
                var.version(),
                0,
                "Argument {} should have version 0, got {}",
                i,
                var.version()
            );
        }
    }

    /// Checks that exactly `expected_locals` version-0 local variables were
    /// seeded, with origins `Local(0..n)` in order.
    fn assert_has_locals(ssa: &SsaFunction, expected_locals: usize) {
        let local_vars: Vec<_> = ssa.local_variables().collect();
        assert_eq!(
            local_vars.len(),
            expected_locals,
            "Expected {} local variables (v0), found {}",
            expected_locals,
            local_vars.len()
        );
        for (i, var) in local_vars.iter().enumerate() {
            assert_eq!(
                var.origin(),
                VariableOrigin::Local(i as u16),
                "Local {} has wrong origin: {:?}",
                i,
                var.origin()
            );
            assert_eq!(
                var.version(),
                0,
                "Local {} should have version 0, got {}",
                i,
                var.version()
            );
        }
    }

    /// Checks that every non-phi definition site refers to an existing block.
    /// (Single assignment itself is guaranteed by construction; this guards
    /// against dangling def-site block indices.)
    fn assert_single_assignment(ssa: &SsaFunction) {
        for var in ssa.variables() {
            let def_site = var.def_site();
            if !def_site.is_phi() {
                assert!(
                    def_site.block < ssa.block_count(),
                    "Variable {} defined in non-existent block {}",
                    var.id(),
                    def_site.block
                );
            }
        }
    }

    /// Checks that every phi operand names an actual CFG predecessor of its
    /// block, and that blocks without predecessors carry no phi operands.
    fn assert_phi_operands_valid(ssa: &SsaFunction, cfg: &ControlFlowGraph<'_>) {
        for (block_idx, block) in ssa.blocks().iter().enumerate() {
            let preds: Vec<_> = cfg.predecessors(NodeId::new(block_idx)).collect();
            for phi in block.phi_nodes() {
                if preds.is_empty() {
                    assert!(
                        phi.operand_count() == 0,
                        "Phi in block {} has operands but no predecessors",
                        block_idx
                    );
                    continue;
                }
                for op in phi.operands() {
                    assert!(
                        preds.contains(&NodeId::new(op.predecessor())),
                        "Phi operand references non-predecessor block {} (preds: {:?})",
                        op.predecessor(),
                        preds
                    );
                }
            }
        }
    }

    /// Straight-line method (arg0 + arg1): one block, no loops, no phis.
    #[test]
    fn test_sequential_method() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .ldarg_1()
            .unwrap()
            .add()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 1);
        assert!(!cfg.has_loops());
        assert_eq!(cfg.entry(), NodeId::new(0));
        assert_eq!(cfg.exits().len(), 1);
        assert_eq!(cfg.exits()[0], NodeId::new(0));
        let ssa = build_ssa(&cfg, 2, 0);
        assert_eq!(ssa.block_count(), 1);
        assert_eq!(ssa.num_args(), 2);
        assert_eq!(ssa.num_locals(), 0);
        assert_ssa_valid(&ssa, &cfg);
        assert_eq!(ssa.total_phi_count(), 0);
        assert!(
            ssa.variable_count() >= 2,
            "Should have at least 2 variables (args)"
        );
    }

    /// Nops must not split blocks: all five instructions land in one block.
    #[test]
    fn test_nop_sequence() {
        let mut asm = InstructionAssembler::new();
        asm.nop()
            .unwrap()
            .nop()
            .unwrap()
            .nop()
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 1);
        let block = cfg.block(cfg.entry()).unwrap();
        assert_eq!(block.instructions.len(), 5);
        let ssa = build_ssa(&cfg, 0, 0);
        assert_eq!(ssa.block_count(), 1);
        assert_ssa_valid(&ssa, &cfg);
        assert_eq!(ssa.total_phi_count(), 0);
    }

    /// If/then with both arms returning: 3 blocks, conditional edges tagged,
    /// no merge point so no phis.
    #[test]
    fn test_simple_if_then() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("else")
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .ret()
            .unwrap()
            .label("else")
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 3);
        assert!(!cfg.has_loops());
        assert_eq!(cfg.exits().len(), 2);
        let edges: Vec<_> = cfg.outgoing_edges(cfg.entry()).collect();
        assert_eq!(edges.len(), 2);
        let edge_kinds: Vec<_> = edges.iter().map(|(_, _, e)| e.kind().clone()).collect();
        assert!(edge_kinds.contains(&CfgEdgeKind::ConditionalTrue));
        assert!(edge_kinds.contains(&CfgEdgeKind::ConditionalFalse));
        let ssa = build_ssa(&cfg, 1, 0);
        assert_eq!(ssa.block_count(), 3);
        assert_ssa_valid(&ssa, &cfg);
        assert_eq!(ssa.total_phi_count(), 0);
    }

    /// Diamond (if/then/else into a shared merge): the merge block has two
    /// predecessors.
    #[test]
    fn test_if_then_else_merge() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("else")
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .br_s("merge")
            .unwrap()
            .label("else")
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .label("merge")
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 4);
        assert!(!cfg.has_loops());
        assert_eq!(cfg.exits().len(), 1);
        let merge_block = cfg.exits()[0];
        let preds: Vec<_> = cfg.predecessors(merge_block).collect();
        assert_eq!(preds.len(), 2);
        let ssa = build_ssa(&cfg, 1, 0);
        assert_eq!(ssa.block_count(), 4);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Nested conditionals, each arm returning: 5 blocks, 3 exits, no phis.
    #[test]
    fn test_nested_conditionals() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("outer_else")
            .unwrap()
            .ldarg_1()
            .unwrap()
            .brfalse_s("inner_else")
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .ret()
            .unwrap()
            .label("inner_else")
            .unwrap()
            .ldc_i4_2()
            .unwrap()
            .ret()
            .unwrap()
            .label("outer_else")
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 5);
        assert!(!cfg.has_loops());
        assert_eq!(cfg.exits().len(), 3);
        let ssa = build_ssa(&cfg, 2, 0);
        assert_eq!(ssa.block_count(), 5);
        assert_ssa_valid(&ssa, &cfg);
        assert_eq!(ssa.total_phi_count(), 0);
    }

    /// Counting-down while loop: detects exactly one natural loop with the
    /// entry block as header and at least one latch.
    #[test]
    fn test_simple_while_loop() {
        let mut asm = InstructionAssembler::new();
        asm.label("loop_header")
            .unwrap()
            .ldarg_0()
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ble_s("loop_exit")
            .unwrap()
            .ldarg_0()
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .sub()
            .unwrap()
            .starg_s(0)
            .unwrap()
            .br_s("loop_header")
            .unwrap()
            .label("loop_exit")
            .unwrap()
            .ldarg_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert!(cfg.block_count() >= 2);
        assert!(cfg.has_loops());
        let loops = cfg.loops();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].header, NodeId::new(0));
        assert!(!loops[0].latches.is_empty());
        let ssa = build_ssa(&cfg, 1, 0);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Do/while (body first, back-edge on the condition): one loop headed at
    /// the entry block.
    #[test]
    fn test_do_while_loop() {
        let mut asm = InstructionAssembler::new();
        asm.label("loop_body")
            .unwrap()
            .ldarg_0()
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .sub()
            .unwrap()
            .starg_s(0)
            .unwrap()
            .ldarg_0()
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .bgt_s("loop_body")
            .unwrap()
            .ldarg_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert!(cfg.has_loops());
        assert_eq!(cfg.loops().len(), 1);
        assert_eq!(cfg.loops()[0].header, NodeId::new(0));
        let ssa = build_ssa(&cfg, 1, 0);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Two nested counting loops: both detected, correct nesting depths, and
    /// the inner loop is the innermost for every node in its body.
    #[test]
    fn test_nested_loops() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .stloc_0()
            .unwrap()
            .label("outer_header")
            .unwrap()
            .ldloc_0()
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ble_s("outer_exit")
            .unwrap()
            .ldarg_1()
            .unwrap()
            .stloc_1()
            .unwrap()
            .label("inner_header")
            .unwrap()
            .ldloc_1()
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ble_s("inner_exit")
            .unwrap()
            .ldloc_1()
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .sub()
            .unwrap()
            .stloc_1()
            .unwrap()
            .br_s("inner_header")
            .unwrap()
            .label("inner_exit")
            .unwrap()
            .ldloc_0()
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .sub()
            .unwrap()
            .stloc_0()
            .unwrap()
            .br_s("outer_header")
            .unwrap()
            .label("outer_exit")
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert!(cfg.has_loops());
        let loops = cfg.loops();
        assert_eq!(loops.len(), 2);
        let outer_loop = loops.iter().find(|l| l.depth == 0).unwrap();
        let inner_loop = loops.iter().find(|l| l.depth == 1).unwrap();
        assert!(outer_loop.body.contains(&inner_loop.header));
        for &node in &inner_loop.body {
            assert_eq!(cfg.innermost_loop(node).unwrap().header, inner_loop.header);
        }
        let ssa = build_ssa(&cfg, 2, 2);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Diamond shape: entry dominates every block, and the merge block's
    /// immediate dominator is the entry (not either arm).
    #[test]
    fn test_dominator_diamond() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("b_path")
            .unwrap()
            .nop()
            .unwrap()
            .br_s("merge")
            .unwrap()
            .label("b_path")
            .unwrap()
            .nop()
            .unwrap()
            .label("merge")
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 4);
        let entry = cfg.entry();
        for i in 0..cfg.block_count() {
            assert!(cfg.dominates(entry, NodeId::new(i)));
        }
        let merge = cfg
            .node_ids()
            .find(|&id| cfg.predecessors(id).count() == 2)
            .unwrap();
        assert_eq!(cfg.idom(merge), Some(entry));
        let ssa = build_ssa(&cfg, 1, 0);
        assert_eq!(ssa.block_count(), 4);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Dominance frontiers exist for the diamond, and the entry block's
    /// frontier is empty (it dominates everything).
    #[test]
    fn test_dominance_frontiers() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("else")
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .br_s("merge")
            .unwrap()
            .label("else")
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .label("merge")
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        let frontiers = cfg.dominance_frontiers();
        assert!(!frontiers.is_empty());
        assert!(frontiers[cfg.entry().index()].is_empty());
        let ssa = build_ssa(&cfg, 1, 0);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// RPO starts at the entry and orders it before all of its successors.
    #[test]
    fn test_reverse_postorder_respects_edges() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("b")
            .unwrap()
            .nop()
            .unwrap()
            .br_s("merge")
            .unwrap()
            .label("b")
            .unwrap()
            .nop()
            .unwrap()
            .label("merge")
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        let rpo = cfg.reverse_postorder();
        assert_eq!(rpo[0], cfg.entry());
        let entry_pos = rpo.iter().position(|&n| n == cfg.entry()).unwrap();
        for succ in cfg.successors(cfg.entry()) {
            let succ_pos = rpo.iter().position(|&n| n == succ).unwrap();
            assert!(
                entry_pos < succ_pos,
                "Entry should come before successors in RPO"
            );
        }
        let ssa = build_ssa(&cfg, 1, 0);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Postorder is exactly the reverse of reverse-postorder.
    #[test]
    fn test_postorder_respects_edges() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .ldarg_1()
            .unwrap()
            .add()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        let po = cfg.postorder();
        let rpo = cfg.reverse_postorder();
        assert_eq!(po.len(), rpo.len());
        for (i, node) in po.iter().enumerate() {
            assert_eq!(*node, rpo[rpo.len() - 1 - i]);
        }
        let ssa = build_ssa(&cfg, 2, 0);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// A nop that sits after an unconditional branch still decodes; the CFG
    /// and SSA must tolerate the unreachable code.
    #[test]
    fn test_unreachable_after_unconditional_branch() {
        let mut asm = InstructionAssembler::new();
        asm.br_s("target")
            .unwrap()
            .nop()
            .unwrap()
            .label("target")
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert!(cfg.block_count() >= 2);
        let ssa = build_ssa(&cfg, 0, 0);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// Early-return ladder: three exits, no loops, and since no value merges
    /// occur, no phis.
    #[test]
    fn test_multiple_returns() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .brfalse_s("check2")
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .ret()
            .unwrap()
            .label("check2")
            .unwrap()
            .ldarg_1()
            .unwrap()
            .brfalse_s("default")
            .unwrap()
            .ldc_i4_2()
            .unwrap()
            .ret()
            .unwrap()
            .label("default")
            .unwrap()
            .ldc_i4_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.exits().len(), 3);
        assert!(!cfg.has_loops());
        let ssa = build_ssa(&cfg, 2, 0);
        assert_ssa_valid(&ssa, &cfg);
        assert_eq!(ssa.total_phi_count(), 0);
    }

    /// Store-then-load of a local inside a single block needs no phis.
    #[test]
    fn test_local_variable_store_load() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .stloc_0()
            .unwrap()
            .ldloc_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 1);
        assert!(!cfg.has_loops());
        let ssa = build_ssa(&cfg, 1, 1);
        assert_ssa_valid(&ssa, &cfg);
        assert_eq!(ssa.total_phi_count(), 0);
    }

    /// A local assigned on only one path and read after the join — the
    /// classic case that forces SSA to merge versions at the join block.
    #[test]
    fn test_conditional_local_assignment() {
        let mut asm = InstructionAssembler::new();
        asm.ldc_i4_0()
            .unwrap()
            .stloc_0()
            .unwrap()
            .ldarg_0()
            .unwrap()
            .brfalse_s("skip")
            .unwrap()
            .ldc_i4_1()
            .unwrap()
            .stloc_0()
            .unwrap()
            .label("skip")
            .unwrap()
            .ldloc_0()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        assert_eq!(cfg.block_count(), 3);
        assert!(!cfg.has_loops());
        let ssa = build_ssa(&cfg, 1, 1);
        assert_ssa_valid(&ssa, &cfg);
    }

    /// `dup` duplicates the top of stack; SSA construction must model the
    /// duplicated value without breaking validity.
    #[test]
    fn test_dup_instruction() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .dup()
            .unwrap()
            .add()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        let ssa = build_ssa(&cfg, 1, 0);
        assert_ssa_valid(&ssa, &cfg);
        assert!(
            ssa.variable_count() >= 1,
            "Should have at least the argument variable"
        );
    }

    /// `clt` consumes two operands and produces a boolean result; SSA must
    /// remain valid and keep both argument variables.
    #[test]
    fn test_comparison_instruction() {
        let mut asm = InstructionAssembler::new();
        asm.ldarg_0()
            .unwrap()
            .ldarg_1()
            .unwrap()
            .clt()
            .unwrap()
            .ret()
            .unwrap();
        let cfg = build_cfg(asm);
        let ssa = build_ssa(&cfg, 2, 0);
        assert_ssa_valid(&ssa, &cfg);
        assert!(
            ssa.variable_count() >= 2,
            "Should have at least the argument variables"
        );
    }
}