#![allow(
clippy::collapsible_if,
clippy::let_and_return,
clippy::new_without_default,
clippy::question_mark,
clippy::too_many_arguments
)]
use std::cell::{Cell, RefCell};
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt;
use petgraph::graph::NodeIndex;
use smallvec::SmallVec;
use crate::callgraph::callee_leaf_name;
use crate::cfg::Cfg;
use crate::labels::{Cap, DataLabel};
use crate::ssa::ir::{BlockId, SsaOp, SsaValue, Terminator};
use crate::symbol::Lang;
use crate::taint::ssa_transfer::CalleeSsaBody;
use super::heap::{HeapKey, SymbolicHeap};
use super::state::{PathConstraint, SymbolicState};
use super::transfer::{self, SymexHeapCtx, SymexSummaryCtx};
use super::value::{SymbolicValue, mk_phi};
pub(crate) const DEFAULT_MAX_DEPTH: usize = 3;
const MAX_CALLEE_BLOCKS: usize = 200;
const MAX_CROSS_FILE_DEPTH: usize = 1;
const MAX_CALLEE_STEPS: usize = 200;
const DEFAULT_MAX_BLOCKS: usize = 500;
const DEFAULT_MAX_FRAMES: usize = 15;
const DEFAULT_MAX_INSTRUCTIONS: usize = 2000;
const DEFAULT_MAX_SYMBOLIC_FORKS: usize = 8;
const DEFAULT_MAX_SOLVER_CHECKS: usize = 20;
const DEFAULT_MAX_RETAINED_PATH_STATES: usize = 16;
const MAX_FORKS_PER_CALLEE: usize = 2;
pub(crate) const DEFAULT_MAX_REENTRY_PER_FUNC: usize = 2;
pub(crate) const DEFAULT_MAX_SCC_REENTRY: usize = 3;
const MAX_CACHE_ENTRIES: usize = 64;
pub fn interproc_enabled() -> bool {
crate::utils::analysis_options::current()
.symex
.interprocedural
}
#[derive(Clone, Debug)]
pub enum CutoffReason {
DepthExceeded {
max_depth: usize,
},
BudgetBlocks {
executed: usize,
max: usize,
},
BudgetFrames {
created: usize,
max: usize,
},
BudgetInstructions {
executed: usize,
max: usize,
},
BudgetForks {
used: usize,
max: usize,
},
BudgetSolverChecks {
used: usize,
max: usize,
},
BudgetPathStates {
retained: usize,
max: usize,
},
RecursionLimit {
function: String,
re_entries: usize,
max: usize,
},
SccRecursionLimit {
scc_functions: Vec<String>,
total_entries: usize,
max: usize,
},
CalleeBodyTooLarge {
callee: String,
blocks: usize,
max: usize,
},
StepBudgetPerFrame {
callee: String,
steps: usize,
max: usize,
},
}
impl fmt::Display for CutoffReason {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CutoffReason::DepthExceeded { max_depth } => {
write!(f, "call depth exceeded (max {})", max_depth)
}
CutoffReason::BudgetBlocks { executed, max } => {
write!(f, "block budget exhausted ({}/{})", executed, max)
}
CutoffReason::BudgetFrames { created, max } => {
write!(f, "frame budget exhausted ({}/{})", created, max)
}
CutoffReason::BudgetInstructions { executed, max } => {
write!(f, "instruction budget exhausted ({}/{})", executed, max)
}
CutoffReason::BudgetForks { used, max } => {
write!(f, "fork budget exhausted ({}/{})", used, max)
}
CutoffReason::BudgetSolverChecks { used, max } => {
write!(f, "solver check budget exhausted ({}/{})", used, max)
}
CutoffReason::BudgetPathStates { retained, max } => {
write!(f, "path state budget exhausted ({}/{})", retained, max)
}
CutoffReason::RecursionLimit {
function,
re_entries,
max,
} => {
write!(
f,
"recursion limit for '{}' ({}/{})",
function, re_entries, max
)
}
CutoffReason::SccRecursionLimit {
scc_functions,
total_entries,
max,
} => {
write!(
f,
"SCC recursion limit for [{}] ({}/{})",
scc_functions.join(", "),
total_entries,
max,
)
}
CutoffReason::CalleeBodyTooLarge {
callee,
blocks,
max,
} => {
write!(
f,
"callee '{}' too large ({} blocks, max {})",
callee, blocks, max
)
}
CutoffReason::StepBudgetPerFrame { callee, steps, max } => {
write!(
f,
"per-frame step budget for '{}' ({}/{})",
callee, steps, max
)
}
}
}
}
pub struct InterprocCtx<'a> {
pub callee_bodies: &'a HashMap<crate::symbol::FuncKey, CalleeSsaBody>,
pub cfg: &'a Cfg,
pub lang: Lang,
pub max_depth: usize,
pub budget: &'a Cell<InterprocBudget>,
pub cache: &'a RefCell<InterprocCache>,
pub reentry_counts: &'a RefCell<HashMap<String, usize>>,
pub max_reentry_per_func: usize,
pub scc_membership: Option<&'a HashMap<String, usize>>,
pub max_scc_reentry: usize,
pub stats: &'a Cell<InterprocStats>,
pub cross_file_bodies: Option<&'a crate::summary::GlobalSummaries>,
pub cross_file_depth: usize,
pub caller_namespace: &'a str,
}
#[derive(Clone, Copy, Debug)]
pub struct InterprocBudget {
pub blocks_executed: usize,
pub max_blocks: usize,
pub frames_created: usize,
pub max_frames: usize,
pub instructions_executed: usize,
pub max_instructions: usize,
pub symbolic_forks: usize,
pub max_symbolic_forks: usize,
pub solver_checks: usize,
pub max_solver_checks: usize,
pub retained_path_states: usize,
pub max_retained_path_states: usize,
}
impl InterprocBudget {
pub fn new() -> Self {
InterprocBudget {
blocks_executed: 0,
max_blocks: DEFAULT_MAX_BLOCKS,
frames_created: 0,
max_frames: DEFAULT_MAX_FRAMES,
instructions_executed: 0,
max_instructions: DEFAULT_MAX_INSTRUCTIONS,
symbolic_forks: 0,
max_symbolic_forks: DEFAULT_MAX_SYMBOLIC_FORKS,
solver_checks: 0,
max_solver_checks: DEFAULT_MAX_SOLVER_CHECKS,
retained_path_states: 0,
max_retained_path_states: DEFAULT_MAX_RETAINED_PATH_STATES,
}
}
pub fn exhausted(&self) -> bool {
self.blocks_executed >= self.max_blocks
|| self.frames_created >= self.max_frames
|| self.instructions_executed >= self.max_instructions
}
fn exhaustion_reason(&self) -> Option<CutoffReason> {
if self.blocks_executed >= self.max_blocks {
Some(CutoffReason::BudgetBlocks {
executed: self.blocks_executed,
max: self.max_blocks,
})
} else if self.frames_created >= self.max_frames {
Some(CutoffReason::BudgetFrames {
created: self.frames_created,
max: self.max_frames,
})
} else if self.instructions_executed >= self.max_instructions {
Some(CutoffReason::BudgetInstructions {
executed: self.instructions_executed,
max: self.max_instructions,
})
} else {
None
}
}
}
#[derive(Clone, Copy, Debug, Default)]
pub struct InterprocStats {
pub cache_hits: usize,
pub cache_misses: usize,
pub total_frames: usize,
pub total_blocks: usize,
pub cutoffs: usize,
pub forks: usize,
}
#[derive(Clone, Debug)]
pub struct CallOutcome {
pub exit_states: Vec<CalleeExitState>,
pub internal_findings: Vec<InternalSinkFinding>,
pub cutoff_reasons: Vec<CutoffReason>,
}
impl CallOutcome {
fn cutoff(reason: CutoffReason, any_arg_tainted: bool) -> Self {
CallOutcome {
exit_states: if any_arg_tainted {
vec![CalleeExitState {
return_value: SymbolicValue::Unknown,
return_tainted: true,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
}]
} else {
Vec::new()
},
internal_findings: Vec::new(),
cutoff_reasons: vec![reason],
}
}
}
#[derive(Clone, Debug)]
pub struct CalleeExitState {
pub return_value: SymbolicValue,
pub return_tainted: bool,
pub heap_delta: Vec<HeapMutation>,
pub taint_delta: HashSet<SsaValue>,
pub path_constraints: Vec<PathConstraint>,
}
#[derive(Clone, Debug)]
pub struct HeapMutation {
pub key: HeapKey,
pub value: SymbolicValue,
pub tainted: bool,
}
#[derive(Clone, Debug)]
pub struct InternalSinkFinding {
pub sink_node: NodeIndex,
pub sink_cap: Cap,
pub tainted_value: SymbolicValue,
pub call_chain: Vec<String>,
pub constraints: Vec<PathConstraint>,
}
#[derive(Clone, Debug, Default)]
pub struct InterprocEvents {
pub internal_findings: Vec<InternalSinkFinding>,
pub cutoff_reasons: Vec<CutoffReason>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MergePolicy {
PhiMerge,
Widen,
MostTainted,
}
pub fn select_merge_policy(exit_count: usize, has_cutoffs: bool) -> MergePolicy {
if has_cutoffs {
MergePolicy::Widen
} else if exit_count > 4 {
MergePolicy::MostTainted
} else {
MergePolicy::PhiMerge
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ArgAbstraction(SmallVec<[(usize, u16); 4]>);
impl ArgAbstraction {
pub fn build(arg_values: &[(SsaValue, SymbolicValue, bool)]) -> Self {
let mut entries: SmallVec<[(usize, u16); 4]> = SmallVec::new();
for (pos, (_, sym, tainted)) in arg_values.iter().enumerate() {
let taint_bit: u16 = if *tainted { 1 } else { 0 };
let discrim: u16 = match sym {
SymbolicValue::Concrete(_) => 0,
SymbolicValue::ConcreteStr(_) => 1,
SymbolicValue::Symbol(_) => 2,
SymbolicValue::BinOp(..) => 3,
SymbolicValue::Concat(..) => 4,
SymbolicValue::Call(..) => 5,
SymbolicValue::Phi(..) => 6,
SymbolicValue::Unknown => 7,
_ => 8, };
let concrete_hash: u16 = match sym {
SymbolicValue::Concrete(n) => (*n as u16).wrapping_mul(31),
SymbolicValue::ConcreteStr(s) => {
let mut h: u16 = 0;
for b in s.bytes().take(8) {
h = h.wrapping_mul(31).wrapping_add(b as u16);
}
h
}
_ => 0,
};
let tag = taint_bit | (discrim << 1) | (concrete_hash << 5);
entries.push((pos, tag));
}
ArgAbstraction(entries)
}
}
pub type InterprocCache = HashMap<(String, ArgAbstraction, u64), CallOutcome>;
struct ReentryGuard<'a> {
counts: &'a RefCell<HashMap<String, usize>>,
name: String,
}
impl<'a> ReentryGuard<'a> {
fn new(counts: &'a RefCell<HashMap<String, usize>>, name: String) -> Self {
{
let mut c = counts.borrow_mut();
*c.entry(name.clone()).or_insert(0) += 1;
}
ReentryGuard { counts, name }
}
}
impl<'a> Drop for ReentryGuard<'a> {
fn drop(&mut self) {
let mut c = self.counts.borrow_mut();
if let Some(count) = c.get_mut(&self.name) {
*count = count.saturating_sub(1);
}
}
}
pub fn execute_callee(
ctx: &InterprocCtx,
callee_name: &str,
arg_values: &[(SsaValue, SymbolicValue, bool)],
caller_heap: &SymbolicHeap,
depth: usize,
call_chain: &[String],
summary_ctx: Option<&SymexSummaryCtx>,
heap_ctx: Option<&SymexHeapCtx>,
) -> Option<CallOutcome> {
if !interproc_enabled() {
return None;
}
let any_arg_tainted = arg_values.iter().any(|(_, _, t)| *t);
if depth >= ctx.max_depth {
let mut s = ctx.stats.get();
s.cutoffs += 1;
ctx.stats.set(s);
return Some(CallOutcome::cutoff(
CutoffReason::DepthExceeded {
max_depth: ctx.max_depth,
},
any_arg_tainted,
));
}
{
let b = ctx.budget.get();
if b.exhausted() {
let mut s = ctx.stats.get();
s.cutoffs += 1;
ctx.stats.set(s);
return Some(CallOutcome::cutoff(
b.exhaustion_reason().unwrap_or(CutoffReason::BudgetBlocks {
executed: b.blocks_executed,
max: b.max_blocks,
}),
any_arg_tainted,
));
}
}
let normalized = callee_leaf_name(callee_name);
let arity_hint = arg_values.len();
let intra_match = ctx
.callee_bodies
.iter()
.find(|(k, _)| k.name == normalized && k.arity == Some(arity_hint))
.or_else(|| ctx.callee_bodies.iter().find(|(k, _)| k.name == normalized))
.map(|(_, v)| v);
let (body, is_cross_file) = match intra_match {
Some(b) => (b, false),
None => {
if !super::cross_file_symex_enabled() {
return None;
}
if ctx.cross_file_depth >= MAX_CROSS_FILE_DEPTH {
return None;
}
let arity_hint = Some(arg_values.len());
match ctx.cross_file_bodies.and_then(|gs| {
gs.resolve_callee_body(ctx.lang, normalized, arity_hint, ctx.caller_namespace)
}) {
Some(b) => (b, true),
None => return None, }
}
};
if body.ssa.blocks.len() > MAX_CALLEE_BLOCKS {
let mut s = ctx.stats.get();
s.cutoffs += 1;
ctx.stats.set(s);
return Some(CallOutcome::cutoff(
CutoffReason::CalleeBodyTooLarge {
callee: normalized.to_string(),
blocks: body.ssa.blocks.len(),
max: MAX_CALLEE_BLOCKS,
},
any_arg_tainted,
));
}
{
let counts = ctx.reentry_counts.borrow();
let re_entries = counts.get(normalized).copied().unwrap_or(0);
if re_entries >= ctx.max_reentry_per_func {
let mut s = ctx.stats.get();
s.cutoffs += 1;
ctx.stats.set(s);
return Some(CallOutcome::cutoff(
CutoffReason::RecursionLimit {
function: normalized.to_string(),
re_entries,
max: ctx.max_reentry_per_func,
},
any_arg_tainted,
));
}
}
if let Some(scc_map) = ctx.scc_membership {
if let Some(&callee_scc) = scc_map.get(normalized) {
let counts = ctx.reentry_counts.borrow();
let scc_total: usize = counts
.iter()
.filter(|(name, _)| scc_map.get(name.as_str()) == Some(&callee_scc))
.map(|(_, &count)| count)
.sum();
if scc_total >= ctx.max_scc_reentry {
let scc_funcs: Vec<String> = scc_map
.iter()
.filter(|(_, idx)| **idx == callee_scc)
.map(|(name, _)| name.clone())
.collect();
let mut s = ctx.stats.get();
s.cutoffs += 1;
ctx.stats.set(s);
return Some(CallOutcome::cutoff(
CutoffReason::SccRecursionLimit {
scc_functions: scc_funcs,
total_entries: scc_total,
max: ctx.max_scc_reentry,
},
any_arg_tainted,
));
}
}
}
let sig = ArgAbstraction::build(arg_values);
let heap_fp = caller_heap.fingerprint();
{
let cache = ctx.cache.borrow();
if let Some(cached) = cache.get(&(normalized.to_string(), sig.clone(), heap_fp)) {
let mut s = ctx.stats.get();
s.cache_hits += 1;
ctx.stats.set(s);
return Some(cached.clone());
}
}
{
let mut s = ctx.stats.get();
s.cache_misses += 1;
ctx.stats.set(s);
}
let _reentry_guard = ReentryGuard::new(ctx.reentry_counts, normalized.to_string());
{
let mut b = ctx.budget.get();
b.frames_created += 1;
ctx.budget.set(b);
}
{
let mut s = ctx.stats.get();
s.total_frames += 1;
ctx.stats.set(s);
}
let mut initial_state = SymbolicState::new();
initial_state.seed_from_const_values(&body.opt.const_values);
for block in &body.ssa.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
if let SsaOp::Param { index } = &inst.op {
if let Some((_, sym, tainted)) = arg_values.get(*index) {
initial_state.set(inst.value, sym.clone());
if *tainted {
initial_state.mark_tainted(inst.value);
}
}
}
}
}
let initial_heap = caller_heap.clone();
*initial_state.heap_mut() = initial_heap.clone();
let mut frame_chain = call_chain.to_vec();
frame_chain.push(normalized.to_string());
let mut exit_states: Vec<CalleeExitState> = Vec::new();
let mut internal_findings: Vec<InternalSinkFinding> = Vec::new();
let mut cutoff_reasons: Vec<CutoffReason> = Vec::new();
let mut work_queue: VecDeque<CalleePathState> = VecDeque::new();
work_queue.push_back(CalleePathState {
sym_state: initial_state,
current_block: body.ssa.entry,
predecessor: None,
steps: 0,
forks_used: 0,
});
while let Some(mut path) = work_queue.pop_front() {
loop {
if path.steps >= MAX_CALLEE_STEPS {
cutoff_reasons.push(CutoffReason::StepBudgetPerFrame {
callee: normalized.to_string(),
steps: path.steps,
max: MAX_CALLEE_STEPS,
});
break;
}
{
let b = ctx.budget.get();
if b.blocks_executed >= b.max_blocks {
cutoff_reasons.push(CutoffReason::BudgetBlocks {
executed: b.blocks_executed,
max: b.max_blocks,
});
break;
}
if b.instructions_executed >= b.max_instructions {
cutoff_reasons.push(CutoffReason::BudgetInstructions {
executed: b.instructions_executed,
max: b.max_instructions,
});
break;
}
}
let block = match body.ssa.blocks.get(path.current_block.0 as usize) {
Some(b) => b,
None => break,
};
let xfile_meta = if is_cross_file {
Some(&body.node_meta)
} else {
None
};
let body_cfg = body.body_graph.as_ref().unwrap_or(ctx.cfg);
transfer::transfer_block_with_predecessor(
&mut path.sym_state,
block,
body_cfg,
&body.ssa,
path.predecessor,
summary_ctx,
heap_ctx,
None,
Some(ctx.lang),
xfile_meta,
);
let block_steps = block.phis.len() + block.body.len();
path.steps += block_steps;
{
let mut b = ctx.budget.get();
b.blocks_executed += 1;
b.instructions_executed += block_steps;
ctx.budget.set(b);
}
{
let mut s = ctx.stats.get();
s.total_blocks += 1;
ctx.stats.set(s);
}
detect_internal_sinks(
block,
body_cfg,
&path.sym_state,
&frame_chain,
&mut internal_findings,
xfile_meta,
);
handle_nested_calls(
block,
ctx,
&mut path.sym_state,
depth,
&frame_chain,
summary_ctx,
heap_ctx,
&mut internal_findings,
&mut cutoff_reasons,
);
match &block.terminator {
Terminator::Return(ret_val) => {
let (return_value, return_tainted) = if let Some(v) = ret_val {
(path.sym_state.get(*v), path.sym_state.is_tainted(*v))
} else {
(SymbolicValue::Unknown, false)
};
let heap_delta = compute_heap_delta(&initial_heap, path.sym_state.heap());
let taint_delta = path.sym_state.tainted_values().clone();
exit_states.push(CalleeExitState {
return_value,
return_tainted,
heap_delta,
taint_delta,
path_constraints: path.sym_state.path_constraints().to_vec(),
});
break;
}
Terminator::Goto(target) => {
path.predecessor = Some(path.current_block);
path.current_block = *target;
}
Terminator::Branch {
true_blk,
false_blk,
..
} => {
let can_fork = path.forks_used < MAX_FORKS_PER_CALLEE && {
let b = ctx.budget.get();
b.symbolic_forks < b.max_symbolic_forks
&& (work_queue.len() + 1) < b.max_retained_path_states
};
let true_valid = body.ssa.blocks.get(true_blk.0 as usize).is_some();
let false_valid = body.ssa.blocks.get(false_blk.0 as usize).is_some();
if can_fork && true_valid && false_valid {
let false_state = path.sym_state.clone();
let false_path = CalleePathState {
sym_state: false_state,
current_block: *false_blk,
predecessor: Some(path.current_block),
steps: path.steps,
forks_used: path.forks_used + 1,
};
work_queue.push_back(false_path);
{
let mut b = ctx.budget.get();
b.symbolic_forks += 1;
ctx.budget.set(b);
}
{
let mut s = ctx.stats.get();
s.forks += 1;
ctx.stats.set(s);
}
path.predecessor = Some(path.current_block);
path.current_block = *true_blk;
path.forks_used += 1;
} else {
path.predecessor = Some(path.current_block);
if true_valid {
path.current_block = *true_blk;
} else if false_valid {
path.current_block = *false_blk;
} else {
break;
}
}
}
Terminator::Switch { .. } => {
let next = block
.succs
.iter()
.find(|s| body.ssa.blocks.get(s.0 as usize).is_some())
.copied();
match next {
Some(target) => {
path.predecessor = Some(path.current_block);
path.current_block = target;
}
None => break,
}
}
Terminator::Unreachable => {
break;
}
}
}
}
let outcome = CallOutcome {
exit_states,
internal_findings,
cutoff_reasons,
};
{
let mut cache = ctx.cache.borrow_mut();
if cache.len() >= MAX_CACHE_ENTRIES {
cache.clear();
}
cache.insert((normalized.to_string(), sig, heap_fp), outcome.clone());
}
Some(outcome)
}
struct CalleePathState {
sym_state: SymbolicState,
current_block: BlockId,
predecessor: Option<BlockId>,
steps: usize,
forks_used: usize,
}
fn detect_internal_sinks(
block: &crate::ssa::ir::SsaBlock,
cfg: &Cfg,
callee_state: &SymbolicState,
frame_chain: &[String],
internal_findings: &mut Vec<InternalSinkFinding>,
node_meta: Option<
&std::collections::HashMap<u32, crate::taint::ssa_transfer::CrossFileNodeMeta>,
>,
) {
for inst in block.body.iter() {
let labels: &[DataLabel] = if let Some(meta) = node_meta {
meta.get(&(inst.cfg_node.index() as u32))
.map(|m| m.info.taint.labels.as_slice())
.unwrap_or(&[])
} else {
&cfg[inst.cfg_node].taint.labels
};
for label in labels {
if let DataLabel::Sink(cap) = label {
let operands = match &inst.op {
SsaOp::Call { args, receiver, .. } => {
let mut ops: Vec<SsaValue> = Vec::new();
if let Some(r) = receiver {
ops.push(*r);
}
for slot in args {
if let Some(&v) = slot.first() {
ops.push(v);
}
}
ops
}
SsaOp::Assign(uses) => uses.to_vec(),
_ => Vec::new(),
};
if operands.iter().any(|v| callee_state.is_tainted(*v)) {
let tainted_val = operands
.iter()
.find(|v| callee_state.is_tainted(**v))
.map(|v| callee_state.get(*v))
.unwrap_or(SymbolicValue::Unknown);
internal_findings.push(InternalSinkFinding {
sink_node: inst.cfg_node,
sink_cap: *cap,
tainted_value: tainted_val,
call_chain: frame_chain.to_vec(),
constraints: callee_state.path_constraints().to_vec(),
});
}
}
}
}
}
fn handle_nested_calls(
block: &crate::ssa::ir::SsaBlock,
ctx: &InterprocCtx,
callee_state: &mut SymbolicState,
depth: usize,
frame_chain: &[String],
summary_ctx: Option<&SymexSummaryCtx>,
heap_ctx: Option<&SymexHeapCtx>,
internal_findings: &mut Vec<InternalSinkFinding>,
cutoff_reasons: &mut Vec<CutoffReason>,
) {
for inst in block.body.iter() {
if let SsaOp::Call {
callee,
args,
receiver,
..
} = &inst.op
{
let current_val = callee_state.get(inst.value);
if !matches!(
current_val,
SymbolicValue::Call(..) | SymbolicValue::Unknown
) {
continue;
}
let mut nested_args: Vec<(SsaValue, SymbolicValue, bool)> = Vec::new();
if let Some(r) = receiver {
nested_args.push((*r, callee_state.get(*r), callee_state.is_tainted(*r)));
}
for slot in args {
if let Some(&v) = slot.first() {
nested_args.push((v, callee_state.get(v), callee_state.is_tainted(v)));
}
}
if let Some(outcome) = execute_callee(
ctx,
callee,
&nested_args,
callee_state.heap(),
depth + 1,
frame_chain,
summary_ctx,
heap_ctx,
) {
let policy = select_merge_policy(
outcome.exit_states.len(),
!outcome.cutoff_reasons.is_empty(),
);
let merged = merge_exit_states(&outcome.exit_states, policy);
callee_state.set(inst.value, merged.return_value);
if merged.return_tainted {
callee_state.mark_tainted(inst.value);
}
for mutation in &merged.heap_delta {
callee_state.heap_mut().store(
mutation.key.clone(),
mutation.value.clone(),
mutation.tainted,
);
}
internal_findings.extend(outcome.internal_findings);
cutoff_reasons.extend(outcome.cutoff_reasons);
}
}
}
}
pub fn merge_exit_states(states: &[CalleeExitState], policy: MergePolicy) -> CalleeExitState {
match states.len() {
0 => CalleeExitState {
return_value: SymbolicValue::Unknown,
return_tainted: false,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
},
1 => states[0].clone(),
_ => match policy {
MergePolicy::PhiMerge => merge_phi(states),
MergePolicy::Widen => merge_widen(states),
MergePolicy::MostTainted => merge_most_tainted(states),
},
}
}
fn merge_phi(states: &[CalleeExitState]) -> CalleeExitState {
let phi_ops: Vec<_> = states
.iter()
.enumerate()
.map(|(i, s)| (BlockId(i as u32), s.return_value.clone()))
.collect();
let return_value = mk_phi(phi_ops);
let return_tainted = states.iter().any(|s| s.return_tainted);
let mut heap_delta: Vec<HeapMutation> = Vec::new();
let mut seen_keys: HashSet<HeapKey> = HashSet::new();
for s in states {
for m in &s.heap_delta {
if seen_keys.insert(m.key.clone()) {
heap_delta.push(m.clone());
}
}
}
let mut taint_delta: HashSet<SsaValue> = HashSet::new();
for s in states {
taint_delta.extend(&s.taint_delta);
}
CalleeExitState {
return_value,
return_tainted,
heap_delta,
taint_delta,
path_constraints: Vec::new(),
}
}
fn merge_widen(states: &[CalleeExitState]) -> CalleeExitState {
let return_tainted = states.iter().any(|s| s.return_tainted);
let mut heap_delta: Vec<HeapMutation> = Vec::new();
let mut seen_keys: HashSet<HeapKey> = HashSet::new();
for s in states {
for m in &s.heap_delta {
if seen_keys.insert(m.key.clone()) {
heap_delta.push(m.clone());
}
}
}
let mut taint_delta: HashSet<SsaValue> = HashSet::new();
for s in states {
taint_delta.extend(&s.taint_delta);
}
CalleeExitState {
return_value: SymbolicValue::Unknown,
return_tainted,
heap_delta,
taint_delta,
path_constraints: Vec::new(),
}
}
fn merge_most_tainted(states: &[CalleeExitState]) -> CalleeExitState {
states
.iter()
.max_by_key(|s| {
let score = s.taint_delta.len() + if s.return_tainted { 100 } else { 0 };
score
})
.cloned()
.unwrap_or_else(|| CalleeExitState {
return_value: SymbolicValue::Unknown,
return_tainted: false,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
})
}
fn compute_heap_delta(initial: &SymbolicHeap, final_heap: &SymbolicHeap) -> Vec<HeapMutation> {
let mut delta = Vec::new();
for (key, value) in final_heap.entries() {
let initial_val = initial.load(key);
let changed = matches!(initial_val, SymbolicValue::Unknown)
|| !sym_value_structurally_eq(&initial_val, value);
if changed {
delta.push(HeapMutation {
key: key.clone(),
value: value.clone(),
tainted: final_heap.is_tainted(key),
});
}
}
delta
}
fn sym_value_structurally_eq(a: &SymbolicValue, b: &SymbolicValue) -> bool {
match (a, b) {
(SymbolicValue::Concrete(x), SymbolicValue::Concrete(y)) => x == y,
(SymbolicValue::ConcreteStr(x), SymbolicValue::ConcreteStr(y)) => x == y,
(SymbolicValue::Symbol(x), SymbolicValue::Symbol(y)) => x == y,
(SymbolicValue::Unknown, SymbolicValue::Unknown) => true,
_ => false,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn arg_abstraction_different_taint() {
let v0 = SsaValue(0);
let a1 = ArgAbstraction::build(&[(v0, SymbolicValue::Symbol(v0), false)]);
let a2 = ArgAbstraction::build(&[(v0, SymbolicValue::Symbol(v0), true)]);
assert_ne!(a1, a2);
}
#[test]
fn arg_abstraction_same_values() {
let v0 = SsaValue(0);
let a1 = ArgAbstraction::build(&[(v0, SymbolicValue::Concrete(42), false)]);
let a2 = ArgAbstraction::build(&[(v0, SymbolicValue::Concrete(42), false)]);
assert_eq!(a1, a2);
}
#[test]
fn arg_abstraction_different_concrete() {
let v0 = SsaValue(0);
let a1 = ArgAbstraction::build(&[(v0, SymbolicValue::Concrete(1), false)]);
let a2 = ArgAbstraction::build(&[(v0, SymbolicValue::Concrete(2), false)]);
assert_ne!(a1, a2);
}
#[test]
fn merge_exit_states_empty() {
let merged = merge_exit_states(&[], MergePolicy::PhiMerge);
assert!(matches!(merged.return_value, SymbolicValue::Unknown));
assert!(!merged.return_tainted);
}
#[test]
fn merge_exit_states_single() {
let state = CalleeExitState {
return_value: SymbolicValue::Concrete(42),
return_tainted: true,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
};
let merged = merge_exit_states(&[state], MergePolicy::PhiMerge);
assert!(matches!(merged.return_value, SymbolicValue::Concrete(42)));
assert!(merged.return_tainted);
}
#[test]
fn merge_exit_states_multiple_unions_taint() {
let s1 = CalleeExitState {
return_value: SymbolicValue::Concrete(1),
return_tainted: false,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
};
let s2 = CalleeExitState {
return_value: SymbolicValue::Concrete(2),
return_tainted: true,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
};
let merged = merge_exit_states(&[s1, s2], MergePolicy::PhiMerge);
assert!(merged.return_tainted);
assert!(matches!(merged.return_value, SymbolicValue::Phi(_)));
}
#[test]
fn budget_exhaustion() {
let budget = InterprocBudget {
blocks_executed: 500,
max_blocks: 500,
frames_created: 0,
max_frames: 15,
instructions_executed: 0,
max_instructions: DEFAULT_MAX_INSTRUCTIONS,
symbolic_forks: 0,
max_symbolic_forks: DEFAULT_MAX_SYMBOLIC_FORKS,
solver_checks: 0,
max_solver_checks: DEFAULT_MAX_SOLVER_CHECKS,
retained_path_states: 0,
max_retained_path_states: DEFAULT_MAX_RETAINED_PATH_STATES,
};
assert!(budget.exhausted());
}
#[test]
fn budget_frames_exhaustion() {
let budget = InterprocBudget {
blocks_executed: 0,
max_blocks: 500,
frames_created: 15,
max_frames: 15,
instructions_executed: 0,
max_instructions: DEFAULT_MAX_INSTRUCTIONS,
symbolic_forks: 0,
max_symbolic_forks: DEFAULT_MAX_SYMBOLIC_FORKS,
solver_checks: 0,
max_solver_checks: DEFAULT_MAX_SOLVER_CHECKS,
retained_path_states: 0,
max_retained_path_states: DEFAULT_MAX_RETAINED_PATH_STATES,
};
assert!(budget.exhausted());
}
#[test]
fn budget_instructions_exhaustion() {
let budget = InterprocBudget {
blocks_executed: 0,
max_blocks: 500,
frames_created: 0,
max_frames: 15,
instructions_executed: 2000,
max_instructions: 2000,
symbolic_forks: 0,
max_symbolic_forks: DEFAULT_MAX_SYMBOLIC_FORKS,
solver_checks: 0,
max_solver_checks: DEFAULT_MAX_SOLVER_CHECKS,
retained_path_states: 0,
max_retained_path_states: DEFAULT_MAX_RETAINED_PATH_STATES,
};
assert!(budget.exhausted());
}
#[test]
fn budget_not_exhausted() {
let budget = InterprocBudget::new();
assert!(!budget.exhausted());
}
#[test]
fn sym_value_eq_concrete() {
assert!(sym_value_structurally_eq(
&SymbolicValue::Concrete(5),
&SymbolicValue::Concrete(5),
));
assert!(!sym_value_structurally_eq(
&SymbolicValue::Concrete(5),
&SymbolicValue::Concrete(6),
));
}
#[test]
fn sym_value_eq_unknown() {
assert!(sym_value_structurally_eq(
&SymbolicValue::Unknown,
&SymbolicValue::Unknown,
));
}
#[test]
fn sym_value_eq_different_kinds() {
assert!(!sym_value_structurally_eq(
&SymbolicValue::Concrete(1),
&SymbolicValue::Unknown,
));
}
#[test]
fn cutoff_reason_display() {
let r = CutoffReason::DepthExceeded { max_depth: 3 };
assert_eq!(format!("{}", r), "call depth exceeded (max 3)");
let r = CutoffReason::RecursionLimit {
function: "foo".into(),
re_entries: 2,
max: 2,
};
assert_eq!(format!("{}", r), "recursion limit for 'foo' (2/2)");
let r = CutoffReason::SccRecursionLimit {
scc_functions: vec!["a".into(), "b".into()],
total_entries: 3,
max: 3,
};
assert_eq!(format!("{}", r), "SCC recursion limit for [a, b] (3/3)");
}
#[test]
fn budget_exhaustion_reason() {
let b = InterprocBudget {
blocks_executed: 500,
max_blocks: 500,
..InterprocBudget::new()
};
assert!(matches!(
b.exhaustion_reason(),
Some(CutoffReason::BudgetBlocks { .. })
));
let b = InterprocBudget {
frames_created: 15,
max_frames: 15,
..InterprocBudget::new()
};
assert!(matches!(
b.exhaustion_reason(),
Some(CutoffReason::BudgetFrames { .. })
));
let b = InterprocBudget::new();
assert!(b.exhaustion_reason().is_none());
}
#[test]
fn merge_policy_widen() {
let s1 = CalleeExitState {
return_value: SymbolicValue::Concrete(1),
return_tainted: false,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
};
let s2 = CalleeExitState {
return_value: SymbolicValue::Concrete(2),
return_tainted: true,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
};
let merged = merge_exit_states(&[s1, s2], MergePolicy::Widen);
assert!(matches!(merged.return_value, SymbolicValue::Unknown));
assert!(merged.return_tainted);
}
#[test]
fn merge_policy_most_tainted() {
let s1 = CalleeExitState {
return_value: SymbolicValue::Concrete(1),
return_tainted: false,
heap_delta: Vec::new(),
taint_delta: HashSet::new(),
path_constraints: Vec::new(),
};
let mut taint_set = HashSet::new();
taint_set.insert(SsaValue(0));
taint_set.insert(SsaValue(1));
let s2 = CalleeExitState {
return_value: SymbolicValue::Concrete(2),
return_tainted: true,
heap_delta: Vec::new(),
taint_delta: taint_set,
path_constraints: Vec::new(),
};
let merged = merge_exit_states(&[s1, s2], MergePolicy::MostTainted);
assert!(merged.return_tainted);
assert!(matches!(merged.return_value, SymbolicValue::Concrete(2)));
}
#[test]
fn select_merge_policy_defaults() {
assert_eq!(select_merge_policy(1, false), MergePolicy::PhiMerge);
assert_eq!(select_merge_policy(2, false), MergePolicy::PhiMerge);
assert_eq!(select_merge_policy(4, false), MergePolicy::PhiMerge);
assert_eq!(select_merge_policy(5, false), MergePolicy::MostTainted);
assert_eq!(select_merge_policy(2, true), MergePolicy::Widen);
}
#[test]
fn calloutcome_cutoff_preserves_taint() {
let outcome = CallOutcome::cutoff(CutoffReason::DepthExceeded { max_depth: 3 }, true);
assert_eq!(outcome.exit_states.len(), 1);
assert!(outcome.exit_states[0].return_tainted);
assert!(matches!(
outcome.exit_states[0].return_value,
SymbolicValue::Unknown
));
assert_eq!(outcome.cutoff_reasons.len(), 1);
}
#[test]
fn calloutcome_cutoff_no_taint() {
let outcome = CallOutcome::cutoff(CutoffReason::DepthExceeded { max_depth: 3 }, false);
assert!(outcome.exit_states.is_empty());
assert_eq!(outcome.cutoff_reasons.len(), 1);
}
#[test]
fn interproc_stats_default() {
let stats = InterprocStats::default();
assert_eq!(stats.cache_hits, 0);
assert_eq!(stats.cache_misses, 0);
assert_eq!(stats.total_frames, 0);
assert_eq!(stats.cutoffs, 0);
assert_eq!(stats.forks, 0);
}
}