use crate::cfg::Cfg;
use crate::labels::Cap;
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue};
use crate::summary::SinkSite;
use crate::taint::domain::TaintOrigin;
use crate::taint::path_state::PredicateKind;
use petgraph::graph::NodeIndex;
use smallvec::SmallVec;
use std::collections::{HashSet, VecDeque};
/// One observation of tainted data arriving at a sink, expressed in SSA terms.
///
/// `ssa_events_to_findings` consumes these and turns them into
/// `crate::taint::Finding` values.
#[derive(Clone, Debug)]
pub struct SsaTaintEvent {
/// CFG node of the sink; used to index the CFG and becomes the `sink` of
/// every finding built from this event.
pub sink_node: NodeIndex,
/// Tainted values seen at the sink: the SSA value, the `Cap` set attached
/// to it, and the origins the taint was traced to.
pub tainted_values: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>,
/// `Cap` set of the sink; intersected with each value's caps when findings
/// are built.
pub sink_caps: Cap,
/// When `true`, `ssa_events_to_findings` emits no finding for this event and
/// only records the sink's span.
pub all_validated: bool,
/// Copied verbatim into `Finding::guard_kind`.
pub guard_kind: Option<PredicateKind>,
/// Copied verbatim into `Finding::uses_summary`.
// NOTE(review): presumably marks flows resolved through a callee summary — confirm.
pub uses_summary: bool,
/// Optional sink site; converted to a `SinkLocation` only when its `line`
/// is non-zero.
pub primary_sink_site: Option<SinkSite>,
}
/// Counts the fewest successor hops between the SSA block that defines
/// `source_node`'s value and the one that defines `sink_node`'s value.
///
/// Returns `0` when either node is missing from `cfg_node_map`, when both
/// values are defined in the same block, or when the sink block is never
/// reached. The result never exceeds 255, and the search does not expand
/// blocks that would lie at that depth or beyond.
pub(super) fn block_distance(ssa: &SsaBody, source_node: NodeIndex, sink_node: NodeIndex) -> u16 {
    const HOP_CAP: u16 = 255;

    let Some(source_val) = ssa.cfg_node_map.get(&source_node) else {
        return 0;
    };
    let start = ssa.def_of(*source_val).block;

    let Some(sink_val) = ssa.cfg_node_map.get(&sink_node) else {
        return 0;
    };
    let target = ssa.def_of(*sink_val).block;

    if start == target {
        return 0;
    }

    // Breadth-first search over successor edges; the first time the target
    // shows up is therefore at minimal depth.
    let mut seen = HashSet::new();
    seen.insert(start);
    let mut frontier = VecDeque::new();
    frontier.push_back((start, 0u16));

    while let Some((here, hops)) = frontier.pop_front() {
        let next_hops = hops + 1;
        for &next in &ssa.block(here).succs {
            if next == target {
                return next_hops.min(HOP_CAP);
            }
            // Mark first, then decide whether the block is still worth expanding.
            if !seen.insert(next) {
                continue;
            }
            if next_hops < HOP_CAP {
                frontier.push_back((next, next_hops));
            }
        }
    }

    // Target not reachable within the cap.
    0
}
pub(super) fn reconstruct_flow_path(
tainted_val: SsaValue,
origin: &crate::taint::domain::TaintOrigin,
sink_node: NodeIndex,
ssa: &SsaBody,
cfg: &Cfg,
) -> Vec<crate::taint::FlowStepRaw> {
use crate::evidence::FlowStepKind;
use crate::taint::FlowStepRaw;
const MAX_STEPS: usize = 64;
let mut steps = Vec::new();
let mut visited = HashSet::new();
steps.push(FlowStepRaw {
cfg_node: sink_node,
var_name: cfg
.node_weight(sink_node)
.and_then(|n| n.call.callee.clone()),
op_kind: FlowStepKind::Sink,
});
let mut current = tainted_val;
for _ in 0..MAX_STEPS {
if !visited.insert(current) {
break;
}
let def = ssa.def_of(current);
let block = ssa.block(def.block);
let inst = block
.phis
.iter()
.chain(block.body.iter())
.find(|i| i.value == current);
let inst = match inst {
Some(i) => i,
None => break,
};
if let Some(prev) = steps.last() {
if prev.cfg_node == inst.cfg_node {
match &inst.op {
SsaOp::Source | SsaOp::Param { .. } | SsaOp::SelfParam | SsaOp::CatchParam => {
break;
}
SsaOp::Assign(uses) => {
current = pick_tainted_operand(uses, origin, ssa);
continue;
}
SsaOp::Call { args, receiver, .. } => {
current = pick_tainted_operand_call(args, receiver, origin, ssa);
continue;
}
SsaOp::Phi(operands) => {
let vals: SmallVec<[SsaValue; 4]> =
operands.iter().map(|(_, v)| *v).collect();
current = pick_tainted_operand(&vals, origin, ssa);
continue;
}
_ => break,
}
}
}
match &inst.op {
SsaOp::Source | SsaOp::Param { .. } | SsaOp::SelfParam | SsaOp::CatchParam => {
steps.push(FlowStepRaw {
cfg_node: inst.cfg_node,
var_name: inst.var_name.clone(),
op_kind: FlowStepKind::Source,
});
break;
}
SsaOp::Assign(uses) => {
steps.push(FlowStepRaw {
cfg_node: inst.cfg_node,
var_name: inst.var_name.clone(),
op_kind: FlowStepKind::Assignment,
});
if uses.is_empty() {
break;
}
current = pick_tainted_operand(uses, origin, ssa);
}
SsaOp::Call { args, receiver, .. } => {
steps.push(FlowStepRaw {
cfg_node: inst.cfg_node,
var_name: inst.var_name.clone(),
op_kind: FlowStepKind::Call,
});
current = pick_tainted_operand_call(args, receiver, origin, ssa);
}
SsaOp::Phi(operands) => {
steps.push(FlowStepRaw {
cfg_node: inst.cfg_node,
var_name: inst.var_name.clone(),
op_kind: FlowStepKind::Phi,
});
let vals: SmallVec<[SsaValue; 4]> = operands.iter().map(|(_, v)| *v).collect();
if vals.is_empty() {
break;
}
current = pick_tainted_operand(&vals, origin, ssa);
}
SsaOp::FieldProj { receiver, .. } => {
steps.push(FlowStepRaw {
cfg_node: inst.cfg_node,
var_name: inst.var_name.clone(),
op_kind: FlowStepKind::Assignment,
});
let single: SmallVec<[SsaValue; 4]> = smallvec::smallvec![*receiver];
current = pick_tainted_operand(&single, origin, ssa);
}
SsaOp::Const(_) | SsaOp::Nop | SsaOp::Undef => break,
}
}
steps.reverse();
steps
}
/// Chooses which operand to follow when tracing taint backwards.
///
/// Preference order:
/// 1. the first operand whose definition sits at `origin.node`,
/// 2. the first operand,
/// 3. `SsaValue(0)` when `operands` is empty.
fn pick_tainted_operand(
    operands: &[SsaValue],
    origin: &crate::taint::domain::TaintOrigin,
    ssa: &SsaBody,
) -> SsaValue {
    operands
        .iter()
        .copied()
        .find(|&candidate| ssa.def_of(candidate).cfg_node == origin.node)
        .or_else(|| operands.first().copied())
        .unwrap_or(SsaValue(0))
}
/// Call-site variant of `pick_tainted_operand`.
///
/// Flattens every argument group in order, appends the receiver (if any)
/// last, and delegates the choice to `pick_tainted_operand`.
fn pick_tainted_operand_call(
    args: &[SmallVec<[SsaValue; 2]>],
    receiver: &Option<SsaValue>,
    origin: &crate::taint::domain::TaintOrigin,
    ssa: &SsaBody,
) -> SsaValue {
    let candidates: SmallVec<[SsaValue; 8]> = args
        .iter()
        .flat_map(|group| group.iter().copied())
        .chain(receiver.iter().copied())
        .collect();
    pick_tainted_operand(&candidates, origin, ssa)
}
/// Converts SSA taint events into findings, one per distinct
/// (origin node, sink node, primary location) combination.
///
/// Events flagged `all_validated` produce no finding; their sink span is
/// recorded as validated, and additionally as path-safe-suppressed when the
/// sink caps intersect `FILE_IO` or `SHELL_ESCAPE`.
///
/// For every remaining event, each origin of each tainted value yields a
/// finding unless an earlier one already used the same dedup key. The
/// finding carries the block distance, the reconstructed flow steps and
/// their hash, and the intersection of the sink's and the value's caps.
pub fn ssa_events_to_findings(
    events: &[SsaTaintEvent],
    ssa: &SsaBody,
    cfg: &Cfg,
) -> Vec<crate::taint::Finding> {
    type FindingDedupKey = (usize, usize, Option<(String, u32, u32)>);

    let mut emitted: HashSet<FindingDedupKey> = HashSet::new();
    let mut out = Vec::new();

    for event in events {
        // Fully validated flows are only recorded, never reported.
        if event.all_validated {
            let span = cfg[event.sink_node].ast.span;
            crate::taint::ssa_transfer::state::record_all_validated_span(span);
            if event.sink_caps.intersects(Cap::FILE_IO | Cap::SHELL_ESCAPE) {
                crate::taint::ssa_transfer::state::record_path_safe_suppressed_span(span);
            }
            continue;
        }

        // A sink site with line 0 has no resolved coordinates; drop it.
        let primary_location = event
            .primary_sink_site
            .as_ref()
            .filter(|site| site.line != 0)
            .map(|site| crate::taint::SinkLocation {
                file_rel: site.file_rel.clone(),
                line: site.line,
                col: site.col,
                snippet: site.snippet.clone(),
            });
        debug_assert!(
            primary_location.as_ref().is_none_or(|l| l.line != 0),
            "primary_location must carry a resolved line coordinate",
        );

        let loc_key = primary_location
            .as_ref()
            .map(|loc| (loc.file_rel.clone(), loc.line, loc.col));
        let sink_index = event.sink_node.index();

        for (val, caps, origins) in &event.tainted_values {
            // Capabilities shared by the sink and this value.
            let shared_caps = event.sink_caps & *caps;
            let cap_specificity = shared_caps.bits().count_ones() as u8;

            for origin in origins {
                let key = (origin.node.index(), sink_index, loc_key.clone());
                if !emitted.insert(key) {
                    continue;
                }

                let hop_count = block_distance(ssa, origin.node, event.sink_node);
                let flow_steps = reconstruct_flow_path(*val, origin, event.sink_node, ssa, cfg);
                let path_hash = compute_path_hash(&flow_steps);

                out.push(crate::taint::Finding {
                    body_id: crate::cfg::BodyId(0),
                    sink: event.sink_node,
                    source: origin.node,
                    path: vec![origin.node, event.sink_node],
                    source_kind: origin.source_kind,
                    path_validated: event.all_validated,
                    guard_kind: event.guard_kind,
                    hop_count,
                    cap_specificity,
                    uses_summary: event.uses_summary,
                    flow_steps,
                    symbolic: None,
                    source_span: origin.source_span.map(|(start, _)| start),
                    primary_location: primary_location.clone(),
                    engine_notes: smallvec::SmallVec::new(),
                    path_hash,
                    finding_id: String::new(),
                    alternative_finding_ids: smallvec::SmallVec::new(),
                    effective_sink_caps: shared_caps,
                });
            }
        }
    }

    out
}
/// Folds a sequence of flow steps into a single 64-bit hash.
///
/// For each step, in order, the CFG node index, a one-byte tag for the step
/// kind, and the optional variable name are fed to a `DefaultHasher`.
// NOTE(review): std does not guarantee `DefaultHasher` output across Rust
// releases — confirm nothing persists this value between toolchains.
fn compute_path_hash(steps: &[crate::taint::FlowStepRaw]) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    steps.iter().for_each(|step| {
        let tag: u8 = match step.op_kind {
            crate::evidence::FlowStepKind::Source => 0,
            crate::evidence::FlowStepKind::Assignment => 1,
            crate::evidence::FlowStepKind::Call => 2,
            crate::evidence::FlowStepKind::Phi => 3,
            crate::evidence::FlowStepKind::Sink => 4,
        };
        // Feed order matters: node index, then kind tag, then name.
        step.cfg_node.index().hash(&mut state);
        tag.hash(&mut state);
        step.var_name.hash(&mut state);
    });
    state.finish()
}
/// Lists the zero-based argument positions of the sink call that carry at
/// least one of the event's tainted SSA values.
///
/// Returns an empty vector when the sink node has no SSA value in
/// `cfg_node_map`, when its defining instruction cannot be found in the
/// defining block, or when that instruction is not a call.
pub(super) fn extract_sink_arg_positions(event: &SsaTaintEvent, ssa: &SsaBody) -> Vec<usize> {
    let Some(mapped) = ssa.cfg_node_map.get(&event.sink_node) else {
        return vec![];
    };
    let sink_val = *mapped;

    let def = ssa.def_of(sink_val);
    let block = &ssa.blocks[def.block.0 as usize];
    let Some(inst) = block
        .phis
        .iter()
        .chain(block.body.iter())
        .find(|candidate| candidate.value == sink_val)
    else {
        return vec![];
    };

    let SsaOp::Call { args, .. } = &inst.op else {
        return vec![];
    };

    let tainted: HashSet<SsaValue> = event
        .tainted_values
        .iter()
        .map(|(value, _, _)| *value)
        .collect();

    args.iter()
        .enumerate()
        .filter(|(_, group)| group.iter().any(|v| tainted.contains(v)))
        .map(|(position, _)| position)
        .collect()
}