use crate::cfg::{Cfg, FuncSummaries, NodeInfo, StmtKind};
use crate::interop::InteropEdge;
use crate::labels::{Cap, DataLabel, SourceKind};
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
use petgraph::graph::NodeIndex;
use std::collections::HashMap;
use tracing::debug;
/// A single taint finding: tainted data was observed reaching a sink node.
#[derive(Debug, Clone)]
pub struct Finding {
    /// CFG node at which the tainted value is consumed by a sink.
    pub sink: NodeIndex,
    /// CFG node identified as the origin of the taint. `analyse_file` falls
    /// back to the sink node itself when no source is found on the path.
    pub source: NodeIndex,
    /// Nodes leading up to the sink, earliest first, as reconstructed by
    /// `analyse_file` from its predecessor map.
    // `dead_code` is silenced because the compiler would otherwise warn that
    // the field is never read — presumably kept for reporting; TODO confirm.
    #[allow(dead_code)]
    pub path: Vec<NodeIndex>,
    /// Classification of the source; `SourceKind::Unknown` when the source
    /// node carries no explicit `DataLabel::Source` label.
    pub source_kind: SourceKind,
}
/// Computes an iteration-order-independent 64-bit fingerprint of a taint map.
///
/// Each `(variable, caps)` entry is hashed on its own with FNV-1a over the
/// variable name's bytes followed by one extra round that mixes in the
/// capability bits; the per-entry hashes are then XOR-ed together so that the
/// (unspecified) iteration order of the `HashMap` does not affect the result.
/// An empty map hashes to `0`.
fn taint_hash(taint: &HashMap<String, Cap>) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    taint
        .iter()
        .map(|(name, caps)| {
            let name_hash = name.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
                (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
            });
            // One more FNV round folds the capability bits into the entry.
            (name_hash ^ caps.bits() as u64).wrapping_mul(FNV_PRIME)
        })
        // XOR is commutative and associative, hence order-independent.
        .fold(0, |acc, entry_hash| acc ^ entry_hash)
}
/// The taint-relevant behaviour of a callee, as produced by `resolve_callee`
/// from a same-file summary, a global summary, or an interop edge target.
struct ResolvedSummary {
    /// Capabilities the call introduces on its result regardless of arguments.
    source_caps: Cap,
    /// Capabilities the call strips from its result.
    sanitizer_caps: Cap,
    /// Capabilities that make a tainted argument a finding at this call.
    sink_caps: Cap,
    /// Whether taint on the call's used variables flows into its result.
    propagates_taint: bool,
}
#[allow(clippy::too_many_arguments)]
fn resolve_callee(
callee: &str,
caller_lang: Lang,
caller_namespace: &str,
caller_func: &str,
call_ordinal: u32,
local: &FuncSummaries,
global: Option<&GlobalSummaries>,
interop_edges: &[InteropEdge],
) -> Option<ResolvedSummary> {
let local_matches: Vec<_> = local
.iter()
.filter(|(k, _)| {
k.name == callee && k.lang == caller_lang && k.namespace == caller_namespace
})
.collect();
if local_matches.len() == 1 {
let (_, ls) = local_matches[0];
return Some(ResolvedSummary {
source_caps: ls.source_caps,
sanitizer_caps: ls.sanitizer_caps,
sink_caps: ls.sink_caps,
propagates_taint: ls.propagates_taint,
});
}
if local_matches.len() > 1 {
return None;
}
if let Some(gs) = global {
let matches = gs.lookup_same_lang(caller_lang, callee);
if matches.len() == 1 {
let (_, fs) = matches[0];
return Some(ResolvedSummary {
source_caps: fs.source_caps(),
sanitizer_caps: fs.sanitizer_caps(),
sink_caps: fs.sink_caps(),
propagates_taint: fs.propagates_taint,
});
}
if matches.len() > 1 {
let same_ns: Vec<_> = matches
.iter()
.filter(|(k, _)| k.namespace == caller_namespace)
.collect();
if same_ns.len() == 1 {
let (_, fs) = same_ns[0];
return Some(ResolvedSummary {
source_caps: fs.source_caps(),
sanitizer_caps: fs.sanitizer_caps(),
sink_caps: fs.sink_caps(),
propagates_taint: fs.propagates_taint,
});
}
return None;
}
}
for edge in interop_edges {
if edge.from.caller_lang == caller_lang
&& edge.from.caller_namespace == caller_namespace
&& edge.from.callee_symbol == callee
&& (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_func)
&& (edge.from.ordinal == 0 || edge.from.ordinal == call_ordinal)
{
if let Some(gs) = global
&& let Some(fs) = gs.get(&edge.to)
{
return Some(ResolvedSummary {
source_caps: fs.source_caps(),
sanitizer_caps: fs.sanitizer_caps(),
sink_caps: fs.sink_caps(),
propagates_taint: fs.propagates_taint,
});
}
}
}
None
}
/// Transfer function: updates the taint state `out` in place to reflect the
/// effect of executing `node`.
///
/// * A `Source`-labelled node assigns the label's capabilities to the
///   variable it defines.
/// * A `Sanitizer`-labelled node assigns the union of its used variables'
///   taint minus the sanitised capabilities.
/// * A call whose callee resolves via `resolve_callee` assigns the callee's
///   source capabilities, plus the arguments' taint if the callee propagates
///   it, minus the callee's sanitiser capabilities.
/// * Any other node copies the union of its used variables' taint to the
///   variable it defines.
///
/// In every case a defined variable whose resulting taint is empty is removed
/// from `out`; nodes that define nothing leave `out` untouched.
fn apply_taint(
    node: &NodeInfo,
    out: &mut HashMap<String, Cap>,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    caller_lang: Lang,
    caller_namespace: &str,
    interop_edges: &[InteropEdge],
) {
    /// Union of the taint currently held by every variable `node` reads.
    fn taint_of_uses(node: &NodeInfo, state: &HashMap<String, Cap>) -> Cap {
        node.uses
            .iter()
            .filter_map(|name| state.get(name))
            .fold(Cap::empty(), |mut acc, caps| {
                acc |= *caps;
                acc
            })
    }

    /// Records `caps` for `var`, dropping the entry entirely when clean.
    fn store(state: &mut HashMap<String, Cap>, var: &str, caps: Cap) {
        if caps.is_empty() {
            state.remove(var);
        } else {
            state.insert(var.to_owned(), caps);
        }
    }

    debug!(target: "taint", "Applying taint to node: {:?}", node);
    debug!(target: "taint", "Taint: {:?}", out);

    let caller_func = node.enclosing_func.as_deref().unwrap_or("");

    // Explicitly labelled sources and sanitisers are fully handled here.
    match node.label {
        Some(DataLabel::Source(bits)) => {
            if let Some(target) = &node.defines {
                out.insert(target.clone(), bits);
            }
            return;
        }
        Some(DataLabel::Sanitizer(bits)) => {
            if let Some(target) = &node.defines {
                let remaining = taint_of_uses(node, out) & !bits;
                store(out, target, remaining);
            }
            return;
        }
        _ => {}
    }

    // Calls with a resolvable summary are modelled by that summary.
    if node.kind == StmtKind::Call
        && let Some(callee) = &node.callee
        && let Some(resolved) = resolve_callee(
            callee,
            caller_lang,
            caller_namespace,
            caller_func,
            node.call_ordinal,
            local_summaries,
            global_summaries,
            interop_edges,
        )
    {
        let mut returned = Cap::empty();
        returned |= resolved.source_caps;
        if resolved.propagates_taint {
            returned |= taint_of_uses(node, out);
        }
        returned &= !resolved.sanitizer_caps;
        if let Some(target) = &node.defines {
            store(out, target, returned);
        }
        return;
    }

    // Default: the defined variable inherits whatever its inputs carry.
    if let Some(target) = &node.defines {
        let inherited = taint_of_uses(node, out);
        store(out, target, inherited);
    }
}
pub fn analyse_file(
cfg: &Cfg,
entry: NodeIndex,
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
caller_lang: Lang,
caller_namespace: &str,
interop_edges: &[InteropEdge],
) -> Vec<Finding> {
use std::collections::{HashMap, HashSet, VecDeque};
#[derive(Clone)]
struct Item {
node: NodeIndex,
taint: HashMap<String, Cap>,
}
type Key = (NodeIndex, u64);
let mut pred: HashMap<Key, Key> = HashMap::new();
let mut seen: HashSet<Key> = HashSet::new();
let mut findings: Vec<Finding> = Vec::new();
let mut q = VecDeque::new();
q.push_back(Item {
node: entry,
taint: HashMap::new(),
});
seen.insert((entry, 0));
while let Some(Item { node, taint }) = q.pop_front() {
let caller_func = cfg[node].enclosing_func.as_deref().unwrap_or("");
let mut out = taint.clone();
apply_taint(
&cfg[node],
&mut out,
local_summaries,
global_summaries,
caller_lang,
caller_namespace,
interop_edges,
);
let sink_caps = match cfg[node].label {
Some(DataLabel::Sink(caps)) => caps,
_ => {
cfg[node]
.callee
.as_ref()
.and_then(|c| {
resolve_callee(
c,
caller_lang,
caller_namespace,
caller_func,
cfg[node].call_ordinal,
local_summaries,
global_summaries,
interop_edges,
)
})
.filter(|r| !r.sink_caps.is_empty())
.map(|r| r.sink_caps)
.unwrap_or(Cap::empty())
}
};
if !sink_caps.is_empty() {
let bad = cfg[node]
.uses
.iter()
.any(|u| out.get(u).is_some_and(|b| (*b & sink_caps) != Cap::empty()));
if bad {
let sink_node = node;
let mut path = vec![node];
let mut source_node = node; let mut key = (node, taint_hash(&taint));
while let Some(&(prev, prev_hash)) = pred.get(&key) {
path.push(prev);
if matches!(cfg[prev].label, Some(DataLabel::Source(_))) {
source_node = prev;
break;
}
let prev_caller_func = cfg[prev].enclosing_func.as_deref().unwrap_or("");
if cfg[prev].kind == StmtKind::Call
&& let Some(callee) = &cfg[prev].callee
&& let Some(resolved) = resolve_callee(
callee,
caller_lang,
caller_namespace,
prev_caller_func,
cfg[prev].call_ordinal,
local_summaries,
global_summaries,
interop_edges,
)
&& !resolved.source_caps.is_empty()
{
source_node = prev;
break;
}
key = (prev, prev_hash);
}
path.reverse();
let source_kind = match cfg[source_node].label {
Some(DataLabel::Source(caps)) => {
let callee = cfg[source_node].callee.as_deref().unwrap_or("");
crate::labels::infer_source_kind(caps, callee)
}
_ => SourceKind::Unknown,
};
findings.push(Finding {
sink: sink_node,
source: source_node,
path,
source_kind,
});
}
}
let out_h = taint_hash(&out);
let in_h = taint_hash(&taint);
let succs: Vec<_> = cfg.neighbors(node).collect();
for (i, succ) in succs.iter().enumerate() {
let key = (*succ, out_h);
if !seen.contains(&key) {
seen.insert(key);
pred.insert(key, (node, in_h));
let taint_for_succ = if i + 1 == succs.len() {
std::mem::take(&mut out)
} else {
out.clone()
};
q.push_back(Item {
node: *succ,
taint: taint_for_succ,
});
}
}
}
findings
}
// Unit tests for this module live in the sibling `tests` module file and are
// compiled only for test builds.
#[cfg(test)]
mod tests;