use petgraph::algo::dominators::{Dominators, simple_fast};
use petgraph::prelude::*;
use tracing::debug;
use tree_sitter::{Node, Tree};
use crate::labels::{Cap, DataLabel, Kind, LangAnalysisRules, classify, lookup, param_config};
use crate::summary::FuncSummary;
use crate::symbol::{FuncKey, Lang};
use std::collections::{HashMap, HashSet};
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum StmtKind {
Entry,
Exit,
Seq,
If,
Loop,
Break,
Continue,
Return,
Call,
}
#[derive(Debug, Clone, Copy)]
pub enum EdgeKind {
Seq, True, False, Back, }
#[derive(Debug, Clone)]
pub struct NodeInfo {
pub kind: StmtKind,
pub span: (usize, usize), pub label: Option<DataLabel>, pub defines: Option<String>, pub uses: Vec<String>, pub callee: Option<String>,
pub enclosing_func: Option<String>,
pub call_ordinal: u32,
}
#[derive(Debug, Clone)]
pub struct LocalFuncSummary {
#[allow(dead_code)] pub entry: NodeIndex,
#[allow(dead_code)] pub exit: NodeIndex,
pub source_caps: Cap,
pub sanitizer_caps: Cap,
pub sink_caps: Cap,
pub param_count: usize,
pub param_names: Vec<String>,
pub propagates_taint: bool,
pub tainted_sink_params: Vec<usize>,
pub callees: Vec<String>,
}
pub type Cfg = Graph<NodeInfo, EdgeKind>;
pub type FuncSummaries = HashMap<FuncKey, LocalFuncSummary>;
#[inline]
pub(crate) fn text_of<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
std::str::from_utf8(&code[n.start_byte()..n.end_byte()])
.ok()
.map(|s| s.to_string())
}
fn root_receiver_text(n: Node, lang: &str, code: &[u8]) -> Option<String> {
match lookup(lang, n.kind()) {
Kind::CallFn | Kind::CallMethod => {
let inner = n
.child_by_field_name("object")
.or_else(|| n.child_by_field_name("receiver"))
.or_else(|| n.child_by_field_name("function"));
match inner {
Some(child) => root_receiver_text(child, lang, code),
None => text_of(n, code),
}
}
_ => text_of(n, code),
}
}
fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
let mut cursor = n.walk();
for c in n.children(&mut cursor) {
match lookup(lang, c.kind()) {
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
return match lookup(lang, c.kind()) {
Kind::CallFn => c
.child_by_field_name("function")
.or_else(|| c.child_by_field_name("method"))
.or_else(|| c.child_by_field_name("name"))
.and_then(|f| text_of(f, code)),
Kind::CallMethod => {
let func = c
.child_by_field_name("method")
.or_else(|| c.child_by_field_name("name"))
.and_then(|f| text_of(f, code));
let recv = c
.child_by_field_name("object")
.or_else(|| c.child_by_field_name("receiver"))
.and_then(|f| root_receiver_text(f, lang, code));
match (recv, func) {
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
(_, Some(f)) => Some(f.to_string()),
_ => None,
}
}
Kind::CallMacro => c
.child_by_field_name("macro")
.and_then(|f| text_of(f, code)),
_ => None,
};
}
_ => {
if let Some(found) = first_call_ident(c, lang, code) {
return Some(found);
}
}
}
}
None
}
fn member_expr_text(n: Node, code: &[u8]) -> Option<String> {
match n.kind() {
"member_expression" | "attribute" | "selector_expression" => {
let obj = n
.child_by_field_name("object")
.or_else(|| n.child_by_field_name("value"))
.and_then(|o| member_expr_text(o, code))
.or_else(|| {
n.child_by_field_name("object")
.or_else(|| n.child_by_field_name("value"))
.and_then(|o| text_of(o, code))
});
let prop = n
.child_by_field_name("property")
.or_else(|| n.child_by_field_name("attribute"))
.or_else(|| n.child_by_field_name("field"))
.and_then(|p| text_of(p, code));
match (obj, prop) {
(Some(o), Some(p)) => Some(format!("{o}.{p}")),
(_, Some(p)) => Some(p),
(Some(o), _) => Some(o),
_ => text_of(n, code),
}
}
_ => text_of(n, code),
}
}
fn first_member_label(
n: Node,
lang: &str,
code: &[u8],
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
) -> Option<DataLabel> {
match n.kind() {
"member_expression" | "attribute" | "selector_expression" => {
if let Some(full) = member_expr_text(n, code) {
let mut candidate = full.as_str();
loop {
if let Some(lbl) = classify(lang, candidate, extra_labels) {
return Some(lbl);
}
match candidate.rsplit_once('.') {
Some((prefix, _)) => candidate = prefix,
None => break,
}
}
}
}
_ => {}
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
if let Some(lbl) = first_member_label(child, lang, code, extra_labels) {
return Some(lbl);
}
}
None
}
fn first_member_text(n: Node, code: &[u8]) -> Option<String> {
match n.kind() {
"member_expression" | "attribute" | "selector_expression" => member_expr_text(n, code),
_ => {
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
if let Some(t) = first_member_text(child, code) {
return Some(t);
}
}
None
}
}
}
fn has_call_descendant(n: Node, lang: &str) -> bool {
let mut cursor = n.walk();
for c in n.children(&mut cursor) {
match lookup(lang, c.kind()) {
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return true,
_ => {
if has_call_descendant(c, lang) {
return true;
}
}
}
}
false
}
fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
match n.kind() {
"identifier" | "field_identifier" | "property_identifier" => {
if let Some(txt) = text_of(n, code) {
out.push(txt);
}
}
"variable_name" => {
if let Some(txt) = text_of(n, code) {
out.push(txt.trim_start_matches('$').to_string());
}
}
_ => {
let mut c = n.walk();
for ch in n.children(&mut c) {
collect_idents(ch, code, out);
}
}
}
}
fn def_use(ast: Node, lang: &str, code: &[u8]) -> (Option<String>, Vec<String>) {
match lookup(lang, ast.kind()) {
Kind::CallWrapper => {
let mut defs = None;
let mut uses = Vec::new();
let def_node = ast
.child_by_field_name("pattern")
.or_else(|| ast.child_by_field_name("name"))
.or_else(|| ast.child_by_field_name("left"));
let val_node = ast
.child_by_field_name("value")
.or_else(|| ast.child_by_field_name("right"));
if def_node.is_some() || val_node.is_some() {
if let Some(pat) = def_node {
let mut tmp = Vec::<String>::new();
collect_idents(pat, code, &mut tmp);
defs = tmp.into_iter().next();
}
if let Some(val) = val_node {
collect_idents(val, code, &mut uses);
}
} else {
let mut cursor = ast.walk();
for child in ast.children(&mut cursor) {
let child_name = child
.child_by_field_name("name")
.or_else(|| child.child_by_field_name("declarator"))
.or_else(|| child.child_by_field_name("left"));
let child_value = child
.child_by_field_name("value")
.or_else(|| child.child_by_field_name("right"));
if child_value.is_some() {
if let Some(name_node) = child_name
&& defs.is_none()
{
let mut tmp = Vec::<String>::new();
collect_idents(name_node, code, &mut tmp);
defs = tmp.into_iter().next();
}
if let Some(val_node) = child_value {
collect_idents(val_node, code, &mut uses);
}
}
}
if defs.is_none() && uses.is_empty() {
collect_idents(ast, code, &mut uses);
}
}
(defs, uses)
}
Kind::Assignment => {
let mut defs = None;
let mut uses = Vec::new();
if let Some(lhs) = ast.child_by_field_name("left") {
let mut tmp = Vec::<String>::new();
collect_idents(lhs, code, &mut tmp);
defs = tmp.pop();
}
if let Some(rhs) = ast.child_by_field_name("right") {
collect_idents(rhs, code, &mut uses);
}
(defs, uses)
}
_ => {
let mut uses = Vec::new();
collect_idents(ast, code, &mut uses);
(None, uses)
}
}
}
#[allow(clippy::too_many_arguments)]
fn push_node<'a>(
g: &mut Cfg,
kind: StmtKind,
ast: Node<'a>,
lang: &str,
code: &'a [u8],
enclosing_func: Option<&str>,
call_ordinal: u32,
analysis_rules: Option<&LangAnalysisRules>,
) -> NodeIndex {
let mut text = match lookup(lang, ast.kind()) {
Kind::CallFn => ast
.child_by_field_name("function")
.or_else(|| ast.child_by_field_name("method"))
.or_else(|| ast.child_by_field_name("name"))
.and_then(|n| text_of(n, code))
.unwrap_or_default(),
Kind::CallMethod => {
let func = ast
.child_by_field_name("method")
.or_else(|| ast.child_by_field_name("name"))
.and_then(|n| text_of(n, code));
let recv = ast
.child_by_field_name("object")
.or_else(|| ast.child_by_field_name("receiver"))
.and_then(|n| root_receiver_text(n, lang, code));
match (recv, func) {
(Some(r), Some(f)) => format!("{r}.{f}"),
(_, Some(f)) => f,
_ => String::new(),
}
}
Kind::CallMacro => ast
.child_by_field_name("macro")
.and_then(|n| text_of(n, code))
.unwrap_or_default(),
_ => text_of(ast, code).unwrap_or_default(),
};
if matches!(
lookup(lang, ast.kind()),
Kind::CallWrapper | Kind::Assignment
) && let Some(inner) = first_call_ident(ast, lang, code)
{
text = inner;
}
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
let mut label = classify(lang, &text, extra);
if label.is_none() {
let assign_node = if matches!(lookup(lang, ast.kind()), Kind::Assignment) {
Some(ast)
} else if matches!(lookup(lang, ast.kind()), Kind::CallWrapper) {
let mut cursor = ast.walk();
ast.children(&mut cursor)
.find(|c| matches!(lookup(lang, c.kind()), Kind::Assignment))
} else {
None
};
if let Some(assign) = assign_node
&& let Some(lhs) = assign.child_by_field_name("left")
{
if let Some(full) = member_expr_text(lhs, code) {
label = classify(lang, &full, extra);
}
if label.is_none()
&& let Some(prop) = lhs.child_by_field_name("property")
&& let Some(prop_text) = text_of(prop, code)
{
label = classify(lang, &prop_text, extra);
}
}
}
if label.is_none()
&& matches!(
lookup(lang, ast.kind()),
Kind::CallWrapper | Kind::Assignment
)
&& let Some(found) = first_member_label(ast, lang, code, extra)
{
label = Some(found);
if let Some(member_text) = first_member_text(ast, code) {
text = member_text;
}
}
let span = (ast.start_byte(), ast.end_byte());
let (defines, uses) = def_use(ast, lang, code);
let callee = if kind == StmtKind::Call {
Some(text.clone())
} else {
None
};
let idx = g.add_node(NodeInfo {
kind,
span,
label,
defines,
uses,
callee,
enclosing_func: enclosing_func.map(|s| s.to_string()),
call_ordinal,
});
debug!(
target: "cfg",
"node {} ← {:?} txt=`{}` span={:?} label={:?}",
idx.index(),
kind,
text,
span,
label
);
idx
}
fn extract_param_names<'a>(func_node: Node<'a>, lang: &str, code: &'a [u8]) -> Vec<String> {
let cfg = param_config(lang);
let mut names = Vec::new();
let Some(params) = func_node.child_by_field_name(cfg.params_field) else {
return names;
};
let mut cursor = params.walk();
for child in params.children(&mut cursor) {
if cfg.self_param_kinds.contains(&child.kind()) {
names.push("self".into());
continue;
}
if cfg.param_node_kinds.contains(&child.kind()) {
let mut found = false;
for &field in cfg.ident_fields {
if let Some(node) = child.child_by_field_name(field) {
let mut tmp = Vec::new();
collect_idents(node, code, &mut tmp);
if let Some(first) = tmp.into_iter().next() {
names.push(first);
found = true;
break;
}
}
}
if !found
&& child.kind() == "identifier"
&& let Some(txt) = text_of(child, code)
{
names.push(txt);
}
if !found && child.kind() == "parameter_declaration" {
let mut tmp = Vec::new();
collect_idents(child, code, &mut tmp);
if let Some(last) = tmp.pop() {
names.push(last);
}
}
continue;
}
}
names
}
fn is_configured_terminator(callee: &str, analysis_rules: Option<&LangAnalysisRules>) -> bool {
if let Some(rules) = analysis_rules {
let callee_lower = callee.to_ascii_lowercase();
rules
.terminators
.iter()
.any(|t| callee_lower == t.to_ascii_lowercase())
} else {
false
}
}
#[inline]
fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
for &f in froms {
debug!(target: "cfg", "edge {} → {} ({:?})", f.index(), to.index(), kind);
g.add_edge(f, to, kind);
}
}
#[allow(clippy::too_many_arguments)]
fn build_sub<'a>(
ast: Node<'a>,
preds: &[NodeIndex], g: &mut Cfg,
lang: &str,
code: &'a [u8],
summaries: &mut FuncSummaries,
file_path: &str,
enclosing_func: Option<&str>,
call_ordinal: &mut u32,
analysis_rules: Option<&LangAnalysisRules>,
break_targets: &mut Vec<NodeIndex>,
continue_targets: &mut Vec<NodeIndex>,
) -> Vec<NodeIndex> {
match lookup(lang, ast.kind()) {
Kind::If => {
let cond = push_node(
g,
StmtKind::If,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, cond, EdgeKind::Seq);
let (then_block, else_block) = {
let field_then = ast
.child_by_field_name("consequence")
.or_else(|| ast.child_by_field_name("body"));
let field_else = ast.child_by_field_name("alternative");
if field_then.is_some() || field_else.is_some() {
(field_then, field_else)
} else {
let mut cursor = ast.walk();
let blocks: Vec<_> = ast
.children(&mut cursor)
.filter(|n| lookup(lang, n.kind()) == Kind::Block)
.collect();
(blocks.first().copied(), blocks.get(1).copied())
}
};
let then_first_node = NodeIndex::new(g.node_count());
let then_exits = if let Some(b) = then_block {
let exits = build_sub(
b,
&[cond],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
);
if then_first_node.index() < g.node_count() {
connect_all(g, &[cond], then_first_node, EdgeKind::True);
} else if let Some(&first) = exits.first() {
connect_all(g, &[cond], first, EdgeKind::True);
}
exits
} else {
vec![cond]
};
let else_first_node = NodeIndex::new(g.node_count());
let else_exits = if let Some(b) = else_block {
let exits = build_sub(
b,
&[cond],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
);
if else_first_node.index() < g.node_count() {
connect_all(g, &[cond], else_first_node, EdgeKind::False);
} else if let Some(&first) = exits.first() {
connect_all(g, &[cond], first, EdgeKind::False);
}
exits
} else {
if then_exits.is_empty() {
vec![cond]
} else {
if let Some(&first) = then_exits.first() {
connect_all(g, &[cond], first, EdgeKind::False);
}
then_exits.clone()
}
};
then_exits.into_iter().chain(else_exits).collect()
}
Kind::InfiniteLoop => {
let header = push_node(
g,
StmtKind::Loop,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, header, EdgeKind::Seq);
let mut loop_breaks = Vec::new();
let mut loop_continues = Vec::new();
let body = ast.child_by_field_name("body").expect("loop without body");
let body_exits = build_sub(
body,
&[header],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
&mut loop_breaks,
&mut loop_continues,
);
for &e in &body_exits {
connect_all(g, &[e], header, EdgeKind::Back);
}
for &c in &loop_continues {
connect_all(g, &[c], header, EdgeKind::Back);
}
if loop_breaks.is_empty() {
vec![header]
} else {
loop_breaks
}
}
Kind::While | Kind::For => {
let header = push_node(
g,
StmtKind::Loop,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, header, EdgeKind::Seq);
let mut loop_breaks = Vec::new();
let mut loop_continues = Vec::new();
let body = ast
.child_by_field_name("body")
.or_else(|| {
let mut c = ast.walk();
ast.children(&mut c)
.find(|n| lookup(lang, n.kind()) == Kind::Block)
})
.expect("loop without body");
let body_exits = build_sub(
body,
&[header],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
&mut loop_breaks,
&mut loop_continues,
);
for &e in &body_exits {
connect_all(g, &[e], header, EdgeKind::Back);
}
for &c in &loop_continues {
connect_all(g, &[c], header, EdgeKind::Back);
}
let mut exits = vec![header];
exits.extend(loop_breaks);
exits
}
Kind::Return => {
if has_call_descendant(ast, lang) {
let ord = *call_ordinal;
*call_ordinal += 1;
let call_idx = push_node(
g,
StmtKind::Call,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
connect_all(g, preds, call_idx, EdgeKind::Seq);
let ret = push_node(
g,
StmtKind::Return,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, &[call_idx], ret, EdgeKind::Seq);
Vec::new()
} else {
let ret = push_node(
g,
StmtKind::Return,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, ret, EdgeKind::Seq);
Vec::new() }
}
Kind::Break => {
let brk = push_node(
g,
StmtKind::Break,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, brk, EdgeKind::Seq);
break_targets.push(brk);
Vec::new()
}
Kind::Continue => {
let cont = push_node(
g,
StmtKind::Continue,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, cont, EdgeKind::Seq);
continue_targets.push(cont);
Vec::new()
}
Kind::SourceFile | Kind::Block => {
let mut cursor = ast.walk();
let mut frontier = preds.to_vec();
let mut last_live_frontier = preds.to_vec();
let mut prev_was_preproc = false;
for child in ast.children(&mut cursor) {
let child_is_fn = lookup(lang, child.kind()) == Kind::Function;
let child_preds = if frontier.is_empty() && (child_is_fn || prev_was_preproc) {
last_live_frontier.clone()
} else {
frontier.clone()
};
let child_exits = build_sub(
child,
&child_preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
);
let is_preproc = child.kind().starts_with("preproc_");
if !child_exits.is_empty() {
last_live_frontier = child_exits.clone();
}
frontier = child_exits;
prev_was_preproc = is_preproc;
}
frontier
}
Kind::Function => {
let fn_name = ast
.child_by_field_name("name")
.or_else(|| ast.child_by_field_name("declarator"))
.and_then(|n| {
let mut tmp = Vec::new();
collect_idents(n, code, &mut tmp);
tmp.into_iter().next()
})
.unwrap_or_else(|| "<anon>".to_string());
let entry_idx = push_node(
g,
StmtKind::Seq,
ast,
lang,
code,
Some(&fn_name),
0,
analysis_rules,
);
connect_all(g, preds, entry_idx, EdgeKind::Seq);
let param_names = extract_param_names(ast, lang, code);
let param_count = param_names.len();
let fn_first_node: NodeIndex = NodeIndex::new(g.node_count());
let body = ast.child_by_field_name("body").expect("fn w/o body");
let mut fn_call_ordinal: u32 = 0;
let mut fn_breaks = Vec::new();
let mut fn_continues = Vec::new();
let body_exits = build_sub(
body,
&[entry_idx],
g,
lang,
code,
summaries,
file_path,
Some(&fn_name),
&mut fn_call_ordinal,
analysis_rules,
&mut fn_breaks,
&mut fn_continues,
);
let mut var_taint = HashMap::<String, Cap>::new();
let mut node_bits = HashMap::<NodeIndex, Cap>::new();
let mut fn_src_bits = Cap::empty();
let mut fn_sani_bits = Cap::empty();
let mut fn_sink_bits = Cap::empty();
let mut callees = Vec::<String>::new();
let mut tainted_sink_params: Vec<usize> = Vec::new();
let param_set: HashSet<&str> = param_names.iter().map(|s| s.as_str()).collect();
let fn_node_range = entry_idx.index()..g.node_count();
for raw in fn_node_range {
let idx = NodeIndex::new(raw);
let info = &g[idx];
if let Some(callee) = &info.callee
&& !callees.contains(callee)
{
callees.push(callee.clone());
}
if let Some(DataLabel::Source(bits)) = info.label {
fn_src_bits |= bits;
}
if let Some(DataLabel::Sanitizer(bits)) = info.label {
fn_sani_bits |= bits;
}
if let Some(DataLabel::Sink(bits)) = info.label {
fn_sink_bits |= bits;
for u in &info.uses {
if let Some(pos) = param_names.iter().position(|p| p == u)
&& !tainted_sink_params.contains(&pos)
{
tainted_sink_params.push(pos);
}
}
}
let mut in_bits = Cap::empty();
for u in &info.uses {
if let Some(b) = var_taint.get(u) {
in_bits |= *b;
}
}
let mut out_bits = in_bits;
if let Some(lab) = &info.label {
match *lab {
DataLabel::Source(bits) => out_bits |= bits,
DataLabel::Sanitizer(bits) => out_bits &= !bits,
DataLabel::Sink(_) => { }
}
}
if let Some(def) = &info.defines {
if out_bits.is_empty() {
var_taint.remove(def);
} else {
var_taint.insert(def.clone(), out_bits);
}
}
node_bits.insert(idx, out_bits);
}
for (&idx, &bits) in &node_bits {
if g[idx].kind == StmtKind::Return {
fn_src_bits |= bits;
}
}
for &pred in &body_exits {
if let Some(&bits) = node_bits.get(&pred) {
fn_src_bits |= bits;
}
}
let propagates = {
let mut prop = false;
for &idx in node_bits.keys() {
if g[idx].kind == StmtKind::Return {
for u in &g[idx].uses {
if param_set.contains(u.as_str()) {
prop = true;
}
if let Some(bits) = var_taint.get(u)
&& !bits.is_empty()
&& param_names.iter().any(|p| var_taint.contains_key(p))
{
prop = true;
}
}
}
}
for &exit_pred in &body_exits {
let info = &g[exit_pred];
for u in &info.uses {
if param_set.contains(u.as_str()) {
prop = true;
}
}
if let Some(def) = &info.defines
&& param_set.contains(def.as_str())
{
prop = true;
}
}
prop
};
tainted_sink_params.sort_unstable();
tainted_sink_params.dedup();
let exit_idx = g.add_node(NodeInfo {
kind: StmtKind::Return,
span: (ast.start_byte(), ast.end_byte()),
label: None,
defines: None,
uses: Vec::new(),
callee: None,
enclosing_func: Some(fn_name.clone()),
call_ordinal: 0,
});
for &b in &body_exits {
connect_all(g, &[b], exit_idx, EdgeKind::Seq);
}
for raw in fn_first_node.index()..g.node_count() {
let idx = NodeIndex::new(raw);
let info = &g[idx];
if info.kind == StmtKind::Return
&& idx != exit_idx
&& !g.contains_edge(idx, exit_idx)
{
connect_all(g, &[idx], exit_idx, EdgeKind::Seq);
}
}
let key = FuncKey {
lang: Lang::from_slug(lang).unwrap_or(Lang::Rust),
namespace: file_path.to_owned(),
name: fn_name.clone(),
arity: Some(param_count),
};
summaries.insert(
key,
LocalFuncSummary {
entry: entry_idx,
exit: exit_idx,
source_caps: fn_src_bits,
sanitizer_caps: fn_sani_bits,
sink_caps: fn_sink_bits,
param_count,
param_names,
propagates_taint: propagates,
tainted_sink_params,
callees,
},
);
vec![exit_idx]
}
Kind::CallWrapper => {
let mut cursor = ast.walk();
if let Some(inner) = ast.children(&mut cursor).find(|c| {
matches!(
lookup(lang, c.kind()),
Kind::InfiniteLoop | Kind::While | Kind::For | Kind::If
)
}) {
return build_sub(
inner,
preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
);
}
let has_call = has_call_descendant(ast, lang);
let kind = if has_call {
StmtKind::Call
} else {
StmtKind::Seq
};
let ord = if kind == StmtKind::Call {
let o = *call_ordinal;
*call_ordinal += 1;
o
} else {
0
};
let node = push_node(
g,
kind,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
connect_all(g, preds, node, EdgeKind::Seq);
if kind == StmtKind::Call
&& let Some(callee) = &g[node].callee
&& is_configured_terminator(callee, analysis_rules)
{
return Vec::new();
}
vec![node]
}
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
let ord = *call_ordinal;
*call_ordinal += 1;
let n = push_node(
g,
StmtKind::Call,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
connect_all(g, preds, n, EdgeKind::Seq);
if let Some(callee) = &g[n].callee
&& is_configured_terminator(callee, analysis_rules)
{
return Vec::new();
}
vec![n]
}
Kind::Assignment => {
let has_call = has_call_descendant(ast, lang);
let kind = if has_call {
StmtKind::Call
} else {
StmtKind::Seq
};
let ord = if kind == StmtKind::Call {
let o = *call_ordinal;
*call_ordinal += 1;
o
} else {
0
};
let n = push_node(
g,
kind,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
connect_all(g, preds, n, EdgeKind::Seq);
vec![n]
}
Kind::Trivia => preds.to_vec(),
_ => {
let n = push_node(
g,
StmtKind::Seq,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, n, EdgeKind::Seq);
vec![n]
}
}
}
pub(crate) fn build_cfg<'a>(
tree: &'a Tree,
code: &'a [u8],
lang: &str,
file_path: &str,
analysis_rules: Option<&LangAnalysisRules>,
) -> (Cfg, NodeIndex, FuncSummaries) {
debug!(target: "cfg", "Building CFG for {:?}", tree.root_node());
let mut g: Cfg = Graph::with_capacity(128, 256);
let mut summaries = FuncSummaries::new();
let entry = g.add_node(NodeInfo {
kind: StmtKind::Entry,
span: (0, 0),
label: None,
defines: None,
uses: Vec::new(),
callee: None,
enclosing_func: None,
call_ordinal: 0,
});
let exit = g.add_node(NodeInfo {
kind: StmtKind::Exit,
span: (code.len(), code.len()),
label: None,
defines: None,
uses: Vec::new(),
callee: None,
enclosing_func: None,
call_ordinal: 0,
});
let mut top_ordinal: u32 = 0;
let mut top_breaks = Vec::new();
let mut top_continues = Vec::new();
let exits = build_sub(
tree.root_node(),
&[entry],
&mut g,
lang,
code,
&mut summaries,
file_path,
None,
&mut top_ordinal,
analysis_rules,
&mut top_breaks,
&mut top_continues,
);
debug!(target: "cfg", "exits: {:?}", exits);
for e in exits {
connect_all(&mut g, &[e], exit, EdgeKind::Seq);
}
debug!(target: "cfg", "CFG DONE — nodes: {}, edges: {}", g.node_count(), g.edge_count());
if cfg!(debug_assertions) {
for idx in g.node_indices() {
debug!(target: "cfg", " node {:>3}: {:?}", idx.index(), g[idx]);
}
for e in g.edge_references() {
debug!(
target: "cfg",
" edge {:>3} → {:<3} ({:?})",
e.source().index(),
e.target().index(),
e.weight()
);
}
let mut reachable: HashSet<NodeIndex> = Default::default();
let mut bfs = Bfs::new(&g, entry);
while let Some(nx) = bfs.next(&g) {
reachable.insert(nx);
}
debug!(
target: "cfg",
"reachable nodes: {}/{}",
reachable.len(),
g.node_count()
);
if reachable.len() != g.node_count() {
let unreachable: Vec<_> = g
.node_indices()
.filter(|i| !reachable.contains(i))
.collect();
debug!(target: "cfg", "‼︎ unreachable nodes: {:?}", unreachable);
}
let doms: Dominators<_> = simple_fast(&g, entry);
debug!(target: "cfg", "dominator tree computed (len = {:?})", doms);
}
(g, entry, summaries)
}
pub(crate) fn export_summaries(
summaries: &FuncSummaries,
file_path: &str,
lang: &str,
) -> Vec<FuncSummary> {
summaries
.iter()
.map(|(key, local)| FuncSummary {
name: key.name.clone(),
file_path: file_path.to_owned(),
lang: lang.to_owned(),
param_count: local.param_count,
param_names: local.param_names.clone(),
source_caps: local.source_caps.bits(),
sanitizer_caps: local.sanitizer_caps.bits(),
sink_caps: local.sink_caps.bits(),
propagates_taint: local.propagates_taint,
tainted_sink_params: local.tainted_sink_params.clone(),
callees: local.callees.clone(),
})
.collect()
}