#![allow(
clippy::collapsible_if,
clippy::let_and_return,
clippy::unnecessary_map_or
)]
use petgraph::algo::dominators::{Dominators, simple_fast};
use petgraph::prelude::*;
use tracing::{debug, warn};
use tree_sitter::{Node, Tree};
use crate::labels::{
Cap, DataLabel, Kind, LangAnalysisRules, classify, classify_all, classify_gated_sink, lookup,
};
use crate::summary::FuncSummary;
use crate::symbol::{FuncKey, Lang};
use crate::utils::snippet::truncate_at_char_boundary;
use smallvec::SmallVec;
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
mod blocks;
mod conditions;
mod decorators;
mod dto;
mod helpers;
mod hierarchy;
mod imports;
mod literals;
mod params;
use blocks::{build_begin_rescue, build_switch, build_try};
use helpers::{
collect_nested_function_nodes, derive_anon_fn_name_from_context, find_classifiable_inner_call,
first_call_ident_with_span, first_member_label, first_member_text, is_raii_factory,
is_subscript_kind, root_member_receiver, subscript_components, subscript_lhs_node,
};
use conditions::{
build_condition_chain, build_ternary_diamond, classify_ternary_lhs,
detect_rust_let_match_guard, emit_rust_match_guard_if, find_ternary_rhs_wrapper,
is_boolean_operator, unwrap_parens,
};
use decorators::extract_auth_decorators;
pub(crate) use helpers::{
collect_idents, collect_idents_with_paths, find_constructor_type_child, first_call_ident,
has_call_descendant, member_expr_text, root_receiver_text, text_of,
};
use imports::{extract_import_bindings, extract_promisify_aliases};
#[cfg(test)]
use literals::has_sql_placeholders;
use literals::{
arg0_kind_and_interpolation, call_ident_of, def_use, detect_go_replace_call_sanitizer,
detect_rust_replace_chain_sanitizer, extract_arg_callees, extract_arg_string_literals,
extract_arg_uses, extract_const_keyword_arg, extract_const_macro_arg, extract_const_string_arg,
extract_destination_field_pairs, extract_destination_kwarg_pairs, extract_kwargs,
extract_literal_rhs, extract_object_arg_property, extract_shell_array_payload_idents,
find_call_node, find_call_node_deep, find_chained_inner_call, has_keyword_arg,
has_object_arg_property, has_only_literal_args, is_parameterized_query_call,
java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
js_chain_outer_method_for_inner, ruby_chain_arg0_for_method, walk_chain_inner_call_args,
};
use params::{
compute_container_and_kind, extract_param_meta, inject_framework_param_sources,
is_configured_terminator,
};
/// Test helper exposing [`extract_param_meta`] with the third tuple element
/// dropped.
///
/// Returns one `(name, type)` pair per entry produced by `extract_param_meta`,
/// in the same order.
pub fn extract_param_meta_for_test<'a>(
    func_node: tree_sitter::Node<'a>,
    lang: &str,
    code: &'a [u8],
) -> Vec<(String, Option<crate::ssa::type_facts::TypeKind>)> {
    let mut pairs = Vec::new();
    for (name, ty, _destructured) in extract_param_meta(func_node, lang, code) {
        pairs.push((name, ty));
    }
    pairs
}
/// Test helper that forwards to [`extract_param_meta`] unchanged, exposing the
/// full `(name, type, Vec<String>)` triples.
pub fn extract_param_meta_with_destructured_for_test<'a>(
    func_node: tree_sitter::Node<'a>,
    lang: &str,
    code: &'a [u8],
) -> Vec<(
    String,
    Option<crate::ssa::type_facts::TypeKind>,
    Vec<String>,
)> {
    let full_meta = extract_param_meta(func_node, lang, code);
    full_meta
}
/// Test helper: replaces the thread-local `DTO_CLASSES` map with whatever
/// `dto::collect_dto_classes` returns for `root`.
pub fn populate_dto_classes_for_test(root: tree_sitter::Node<'_>, lang: &str, code: &[u8]) {
    let collected = dto::collect_dto_classes(root, lang, code);
    DTO_CLASSES.with(|slot| {
        // The previous map is returned by `replace` and dropped here.
        slot.replace(collected);
    });
}
/// Test helper: empties the thread-local `DTO_CLASSES` map.
pub fn clear_dto_classes_for_test() {
    DTO_CLASSES.with(|slot| {
        let mut classes = slot.borrow_mut();
        classes.clear();
    });
}
// Per-thread scratch state used while building CFGs.
thread_local! {
// Maps a function node's start byte to its pre-order DFS ordinal; filled by
// `populate_fn_dfs_indices`, read by `fn_dfs_index`, emptied by
// `clear_fn_dfs_indices`.
static FN_DFS_INDICES: RefCell<HashMap<usize, u32>> = RefCell::new(HashMap::new());
// DTO field descriptions keyed by a `String`; the test helpers in this file
// fill it from `dto::collect_dto_classes`.
// NOTE(review): the key is presumably the class name — confirm in `dto`.
pub(crate) static DTO_CLASSES: RefCell<HashMap<String, crate::ssa::type_facts::DtoFields>>
= RefCell::new(HashMap::new());
// Set of strings; not written or read in the visible part of this file.
// NOTE(review): the name suggests lower-cased type-alias names — confirm.
pub(crate) static TYPE_ALIAS_LC: RefCell<std::collections::HashSet<String>>
= RefCell::new(std::collections::HashSet::new());
}
/// Numbers every node that `lookup` classifies as `Kind::Function` in
/// pre-order (parent before children, children left to right) and stores the
/// `start_byte -> ordinal` map in the thread-local `FN_DFS_INDICES`,
/// replacing any previous contents.
///
/// The walk is iterative (driven by a `TreeCursor`) rather than recursive so
/// that very deeply nested syntax trees cannot overflow the call stack. The
/// visiting order, and therefore the ordinals, match a recursive pre-order
/// traversal.
fn populate_fn_dfs_indices(tree: &Tree, lang: &str) {
    let mut map: HashMap<usize, u32> = HashMap::new();
    let mut counter: u32 = 0;
    let mut cursor = tree.walk();
    'walk: loop {
        let n = cursor.node();
        if lookup(lang, n.kind()) == Kind::Function {
            // If two function nodes share a start byte, the later one wins,
            // while the counter still advances for both.
            map.insert(n.start_byte(), counter);
            counter += 1;
        }
        // Descend first; that is what makes this a pre-order walk.
        if cursor.goto_first_child() {
            continue;
        }
        // No children: move to the next sibling, climbing until one exists.
        while !cursor.goto_next_sibling() {
            if !cursor.goto_parent() {
                // Back at the root with nothing left to visit.
                break 'walk;
            }
        }
    }
    FN_DFS_INDICES.with(|cell| *cell.borrow_mut() = map);
}
/// Empties the thread-local `FN_DFS_INDICES` map.
fn clear_fn_dfs_indices() {
    FN_DFS_INDICES.with(|cell| {
        let mut indices = cell.borrow_mut();
        indices.clear();
    });
}
/// Looks up the ordinal recorded in `FN_DFS_INDICES` for a function node
/// starting at `start_byte`; `None` when no entry exists.
fn fn_dfs_index(start_byte: usize) -> Option<u32> {
    FN_DFS_INDICES.with(|cell| {
        let indices = cell.borrow();
        let hit = indices.get(&start_byte).copied();
        hit
    })
}
/// Builds the synthetic name for an anonymous function starting at
/// `start_byte`.
///
/// Uses `<anon#IDX>` when `fn_dfs_index` knows the function's ordinal and
/// falls back to `<anon@START_BYTE>` otherwise.
pub(crate) fn anon_fn_name(start_byte: usize) -> String {
    if let Some(idx) = fn_dfs_index(start_byte) {
        return format!("<anon#{idx}>");
    }
    format!("<anon@{start_byte}>")
}
/// Returns `true` when `name` starts with one of the two prefixes produced by
/// `anon_fn_name` (`<anon#` or `<anon@`).
pub(crate) fn is_anon_fn_name(name: &str) -> bool {
    ["<anon#", "<anon@"]
        .iter()
        .any(|&prefix| name.starts_with(prefix))
}
/// Coarse statement category stored on every CFG node (`NodeInfo::kind`).
///
/// `Seq` is the `Default` variant. `Entry` and `Exit` are the two marker
/// nodes that `create_body_graph` adds to each new body graph.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub enum StmtKind {
/// Marker node added first by `create_body_graph`.
Entry,
/// Marker node added second by `create_body_graph`.
Exit,
/// Default kind.
#[default]
Seq,
If,
Loop,
Break,
Continue,
Return,
Throw,
Call,
}
/// Edge weight of the control-flow graph (`Cfg = Graph<NodeInfo, EdgeKind>`).
///
/// NOTE(review): the variant names suggest sequential flow, branch taken /
/// not taken, loop back-edge and exceptional flow; edge construction is
/// outside the visible part of this file — confirm against the builders.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeKind {
Seq, True, False, Back, Exception, }
/// Upper bound on the number of (sorted, de-duplicated) identifiers that
/// `extract_condition_raw` keeps for one condition.
pub(super) const MAX_COND_VARS: usize = 8;
/// Length limit passed to `truncate_at_char_boundary` when
/// `extract_condition_raw` stores a condition's source text.
pub(super) const MAX_CONDITION_TEXT_LEN: usize = 256;
/// Binary operator recognised by `extract_bin_op`.
///
/// Each variant corresponds to the operator token matched there; the
/// trailing comments list those tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum BinOp {
// `+`
Add,
// `-`
Sub,
// `*`
Mul,
// `/`
Div,
// `%`
Mod,
// `&`
BitAnd,
// `|`
BitOr,
// `^`
BitXor,
// `<<`
LeftShift,
// `>>`
RightShift,
// `==` and `===`
Eq,
// `!=` and `!==`
NotEq,
// `<`
Lt,
// `<=`
LtEq,
// `>`
Gt,
// `>=`
GtEq,
}
/// Call-site facts attached to a CFG node (`NodeInfo::call`).
///
/// Fields marked `#[serde(default)]` may be absent from serialized input and
/// then take their `Default` value.
/// NOTE(review): the code that fills this struct lies outside the visible
/// part of the file; field notes below that say "presumably" are inferred
/// from same-named locals in `push_node` and should be confirmed.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct CallMeta {
pub callee: Option<String>,
#[doc(hidden)]
#[serde(default)]
pub callee_text: Option<String>,
// Presumably the callee text that was displaced when classification
// switched to an inner call or member expression.
pub outer_callee: Option<String>,
// When set, `NodeInfo::classification_span` returns this instead of the
// node's AST span.
#[serde(default)]
pub callee_span: Option<(usize, usize)>,
pub call_ordinal: u32,
pub arg_uses: Vec<Vec<String>>,
pub receiver: Option<String>,
// Presumably the union of payload argument positions over all gate matches.
pub sink_payload_args: Option<Vec<usize>>,
pub kwargs: Vec<(String, Vec<String>)>,
pub arg_string_literals: Vec<Option<String>>,
#[serde(default)]
pub destination_uses: Option<Vec<String>>,
#[serde(default)]
pub gate_filters: Vec<GateFilter>,
#[serde(default)]
pub is_constructor: bool,
}
/// One gated-sink match recorded for a call by `push_node`.
///
/// `push_node` creates one filter per `classify_gated_sink` match and one per
/// shell-array payload match.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct GateFilter {
// Capabilities of the matched sink label (empty when the label is not a sink).
pub label_caps: crate::labels::Cap,
// Argument positions treated as payload for this match.
pub payload_args: Vec<usize>,
// Variable names extracted from destination fields / keyword arguments, if any.
pub destination_uses: Option<Vec<String>>,
// Field names paired with `destination_uses`; omitted from serialized
// output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub destination_fields: Vec<String>,
}
/// Taint-related facts attached to a CFG node (`NodeInfo::taint`): the data
/// labels assigned to the node plus the names it defines and uses.
/// NOTE(review): population happens outside the visible part of this file;
/// confirm the exact meaning of `const_text` and `extra_defines` there.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct TaintMeta {
pub labels: SmallVec<[DataLabel; 2]>, pub const_text: Option<String>,
pub defines: Option<String>, pub uses: Vec<String>, pub extra_defines: Vec<String>,
}
/// Source location facts attached to a CFG node (`NodeInfo::ast`).
///
/// `span` is a pair of byte offsets (`create_body_graph` passes span
/// start/end values into it); `enclosing_func` is the optional name of the
/// function the node belongs to, as supplied by the caller.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct AstMeta {
pub span: (usize, usize), pub enclosing_func: Option<String>,
}
/// Node weight of the control-flow graph (`Cfg = Graph<NodeInfo, EdgeKind>`).
///
/// Groups the statement kind, call metadata, taint metadata and AST location
/// of one node, followed by a set of per-node analysis facts.
/// NOTE(review): the code that fills the analysis facts is outside the
/// visible part of this file; notes saying "presumably" name the helper in
/// this file that looks like the producer and should be confirmed.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct NodeInfo {
pub kind: StmtKind,
pub call: CallMeta,
pub taint: TaintMeta,
pub ast: AstMeta,
// Condition facts; presumably the triple returned by `extract_condition_raw`.
pub condition_text: Option<String>,
pub condition_vars: Vec<String>,
pub condition_negated: bool,
pub all_args_literal: bool,
pub catch_param: bool,
pub arg_callees: Vec<Option<String>>,
pub cast_target_type: Option<String>,
// Presumably produced by `extract_bin_op` / `extract_bin_op_const`.
pub bin_op: Option<BinOp>,
pub bin_op_const: Option<i64>,
pub managed_resource: bool,
pub in_defer: bool,
pub parameterized_query: bool,
// Presumably produced by `extract_template_prefix`.
pub string_prefix: Option<String>,
// Presumably produced by `detect_eq_with_const`.
pub is_eq_with_const: bool,
// Presumably produced by `detect_numeric_length_access`.
pub is_numeric_length_access: bool,
// Presumably produced by `detect_member_field_assignment`.
pub member_field: Option<String>,
pub rhs_is_function_literal: bool,
}
impl NodeInfo {
    /// Span to use when classifying this node: the callee span when one was
    /// recorded, otherwise the node's whole AST span.
    #[inline]
    pub fn classification_span(&self) -> (usize, usize) {
        match self.call.callee_span {
            Some(callee_span) => callee_span,
            None => self.ast.span,
        }
    }
}
/// Per-function summary stored in `FuncSummaries`, keyed by `FuncKey`.
///
/// Holds the function's entry/exit graph nodes, capability sets for sources,
/// sanitizers and sinks, parameter information, recorded callee sites, and
/// identification data (`container`, `disambig`, `kind`).
/// NOTE(review): the summary builder is outside the visible part of this
/// file; confirm the exact meaning of the parameter index vectors there.
#[derive(Debug, Clone)]
pub struct LocalFuncSummary {
#[allow(dead_code)] pub entry: NodeIndex,
#[allow(dead_code)] pub exit: NodeIndex,
pub source_caps: Cap,
pub sanitizer_caps: Cap,
pub sink_caps: Cap,
pub param_count: usize,
pub param_names: Vec<String>,
pub propagating_params: Vec<usize>,
pub tainted_sink_params: Vec<usize>,
pub callees: Vec<crate::summary::CalleeSite>,
pub container: String,
pub disambig: Option<u32>,
pub kind: crate::symbol::FuncKind,
}
/// Control-flow graph: a petgraph `Graph` with `NodeInfo` node weights and
/// `EdgeKind` edge weights.
pub type Cfg = Graph<NodeInfo, EdgeKind>;
/// Function summaries of one file, keyed by `FuncKey`.
pub type FuncSummaries = HashMap<FuncKey, LocalFuncSummary>;
/// Identifier of a body within a `FileCfg`; `FileCfg::body` uses the wrapped
/// value as an index into `FileCfg::bodies`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct BodyId(pub u32);
/// What kind of code a body (`BodyMeta::kind`) represents: the file's
/// top-level code, a named function, or an anonymous function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BodyKind {
TopLevel,
NamedFunction,
AnonymousFunction,
}
/// Descriptive metadata for one body (`BodyCfg::meta`): identity, kind,
/// optional name, parameter information, source span, parent body and
/// associated function key.
/// NOTE(review): construction is outside the visible part of this file;
/// `param_types` and `param_destructured_fields` presumably run parallel to
/// `params` (they match the tuple shape returned by `extract_param_meta`) —
/// confirm.
#[derive(Debug, Clone)]
pub struct BodyMeta {
pub id: BodyId,
pub kind: BodyKind,
pub name: Option<String>,
pub params: Vec<String>,
pub param_types: Vec<Option<crate::ssa::type_facts::TypeKind>>,
pub param_destructured_fields: Vec<Vec<String>>,
pub param_count: usize,
pub span: (usize, usize),
pub parent_body_id: Option<BodyId>,
pub func_key: Option<FuncKey>,
pub auth_decorators: Vec<String>,
}
/// One body's control-flow graph together with its metadata and the indices
/// of its entry and exit nodes.
#[derive(Debug)]
pub struct BodyCfg {
pub meta: BodyMeta,
pub graph: Cfg,
pub entry: NodeIndex,
pub exit: NodeIndex,
}
/// One import binding recorded for a file.
/// NOTE(review): produced by `imports::extract_import_bindings`, which is not
/// visible here; `original` is presumably the imported symbol's original name
/// and `module_path` the module it came from — confirm.
#[derive(Debug, Clone)]
pub struct ImportBinding {
pub original: String,
pub module_path: Option<String>,
}
/// Import bindings of one file, keyed by a `String`.
/// NOTE(review): the key is presumably the local (alias) name — confirm.
pub type ImportBindings = HashMap<String, ImportBinding>;
/// One promisify alias recorded for a file.
/// NOTE(review): produced by `imports::extract_promisify_aliases`, which is
/// not visible here; `wrapped` is presumably the name of the function that
/// was wrapped — confirm.
#[derive(Debug, Clone)]
pub struct PromisifyAlias {
pub wrapped: String,
}
/// Promisify aliases of one file, keyed by a `String`.
/// NOTE(review): the key is presumably the alias name — confirm.
pub type PromisifyAliases = HashMap<String, PromisifyAlias>;
/// All CFG-related results for one source file.
///
/// The accessors on this type treat `bodies[0]` as the top-level body and
/// `bodies[1..]` as the function bodies, and index `bodies` by `BodyId`.
#[derive(Debug)]
pub struct FileCfg {
pub bodies: Vec<BodyCfg>,
pub summaries: FuncSummaries,
pub import_bindings: ImportBindings,
pub promisify_aliases: PromisifyAliases,
// Pairs of strings describing hierarchy relations.
// NOTE(review): presumably (child, parent) type names — confirm in `hierarchy`.
pub hierarchy_edges: Vec<(String, String)>,
}
impl FileCfg {
    /// Returns the top-level body, i.e. `bodies[0]`.
    ///
    /// # Panics
    /// Panics if `bodies` is empty.
    pub fn toplevel(&self) -> &BodyCfg {
        &self.bodies[0]
    }

    /// Returns the body whose position in `bodies` equals `id`.
    ///
    /// # Panics
    /// Panics if `id` is out of range for `bodies`.
    pub fn body(&self, id: BodyId) -> &BodyCfg {
        &self.bodies[id.0 as usize]
    }

    /// Returns every body after the top-level one (`bodies[1..]`).
    ///
    /// Yields an empty slice when `bodies` holds zero or one element; the
    /// zero-element case used to panic on the out-of-range slice start.
    pub fn function_bodies(&self) -> &[BodyCfg] {
        self.bodies.get(1..).unwrap_or_default()
    }

    /// Returns the first function body when one exists, otherwise the
    /// top-level body.
    ///
    /// # Panics
    /// Panics if `bodies` is empty.
    pub fn first_body(&self) -> &BodyCfg {
        self.bodies.get(1).unwrap_or_else(|| &self.bodies[0])
    }

    /// Sums the node counts of all body graphs.
    pub fn total_node_count(&self) -> usize {
        self.bodies.iter().map(|b| b.graph.node_count()).sum()
    }
}
fn make_empty_node_info(
kind: StmtKind,
span: (usize, usize),
enclosing_func: Option<&str>,
) -> NodeInfo {
NodeInfo {
kind,
ast: AstMeta {
span,
enclosing_func: enclosing_func.map(|s| s.to_owned()),
},
..Default::default()
}
}
/// Creates a fresh graph for one body, pre-populated with its two marker
/// nodes.
///
/// The `Entry` node is added first with the zero-width span
/// `(span_start, span_start)`, the `Exit` node second with
/// `(span_end, span_end)`. Returns the graph together with both node indices.
fn create_body_graph(
    span_start: usize,
    span_end: usize,
    enclosing_func: Option<&str>,
) -> (Cfg, NodeIndex, NodeIndex) {
    let entry_info =
        make_empty_node_info(StmtKind::Entry, (span_start, span_start), enclosing_func);
    let exit_info = make_empty_node_info(StmtKind::Exit, (span_end, span_end), enclosing_func);

    let mut graph: Cfg = Graph::with_capacity(32, 64);
    let entry = graph.add_node(entry_info);
    let exit = graph.add_node(exit_info);
    (graph, entry, exit)
}
/// Extracts the raw condition facts of a conditional construct.
///
/// The condition node is the `condition` field when present; otherwise the
/// first child that is neither a block/trivia node (per `lookup`) nor one of
/// the structural tokens `if`, `else`, `let`, `{`, `}`, `(`, `)`.
///
/// Returns `(text, vars, negated)`:
/// * `text` — the condition's source text, truncated to
///   `MAX_CONDITION_TEXT_LEN` via `truncate_at_char_boundary`;
/// * `vars` — identifiers collected from the condition after negation has
///   been peeled by `detect_negation`, sorted, de-duplicated and capped at
///   `MAX_COND_VARS`;
/// * `negated` — the flag reported by `detect_negation`.
///
/// Returns `(None, [], false)` when no condition node can be found.
fn extract_condition_raw<'a>(
    ast: Node<'a>,
    lang: &str,
    code: &'a [u8],
) -> (Option<String>, Vec<String>, bool) {
    const STRUCTURAL_TOKENS: [&str; 7] = ["if", "else", "let", "{", "}", "(", ")"];

    let cond = match ast.child_by_field_name("condition") {
        Some(field) => field,
        None => {
            let mut cursor = ast.walk();
            let fallback = ast.children(&mut cursor).find(|child| {
                let kind = child.kind();
                if matches!(lookup(lang, kind), Kind::Block | Kind::Trivia) {
                    return false;
                }
                !STRUCTURAL_TOKENS.iter().any(|&tok| tok == kind)
            });
            match fallback {
                Some(child) => child,
                None => return (None, Vec::new(), false),
            }
        }
    };

    let (inner, negated) = detect_negation(cond, ast, lang);

    let mut vars = Vec::new();
    collect_idents(inner, code, &mut vars);
    vars.sort();
    vars.dedup();
    vars.truncate(MAX_COND_VARS);

    // The text is taken from the full condition, not the negation-peeled inner node.
    let text = text_of(cond, code)
        .map(|full| truncate_at_char_boundary(&full, MAX_CONDITION_TEXT_LEN).to_string());

    (text, vars, negated)
}
/// Peels one layer of parentheses and one leading logical-not from `cond`.
///
/// Returns `(inner, true)` when the (unparenthesized) condition is a
/// negation wrapper node whose first child is `!` or `not`; `inner` is then
/// the `argument` field, else the `operand` field, else the last child that
/// is not a `!`/`not` token, else the wrapper itself. Otherwise returns the
/// unparenthesized condition with `false`.
///
/// `_if_ast` and `_lang` are accepted but unused.
pub(super) fn detect_negation<'a>(
    cond: Node<'a>,
    _if_ast: Node<'a>,
    _lang: &str,
) -> (Node<'a>, bool) {
    // Step 1: strip a single enclosing parenthesized expression, if any.
    let cond = if cond.kind() != "parenthesized_expression" {
        cond
    } else {
        match cond.child_by_field_name("expression") {
            Some(expr) => expr,
            None => {
                let mut cursor = cond.walk();
                let first_non_paren = cond
                    .children(&mut cursor)
                    .find(|c| !matches!(c.kind(), "(" | ")"));
                first_non_paren.unwrap_or(cond)
            }
        }
    };

    // Step 2: is this a negation wrapper that actually starts with a not-token?
    let wraps_negation = matches!(
        cond.kind(),
        "unary_expression" | "not_operator" | "prefix_unary_expression" | "unary_not"
    );
    let leads_with_not = wraps_negation
        && cond
            .child(0)
            .is_some_and(|tok| matches!(tok.kind(), "!" | "not"));
    if !leads_with_not {
        return (cond, false);
    }

    // Step 3: locate the negated operand.
    let operand = cond
        .child_by_field_name("argument")
        .or_else(|| cond.child_by_field_name("operand"))
        .or_else(|| {
            let mut cursor = cond.walk();
            let last_non_token = cond
                .children(&mut cursor)
                .filter(|c| !matches!(c.kind(), "!" | "not"))
                .last();
            last_non_token
        });
    (operand.unwrap_or(cond), true)
}
/// Determines the operator of the single, non-nested binary expression found
/// by `find_single_binary_expr`.
///
/// Only the first anonymous (unnamed) child of that expression is inspected;
/// if it is not one of the recognised operator tokens the result is `None`.
fn extract_bin_op(ast: Node, lang: &str) -> Option<BinOp> {
    let bin_expr = find_single_binary_expr(ast, lang)?;
    let mut cursor = bin_expr.walk();
    let operator = bin_expr.children(&mut cursor).find(|c| !c.is_named())?;
    let op = match operator.kind() {
        "+" => BinOp::Add,
        "-" => BinOp::Sub,
        "*" => BinOp::Mul,
        "/" => BinOp::Div,
        "%" => BinOp::Mod,
        "&" => BinOp::BitAnd,
        "|" => BinOp::BitOr,
        "^" => BinOp::BitXor,
        "<<" => BinOp::LeftShift,
        ">>" => BinOp::RightShift,
        "==" | "===" => BinOp::Eq,
        "!=" | "!==" => BinOp::NotEq,
        "<" => BinOp::Lt,
        "<=" => BinOp::LtEq,
        ">" => BinOp::Gt,
        ">=" => BinOp::GtEq,
        _ => return None,
    };
    Some(op)
}
/// Finds the right-hand-side node of an assignment-like construct.
///
/// * declarator / assignment nodes: the `value` field, else `right`;
/// * `variable_declaration` / `lexical_declaration`: the same lookup on the
///   first `variable_declarator` child only;
/// * `expression_statement`: the first assignment child that has a `right`
///   (else `value`) field.
///
/// Any other node kind yields `None`.
fn assignment_rhs<'a>(ast: Node<'a>) -> Option<Node<'a>> {
    fn value_then_right<'t>(n: Node<'t>) -> Option<Node<'t>> {
        n.child_by_field_name("value")
            .or_else(|| n.child_by_field_name("right"))
    }
    fn right_then_value<'t>(n: Node<'t>) -> Option<Node<'t>> {
        n.child_by_field_name("right")
            .or_else(|| n.child_by_field_name("value"))
    }

    let kind = ast.kind();
    if matches!(
        kind,
        "variable_declarator" | "assignment_expression" | "assignment"
    ) {
        return value_then_right(ast);
    }
    if matches!(kind, "variable_declaration" | "lexical_declaration") {
        let mut cursor = ast.walk();
        let declarator = ast
            .named_children(&mut cursor)
            .find(|c| c.kind() == "variable_declarator")?;
        return value_then_right(declarator);
    }
    if kind == "expression_statement" {
        let mut cursor = ast.walk();
        for child in ast.named_children(&mut cursor) {
            if !matches!(child.kind(), "assignment_expression" | "assignment") {
                continue;
            }
            if let Some(rhs) = right_then_value(child) {
                return Some(rhs);
            }
        }
    }
    None
}
/// Extracts a literal string prefix for JavaScript/TypeScript nodes.
///
/// Tries, in order: the assignment right-hand side of `ast` (via
/// `assignment_rhs`), then — when `ast` itself is a call or `new`
/// expression — its first named argument. Each candidate is handed to
/// `prefix_of_expression`. Returns `None` for any other language or when no
/// prefix is found.
fn extract_template_prefix(ast: Node, lang: &str, code: &[u8]) -> Option<String> {
    if lang != "javascript" && lang != "typescript" {
        return None;
    }

    let from_rhs = assignment_rhs(ast).and_then(|rhs| prefix_of_expression(rhs, code));
    if from_rhs.is_some() {
        return from_rhs;
    }

    if !matches!(ast.kind(), "call_expression" | "call" | "new_expression") {
        return None;
    }
    let args = ast
        .child_by_field_name("arguments")
        .or_else(|| ast.child_by_field_name("argument_list"))?;
    let mut cursor = args.walk();
    let first_arg = args.named_children(&mut cursor).next()?;
    prefix_of_expression(first_arg, code)
}
/// Returns the literal text an expression starts with, when that can be read
/// off the syntax tree.
///
/// Up to six wrapper layers are unwrapped first: parentheses, type
/// assertions (`as`, `satisfies`, non-null), `await`/`yield`, and calls /
/// `new` expressions (descending into their first named argument). After
/// unwrapping:
/// * a `template_string` yields its first named child when that is a
///   non-empty `string_fragment`;
/// * a `binary_expression` containing a `+` token yields its left operand
///   when that is a `string`/`string_fragment`, with one pair of matching
///   surrounding quotes (`"`, `'` or `` ` ``) removed, provided the result is
///   non-empty.
///
/// Everything else yields `None`.
fn prefix_of_expression(node: Node, code: &[u8]) -> Option<String> {
    let mut cur = node;
    for _ in 0..6 {
        let unwrapped = match cur.kind() {
            "parenthesized_expression" | "await_expression" | "yield_expression" => {
                cur.named_child(0)?
            }
            "as_expression" | "type_assertion" | "satisfies_expression" | "non_null_expression" => {
                cur.child_by_field_name("expression")
                    .or_else(|| cur.named_child(0))?
            }
            "call_expression" | "call" | "new_expression" => {
                let args = cur
                    .child_by_field_name("arguments")
                    .or_else(|| cur.child_by_field_name("argument_list"))?;
                let mut cursor = args.walk();
                let first_arg = args.named_children(&mut cursor).next()?;
                first_arg
            }
            _ => break,
        };
        cur = unwrapped;
    }

    match cur.kind() {
        "template_string" => {
            let mut cursor = cur.walk();
            let head = cur.named_children(&mut cursor).next()?;
            if head.kind() != "string_fragment" {
                return None;
            }
            let fragment = text_of(head, code)?;
            if fragment.is_empty() {
                None
            } else {
                Some(fragment)
            }
        }
        "binary_expression" => {
            let mut cursor = cur.walk();
            let has_plus = cur
                .children(&mut cursor)
                .any(|c| !c.is_named() && c.kind() == "+");
            if !has_plus {
                return None;
            }
            let left = cur.named_child(0)?;
            if !matches!(left.kind(), "string" | "string_fragment") {
                return None;
            }
            let raw = text_of(left, code)?;
            let quoted = ['"', '\'', '`']
                .iter()
                .any(|&q| raw.starts_with(q) && raw.ends_with(q));
            // A lone quote character both starts and ends the text, hence the length guard.
            let trimmed = if quoted && raw.len() >= 2 {
                raw[1..raw.len() - 1].to_string()
            } else {
                raw
            };
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed)
            }
        }
        _ => None,
    }
}
/// Returns the integer constant operand of the single, non-nested binary
/// expression found by `find_single_binary_expr`.
///
/// The left operand is tried first, then the right. An operand counts only
/// when its node kind is one of the numeric literal kinds listed below and
/// its text parses as an `i64` — either decimal or with a `0x`/`0o`/`0b`
/// prefix (case-insensitive). Digit separators (`_`, as in `1_000` or
/// `0xFF_FF`) are ignored while parsing. Floats, suffixed literals and
/// out-of-range values yield `None`.
fn extract_bin_op_const(ast: Node, lang: &str, code: &[u8]) -> Option<i64> {
    /// Parses the text of an integer literal, ignoring `_` digit separators.
    fn parse_int_literal(text: &str) -> Option<i64> {
        let digits: String = text.trim().chars().filter(|&c| c != '_').collect();
        if let Ok(value) = digits.parse::<i64>() {
            return Some(value);
        }
        let radix_prefixes: [(&str, &str, u32); 3] =
            [("0x", "0X", 16), ("0o", "0O", 8), ("0b", "0B", 2)];
        for (lower, upper, radix) in radix_prefixes {
            let body = digits
                .strip_prefix(lower)
                .or_else(|| digits.strip_prefix(upper));
            if let Some(body) = body {
                // A recognised prefix decides the outcome; no further radix is tried.
                return i64::from_str_radix(body, radix).ok();
            }
        }
        None
    }

    /// Reads `n` as an integer literal when its kind is a numeric literal kind.
    fn try_parse_number(n: Node, code: &[u8]) -> Option<i64> {
        if !matches!(
            n.kind(),
            "number" | "integer" | "integer_literal" | "number_literal" | "float"
        ) {
            return None;
        }
        // Checked slice access: a node range outside `code` yields `None`
        // instead of panicking.
        let bytes = code.get(n.byte_range())?;
        let text = std::str::from_utf8(bytes).ok()?;
        parse_int_literal(text)
    }

    let bin_expr = find_single_binary_expr(ast, lang)?;
    let left = bin_expr.named_child(0)?;
    let right = bin_expr.named_child(1)?;
    try_parse_number(left, code).or_else(|| try_parse_number(right, code))
}
/// Reports whether `ast` — or its assignment right-hand side, when
/// `assignment_rhs` finds one — is a boolean tree built only from
/// equality-against-literal comparisons (see `is_boolean_eq_const_tree`).
pub(super) fn detect_eq_with_const(ast: Node, lang: &str) -> bool {
    let target = match assignment_rhs(ast) {
        Some(rhs) => rhs,
        None => ast,
    };
    is_boolean_eq_const_tree(target, lang)
}
/// Recursively checks that `node` is a boolean combination of
/// equality/inequality comparisons in which exactly one side is a literal.
///
/// Accepted shapes:
/// * parentheses around an accepted expression;
/// * `unary_expression` / `not_operator` carrying a `!` or `not` token around
///   an accepted expression;
/// * `boolean_operator`, or a binary expression whose operator is
///   `&&`, `||`, `and` or `or`, with both operands accepted;
/// * a binary expression with `==`, `===`, `!=` or `!==` where exactly one
///   operand has a literal kind (`is_equality_literal_kind`).
fn is_boolean_eq_const_tree(node: Node, lang: &str) -> bool {
    /// True when the first named child exists and is itself accepted.
    fn first_operand_ok(n: Node, lang: &str) -> bool {
        n.named_child(0)
            .is_some_and(|c| is_boolean_eq_const_tree(c, lang))
    }
    /// True when the first two named children both exist and are accepted.
    fn both_operands_ok(n: Node, lang: &str) -> bool {
        first_operand_ok(n, lang)
            && n.named_child(1)
                .is_some_and(|c| is_boolean_eq_const_tree(c, lang))
    }

    let kind = node.kind();
    if kind == "parenthesized_expression" {
        return first_operand_ok(node, lang);
    }
    if matches!(kind, "unary_expression" | "not_operator") {
        let mut cursor = node.walk();
        let negates = node
            .children(&mut cursor)
            .any(|tok| !tok.is_named() && matches!(tok.kind(), "!" | "not"));
        return negates && first_operand_ok(node, lang);
    }
    if kind == "boolean_operator" {
        return both_operands_ok(node, lang);
    }
    if !is_binary_expr_kind(kind, lang) {
        return false;
    }

    let operator = binary_operator_token(node);
    match operator.as_deref() {
        Some("&&" | "||" | "and" | "or") => both_operands_ok(node, lang),
        Some("==" | "===" | "!=" | "!==") => match (node.named_child(0), node.named_child(1)) {
            // Exactly one side must be a literal.
            (Some(left), Some(right)) => {
                is_equality_literal_kind(left.kind()) != is_equality_literal_kind(right.kind())
            }
            _ => false,
        },
        _ => false,
    }
}
/// Returns the kind string of the first anonymous (unnamed) child of `node`,
/// or `None` when every child is named.
fn binary_operator_token(node: Node) -> Option<String> {
    let mut cursor = node.walk();
    let token = node.children(&mut cursor).find(|c| !c.is_named())?;
    Some(token.kind().to_string())
}
/// Returns `true` when `name` is one of the property/method names this module
/// treats as a numeric length accessor.
fn is_numeric_length_property(name: &str) -> bool {
    const LENGTH_LIKE: [&str; 5] = ["length", "size", "byteLength", "count", "len"];
    LENGTH_LIKE.iter().any(|&known| known == name)
}
/// Returns the accessed field name when the value side of `ast` is a plain
/// member access.
///
/// The value side is the `value` field, else the `right` field, else the
/// `value`/`initializer` of the first `variable_declarator` or
/// `init_declarator` child, else `ast` itself. The node found is passed to
/// `extract_member_field_name`.
fn detect_member_field_assignment(ast: Node, code: &[u8]) -> Option<String> {
    let direct = ast
        .child_by_field_name("value")
        .or_else(|| ast.child_by_field_name("right"));
    let target = match direct {
        Some(value) => value,
        None => {
            let mut cursor = ast.walk();
            let declarator = ast
                .named_children(&mut cursor)
                .find(|c| matches!(c.kind(), "variable_declarator" | "init_declarator"));
            declarator
                .and_then(|d| {
                    d.child_by_field_name("value")
                        .or_else(|| d.child_by_field_name("initializer"))
                })
                .unwrap_or(ast)
        }
    };
    extract_member_field_name(target, code)
}
/// Returns the accessed member's name when `node` is a member-access
/// expression.
///
/// The member is read from the first present field among `property`,
/// `attribute`, `field` and `name`. Its text is returned only when it is
/// non-empty and consists solely of ASCII alphanumerics and underscores.
fn extract_member_field_name(node: Node, code: &[u8]) -> Option<String> {
    if !matches!(
        node.kind(),
        "member_expression"
            | "member_access_expression"
            | "field_expression"
            | "selector_expression"
            | "attribute"
    ) {
        return None;
    }
    let member = node
        .child_by_field_name("property")
        .or_else(|| node.child_by_field_name("attribute"))
        .or_else(|| node.child_by_field_name("field"))
        .or_else(|| node.child_by_field_name("name"))?;
    let text = text_of(member, code)?;
    let is_plain_ident =
        !text.is_empty() && text.chars().all(|c| c.is_ascii_alphanumeric() || c == '_');
    is_plain_ident.then_some(text)
}
/// Reports whether the value side of `ast` is a numeric length access (see
/// `is_numeric_length_access_expr`).
///
/// The value side is the `value` field, else the `right` field, else the
/// `value`/`initializer` of the first `variable_declarator` or
/// `init_declarator` child, else `ast` itself. `_lang` is unused.
fn detect_numeric_length_access(ast: Node, _lang: &str, code: &[u8]) -> bool {
    let direct = ast
        .child_by_field_name("value")
        .or_else(|| ast.child_by_field_name("right"));
    let target = match direct {
        Some(value) => value,
        None => {
            let mut cursor = ast.walk();
            let declarator = ast
                .named_children(&mut cursor)
                .find(|c| matches!(c.kind(), "variable_declarator" | "init_declarator"));
            declarator
                .and_then(|d| {
                    d.child_by_field_name("value")
                        .or_else(|| d.child_by_field_name("initializer"))
                })
                .unwrap_or(ast)
        }
    };
    is_numeric_length_access_expr(target, code)
}
/// Reports whether `node` reads a length-like member.
///
/// * Member-access nodes qualify when the accessed member (first present
///   field among `property`, `attribute`, `field`, `name`) satisfies
///   `is_numeric_length_property`.
/// * Call nodes qualify when they have no named arguments and their callee
///   (first present field among `function`, `name`, `method`) itself
///   qualifies.
///
/// All other node kinds yield `false`.
fn is_numeric_length_access_expr(node: Node, code: &[u8]) -> bool {
    match node.kind() {
        "member_expression"
        | "attribute"
        | "selector_expression"
        | "field_expression"
        | "member_access_expression" => {
            let member = node
                .child_by_field_name("property")
                .or_else(|| node.child_by_field_name("attribute"))
                .or_else(|| node.child_by_field_name("field"))
                .or_else(|| node.child_by_field_name("name"));
            let Some(member) = member else {
                return false;
            };
            match text_of(member, code) {
                Some(name) => is_numeric_length_property(&name),
                None => false,
            }
        }
        "call_expression" | "method_invocation" | "method_call_expression" | "call" => {
            let args = node
                .child_by_field_name("arguments")
                .or_else(|| node.child_by_field_name("argument_list"));
            // A missing argument list counts as zero arguments.
            if let Some(list) = args {
                if list.named_child_count() != 0 {
                    return false;
                }
            }
            let callee = node
                .child_by_field_name("function")
                .or_else(|| node.child_by_field_name("name"))
                .or_else(|| node.child_by_field_name("method"));
            callee.is_some_and(|c| is_numeric_length_access_expr(c, code))
        }
        _ => false,
    }
}
/// Returns `true` when `kind` is one of the node kinds this module treats as
/// a literal operand in an equality comparison (string, numeric, null-like
/// and boolean literal kinds).
fn is_equality_literal_kind(kind: &str) -> bool {
    const LITERAL_KINDS: [&str; 25] = [
        // strings
        "string",
        "string_literal",
        "interpreted_string_literal",
        "raw_string_literal",
        "encapsed_string",
        // numbers
        "number",
        "integer",
        "float",
        "integer_literal",
        "float_literal",
        "number_literal",
        "decimal_integer_literal",
        "hex_integer_literal",
        "octal_integer_literal",
        "binary_integer_literal",
        "decimal_floating_point_literal",
        "hex_floating_point_literal",
        // null-like
        "null",
        "null_literal",
        "nil",
        "none",
        "undefined",
        // booleans
        "true",
        "false",
        "boolean_literal",
    ];
    LITERAL_KINDS.iter().any(|&literal| literal == kind)
}
/// Locates the one "leaf" binary expression associated with `ast`, i.e. a
/// binary expression with exactly two named children, neither of which is
/// itself a binary expression.
///
/// * If `ast` is a binary expression, it is returned when it is a leaf and
///   `None` otherwise.
/// * If `ast` is a wrapper (one of `WRAPPER_KINDS`, or any kind ending in
///   `_statement`), its named children — and the named children of any child
///   that is itself a wrapper kind — are scanned in order. A leaf binary
///   expression becomes the result; meeting any further binary expression
///   after one has been accepted makes the result `None`. Non-leaf binary
///   expressions seen before a leaf is accepted are ignored.
/// * Any other node yields `None`.
fn find_single_binary_expr<'a>(ast: Node<'a>, lang: &str) -> Option<Node<'a>> {
    const WRAPPER_KINDS: [&str; 7] = [
        "expression_statement",
        "assignment_expression",
        "assignment",
        "variable_declaration",
        "variable_declarator",
        "short_var_declaration",
        "lexical_declaration",
    ];

    /// True when `node` has exactly two named children and neither is a
    /// binary expression.
    fn is_leaf_binary(node: Node, lang: &str) -> bool {
        if node.named_child_count() != 2 {
            return false;
        }
        let left_is_bin = node
            .named_child(0)
            .is_some_and(|operand| is_binary_expr_kind(operand.kind(), lang));
        let right_is_bin = node
            .named_child(1)
            .is_some_and(|operand| is_binary_expr_kind(operand.kind(), lang));
        !(left_is_bin || right_is_bin)
    }

    let ast_kind = ast.kind();
    if is_binary_expr_kind(ast_kind, lang) {
        return is_leaf_binary(ast, lang).then_some(ast);
    }

    let is_wrapper =
        WRAPPER_KINDS.iter().any(|&w| w == ast_kind) || ast_kind.ends_with("_statement");
    if !is_wrapper {
        return None;
    }

    let mut found: Option<Node<'a>> = None;
    let mut cursor = ast.walk();
    for child in ast.named_children(&mut cursor) {
        let child_kind = child.kind();
        if is_binary_expr_kind(child_kind, lang) {
            if found.is_some() {
                return None;
            }
            if is_leaf_binary(child, lang) {
                found = Some(child);
            }
            continue;
        }
        if !WRAPPER_KINDS.iter().any(|&w| w == child_kind) {
            continue;
        }
        // One extra level: binary expressions directly inside a nested wrapper.
        let mut inner_cursor = child.walk();
        for grandchild in child.named_children(&mut inner_cursor) {
            if !is_binary_expr_kind(grandchild.kind(), lang) {
                continue;
            }
            if found.is_some() {
                return None;
            }
            if is_leaf_binary(grandchild, lang) {
                found = Some(grandchild);
            }
        }
    }
    found
}
/// Returns `true` when `kind` is the binary-expression node kind for `lang`:
/// `binary_operator` or `comparison_operator` for `python`, `binary` for
/// `ruby`, and `binary_expression` for every other language.
fn is_binary_expr_kind(kind: &str, lang: &str) -> bool {
    if lang == "python" {
        return matches!(kind, "binary_operator" | "comparison_operator");
    }
    let expected = if lang == "ruby" {
        "binary"
    } else {
        "binary_expression"
    };
    kind == expected
}
#[allow(clippy::too_many_arguments)]
pub(super) fn push_node<'a>(
g: &mut Cfg,
kind: StmtKind,
ast: Node<'a>,
lang: &str,
code: &'a [u8],
enclosing_func: Option<&str>,
call_ordinal: u32,
analysis_rules: Option<&LangAnalysisRules>,
) -> NodeIndex {
let mut text = match lookup(lang, ast.kind()) {
Kind::CallFn => ast
.child_by_field_name("function")
.or_else(|| ast.child_by_field_name("method"))
.or_else(|| ast.child_by_field_name("name"))
.or_else(|| ast.child_by_field_name("type"))
.or_else(|| ast.child_by_field_name("constructor"))
.or_else(|| find_constructor_type_child(ast))
.and_then(|n| {
let unwrapped = unwrap_parens(n);
if lookup(lang, unwrapped.kind()) == Kind::Function {
Some(anon_fn_name(unwrapped.start_byte()))
} else {
text_of(n, code)
}
})
.unwrap_or_default(),
Kind::CallMethod => {
let func = ast
.child_by_field_name("method")
.or_else(|| ast.child_by_field_name("name"))
.and_then(|n| text_of(n, code));
let recv = ast
.child_by_field_name("object")
.or_else(|| ast.child_by_field_name("receiver"))
.or_else(|| ast.child_by_field_name("scope"))
.and_then(|n| root_receiver_text(n, lang, code));
match (recv, func) {
(Some(r), Some(f)) => format!("{r}.{f}"),
(_, Some(f)) => f,
_ => String::new(),
}
}
Kind::CallMacro => ast
.child_by_field_name("macro")
.and_then(|n| text_of(n, code))
.unwrap_or_default(),
Kind::Function => ast
.child_by_field_name("name")
.or_else(|| ast.child_by_field_name("declarator"))
.and_then(|n| text_of(n, code))
.unwrap_or_default(),
_ => text_of(ast, code).unwrap_or_default(),
};
if lang == "cpp" {
if ast.kind() == "new_expression" {
text = "new".to_string();
} else if ast.kind() == "delete_expression" {
text = "delete".to_string();
}
}
if lang == "ruby" && ast.kind() == "subshell" {
text = "subshell".to_string();
}
let mut inner_text_span: Option<(usize, usize)> = None;
if matches!(
lookup(lang, ast.kind()),
Kind::CallWrapper | Kind::Assignment | Kind::Return
) {
if let Some((inner, inner_span)) = first_call_ident_with_span(ast, lang, code) {
text = inner;
inner_text_span = Some(inner_span);
} else if matches!(lookup(lang, ast.kind()), Kind::CallWrapper) {
let mut cursor = ast.walk();
if let Some(first) = ast.children(&mut cursor).next()
&& first.child_count() == 0
&& let Some(kw) = text_of(first, code)
&& kw.len() <= 16
{
text = kw;
inner_text_span = Some((first.start_byte(), first.end_byte()));
}
}
}
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
let mut labels = classify_all(lang, &text, extra);
if lang == "rust" {
if let Some(cn) = find_call_node(ast, lang) {
if let Some(chain_raw) = text_of(cn, code) {
let chain_compact: String =
chain_raw.chars().filter(|c| !c.is_whitespace()).collect();
let chain_text = crate::labels::normalize_chained_call_for_classify(&chain_compact);
if chain_text != text {
let chain_labels = classify_all(lang, &chain_text, extra);
for l in chain_labels {
if !labels.contains(&l) {
labels.push(l);
}
}
}
let peeled = crate::ssa::type_facts::peel_identity_suffix(&chain_text);
if peeled != chain_text && peeled != text {
let peeled_labels = classify_all(lang, &peeled, extra);
for l in peeled_labels {
if !labels.contains(&l) {
labels.push(l);
}
}
}
let chain_for_synth = if peeled != chain_text {
&peeled
} else {
&chain_text
};
if !labels
.iter()
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(crate::labels::Cap::DATA_EXFIL)))
&& (chain_for_synth.contains("Request::builder.")
|| chain_for_synth.contains("hyper::Request::builder."))
{
let last_seg =
chain_for_synth.rsplit('.').next().unwrap_or(chain_for_synth);
if matches!(
last_seg,
"body" | "body_mut" | "body_string" | "body_json" | "body_bytes"
) {
labels.push(DataLabel::Sink(crate::labels::Cap::DATA_EXFIL));
}
}
}
}
}
let mut outer_callee: Option<String> = None;
let mut inner_callee_span: Option<(usize, usize)> = None;
if labels.is_empty()
&& matches!(
lookup(lang, ast.kind()),
Kind::CallWrapper
| Kind::Assignment
| Kind::Return
| Kind::CallFn
| Kind::CallMethod
| Kind::CallMacro
)
&& let Some((inner_text, inner_label, inner_span)) =
find_classifiable_inner_call(ast, lang, code, extra)
{
labels.push(inner_label);
outer_callee = Some(text.clone());
text = inner_text;
inner_callee_span = Some(inner_span);
}
if labels.is_empty() {
let assign_node = if matches!(lookup(lang, ast.kind()), Kind::Assignment) {
Some(ast)
} else if matches!(lookup(lang, ast.kind()), Kind::CallWrapper) {
let mut cursor = ast.walk();
ast.children(&mut cursor)
.find(|c| matches!(lookup(lang, c.kind()), Kind::Assignment))
} else {
None
};
if let Some(assign) = assign_node
&& let Some(lhs) = assign.child_by_field_name("left")
{
if let Some(full) = member_expr_text(lhs, code) {
if let Some(l) = classify(lang, &full, extra) {
labels.push(l);
}
}
if labels.is_empty()
&& let Some(prop) = lhs.child_by_field_name("property")
&& let Some(prop_text) = text_of(prop, code)
{
if let Some(l) = classify(lang, &prop_text, extra) {
labels.push(l);
}
}
}
}
if labels.is_empty()
&& matches!(
lookup(lang, ast.kind()),
Kind::CallWrapper | Kind::Assignment
)
&& !rhs_is_function_literal(ast, lang)
&& let Some(found) = first_member_label(ast, lang, code, extra)
{
labels.push(found);
if let Some(member_text) = first_member_text(ast, code) {
if outer_callee.is_none() && text != member_text {
outer_callee = Some(text.clone());
}
text = member_text;
}
}
if labels.is_empty()
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
&& let Some(cond) = ast.child_by_field_name("condition")
&& cond.kind() == "let_condition"
&& let Some(val) = cond.child_by_field_name("value")
{
if let Some((ident, ident_span)) = first_call_ident_with_span(val, lang, code)
&& let Some(l) = classify(lang, &ident, extra)
{
labels.push(l);
text = ident;
if inner_text_span.is_none() {
inner_text_span = Some(ident_span);
}
}
if labels.is_empty()
&& let Some(ident_text) = text_of(val, code)
&& let Some(l) = classify(lang, &ident_text, extra)
{
labels.push(l);
text = ident_text;
}
}
let mut call_ast = find_call_node(ast, lang);
if labels.is_empty()
&& let Some(outer) = call_ast
&& let Some((inner, inner_callee_text)) = find_chained_inner_call(outer, lang, code)
&& !classify_gated_sink(lang, &inner_callee_text, |_| None, |_| None, |_| false).is_empty()
{
call_ast = Some(inner);
outer_callee = Some(text.clone());
text = inner_callee_text;
inner_callee_span = Some((inner.start_byte(), inner.end_byte()));
}
let mut sink_payload_args: Option<Vec<usize>> = None;
let mut destination_uses: Option<Vec<String>> = None;
let mut gate_filters: Vec<GateFilter> = Vec::new();
let has_sink_label = labels.iter().any(|l| matches!(l, DataLabel::Sink(_)));
{
let gate_call = call_ast.or_else(|| find_call_node_deep(ast, lang, 4));
if let Some(cn) = gate_call {
let gate_callee_text = if call_ast.is_some() {
text.clone()
} else {
cn.child_by_field_name("function")
.or_else(|| cn.child_by_field_name("method"))
.or_else(|| cn.child_by_field_name("name"))
.and_then(|f| text_of(f, code))
.unwrap_or_else(|| text.clone())
};
let matches = classify_gated_sink(
lang,
&gate_callee_text,
|idx| {
extract_const_string_arg(cn, idx, code).or_else(|| {
if matches!(lang, "c" | "cpp" | "c++" | "php") {
extract_const_macro_arg(cn, idx, code)
} else {
None
}
})
},
|kw| {
extract_const_keyword_arg(cn, kw, code).or_else(|| {
if matches!(lang, "javascript" | "typescript") {
extract_object_arg_property(cn, 1, kw, code)
} else {
None
}
})
},
|kw| {
has_keyword_arg(cn, kw, code)
|| (matches!(lang, "javascript" | "typescript")
&& has_object_arg_property(cn, 1, kw, code))
},
);
if !matches.is_empty() {
let mut union_payload: Vec<usize> = Vec::new();
for gm in &matches {
if has_sink_label {
if !labels.contains(&gm.label) {
labels.push(gm.label);
}
} else {
labels.push(gm.label);
}
let mut payload_vec: Vec<usize> =
if gm.payload_args == crate::labels::ALL_ARGS_PAYLOAD {
let arity = extract_arg_uses(cn, code).len();
(0..arity).collect()
} else {
gm.payload_args.to_vec()
};
let mut dest_uses: Option<Vec<String>> = None;
let mut dest_fields: Vec<String> = Vec::new();
if !gm.object_destination_fields.is_empty() {
let mut all_pairs: Vec<(String, String)> = Vec::new();
let mut had_object_match = false;
for &pos in gm.payload_args {
if let Some(pairs) = extract_destination_field_pairs(
cn,
pos,
gm.object_destination_fields,
code,
) {
all_pairs.extend(pairs);
had_object_match = true;
break;
}
}
let kwarg_pairs =
extract_destination_kwarg_pairs(cn, gm.object_destination_fields, code);
if !kwarg_pairs.is_empty() {
let arity = extract_arg_uses(cn, code).len();
if !payload_vec.contains(&arity) {
payload_vec.push(arity);
}
for pair in kwarg_pairs {
if !all_pairs.iter().any(|(_, v)| v == &pair.1) {
all_pairs.push(pair);
}
}
}
if had_object_match || !all_pairs.is_empty() {
let (fields, vars): (Vec<String>, Vec<String>) =
all_pairs.into_iter().unzip();
dest_uses = Some(vars);
dest_fields = fields;
}
}
let label_caps = match gm.label {
crate::labels::DataLabel::Sink(c) => c,
_ => crate::labels::Cap::empty(),
};
for &p in &payload_vec {
if !union_payload.contains(&p) {
union_payload.push(p);
}
}
gate_filters.push(GateFilter {
label_caps,
payload_args: payload_vec,
destination_uses: dest_uses,
destination_fields: dest_fields,
});
}
if !union_payload.is_empty() {
sink_payload_args = Some(union_payload);
}
if gate_filters.len() == 1 {
destination_uses = gate_filters[0].destination_uses.clone();
}
}
}
}
if matches!(lang, "javascript" | "js" | "typescript" | "ts") {
if let Some(cn) = call_ast.or_else(|| find_call_node_deep(ast, lang, 4)) {
let shell_matches = extract_shell_array_payload_idents(cn, code);
if !shell_matches.is_empty() {
let shell_label = DataLabel::Sink(Cap::SHELL_ESCAPE);
let already_has_shell_sink = labels.iter().any(|l| match l {
DataLabel::Sink(c) => c.contains(Cap::SHELL_ESCAPE),
_ => false,
});
if !already_has_shell_sink {
labels.push(shell_label);
}
let mut union_payload: Vec<usize> = sink_payload_args.clone().unwrap_or_default();
for sm in shell_matches {
if !union_payload.contains(&sm.arg_position) {
union_payload.push(sm.arg_position);
}
gate_filters.push(GateFilter {
label_caps: Cap::SHELL_ESCAPE,
payload_args: vec![sm.arg_position],
destination_uses: Some(sm.payload_idents),
destination_fields: Vec::new(),
});
}
if !union_payload.is_empty() {
sink_payload_args = Some(union_payload);
}
if gate_filters.len() == 1 {
destination_uses = gate_filters[0].destination_uses.clone();
}
}
}
}
if lang == "rust" && !labels.iter().any(|l| matches!(l, DataLabel::Sanitizer(_))) {
if let Some(cn) = call_ast {
if cn.kind() == "call_expression" || cn.kind() == "method_call_expression" {
if let Some(caps) = detect_rust_replace_chain_sanitizer(cn, code) {
labels.push(DataLabel::Sanitizer(caps));
}
}
}
}
if lang == "go" && !labels.iter().any(|l| matches!(l, DataLabel::Sanitizer(_))) {
if let Some(cn) = call_ast {
if cn.kind() == "call_expression" {
if let Some(caps) = detect_go_replace_call_sanitizer(cn, code) {
labels.push(DataLabel::Sanitizer(caps));
}
}
}
}
if (lang == "ruby" || lang == "rb")
&& labels
.iter()
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SQL_QUERY)))
&& !labels
.iter()
.any(|l| matches!(l, DataLabel::Sanitizer(c) if c.contains(Cap::SQL_QUERY)))
{
let leaf = text.rsplit(['.', ':']).next().unwrap_or(&text);
const AR_QUERY_METHODS: &[&str] = &["where", "order", "group", "having", "joins", "pluck"];
if AR_QUERY_METHODS.contains(&leaf) {
let shape = call_ast
.and_then(arg0_kind_and_interpolation)
.or_else(|| ruby_chain_arg0_for_method(ast, &[leaf], code));
if let Some((arg0_kind, has_interp)) = shape
&& crate::labels::ruby::ar_query_safe_shape(&text, &arg0_kind, has_interp)
{
labels.push(DataLabel::Sanitizer(Cap::SQL_QUERY));
}
}
}
if lang == "java"
&& labels
.iter()
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SQL_QUERY)))
&& !labels
.iter()
.any(|l| matches!(l, DataLabel::Sanitizer(c) if c.contains(Cap::SQL_QUERY)))
{
let leaf = text.rsplit('.').next().unwrap_or(&text);
if matches!(leaf, "executeUpdate" | "executeQuery") {
let outer_zero_arg = call_ast
.and_then(|cn| cn.child_by_field_name("arguments"))
.map(|args| {
let mut c = args.walk();
args.named_children(&mut c).count() == 0
})
.unwrap_or(false);
if outer_zero_arg {
const JPA_BIND_METHODS: &[&str] = &[
"createQuery",
"createNativeQuery",
"createNamedQuery",
"prepareStatement",
"prepareCall",
];
if let Some(call_node) = call_ast
&& let Some(arg0_kind) =
java_chain_arg0_kind_for_method(call_node, JPA_BIND_METHODS, code)
&& arg0_kind == "string_literal"
{
labels.push(DataLabel::Sanitizer(Cap::SQL_QUERY));
}
}
}
}
if (lang == "javascript"
|| lang == "js"
|| lang == "typescript"
|| lang == "ts"
|| lang == "tsx")
&& labels
.iter()
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SQL_QUERY)))
&& !labels
.iter()
.any(|l| matches!(l, DataLabel::Sanitizer(c) if c.contains(Cap::SQL_QUERY)))
{
const QUERY_TARGETS: &[&str] = &["query", "execute"];
const ORM_CHAIN_METHODS: &[&str] = &[
"findOne",
"findMany",
"findFirst",
"findUnique",
"findById",
"find",
"create",
"createMany",
"update",
"updateMany",
"upsert",
"delete",
"deleteMany",
"count",
"aggregate",
"distinct",
"save",
];
let chain_call = call_ast.or_else(|| find_call_node_deep(ast, lang, 4));
if let Some(call_node) = chain_call {
let outer_method = js_chain_outer_method_for_inner(call_node, QUERY_TARGETS, code);
let outer_is_orm = outer_method
.as_deref()
.is_some_and(|m| ORM_CHAIN_METHODS.contains(&m));
if outer_is_orm
&& let Some((arg0_kind, has_interp)) =
js_chain_arg0_kind_for_method(call_node, QUERY_TARGETS, code)
&& !has_interp
&& matches!(
arg0_kind.as_str(),
"string" | "string_fragment" | "template_string"
)
{
labels.push(DataLabel::Sanitizer(Cap::SQL_QUERY));
}
}
}
let span = (ast.start_byte(), ast.end_byte());
let (defines, uses, extra_defines) = def_use(ast, lang, code);
let const_text = if (defines.is_some() && uses.is_empty())
|| (kind == StmtKind::Return && uses.is_empty())
{
extract_literal_rhs(ast, lang, code)
} else {
None
};
let callee = if kind == StmtKind::Call || !labels.is_empty() {
Some(text.clone())
} else {
None
};
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
};
let arg_uses = if kind == StmtKind::Call || sink_payload_args.is_some() {
call_ast
.map(|cn| extract_arg_uses(cn, code))
.unwrap_or_default()
} else {
Vec::new()
};
let arg_string_literals = call_ast
.map(|cn| extract_arg_string_literals(cn, code))
.unwrap_or_default();
let kwargs = if kind == StmtKind::Call || sink_payload_args.is_some() {
call_ast
.map(|cn| extract_kwargs(cn, code))
.unwrap_or_default()
} else {
Vec::new()
};
let all_args_literal = if kind == StmtKind::Call {
call_ast
.map(|cn| has_only_literal_args(cn, code))
.unwrap_or(false)
} else {
false
};
let parameterized_query = labels
.iter()
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SQL_QUERY)))
&& call_ast
.or_else(|| find_call_node_deep(ast, lang, 5))
.is_some_and(|cn| is_parameterized_query_call(cn, code));
let mut arg_callees = call_ast
.map(|cn| extract_arg_callees(cn, lang, code))
.unwrap_or_default();
if !labels.is_empty() {
let assign_node = if matches!(lookup(lang, ast.kind()), Kind::Assignment) {
Some(ast)
} else if matches!(lookup(lang, ast.kind()), Kind::CallWrapper) {
let mut cursor = ast.walk();
ast.children(&mut cursor)
.find(|c| matches!(lookup(lang, c.kind()), Kind::Assignment))
} else {
None
};
if let Some(asgn) = assign_node
&& let Some(rhs) = asgn.child_by_field_name("right")
&& let Some(callee_name) = call_ident_of(rhs, lang, code)
{
arg_callees.push(Some(callee_name));
}
}
let receiver = if let Some(cn) = call_ast {
match lookup(lang, cn.kind()) {
Kind::CallMethod => {
let recv_node = cn
.child_by_field_name("object")
.or_else(|| cn.child_by_field_name("receiver"))
.or_else(|| cn.child_by_field_name("scope"))
.or_else(|| cn.child_by_field_name("value"));
if let Some(rn) = recv_node
&& matches!(rn.kind(), "identifier" | "variable_name")
&& let Some(recv_text) = text_of(rn, code)
{
Some(recv_text)
} else if let Some(rn) = recv_node {
root_member_receiver(rn, code).or_else(|| root_receiver_text(cn, lang, code))
} else {
None
}
}
Kind::CallFn => {
let func_child = cn.child_by_field_name("function");
let recv_node = match func_child {
Some(fc) if fc.kind() == "member_expression" || fc.kind() == "attribute" => {
fc.child_by_field_name("object")
}
Some(fc) if fc.kind() == "field_expression" => fc.child_by_field_name("value"),
_ => None,
};
if let Some(rn) = recv_node {
if matches!(rn.kind(), "identifier" | "variable_name" | "this" | "self") {
text_of(rn, code)
} else {
root_member_receiver(rn, code)
.or_else(|| root_receiver_text(rn, lang, code))
}
} else {
None
}
}
_ => None,
}
} else {
None
};
let cast_target_type = match ast.kind() {
"cast_expression" => ast
.child_by_field_name("type")
.filter(|n| matches!(n.kind(), "type_identifier" | "scoped_type_identifier"))
.and_then(|n| text_of(n, code)),
"as_expression" => ast
.child_by_field_name("type")
.filter(|n| matches!(n.kind(), "type_identifier" | "predefined_type"))
.and_then(|n| text_of(n, code)),
"type_assertion" => ast
.child(0)
.filter(|n| matches!(n.kind(), "type_identifier" | "predefined_type"))
.and_then(|n| text_of(n, code)),
"type_assertion_expression" => ast
.child_by_field_name("type")
.filter(|n| matches!(n.kind(), "type_identifier" | "qualified_type"))
.and_then(|n| text_of(n, code)),
_ => None,
};
let is_raii_managed = is_raii_factory(lang, &text);
let is_ruby_block_managed = lang == "ruby"
&& call_ast.is_some_and(|cn| {
let mut c = cn.walk();
cn.children(&mut c)
.any(|ch| ch.kind() == "do_block" || ch.kind() == "block")
});
let string_prefix = extract_template_prefix(ast, lang, code)
.or_else(|| call_ast.and_then(|cn| extract_template_prefix(cn, lang, code)));
let callee_span = inner_callee_span.or(inner_text_span).filter(|s| *s != span);
let is_constructor = ast.kind() == "new_expression"
|| ast.kind() == "object_creation_expression"
|| call_ast
.is_some_and(|cn| matches!(cn.kind(), "new_expression" | "object_creation_expression"));
let idx = g.add_node(NodeInfo {
kind,
call: CallMeta {
callee,
callee_text: None,
outer_callee,
callee_span,
call_ordinal,
arg_uses,
receiver,
sink_payload_args,
kwargs,
arg_string_literals,
destination_uses,
gate_filters,
is_constructor,
},
taint: TaintMeta {
labels,
const_text,
defines,
uses,
extra_defines,
},
ast: AstMeta {
span,
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
condition_text,
condition_vars,
condition_negated,
all_args_literal,
catch_param: false,
arg_callees,
cast_target_type,
bin_op: extract_bin_op(ast, lang),
bin_op_const: extract_bin_op_const(ast, lang, code),
managed_resource: is_raii_managed || is_ruby_block_managed,
in_defer: false,
parameterized_query,
string_prefix,
is_eq_with_const: detect_eq_with_const(ast, lang),
is_numeric_length_access: detect_numeric_length_access(ast, lang, code),
member_field: detect_member_field_assignment(ast, code),
rhs_is_function_literal: rhs_is_function_literal(ast, lang),
});
debug!(
target: "cfg",
"node {} ← {:?} txt=`{}` span={:?} labels={:?}",
idx.index(),
kind,
text,
span,
g[idx].taint.labels
);
idx
}
/// Adds one edge of `kind` from every node in `froms` to `to`.
///
/// Each edge is reported on the `cfg` debug target just before it is inserted.
#[inline]
pub(super) fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
    let dst = to.index();
    froms.iter().copied().for_each(|src| {
        debug!(target: "cfg", "edge {} → {} ({:?})", src.index(), dst, kind);
        g.add_edge(src, to, kind);
    });
}
/// Reports whether the right-hand side of the statement `ast` is a function literal.
///
/// The right-hand side is located by trying, in order:
/// 1. the `right` field of `ast`;
/// 2. the direct children of `ast`, taking the first child that yields a node:
///    a `variable_declarator` / `init_declarator` / `let_declaration` child
///    contributes its `value` (or else `init`) field, a child classified as
///    `Kind::Assignment` contributes its `right` field;
/// 3. the `value` (or else `init`) field of `ast` itself.
///
/// After parentheses are stripped, the answer is `true` when that node is
/// classified as `Kind::Function` and has at least one child, or when it is an
/// `expression_list` with a named child satisfying the same test.
fn rhs_is_function_literal(ast: Node, lang: &str) -> bool {
    use conditions::unwrap_parens;

    // `value` field, falling back to `init`.
    fn value_or_init<'t>(n: Node<'t>) -> Option<Node<'t>> {
        n.child_by_field_name("value")
            .or_else(|| n.child_by_field_name("init"))
    }

    let is_fn_literal =
        |n: Node| matches!(lookup(lang, n.kind()), Kind::Function) && n.child_count() > 0;

    // Step 2 of the lookup: scan direct children for a declarator or assignment.
    let rhs_from_children = || {
        let mut cursor = ast.walk();
        let found = ast.children(&mut cursor).find_map(|child| {
            let is_declarator = matches!(
                child.kind(),
                "variable_declarator" | "init_declarator" | "let_declaration"
            );
            if is_declarator {
                value_or_init(child)
            } else if matches!(lookup(lang, child.kind()), Kind::Assignment) {
                child.child_by_field_name("right")
            } else {
                None
            }
        });
        found
    };

    let candidate = ast
        .child_by_field_name("right")
        .or_else(rhs_from_children)
        .or_else(|| value_or_init(ast));

    let rhs = match candidate {
        Some(node) => unwrap_parens(node),
        None => return false,
    };
    if is_fn_literal(rhs) {
        return true;
    }
    if rhs.kind() != "expression_list" {
        return false;
    }

    // Multi-value right-hand side: any element being a function literal counts.
    let mut cursor = rhs.walk();
    let any_fn_literal = rhs
        .named_children(&mut cursor)
        .any(|element| is_fn_literal(unwrap_parens(element)));
    any_fn_literal
}
/// Tries to lower a call-free subscript assignment into a synthetic
/// `__index_set__` call node.
///
/// `ast` itself is used when `lookup` classifies it as `Kind::Assignment`;
/// otherwise its first direct child with that classification is used.
///
/// Returns `None`, leaving both the graph and `call_ordinal` untouched, when
/// no assignment is found, when the assignment lacks a `left` or `right`
/// field, when it contains a call descendant, or when the left-hand side does
/// not resolve to subscript components.
///
/// On success one `StmtKind::Call` node is added whose receiver is the
/// subscripted container text, whose argument uses are `[[index], rhs idents]`
/// and whose `taint.uses` are container, index, then the rhs identifiers. The
/// node is wired from every entry in `preds` with a `Seq` edge,
/// `call_ordinal` is post-incremented, and the new node index is returned.
fn try_lower_subscript_write(
    ast: Node,
    preds: &[NodeIndex],
    g: &mut Cfg,
    lang: &str,
    code: &[u8],
    enclosing_func: Option<&str>,
    call_ordinal: &mut u32,
) -> Option<NodeIndex> {
    let assign_ast = match lookup(lang, ast.kind()) {
        Kind::Assignment => ast,
        _ => {
            let mut cursor = ast.walk();
            let nested = ast
                .children(&mut cursor)
                .find(|child| matches!(lookup(lang, child.kind()), Kind::Assignment));
            nested?
        }
    };

    let lhs = assign_ast.child_by_field_name("left")?;
    if has_call_descendant(assign_ast, lang) {
        return None;
    }
    let (arr_text, idx_text) = subscript_components(subscript_lhs_node(lhs, lang)?, code)?;
    let rhs = assign_ast.child_by_field_name("right")?;

    let mut rhs_uses: Vec<String> = Vec::new();
    collect_idents(rhs, code, &mut rhs_uses);

    // Every bail-out is behind us, so the ordinal can be consumed now.
    let ordinal = *call_ordinal;
    *call_ordinal += 1;

    // Flat use list: container, index, then everything read on the rhs.
    let uses: Vec<String> = [arr_text.clone(), idx_text.clone()]
        .into_iter()
        .chain(rhs_uses.iter().cloned())
        .collect();

    let call = CallMeta {
        callee: Some(String::from("__index_set__")),
        receiver: Some(arr_text),
        arg_uses: vec![vec![idx_text], rhs_uses],
        call_ordinal: ordinal,
        ..Default::default()
    };
    let taint = TaintMeta {
        uses,
        ..Default::default()
    };
    let ast_meta = AstMeta {
        span: (ast.start_byte(), ast.end_byte()),
        enclosing_func: enclosing_func.map(str::to_owned),
    };

    let node = g.add_node(NodeInfo {
        kind: StmtKind::Call,
        call,
        taint,
        ast: ast_meta,
        ..Default::default()
    });
    connect_all(g, preds, node, EdgeKind::Seq);
    Some(node)
}
type PreEmitArgSourceResult = (SmallVec<[NodeIndex; 4]>, Vec<(usize, String)>, Vec<String>);
fn pre_emit_arg_source_nodes(
g: &mut Cfg,
ast: Node,
lang: &str,
code: &[u8],
enclosing_func: Option<&str>,
analysis_rules: Option<&LangAnalysisRules>,
preds: &[NodeIndex],
) -> PreEmitArgSourceResult {
let mut effective_preds: SmallVec<[NodeIndex; 4]> = SmallVec::from_slice(preds);
let mut bindings: Vec<(usize, String)> = Vec::new();
let mut uses_only: Vec<String> = Vec::new();
let extra = analysis_rules.and_then(|r| {
if r.extra_labels.is_empty() {
None
} else {
Some(r.extra_labels.as_slice())
}
});
let Some(call_ast) = find_call_node(ast, lang) else {
return (effective_preds, bindings, uses_only);
};
let Some(args_node) = call_ast.child_by_field_name("arguments") else {
return (effective_preds, bindings, uses_only);
};
let children: Vec<_> = {
let mut cursor = args_node.walk();
args_node.named_children(&mut cursor).collect()
};
for child in &children {
let k = child.kind();
if k == "spread_element"
|| k == "dictionary_splat"
|| k == "list_splat"
|| k == "keyword_argument"
|| k == "splat_argument"
|| k == "hash_splat_argument"
|| k == "named_argument"
{
return (effective_preds, bindings, uses_only);
}
}
let pointer_on = crate::pointer::is_enabled();
for (pos, child) in children.iter().enumerate() {
let src_label = first_member_label(*child, lang, code, extra);
if let Some(DataLabel::Source(caps)) = src_label {
let synth_name = format!("__nyx_src_{}_{}", g.node_count(), pos);
let member_text = first_member_text(*child, code);
let span = (child.start_byte(), child.end_byte());
let mut src_labels: SmallVec<[DataLabel; 2]> = SmallVec::new();
src_labels.push(DataLabel::Source(caps));
let src_idx = g.add_node(NodeInfo {
kind: StmtKind::Seq,
call: CallMeta {
callee: member_text,
..Default::default()
},
taint: TaintMeta {
labels: src_labels,
defines: Some(synth_name.clone()),
..Default::default()
},
ast: AstMeta {
span,
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
..Default::default()
});
connect_all(g, &effective_preds, src_idx, EdgeKind::Seq);
effective_preds.clear();
effective_preds.push(src_idx);
bindings.push((pos, synth_name));
continue;
}
if pointer_on
&& is_subscript_kind(child.kind())
&& let Some((arr_text, idx_text)) = subscript_components(*child, code)
{
let synth_name = format!("__nyx_idxget_{}_{}", g.node_count(), pos);
let span = (child.start_byte(), child.end_byte());
let idx_node = g.add_node(NodeInfo {
kind: StmtKind::Call,
call: CallMeta {
callee: Some("__index_get__".to_string()),
receiver: Some(arr_text.clone()),
arg_uses: vec![vec![idx_text.clone()]],
..Default::default()
},
taint: TaintMeta {
defines: Some(synth_name.clone()),
uses: vec![arr_text, idx_text],
..Default::default()
},
ast: AstMeta {
span,
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
..Default::default()
});
connect_all(g, &effective_preds, idx_node, EdgeKind::Seq);
effective_preds.clear();
effective_preds.push(idx_node);
bindings.push((pos, synth_name));
}
}
let outer_method_is_writeback = call_ast
.child_by_field_name("function")
.or_else(|| call_ast.child_by_field_name("method"))
.and_then(|f| {
f.child_by_field_name("field")
.or_else(|| f.child_by_field_name("property"))
.or_else(|| f.child_by_field_name("name"))
})
.and_then(|n| text_of(n, code))
.is_some_and(|name| name == "Decode" || name == "Unmarshal");
if lang == "go" && outer_method_is_writeback {
let mut inner_args: Vec<Node> = Vec::new();
walk_chain_inner_call_args(call_ast, lang, &mut inner_args);
for arg in inner_args {
let k = arg.kind();
if k == "spread_element"
|| k == "dictionary_splat"
|| k == "list_splat"
|| k == "keyword_argument"
|| k == "splat_argument"
|| k == "hash_splat_argument"
|| k == "named_argument"
{
continue;
}
let src_label = first_member_label(arg, lang, code, extra);
if let Some(DataLabel::Source(caps)) = src_label {
let synth_name = format!("__nyx_chainsrc_{}_{}", g.node_count(), uses_only.len());
let member_text = first_member_text(arg, code);
let span = (arg.start_byte(), arg.end_byte());
let mut src_labels: SmallVec<[DataLabel; 2]> = SmallVec::new();
src_labels.push(DataLabel::Source(caps));
let src_idx = g.add_node(NodeInfo {
kind: StmtKind::Seq,
call: CallMeta {
callee: member_text,
..Default::default()
},
taint: TaintMeta {
labels: src_labels,
defines: Some(synth_name.clone()),
..Default::default()
},
ast: AstMeta {
span,
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
..Default::default()
});
connect_all(g, &effective_preds, src_idx, EdgeKind::Seq);
effective_preds.clear();
effective_preds.push(src_idx);
uses_only.push(synth_name);
}
}
}
(effective_preds, bindings, uses_only)
}
/// Records synthetic variable names on the call node `call_node`.
///
/// For each `(pos, name)` in `bindings`, `name` is appended to the node's
/// `arg_uses[pos]` (padding `arg_uses` with empty entries first if it is too
/// short) and to its `taint.uses`. Every name in `uses_only` is appended to
/// `taint.uses` only.
fn apply_arg_source_bindings(
    g: &mut Cfg,
    call_node: NodeIndex,
    bindings: &[(usize, String)],
    uses_only: &[String],
) {
    let node = &mut g[call_node];
    for (pos, synth_name) in bindings {
        let slots = &mut node.call.arg_uses;
        // Grow so that slot `pos` exists; new slots start out empty.
        if slots.len() <= *pos {
            slots.resize_with(*pos + 1, Vec::new);
        }
        slots[*pos].push(synth_name.clone());
        node.taint.uses.push(synth_name.clone());
    }
    node.taint.uses.extend(uses_only.iter().cloned());
}
#[allow(clippy::too_many_arguments)]
pub(super) fn build_sub<'a>(
ast: Node<'a>,
preds: &[NodeIndex], g: &mut Cfg,
lang: &str,
code: &'a [u8],
summaries: &mut FuncSummaries,
file_path: &str,
enclosing_func: Option<&str>,
call_ordinal: &mut u32,
analysis_rules: Option<&LangAnalysisRules>,
break_targets: &mut Vec<NodeIndex>,
continue_targets: &mut Vec<NodeIndex>,
throw_targets: &mut Vec<NodeIndex>,
bodies: &mut Vec<BodyCfg>,
next_body_id: &mut u32,
current_body_id: BodyId,
) -> Vec<NodeIndex> {
match lookup(lang, ast.kind()) {
Kind::If => {
let init_exits_owned = ast.child_by_field_name("initializer").map(|init| {
build_sub(
init,
preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
)
});
let preds: &[NodeIndex] = match &init_exits_owned {
Some(exits) => exits.as_slice(),
None => preds,
};
let cond_subtree = ast.child_by_field_name("condition").or_else(|| {
let mut cursor = ast.walk();
ast.children(&mut cursor).find(|c| {
let k = c.kind();
!matches!(lookup(lang, k), Kind::Block | Kind::Trivia)
&& k != "if"
&& k != "else"
&& k != "let"
&& k != "{"
&& k != "}"
&& k != "("
&& k != ")"
})
});
let has_short_circuit = cond_subtree
.map(|c| is_boolean_operator(unwrap_parens(c)).is_some())
.unwrap_or(false);
let has_short_circuit = has_short_circuit
&& cond_subtree.map_or(false, |c| {
let unwrapped = unwrap_parens(c);
!matches!(
unwrapped.kind(),
"unary_expression"
| "not_operator"
| "prefix_unary_expression"
| "unary_not"
)
});
let is_unless = ast.kind() == "unless";
let (true_exits, false_exits) = if has_short_circuit {
let cond_ast = cond_subtree.unwrap();
build_condition_chain(
cond_ast,
preds,
EdgeKind::Seq,
g,
lang,
code,
enclosing_func,
)
} else {
let cond = push_node(
g,
StmtKind::If,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, cond, EdgeKind::Seq);
(vec![cond], vec![cond])
};
let (then_preds, else_preds) = if is_unless {
(&false_exits, &true_exits)
} else {
(&true_exits, &false_exits)
};
let (then_edge, else_edge) = if is_unless {
(EdgeKind::False, EdgeKind::True)
} else {
(EdgeKind::True, EdgeKind::False)
};
let (then_block, else_block) = {
let field_then = ast
.child_by_field_name("consequence")
.or_else(|| ast.child_by_field_name("body"));
let field_else = ast.child_by_field_name("alternative");
if field_then.is_some() || field_else.is_some() {
(field_then, field_else)
} else {
let mut cursor = ast.walk();
let blocks: Vec<_> = ast
.children(&mut cursor)
.filter(|n| lookup(lang, n.kind()) == Kind::Block)
.collect();
(blocks.first().copied(), blocks.get(1).copied())
}
};
let then_first_node = NodeIndex::new(g.node_count());
let then_exits = if let Some(b) = then_block {
let exits = build_sub(
b,
then_preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
if then_first_node.index() < g.node_count() {
connect_all(g, then_preds, then_first_node, then_edge);
} else if let Some(&first) = exits.first() {
connect_all(g, then_preds, first, then_edge);
}
exits
} else {
then_preds.to_vec()
};
let else_first_node = NodeIndex::new(g.node_count());
let else_exits = if let Some(b) = else_block {
let exits = build_sub(
b,
else_preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
if else_first_node.index() < g.node_count() {
connect_all(g, else_preds, else_first_node, else_edge);
} else if let Some(&first) = exits.first() {
connect_all(g, else_preds, first, else_edge);
}
exits
} else {
let pass = g.add_node(NodeInfo {
kind: StmtKind::Seq,
ast: AstMeta {
span: (ast.end_byte(), ast.end_byte()),
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
..Default::default()
});
connect_all(g, else_preds, pass, else_edge);
vec![pass]
};
then_exits.into_iter().chain(else_exits).collect()
}
Kind::InfiniteLoop => {
let header = push_node(
g,
StmtKind::Loop,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, header, EdgeKind::Seq);
let mut loop_breaks = Vec::new();
let mut loop_continues = Vec::new();
let body = match ast.child_by_field_name("body") {
Some(b) => b,
None => {
warn!(
"loop without body (error recovery?): kind={} byte={}",
ast.kind(),
ast.start_byte()
);
return vec![header];
}
};
let body_exits = build_sub(
body,
&[header],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
&mut loop_breaks,
&mut loop_continues,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
for &e in &body_exits {
connect_all(g, &[e], header, EdgeKind::Back);
}
for &c in &loop_continues {
connect_all(g, &[c], header, EdgeKind::Back);
}
if loop_breaks.is_empty() {
vec![header]
} else {
loop_breaks
}
}
Kind::While | Kind::For => {
let header = push_node(
g,
StmtKind::Loop,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, header, EdgeKind::Seq);
let cond_subtree = ast.child_by_field_name("condition");
let has_short_circuit = cond_subtree
.map(|c| {
let unwrapped = unwrap_parens(c);
is_boolean_operator(unwrapped).is_some()
&& !matches!(
unwrapped.kind(),
"unary_expression"
| "not_operator"
| "prefix_unary_expression"
| "unary_not"
)
})
.unwrap_or(false);
let mut loop_breaks = Vec::new();
let mut loop_continues = Vec::new();
let body = match ast.child_by_field_name("body").or_else(|| {
let mut c = ast.walk();
ast.children(&mut c)
.find(|n| lookup(lang, n.kind()) == Kind::Block)
}) {
Some(b) => b,
None => {
warn!(
"loop without body (error recovery?): kind={} byte={}",
ast.kind(),
ast.start_byte()
);
return vec![header];
}
};
if has_short_circuit {
let cond_ast = cond_subtree.unwrap();
let (true_exits, false_exits) = build_condition_chain(
cond_ast,
&[header],
EdgeKind::Seq,
g,
lang,
code,
enclosing_func,
);
let body_first = NodeIndex::new(g.node_count());
let body_exits = build_sub(
body,
&true_exits,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
&mut loop_breaks,
&mut loop_continues,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
if body_first.index() < g.node_count() {
connect_all(g, &true_exits, body_first, EdgeKind::True);
}
for &e in &body_exits {
connect_all(g, &[e], header, EdgeKind::Back);
}
for &c in &loop_continues {
connect_all(g, &[c], header, EdgeKind::Back);
}
let mut exits: Vec<NodeIndex> = false_exits;
exits.extend(loop_breaks);
exits
} else {
let body_exits = build_sub(
body,
&[header],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
&mut loop_breaks,
&mut loop_continues,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
for &e in &body_exits {
connect_all(g, &[e], header, EdgeKind::Back);
}
for &c in &loop_continues {
connect_all(g, &[c], header, EdgeKind::Back);
}
let mut exits = vec![header];
exits.extend(loop_breaks);
exits
}
}
Kind::Return => {
if has_call_descendant(ast, lang) {
let ord = *call_ordinal;
*call_ordinal += 1;
let (effective_preds, src_bindings, src_uses_only) = pre_emit_arg_source_nodes(
g,
ast,
lang,
code,
enclosing_func,
analysis_rules,
preds,
);
let call_idx = push_node(
g,
StmtKind::Call,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
apply_arg_source_bindings(g, call_idx, &src_bindings, &src_uses_only);
connect_all(g, &effective_preds, call_idx, EdgeKind::Seq);
let ret = push_node(
g,
StmtKind::Return,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, &[call_idx], ret, EdgeKind::Seq);
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
let placeholders = build_sub(
func_node,
&[call_idx],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> =
g.edges_connecting(call_idx, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
Vec::new()
} else {
let ret = push_node(
g,
StmtKind::Return,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, ret, EdgeKind::Seq);
Vec::new() }
}
Kind::Throw => {
if has_call_descendant(ast, lang) {
let ord = *call_ordinal;
*call_ordinal += 1;
let (effective_preds, src_bindings, src_uses_only) = pre_emit_arg_source_nodes(
g,
ast,
lang,
code,
enclosing_func,
analysis_rules,
preds,
);
let call_idx = push_node(
g,
StmtKind::Call,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
apply_arg_source_bindings(g, call_idx, &src_bindings, &src_uses_only);
connect_all(g, &effective_preds, call_idx, EdgeKind::Seq);
let ret = push_node(
g,
StmtKind::Throw,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, &[call_idx], ret, EdgeKind::Seq);
throw_targets.push(ret);
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
let placeholders = build_sub(
func_node,
&[call_idx],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> =
g.edges_connecting(call_idx, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
Vec::new()
} else {
let ret = push_node(
g,
StmtKind::Throw,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, ret, EdgeKind::Seq);
throw_targets.push(ret);
Vec::new()
}
}
Kind::Try => build_try(
ast,
preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
),
Kind::Break => {
let brk = push_node(
g,
StmtKind::Break,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, brk, EdgeKind::Seq);
break_targets.push(brk);
Vec::new()
}
Kind::Continue => {
let cont = push_node(
g,
StmtKind::Continue,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, cont, EdgeKind::Seq);
continue_targets.push(cont);
Vec::new()
}
Kind::Switch => build_switch(
ast,
preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
),
Kind::SourceFile | Kind::Block => {
if lang == "ruby" && ast.kind() == "body_statement" {
let mut check = ast.walk();
if ast
.children(&mut check)
.any(|c| c.kind() == "rescue" || c.kind() == "ensure")
{
return build_begin_rescue(
ast,
preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
}
}
let mut cursor = ast.walk();
let mut frontier = preds.to_vec();
let mut last_live_frontier = preds.to_vec();
let mut prev_was_preproc = false;
for child in ast.children(&mut cursor) {
let child_preds = if frontier.is_empty() && prev_was_preproc {
last_live_frontier.clone()
} else {
frontier.clone()
};
let is_defer = lang == "go" && child.kind() == "defer_statement";
let defer_first_idx = if is_defer { g.node_count() } else { 0 };
let child_exits = build_sub(
child,
&child_preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
if is_defer {
for raw in defer_first_idx..g.node_count() {
let idx = NodeIndex::new(raw);
if g[idx].kind == StmtKind::Call {
g[idx].in_defer = true;
}
}
}
let is_preproc = child.kind().starts_with("preproc_");
if !child_exits.is_empty() {
last_live_frontier = child_exits.clone();
}
frontier = child_exits;
prev_was_preproc = is_preproc;
}
frontier
}
Kind::Function => {
let fn_name = if ast.kind() == "lambda_expression" {
anon_fn_name(ast.start_byte())
} else {
ast.child_by_field_name("name")
.or_else(|| ast.child_by_field_name("declarator"))
.and_then(|n| {
let mut tmp = Vec::new();
collect_idents(n, code, &mut tmp);
tmp.into_iter().next()
})
.unwrap_or_else(|| anon_fn_name(ast.start_byte()))
};
let fn_name = if is_anon_fn_name(&fn_name) {
derive_anon_fn_name_from_context(ast, lang, code).unwrap_or(fn_name)
} else {
fn_name
};
let is_anon = is_anon_fn_name(&fn_name);
let param_meta = extract_param_meta(ast, lang, code);
let param_count = param_meta.len();
let param_names: Vec<String> = param_meta.iter().map(|(n, _, _)| n.clone()).collect();
let param_types: Vec<Option<crate::ssa::type_facts::TypeKind>> =
param_meta.iter().map(|(_, t, _)| t.clone()).collect();
let param_destructured_fields: Vec<Vec<String>> = param_meta
.iter()
.map(|(_, _, siblings)| siblings.clone())
.collect();
let (fn_container, fn_kind) =
compute_container_and_kind(ast, ast.kind(), &fn_name, code);
let fn_disambig: Option<u32> =
Some(fn_dfs_index(ast.start_byte()).unwrap_or(ast.start_byte() as u32));
let (mut fn_graph, fn_entry, fn_exit) =
create_body_graph(ast.start_byte(), ast.end_byte(), Some(&fn_name));
let body_ast = match ast.child_by_field_name("body").or_else(|| {
let mut c = ast.walk();
ast.children(&mut c)
.find(|n| matches!(lookup(lang, n.kind()), Kind::Block | Kind::SourceFile))
}) {
Some(b) => b,
None => {
warn!(
"fn without body (forward decl / abstract / error recovery): kind={} name=’{}’",
ast.kind(),
fn_name
);
let placeholder = g.add_node(make_empty_node_info(
StmtKind::Seq,
(ast.start_byte(), ast.end_byte()),
enclosing_func,
));
connect_all(g, preds, placeholder, EdgeKind::Seq);
return vec![placeholder];
}
};
let fn_body_id = BodyId(*next_body_id);
*next_body_id += 1;
let entry_preds = inject_framework_param_sources(
ast,
code,
analysis_rules,
&mut fn_graph,
fn_entry,
Some(&fn_name),
);
let mut fn_call_ordinal: u32 = 0;
let mut fn_breaks = Vec::new();
let mut fn_continues = Vec::new();
let mut fn_throws = Vec::new();
let body_exits = build_sub(
body_ast,
&entry_preds,
&mut fn_graph,
lang,
code,
summaries,
file_path,
Some(&fn_name),
&mut fn_call_ordinal,
analysis_rules,
&mut fn_breaks,
&mut fn_continues,
&mut fn_throws,
bodies,
next_body_id,
fn_body_id,
);
for &b in &body_exits {
connect_all(&mut fn_graph, &[b], fn_exit, EdgeKind::Seq);
}
for idx in fn_graph.node_indices().collect::<Vec<_>>() {
if matches!(fn_graph[idx].kind, StmtKind::Return | StmtKind::Throw)
&& idx != fn_exit
&& !fn_graph.contains_edge(idx, fn_exit)
{
connect_all(&mut fn_graph, &[idx], fn_exit, EdgeKind::Seq);
}
}
let mut var_taint = HashMap::<String, Cap>::new();
let mut node_bits = HashMap::<NodeIndex, Cap>::new();
let mut fn_src_bits = Cap::empty();
let mut fn_sani_bits = Cap::empty();
let mut fn_sink_bits = Cap::empty();
let mut callees = Vec::<crate::summary::CalleeSite>::new();
let mut tainted_sink_params: Vec<usize> = Vec::new();
for idx in fn_graph.node_indices() {
let info = &fn_graph[idx];
if let Some(callee) = &info.call.callee {
let site = build_callee_site(callee, info, lang);
if !callees.iter().any(|c| {
c.name == site.name
&& c.arity == site.arity
&& c.receiver == site.receiver
&& c.qualifier == site.qualifier
&& c.ordinal == site.ordinal
}) {
callees.push(site);
}
}
for lbl in &info.taint.labels {
match *lbl {
DataLabel::Source(bits) => fn_src_bits |= bits,
DataLabel::Sanitizer(bits) => fn_sani_bits |= bits,
DataLabel::Sink(bits) => {
fn_sink_bits |= bits;
for u in &info.taint.uses {
if let Some(pos) = param_names.iter().position(|p| p == u)
&& !tainted_sink_params.contains(&pos)
{
tainted_sink_params.push(pos);
}
}
}
}
}
let mut in_bits = Cap::empty();
for u in &info.taint.uses {
if let Some(b) = var_taint.get(u) {
in_bits |= *b;
}
}
let mut out_bits = in_bits;
for lab in &info.taint.labels {
match *lab {
DataLabel::Source(bits) => out_bits |= bits,
DataLabel::Sanitizer(bits) => out_bits &= !bits,
DataLabel::Sink(_) => {}
}
}
if let Some(def) = &info.taint.defines {
if out_bits.is_empty() {
var_taint.remove(def);
} else {
var_taint.insert(def.clone(), out_bits);
}
}
node_bits.insert(idx, out_bits);
}
for (&idx, &bits) in &node_bits {
if fn_graph[idx].kind == StmtKind::Return {
fn_src_bits |= bits;
}
}
for &pred in &body_exits {
if let Some(&bits) = node_bits.get(&pred) {
fn_src_bits |= bits;
}
}
let propagating_params = {
let mut params = Vec::new();
for (i, pname) in param_names.iter().enumerate() {
let mut flows = false;
for &idx in node_bits.keys() {
if fn_graph[idx].kind == StmtKind::Return {
for u in &fn_graph[idx].taint.uses {
if u == pname {
flows = true;
}
if let Some(bits) = var_taint.get(u)
&& !bits.is_empty()
&& var_taint.contains_key(pname)
{
flows = true;
}
}
}
}
if !flows {
for &exit_pred in &body_exits {
let info = &fn_graph[exit_pred];
for u in &info.taint.uses {
if u == pname {
flows = true;
}
}
if let Some(def) = &info.taint.defines
&& def == pname
{
flows = true;
}
}
}
if flows {
params.push(i);
}
}
params
};
tainted_sink_params.sort_unstable();
tainted_sink_params.dedup();
let key = FuncKey {
lang: Lang::from_slug(lang).unwrap_or(Lang::Rust),
namespace: file_path.to_owned(),
container: fn_container.clone(),
name: fn_name.clone(),
arity: Some(param_count),
disambig: fn_disambig,
kind: fn_kind,
};
let body_func_key = key.clone();
summaries.insert(
key,
LocalFuncSummary {
entry: fn_entry,
exit: fn_exit,
source_caps: fn_src_bits,
sanitizer_caps: fn_sani_bits,
sink_caps: fn_sink_bits,
param_count,
param_names: param_names.clone(),
propagating_params,
tainted_sink_params,
callees,
container: fn_container,
disambig: fn_disambig,
kind: fn_kind,
},
);
let auth_decorators = extract_auth_decorators(ast, lang, code);
bodies.push(BodyCfg {
meta: BodyMeta {
id: fn_body_id,
kind: if is_anon {
BodyKind::AnonymousFunction
} else {
BodyKind::NamedFunction
},
name: if is_anon { None } else { Some(fn_name.clone()) },
params: param_names,
param_types,
param_destructured_fields,
param_count,
span: (ast.start_byte(), ast.end_byte()),
parent_body_id: Some(current_body_id),
func_key: Some(body_func_key),
auth_decorators,
},
graph: fn_graph,
entry: fn_entry,
exit: fn_exit,
});
let placeholder = g.add_node(make_empty_node_info(
StmtKind::Seq,
(ast.start_byte(), ast.end_byte()),
enclosing_func,
));
connect_all(g, preds, placeholder, EdgeKind::Seq);
vec![placeholder]
}
Kind::CallWrapper => {
let mut cursor = ast.walk();
if let Some(inner) = ast.children(&mut cursor).find(|c| {
matches!(
lookup(lang, c.kind()),
Kind::InfiniteLoop
| Kind::While
| Kind::For
| Kind::If
| Kind::Return
| Kind::Throw
| Kind::Break
| Kind::Continue
)
}) {
return build_sub(
inner,
preds,
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
}
if matches!(lang, "javascript" | "typescript" | "tsx")
&& let Some((lhs_ast, ternary_ast)) = find_ternary_rhs_wrapper(ast)
{
let (lhs_text, lhs_labels) =
classify_ternary_lhs(lhs_ast, lang, code, analysis_rules);
return build_ternary_diamond(
lhs_text,
lhs_labels,
ternary_ast,
preds,
EdgeKind::Seq,
g,
lang,
code,
enclosing_func,
call_ordinal,
analysis_rules,
);
}
if crate::pointer::is_enabled()
&& let Some(n) = try_lower_subscript_write(
ast,
preds,
g,
lang,
code,
enclosing_func,
call_ordinal,
)
{
return vec![n];
}
let has_call = has_call_descendant(ast, lang);
let kind = if has_call {
StmtKind::Call
} else {
StmtKind::Seq
};
let ord = if kind == StmtKind::Call {
let o = *call_ordinal;
*call_ordinal += 1;
o
} else {
0
};
let (effective_preds, src_bindings, src_uses_only) = if kind == StmtKind::Call {
pre_emit_arg_source_nodes(g, ast, lang, code, enclosing_func, analysis_rules, preds)
} else {
(SmallVec::from_slice(preds), Vec::new(), Vec::new())
};
let node = push_node(
g,
kind,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
apply_arg_source_bindings(g, node, &src_bindings, &src_uses_only);
if ast.kind() == "with_item"
&& g[node].kind == StmtKind::Call
&& g[node].taint.defines.is_some()
{
g[node].managed_resource = true;
}
connect_all(g, &effective_preds, node, EdgeKind::Seq);
if kind == StmtKind::Call
&& let Some(callee) = &g[node].call.callee
&& is_configured_terminator(callee, analysis_rules)
{
return Vec::new();
}
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
let placeholders = build_sub(
func_node,
&[node],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> = g.edges_connecting(node, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
if lang == "rust"
&& let Some((guard, let_name)) = detect_rust_let_match_guard(ast, code)
{
let if_node = emit_rust_match_guard_if(g, guard, &let_name, code, enclosing_func);
connect_all(g, &[node], if_node, EdgeKind::Seq);
let true_gate = g.add_node(NodeInfo {
kind: StmtKind::Seq,
ast: AstMeta {
span: (ast.end_byte(), ast.end_byte()),
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
..Default::default()
});
let false_gate = g.add_node(NodeInfo {
kind: StmtKind::Seq,
ast: AstMeta {
span: (ast.end_byte(), ast.end_byte()),
enclosing_func: enclosing_func.map(|s| s.to_string()),
},
..Default::default()
});
connect_all(g, &[if_node], true_gate, EdgeKind::True);
connect_all(g, &[if_node], false_gate, EdgeKind::False);
return vec![true_gate, false_gate];
}
vec![node]
}
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
let ord = *call_ordinal;
*call_ordinal += 1;
let (effective_preds, src_bindings, src_uses_only) = pre_emit_arg_source_nodes(
g,
ast,
lang,
code,
enclosing_func,
analysis_rules,
preds,
);
let n = push_node(
g,
StmtKind::Call,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
apply_arg_source_bindings(g, n, &src_bindings, &src_uses_only);
connect_all(g, &effective_preds, n, EdgeKind::Seq);
if let Some(callee) = &g[n].call.callee
&& is_configured_terminator(callee, analysis_rules)
{
return Vec::new();
}
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
let placeholders = build_sub(
func_node,
&[n],
g,
lang,
code,
summaries,
file_path,
enclosing_func,
call_ordinal,
analysis_rules,
break_targets,
continue_targets,
throw_targets,
bodies,
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> = g.edges_connecting(n, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
vec![n]
}
Kind::Assignment => {
if matches!(lang, "javascript" | "typescript" | "tsx")
&& let (Some(left), Some(right)) = (
ast.child_by_field_name("left"),
ast.child_by_field_name("right"),
)
{
let rhs = unwrap_parens(right);
if rhs.kind() == "ternary_expression" {
let (lhs_text, lhs_labels) =
classify_ternary_lhs(left, lang, code, analysis_rules);
return build_ternary_diamond(
lhs_text,
lhs_labels,
rhs,
preds,
EdgeKind::Seq,
g,
lang,
code,
enclosing_func,
call_ordinal,
analysis_rules,
);
}
}
if crate::pointer::is_enabled()
&& let Some(n) = try_lower_subscript_write(
ast,
preds,
g,
lang,
code,
enclosing_func,
call_ordinal,
)
{
return vec![n];
}
let has_call = has_call_descendant(ast, lang);
let kind = if has_call {
StmtKind::Call
} else {
StmtKind::Seq
};
let ord = if kind == StmtKind::Call {
let o = *call_ordinal;
*call_ordinal += 1;
o
} else {
0
};
let n = push_node(
g,
kind,
ast,
lang,
code,
enclosing_func,
ord,
analysis_rules,
);
connect_all(g, preds, n, EdgeKind::Seq);
vec![n]
}
Kind::Trivia => preds.to_vec(),
_ => {
let n = push_node(
g,
StmtKind::Seq,
ast,
lang,
code,
enclosing_func,
0,
analysis_rules,
);
connect_all(g, preds, n, EdgeKind::Seq);
vec![n]
}
}
}
/// Builds every control-flow graph for one parsed source file.
///
/// The file's top-level statements are lowered into the body with
/// `BodyId(0)`; each function met by `build_sub` pushes its own `BodyCfg`
/// (ids handed out from 1 upwards) and records an entry in the summaries map.
/// The returned [`FileCfg`] also carries:
///
/// * import bindings (JavaScript/TypeScript/TSX, Python, PHP and Rust only),
/// * promisify aliases (JavaScript/TypeScript/TSX only), whose wrapped-callee
///   labels are merged into matching call nodes,
/// * the hierarchy edges collected from the tree.
///
/// Scratch state (the function DFS index table, `DTO_CLASSES` and
/// `TYPE_ALIAS_LC`) is filled in before lowering and cleared again before the
/// hierarchy edges are collected.
pub(crate) fn build_cfg<'a>(
    tree: &'a Tree,
    code: &'a [u8],
    lang: &str,
    file_path: &str,
    analysis_rules: Option<&LangAnalysisRules>,
) -> FileCfg {
    let root = tree.root_node();
    debug!(target: "cfg", "Building CFG for {:?}", root);

    // Seed the per-file scratch state consulted while lowering.
    populate_fn_dfs_indices(tree, lang);
    let dto_classes = dto::collect_dto_classes(root, lang, code);
    DTO_CLASSES.with(|slot| *slot.borrow_mut() = dto_classes);
    let alias_collections = dto::collect_type_alias_local_collections(root, lang, code);
    TYPE_ALIAS_LC.with(|slot| *slot.borrow_mut() = alias_collections);

    // Lower the whole file into the top-level body; nested functions are
    // appended to `bodies` as they are encountered.
    let file_len = code.len();
    let (mut top_graph, entry, exit) = create_body_graph(0, file_len, None);
    let mut summaries = FuncSummaries::new();
    let mut bodies: Vec<BodyCfg> = Vec::new();
    let mut next_body_id: u32 = 1;
    let mut top_ordinal: u32 = 0;
    let mut top_breaks = Vec::new();
    let mut top_continues = Vec::new();
    let mut top_throws = Vec::new();
    let top_exits = build_sub(
        root,
        &[entry],
        &mut top_graph,
        lang,
        code,
        &mut summaries,
        file_path,
        None,
        &mut top_ordinal,
        analysis_rules,
        &mut top_breaks,
        &mut top_continues,
        &mut top_throws,
        &mut bodies,
        &mut next_body_id,
        BodyId(0),
    );
    debug!(target: "cfg", "exits: {:?}", top_exits);
    for &open_end in &top_exits {
        connect_all(&mut top_graph, &[open_end], exit, EdgeKind::Seq);
    }
    debug!(
        target: "cfg",
        "CFG DONE, top-level nodes: {}, bodies: {}",
        top_graph.node_count(),
        bodies.len() + 1
    );

    // Debug builds only: dump the graph and run reachability / dominator
    // sanity passes over the top-level body.
    if cfg!(debug_assertions) {
        for node in top_graph.node_indices() {
            debug!(target: "cfg", " node {:>3}: {:?}", node.index(), top_graph[node]);
        }
        for edge in top_graph.edge_references() {
            debug!(
                target: "cfg",
                " edge {:>3} → {:<3} ({:?})",
                edge.source().index(),
                edge.target().index(),
                edge.weight()
            );
        }

        let mut seen: HashSet<NodeIndex> = HashSet::new();
        let mut walker = Bfs::new(&top_graph, entry);
        while let Some(visited) = walker.next(&top_graph) {
            seen.insert(visited);
        }
        let total_nodes = top_graph.node_count();
        debug!(
            target: "cfg",
            "reachable nodes: {}/{}",
            seen.len(),
            total_nodes
        );
        if seen.len() != total_nodes {
            let unreachable: Vec<_> = top_graph
                .node_indices()
                .filter(|candidate| !seen.contains(candidate))
                .collect();
            debug!(target: "cfg", "‼︎ unreachable nodes: {:?}", unreachable);
        }

        let doms: Dominators<_> = simple_fast(&top_graph, entry);
        debug!(target: "cfg", "dominator tree computed (len = {:?})", doms);
    }

    // The top-level body goes first, then everything is ordered by body id.
    let toplevel_meta = BodyMeta {
        id: BodyId(0),
        kind: BodyKind::TopLevel,
        name: None,
        params: Vec::new(),
        param_types: Vec::new(),
        param_destructured_fields: Vec::new(),
        param_count: 0,
        span: (0, file_len),
        parent_body_id: None,
        func_key: None,
        auth_decorators: Vec::new(),
    };
    bodies.insert(
        0,
        BodyCfg {
            meta: toplevel_meta,
            graph: top_graph,
            entry,
            exit,
        },
    );
    bodies.sort_by_key(|body| body.meta.id);

    let import_bindings = match lang {
        "javascript" | "typescript" | "tsx" | "python" | "php" | "rust" => {
            extract_import_bindings(tree, code)
        }
        _ => HashMap::new(),
    };
    let promisify_aliases = match lang {
        "javascript" | "typescript" | "tsx" => extract_promisify_aliases(tree, code),
        _ => HashMap::new(),
    };
    if !promisify_aliases.is_empty() {
        let extra = analysis_rules.map(|rules| rules.extra_labels.as_slice());
        apply_promisify_labels(&mut bodies, &promisify_aliases, lang, extra);
    }

    // Drop the scratch state so it cannot leak into the next file.
    clear_fn_dfs_indices();
    DTO_CLASSES.with(|slot| slot.borrow_mut().clear());
    TYPE_ALIAS_LC.with(|slot| slot.borrow_mut().clear());

    let hierarchy_edges = hierarchy::collect_hierarchy_edges(root, lang, code);
    FileCfg {
        bodies,
        summaries,
        import_bindings,
        promisify_aliases,
        hierarchy_edges,
    }
}
/// Copies the labels of promisify-wrapped callees onto the call nodes that
/// invoke them through an alias.
///
/// For every node in every body whose `call.callee` is a key of `aliases`,
/// the wrapped callee name (`alias.wrapped`) is classified with
/// `classify_all` and `classify_gated_sink`; each resulting label that the
/// node does not already carry is appended to `taint.labels`. Nodes without a
/// callee, without a matching alias, or whose wrapped callee yields no labels
/// are left untouched.
///
/// * `bodies`  - bodies whose graphs are updated in place.
/// * `aliases` - alias name to promisify alias description.
/// * `lang`    - language slug forwarded to the classifiers.
/// * `extra`   - optional runtime label rules forwarded to `classify_all`.
fn apply_promisify_labels(
    bodies: &mut [BodyCfg],
    aliases: &PromisifyAliases,
    lang: &str,
    extra: Option<&[crate::labels::RuntimeLabelRule]>,
) {
    for body in bodies.iter_mut() {
        // Snapshot the indices so the graph can be mutated inside the loop.
        let indices: Vec<NodeIndex> = body.graph.node_indices().collect();
        for idx in indices {
            // Look the alias up through a borrow of the callee: the resulting
            // reference is tied to `aliases`, not to the graph, so no clone of
            // the callee string is needed before mutating the node below.
            let Some(alias) = body.graph[idx]
                .call
                .callee
                .as_ref()
                .and_then(|callee| aliases.get(callee))
            else {
                continue;
            };

            let mut wrapped_labels: Vec<crate::labels::DataLabel> =
                classify_all(lang, &alias.wrapped, extra)
                    .into_iter()
                    .collect();
            // Gated sinks are evaluated without any argument information.
            for gm in
                classify_gated_sink(lang, &alias.wrapped, |_| None, |_| None, |_| false).iter()
            {
                if !wrapped_labels.contains(&gm.label) {
                    wrapped_labels.push(gm.label);
                }
            }
            if wrapped_labels.is_empty() {
                continue;
            }

            let node_labels = &mut body.graph[idx].taint.labels;
            for lbl in wrapped_labels {
                if !node_labels.contains(&lbl) {
                    node_labels.push(lbl);
                }
            }
        }
    }
}
/// Describes one call site as a [`crate::summary::CalleeSite`].
///
/// * `arity` is the number of recorded argument-use entries when the node is
///   a `StmtKind::Call` or has a receiver, otherwise `None`.
/// * `qualifier` is only derived when there is no receiver. For a callee
///   containing `::`, Rust keeps the whole prefix before the last `::` while
///   other languages keep just the last `::`-separated segment of that
///   prefix. Otherwise, for a callee containing `.`, the last `.`-separated
///   segment before the final `.` is used. Empty qualifiers become `None`.
/// * `name`, `receiver` and `ordinal` are copied from `callee` / `info`.
fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summary::CalleeSite {
    use crate::summary::CalleeSite;

    /// Owned copy of `text`, or `None` when it is empty.
    fn non_empty(text: &str) -> Option<String> {
        (!text.is_empty()).then(|| text.to_string())
    }

    let receiver = info.call.receiver.clone();
    let has_receiver = receiver.is_some();

    let arity =
        (has_receiver || info.kind == StmtKind::Call).then(|| info.call.arg_uses.len());

    let qualifier = if has_receiver {
        None
    } else if let Some((path_prefix, _)) = callee.rsplit_once("::") {
        if lang == "rust" {
            non_empty(path_prefix)
        } else {
            let tail = path_prefix
                .rsplit_once("::")
                .map_or(path_prefix, |(_, last)| last);
            non_empty(tail)
        }
    } else if let Some((dotted_prefix, _)) = callee.rsplit_once('.') {
        let tail = dotted_prefix
            .rsplit_once('.')
            .map_or(dotted_prefix, |(_, last)| last);
        non_empty(tail)
    } else {
        None
    };

    CalleeSite {
        name: callee.to_string(),
        arity,
        receiver,
        qualifier,
        ordinal: info.call.call_ordinal,
    }
}
/// Converts the file-local function summaries into exportable
/// [`FuncSummary`] records.
///
/// One record is produced per entry of `summaries`, in iteration order.
/// Capability sets are exported as their raw bit values. Fields with no
/// counterpart in the local summary are filled with fixed placeholders:
/// `propagates_taint` is `false`, `param_to_sink` and `hierarchy_edges` are
/// empty, and `module_path`, `rust_use_map` and `rust_wildcards` are `None`.
///
/// * `file_path` / `lang` - stamped verbatim onto every exported record.
pub(crate) fn export_summaries(
    summaries: &FuncSummaries,
    file_path: &str,
    lang: &str,
) -> Vec<FuncSummary> {
    let mut exported = Vec::new();
    for (key, local) in summaries.iter() {
        exported.push(FuncSummary {
            // Identity.
            name: key.name.clone(),
            file_path: file_path.to_owned(),
            lang: lang.to_owned(),
            container: local.container.clone(),
            disambig: local.disambig,
            kind: local.kind,
            // Parameters.
            param_count: local.param_count,
            param_names: local.param_names.clone(),
            // Taint facts computed locally.
            source_caps: local.source_caps.bits(),
            sanitizer_caps: local.sanitizer_caps.bits(),
            sink_caps: local.sink_caps.bits(),
            propagating_params: local.propagating_params.clone(),
            tainted_sink_params: local.tainted_sink_params.clone(),
            callees: local.callees.clone(),
            // Not derived from the local summary; fixed placeholders.
            propagates_taint: false,
            param_to_sink: Vec::new(),
            module_path: None,
            rust_use_map: None,
            rust_wildcards: None,
            hierarchy_edges: Vec::new(),
        });
    }
    exported
}
#[cfg(test)]
mod cfg_tests;