use super::*;
use petgraph::visit::EdgeRef;
use tree_sitter::Language;
/// Parses `src` and returns a clone of one body's graph together with its entry node.
///
/// When the file CFG holds more than one body the body at index 1 is used;
/// otherwise the body at index 0 is used.
fn parse_and_build(src: &[u8], lang_str: &str, ts_lang: Language) -> (Cfg, NodeIndex) {
    let parsed = parse_to_file_cfg(src, lang_str, ts_lang);
    let chosen = match parsed.bodies.len() {
        0 | 1 => &parsed.bodies[0],
        _ => &parsed.bodies[1],
    };
    (chosen.graph.clone(), chosen.entry)
}
fn parse_to_file_cfg(src: &[u8], lang_str: &str, ts_lang: Language) -> FileCfg {
let mut parser = tree_sitter::Parser::new();
parser.set_language(&ts_lang).unwrap();
let tree = parser.parse(src, None).unwrap();
build_cfg(&tree, src, lang_str, "test.js", None)
}
/// A JavaScript `try`/`catch` must produce at least one `Exception` edge, and every
/// such edge must originate from a `Call` node.
#[test]
fn js_try_catch_has_exception_edges() {
    let (cfg, _) = parse_and_build(
        b"function f() { try { foo(); } catch (e) { bar(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let mut seen = 0usize;
    for edge in cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
    {
        seen += 1;
        assert_eq!(cfg[edge.source()].kind, StmtKind::Call);
    }
    assert!(seen > 0, "Expected at least one Exception edge");
}
/// For an `eval` call nested in a multi-line template literal, the node's
/// `classification_span` must start after `ast.span` and on the line of the `eval`
/// call, and `callee_span` must be recorded.
#[test]
fn inner_call_override_narrows_classification_span() {
    /// 1-based line number of the byte at `offset` within `text`.
    fn line_of(text: &[u8], offset: usize) -> usize {
        text[..offset].iter().filter(|&&c| c == b'\n').count() + 1
    }

    let src = b"function f() {\n x = `\n ${eval('1')}\n `;\n}\n";
    let (cfg, _) = parse_and_build(
        src,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let info = cfg
        .node_weights()
        .find(|w| w.call.callee.as_deref() == Some("eval"))
        .expect("inner-call override should produce a node with callee=eval");

    let (outer_byte, _) = info.ast.span;
    let (inner_byte, _) = info.classification_span();
    assert!(
        inner_byte > outer_byte,
        "classification span should start *inside* the outer statement (outer={outer_byte}, inner={inner_byte})"
    );

    assert_eq!(line_of(src, outer_byte), 2, "outer ast.span on line 2");
    assert_eq!(
        line_of(src, inner_byte),
        3,
        "classification_span on eval's line"
    );

    assert!(
        info.call.callee_span.is_some(),
        "inner-call override should record callee_span"
    );
}
/// `YAML.safe_load(File.read(x))` must yield a node whose callee is `File.read`,
/// that carries a `FILE_IO` sink label, and whose `outer_callee` is `YAML.safe_load`.
#[test]
fn ruby_inner_call_fallback_classifies_wrapper_around_file_read() {
    let (cfg, _) = parse_and_build(
        b"def f(x)\n YAML.safe_load(File.read(x))\nend\n",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let info = cfg
        .node_weights()
        .find(|w| w.call.callee.as_deref() == Some("File.read"))
        .expect(
            "inner-call fallback should override the outer YAML.safe_load callee with File.read",
        );

    let has_file_io_sink = info
        .taint
        .labels
        .iter()
        .any(|l| matches!(l, DataLabel::Sink(c) if c.contains(crate::labels::Cap::FILE_IO)));
    assert!(
        has_file_io_sink,
        "wrapper-around-File.read node must carry the FILE_IO sink label"
    );

    assert_eq!(
        info.call.outer_callee.as_deref(),
        Some("YAML.safe_load"),
        "outer_callee must preserve the original wrapping callee"
    );
}
/// `outer(File.read(x))` must yield a node whose callee is `File.read` and that
/// carries a `FILE_IO` sink label.
#[test]
fn ruby_inner_call_fallback_classifies_bare_outer_around_file_read() {
    let (cfg, _) = parse_and_build(
        b"def f(x)\n outer(File.read(x))\nend\n",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let info = cfg
        .node_weights()
        .find(|w| w.call.callee.as_deref() == Some("File.read"))
        .expect("inner-call fallback must override `outer` callee with File.read");

    let has_file_io_sink = info
        .taint
        .labels
        .iter()
        .any(|l| matches!(l, DataLabel::Sink(c) if c.contains(crate::labels::Cap::FILE_IO)));
    assert!(
        has_file_io_sink,
        "wrapper-around-File.read node must carry FILE_IO sink label"
    );
}
/// `classification_span()` returns `ast.span` when `callee_span` is unset and
/// `callee_span` when it is set; `ast.span` itself stays untouched.
#[test]
fn classification_span_falls_back_to_ast_span() {
    let whole = (100, 200);
    let inner = (150, 170);

    // No callee span: the classification span is the AST span.
    let plain = NodeInfo {
        ast: AstMeta {
            span: whole,
            enclosing_func: None,
        },
        ..Default::default()
    };
    assert!(plain.call.callee_span.is_none());
    assert_eq!(plain.classification_span(), (100, 200));

    // Callee span present: it takes precedence.
    let with_callee_span = NodeInfo {
        call: CallMeta {
            callee_span: Some(inner),
            ..Default::default()
        },
        ast: AstMeta {
            span: whole,
            enclosing_func: None,
        },
        ..Default::default()
    };
    assert_eq!(with_callee_span.classification_span(), (150, 170));
    assert_eq!(with_callee_span.ast.span, (100, 200));
}
/// For a plain `eval(x);` statement: if a `callee_span` is recorded at all, it must
/// lie within `ast.span` and differ from it.
#[test]
fn callee_span_unset_when_no_narrowing_is_possible() {
    let (cfg, _) = parse_and_build(
        b"function f() { eval(x); }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let info = cfg
        .node_weights()
        .find(|w| w.call.callee.as_deref() == Some("eval"))
        .expect("should find eval call");

    match info.call.callee_span {
        // Nothing recorded: nothing to verify.
        None => {}
        Some(cs) => {
            let (lo, hi) = info.ast.span;
            assert!(
                lo <= cs.0 && cs.1 <= hi,
                "callee_span {:?} must be contained in ast.span {:?}",
                cs,
                info.ast.span,
            );
            assert_ne!(
                cs, info.ast.span,
                "callee_span should only be set when it narrows ast.span"
            );
        }
    }
}
/// A JavaScript `try`/`finally` without `catch` must produce no `Exception` edges,
/// and every node of the graph must be reachable from the entry node.
#[test]
fn js_try_finally_no_exception_edges() {
    let src = b"function f() { try { foo(); } finally { cleanup(); } }";
    let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
    // The entry node is used for the reachability walk below, so it is bound
    // without the "unused" underscore prefix.
    let (cfg, entry) = parse_and_build(src, "javascript", ts_lang);

    let exception_edges: Vec<_> = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
        .collect();
    assert!(
        exception_edges.is_empty(),
        "Expected no Exception edges for try/finally without catch"
    );

    // Breadth-first walk from the entry node, collecting every node visited.
    let mut reachable = HashSet::new();
    let mut bfs = petgraph::visit::Bfs::new(&cfg, entry);
    while let Some(nx) = bfs.next(&cfg) {
        reachable.insert(nx);
    }
    assert_eq!(
        reachable.len(),
        cfg.node_count(),
        "All nodes should be reachable (finally connected to try body)"
    );
}
/// A Java `try`/`catch` must produce at least one `Exception` edge, each leaving a
/// `Call` node.
#[test]
fn java_try_catch_has_exception_edges() {
    let (cfg, _) = parse_and_build(
        b"class Foo { void bar() { try { baz(); } catch (Exception e) { qux(); } } }",
        "java",
        Language::from(tree_sitter_java::LANGUAGE),
    );

    let sources: Vec<_> = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
        .map(|e| e.source())
        .collect();
    assert!(
        !sources.is_empty(),
        "Expected at least one Exception edge in Java try/catch"
    );
    for origin in sources {
        assert_eq!(cfg[origin].kind, StmtKind::Call);
    }
}
/// In a JavaScript `try`/`catch`/`finally`, every node must be reachable from the
/// entry node and at least one `Exception` edge must exist.
#[test]
fn js_try_catch_finally_all_reachable() {
    let (cfg, entry) = parse_and_build(
        b"function f() { try { foo(); } catch (e) { bar(); } finally { baz(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    // The BFS walker yields each reachable node once, so counting its output
    // gives the size of the reachable set.
    let mut walker = petgraph::visit::Bfs::new(&cfg, entry);
    let reached = std::iter::from_fn(|| walker.next(&cfg)).count();
    assert_eq!(
        reached,
        cfg.node_count(),
        "All nodes should be reachable in try/catch/finally"
    );

    let has_exception_edge = cfg
        .edge_references()
        .any(|e| matches!(e.weight(), EdgeKind::Exception));
    assert!(has_exception_edge);
}
/// A `throw` inside a JavaScript `try` block with a `catch` must produce at least
/// one `Exception` edge.
#[test]
fn js_throw_in_try_catch_has_exception_edge() {
    let (cfg, _) = parse_and_build(
        b"function f() { try { throw new Error('bad'); } catch (e) { handle(e); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let has_exception_edge = cfg
        .edge_references()
        .any(|e| matches!(e.weight(), EdgeKind::Exception));
    assert!(
        has_exception_edge,
        "throw inside try should create exception edge to catch"
    );
}
/// A Java `try` with two `catch` clauses must produce at least two `Exception` edges.
#[test]
fn java_multiple_catch_clauses() {
    let (cfg, _) = parse_and_build(
        b"class Foo { void bar() { try { baz(); } catch (IOException e) { a(); } catch (Exception e) { b(); } } }",
        "java",
        Language::from(tree_sitter_java::LANGUAGE),
    );

    let exception_total = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
        .count();
    assert!(
        exception_total >= 2,
        "Expected exception edges to multiple catch clauses, got {}",
        exception_total
    );
}
/// `catch (e)` must yield exactly one `catch_param` node: a `Seq` node defining `e`
/// that is the target of every `Exception` edge.
#[test]
fn js_catch_param_defines_variable() {
    let (cfg, _) = parse_and_build(
        b"function f() { try { foo(); } catch (e) { bar(e); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let marked: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].catch_param)
        .collect();
    assert_eq!(marked.len(), 1, "Expected exactly one catch_param node");

    let param_idx = marked[0];
    let param = &cfg[param_idx];
    assert_eq!(param.taint.defines.as_deref(), Some("e"));
    assert_eq!(param.kind, StmtKind::Seq);

    // No Exception edge may land anywhere other than the catch-param node.
    let stray_target = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
        .any(|e| e.target() != param_idx);
    assert!(!stray_target);
}
/// A Java `catch (Exception e)` must yield exactly one `catch_param` node defining `e`.
#[test]
fn java_catch_param_extracted() {
    let (cfg, _) = parse_and_build(
        b"class Foo { void bar() { try { baz(); } catch (Exception e) { qux(e); } } }",
        "java",
        Language::from(tree_sitter_java::LANGUAGE),
    );

    let marked: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].catch_param)
        .collect();
    assert_eq!(
        marked.len(),
        1,
        "Expected exactly one catch_param node in Java"
    );

    let param = &cfg[marked[0]];
    assert_eq!(param.taint.defines.as_deref(), Some("e"));
}
/// A parameterless JavaScript `catch { ... }` must not create any `catch_param` node.
#[test]
fn js_catch_no_param_no_synthetic() {
    let (cfg, _) = parse_and_build(
        b"function f() { try { foo(); } catch { bar(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let any_catch_param = cfg.node_weights().any(|w| w.catch_param);
    assert!(
        !any_catch_param,
        "catch without parameter should not create a catch_param node"
    );
}
/// A Ruby `begin`/`rescue` must produce at least one `Exception` edge.
#[test]
fn ruby_begin_rescue_has_exception_edges() {
    let (cfg, _) = parse_and_build(
        b"def f()\n begin\n foo()\n rescue => e\n bar(e)\n end\nend",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let has_exception_edge = cfg
        .edge_references()
        .any(|e| matches!(e.weight(), EdgeKind::Exception));
    assert!(
        has_exception_edge,
        "begin/rescue should produce exception edges"
    );
}
/// `rescue StandardError => e` must yield exactly one `catch_param` node: a `Seq`
/// node defining `e` that is the target of every `Exception` edge.
#[test]
fn ruby_rescue_catch_param_defines_variable() {
    let (cfg, _) = parse_and_build(
        b"def f()\n begin\n foo()\n rescue StandardError => e\n bar(e)\n end\nend",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let marked: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].catch_param)
        .collect();
    assert_eq!(
        marked.len(),
        1,
        "Expected exactly one catch_param node in Ruby rescue"
    );

    let param_idx = marked[0];
    let param = &cfg[param_idx];
    assert_eq!(param.taint.defines.as_deref(), Some("e"));
    assert_eq!(param.kind, StmtKind::Seq);

    // No Exception edge may land anywhere other than the catch-param node.
    let stray_target = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
        .any(|e| e.target() != param_idx);
    assert!(!stray_target);
}
/// A Ruby `begin`/`rescue`/`ensure` must produce `Exception` edges and a graph with
/// more than three nodes.
#[test]
fn ruby_begin_rescue_ensure_complete() {
    let (cfg, _) = parse_and_build(
        b"def f()\n begin\n foo()\n rescue => e\n bar(e)\n ensure\n baz()\n end\nend",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let has_exception_edge = cfg
        .edge_references()
        .any(|e| matches!(e.weight(), EdgeKind::Exception));
    assert!(
        has_exception_edge,
        "begin/rescue/ensure should have exception edges"
    );

    assert!(cfg.node_count() > 3, "CFG should have multiple nodes");
}
/// A bare Ruby `rescue` (no bound variable) must create no `catch_param` node but
/// must still produce `Exception` edges.
#[test]
fn ruby_rescue_no_variable() {
    let (cfg, _) = parse_and_build(
        b"def f()\n begin\n foo()\n rescue\n bar()\n end\nend",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let any_catch_param = cfg.node_weights().any(|w| w.catch_param);
    assert!(
        !any_catch_param,
        "rescue without variable should not create a catch_param node"
    );

    let has_exception_edge = cfg
        .edge_references()
        .any(|e| matches!(e.weight(), EdgeKind::Exception));
    assert!(
        has_exception_edge,
        "rescue without variable should still have exception edges"
    );
}
/// A method-level `rescue => e` (no explicit `begin`) must produce `Exception` edges
/// and exactly one `catch_param` node defining `e`.
#[test]
fn ruby_body_statement_implicit_begin() {
    let (cfg, _) = parse_and_build(
        b"def f()\n foo()\nrescue => e\n bar(e)\nend",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let has_exception_edge = cfg
        .edge_references()
        .any(|e| matches!(e.weight(), EdgeKind::Exception));
    assert!(
        has_exception_edge,
        "implicit begin via body_statement should produce exception edges"
    );

    let marked: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].catch_param)
        .collect();
    assert_eq!(
        marked.len(),
        1,
        "implicit begin rescue should have one catch_param node"
    );

    let param = &cfg[marked[0]];
    assert_eq!(param.taint.defines.as_deref(), Some("e"));
}
/// Two Ruby `rescue ... => e` clauses must yield two `catch_param` nodes, each
/// defining `e` and each targeted by at least one `Exception` edge.
#[test]
fn ruby_multiple_rescue_clauses() {
    let (cfg, _) = parse_and_build(
        b"def f()\n begin\n foo()\n rescue IOError => e\n handle_io(e)\n rescue => e\n handle_other(e)\n end\nend",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let marked: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].catch_param)
        .collect();
    assert_eq!(
        marked.len(),
        2,
        "Two rescue clauses should produce two catch_param nodes"
    );
    for param_idx in marked.iter().copied() {
        assert_eq!(cfg[param_idx].taint.defines.as_deref(), Some("e"));
    }

    let exception_targets: Vec<_> = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Exception))
        .map(|e| e.target())
        .collect();
    for param_idx in &marked {
        assert!(
            exception_targets.contains(param_idx),
            "Exception edges should target each catch_param node"
        );
    }
}
/// Returns the indices of all nodes whose kind is `StmtKind::If`, in the order
/// produced by `node_indices()`.
fn if_nodes(cfg: &Cfg) -> Vec<NodeIndex> {
    let mut found = Vec::new();
    for idx in cfg.node_indices() {
        if cfg[idx].kind == StmtKind::If {
            found.push(idx);
        }
    }
    found
}
/// Returns `true` when `src` has an outgoing edge to `dst` whose weight satisfies
/// `kind_match`.
fn has_edge(cfg: &Cfg, src: NodeIndex, dst: NodeIndex, kind_match: fn(&EdgeKind) -> bool) -> bool {
    for edge in cfg.edges(src) {
        if edge.target() == dst && kind_match(edge.weight()) {
            return true;
        }
    }
    false
}
/// `if (a && b)` must produce two `If` nodes, with a `True` edge from `a` to `b`
/// and at least one `False` edge out of each.
#[test]
fn js_if_and_short_circuit() {
    let (cfg, _) = parse_and_build(
        b"function f() { if (a && b) { then(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let ifs = if_nodes(&cfg);
    assert_eq!(
        ifs.len(),
        2,
        "Expected 2 If nodes for `a && b`, got {}",
        ifs.len()
    );

    // First `If` node whose condition variables include `name`.
    let node_with = |name: &str| {
        ifs.iter()
            .copied()
            .find(|&n| cfg[n].condition_vars.contains(&name.to_string()))
            .unwrap()
    };
    let a_node = node_with("a");
    let b_node = node_with("b");

    assert!(
        has_edge(&cfg, a_node, b_node, |e| matches!(e, EdgeKind::True)),
        "Expected True edge from a to b"
    );

    let has_false_out =
        |n: NodeIndex| cfg.edges(n).any(|e| matches!(e.weight(), EdgeKind::False));
    assert!(has_false_out(a_node), "Expected False edge from a");
    assert!(has_false_out(b_node), "Expected False edge from b");
}
/// `if (a || b)` must produce two `If` nodes, with a `False` edge from `a` to `b`
/// and at least one `True` edge out of each.
#[test]
fn js_if_or_short_circuit() {
    let (cfg, _) = parse_and_build(
        b"function f() { if (a || b) { then(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let ifs = if_nodes(&cfg);
    assert_eq!(
        ifs.len(),
        2,
        "Expected 2 If nodes for `a || b`, got {}",
        ifs.len()
    );

    // First `If` node whose condition variables include `name`.
    let node_with = |name: &str| {
        ifs.iter()
            .copied()
            .find(|&n| cfg[n].condition_vars.contains(&name.to_string()))
            .unwrap()
    };
    let a_node = node_with("a");
    let b_node = node_with("b");

    assert!(
        has_edge(&cfg, a_node, b_node, |e| matches!(e, EdgeKind::False)),
        "Expected False edge from a to b"
    );

    let has_true_out =
        |n: NodeIndex| cfg.edges(n).any(|e| matches!(e.weight(), EdgeKind::True));
    assert!(has_true_out(a_node), "Expected True edge from a");
    assert!(has_true_out(b_node), "Expected True edge from b");
}
/// `if (a && (b || c))` must produce three single-variable `If` nodes, with a
/// `True` edge `a -> b` and a `False` edge `b -> c`.
#[test]
fn js_if_nested_and_or() {
    let (cfg, _) = parse_and_build(
        b"function f() { if (a && (b || c)) { then(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let ifs = if_nodes(&cfg);
    assert_eq!(
        ifs.len(),
        3,
        "Expected 3 If nodes for `a && (b || c)`, got {}",
        ifs.len()
    );

    // First `If` node whose only condition variable is `name`.
    let sole_var_node = |name: &str| {
        ifs.iter()
            .copied()
            .find(|&n| {
                let vars = &cfg[n].condition_vars;
                vars.len() == 1 && vars[0] == name
            })
            .unwrap()
    };
    let a_node = sole_var_node("a");
    let b_node = sole_var_node("b");
    let c_node = sole_var_node("c");

    let a_true_to_b = has_edge(&cfg, a_node, b_node, |e| matches!(e, EdgeKind::True));
    assert!(a_true_to_b);
    let b_false_to_c = has_edge(&cfg, b_node, c_node, |e| matches!(e, EdgeKind::False));
    assert!(b_false_to_c);
}
/// `while (a && b)` must produce two `If` nodes and one `Loop` header, and every
/// `Back` edge must target that header.
#[test]
fn js_while_and_short_circuit() {
    let (cfg, _) = parse_and_build(
        b"function f() { while (a && b) { body(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let if_total = if_nodes(&cfg).len();
    assert_eq!(
        if_total, 2,
        "Expected 2 If nodes in while condition, got {}",
        if_total
    );

    let headers: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].kind == StmtKind::Loop)
        .collect();
    assert_eq!(headers.len(), 1, "Expected 1 Loop header");
    let header = headers[0];

    let back_targets: Vec<_> = cfg
        .edge_references()
        .filter(|e| matches!(e.weight(), EdgeKind::Back))
        .map(|e| e.target())
        .collect();
    assert!(!back_targets.is_empty(), "Expected back edges");
    for target in back_targets {
        assert_eq!(
            target, header,
            "Back edge should go to loop header, not into condition chain"
        );
    }
}
/// Python `if a and b:` must produce two `If` nodes with a `True` edge from `a` to `b`.
#[test]
fn python_if_and() {
    let (cfg, _) = parse_and_build(
        b"def f():\n if a and b:\n pass\n",
        "python",
        Language::from(tree_sitter_python::LANGUAGE),
    );

    let ifs = if_nodes(&cfg);
    assert_eq!(
        ifs.len(),
        2,
        "Expected 2 If nodes for Python `a and b`, got {}",
        ifs.len()
    );

    // First `If` node whose condition variables include `name`.
    let node_with = |name: &str| {
        ifs.iter()
            .copied()
            .find(|&n| cfg[n].condition_vars.contains(&name.to_string()))
            .unwrap()
    };
    let a_node = node_with("a");
    let b_node = node_with("b");

    let a_true_to_b = has_edge(&cfg, a_node, b_node, |e| matches!(e, EdgeKind::True));
    assert!(a_true_to_b, "Expected True edge from a to b in Python and");
}
/// Ruby `unless a && b` must produce two `If` nodes, a `True` edge from `a` to `b`,
/// and at least one `False` edge out of `a`.
#[test]
fn ruby_unless_and() {
    let (cfg, _) = parse_and_build(
        b"def f\n unless a && b\n x\n end\nend\n",
        "ruby",
        Language::from(tree_sitter_ruby::LANGUAGE),
    );

    let ifs = if_nodes(&cfg);
    assert_eq!(
        ifs.len(),
        2,
        "Expected 2 If nodes for Ruby `unless a && b`, got {}",
        ifs.len()
    );

    // First `If` node whose condition variables include `name`.
    let node_with = |name: &str| {
        ifs.iter()
            .copied()
            .find(|&n| cfg[n].condition_vars.contains(&name.to_string()))
            .unwrap()
    };
    let a_node = node_with("a");
    let b_node = node_with("b");

    let a_true_to_b = has_edge(&cfg, a_node, b_node, |e| matches!(e, EdgeKind::True));
    assert!(a_true_to_b, "Expected True edge from a to b in unless");

    let a_has_false_out = cfg
        .edges(a_node)
        .any(|e| matches!(e.weight(), EdgeKind::False));
    assert!(a_has_false_out, "a should have False edges in unless");
}
/// Inside `while (a && b)`, every `Continue` node must have a `Back` edge to the
/// single `Loop` header.
#[test]
fn while_short_circuit_continue() {
    let (cfg, _) = parse_and_build(
        b"function f() { while (a && b) { if (cond) { continue; } body(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let headers: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].kind == StmtKind::Loop)
        .collect();
    assert_eq!(headers.len(), 1);
    let header = headers[0];

    let continues: Vec<_> = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].kind == StmtKind::Continue)
        .collect();
    assert!(!continues.is_empty(), "Expected continue node");

    for cont in continues {
        let jumps_to_header = has_edge(&cfg, cont, header, |e| matches!(e, EdgeKind::Back));
        assert!(
            jumps_to_header,
            "Continue should have back-edge to loop header"
        );
    }
}
/// `if (!(a && b))` must produce exactly one `If` node.
#[test]
fn negated_boolean_no_decomposition() {
    let (cfg, _) = parse_and_build(
        b"function f() { if (!(a && b)) { then(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let if_total = if_nodes(&cfg).len();
    assert_eq!(
        if_total, 1,
        "Negated boolean should NOT be decomposed, got {} If nodes",
        if_total
    );
}
/// `if (a && b && c)` must produce exactly three `If` nodes.
#[test]
fn js_triple_and_chain() {
    let (cfg, _) = parse_and_build(
        b"function f() { if (a && b && c) { then(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let if_total = if_nodes(&cfg).len();
    assert_eq!(
        if_total, 3,
        "Expected 3 If nodes for `a && b && c`, got {}",
        if_total
    );
}
/// `if (a || b && c)` must produce exactly three `If` nodes.
#[test]
fn js_or_precedence_with_and() {
    let (cfg, _) = parse_and_build(
        b"function f() { if (a || b && c) { then(); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let if_total = if_nodes(&cfg).len();
    assert_eq!(
        if_total, 3,
        "Expected 3 If nodes for `a || b && c`, got {}",
        if_total
    );
}
/// Parses `src` with a fresh tree-sitter parser configured for `ts_lang`.
///
/// Panics if the language cannot be set or if parsing returns no tree.
fn parse_tree(src: &[u8], ts_lang: Language) -> tree_sitter::Tree {
    let mut ts_parser = tree_sitter::Parser::new();
    let configured = ts_parser.set_language(&ts_lang);
    configured.unwrap();
    let parsed = ts_parser.parse(src, None);
    parsed.unwrap()
}
/// For `process(lambda: eval(dangerous))`, `first_call_ident` must report `process`.
#[test]
fn first_call_ident_skips_lambda_body() {
    let src = b"process(lambda: eval(dangerous))";
    let tree = parse_tree(src, Language::from(tree_sitter_python::LANGUAGE));
    let found = first_call_ident(tree.root_node(), "python", src);
    assert_eq!(found.as_deref(), Some("process"));
}
/// For `process(() => eval(dangerous))`, `first_call_ident` must report `process`.
#[test]
fn first_call_ident_skips_arrow_function_body() {
    let src = b"process(() => eval(dangerous))";
    let tree = parse_tree(src, Language::from(tree_sitter_javascript::LANGUAGE));
    let found = first_call_ident(tree.root_node(), "javascript", src);
    assert_eq!(found.as_deref(), Some("process"));
}
/// For a named function expression passed as an argument, `first_call_ident` must
/// report the outer `process` call.
#[test]
fn first_call_ident_skips_named_function_in_arg() {
    let src = b"process(function inner() { eval(dangerous); })";
    let tree = parse_tree(src, Language::from(tree_sitter_javascript::LANGUAGE));
    let found = first_call_ident(tree.root_node(), "javascript", src);
    assert_eq!(found.as_deref(), Some("process"));
}
/// For `outer(inner(x))`, `first_call_ident` must report `outer`.
#[test]
fn first_call_ident_normal_nested_call() {
    let src = b"outer(inner(x))";
    let tree = parse_tree(src, Language::from(tree_sitter_javascript::LANGUAGE));
    let found = first_call_ident(tree.root_node(), "javascript", src);
    assert_eq!(found.as_deref(), Some("outer"));
}
/// For `[function() {}, actual_call()]`, `first_call_ident` must report
/// `actual_call`.
#[test]
fn first_call_ident_finds_call_not_blocked_by_function() {
    let src = b"[function() {}, actual_call()]";
    let tree = parse_tree(src, Language::from(tree_sitter_javascript::LANGUAGE));
    let found = first_call_ident(tree.root_node(), "javascript", src);
    assert_eq!(found.as_deref(), Some("actual_call"));
}
/// In the body at index 1, a node with callee `safe_wrapper` must exist, and no node
/// with callee `eval` may have an empty `enclosing_func`.
#[test]
fn callee_not_resolved_from_nested_function_arg() {
    let file_cfg = parse_to_file_cfg(
        b"function f() { safe_wrapper(function() { eval(user_input); }); }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let body = &file_cfg.bodies[1];

    // Single pass over the nodes, recording both facts of interest.
    let mut saw_wrapper = false;
    let mut saw_outer_eval = false;
    for info in body.graph.node_weights() {
        let callee = info.call.callee.as_deref();
        saw_wrapper |= callee == Some("safe_wrapper");
        saw_outer_eval |= callee == Some("eval") && info.ast.enclosing_func.is_none();
    }

    assert!(saw_wrapper, "expected a node with callee 'safe_wrapper'");
    assert!(
        !saw_outer_eval,
        "eval should not appear as a callee in the outer scope from a nested function"
    );
}
/// `NodeInfo::default()` must be a `Seq` node with every optional field unset,
/// every collection empty, every flag false and a `(0, 0)` span.
#[test]
fn nodeinfo_default_is_valid() {
    let blank = NodeInfo::default();

    assert_eq!(blank.kind, StmtKind::Seq);

    // Call metadata.
    let call = &blank.call;
    assert!(call.callee.is_none());
    assert!(call.outer_callee.is_none());
    assert_eq!(call.call_ordinal, 0);
    assert!(call.arg_uses.is_empty());
    assert!(call.receiver.is_none());
    assert!(call.sink_payload_args.is_none());

    // Taint metadata.
    let taint = &blank.taint;
    assert!(taint.labels.is_empty());
    assert!(taint.const_text.is_none());
    assert!(taint.defines.is_none());
    assert!(taint.uses.is_empty());
    assert!(taint.extra_defines.is_empty());

    // AST metadata.
    assert_eq!(blank.ast.span, (0, 0));
    assert!(blank.ast.enclosing_func.is_none());

    // Top-level fields.
    assert!(!blank.all_args_literal);
    assert!(!blank.catch_param);
    assert!(blank.condition_text.is_none());
    assert!(blank.condition_vars.is_empty());
    assert!(!blank.condition_negated);
    assert!(blank.arg_callees.is_empty());
    assert!(blank.cast_target_type.is_none());
    assert!(blank.bin_op.is_none());
    assert!(blank.bin_op_const.is_none());
    assert!(!blank.managed_resource);
    assert!(!blank.in_defer);
    assert!(!blank.is_eq_with_const);
}
/// `CallMeta::default()` must have no callee, outer callee, receiver or sink payload
/// args, an ordinal of zero, and no argument uses.
#[test]
fn callmeta_default() {
    let meta = CallMeta::default();
    assert!(meta.callee.is_none());
    assert!(meta.outer_callee.is_none());
    assert_eq!(meta.call_ordinal, 0);
    assert!(meta.arg_uses.is_empty());
    assert!(meta.receiver.is_none());
    assert!(meta.sink_payload_args.is_none());
}
/// `TaintMeta::default()` must have no labels, constant text, definition, uses or
/// extra definitions.
#[test]
fn taintmeta_default() {
    let meta = TaintMeta::default();
    assert!(meta.labels.is_empty());
    assert!(meta.const_text.is_none());
    assert!(meta.defines.is_none());
    assert!(meta.uses.is_empty());
    assert!(meta.extra_defines.is_empty());
}
/// `AstMeta::default()` must have a `(0, 0)` span and no enclosing function.
#[test]
fn astmeta_default() {
    let meta = AstMeta::default();
    assert_eq!(meta.span, (0, 0));
    assert!(meta.enclosing_func.is_none());
}
/// A hand-built catch-parameter node keeps the fields it was given and leaves the
/// remaining taint labels and argument uses empty.
#[test]
fn synthetic_catch_param_node_structure() {
    let node = NodeInfo {
        catch_param: true,
        kind: StmtKind::Seq,
        call: CallMeta {
            callee: Some("catch(e)".into()),
            ..Default::default()
        },
        taint: TaintMeta {
            defines: Some("e".into()),
            ..Default::default()
        },
        ast: AstMeta {
            span: (100, 100),
            enclosing_func: Some("handle_request".into()),
        },
        ..Default::default()
    };

    // Explicitly set fields.
    assert_eq!(node.kind, StmtKind::Seq);
    assert_eq!(node.ast.span, (100, 100));
    assert_eq!(node.ast.enclosing_func.as_deref(), Some("handle_request"));
    assert_eq!(node.taint.defines.as_deref(), Some("e"));
    assert_eq!(node.call.callee.as_deref(), Some("catch(e)"));
    assert!(node.catch_param);

    // Fields left at their defaults.
    assert!(node.taint.labels.is_empty());
    assert!(node.call.arg_uses.is_empty());
}
/// A hand-built `Seq` node with only AST metadata set has no definition, no callee
/// and is not a catch parameter.
#[test]
fn synthetic_passthrough_node_structure() {
    let node = NodeInfo {
        ast: AstMeta {
            span: (50, 50),
            enclosing_func: Some("main".into()),
        },
        kind: StmtKind::Seq,
        ..Default::default()
    };

    assert_eq!(node.kind, StmtKind::Seq);
    assert_eq!(node.ast.span, (50, 50));
    assert!(node.taint.defines.is_none());
    assert!(node.call.callee.is_none());
    assert!(!node.catch_param);
}
/// A hand-built `Call` node exposes the call, taint and AST metadata it was
/// constructed with.
#[test]
fn normal_call_node_structure() {
    let node = NodeInfo {
        kind: StmtKind::Call,
        ast: AstMeta {
            span: (10, 50),
            enclosing_func: Some("handler".into()),
        },
        taint: TaintMeta {
            // A single CODE_EXEC sink label.
            labels: std::iter::once(crate::labels::DataLabel::Sink(
                crate::labels::Cap::CODE_EXEC,
            ))
            .collect(),
            defines: Some("result".into()),
            uses: vec!["x".into(), "y".into()],
            ..Default::default()
        },
        call: CallMeta {
            callee: Some("eval".into()),
            receiver: Some("window".into()),
            call_ordinal: 3,
            arg_uses: vec![vec!["x".into()], vec!["y".into()]],
            sink_payload_args: Some(vec![0]),
            ..Default::default()
        },
        ..Default::default()
    };

    // Call metadata.
    assert_eq!(node.call.callee.as_deref(), Some("eval"));
    assert_eq!(node.call.receiver.as_deref(), Some("window"));
    assert_eq!(node.call.call_ordinal, 3);
    assert_eq!(node.call.arg_uses.len(), 2);
    assert_eq!(node.call.sink_payload_args.as_deref(), Some(&[0usize][..]));

    // Taint metadata.
    assert_eq!(node.taint.labels.len(), 1);
    assert_eq!(node.taint.defines.as_deref(), Some("result"));
    assert_eq!(node.taint.uses, vec!["x", "y"]);

    // AST metadata.
    assert_eq!(node.ast.span, (10, 50));
    assert_eq!(node.ast.enclosing_func.as_deref(), Some("handler"));
}
/// A hand-built `If` node keeps its condition text, condition variables and
/// negation flag.
#[test]
fn condition_node_preserves_fields() {
    let cond = NodeInfo {
        condition_negated: true,
        condition_vars: vec!["x".into()],
        condition_text: Some("x > 0".into()),
        ast: AstMeta {
            span: (0, 20),
            enclosing_func: None,
        },
        kind: StmtKind::If,
        ..Default::default()
    };

    assert_eq!(cond.kind, StmtKind::If);
    assert_eq!(cond.condition_text.as_deref(), Some("x > 0"));
    assert_eq!(cond.condition_vars, vec!["x"]);
    assert!(cond.condition_negated);
}
/// Cloning a fully populated `NodeInfo` must preserve the populated fields of the
/// `call`, `taint` and `ast` sub-structs as well as the top-level `kind` and flags.
///
/// Every field set to a non-default value below is compared after cloning,
/// including `kind`, `callee_text`, `callee_span` and `arg_string_literals`.
#[test]
fn clone_preserves_all_sub_structs() {
    let original = NodeInfo {
        kind: StmtKind::Call,
        call: CallMeta {
            callee: Some("foo".into()),
            callee_text: Some("obj.foo".into()),
            outer_callee: Some("bar".into()),
            callee_span: Some((7, 17)),
            call_ordinal: 5,
            arg_uses: vec![vec!["a".into()]],
            receiver: Some("obj".into()),
            sink_payload_args: Some(vec![1, 2]),
            kwargs: vec![("shell".into(), vec!["True".into()])],
            arg_string_literals: vec![Some("lit".into())],
            destination_uses: None,
            gate_filters: Vec::new(),
            is_constructor: false,
        },
        taint: TaintMeta {
            labels: {
                let mut v = SmallVec::new();
                v.push(crate::labels::DataLabel::Source(crate::labels::Cap::all()));
                v
            },
            const_text: Some("42".into()),
            defines: Some("r".into()),
            uses: vec!["a".into(), "b".into()],
            extra_defines: vec!["c".into()],
        },
        ast: AstMeta {
            span: (10, 100),
            enclosing_func: Some("main".into()),
        },
        all_args_literal: true,
        catch_param: true,
        ..Default::default()
    };
    let cloned = original.clone();

    // Statement kind.
    assert_eq!(cloned.kind, original.kind);

    // Call metadata.
    assert_eq!(cloned.call.callee, original.call.callee);
    assert_eq!(cloned.call.callee_text, original.call.callee_text);
    assert_eq!(cloned.call.outer_callee, original.call.outer_callee);
    assert_eq!(cloned.call.callee_span, original.call.callee_span);
    assert_eq!(cloned.call.call_ordinal, original.call.call_ordinal);
    assert_eq!(cloned.call.arg_uses, original.call.arg_uses);
    assert_eq!(cloned.call.receiver, original.call.receiver);
    assert_eq!(
        cloned.call.sink_payload_args,
        original.call.sink_payload_args
    );
    assert_eq!(cloned.call.kwargs, original.call.kwargs);
    assert_eq!(
        cloned.call.arg_string_literals,
        original.call.arg_string_literals
    );

    // Taint metadata (labels are compared by count only).
    assert_eq!(cloned.taint.labels.len(), original.taint.labels.len());
    assert_eq!(cloned.taint.const_text, original.taint.const_text);
    assert_eq!(cloned.taint.defines, original.taint.defines);
    assert_eq!(cloned.taint.uses, original.taint.uses);
    assert_eq!(cloned.taint.extra_defines, original.taint.extra_defines);

    // AST metadata.
    assert_eq!(cloned.ast.span, original.ast.span);
    assert_eq!(cloned.ast.enclosing_func, original.ast.enclosing_func);

    // Top-level flags.
    assert_eq!(cloned.all_args_literal, original.all_args_literal);
    assert_eq!(cloned.catch_param, original.catch_param);
}
/// For a JavaScript `catch(e)`, the body returned by `first_body()` must contain
/// exactly one `catch_param` node that defines `e` and whose callee starts with
/// `catch(`.
#[test]
fn cfg_output_equivalence_js_catch() {
    let file_cfg = parse_to_file_cfg(
        b"function f() { try { foo(x); } catch(e) { bar(e); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let body = file_cfg.first_body();

    let marked: Vec<_> = body
        .graph
        .node_weights()
        .filter(|info| info.catch_param)
        .collect();
    assert_eq!(marked.len(), 1);

    let param = marked[0];
    assert_eq!(param.taint.defines.as_deref(), Some("e"));

    let callee = param.call.callee.as_deref().unwrap();
    assert!(callee.starts_with("catch("));
}
/// For `if (x > 0)`, the first `If` node must carry condition text or condition
/// variables and must have no taint labels.
#[test]
fn cfg_output_equivalence_condition_chain() {
    let (cfg, _) = parse_and_build(
        b"function f(x) { if (x > 0) { sink(x); } }",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );

    let first_if = cfg.node_weights().find(|info| info.kind == StmtKind::If);
    assert!(first_if.is_some());
    let first_if = first_if.unwrap();

    let has_condition_info =
        first_if.condition_text.is_some() || !first_if.condition_vars.is_empty();
    assert!(has_condition_info);
    assert!(first_if.taint.labels.is_empty());
}
/// `make_empty_node_info` must set kind, span and enclosing function from its
/// arguments and leave callee, definition and uses empty.
#[test]
fn make_empty_node_info_uses_sub_structs() {
    let node = make_empty_node_info(StmtKind::Entry, (0, 100), Some("test_func"));

    // Values taken from the arguments.
    assert_eq!(node.kind, StmtKind::Entry);
    assert_eq!(node.ast.span, (0, 100));
    assert_eq!(node.ast.enclosing_func.as_deref(), Some("test_func"));

    // Everything else stays empty.
    assert!(node.call.callee.is_none());
    assert!(node.taint.defines.is_none());
    assert!(node.taint.uses.is_empty());
}
/// `import { getInput as fetchInput } from './source'` must record one binding,
/// keyed by the alias, with the original name and module path.
#[test]
fn js_import_alias_bindings() {
    let file_cfg = parse_to_file_cfg(
        b"import { getInput as fetchInput } from './source';",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let bindings = &file_cfg.import_bindings;
    assert_eq!(bindings.len(), 1);

    let alias = &bindings["fetchInput"];
    assert_eq!(alias.original, "getInput");
    assert_eq!(alias.module_path.as_deref(), Some("./source"));
}
/// A JavaScript named import without an alias must record no import bindings.
#[test]
fn js_same_name_import_not_recorded() {
    let file_cfg = parse_to_file_cfg(
        b"import { exec } from 'child_process';",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    assert!(file_cfg.import_bindings.is_empty());
}
/// `from os import getenv as fetch_env` must record one binding, keyed by the
/// alias, with the original name and module path.
#[test]
fn python_import_alias_bindings() {
    let file_cfg = parse_to_file_cfg(
        b"from os import getenv as fetch_env",
        "python",
        Language::from(tree_sitter_python::LANGUAGE),
    );
    let bindings = &file_cfg.import_bindings;
    assert_eq!(bindings.len(), 1);

    let alias = &bindings["fetch_env"];
    assert_eq!(alias.original, "getenv");
    assert_eq!(alias.module_path.as_deref(), Some("os"));
}
/// Two aliased names in one Python `from ... import` must record two bindings,
/// each mapping the alias to its original name.
#[test]
fn python_multiple_aliased_imports() {
    let file_cfg = parse_to_file_cfg(
        b"from source import get_input as fetch_input, run_query as exec_query",
        "python",
        Language::from(tree_sitter_python::LANGUAGE),
    );
    let bindings = &file_cfg.import_bindings;
    assert_eq!(bindings.len(), 2);

    let fetch_alias = &bindings["fetch_input"];
    assert_eq!(fetch_alias.original, "get_input");
    let exec_alias = &bindings["exec_query"];
    assert_eq!(exec_alias.original, "run_query");
}
/// A Python import without an alias must not produce an import binding.
#[test]
fn python_same_name_import_not_recorded() {
    let parsed = parse_to_file_cfg(
        b"from os import getenv",
        "python",
        Language::from(tree_sitter_python::LANGUAGE),
    );
    assert!(parsed.import_bindings.is_empty());
}
/// A PHP `use … as …` is recorded under the alias, with the full namespace path
/// as the module path.
#[test]
fn php_namespace_alias_bindings() {
    let parsed = parse_to_file_cfg(
        b"<?php\nuse App\\Security\\Sanitizer as Clean;\n",
        "php",
        Language::from(tree_sitter_php::LANGUAGE_PHP),
    );
    let bindings = &parsed.import_bindings;
    assert_eq!(bindings.len(), 1);
    let binding = &bindings["Clean"];
    assert_eq!(binding.original, "Sanitizer");
    assert_eq!(
        binding.module_path.as_deref(),
        Some("App\\Security\\Sanitizer")
    );
}
/// A PHP `use` without an alias must not produce an import binding.
#[test]
fn php_no_alias_not_recorded() {
    let parsed = parse_to_file_cfg(
        b"<?php\nuse App\\Security\\Sanitizer;\n",
        "php",
        Language::from(tree_sitter_php::LANGUAGE_PHP),
    );
    assert!(parsed.import_bindings.is_empty());
}
/// A Rust `use … as …` is recorded under the alias, with the full path as the
/// module path.
#[test]
fn rust_use_as_alias_bindings() {
    let parsed = parse_to_file_cfg(
        b"use std::collections::HashMap as Map;",
        "rust",
        Language::from(tree_sitter_rust::LANGUAGE),
    );
    let bindings = &parsed.import_bindings;
    assert_eq!(bindings.len(), 1);
    let binding = &bindings["Map"];
    assert_eq!(binding.original, "HashMap");
    assert_eq!(
        binding.module_path.as_deref(),
        Some("std::collections::HashMap")
    );
}
/// A Rust `use` without `as` must not produce an import binding.
#[test]
fn rust_no_alias_not_recorded() {
    let parsed = parse_to_file_cfg(
        b"use std::collections::HashMap;",
        "rust",
        Language::from(tree_sitter_rust::LANGUAGE),
    );
    assert!(parsed.import_bindings.is_empty());
}
/// Inside a braced `use` list only the aliased entry is recorded.
#[test]
fn rust_nested_use_as_alias() {
    let parsed = parse_to_file_cfg(
        b"use std::io::{Read as IoRead, Write};",
        "rust",
        Language::from(tree_sitter_rust::LANGUAGE),
    );
    let bindings = &parsed.import_bindings;
    assert_eq!(bindings.len(), 1);
    let binding = &bindings["IoRead"];
    assert_eq!(binding.original, "Read");
}
/// An inline named placeholder (`{x}`) in `format!` must show up in the uses of
/// the node that defines the formatted value.
#[test]
fn rust_format_macro_named_arg_lifted_into_uses() {
    let source = b"fn f() { let x = 1; let y = format!(\"v={x}\"); }";
    let (graph, _) = parse_and_build(source, "rust", Language::from(tree_sitter_rust::LANGUAGE));
    let definers: Vec<_> = graph
        .node_weights()
        .filter(|node| node.taint.defines.as_deref() == Some("y"))
        .collect();
    for node in &definers {
        assert!(
            node.taint.uses.iter().any(|u| u == "x"),
            "expected `x` in uses for `let y = format!(\"v={{x}}\")`; got {:?}",
            node.taint.uses
        );
    }
    // Guard against a vacuous pass when nothing defines `y`.
    assert!(!definers.is_empty(), "no node found defining `y`");
}
/// A named placeholder followed by a format spec (`{x:?}`) must still be lifted
/// into the uses of the defining node.
#[test]
fn rust_format_macro_named_arg_with_format_spec() {
    let source = b"fn f() { let x = 1; let y = format!(\"{x:?}\"); }";
    let (graph, _) = parse_and_build(source, "rust", Language::from(tree_sitter_rust::LANGUAGE));
    let definers: Vec<_> = graph
        .node_weights()
        .filter(|node| node.taint.defines.as_deref() == Some("y"))
        .collect();
    for node in &definers {
        assert!(
            node.taint.uses.iter().any(|u| u == "x"),
            "expected `x` lifted past `{{x:?}}` format spec; got {:?}",
            node.taint.uses
        );
    }
    // Guard against a vacuous pass when nothing defines `y`.
    assert!(!definers.is_empty(), "no node found defining `y`");
}
/// Escaped braces (`{{x}}`) are literal text in a format string, so `x` must
/// not appear in the uses of the node defining `q`.
///
/// Like the sibling "lifted" tests, this also requires that a node defining
/// `q` exists, so the negative assertion cannot pass vacuously.
#[test]
fn rust_format_macro_escaped_braces_not_lifted() {
    let src = b"fn f() { let q = format!(\"{{x}}\"); }";
    let ts_lang = Language::from(tree_sitter_rust::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "rust", ts_lang);
    let mut found = false;
    for n in cfg.node_indices() {
        let info = &cfg[n];
        if info.taint.defines.as_deref() == Some("q") {
            assert!(
                !info.taint.uses.iter().any(|u| u == "x"),
                "must not lift `x` from escaped `{{{{x}}}}`; got {:?}",
                info.taint.uses
            );
            found = true;
        }
    }
    assert!(found, "no node found defining `q`");
}
/// A digit-only positional placeholder (`{0}`) must not be lifted as a
/// variable name, while the positional argument `a` must still be a use.
///
/// Like the sibling "lifted" tests, this also requires that a node defining
/// `q` exists, so the assertions cannot pass vacuously.
#[test]
fn rust_format_macro_positional_index_not_lifted() {
    let src = b"fn f() { let a = 1; let q = format!(\"{0}\", a); }";
    let ts_lang = Language::from(tree_sitter_rust::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "rust", ts_lang);
    let mut found = false;
    for n in cfg.node_indices() {
        let info = &cfg[n];
        if info.taint.defines.as_deref() == Some("q") {
            assert!(
                !info.taint.uses.iter().any(|u| u == "0"),
                "must not lift digit-only positional placeholder; got {:?}",
                info.taint.uses
            );
            assert!(
                info.taint.uses.iter().any(|u| u == "a"),
                "expected `a` in uses (positional arg) for `format!(\"{{0}}\", a)`; got {:?}",
                info.taint.uses
            );
            found = true;
        }
    }
    assert!(found, "no node found defining `q`");
}
/// An inline named placeholder in `println!` must be lifted into the uses of
/// the node whose callee is `println`.
#[test]
fn rust_println_macro_named_arg_lifted() {
    let source = b"fn f() { let user = String::from(\"x\"); println!(\"hi {user}\"); }";
    let (graph, _) = parse_and_build(source, "rust", Language::from(tree_sitter_rust::LANGUAGE));
    let printers: Vec<_> = graph
        .node_weights()
        .filter(|node| node.call.callee.as_deref() == Some("println"))
        .collect();
    for node in &printers {
        assert!(
            node.taint.uses.iter().any(|u| u == "user"),
            "expected `user` lifted into println! uses; got {:?}",
            node.taint.uses
        );
    }
    assert!(
        !printers.is_empty(),
        "no println! macro_invocation node found"
    );
}
/// A Go import (even with an alias) yields no import bindings.
#[test]
fn go_no_import_bindings() {
    let parsed = parse_to_file_cfg(
        b"package main\nimport alias \"fmt\"\n",
        "go",
        Language::from(tree_sitter_go::LANGUAGE),
    );
    assert!(parsed.import_bindings.is_empty());
}
/// A plain Java import yields no import bindings.
#[test]
fn java_no_import_bindings() {
    let parsed = parse_to_file_cfg(
        b"import java.util.List;",
        "java",
        Language::from(tree_sitter_java::LANGUAGE),
    );
    assert!(parsed.import_bindings.is_empty());
}
/// `util.promisify(<member expression>)` records the alias together with the
/// dotted name of the wrapped function.
#[test]
fn js_promisify_alias_member_expression() {
    let parsed = parse_to_file_cfg(
        b"const execAsync = util.promisify(child_process.exec);",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let recorded = parsed.promisify_aliases.get("execAsync");
    let alias = recorded.expect("execAsync should be recorded");
    assert_eq!(alias.wrapped, "child_process.exec");
}
/// A bare `promisify(foo)` call records the alias with `foo` as the wrapped
/// name.
#[test]
fn js_promisify_alias_bare_identifier() {
    let parsed = parse_to_file_cfg(
        b"const run = promisify(foo);",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let wrapped = parsed
        .promisify_aliases
        .get("run")
        .map(|alias| alias.wrapped.as_str());
    assert_eq!(wrapped, Some("foo"));
}
/// A call through a promisify alias must carry a `Sink` label intersecting
/// `SHELL_ESCAPE`, as expected for the wrapped `child_process.exec`.
#[test]
fn js_promisify_labels_carry_to_alias_call() {
    let source = b"const runAsync = util.promisify(child_process.exec);\n\
                   function f(userCmd) { runAsync(userCmd); }";
    let parsed = parse_to_file_cfg(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    assert!(parsed.promisify_aliases.contains_key("runAsync"));

    // True when the node has at least one shell-escape sink label.
    let carries_shell_sink = |node: &NodeInfo| {
        node.taint.labels.iter().any(|lbl| {
            matches!(
                lbl,
                crate::labels::DataLabel::Sink(c)
                    if c.intersects(crate::labels::Cap::SHELL_ESCAPE)
            )
        })
    };

    // Search every body, since the call lives inside `f`.
    let any_runasync_sink = parsed
        .bodies
        .iter()
        .flat_map(|body| body.graph.node_weights())
        .any(|node| node.call.callee.as_deref() == Some("runAsync") && carries_shell_sink(node));
    assert!(
        any_runasync_sink,
        "runAsync call site should inherit child_process.exec's SHELL_ESCAPE sink"
    );
}
/// Promisify aliases are not collected when the language is Python.
#[test]
fn js_promisify_ignored_for_non_js_langs() {
    let parsed = parse_to_file_cfg(
        b"const x = util.promisify(exec)",
        "python",
        Language::from(tree_sitter_python::LANGUAGE),
    );
    assert!(parsed.promisify_aliases.is_empty());
}
/// Assigning a member expression that is not a promisify call records no alias.
#[test]
fn js_promisify_non_call_value_ignored() {
    let parsed = parse_to_file_cfg(
        b"const execAsync = child_process.exec;",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    assert!(parsed.promisify_aliases.is_empty());
}
/// Exercises `has_sql_placeholders` on queries that must and must not be
/// reported as parameterised.
#[test]
fn sql_placeholder_detection() {
    // Queries expected to be detected as containing placeholders.
    assert!(has_sql_placeholders("SELECT * FROM users WHERE id = $1"));
    assert!(has_sql_placeholders("SELECT * FROM users WHERE id = ?"));
    assert!(has_sql_placeholders("SELECT * FROM users WHERE id = %s"));
    assert!(has_sql_placeholders("INSERT INTO t (a, b) VALUES ($1, $2)"));
    assert!(has_sql_placeholders("SELECT * FROM t WHERE x = :name"));
    assert!(has_sql_placeholders("WHERE id = ? AND name = ?"));

    // Queries expected NOT to be detected as containing placeholders.
    assert!(!has_sql_placeholders("SELECT * FROM users"));
    assert!(!has_sql_placeholders("SELECT * FROM users WHERE id = 1"));
    assert!(!has_sql_placeholders("SELECT $dollar FROM t"));
    assert!(!has_sql_placeholders("SELECT * FROM t WHERE x = $0"));
    assert!(!has_sql_placeholders("ratio = 50%"));
}
/// Parameter names of a C function must appear in the file's summaries.
#[test]
fn c_function_extracts_param_names() {
    let parsed = parse_to_file_cfg(
        b"void handle_command(int cmd, char *arg) { }",
        "c",
        Language::from(tree_sitter_c::LANGUAGE),
    );
    // Flatten the parameter names of every summary into one list.
    let mut params = Vec::new();
    for summary in parsed.summaries.values() {
        params.extend(summary.param_names.iter().cloned());
    }
    assert!(
        params.contains(&"cmd".to_string()),
        "expected 'cmd' in params, got: {:?}",
        params
    );
    assert!(
        params.contains(&"arg".to_string()),
        "expected 'arg' in params, got: {:?}",
        params
    );
}
/// Parameter names of a C++ function must appear in the file's summaries.
#[test]
fn cpp_function_extracts_param_names() {
    let parsed = parse_to_file_cfg(
        b"void process(int x, std::string name) { }",
        "cpp",
        Language::from(tree_sitter_cpp::LANGUAGE),
    );
    // Flatten the parameter names of every summary into one list.
    let mut params = Vec::new();
    for summary in parsed.summaries.values() {
        params.extend(summary.param_names.iter().cloned());
    }
    assert!(
        params.contains(&"x".to_string()),
        "expected 'x' in params, got: {:?}",
        params
    );
    assert!(
        params.contains(&"name".to_string()),
        "expected 'name' in params, got: {:?}",
        params
    );
}
/// Call sites recorded in a function summary must carry arity, receiver and
/// qualifier metadata for both a plain call and a method call.
#[test]
fn local_summary_callees_carry_arity_and_receiver() {
    let source = br"
function outer(x, y) {
helper(x, y);
obj.method(x);
}
";
    let parsed = parse_to_file_cfg(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let outer = parsed
        .summaries
        .iter()
        .find(|(key, _)| key.name == "outer")
        .map(|(_, summary)| summary)
        .expect("outer summary should exist");

    // Plain function call: arity only, no receiver.
    let plain_call = outer
        .callees
        .iter()
        .find(|site| site.name == "helper")
        .expect("helper call should be recorded with structured metadata");
    assert_eq!(
        plain_call.arity,
        Some(2),
        "helper has 2 positional args at the call site"
    );
    assert_eq!(
        plain_call.receiver, None,
        "helper is not a method call — no receiver"
    );

    // Method call: receiver populated, qualifier suppressed.
    let member_call = outer
        .callees
        .iter()
        .find(|site| site.name.ends_with("method"))
        .expect("method call should be recorded");
    assert_eq!(member_call.arity, Some(1), "method has 1 positional arg");
    assert_eq!(
        member_call.receiver.as_deref(),
        Some("obj"),
        "js CallFn over member_expression should populate structured receiver"
    );
    assert_eq!(
        member_call.qualifier, None,
        "qualifier is suppressed once receiver is populated"
    );
}
/// A JS method call on a parameter records arity 1 and receiver `obj`.
#[test]
fn local_summary_callees_js_method_receiver() {
    let source = br"
function outer(obj, x) {
obj.method(x);
}
";
    let parsed = parse_to_file_cfg(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let outer = parsed
        .summaries
        .iter()
        .find(|(key, _)| key.name == "outer")
        .map(|(_, summary)| summary)
        .expect("js outer summary should exist");
    let member_call = outer
        .callees
        .iter()
        .find(|site| site.name.ends_with("method"))
        .expect("js method call should be recorded");
    assert_eq!(member_call.arity, Some(1));
    assert_eq!(
        member_call.receiver.as_deref(),
        Some("obj"),
        "js CallFn over member_expression should populate structured receiver"
    );
}
/// A Python method call on a parameter records arity 1 and receiver `obj`.
///
/// The fixture is indentation-sensitive: the call must be indented under the
/// `def` so that it belongs to `outer`'s body.
#[test]
fn local_summary_callees_python_method_receiver() {
    let src = b"
def outer(obj, x):
    obj.method(x)
";
    let ts_lang = Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_to_file_cfg(src, "python", ts_lang);
    let (_key, outer) = file_cfg
        .summaries
        .iter()
        .find(|(k, _)| k.name == "outer")
        .expect("python outer summary should exist");
    let method_site = outer
        .callees
        .iter()
        .find(|c| c.name.ends_with("method"))
        .expect("python method call should be recorded");
    assert_eq!(method_site.arity, Some(1));
    assert_eq!(
        method_site.receiver.as_deref(),
        Some("obj"),
        "python CallFn over attribute should populate structured receiver"
    );
}
/// A Java method invocation on a parameter records arity 1 and receiver `obj`.
#[test]
fn local_summary_callees_java_method_receiver() {
    let source = br"
class Outer {
void outer(Bar obj, int x) {
obj.method(x);
}
}
";
    let parsed = parse_to_file_cfg(source, "java", Language::from(tree_sitter_java::LANGUAGE));
    let outer = parsed
        .summaries
        .iter()
        .find(|(key, _)| key.name == "outer")
        .map(|(_, summary)| summary)
        .expect("java outer summary should exist");
    let member_call = outer
        .callees
        .iter()
        .find(|site| site.name.ends_with("method"))
        .expect("java method call should be recorded");
    assert_eq!(member_call.arity, Some(1));
    assert_eq!(
        member_call.receiver.as_deref(),
        Some("obj"),
        "java CallMethod should populate the structured receiver field"
    );
}
/// Python keyword arguments must land in `call.kwargs` in source order, while
/// `arg_uses` holds only the positional argument and the receiver is tracked
/// separately.
///
/// The fixture is indentation-sensitive: the call must be indented under the
/// `def` so that it belongs to `outer`'s body.
#[test]
fn call_node_kwargs_populated_for_python() {
    let src = b"
def outer(cmd):
    subprocess.run(cmd, shell=True, check=False)
";
    let ts_lang = Language::from(tree_sitter_python::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "python", ts_lang);
    let call_node = cfg
        .node_weights()
        .find(|n| {
            n.kind == StmtKind::Call && n.call.callee.as_deref().is_some_and(|c| c.ends_with("run"))
        })
        .expect("subprocess.run call node should exist");
    assert_eq!(
        call_node.call.arg_uses.len(),
        1,
        "arg_uses should be [cmd] — receiver is separate, kwargs are not positional"
    );
    assert_eq!(call_node.call.arg_uses[0], vec!["cmd".to_string()]);
    assert_eq!(call_node.call.receiver.as_deref(), Some("subprocess"));
    let kwargs = &call_node.call.kwargs;
    assert_eq!(kwargs.len(), 2, "two keyword arguments expected");
    assert_eq!(kwargs[0].0, "shell");
    assert_eq!(kwargs[1].0, "check");
}
/// A JS object-literal argument must not be treated as keyword arguments.
#[test]
fn call_node_kwargs_empty_for_javascript() {
    let source = br"
function outer(cmd) {
child_process.exec(cmd, { shell: true });
}
";
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let exec_call = graph
        .node_weights()
        .find(|node| {
            let names_exec = node
                .call
                .callee
                .as_deref()
                .is_some_and(|callee| callee.ends_with("exec"));
            node.kind == StmtKind::Call && names_exec
        })
        .expect("child_process.exec call node should exist");
    assert!(
        exec_call.call.kwargs.is_empty(),
        "JS object-literal arg is not a keyword_argument — kwargs should stay empty"
    );
}
/// Two calls to the same callee inside one function must be recorded as two
/// call sites with different ordinals.
#[test]
fn local_summary_callees_have_distinct_ordinals() {
    let source = br"
function outer() {
a();
a();
b();
}
";
    let parsed = parse_to_file_cfg(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let outer = parsed
        .summaries
        .iter()
        .find(|(key, _)| key.name == "outer")
        .map(|(_, summary)| summary)
        .unwrap();
    let a_sites: Vec<_> = outer
        .callees
        .iter()
        .filter(|site| site.name == "a")
        .collect();
    assert_eq!(
        a_sites.len(),
        2,
        "two a() calls should produce two entries with distinct ordinals, got: {:?}",
        a_sites
    );
    assert_ne!(
        a_sites[0].ordinal, a_sites[1].ordinal,
        "ordinals must differ across sites"
    );
}
/// Parses `src` as JavaScript and returns the `FuncKey` name of every body
/// that has one, in body order.
fn js_body_names(src: &[u8]) -> Vec<String> {
    let parsed = parse_to_file_cfg(
        src,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let mut names = Vec::new();
    for body in &parsed.bodies {
        if let Some(key) = body.meta.func_key.as_ref() {
            names.push(key.name.clone());
        }
    }
    names
}
/// Parses `src` as JavaScript and returns the `BodyKind` of every body, in
/// body order.
fn js_body_kinds(src: &[u8]) -> Vec<BodyKind> {
    let parsed = parse_to_file_cfg(
        src,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let mut kinds = Vec::new();
    for body in &parsed.bodies {
        kinds.push(body.meta.kind);
    }
    kinds
}
/// A function expression bound by `var` takes the declarator's name.
#[test]
fn anon_fn_named_from_var_declarator_js() {
    let names = js_body_names(b"var handler = function(x) { child_process.exec(x); };");
    let has_handler = names.iter().any(|name| name == "handler");
    assert!(
        has_handler,
        "expected body named `handler` from var declarator, got: {:?}",
        names
    );
}
/// An arrow function bound by `const` takes the declarator's name.
#[test]
fn anon_arrow_named_from_const_declarator_js() {
    let names = js_body_names(b"const run = (x) => { eval(x); };");
    let has_run = names.iter().any(|name| name == "run");
    assert!(
        has_run,
        "expected body named `run` from const arrow declarator, got: {:?}",
        names
    );
}
/// A function expression assigned to `this.run` takes the property name.
#[test]
fn anon_fn_named_from_member_assignment_js() {
    let names = js_body_names(b"this.run = function(x) { eval(x); };");
    let has_run = names.iter().any(|name| name == "run");
    assert!(
        has_run,
        "expected body named `run` from member assignment, got: {:?}",
        names
    );
}
/// A function literal passed as a call argument stays anonymous: it gets a
/// synthetic name and must not inherit the callee's name.
#[test]
fn anon_fn_passed_as_arg_stays_anonymous_js() {
    let source = b"apply(function(x) { eval(x); });";
    let names = js_body_names(source);
    let kinds = js_body_kinds(source);

    let has_anonymous_kind = kinds.contains(&BodyKind::AnonymousFunction);
    assert!(
        has_anonymous_kind,
        "expected at least one AnonymousFunction body, got: {:?}",
        kinds
    );

    let has_synthetic_name = names.iter().any(|name| is_anon_fn_name(name));
    assert!(
        has_synthetic_name,
        "expected synthetic anon name on FuncKey for call-argument fn literal, got: {:?}",
        names
    );

    let leaks_callee_name = names.iter().any(|name| name == "apply");
    assert!(
        !leaks_callee_name,
        "must not leak callee name onto its argument function, got: {:?}",
        names
    );
}
/// A named function declaration keeps its declared name.
#[test]
fn named_fn_declaration_unchanged_js() {
    let names = js_body_names(b"function real_name(x) { eval(x); }");
    let keeps_name = names.iter().any(|name| name == "real_name");
    assert!(
        keeps_name,
        "named declaration must retain its name, got: {:?}",
        names
    );
}
/// A Go func literal bound with `:=` is keyed by the variable name.
#[test]
fn anon_fn_named_from_short_var_decl_go() {
    let source = b"package main\nfunc main() { run := func(x string) { exec(x) }; run(\"hi\") }";
    let parsed = parse_to_file_cfg(source, "go", Language::from(tree_sitter_go::LANGUAGE));
    let mut names: Vec<String> = Vec::new();
    for body in &parsed.bodies {
        if let Some(key) = body.meta.func_key.as_ref() {
            names.push(key.name.clone());
        }
    }
    let has_run = names.iter().any(|name| name == "run");
    assert!(
        has_run,
        "expected func literal body keyed as `run` via Go short-var decl, got: {:?}",
        names
    );
}
/// An IIFE call site in the top-level body records a synthetic anonymous
/// callee name.
#[test]
fn iife_callee_resolves_to_anon_body_js() {
    let parsed = parse_to_file_cfg(
        b"(function(arg){ eval(arg); })(q);",
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let top_level = &parsed.bodies[0];
    let callee_names: Vec<String> = top_level
        .graph
        .node_weights()
        .filter_map(|node| node.call.callee.clone())
        .collect();
    let has_anon_callee = callee_names.iter().any(|callee| is_anon_fn_name(callee));
    assert!(
        has_anon_callee,
        "IIFE call site should record synthetic anon callee, got: {:?}",
        callee_names
    );
}
/// Builds the CFG for Rust `src` and collects the capability of every
/// `DataLabel::Sanitizer` label, in node order.
fn rust_body_sanitizer_caps(src: &[u8]) -> Vec<Cap> {
    let (graph, _) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));
    let mut caps = Vec::new();
    for idx in graph.node_indices() {
        for label in graph[idx].taint.labels.clone() {
            if let DataLabel::Sanitizer(cap) = label {
                caps.push(cap);
            }
        }
    }
    caps
}
/// Replacing `..` and `/` literals must yield a sanitizer containing `FILE_IO`.
#[test]
fn replace_chain_strips_file_io_for_path_traversal_literals() {
    let source = br#"
fn sanitize_input(s: &str) -> String {
s.replace("..", "").replace("/", "_")
}
"#;
    let caps = rust_body_sanitizer_caps(source);
    let has_file_io = caps.iter().any(|cap| cap.contains(Cap::FILE_IO));
    assert!(
        has_file_io,
        "Expected a Sanitizer(FILE_IO) on the replace chain; got {:?}",
        caps
    );
}
/// Stripping `<` and `>` must yield an `HTML_ESCAPE` sanitizer and no
/// `FILE_IO` credit.
#[test]
fn replace_chain_strips_html_escape_for_angle_brackets() {
    let source = br#"
fn strip_tags(s: &str) -> String {
s.replace("<", "").replace(">", "")
}
"#;
    let caps = rust_body_sanitizer_caps(source);
    let has_html_escape = caps.iter().any(|cap| cap.contains(Cap::HTML_ESCAPE));
    assert!(
        has_html_escape,
        "Expected a Sanitizer(HTML_ESCAPE) on angle-bracket strip; got {:?}",
        caps
    );
    let has_file_io = caps.iter().any(|cap| cap.contains(Cap::FILE_IO));
    assert!(
        !has_file_io,
        "Angle-bracket strip should NOT earn FILE_IO credit; got {:?}",
        caps
    );
}
/// A replace chain over unrecognised literals earns no sanitizer labels.
#[test]
fn replace_chain_rejects_unrecognised_literals() {
    let source = br#"
fn rewrite(s: &str) -> String {
s.replace("foo", "bar").replace("baz", "qux")
}
"#;
    let caps = rust_body_sanitizer_caps(source);
    let no_credit = caps.is_empty();
    assert!(
        no_credit,
        "Generic replace chain should not earn sanitizer credit; got {:?}",
        caps
    );
}
/// A replacement string that itself contains `..` earns no sanitizer labels.
#[test]
fn replace_chain_rejects_when_replacement_reintroduces_pattern() {
    let source = br#"
fn evil(s: &str) -> String {
s.replace("x", "..")
}
"#;
    let caps = rust_body_sanitizer_caps(source);
    let no_credit = caps.is_empty();
    assert!(
        no_credit,
        "Replacement reintroducing dangerous pattern must kill credit; got {:?}",
        caps
    );
}
/// A `replace` whose needle is a variable earns no sanitizer labels.
#[test]
fn replace_chain_rejects_dynamic_arg() {
    let source = br#"
fn dynamic(s: &str, needle: &str) -> String {
s.replace(needle, "")
}
"#;
    let caps = rust_body_sanitizer_caps(source);
    let no_credit = caps.is_empty();
    assert!(
        no_credit,
        "Dynamic replace arg must not earn credit; got {:?}",
        caps
    );
}
/// A replace chain whose base is a call expression earns no sanitizer labels.
#[test]
fn replace_chain_rejects_non_identifier_base() {
    let source = br#"
fn base_is_call() -> String {
get_s().replace("..", "")
}
"#;
    let caps = rust_body_sanitizer_caps(source);
    let no_credit = caps.is_empty();
    assert!(
        no_credit,
        "Non-identifier chain base must not earn credit; got {:?}",
        caps
    );
}
/// Returns the first node (in graph order) whose `taint.defines` equals `var`,
/// or `None` when no node defines it.
fn find_node_defining<'a>(cfg: &'a Cfg, var: &str) -> Option<&'a NodeInfo> {
    cfg.node_weights()
        .find(|node| node.taint.defines.as_deref() == Some(var))
}
/// Reading `.length` into a variable marks the defining node as a numeric
/// length access.
#[test]
fn numeric_length_access_detected_on_js_property_read() {
    let source = br#"function f(items) {
var count = items.length;
return count;
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let definer = find_node_defining(&graph, "count").expect("defines count");
    assert!(
        definer.is_numeric_length_access,
        "Expected is_numeric_length_access=true for `count = items.length`"
    );
}
/// A zero-argument `.size()` call marks the defining node as a numeric length
/// access.
#[test]
fn numeric_length_access_detected_on_js_zero_arg_method_call() {
    let source = br#"function f(list) {
var n = list.size();
return n;
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let definer = find_node_defining(&graph, "n").expect("defines n");
    assert!(
        definer.is_numeric_length_access,
        "Expected is_numeric_length_access=true for `n = list.size()`"
    );
}
/// Reading an unrelated property (`arr.foo`) must not set the numeric length
/// flag.
#[test]
fn numeric_length_access_ignores_unrelated_properties() {
    let source = br#"function f(arr) {
var v = arr.foo;
return v;
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let definer = find_node_defining(&graph, "v").expect("defines v");
    assert!(
        !definer.is_numeric_length_access,
        "is_numeric_length_access must stay false for unrelated property `arr.foo`"
    );
}
/// A method call that takes arguments must not set the numeric length flag.
#[test]
fn numeric_length_access_ignores_method_calls_with_args() {
    let source = br#"function f(s) {
var r = s.indexOf('x');
return r;
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let definer = find_node_defining(&graph, "r").expect("defines r");
    assert!(
        !definer.is_numeric_length_access,
        "is_numeric_length_access must stay false for arg-bearing calls"
    );
}
use std::sync::Mutex;
/// Serialises every test that overrides `NYX_POINTER_ANALYSIS`, because the
/// process environment is shared by all test threads.
static POINTER_ENV_GUARD: Mutex<()> = Mutex::new(());

/// Runs `f` with `NYX_POINTER_ANALYSIS` set to `value` (`None` removes the
/// variable) and returns `f`'s result.
///
/// The previous value is restored afterwards, including when `f` panics
/// (for example on a failed assertion), so an override cannot leak into tests
/// that run later. A lock poisoned by an earlier panicking test is recovered
/// rather than propagated.
fn with_pointer_env<R>(value: Option<&str>, f: impl FnOnce() -> R) -> R {
    const VAR: &str = "NYX_POINTER_ANALYSIS";

    /// Sets `VAR` to `value`, or removes it when `value` is `None`.
    fn apply(value: Option<&str>) {
        // SAFETY: every caller holds `POINTER_ENV_GUARD`, so overrides made
        // through this helper never race with each other.
        unsafe {
            match value {
                Some(v) => std::env::set_var(VAR, v),
                None => std::env::remove_var(VAR),
            }
        }
    }

    /// Puts the saved value back when dropped, on normal return and on unwind.
    struct Restore(Option<String>);

    impl Drop for Restore {
        fn drop(&mut self) {
            apply(self.0.as_deref());
        }
    }

    // Declaration order matters: `_restore` is dropped before `_lock`, so the
    // environment is restored while the lock is still held.
    let _lock = POINTER_ENV_GUARD.lock().unwrap_or_else(|e| e.into_inner());
    let _restore = Restore(std::env::var(VAR).ok());
    apply(value);
    f()
}
/// Runs `f` with `NYX_POINTER_ANALYSIS` set to `"1"` and returns its result.
fn with_pointer_on<R>(f: impl FnOnce() -> R) -> R {
    let enabled = Some("1");
    with_pointer_env(enabled, f)
}
/// Counts the nodes in `cfg` whose call callee is exactly `callee`.
fn count_nodes_with_callee(cfg: &Cfg, callee: &str) -> usize {
    cfg.node_weights()
        .filter(|node| node.call.callee.as_deref() == Some(callee))
        .count()
}
/// Returns the first node (in graph order) whose call callee is exactly
/// `callee`, or `None` when there is none.
fn find_node_with_callee<'a>(cfg: &'a Cfg, callee: &str) -> Option<&'a NodeInfo> {
    cfg.node_weights()
        .find(|node| node.call.callee.as_deref() == Some(callee))
}
/// With pointer analysis on, a JS subscript read becomes an `__index_get__`
/// call node with the array as receiver, the index as its only argument group
/// and a synthetic `__nyx_idxget_` define.
#[test]
fn js_subscript_read_lowers_to_index_get_call() {
    with_pointer_on(|| {
        let source = br#"function f(arr) {
sink(arr[0]);
}"#;
        let (graph, _) = parse_and_build(
            source,
            "javascript",
            Language::from(tree_sitter_javascript::LANGUAGE),
        );
        let getter = find_node_with_callee(&graph, "__index_get__")
            .expect("__index_get__ node should be present");
        assert_eq!(getter.call.receiver.as_deref(), Some("arr"));
        assert_eq!(getter.call.arg_uses.len(), 1, "expect one arg group (index)");
        assert_eq!(getter.call.arg_uses[0], vec!["0"]);
        let has_synth_define = getter
            .taint
            .defines
            .as_deref()
            .is_some_and(|name| name.starts_with("__nyx_idxget_"));
        assert!(
            has_synth_define,
            "synth defines should use the __nyx_idxget_ prefix"
        );
    });
}
/// With pointer analysis on, a JS subscript write becomes an `__index_set__`
/// call node whose argument groups are the index and the stored value.
#[test]
fn js_subscript_write_lowers_to_index_set_call() {
    with_pointer_on(|| {
        let source = br#"function f(arr, v) {
arr[0] = v;
}"#;
        let (graph, _) = parse_and_build(
            source,
            "javascript",
            Language::from(tree_sitter_javascript::LANGUAGE),
        );
        let setter = find_node_with_callee(&graph, "__index_set__")
            .expect("__index_set__ node should be present");
        assert_eq!(setter.call.receiver.as_deref(), Some("arr"));
        let groups = &setter.call.arg_uses;
        assert_eq!(groups.len(), 2, "expect arg_uses [[idx], [val]]");
        assert_eq!(groups[0], vec!["0"]);
        assert_eq!(groups[1], vec!["v"]);
    });
}
/// With pointer analysis on, a Python subscript read becomes an
/// `__index_get__` call node with the array as receiver.
///
/// The fixture is indentation-sensitive: the statement must be indented under
/// the `def` so that it belongs to `f`'s body.
#[test]
fn py_subscript_read_lowers_to_index_get_call() {
    with_pointer_on(|| {
        let src = br#"def f(arr):
    sink(arr[0])
"#;
        let ts_lang = Language::from(tree_sitter_python::LANGUAGE);
        let (cfg, _entry) = parse_and_build(src, "python", ts_lang);
        let node = find_node_with_callee(&cfg, "__index_get__")
            .expect("python: __index_get__ node should be present");
        assert_eq!(node.call.receiver.as_deref(), Some("arr"));
    });
}
/// With pointer analysis on, a Python subscript write becomes an
/// `__index_set__` call node with two argument groups, the second being the
/// stored value.
///
/// The fixture is indentation-sensitive: the statement must be indented under
/// the `def` so that it belongs to `f`'s body.
#[test]
fn py_subscript_write_lowers_to_index_set_call() {
    with_pointer_on(|| {
        let src = br#"def f(arr, v):
    arr[0] = v
"#;
        let ts_lang = Language::from(tree_sitter_python::LANGUAGE);
        let (cfg, _entry) = parse_and_build(src, "python", ts_lang);
        let node = find_node_with_callee(&cfg, "__index_set__")
            .expect("python: __index_set__ node should be present");
        assert_eq!(node.call.receiver.as_deref(), Some("arr"));
        assert_eq!(node.call.arg_uses.len(), 2);
        assert_eq!(node.call.arg_uses[1], vec!["v"]);
    });
}
/// With pointer analysis on, a Go index expression read becomes an
/// `__index_get__` call node with the slice as receiver.
#[test]
fn go_index_expr_read_lowers_to_index_get_call() {
    with_pointer_on(|| {
        let source = br#"package main
func f(arr []string) {
sink(arr[0])
}
"#;
        let (graph, _) = parse_and_build(source, "go", Language::from(tree_sitter_go::LANGUAGE));
        let getter = find_node_with_callee(&graph, "__index_get__")
            .expect("go: __index_get__ node should be present");
        assert_eq!(getter.call.receiver.as_deref(), Some("arr"));
    });
}
/// With pointer analysis on, a Go map write becomes an `__index_set__` call
/// node whose argument groups are the key and the stored value.
#[test]
fn go_index_expr_write_lowers_to_index_set_call() {
    with_pointer_on(|| {
        let source = br#"package main
func f(m map[string]int, k string, v int) {
m[k] = v
}
"#;
        let (graph, _) = parse_and_build(source, "go", Language::from(tree_sitter_go::LANGUAGE));
        let setter = find_node_with_callee(&graph, "__index_set__")
            .expect("go: __index_set__ node should be present");
        assert_eq!(setter.call.receiver.as_deref(), Some("m"));
        let groups = &setter.call.arg_uses;
        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0], vec!["k"]);
        assert_eq!(groups[1], vec!["v"]);
    });
}
/// With `NYX_POINTER_ANALYSIS` set to `"0"`, no synthetic index get/set call
/// nodes are produced.
#[test]
fn pointer_disabled_skips_subscript_synthesis() {
    with_pointer_env(Some("0"), || {
        let source = br#"function f(arr, v) {
sink(arr[0]);
arr[1] = v;
}"#;
        let (graph, _) = parse_and_build(
            source,
            "javascript",
            Language::from(tree_sitter_javascript::LANGUAGE),
        );
        let getters = count_nodes_with_callee(&graph, "__index_get__");
        assert_eq!(getters, 0);
        let setters = count_nodes_with_callee(&graph, "__index_set__");
        assert_eq!(setters, 0);
    });
}
/// A switch with two explicit cases plus `default` must produce two dispatch
/// `If` nodes, each with at least one True and one False out-edge.
#[test]
fn js_switch_cascade_has_one_if_per_case() {
    let source = br#"function f(x) {
switch (x) {
case 1: a(); break;
case 2: b(); break;
default: c();
}
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    assert_eq!(
        if_nodes(&graph).len(),
        2,
        "switch with 2 explicit cases + default should emit 2 dispatch If nodes"
    );
    for dispatch in if_nodes(&graph) {
        let mut true_edges = 0;
        let mut false_edges = 0;
        for edge in graph.edges(dispatch) {
            match edge.weight() {
                EdgeKind::True => true_edges += 1,
                EdgeKind::False => false_edges += 1,
                _ => {}
            }
        }
        assert!(
            true_edges >= 1,
            "case dispatch should have at least one True edge"
        );
        assert!(
            false_edges >= 1,
            "case dispatch should have at least one False edge"
        );
    }
}
/// A `default` clause between two cases must still give exactly one `If` node
/// per non-default case.
#[test]
fn js_switch_default_in_middle_reorders_to_tail() {
    let source = br#"function f(x) {
switch (x) {
case 1: a(); break;
default: c(); break;
case 2: b(); break;
}
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let dispatch_count = if_nodes(&graph).len();
    assert_eq!(
        dispatch_count, 2,
        "default-in-middle should still produce one If per non-default case"
    );
}
/// A case without `break` must fall through: `second()` has to be reachable
/// from `first()` over Seq/True/False edges, and `first()` needs a Seq
/// out-edge.
#[test]
fn js_switch_fallthrough_no_break() {
    use std::collections::HashSet;
    let source = br#"function f(x) {
switch (x) {
case 1: first();
case 2: second(); break;
}
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let first = graph
        .node_indices()
        .find(|&idx| graph[idx].call.callee.as_deref() == Some("first"))
        .expect("expected a Call node for `first`");
    let second = graph
        .node_indices()
        .find(|&idx| graph[idx].call.callee.as_deref() == Some("second"))
        .expect("expected a Call node for `second`");

    // Depth-first walk from `first`, following forward edges only.
    let mut visited: HashSet<NodeIndex> = HashSet::new();
    let mut pending = vec![first];
    while let Some(current) = pending.pop() {
        if visited.insert(current) {
            let forward_targets = graph
                .edges(current)
                .filter(|edge| {
                    matches!(
                        edge.weight(),
                        EdgeKind::Seq | EdgeKind::True | EdgeKind::False
                    )
                })
                .map(|edge| edge.target());
            pending.extend(forward_targets);
        }
    }
    assert!(
        visited.contains(&second),
        "fall-through: `second` must be reachable from `first` over forward edges"
    );

    let seq_out_edges = graph
        .edges(first)
        .filter(|edge| matches!(edge.weight(), EdgeKind::Seq))
        .count();
    assert!(
        seq_out_edges >= 1,
        "fall-through: `first()` must have a Seq out-edge (the fall-through wire)"
    );
}
/// A `for` loop must produce exactly one `Loop` node and at least one Back
/// edge in the graph.
#[test]
fn js_for_loop_has_back_edge() {
    let source = br#"function f() { for (let i = 0; i < 10; i++) { body(); } }"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let headers: Vec<_> = graph
        .node_indices()
        .filter(|&idx| graph[idx].kind == StmtKind::Loop)
        .collect();
    assert_eq!(headers.len(), 1, "expected exactly one Loop node");
    let back_edge_count = graph
        .edge_references()
        .filter(|edge| matches!(edge.weight(), EdgeKind::Back))
        .count();
    assert!(
        back_edge_count >= 1,
        "for loop must have at least one Back edge to its header"
    );
}
/// A `do … while` must produce one `Loop` node and at least one Back edge.
#[test]
fn js_do_while_has_loop_node_and_back_edge() {
    let source = br#"function f() { do { body(); } while (cond); }"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let header_count = graph
        .node_indices()
        .filter(|&idx| graph[idx].kind == StmtKind::Loop)
        .count();
    assert_eq!(header_count, 1, "do-while should produce one Loop node");
    let has_back_edge = graph
        .edge_references()
        .any(|edge| matches!(edge.weight(), EdgeKind::Back));
    assert!(has_back_edge, "do-while must have at least one Back edge");
}
/// A `break` in the inner of two nested loops must not have a Back edge onto
/// the first `Loop` node (taken here as the outer header).
#[test]
fn js_nested_while_break_targets_inner_loop() {
    let source = br#"function f() {
while (a) {
while (b) { break; }
inner_after();
}
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let headers: Vec<_> = graph
        .node_indices()
        .filter(|&idx| graph[idx].kind == StmtKind::Loop)
        .collect();
    assert_eq!(headers.len(), 2, "expected two Loop nodes");
    let break_nodes: Vec<_> = graph
        .node_indices()
        .filter(|&idx| graph[idx].kind == StmtKind::Break)
        .collect();
    assert_eq!(break_nodes.len(), 1, "expected exactly one Break node");
    let (outer_header, inner_break) = (headers[0], break_nodes[0]);
    let crosses_outer = graph
        .edges(inner_break)
        .any(|edge| edge.target() == outer_header && matches!(edge.weight(), EdgeKind::Back));
    assert!(
        !crosses_outer,
        "inner break must not back-edge onto the outer loop header"
    );
}
/// A `continue` in the inner of two nested loops must have a Back edge to the
/// second `Loop` node (inner header) and none to the first (outer header).
#[test]
fn js_nested_while_continue_targets_inner_loop() {
    let source = br#"function f() {
while (a) {
while (b) { continue; }
}
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let headers: Vec<_> = graph
        .node_indices()
        .filter(|&idx| graph[idx].kind == StmtKind::Loop)
        .collect();
    assert_eq!(headers.len(), 2, "expected two Loop nodes");
    let (outer_header, inner_header) = (headers[0], headers[1]);
    let continue_node = graph
        .node_indices()
        .find(|&idx| graph[idx].kind == StmtKind::Continue)
        .expect("expected Continue node");
    let back_edges_out: Vec<_> = graph
        .edges(continue_node)
        .filter(|edge| matches!(edge.weight(), EdgeKind::Back))
        .collect();
    assert!(
        !back_edges_out.is_empty(),
        "continue must originate at least one Back edge"
    );
    let hits_inner = back_edges_out
        .iter()
        .any(|edge| edge.target() == inner_header);
    assert!(
        hits_inner,
        "continue's Back edge must target the inner loop header"
    );
    let hits_outer = back_edges_out
        .iter()
        .any(|edge| edge.target() == outer_header);
    assert!(
        !hits_outer,
        "continue must not back-edge onto the outer loop header"
    );
}
/// A re-throw inside an inner `catch` must yield exactly one `Throw` node, and
/// the outer catch body (`handle()`) must have at least one incoming edge.
#[test]
fn js_throw_inside_catch_emits_throw_node() {
    let source = br#"function f() {
try {
try { foo(); } catch (e) { throw e; }
} catch (e2) {
handle();
}
}"#;
    let (graph, _) = parse_and_build(
        source,
        "javascript",
        Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let throw_nodes: Vec<_> = graph
        .node_indices()
        .filter(|&idx| graph[idx].kind == StmtKind::Throw)
        .collect();
    assert_eq!(
        throw_nodes.len(),
        1,
        "expected exactly one Throw node for the inner re-throw"
    );
    let handler_call = graph
        .node_indices()
        .find(|&idx| graph[idx].call.callee.as_deref() == Some("handle"))
        .expect("expected `handle()` call node");
    let incoming = graph
        .edges_directed(handler_call, petgraph::Direction::Incoming)
        .count();
    assert!(incoming >= 1, "outer catch body must be reachable");
}
/// An `if` whose then- and else-blocks are both empty must still build, and
/// its single `If` node must carry at least one `True` and one `False` edge.
#[test]
fn js_if_with_empty_branches_does_not_panic() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { if (a) {} else {} return; }";
    let (cfg, _entry) = parse_and_build(source, "javascript", language);

    let ifs = if_nodes(&cfg);
    assert_eq!(ifs.len(), 1, "expected one If node");
    let if_node = ifs[0];

    let has_true = cfg
        .edges(if_node)
        .any(|edge| matches!(edge.weight(), EdgeKind::True));
    let has_false = cfg
        .edges(if_node)
        .any(|edge| matches!(edge.weight(), EdgeKind::False));

    assert!(has_true, "empty-then If must still emit True edge");
    assert!(has_false, "empty-else If must still emit False edge");
}
/// A function with an empty body must still yield at least two bodies in the
/// file CFG, and none of the bodies may have an empty graph.
#[test]
fn js_empty_function_body_well_formed() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_to_file_cfg(b"function f() {}", "javascript", language);

    let body_count = file_cfg.bodies.len();
    assert!(
        body_count >= 2,
        "expected at least 2 bodies (top-level + function)"
    );

    for body in &file_cfg.bodies {
        let nodes = body.graph.node_count();
        assert!(nodes > 0, "every body must have at least one node");
    }
}
/// Returns every node in `cfg` whose kind is `StmtKind::Loop`, in node-index order.
fn loop_headers(cfg: &Cfg) -> Vec<NodeIndex> {
    let mut headers = Vec::new();
    for node in cfg.node_indices() {
        if cfg[node].kind == StmtKind::Loop {
            headers.push(node);
        }
    }
    headers
}
/// Returns the `(source, target)` pair of every `EdgeKind::Back` edge in `cfg`.
fn back_edges(cfg: &Cfg) -> Vec<(NodeIndex, NodeIndex)> {
    let mut pairs = Vec::new();
    for edge in cfg.edge_references() {
        if matches!(edge.weight(), EdgeKind::Back) {
            pairs.push((edge.source(), edge.target()));
        }
    }
    pairs
}
/// Shared assertion for the per-language loop tests.
///
/// Panics unless `cfg` contains at least one Loop header and at least one
/// Back edge, and every Back edge targets one of those headers. `label` is
/// prefixed to each failure message to identify the calling test.
fn assert_loop_with_back_edge(cfg: &Cfg, label: &str) {
    let headers = loop_headers(cfg);
    assert!(
        !headers.is_empty(),
        "{label}: expected at least one Loop header, found none"
    );

    let backs = back_edges(cfg);
    assert!(
        !backs.is_empty(),
        "{label}: expected at least one Back edge"
    );

    for target in backs.iter().map(|(_, dst)| dst) {
        assert!(
            headers.contains(target),
            "{label}: Back edge target {target:?} is not a Loop header (headers={headers:?})"
        );
    }
}
/// JavaScript: a classic three-clause `for` loop must satisfy
/// `assert_loop_with_back_edge`.
#[test]
fn js_for_loop_back_edge() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { for (let i = 0; i < 10; i++) { body(i); } }";
    let graph = parse_and_build(source, "javascript", language).0;
    assert_loop_with_back_edge(&graph, "js classic for");
}
/// JavaScript: a `do { } while (...)` loop must satisfy
/// `assert_loop_with_back_edge`.
#[test]
fn js_do_while_back_edge() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { do { body(); } while (cond()); }";
    let graph = parse_and_build(source, "javascript", language).0;
    assert_loop_with_back_edge(&graph, "js do-while");
}
/// JavaScript: a `for (... in ...)` loop must satisfy
/// `assert_loop_with_back_edge`.
#[test]
fn js_for_in_back_edge() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { for (let k in obj) { use(k); } }";
    let graph = parse_and_build(source, "javascript", language).0;
    assert_loop_with_back_edge(&graph, "js for-in");
}
/// JavaScript: a `for (... of ...)` loop must satisfy
/// `assert_loop_with_back_edge`.
#[test]
fn js_for_of_back_edge() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { for (const x of items) { use(x); } }";
    let graph = parse_and_build(source, "javascript", language).0;
    assert_loop_with_back_edge(&graph, "js for-of");
}
/// Python: a `for ... in ...` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn python_for_loop_back_edge() {
    let language = Language::from(tree_sitter_python::LANGUAGE);
    let source = b"def f():\n for x in items:\n use(x)\n";
    let graph = parse_and_build(source, "python", language).0;
    assert_loop_with_back_edge(&graph, "python for");
}
/// Python: a `while` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn python_while_loop_back_edge() {
    let language = Language::from(tree_sitter_python::LANGUAGE);
    let source = b"def f():\n while cond():\n use(x)\n";
    let graph = parse_and_build(source, "python", language).0;
    assert_loop_with_back_edge(&graph, "python while");
}
/// Java: an enhanced `for (T x : xs)` loop must satisfy
/// `assert_loop_with_back_edge`.
#[test]
fn java_enhanced_for_back_edge() {
    let language = Language::from(tree_sitter_java::LANGUAGE);
    let source = b"class A { void f(int[] xs) { for (int x : xs) { use(x); } } }";
    let graph = parse_and_build(source, "java", language).0;
    assert_loop_with_back_edge(&graph, "java enhanced-for");
}
/// Java: a `do { } while (...)` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn java_do_while_back_edge() {
    let language = Language::from(tree_sitter_java::LANGUAGE);
    let source = b"class A { void f() { do { body(); } while (cond()); } }";
    let graph = parse_and_build(source, "java", language).0;
    assert_loop_with_back_edge(&graph, "java do-while");
}
/// C++: a range-based `for (T x : xs)` loop must satisfy
/// `assert_loop_with_back_edge`.
///
/// The fixture iterates over the declared parameter `xs`, so the snippet does
/// not refer to an undeclared identifier.
#[test]
fn cpp_range_for_back_edge() {
    let src = b"void f(int* xs) { for (int x : xs) { use(x); } }";
    let ts_lang = Language::from(tree_sitter_cpp::LANGUAGE);
    let (cfg, _) = parse_and_build(src, "cpp", ts_lang);
    assert_loop_with_back_edge(&cfg, "cpp range-for");
}
/// C: a `do { } while (...)` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn c_do_while_back_edge() {
    let language = Language::from(tree_sitter_c::LANGUAGE);
    let source = b"void f() { do { body(); } while (cond()); }";
    let graph = parse_and_build(source, "c", language).0;
    assert_loop_with_back_edge(&graph, "c do-while");
}
/// Go: a three-clause `for` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn go_for_loop_back_edge() {
    let language = Language::from(tree_sitter_go::LANGUAGE);
    let source = b"package p\nfunc f() { for i := 0; i < 10; i++ { body(i) } }";
    let graph = parse_and_build(source, "go", language).0;
    assert_loop_with_back_edge(&graph, "go for");
}
/// Go: the Loop header of a `for _, p := range xs` loop must list the binding
/// `p` among its taint defines (or extra defines) and `xs` among its uses.
#[test]
fn go_for_range_loop_binding_is_defined() {
    let language = Language::from(tree_sitter_go::LANGUAGE);
    let source = b"package p\nfunc f(xs []string) { for _, p := range xs { use(p) } }";
    let (cfg, _) = parse_and_build(source, "go", language);

    let header = cfg
        .node_indices()
        .find(|&node| matches!(cfg[node].kind, StmtKind::Loop))
        .expect("for-range loop should produce a Loop header");
    let taint = &cfg[header].taint;

    // Merge both define lists so the check does not care which one holds `p`.
    let mut all_defs: Vec<&str> = Vec::new();
    all_defs.extend(taint.defines.iter().map(String::as_str));
    all_defs.extend(taint.extra_defines.iter().map(String::as_str));
    assert!(
        all_defs.contains(&"p"),
        "loop binding `p` should appear in defines/extra_defines, got {all_defs:?}"
    );

    let uses_iterable = taint.uses.iter().any(|u| u == "xs");
    assert!(
        uses_iterable,
        "iterable `xs` should appear in uses, got {:?}",
        taint.uses
    );
}
/// Ruby: a `while ... end` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn ruby_while_back_edge() {
    let language = Language::from(tree_sitter_ruby::LANGUAGE);
    let source = b"def f\n while cond\n body\n end\nend\n";
    let graph = parse_and_build(source, "ruby", language).0;
    assert_loop_with_back_edge(&graph, "ruby while");
}
/// Ruby: an `until ... end` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn ruby_until_back_edge() {
    let language = Language::from(tree_sitter_ruby::LANGUAGE);
    let source = b"def f\n until done\n body\n end\nend\n";
    let graph = parse_and_build(source, "ruby", language).0;
    assert_loop_with_back_edge(&graph, "ruby until");
}
/// PHP: a `foreach (... as ...)` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn php_foreach_back_edge() {
    let language = Language::from(tree_sitter_php::LANGUAGE_PHP);
    let source = b"<?php function f($items) { foreach ($items as $x) { use($x); } }";
    let graph = parse_and_build(source, "php", language).0;
    assert_loop_with_back_edge(&graph, "php foreach");
}
/// Rust: a `for ... in ...` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn rust_for_loop_back_edge() {
    let language = Language::from(tree_sitter_rust::LANGUAGE);
    let source = b"fn f() { for x in 0..10 { use_fn(x); } }";
    let graph = parse_and_build(source, "rust", language).0;
    assert_loop_with_back_edge(&graph, "rust for");
}
/// Rust: a `while` loop must satisfy `assert_loop_with_back_edge`.
#[test]
fn rust_while_loop_back_edge() {
    let language = Language::from(tree_sitter_rust::LANGUAGE);
    let source = b"fn f() { while cond() { body(); } }";
    let graph = parse_and_build(source, "rust", language).0;
    assert_loop_with_back_edge(&graph, "rust while");
}
/// Two nested JavaScript `for` loops must yield two Loop headers and at least
/// two Back edges, every Back edge must land on a header, and both headers
/// must be hit by at least one Back edge.
#[test]
fn nested_loops_two_headers_two_back_edges() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { for (let i = 0; i < 10; i++) { for (let j = 0; j < 10; j++) { use(i, j); } } }";
    let (cfg, _) = parse_and_build(source, "javascript", language);

    let headers = loop_headers(&cfg);
    assert_eq!(headers.len(), 2, "expected 2 loop headers in nested loops");

    let backs = back_edges(&cfg);
    let back_count = backs.len();
    assert!(
        back_count >= 2,
        "expected ≥2 back edges in nested loops, got {}",
        back_count
    );
    assert!(
        backs.iter().all(|(_, dst)| headers.contains(dst)),
        "back edge target not a loop header"
    );

    // Distinct Back-edge destinations: both headers must be represented.
    let targeted: std::collections::HashSet<_> = backs.iter().map(|&(_, dst)| dst).collect();
    assert_eq!(
        targeted.len(),
        2,
        "each header must receive at least one back edge"
    );
}
/// In a single `while` loop, no `Break` node may have an outgoing `Back` edge
/// to the loop header.
#[test]
fn loop_with_break_no_back_edge_from_break() {
    let language = Language::from(tree_sitter_javascript::LANGUAGE);
    let source = b"function f() { while (cond()) { if (done()) break; body(); } }";
    let (cfg, _) = parse_and_build(source, "javascript", language);

    let headers = loop_headers(&cfg);
    assert_eq!(headers.len(), 1, "expected 1 loop header");
    let header = headers[0];

    // Scan the outgoing edges of every Break node for a Back edge to the header.
    let break_loops_back = cfg
        .node_indices()
        .filter(|&node| cfg[node].kind == StmtKind::Break)
        .flat_map(|node| cfg.edges(node))
        .any(|edge| matches!(edge.weight(), EdgeKind::Back) && edge.target() == header);
    assert!(
        !break_loops_back,
        "break must not produce a back edge to the loop header"
    );
}
/// In a single `while` loop, at least one `Continue` node must have an
/// outgoing `Back` edge that targets the loop header.
#[test]
fn loop_with_continue_back_edge_to_header() {
    let src = b"function f() { while (cond()) { if (skip()) continue; body(); } }";
    let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let (cfg, _) = parse_and_build(src, "javascript", ts_lang);

    let headers = loop_headers(&cfg);
    // Carry a failure message here, matching the sibling break test.
    assert_eq!(headers.len(), 1, "expected 1 loop header");
    let header = headers[0];

    // Look through the outgoing edges of every Continue node for a Back edge
    // that lands on the header.
    let found = cfg
        .node_indices()
        .filter(|&n| cfg[n].kind == StmtKind::Continue)
        .flat_map(|n| cfg.edges(n))
        .any(|e| matches!(e.weight(), EdgeKind::Back) && e.target() == header);
    assert!(
        found,
        "expected at least one Back edge from a Continue node to the loop header"
    );
}
/// For the chained call `https.get(...).on(...)`, some `Call` node must have
/// a callee ending in `https.get`, and the first such node must have
/// `sink_payload_args` populated.
#[test]
fn chained_method_call_rebinds_to_inner_gated_sink() {
    let src = b"function f(uri) { https.get(uri, r => {}).on('error', e => {}); }";
    let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let (cfg, _) = parse_and_build(src, "javascript", ts_lang);

    // First Call node (in node-index order) whose callee ends with `https.get`.
    let rebound = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.kind == StmtKind::Call)
        .find_map(|info| {
            let callee = info.call.callee.as_deref()?;
            callee.ends_with("https.get").then_some((info, callee))
        });

    let Some((info, callee)) = rebound else {
        panic!(
            "expected at least one Call node whose callee was rebound from \
             the outer `.on(...)` to the inner `https.get` after the \
             chained-call inner-gate rebinding fired"
        );
    };

    assert!(
        info.call.sink_payload_args.is_some(),
        "expected sink_payload_args to be populated for chained \
         inner-gate https.get; got None on call node with callee {callee:?}"
    );
}