use super::*;
use crate::cfg::FileCfg;
use crate::interop::InteropEdge;
use crate::labels::Cap;
use crate::symbol::FuncKey;
/// Runs the SSA taint pipeline over a single Rust snippet and returns its findings.
///
/// The snippet is parsed with tree-sitter, turned into a file CFG, lowered to
/// SSA and fed through `run_ssa_taint`. When the file CFG holds more than one
/// body the body at index 1 is analysed, otherwise `first_body()` is used
/// (presumably index 0 is the top-level body — TODO confirm).
///
/// The returned findings are sorted by `(sink index, source index)` and
/// adjacent entries sharing the same `(sink, source)` pair are collapsed.
///
/// # Panics
/// Panics if the grammar cannot be installed, the parser yields no tree, or
/// SSA lowering fails.
fn ssa_analyse_rust(src: &[u8]) -> Vec<Finding> {
    use crate::cfg::build_cfg;
    use crate::state::symbol::SymbolInterner;

    let grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&grammar).unwrap();
    let syntax_tree = ts_parser.parse(src, None).unwrap();
    let file_cfg = build_cfg(&syntax_tree, src, "rust", "test.rs", None);

    // Pick the body to analyse: index 1 when present, else the first body.
    let target = if file_cfg.bodies.len() <= 1 {
        file_cfg.first_body()
    } else {
        &file_cfg.bodies[1]
    };
    let graph = &target.graph;

    let interner = SymbolInterner::from_cfg(graph);
    let ssa = crate::ssa::lower_to_ssa(graph, target.entry, None, true)
        .expect("SSA lowering should succeed");

    // Minimal transfer configuration: only the file-local summaries are
    // supplied; every optional input is left unset.
    let config = ssa_transfer::SsaTaintTransfer {
        lang: Lang::Rust,
        namespace: "test.rs",
        interner: &interner,
        local_summaries: &file_cfg.summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };

    let events = ssa_transfer::run_ssa_taint(&ssa, graph, &config);
    let mut results = ssa_transfer::ssa_events_to_findings(&events, &ssa, graph);
    // Sort first so that duplicates become adjacent for `dedup_by_key`.
    results.sort_by_key(|f| (f.sink.index(), f.source.index()));
    results.dedup_by_key(|f| (f.sink, f.source));
    results
}
/// SSA path: an `env::var` value passed straight to `Command::arg` must be
/// reported exactly once.
#[test]
fn ssa_linear_source_to_sink() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS_ARG").unwrap();
Command::new("sh").arg(x).status().unwrap();
}"#;
    let reported = ssa_analyse_rust(program);
    assert_eq!(
        reported.len(),
        1,
        "SSA: linear source→sink should produce 1 finding"
    );
}
/// SSA path: routing the tainted value through `shell_escape::unix::escape`
/// before the shell sink is expected to leave no findings.
#[test]
fn ssa_linear_sanitized_no_finding() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let clean = shell_escape::unix::escape(&x);
Command::new("sh").arg(clean).status().unwrap();
}"#;
    let reported = ssa_analyse_rust(program);
    assert!(
        reported.is_empty(),
        "SSA: matching sanitizer should eliminate finding"
    );
}
/// SSA path: shadowing the tainted binding with a string constant before the
/// sink is expected to leave no findings.
#[test]
fn ssa_reassignment_kills_taint() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let x = "safe_constant";
Command::new("sh").arg(x).status().unwrap();
}"#;
    let reported = ssa_analyse_rust(program);
    assert!(
        reported.is_empty(),
        "SSA: reassignment to constant should kill taint"
    );
}
/// SSA path: a tainted value used at sinks in both arms of an `if`/`else`
/// must yield at least one finding.
#[test]
fn ssa_taint_through_branch_merge() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS").unwrap();
let safe = html_escape::encode_safe(&x);
if x.len() > 5 {
Command::new("sh").arg(&x).status().unwrap();
} else {
Command::new("sh").arg(&safe).status().unwrap();
}
}"#;
    let reported = ssa_analyse_rust(program);
    // Only a lower bound is asserted here, not an exact count.
    assert!(
        !reported.is_empty(),
        "SSA: taint through branch should produce at least 1 finding"
    );
}
/// SSA path: a tainted value mutated inside a `while` loop and used at a sink
/// after the loop must yield exactly one finding.
#[test]
fn ssa_taint_through_loop() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let mut x = env::var("DANGEROUS").unwrap();
while x.len() < 100 {
x.push_str("a");
}
Command::new("sh").arg(x).status().unwrap();
}"#;
    let reported = ssa_analyse_rust(program);
    assert_eq!(
        reported.len(),
        1,
        "SSA: taint through loop should produce 1 finding"
    );
}
/// SSA path: a tainted variable that never reaches the sink must not cause a
/// finding when a different, constant variable is passed to the sink.
#[test]
fn ssa_multi_variable_independence() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("TAINTED").unwrap();
let y = "safe";
Command::new("sh").arg(y).status().unwrap();
}"#;
    let reported = ssa_analyse_rust(program);
    assert!(
        reported.is_empty(),
        "SSA: untainted variable at sink should produce no finding"
    );
}
/// An `env::var` value passed straight to `Command::arg` must be reported
/// exactly once by `analyse_file`.
#[test]
fn env_to_arg_is_flagged() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS_ARG").unwrap();
Command::new("sh").arg(x).status().unwrap();
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "env::var flowing into Command::arg should produce exactly 1 finding"
    );
}
/// A tainted value and its HTML-escaped copy are each passed to a shell sink
/// in opposite arms of an `if`/`else`; two findings are expected.
#[test]
fn taint_through_if_else() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS").unwrap();
let safe = html_escape::encode_safe(&x);
if x.len() > 5 {
Command::new("sh").arg(&x).status().unwrap(); // UNSAFE
} else {
Command::new("sh").arg(&safe).status().unwrap(); // SAFE
}
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    // Despite the `// SAFE` marker inside the fixture, the else-branch is
    // expected to fire as well: `html_escape::encode_safe` is the wrong
    // sanitizer for a shell sink (see `wrong_sanitizer_preserves_taint`).
    assert_eq!(
        findings.len(),
        2,
        "both branches reach the shell sink with shell-tainted data"
    );
}
/// A tainted value mutated inside a `while` loop and used at a sink after the
/// loop must be reported exactly once.
#[test]
fn taint_through_while_loop() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let mut x = env::var("DANGEROUS").unwrap();
while x.len() < 100 { // Loop header (Loop)
x.push_str("a");
}
Command::new("sh").arg(x).status().unwrap(); // Should be flagged
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "taint should survive the while loop and reach the sink once"
    );
}
/// Passing the tainted value through `shell_escape::unix::escape` before the
/// shell sink is expected to leave no findings.
#[test]
fn taint_killed_by_matching_sanitizer() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let clean = shell_escape::unix::escape(&x);
Command::new("sh").arg(clean).status().unwrap();
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "matching sanitizer should kill the taint"
    );
}
/// Passing the tainted value through `html_escape::encode_safe` before a
/// shell sink must still yield exactly one finding.
#[test]
fn wrong_sanitizer_preserves_taint() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let clean = html_escape::encode_safe(&x);
Command::new("sh").arg(clean).status().unwrap();
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "wrong sanitizer should NOT kill the taint"
    );
}
/// A source and a sink both inside a `loop` body that ends with `break` must
/// be reported exactly once.
#[test]
fn taint_breaks_out_of_loop() {
    let src = br#"
use std::{env, process::Command};
fn main() {
loop {
let x = env::var("DANGEROUS").unwrap();
Command::new("sh").arg(&x).status().unwrap(); // vulnerable
break;
}
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "source→sink inside a loop body should produce exactly 1 finding"
    );
}
/// Two independent sources reach two shell sinks; one passes through
/// `shell_escape::unix::escape` first, so only the other must be reported.
#[test]
fn test_two_sources_one_sanitised() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let y = env::var("ANOTHER").unwrap();
let clean = shell_escape::unix::escape(&y);
Command::new("sh").arg(x).status().unwrap();
Command::new("sh").arg(clean).status().unwrap();
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "only the unsanitised source should be flagged"
    );
}
/// Two independent sources reach two shell sinks; one passes through
/// `html_escape::encode_safe` first, which must not suppress its finding.
#[test]
fn test_two_sources_wrong_sanitiser_both_flagged() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let y = env::var("ANOTHER").unwrap();
let clean = html_escape::encode_safe(&y);
Command::new("sh").arg(x).status().unwrap();
Command::new("sh").arg(clean).status().unwrap();
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        2,
        "both should be flagged — wrong sanitiser"
    );
}
/// Analysing a function with an early `return` and no sources or sinks must
/// complete without panicking and report nothing.
#[test]
fn test_should_not_panic_on_empty_function() {
    let src = br#"
use std::{env, process::Command};
fn f() {
if cond() {
return;
}
do_something();
}"#;
    // `parse_rust` performs the same tree-sitter setup and `build_cfg` call
    // this test used to spell out by hand.
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "a function without sources or sinks should produce no findings"
    );
}
/// A callee known only through a global summary with `source_caps` set must
/// act as a taint source for the caller's inline shell sink.
#[test]
fn cross_file_source_resolved_via_global_summaries() {
    use crate::summary::FuncSummary;

    let caller = br#"
use std::process::Command;
fn main() {
let x = get_dangerous();
Command::new("sh").arg(x).status().unwrap();
}"#;
    let file_cfg = parse_rust(caller);

    // Summary for `get_dangerous`, registered under another file's namespace.
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "get_dangerous".into(),
        arity: Some(0),
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "get_dangerous".into(),
        file_path: "file_a.rs".into(),
        lang: "rust".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(callee_key, callee_summary);

    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1, "cross-file source should be detected");
}
/// A callee known only through a global summary with `sanitizer_caps` set
/// must neutralise taint before the caller's inline shell sink.
#[test]
fn cross_file_sanitizer_resolved_via_global_summaries() {
    use crate::summary::FuncSummary;

    let caller = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let clean = my_sanitize(x);
Command::new("sh").arg(clean).status().unwrap();
}"#;
    let file_cfg = parse_rust(caller);

    // Summary for `my_sanitize`, registered under another file's namespace.
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "my_sanitize".into(),
        arity: Some(1),
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "my_sanitize".into(),
        file_path: "file_a.rs".into(),
        lang: "rust".into(),
        param_count: 1,
        param_names: vec!["input".into()],
        source_caps: 0,
        sanitizer_caps: Cap::all().bits(),
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(callee_key, callee_summary);

    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "cross-file sanitizer should neutralise taint"
    );
}
fn parse_rust(src: &[u8]) -> FileCfg {
use crate::cfg::build_cfg;
use tree_sitter::Language;
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
.unwrap();
let tree = parser.parse(src, None).unwrap();
build_cfg(&tree, src, "rust", "test.rs", None)
}
fn extract_summaries_from_bytes(src: &[u8], path: &str) -> Vec<crate::summary::FuncSummary> {
use crate::cfg::export_summaries;
let file_cfg = parse_rust(src);
export_summaries(&file_cfg.summaries, path, "rust")
}
/// A callee known only through a global summary with `sink_caps` set and
/// parameter 0 marked as a tainted sink parameter must act as a sink.
#[test]
fn cross_file_sink_resolved_via_global_summaries() {
    use crate::summary::FuncSummary;

    let caller = br#"
use std::env;
fn main() {
let x = env::var("INPUT").unwrap();
dangerous_exec(x);
}"#;
    let file_cfg = parse_rust(caller);

    // Summary for `dangerous_exec`, registered under another file's namespace.
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "dangerous_exec".into(),
        arity: Some(1),
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "dangerous_exec".into(),
        file_path: "file_a.rs".into(),
        lang: "rust".into(),
        param_count: 1,
        param_names: vec!["cmd".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![0],
        callees: vec!["Command::new".into()],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(callee_key, callee_summary);

    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1, "cross-file sink should be detected");
}
/// When the global summary of a cross-file sink carries a `SinkSite` in
/// `param_to_sink`, the resulting finding must expose that site's file, line
/// and column through `primary_location`.
#[test]
fn cross_file_sink_finding_carries_primary_location() {
    use crate::summary::{FuncSummary, SinkSite};
    use smallvec::smallvec;
    let src = br#"
use std::env;
fn main() {
let x = env::var("INPUT").unwrap();
dangerous_exec(x);
}"#;
    let file_cfg = parse_rust(src);
    let local_summaries = &file_cfg.summaries;
    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "dangerous_exec".into(),
        arity: Some(1),
        ..Default::default()
    };
    // Location of the real sink inside the callee's file; the assertions at
    // the end check that exactly these coordinates surface on the finding.
    let sink_site = SinkSite {
        file_rel: "file_a.rs".into(),
        line: 42,
        col: 5,
        snippet: "Command::new(\"sh\").arg(cmd).status().unwrap();".into(),
        cap: Cap::SHELL_ESCAPE,
    };
    global.insert(
        key,
        FuncSummary {
            name: "dangerous_exec".into(),
            file_path: "file_a.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["cmd".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: Cap::SHELL_ESCAPE.bits(),
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![0],
            // `sink_site` is not used again, so it is moved rather than cloned.
            param_to_sink: vec![(0, smallvec![sink_site])],
            callees: vec!["Command::new".into()],
            ..Default::default()
        },
    );
    let findings = analyse_file(
        &file_cfg,
        local_summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "cross-file sink should still be detected",
    );
    let finding = &findings[0];
    let loc = finding
        .primary_location
        .as_ref()
        .expect("summary-resolved sink with SinkSite must carry primary_location");
    assert_eq!(loc.file_rel, "file_a.rs");
    assert_eq!(loc.line, 42);
    assert_eq!(loc.col, 5);
}
/// When the global summary of a cross-file sink carries only capability bits
/// (no `param_to_sink` entry is set here), the finding must still be produced
/// but its `primary_location` must be `None`.
#[test]
fn cross_file_sink_cap_only_site_leaves_primary_location_none() {
    use crate::summary::FuncSummary;

    let caller = br#"
use std::env;
fn main() {
let x = env::var("INPUT").unwrap();
dangerous_exec(x);
}"#;
    let file_cfg = parse_rust(caller);

    // Summary for `dangerous_exec`, registered under another file's namespace.
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "dangerous_exec".into(),
        arity: Some(1),
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "dangerous_exec".into(),
        file_path: "file_a.rs".into(),
        lang: "rust".into(),
        param_count: 1,
        param_names: vec!["cmd".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![0],
        callees: vec!["Command::new".into()],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(callee_key, callee_summary);

    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1, "cross-file sink should be detected");
    assert!(
        reported[0].primary_location.is_none(),
        "cap-only summary must not produce a primary_location",
    );
}
/// Summaries extracted from a library file that wraps `env::var` are merged
/// into global summaries; the caller's use of that wrapper at an inline shell
/// sink must yield exactly one finding.
#[test]
fn multi_file_source_to_sink_detected() {
    use crate::summary::merge_summaries;

    let library = br#"
use std::env;
fn get_dangerous() -> String {
env::var("SECRET").unwrap()
}
"#;
    let caller = br#"
use std::process::Command;
fn main() {
let x = get_dangerous();
Command::new("sh").arg(x).status().unwrap();
}
"#;
    let globals = merge_summaries(extract_summaries_from_bytes(library, "lib.rs"), None);
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "cross-file source → inline sink should produce 1 finding"
    );
}
/// Both the source wrapper and a shell-escaping wrapper live in a library
/// file; calling them in sequence before an inline shell sink is expected to
/// leave no findings.
#[test]
fn multi_file_sanitizer_neutralises_cross_file_source() {
    use crate::summary::merge_summaries;

    let library = br#"
use std::env;
fn get_input() -> String {
env::var("INPUT").unwrap()
}
fn clean_shell(s: &str) -> String {
shell_escape::unix::escape(s).to_string()
}
"#;
    let caller = br#"
use std::process::Command;
fn main() {
let x = get_input();
let clean = clean_shell(&x);
Command::new("sh").arg(clean).status().unwrap();
}
"#;
    let globals = merge_summaries(extract_summaries_from_bytes(library, "lib.rs"), None);
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "matching cross-file sanitizer should neutralise taint, got {} findings",
        reported.len()
    );
}
/// The library's sanitizing wrapper uses `html_escape::encode_safe`; applying
/// it before an inline shell sink must still yield exactly one finding.
#[test]
fn multi_file_wrong_sanitizer_preserves_taint() {
    use crate::summary::merge_summaries;

    let library = br#"
use std::env;
fn get_input() -> String {
env::var("INPUT").unwrap()
}
fn clean_html(s: &str) -> String {
html_escape::encode_safe(s).to_string()
}
"#;
    let caller = br#"
use std::process::Command;
fn main() {
let x = get_input();
let clean = clean_html(&x);
Command::new("sh").arg(clean).status().unwrap();
}
"#;
    let globals = merge_summaries(extract_summaries_from_bytes(library, "lib.rs"), None);
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "wrong sanitizer (HTML for shell sink) should NOT neutralise taint"
    );
}
/// The shell sink lives in a library wrapper; passing an `env::var` value to
/// that wrapper from the caller must yield exactly one finding.
#[test]
fn multi_file_sink_in_another_file() {
    use crate::summary::merge_summaries;

    let library = br#"
use std::process::Command;
fn exec_cmd(cmd: &str) {
Command::new("sh").arg(cmd).status().unwrap();
}
"#;
    let caller = br#"
use std::env;
fn main() {
let x = env::var("DANGEROUS").unwrap();
exec_cmd(&x);
}
"#;
    let globals = merge_summaries(extract_summaries_from_bytes(library, "lib.rs"), None);
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1, "cross-file sink should be detected");
}
/// A cross-file callee whose summary lists parameter 0 in
/// `propagating_params` (and has no source, sanitizer or sink caps) must let
/// taint flow through to the caller's inline shell sink.
#[test]
fn multi_file_passthrough_preserves_taint() {
    use crate::summary::FuncSummary;

    // Summary for `identity`, registered under the library's namespace.
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "identity".into(),
        arity: Some(1),
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "identity".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 1,
        param_names: vec!["s".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(callee_key, callee_summary);

    let caller = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let y = identity(&x);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "taint should propagate through passthrough function"
    );
}
/// Source, shell sanitizer and shell sink are all library wrappers; chaining
/// them in the caller is expected to leave no findings.
#[test]
fn multi_file_chain_source_sanitize_sink_across_files() {
    use crate::summary::merge_summaries;

    let library = br#"
use std::env;
use std::process::Command;
fn get_input() -> String {
env::var("INPUT").unwrap()
}
fn clean_shell(s: &str) -> String {
shell_escape::unix::escape(s).to_string()
}
fn exec_cmd(cmd: &str) {
Command::new("sh").arg(cmd).status().unwrap();
}
"#;
    let caller = br#"
fn main() {
let x = get_input();
let clean = clean_shell(&x);
exec_cmd(&clean);
}
"#;
    let globals = merge_summaries(extract_summaries_from_bytes(library, "lib.rs"), None);
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "source → matching sanitizer → sink should produce 0 findings, got {}",
        reported.len()
    );
}
/// A shell-escaped value passed to the locally defined `sink_html` function
/// is still expected to yield exactly one finding.
#[test]
fn sanitizer_strips_only_matching_bits() {
    let program = br#"
use std::env;
fn sink_html(s: &str) {}
fn main() {
let x = env::var("DANGEROUS").unwrap();
let clean = shell_escape::unix::escape(&x);
sink_html(&clean);
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "shell sanitizer should NOT strip HTML_ESCAPE bit; HTML sink should still fire"
    );
}
/// Applying the shell sanitizer and then the HTML sanitizer before a shell
/// sink is expected to leave no findings.
#[test]
fn multiple_sanitizers_strip_all_bits() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let a = shell_escape::unix::escape(&x);
let b = html_escape::encode_safe(&a);
Command::new("sh").arg(b).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "both sanitizers together should strip all relevant bits"
    );
}
/// Copying the tainted value into a second binding (`let y = x;`) and passing
/// that binding to the sink must yield exactly one finding.
#[test]
fn taint_through_variable_reassignment() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let y = x;
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "taint should flow through simple variable reassignment"
    );
}
/// A string literal passed to the shell sink, with no source anywhere in the
/// program, is expected to leave no findings.
#[test]
fn untainted_variable_at_sink_is_safe() {
    let program = br#"
use std::process::Command;
fn main() {
let x = "harmless";
Command::new("sh").arg(x).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "untainted literal should not trigger a finding"
    );
}
/// `my_func` is defined locally as a source wrapper while a same-named global
/// summary from another file describes it as a sanitizer; the finding must
/// still be produced, i.e. the local definition is expected to win.
#[test]
fn local_summary_takes_precedence_over_global() {
    use crate::summary::FuncSummary;

    let caller = br#"
use std::{env, process::Command};
fn my_func() -> String {
env::var("SECRET").unwrap()
}
fn main() {
let x = my_func();
Command::new("sh").arg(x).status().unwrap();
}
"#;

    // Conflicting global entry for `my_func`, registered under another file.
    let shadowed_key = FuncKey {
        lang: Lang::Rust,
        namespace: "other.rs".into(),
        name: "my_func".into(),
        arity: Some(0),
        ..Default::default()
    };
    let shadowed_summary = FuncSummary {
        name: "my_func".into(),
        file_path: "other.rs".into(),
        lang: "rust".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: 0,
        sanitizer_caps: Cap::all().bits(),
        sink_caps: 0,
        // NOTE(review): index 0 is listed although `param_count` is 0 —
        // confirm whether this mismatch is intentional.
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(shadowed_key, shadowed_summary);

    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "local summary (source) should take precedence over global (sanitizer)"
    );
}
/// Analysing the same program with `None` and with an empty
/// `GlobalSummaries` must produce the same number of findings.
#[test]
fn empty_global_summaries_same_as_none() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
Command::new("sh").arg(x).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let locals = &file_cfg.summaries;

    let without_globals = analyse_file(&file_cfg, locals, None, Lang::Rust, "test.rs", &[], None);

    let no_entries = GlobalSummaries::new();
    let with_empty_globals = analyse_file(
        &file_cfg,
        locals,
        Some(&no_entries),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    // Only the counts are compared, not the findings themselves.
    assert_eq!(
        without_globals.len(),
        with_empty_globals.len(),
        "empty GlobalSummaries should behave identically to None"
    );
}
/// The result of a call to a function with no known summary, passed to the
/// shell sink, is expected to leave no findings.
#[test]
fn taint_not_introduced_by_non_source_function() {
    let program = br#"
use std::process::Command;
fn main() {
let x = totally_unknown_func();
Command::new("sh").arg(x).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "unknown function call should not introduce taint"
    );
}
/// A cross-file callee whose summary sets both `source_caps` and `sink_caps`
/// must still report a tainted argument passed to it, exactly once.
#[test]
fn source_and_sink_on_same_function() {
    use crate::summary::FuncSummary;

    // Summary for `source_and_sink`, registered under the library's namespace.
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "source_and_sink".into(),
        arity: Some(1),
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "source_and_sink".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 1,
        param_names: vec!["input".into()],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![0],
        callees: vec![],
        ..Default::default()
    };
    let mut globals = GlobalSummaries::new();
    globals.insert(callee_key, callee_summary);

    let caller = br#"
use std::env;
fn main() {
let x = env::var("DANGEROUS").unwrap();
source_and_sink(x);
}
"#;
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "function that is both source and sink should detect tainted arg as finding"
    );
}
/// Two cross-file sources are registered through global summaries; one result
/// is shell-escaped before its sink and the other is not, so exactly one
/// finding is expected.
#[test]
fn multiple_cross_file_sources_one_sanitised() {
    use crate::summary::FuncSummary;

    let mut globals = GlobalSummaries::new();
    // Both callees share the same shape: zero-arity functions in `lib.rs`
    // whose summaries carry every source capability and nothing else.
    for callee in ["get_secret", "get_other_secret"] {
        let callee_key = FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: callee.into(),
            arity: Some(0),
            ..Default::default()
        };
        let callee_summary = FuncSummary {
            name: callee.into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        };
        globals.insert(callee_key, callee_summary);
    }

    let caller = br#"
use std::process::Command;
fn main() {
let a = get_secret();
let b = get_other_secret();
let clean_a = shell_escape::unix::escape(&a);
Command::new("sh").arg(clean_a).status().unwrap();
Command::new("sh").arg(b).status().unwrap();
}
"#;
    let file_cfg = parse_rust(caller);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&globals),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "only the unsanitised cross-file source should produce a finding"
    );
}
/// Parses `src` with the given tree-sitter grammar and builds its file CFG.
///
/// `slug` is forwarded to `build_cfg` as the language identifier and also
/// selects the synthetic file name handed to it (`test.rs`, `test.js`, …);
/// unrecognised slugs fall back to `test.txt`.
///
/// # Panics
/// Panics if the grammar cannot be installed or the parser yields no tree.
fn parse_lang(src: &[u8], slug: &str, ts_lang: tree_sitter::Language) -> FileCfg {
    use crate::cfg::build_cfg;

    let file_name = match slug {
        "rust" => "test.rs",
        "javascript" => "test.js",
        "typescript" => "test.ts",
        "python" => "test.py",
        "go" => "test.go",
        "java" => "test.java",
        "c" => "test.c",
        "cpp" => "test.cpp",
        "php" => "test.php",
        "ruby" => "test.rb",
        _ => "test.txt",
    };

    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&ts_lang).unwrap();
    let syntax_tree = ts_parser.parse(src, None).unwrap();
    build_cfg(&syntax_tree, src, slug, file_name, None)
}
/// JavaScript: `document.location()` flowing into `eval` must yield exactly
/// one finding.
#[test]
fn js_source_to_sink() {
    let program = b"function main() {\n let x = document.location();\n eval(x);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(program, "javascript", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "JS: source->sink should produce 1 finding"
    );
}
/// TypeScript: `document.location()` flowing into `eval` must yield exactly
/// one finding.
#[test]
fn ts_source_to_sink() {
    let program = b"function main() {\n let x = document.location();\n eval(x);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let file_cfg = parse_lang(program, "typescript", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "TS: source->sink should produce 1 finding"
    );
}
/// Python: `os.getenv` flowing into `os.system` must yield exactly one
/// finding.
#[test]
fn python_source_to_sink() {
    let program = b"def main():\n x = os.getenv(\"SECRET\")\n os.system(x)\n";
    let grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_lang(program, "python", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "Python: source->sink should produce 1 finding"
    );
}
/// Go: `os.Getenv` flowing into `exec.Command` must yield exactly one
/// finding.
#[test]
fn go_source_to_sink() {
    let program =
        b"package main\n\nfunc main() {\n\tx := os.Getenv(\"SECRET\")\n\texec.Command(x)\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(program, "go", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Go,
        "test.go",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "Go: source->sink should produce 1 finding"
    );
}
/// Java: `System.getenv` flowing into `Runtime.exec` must yield exactly one
/// finding.
#[test]
fn java_source_to_sink() {
    let program = b"class Main {\n void main() {\n String x = System.getenv(\"SECRET\");\n Runtime.exec(x);\n }\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(program, "java", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Java,
        "test.java",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "Java: source->sink should produce 1 finding"
    );
}
/// C: `getenv` flowing into `system` must yield exactly one finding.
#[test]
fn c_source_to_sink() {
    let program = b"void main() {\n char* x = getenv(\"SECRET\");\n system(x);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(program, "c", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "C: source->sink should produce 1 finding"
    );
}
/// C++: `getenv` flowing into `system` must yield exactly one finding.
#[test]
fn cpp_source_to_sink() {
    let program = b"void main() {\n char* x = getenv(\"SECRET\");\n system(x);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "C++: source->sink should produce 1 finding"
    );
}
/// C++ fixture: a `std::getenv` value is stored in a `std::string` and its
/// `c_str()` is passed to `std::system`; at least one finding is expected.
#[test]
fn cpp_c_str_propagates_taint() {
    let program = b"#include <cstdlib>\n#include <string>\nint main() {\n char* input = std::getenv(\"X\");\n std::string s = input;\n std::system(s.c_str());\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: tainted s.c_str() into system() must fire",
    );
}
/// C++ fixture: the tainted string is transferred with `std::move` before its
/// `c_str()` reaches `std::system`; at least one finding is expected.
#[test]
fn cpp_std_move_propagates_taint() {
    let program = b"#include <cstdlib>\n#include <string>\n#include <utility>\nint main() {\n char* input = std::getenv(\"X\");\n std::string s = input;\n std::string moved = std::move(s);\n std::system(moved.c_str());\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: taint must flow through std::move() into the sink",
    );
}
/// C++ fixture: the `std::getenv` pointer goes through
/// `static_cast<const char*>` before `std::system`; at least one finding is
/// expected.
#[test]
fn cpp_static_cast_propagates_taint() {
    let program = b"#include <cstdlib>\nint main() {\n char* input = std::getenv(\"X\");\n const char* casted = static_cast<const char*>(input);\n std::system(casted);\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: taint must flow through static_cast<T>() into the sink",
    );
}
/// C++ fixture: a `std::getenv` value is supplied to `host()` in the chained
/// expression `Socket::builder().host(h).port(80).connect()`; at least one
/// finding is expected.
#[test]
fn cpp_builder_chain_user_host_fires() {
    let program = b"#include <cstdlib>\n#include <string>\nclass Socket {\npublic:\n static Socket builder() { return Socket(); }\n Socket& host(const std::string& h) { host_ = h; return *this; }\n Socket& port(int p) { port_ = p; return *this; }\n void connect() {}\nprivate:\n std::string host_;\n int port_ = 0;\n};\nint main() {\n char* h = std::getenv(\"X\");\n Socket::builder().host(h).port(80).connect();\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: tainted host through fluent builder chain must reach terminal connect()",
    );
}
/// Negative counterpart of the builder-chain test: `host()` receives the
/// string literal `"api.example.com"`, so no finding may be reported.
#[test]
fn cpp_builder_chain_const_host_silent() {
    let program = b"#include <string>\nclass Socket {\npublic:\n static Socket builder() { return Socket(); }\n Socket& host(const std::string& h) { host_ = h; return *this; }\n Socket& port(int p) { port_ = p; return *this; }\n void connect() {}\nprivate:\n std::string host_;\n int port_ = 0;\n};\nint main() {\n Socket::builder().host(\"api.example.com\").port(80).connect();\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "C++: builder chain with literal host must NOT fire (Negative)",
    );
}
/// C++ fixture: `Inner::run`, defined inline in the class body, forwards its
/// argument to `std::system`; calling it with a `std::getenv` value must
/// yield at least one finding.
#[test]
fn cpp_inline_class_method_resolves() {
    let program = b"#include <cstdlib>\nclass Inner {\npublic:\n void run(const char* arg) { std::system(arg); }\n};\nint main() {\n char* input = std::getenv(\"X\");\n Inner inner;\n inner.run(input);\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: tainted arg through inline class method must reach system()",
    );
}
/// C++ fixture: a lambda `echo` that returns its argument wraps the
/// `std::getenv` value before `std::system`; at least one finding is
/// expected.
#[test]
fn cpp_identity_lambda_propagates_taint() {
    let program = b"#include <cstdlib>\nint main() {\n char* input = std::getenv(\"X\");\n auto echo = [](const char* s) { return s; };\n std::system(echo(input));\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: taint must flow through identity lambda echo() into system()",
    );
}
/// C++ fixture: a `std::vector<char>` is constructed from the `std::getenv`
/// pointer and its `data()` is passed to `std::system`; at least one finding
/// is expected.
#[test]
fn cpp_vector_data_propagates_taint() {
    let program = b"#include <cstdlib>\n#include <vector>\nint main() {\n char* input = std::getenv(\"X\");\n std::vector<char> v(input, input + 8);\n std::system(v.data());\n return 0;\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(program, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C++: taint must flow through v.data() into the sink",
    );
}
/// PHP fixture: `file_get_contents` result is passed to `system` inside a
/// function; exactly one finding is expected.
#[test]
fn php_source_to_sink() {
    let program =
        b"<?php\nfunction main() {\n $x = file_get_contents(\"secret\");\n system($x);\n}\n?>";
    let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let parsed = parse_lang(program, "php", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Php,
        "test.php",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "PHP: source->sink should produce 1 finding"
    );
}
/// PHP fixture: `$_GET['name']` is concatenated into an `echo` statement at
/// top level; exactly one finding is expected.
#[test]
fn php_echo_xss() {
    let program = b"<?php\n$name = $_GET['name'];\necho \"<h1>Hello \" . $name . \"</h1>\";\n";
    let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let parsed = parse_lang(program, "php", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Php,
        "test.php",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "PHP echo with tainted var should produce 1 XSS finding"
    );
}
/// PHP fixture: `$_POST['data']` is echoed directly through a variable;
/// exactly one finding is expected.
#[test]
fn php_echo_simple_var() {
    let program = b"<?php\n$x = $_POST['data'];\necho $x;\n";
    let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let parsed = parse_lang(program, "php", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Php,
        "test.php",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "PHP echo with simple tainted var should produce 1 finding"
    );
}
/// PHP fixture: `echo` of a plain string literal; zero findings are expected.
#[test]
fn php_echo_safe_literal() {
    let program = b"<?php\necho \"hello world\";\n";
    let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let parsed = parse_lang(program, "php", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Php,
        "test.php",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        0,
        "PHP echo with literal string should produce 0 findings"
    );
}
/// Ruby fixture: `gets()` result is passed to `system`; exactly one finding
/// is expected.
#[test]
fn ruby_source_to_sink() {
    let program = b"def main\n x = gets()\n system(x)\nend\n";
    let grammar = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let parsed = parse_lang(program, "ruby", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Ruby,
        "test.rb",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "Ruby: source->sink should produce 1 finding"
    );
}
fn extract_lang_summaries(
src: &[u8],
slug: &str,
ts_lang: tree_sitter::Language,
path: &str,
) -> Vec<crate::summary::FuncSummary> {
use crate::cfg::export_summaries;
let file_cfg = parse_lang(src, slug, ts_lang);
let local = &file_cfg.summaries;
export_summaries(local, path, slug)
}
/// A Python function returning `os.getenv(...)` is summarised under `lib.py`;
/// a JavaScript caller passes its result to `eval`. Without an interop edge
/// no finding may be reported; with an edge linking the JS call site to the
/// Python function exactly one finding is expected.
#[test]
fn cross_lang_python_source_to_js_sink_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    // Callee side: export Python summaries and merge them into a global table.
    let callee_src = b"def get_input():\n x = os.getenv(\"SECRET\")\n return x\n";
    let callee_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let callee_summaries = extract_lang_summaries(callee_src, "python", callee_grammar, "lib.py");
    let global = merge_summaries(callee_summaries, None);

    // Caller side: JavaScript file under analysis.
    let caller_src = b"function main() {\n let x = get_input();\n eval(x);\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "javascript", caller_grammar);

    // First pass: no interop edges supplied.
    let without_edges = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &[],
        None,
    );
    assert!(without_edges.is_empty(), "No cross-lang without interop edge");

    // Second pass: one edge from the JS call site to the Python function.
    let call_site = CallSiteKey {
        caller_lang: Lang::JavaScript,
        caller_namespace: "main.js".into(),
        caller_func: "main".into(),
        callee_symbol: "get_input".into(),
        ordinal: 0,
    };
    let target = FuncKey {
        lang: Lang::Python,
        namespace: "lib.py".into(),
        name: "get_input".into(),
        arity: Some(0),
        ..Default::default()
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let with_edges = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert_eq!(
        with_edges.len(),
        1,
        "Python source → JS sink via interop edge"
    );
}
/// A Go function returning `os.Getenv(...)` is summarised under `lib.go`; a
/// Python caller passes its result to `os.system`. No finding is expected
/// without an interop edge, exactly one with it.
#[test]
fn cross_lang_go_source_to_python_sink_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    // Callee side: Go summaries merged into the global table.
    let callee_src =
        b"package main\n\nfunc fetch_env() string {\n\tx := os.Getenv(\"SECRET\")\n\treturn x\n}\n";
    let callee_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let callee_summaries = extract_lang_summaries(callee_src, "go", callee_grammar, "lib.go");
    let global = merge_summaries(callee_summaries, None);

    // Caller side: Python file under analysis.
    let caller_src = b"def main():\n x = fetch_env()\n os.system(x)\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "python", caller_grammar);

    let without_edges = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Python,
        "main.py",
        &[],
        None,
    );
    assert!(without_edges.is_empty(), "No cross-lang without interop");

    let call_site = CallSiteKey {
        caller_lang: Lang::Python,
        caller_namespace: "main.py".into(),
        caller_func: "main".into(),
        callee_symbol: "fetch_env".into(),
        ordinal: 0,
    };
    let target = FuncKey {
        lang: Lang::Go,
        namespace: "lib.go".into(),
        name: "fetch_env".into(),
        arity: Some(0),
        ..Default::default()
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let with_edges = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Python,
        "main.py",
        &edges,
        None,
    );
    assert_eq!(with_edges.len(), 1, "Go source → Python sink via interop");
}
/// JavaScript routes `document.location()` through a Rust helper that wraps
/// `shell_escape::unix::escape` and then calls `eval` on the result. The
/// test expects findings to still be reported for the `eval` call.
#[test]
fn cross_lang_rust_sanitizer_in_js_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    // Callee side: Rust helper summarised under `lib.rs`.
    let callee_src = br#"
fn clean_shell(s: &str) -> String {
shell_escape::unix::escape(s).to_string()
}
"#;
    let callee_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let callee_summaries = extract_lang_summaries(callee_src, "rust", callee_grammar, "lib.rs");
    let global = merge_summaries(callee_summaries, None);

    // Caller side: JavaScript file under analysis.
    let caller_src = b"function main() {\n let x = document.location();\n let y = clean_shell(x);\n eval(y);\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "javascript", caller_grammar);

    let call_site = CallSiteKey {
        caller_lang: Lang::JavaScript,
        caller_namespace: "main.js".into(),
        caller_func: "main".into(),
        callee_symbol: "clean_shell".into(),
        ordinal: 0,
    };
    let target = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "clean_shell".into(),
        arity: Some(1),
        ..Default::default()
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert!(
        !reported.is_empty(),
        "SHELL_ESCAPE sanitizer should NOT neutralise eval (code injection) taint"
    );
}
/// A C function `run_cmd` forwards its parameter to `system`; a Java caller
/// passes a `System.getenv` value to it through an interop edge. Exactly one
/// finding is expected.
#[test]
fn cross_lang_c_sink_called_from_java_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    // Callee side: C sink wrapper summarised under `native.c`.
    let callee_src = b"void run_cmd(char* cmd) {\n system(cmd);\n}\n";
    let callee_grammar = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let callee_summaries = extract_lang_summaries(callee_src, "c", callee_grammar, "native.c");
    let global = merge_summaries(callee_summaries, None);

    // Caller side: Java file under analysis.
    let caller_src = b"class Main {\n void main() {\n String x = System.getenv(\"INPUT\");\n run_cmd(x);\n }\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "java", caller_grammar);

    let call_site = CallSiteKey {
        caller_lang: Lang::Java,
        caller_namespace: "Main.java".into(),
        caller_func: "main".into(),
        callee_symbol: "run_cmd".into(),
        ordinal: 0,
    };
    let target = FuncKey {
        lang: Lang::C,
        namespace: "native.c".into(),
        name: "run_cmd".into(),
        arity: Some(1),
        ..Default::default()
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Java,
        "Main.java",
        &edges,
        None,
    );
    assert_eq!(reported.len(), 1, "Java source → C sink via interop");
}
/// Summaries from three languages are merged: a Python function returning
/// `os.getenv(...)`, a C function calling `system`, and a Rust helper wrapping
/// `shell_escape::unix::escape`. A Go caller chains them as
/// `get_secret -> make_safe -> run_dangerous` with one interop edge per call.
/// The test expects no findings.
#[test]
fn cross_lang_three_languages_merged_summaries_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let py_src = b"def get_secret():\n x = os.getenv(\"SECRET\")\n return x\n";
    let py_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_grammar, "source.py");

    let c_src = b"void run_dangerous(char* cmd) {\n system(cmd);\n}\n";
    let c_grammar = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let c_sums = extract_lang_summaries(c_src, "c", c_grammar, "native.c");

    let rs_src = br#"
fn make_safe(s: &str) -> String {
shell_escape::unix::escape(s).to_string()
}
"#;
    let rs_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let rs_sums = extract_lang_summaries(rs_src, "rust", rs_grammar, "lib.rs");

    // Concatenate in the order Python, C, Rust before merging.
    let mut all_sums = py_sums;
    all_sums.extend(c_sums);
    all_sums.extend(rs_sums);
    let global = merge_summaries(all_sums, None);

    let go_src = b"package main\n\nfunc main() {\n\tx := get_secret()\n\ty := make_safe(x)\n\trun_dangerous(y)\n}\n";
    let go_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let caller_cfg = parse_lang(go_src, "go", go_grammar);

    let source_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "get_secret".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Python,
            namespace: "source.py".into(),
            name: "get_secret".into(),
            arity: Some(0),
            ..Default::default()
        },
    };
    let sanitizer_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "make_safe".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "make_safe".into(),
            arity: Some(1),
            ..Default::default()
        },
    };
    let sink_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "run_dangerous".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::C,
            namespace: "native.c".into(),
            name: "run_dangerous".into(),
            arity: Some(1),
            ..Default::default()
        },
    };
    let edges = vec![source_edge, sanitizer_edge, sink_edge];

    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert!(
        reported.is_empty(),
        "source(Py) → sanitizer(Rs) → sink(C) via interop should be safe; got {} findings",
        reported.len()
    );
}
/// Same Python source and C sink as the merged-summaries test, but the Go
/// caller passes the value directly from `get_secret` to `run_dangerous`.
/// Exactly one finding is expected.
#[test]
fn cross_lang_three_languages_unsanitised_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let py_src = b"def get_secret():\n x = os.getenv(\"SECRET\")\n return x\n";
    let py_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_grammar, "source.py");

    let c_src = b"void run_dangerous(char* cmd) {\n system(cmd);\n}\n";
    let c_grammar = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let c_sums = extract_lang_summaries(c_src, "c", c_grammar, "native.c");

    // Python summaries first, then C, matching the merge input order.
    let mut all_sums = py_sums;
    all_sums.extend(c_sums);
    let global = merge_summaries(all_sums, None);

    let go_src = b"package main\n\nfunc main() {\n\tx := get_secret()\n\trun_dangerous(x)\n}\n";
    let go_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let caller_cfg = parse_lang(go_src, "go", go_grammar);

    let source_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "get_secret".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Python,
            namespace: "source.py".into(),
            name: "get_secret".into(),
            arity: Some(0),
            ..Default::default()
        },
    };
    let sink_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "run_dangerous".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::C,
            namespace: "native.c".into(),
            name: "run_dangerous".into(),
            arity: Some(1),
            ..Default::default()
        },
    };
    let edges = vec![source_edge, sink_edge];

    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "source(Py) → sink(C) without sanitizer via interop"
    );
}
/// Two functions named `process_data` — one extracted from Python source, one
/// hand-built C summary with zero `source_caps` — are merged. Per-language
/// lookups must each return one entry, and the C entry must keep
/// `source_caps == 0`.
#[test]
fn cross_lang_name_collision_stays_separate() {
    use crate::summary::merge_summaries;

    let py_src = b"def process_data():\n x = os.getenv(\"DATA\")\n return x\n";
    let py_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);

    // Start from the extracted Python summaries and append the C one.
    let mut all_sums = extract_lang_summaries(py_src, "python", py_grammar, "handler.py");
    all_sums.push(crate::summary::FuncSummary {
        name: "process_data".into(),
        file_path: "handler.c".into(),
        lang: "c".into(),
        param_count: 1,
        param_names: vec!["s".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    });
    let global = merge_summaries(all_sums, None);

    let python_hits = global.lookup_same_lang(Lang::Python, "process_data");
    let c_hits = global.lookup_same_lang(Lang::C, "process_data");
    assert_eq!(python_hits.len(), 1, "Python version stored separately");
    assert_eq!(c_hits.len(), 1, "C version stored separately");
    assert!(python_hits[0].1.source_caps != 0, "Python has source caps");
    assert_eq!(
        c_hits[0].1.source_caps, 0,
        "C should NOT get Python's source caps"
    );
}
/// A hand-built Ruby summary for `transform` lists parameter 0 in
/// `propagating_params` and has no source/sanitizer/sink caps. JavaScript
/// passes `document.location()` through it into `eval`; exactly one finding
/// is expected.
#[test]
fn cross_lang_ruby_passthrough_in_js_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::FuncSummary;

    let target = FuncKey {
        lang: Lang::Ruby,
        namespace: "helper.rb".into(),
        name: "transform".into(),
        arity: Some(1),
        ..Default::default()
    };
    let passthrough = FuncSummary {
        name: "transform".into(),
        file_path: "helper.rb".into(),
        lang: "ruby".into(),
        param_count: 1,
        param_names: vec!["data".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(target.clone(), passthrough);

    let caller_src = b"function main() {\n let x = document.location();\n let y = transform(x);\n eval(y);\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "javascript", caller_grammar);

    let call_site = CallSiteKey {
        caller_lang: Lang::JavaScript,
        caller_namespace: "main.js".into(),
        caller_func: "main".into(),
        callee_symbol: "transform".into(),
        ordinal: 0,
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "taint should propagate through cross-lang passthrough via interop"
    );
}
/// A hand-built PHP summary for `read_input` carries `Cap::all()` as
/// `source_caps`. A Go caller passes its result to `exec.Command` through an
/// interop edge; exactly one finding is expected.
#[test]
fn cross_lang_php_source_to_go_sink_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::{FuncSummary, merge_summaries};

    let source_summary = FuncSummary {
        name: "read_input".into(),
        file_path: "input.php".into(),
        lang: "php".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec!["file_get_contents".into()],
        ..Default::default()
    };
    let global = merge_summaries(vec![source_summary], None);

    let caller_src = b"package main\n\nfunc main() {\n\tx := read_input()\n\texec.Command(x)\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "go", caller_grammar);

    let call_site = CallSiteKey {
        caller_lang: Lang::Go,
        caller_namespace: "main.go".into(),
        caller_func: "main".into(),
        callee_symbol: "read_input".into(),
        ordinal: 0,
    };
    let target = FuncKey {
        lang: Lang::Php,
        namespace: "input.php".into(),
        name: "read_input".into(),
        arity: Some(0),
        ..Default::default()
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert_eq!(reported.len(), 1, "PHP source → Go sink via interop");
}
/// A hand-built Python summary for `html_clean` declares
/// `Cap::HTML_ESCAPE` as its sanitizer caps. JavaScript passes
/// `document.location()` through it into `eval`; the test expects exactly one
/// finding to remain.
#[test]
fn cross_lang_wrong_sanitizer_still_flags_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::FuncSummary;

    let target = FuncKey {
        lang: Lang::Python,
        namespace: "sanitizers.py".into(),
        name: "html_clean".into(),
        arity: Some(1),
        ..Default::default()
    };
    let html_sanitizer = FuncSummary {
        name: "html_clean".into(),
        file_path: "sanitizers.py".into(),
        lang: "python".into(),
        param_count: 1,
        param_names: vec!["text".into()],
        source_caps: 0,
        sanitizer_caps: Cap::HTML_ESCAPE.bits(),
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(target.clone(), html_sanitizer);

    let caller_src = b"function main() {\n let x = document.location();\n let y = html_clean(x);\n eval(y);\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "javascript", caller_grammar);

    let call_site = CallSiteKey {
        caller_lang: Lang::JavaScript,
        caller_namespace: "main.js".into(),
        caller_func: "main".into(),
        callee_symbol: "html_clean".into(),
        ordinal: 0,
    };
    let edges = vec![InteropEdge {
        from: call_site,
        to: target,
    }];
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "wrong cross-language sanitizer should NOT neutralise"
    );
}
/// Merges a Python `helper` (source caps set) with a JavaScript `helper`
/// (sink caps and a propagating parameter) and checks that per-language
/// lookups return one entry each with their own caps intact.
#[test]
fn cross_lang_summary_preserves_lang_metadata() {
    use crate::summary::{FuncSummary, merge_summaries};

    let python_helper = FuncSummary {
        name: "helper".into(),
        file_path: "lib.py".into(),
        lang: "python".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let js_helper = FuncSummary {
        name: "helper".into(),
        file_path: "lib.js".into(),
        lang: "javascript".into(),
        param_count: 1,
        param_names: vec!["x".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![0],
        callees: vec![],
        ..Default::default()
    };
    let global = merge_summaries(vec![python_helper, js_helper], None);

    let python_hits = global.lookup_same_lang(Lang::Python, "helper");
    let js_hits = global.lookup_same_lang(Lang::JavaScript, "helper");
    assert_eq!(python_hits.len(), 1, "Python helper stored separately");
    assert_eq!(js_hits.len(), 1, "JS helper stored separately");

    let python_entry = &python_hits[0].1;
    let js_entry = &js_hits[0].1;
    assert!(
        python_entry.source_caps != 0,
        "Python source caps preserved"
    );
    assert!(js_entry.sink_caps != 0, "JS sink caps preserved");
    assert!(
        js_entry.propagates_any(),
        "JS propagates_any preserved"
    );
}
/// A Python function returning `os.getenv(...)` and a JavaScript function
/// that `eval`s its parameter are both summarised and merged. A Go caller
/// feeds the first into the second via two interop edges; exactly one finding
/// is expected.
#[test]
fn cross_lang_full_pipeline_python_lib_js_caller_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let py_src = b"def dangerous_query():\n x = os.getenv(\"SQL\")\n return x\n";
    let py_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_grammar, "db.py");

    let js_src = b"function run_query(q) {\n eval(q);\n}\n";
    let js_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let js_sums = extract_lang_summaries(js_src, "javascript", js_grammar, "db.js");

    // Python summaries first, then JavaScript, matching the merge input order.
    let mut all_sums = py_sums;
    all_sums.extend(js_sums);
    let global = merge_summaries(all_sums, None);

    let go_src = b"package main\n\nfunc main() {\n\tq := dangerous_query()\n\trun_query(q)\n}\n";
    let go_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let caller_cfg = parse_lang(go_src, "go", go_grammar);

    let source_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "dangerous_query".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Python,
            namespace: "db.py".into(),
            name: "dangerous_query".into(),
            arity: Some(0),
            ..Default::default()
        },
    };
    let sink_edge = InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "run_query".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::JavaScript,
            namespace: "db.js".into(),
            name: "run_query".into(),
            arity: Some(1),
            ..Default::default()
        },
    };
    let edges = vec![source_edge, sink_edge];

    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "Python source → JS sink via Go caller via interop"
    );
}
/// Registers the same Rust function name `helper` (with full source caps)
/// under two namespaces, `a.rs` and `b.rs`, then analyses a third file `c.rs`
/// that calls `helper()` and passes the result to `Command`. The test expects
/// no findings.
#[test]
fn ambiguous_resolution_returns_none() {
    use crate::summary::FuncSummary;

    let mut global = GlobalSummaries::new();
    for ns in ["a.rs", "b.rs"] {
        let summary = FuncSummary {
            name: "helper".into(),
            file_path: ns.to_string(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        };
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: ns.to_string(),
            name: "helper".into(),
            arity: Some(0),
            ..Default::default()
        };
        global.insert(key, summary);
    }

    let src = br#"
use std::process::Command;
fn main() {
let x = helper();
Command::new("sh").arg(x).status().unwrap();
}
"#;
    let caller_cfg = parse_rust(src);
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "c.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "ambiguous resolution (two namespaces) should return None → no finding"
    );
}
/// Registers `helper` under `test.rs` (full source caps) and under `other.rs`
/// (zero source caps), then analyses a file in namespace `test.rs` that
/// passes `helper()` to `Command`. Exactly one finding is expected.
#[test]
fn exact_namespace_match_wins() {
    use crate::summary::FuncSummary;

    let mut global = GlobalSummaries::new();
    // (namespace, source_caps) pairs, inserted in this order.
    let variants = [("test.rs", Cap::all().bits()), ("other.rs", 0)];
    for (ns, source_caps) in variants {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: ns.into(),
            name: "helper".into(),
            arity: Some(0),
            ..Default::default()
        };
        let summary = FuncSummary {
            name: "helper".into(),
            file_path: ns.into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        };
        global.insert(key, summary);
    }

    let src = br#"
use std::process::Command;
fn main() {
let x = helper();
Command::new("sh").arg(x).status().unwrap();
}
"#;
    let caller_cfg = parse_rust(src);
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "exact namespace match should resolve to the source version"
    );
}
/// Supplies an interop edge whose call-site key declares `Lang::Python` as
/// the caller language while the analysed file is JavaScript. The test
/// expects no findings.
#[test]
fn interop_edge_wrong_caller_lang_no_match() {
    use crate::interop::CallSiteKey;
    use crate::summary::FuncSummary;

    let target = FuncKey {
        lang: Lang::Python,
        namespace: "lib.py".into(),
        name: "get_data".into(),
        arity: Some(0),
        ..Default::default()
    };
    let source_summary = FuncSummary {
        name: "get_data".into(),
        file_path: "lib.py".into(),
        lang: "python".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(target.clone(), source_summary);

    let mismatched_site = CallSiteKey {
        // Caller language differs from the `Lang::JavaScript` used for the
        // analysis below, even though the namespace is `main.js`.
        caller_lang: Lang::Python,
        caller_namespace: "main.js".into(),
        caller_func: "main".into(),
        callee_symbol: "get_data".into(),
        ordinal: 0,
    };
    let edges = vec![InteropEdge {
        from: mismatched_site,
        to: target,
    }];

    let caller_src = b"function main() {\n let x = get_data();\n eval(x);\n}\n";
    let caller_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let caller_cfg = parse_lang(caller_src, "javascript", caller_grammar);
    let reported = analyse_file(
        &caller_cfg,
        &caller_cfg.summaries,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert!(
        reported.is_empty(),
        "Edge for wrong caller_lang should not match"
    );
}
/// `foo` has `env::var("X").unwrap()` as its tail expression; its exported
/// summary must carry non-zero `source_caps`.
#[test]
fn return_call_recognized_as_source() {
    let src = br#"
use std::env;
fn foo() -> String {
env::var("X").unwrap()
}
"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_grammar).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&tree, src, "rust", "test.rs", None);
    let exported = crate::cfg::export_summaries(&file_cfg.summaries, "test.rs", "rust");

    let foo = exported
        .iter()
        .find(|summary| summary.name == "foo")
        .expect("foo should exist");
    assert!(
        foo.source_caps != 0,
        "foo() should have source_caps set because env::var is called inside return"
    );
}
/// The fixture returns early when `!validate(&x)` and only then passes `x`
/// to `Command`; zero findings are expected.
#[test]
fn validate_and_early_return() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if !validate(&x) { return; }
Command::new("sh").arg(x).status().unwrap();
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_grammar).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 0, "validated finding should be suppressed");
}
/// The sink call sits inside the `if validate(&x)` branch, with only a
/// `println!` in the `else`; zero findings are expected.
#[test]
fn validate_in_if_else_path_validated() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if validate(&x) {
Command::new("sh").arg(&x).status().unwrap();
} else {
println!("invalid input");
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_grammar).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 0, "validated finding should be suppressed");
}
/// The sink call sits inside the `if !validate(&x)` branch. Exactly one
/// finding is expected, and it must not be marked `path_validated`.
#[test]
fn sink_on_failed_validation_branch() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if !validate(&x) {
Command::new("sh").arg(&x).status().unwrap();
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_grammar).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1, "should detect taint flow to sink");
    let only = &reported[0];
    assert!(
        !only.path_validated,
        "finding should NOT be path_validated (sink is in failed-validation branch)"
    );
}
/// The fixture returns when `x.is_none()` and then guards a `Command` call
/// with the same `x.is_none()` check; the test expects no findings.
#[test]
fn contradictory_null_check_pruned() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").ok();
if x.is_none() { return; }
if x.is_none() {
Command::new("sh").arg("dangerous").status().unwrap();
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_grammar).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "inner branch is infeasible — should produce no findings (got {})",
        reported.len()
    );
}
/// One branch passes the raw env value to the command, the other passes the
/// result of `html_escape::encode_safe`. The test expects two findings, one
/// per branch.
#[test]
fn sanitize_one_branch_no_regression() {
    let program: &[u8] = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS").unwrap();
let safe = html_escape::encode_safe(&x);
if x.len() > 5 {
Command::new("sh").arg(&x).status().unwrap(); // UNSAFE
} else {
Command::new("sh").arg(&safe).status().unwrap(); // SAFE
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&rust_grammar).unwrap();
    let syntax_tree = ts_parser.parse(program, None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&syntax_tree, program, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        2,
        "two findings expected (both branches reach sink with wrong/no sanitizer)"
    );
}
/// The sink is nested under nine `x.len() > N` conditions. Exactly one
/// finding is still expected for the env-to-command flow.
#[test]
fn path_state_budget_graceful() {
    let program: &[u8] = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if x.len() > 1 {
if x.len() > 2 {
if x.len() > 3 {
if x.len() > 4 {
if x.len() > 5 {
if x.len() > 6 {
if x.len() > 7 {
if x.len() > 8 {
if x.len() > 9 {
Command::new("sh").arg(&x).status().unwrap();
}
}
}
}
}
}
}
}
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&rust_grammar).unwrap();
    let syntax_tree = ts_parser.parse(program, None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&syntax_tree, program, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "should detect taint flow even with truncated PathState"
    );
}
/// The same `x.len() > 5` comparison guards an early return and then the
/// sink. The test expects the finding to be reported (exactly one).
#[test]
fn unknown_predicate_not_pruned() {
    let program: &[u8] = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if x.len() > 5 { return; }
if x.len() > 5 {
Command::new("sh").arg(&x).status().unwrap();
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&rust_grammar).unwrap();
    let syntax_tree = ts_parser.parse(program, None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&syntax_tree, program, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "Comparison predicate should not cause contradiction pruning"
    );
}
/// `y.is_none()` guards an early return and then, again, the sink that
/// consumes the tainted `x`. No findings are expected.
#[test]
fn duplicate_null_guard_prunes_unreachable_sink() {
    let program: &[u8] = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
let y = env::var("OTHER").ok();
if y.is_none() { return; }
if y.is_none() {
Command::new("sh").arg(&x).status().unwrap();
}
}"#;
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&rust_grammar).unwrap();
    let syntax_tree = ts_parser.parse(program, None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&syntax_tree, program, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "duplicate null-guard with intervening early-return must prune \
         the second if's body as unreachable; got findings = {:?}",
        reported
    );
}
/// C fixture: a `getenv` result is set as `CURLOPT_URL` on a curl handle that
/// is then performed. At least one finding is expected.
#[test]
fn c_curl_handle_ssrf() {
    let c_source = b"#include <stdlib.h>\n#include <curl/curl.h>\n\
void fetch() {\n char *url = getenv(\"TARGET\");\n \
CURL *curl = curl_easy_init();\n \
curl_easy_setopt(curl, CURLOPT_URL, url);\n \
curl_easy_perform(curl);\n}\n";
    let file_cfg = parse_lang(
        c_source,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "C: getenv -> curl_easy_setopt -> curl_easy_perform should produce SSRF finding"
    );
}
/// C fixture: the curl handle's URL is a string literal, so no finding is
/// expected.
#[test]
fn c_curl_handle_no_taint() {
    let c_source = b"#include <curl/curl.h>\n\
void fetch() {\n CURL *curl = curl_easy_init();\n \
curl_easy_setopt(curl, CURLOPT_URL, \"https://example.com\");\n \
curl_easy_perform(curl);\n}\n";
    let file_cfg = parse_lang(
        c_source,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "C: hardcoded URL in curl_easy_setopt should not produce finding"
    );
}
/// Registers a global summary for `transform` (arity 2) whose only
/// propagating parameter is index 0, then passes the env-derived value as
/// argument 0. Exactly one finding is expected.
#[test]
fn per_arg_propagation_tainted_param_propagates() {
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "transform".into(),
        arity: Some(2),
        ..Default::default()
    };
    let callee_summary = crate::summary::FuncSummary {
        name: "transform".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 2,
        param_names: vec!["a".into(), "b".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(callee_key, callee_summary);

    let program = br#"
use std::{env, process::Command};
fn main() {
let tainted = env::var("X").unwrap();
let safe = String::from("ok");
let y = transform(&tainted, &safe);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "tainted arg at propagating position should produce finding"
    );
}
/// Same `transform` summary (only parameter 0 propagates), but the
/// env-derived value is passed as argument 1. No finding is expected.
#[test]
fn per_arg_propagation_safe_at_propagating_position() {
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "transform".into(),
        arity: Some(2),
        ..Default::default()
    };
    let callee_summary = crate::summary::FuncSummary {
        name: "transform".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 2,
        param_names: vec!["a".into(), "b".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(callee_key, callee_summary);

    let program = br#"
use std::{env, process::Command};
fn main() {
let safe = String::from("ok");
let tainted = env::var("X").unwrap();
let y = transform(&safe, &tainted);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        0,
        "tainted arg at non-propagating position should not produce finding"
    );
}
/// Summary for `legacy_pass` has `propagates_taint: true` and an empty
/// `propagating_params` list. The env-derived value is passed as argument 1
/// and exactly one finding is expected.
#[test]
fn per_arg_propagation_legacy_backward_compat() {
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "legacy_pass".into(),
        arity: Some(2),
        ..Default::default()
    };
    let callee_summary = crate::summary::FuncSummary {
        name: "legacy_pass".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 2,
        param_names: vec!["a".into(), "b".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: true,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(callee_key, callee_summary);

    let program = br#"
use std::{env, process::Command};
fn main() {
let safe = String::from("ok");
let tainted = env::var("X").unwrap();
let y = legacy_pass(&safe, &tainted);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "legacy propagates_taint=true with empty propagating_params should propagate all args"
    );
}
/// Summary for `concat` lists both parameters (0 and 1) as propagating. The
/// env-derived value is passed as argument 1 and exactly one finding is
/// expected.
#[test]
fn per_arg_propagation_both_params_propagate() {
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "concat".into(),
        arity: Some(2),
        ..Default::default()
    };
    let callee_summary = crate::summary::FuncSummary {
        name: "concat".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 2,
        param_names: vec!["a".into(), "b".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0, 1],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(callee_key, callee_summary);

    let program = br#"
use std::{env, process::Command};
fn main() {
let safe = String::from("ok");
let tainted = env::var("X").unwrap();
let y = concat(&safe, &tainted);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "both params propagate — tainted arg at position 1 should produce finding"
    );
}
/// `transform` propagates only parameter 1. The call passes a string literal
/// as argument 0 and the env-derived value as argument 1; exactly one finding
/// is expected.
#[test]
fn per_arg_propagation_literal_first_arg() {
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "transform".into(),
        arity: Some(2),
        ..Default::default()
    };
    let callee_summary = crate::summary::FuncSummary {
        name: "transform".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 2,
        param_names: vec!["a".into(), "b".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![1],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(callee_key, callee_summary);

    let program = br#"
use std::{env, process::Command};
fn main() {
let tainted = env::var("X").unwrap();
let y = transform("prefix", &tainted);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "literal first arg should not shift positional mapping — tainted at param 1 propagates"
    );
}
/// `transform` propagates only parameter 1. Argument 0 is a nested call
/// (`inner(&x)`) and argument 1 is the env-derived value; exactly one finding
/// is expected.
#[test]
fn per_arg_propagation_nested_expr_arg() {
    let callee_key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "transform".into(),
        arity: Some(2),
        ..Default::default()
    };
    let callee_summary = crate::summary::FuncSummary {
        name: "transform".into(),
        file_path: "lib.rs".into(),
        lang: "rust".into(),
        param_count: 2,
        param_names: vec!["a".into(), "b".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![1],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };
    let mut global = GlobalSummaries::new();
    global.insert(callee_key, callee_summary);

    let program = br#"
use std::{env, process::Command};
fn main() {
let x = String::from("safe");
let tainted = env::var("X").unwrap();
let y = transform(inner(&x), &tainted);
Command::new("sh").arg(y).status().unwrap();
}
"#;
    let file_cfg = parse_rust(program);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "nested call in arg 0 should not affect arg 1 positional mapping"
    );
}
/// JS fixture: `leak()` assigns `document.location()` to a top-level `x`, and
/// `use_it()` passes `x` to `eval`. At least one finding is expected.
#[test]
fn js_cross_function_global_taint() {
    let script = b"let x = \"safe\";\nfunction leak() { x = document.location(); }\nfunction use_it() { eval(x); }\nleak();\nuse_it();\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "cross-function global taint (leak -> use_it) should be detected"
    );
}
/// JS fixture: a top-level `x` is initialised from `document.location()` and
/// read by `eval(x)` inside `f`. At least one finding is expected.
#[test]
fn js_two_level_converges_no_mutation() {
    let script = b"let x = document.location();\nfunction f() { eval(x); }\nf();\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "top-level source to function sink should be detected"
    );
}
/// JS fixture: a `catch (err)` parameter flows into `exec`. At least one
/// finding is expected, and every finding must carry
/// `SourceKind::CaughtException`.
#[test]
fn catch_param_to_sink_has_caught_exception_source_kind() {
    let script = b"
const { exec } = require('child_process');
try {
doSomething();
} catch (err) {
exec(err.command);
}
";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "catch param to sink should produce a finding"
    );
    for finding in &reported {
        assert_eq!(
            finding.source_kind,
            crate::labels::SourceKind::CaughtException,
            "catch-param origin should have CaughtException source kind, not {:?}",
            finding.source_kind
        );
    }
}
/// JS fixture: a `catch (e)` parameter flows into `fetch`. For every finding,
/// the source node (looked up in the first body's graph) must have a callee
/// string that contains "catch".
#[test]
fn catch_param_source_node_has_callee() {
    let script = b"
try {
riskyOperation();
} catch (e) {
fetch(e.message);
}
";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let graph = &file_cfg.first_body().graph;
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "catch param to fetch should produce a finding"
    );
    for finding in &reported {
        let origin = &graph[finding.source];
        assert!(
            origin.call.callee.is_some(),
            "catch-param source node must have a callee for reporting, got None"
        );
        // Presence was asserted just above, so the unwrap cannot fail here.
        let callee_name = origin.call.callee.as_deref().unwrap();
        assert!(
            callee_name.contains("catch"),
            "catch-param callee should contain 'catch', got {:?}",
            callee_name
        );
    }
}
/// The env value is copied `x -> y -> z` before reaching the command.
/// Exactly one finding is expected, and its source node must have a callee
/// containing "env" or "var".
#[test]
fn taint_origin_preserved_through_assignment() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("CMD").unwrap();
let y = x;
let z = y;
Command::new("sh").arg(z).status().unwrap();
}"#;
    let file_cfg = parse_rust(program);
    let graph = &file_cfg.first_body().graph;
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1);
    let origin = &graph[reported[0].source];
    assert!(
        origin.call.callee.is_some(),
        "source node should have callee after propagation through assignments"
    );
    // Presence was asserted just above, so the unwrap cannot fail here.
    let callee_name = origin.call.callee.as_deref().unwrap();
    let mentions_env_var = callee_name.contains("env") || callee_name.contains("var");
    assert!(
        mentions_env_var,
        "source callee should reference env::var, got {:?}",
        callee_name
    );
}
/// `y` is assigned from the env value in both arms of an `if`/`else` before
/// reaching the command. At least one finding is expected, and each
/// finding's source node must have a callee.
#[test]
fn taint_origin_preserved_through_branch_merge() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("CMD").unwrap();
let y;
if true {
y = x;
} else {
y = x;
}
Command::new("sh").arg(y).status().unwrap();
}"#;
    let file_cfg = parse_rust(program);
    let graph = &file_cfg.first_body().graph;
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(!reported.is_empty());
    for finding in &reported {
        let origin = &graph[finding.source];
        assert!(
            origin.call.callee.is_some(),
            "source callee must not be None after branch merge"
        );
    }
}
/// Test helper: analyses the Rust source `src` twice — once through
/// `analyse_file`, once by lowering a single body to SSA and running the SSA
/// taint transfer directly — and asserts that both produce the same set of
/// `(source index, sink index)` pairs.
///
/// The directly-analysed body is `bodies[1]` when the file has more than one
/// body, otherwise the first body.
///
/// Panics if parsing, SSA lowering, or the final comparison fails.
fn assert_ssa_integration(src: &[u8]) {
    let rust_grammar = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&rust_grammar).unwrap();
    let syntax_tree = ts_parser.parse(src, None).unwrap();
    let file_cfg = crate::cfg::build_cfg(&syntax_tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;

    // Path 1: the high-level entry point.
    let high_level = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Path 2: direct SSA lowering + taint transfer on one body.
    let body = match file_cfg.bodies.len() {
        0 | 1 => file_cfg.first_body(),
        _ => &file_cfg.bodies[1],
    };
    let the_cfg = &body.graph;
    let interner = crate::state::symbol::SymbolInterner::from_cfg(the_cfg);
    let ssa = crate::ssa::lower_to_ssa(the_cfg, body.entry, None, true)
        .expect("SSA lowering should succeed");
    let ssa_xfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::Rust,
        namespace: "test.rs",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
    let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
    ssa_findings.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
    ssa_findings.dedup_by_key(|f| (f.sink, f.source));

    // Compare as sets of (source, sink) index pairs.
    let high_set: std::collections::HashSet<_> = high_level
        .iter()
        .map(|finding| (finding.source.index(), finding.sink.index()))
        .collect();
    let ssa_set: std::collections::HashSet<_> = ssa_findings
        .iter()
        .map(|finding| (finding.source.index(), finding.sink.index()))
        .collect();
    assert_eq!(
        high_set, ssa_set,
        "analyse_file vs direct SSA mismatch.\nHigh-level: {high_set:?}\nDirect SSA: {ssa_set:?}"
    );
}
/// Runs `assert_ssa_integration` on a straight-line env-to-command fixture.
#[test]
fn equiv_env_to_arg() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS_ARG").unwrap();
Command::new("sh").arg(x).status().unwrap();
}"#;
    assert_ssa_integration(program);
}
/// Runs `assert_ssa_integration` on a fixture with one sink per branch of an
/// `if`/`else` (raw value in one arm, `html_escape` output in the other).
#[test]
fn equiv_taint_through_if_else() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("DANGEROUS").unwrap();
let safe = html_escape::encode_safe(&x);
if x.len() > 5 {
Command::new("sh").arg(&x).status().unwrap();
} else {
Command::new("sh").arg(&safe).status().unwrap();
}
}"#;
    assert_ssa_integration(program);
}
/// Runs `assert_ssa_integration` on a fixture where the env value is mutated
/// in a `while` loop before reaching the command.
#[test]
fn equiv_taint_through_while_loop() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let mut x = env::var("DANGEROUS").unwrap();
while x.len() < 100 {
x.push_str("a");
}
Command::new("sh").arg(x).status().unwrap();
}"#;
    assert_ssa_integration(program);
}
/// Runs `assert_ssa_integration` on a fixture that passes the env value
/// through `shell_escape::unix::escape` before the command.
#[test]
fn equiv_killed_by_matching_sanitizer() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let clean = shell_escape::unix::escape(&x);
Command::new("sh").arg(clean).status().unwrap();
}"#;
    assert_ssa_integration(program);
}
/// Runs `assert_ssa_integration` on a fixture that passes the env value
/// through `html_escape::encode_safe` before the command.
#[test]
fn equiv_wrong_sanitizer_preserves_taint() {
    let program = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
let escaped = html_escape::encode_safe(&x);
Command::new("sh").arg(escaped).status().unwrap();
}"#;
    assert_ssa_integration(program);
}
/// PHP fixture (`$_POST` value echoed): the `(source, sink)` pairs reported
/// by `analyse_file` must equal those from running the SSA taint transfer
/// directly on the first body.
#[test]
fn integ_php_echo_simple_var() {
    let php_source = b"<?php\n$x = $_POST['data'];\necho $x;\n";
    let file_cfg = parse_lang(
        php_source,
        "php",
        tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP),
    );
    let first = file_cfg.first_body();
    let graph = &first.graph;
    let summaries = &file_cfg.summaries;

    // Path 1: the high-level entry point.
    let via_analyse_file =
        analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);

    // Path 2: direct SSA lowering + taint transfer.
    let interner = crate::state::symbol::SymbolInterner::from_cfg(graph);
    let ssa = crate::ssa::lower_to_ssa(graph, first.entry, None, true).expect("SSA lowering");
    let ssa_xfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::Php,
        namespace: "test.php",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, graph, &ssa_xfer);
    let mut via_direct_ssa = ssa_transfer::ssa_events_to_findings(&events, &ssa, graph);
    via_direct_ssa.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
    via_direct_ssa.dedup_by_key(|f| (f.sink, f.source));

    // Compare as sets of (source, sink) index pairs.
    let expected_pairs: std::collections::HashSet<_> = via_analyse_file
        .iter()
        .map(|finding| (finding.source.index(), finding.sink.index()))
        .collect();
    let direct_pairs: std::collections::HashSet<_> = via_direct_ssa
        .iter()
        .map(|finding| (finding.source.index(), finding.sink.index()))
        .collect();
    assert_eq!(
        expected_pairs, direct_pairs,
        "PHP echo analyse_file vs direct SSA mismatch"
    );
}
/// C curl fixture (`getenv` value set as `CURLOPT_URL`, then performed): the
/// `(source, sink)` pairs reported by `analyse_file` must equal those from
/// running the SSA taint transfer directly on the first body.
#[test]
fn integ_c_curl_handle_ssrf() {
    let c_source = b"#include <stdlib.h>\n#include <curl/curl.h>\n\
void fetch() {\n char *url = getenv(\"TARGET\");\n \
CURL *curl = curl_easy_init();\n \
curl_easy_setopt(curl, CURLOPT_URL, url);\n \
curl_easy_perform(curl);\n}\n";
    let file_cfg = parse_lang(
        c_source,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let first = file_cfg.first_body();
    let graph = &first.graph;
    let summaries = &file_cfg.summaries;

    // Path 1: the high-level entry point.
    let via_analyse_file = analyse_file(&file_cfg, summaries, None, Lang::C, "test.c", &[], None);

    // Path 2: direct SSA lowering + taint transfer.
    let interner = crate::state::symbol::SymbolInterner::from_cfg(graph);
    let ssa = crate::ssa::lower_to_ssa(graph, first.entry, None, true).expect("SSA lowering");
    let ssa_xfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::C,
        namespace: "test.c",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, graph, &ssa_xfer);
    let mut via_direct_ssa = ssa_transfer::ssa_events_to_findings(&events, &ssa, graph);
    via_direct_ssa.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
    via_direct_ssa.dedup_by_key(|f| (f.sink, f.source));

    // Compare as sets of (source, sink) index pairs.
    let expected_pairs: std::collections::HashSet<_> = via_analyse_file
        .iter()
        .map(|finding| (finding.source.index(), finding.sink.index()))
        .collect();
    let direct_pairs: std::collections::HashSet<_> = via_direct_ssa
        .iter()
        .map(|finding| (finding.source.index(), finding.sink.index()))
        .collect();
    assert_eq!(
        expected_pairs, direct_pairs,
        "curl analyse_file vs direct SSA mismatch"
    );
}
/// Runs `assert_ssa_integration` on a fixture that returns early when
/// `validate(&x)` is false and otherwise passes `x` to the command.
#[test]
fn equiv_validate_and_early_return() {
    let program = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if !validate(&x) { return; }
Command::new("sh").arg(x).status().unwrap();
}"#;
    assert_ssa_integration(program);
}
/// JS fixture: a top-level `x` set from `document.location()` is passed to
/// `eval` inside `f`. At least one finding is expected.
#[test]
fn ssa_js_two_level_global_to_function() {
    let script = b"let x = document.location();\nfunction f() { eval(x); }\nf();\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "SSA JS two-level: top-level source should flow to function sink"
    );
}
/// JS fixture: `a()` declares a local `x` from `document.location()` and
/// `b()` calls `eval(x)`.
///
/// No assertion is made on the findings; the test only requires that the
/// analysis runs to completion without panicking.
#[test]
fn ssa_js_two_level_function_isolation() {
    let script =
        b"function a() { let x = document.location(); }\nfunction b() { eval(x); }\na();\nb();\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    // Result intentionally unchecked.
    let _reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
}
/// JS fixture: `leak()` overwrites a top-level `x` with `document.location()`
/// and `use_it()` passes `x` to `eval`. At least one finding is expected.
#[test]
fn ssa_js_two_level_convergence() {
    let script = b"let x = 'safe';\nfunction leak() { x = document.location(); }\nfunction use_it() { eval(x); }\nleak();\nuse_it();\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "SSA JS two-level: function mutation of global should converge and detect taint"
    );
}
/// Express fixture: `req.query.url` is passed to `fetch`, followed by a
/// `.then().then()` chain ending in `res.send(body)`. At least one finding
/// is expected.
#[test]
fn ssa_js_chained_call_taint() {
    let script = b"var express = require('express');\nvar app = express();\n\napp.get('/proxy', function(req, res) {\n var url = req.query.url;\n fetch(url).then(function(response) {\n return response.text();\n }).then(function(body) {\n res.send(body);\n });\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "SSA should detect taint through fetch(url).then().then() chain"
    );
}
/// Express fixture: `req.query.input` is written to `obj.data`, which is then
/// passed to `res.send`. At least one finding is expected.
#[test]
fn ssa_field_write_to_sink() {
    let script = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n var obj = {};\n obj.data = req.query.input;\n res.send(obj.data);\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "SSA: field write from source should propagate taint to field read at sink"
    );
}
/// Express fixture: `obj.data` is first set from `req.query.input`, then
/// overwritten with a string literal before `res.send(obj.data)`. No finding
/// is expected.
#[test]
fn ssa_field_overwrite_kills_taint() {
    let script = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n var obj = {};\n obj.data = req.query.input;\n obj.data = \"safe\";\n res.send(obj.data);\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "SSA: constant overwrite of field should kill taint"
    );
}
/// Express fixture: `a.tainted` receives `req.query.input`, but the sink
/// reads `b.safe` from a separate object. No finding is expected.
#[test]
fn ssa_field_different_bases_no_alias() {
    let script = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n var a = {};\n var b = {};\n a.tainted = req.query.input;\n res.send(b.safe);\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "SSA: different base objects should not alias — a.tainted taint must not reach b.safe"
    );
}
/// Python fixture: `config.cmd` is assigned from `os.getenv` and then passed
/// to `os.system`. At least one finding is expected.
#[test]
fn ssa_python_attribute_taint() {
    let script = b"import os\n\nclass Config:\n pass\n\nconfig = Config()\nconfig.cmd = os.getenv(\"CMD\")\nos.system(config.cmd)\n";
    let file_cfg = parse_lang(
        script,
        "python",
        tree_sitter::Language::from(tree_sitter_python::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "SSA: Python attribute write from source should propagate taint to attribute read at sink"
    );
}
/// Express fixture: `obj` aliases `req.query`, `obj.safe` is set to a string
/// literal, and only `obj.safe` reaches `res.send`. No finding is expected.
#[test]
fn ssa_field_safe_overwrite_no_fp() {
    let script = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n var obj = req.query;\n obj.safe = \"constant\";\n res.send(obj.safe);\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "field-aware suppression: reading safe field of tainted base should not produce a finding, got {} findings",
        reported.len()
    );
}
/// Express fixture: `obj.data` is set from `req.query.input` and read back at
/// `res.send(obj.data)`. At least one finding is expected.
#[test]
fn ssa_field_tainted_field_still_fires() {
    let script = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n var obj = {};\n obj.data = req.query.input;\n res.send(obj.data);\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "field-aware suppression: tainted dotted-path field read should still produce a finding"
    );
}
/// Express fixture: `obj.data` is set from `req.query.input` and the whole
/// `obj` is passed to `res.send`. At least one finding is expected.
#[test]
fn ssa_field_base_sink_no_suppression() {
    let script = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n var obj = {};\n obj.data = req.query.input;\n res.send(obj);\n});\n";
    let file_cfg = parse_lang(
        script,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "field-aware suppression: tainted base passed directly to sink should still fire"
    );
}
/// For a function that returns its parameter unchanged, the extracted SSA
/// summary must contain `param_to_return` entries and each must be
/// `TaintTransform::Identity`.
///
/// Functions whose SSA lowering fails, or that have no `Param` ops, are
/// skipped.
#[test]
fn ssa_summary_identity_propagation() {
    use crate::state::symbol::SymbolInterner;
    use crate::summary::ssa_summary::TaintTransform;
    let src = br#"
fn passthrough(x: String) -> String {
x
}"#;
    let file_cfg = parse_lang(
        src,
        "rust",
        tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
    );
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);
    assert!(
        !func_entries.is_empty(),
        "should find at least one function entry"
    );
    for (func_name, func_entry) in &func_entries {
        let ssa = match crate::ssa::lower_to_ssa(the_cfg, *func_entry, Some(func_name), false) {
            Ok(ssa) => ssa,
            Err(_) => continue,
        };
        // Count `Param` ops across both phi and body instructions of every block.
        let param_count = ssa
            .blocks
            .iter()
            .flat_map(|b| b.phis.iter().chain(b.body.iter()))
            .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
            .count();
        if param_count == 0 {
            continue;
        }
        let summary = ssa_transfer::extract_ssa_func_summary(
            &ssa,
            the_cfg,
            summaries,
            None,
            Lang::Rust,
            "test.rs",
            &interner,
            param_count,
            None,
            None,
            None,
            None,
            None,
        );
        assert!(
            !summary.param_to_return.is_empty(),
            "passthrough function should have param_to_return entries"
        );
        for (_, transform) in &summary.param_to_return {
            assert!(
                matches!(transform, TaintTransform::Identity),
                "passthrough should produce Identity transform, got {:?}",
                transform
            );
        }
    }
}
/// For a function that returns `html_escape::encode_safe(&x)`, every
/// `param_to_return` entry in the extracted SSA summary must be
/// `TaintTransform::StripBits(_)`.
///
/// Functions whose SSA lowering fails, or that have no `Param` ops, are
/// skipped. The check passes vacuously if `param_to_return` is empty.
#[test]
fn ssa_summary_sanitizer_strips_bits() {
    use crate::state::symbol::SymbolInterner;
    use crate::summary::ssa_summary::TaintTransform;
    let src = br#"
fn sanitize_input(x: String) -> String {
html_escape::encode_safe(&x)
}"#;
    let file_cfg = parse_lang(
        src,
        "rust",
        tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
    );
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);
    for (func_name, func_entry) in &func_entries {
        let ssa = match crate::ssa::lower_to_ssa(the_cfg, *func_entry, Some(func_name), false) {
            Ok(ssa) => ssa,
            Err(_) => continue,
        };
        // Count `Param` ops across both phi and body instructions of every block.
        let param_count = ssa
            .blocks
            .iter()
            .flat_map(|b| b.phis.iter().chain(b.body.iter()))
            .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
            .count();
        if param_count == 0 {
            continue;
        }
        let summary = ssa_transfer::extract_ssa_func_summary(
            &ssa,
            the_cfg,
            summaries,
            None,
            Lang::Rust,
            "test.rs",
            &interner,
            param_count,
            None,
            None,
            None,
            None,
            None,
        );
        for (_, transform) in &summary.param_to_return {
            assert!(
                matches!(transform, TaintTransform::StripBits(_)),
                "sanitizer wrapper should produce StripBits transform, got {:?}",
                transform
            );
        }
    }
}
/// A function whose body reads `env::var` must yield a summary with non-empty
/// `source_caps`. Unlike the sibling summary tests, entries without `Param`
/// ops are not skipped here.
#[test]
fn ssa_summary_source_adds_bits() {
    use crate::state::symbol::SymbolInterner;
    let src = br#"
use std::env;
fn read_config() -> String {
env::var("CONFIG").unwrap()
}"#;
    let rust_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let file_cfg = parse_lang(src, "rust", rust_lang);
    let top_body = file_cfg.first_body();
    let the_cfg = &top_body.graph;
    let _entry = top_body.entry;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);
    for (name, start) in &func_entries {
        // A failed lowering skips this entry instead of failing the test.
        let ssa = match crate::ssa::lower_to_ssa(the_cfg, *start, Some(name), false) {
            Ok(lowered) => lowered,
            Err(_) => continue,
        };
        // Count `Param` ops across the phis and body of every block.
        let param_count: usize = ssa
            .blocks
            .iter()
            .map(|block| {
                block
                    .phis
                    .iter()
                    .chain(block.body.iter())
                    .filter(|inst| matches!(inst.op, crate::ssa::ir::SsaOp::Param { .. }))
                    .count()
            })
            .sum();
        let summary = ssa_transfer::extract_ssa_func_summary(
            &ssa,
            the_cfg,
            &file_cfg.summaries,
            None,
            Lang::Rust,
            "test.rs",
            &interner,
            param_count,
            None,
            None,
            None,
            None,
            None,
        );
        assert!(
            !summary.source_caps.is_empty(),
            "env-reading function should have non-empty source_caps, got {:?}",
            summary.source_caps
        );
    }
}
/// A function that forwards its parameter into `Command::new("sh").arg(..)`
/// must yield a summary with at least one `param_to_sink` entry.
#[test]
fn ssa_summary_param_to_sink() {
    use crate::state::symbol::SymbolInterner;
    let src = br#"
use std::process::Command;
fn run_cmd(cmd: String) {
Command::new("sh").arg(cmd).status().unwrap();
}"#;
    let rust_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let file_cfg = parse_lang(src, "rust", rust_lang);
    let top_body = file_cfg.first_body();
    let the_cfg = &top_body.graph;
    let _entry = top_body.entry;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);
    for (name, start) in &func_entries {
        // A failed lowering skips this entry instead of failing the test.
        let ssa = match crate::ssa::lower_to_ssa(the_cfg, *start, Some(name), false) {
            Ok(lowered) => lowered,
            Err(_) => continue,
        };
        // Count `Param` ops across the phis and body of every block.
        let param_count: usize = ssa
            .blocks
            .iter()
            .map(|block| {
                block
                    .phis
                    .iter()
                    .chain(block.body.iter())
                    .filter(|inst| matches!(inst.op, crate::ssa::ir::SsaOp::Param { .. }))
                    .count()
            })
            .sum();
        if param_count == 0 {
            continue;
        }
        let summary = ssa_transfer::extract_ssa_func_summary(
            &ssa,
            the_cfg,
            &file_cfg.summaries,
            None,
            Lang::Rust,
            "test.rs",
            &interner,
            param_count,
            None,
            None,
            None,
            None,
            None,
        );
        assert!(
            !summary.param_to_sink.is_empty(),
            "function passing param to Command sink should have param_to_sink entries"
        );
    }
}
/// The intra-file SSA summary for `cleanHtml` (a wrapper around
/// `DOMPurify.sanitize`) must exist and carry a `param_to_return` entry.
///
/// NOTE(review): `analyse_file` is run first but its findings are discarded
/// without any assertion.
#[test]
fn ssa_cross_function_taint_with_sanitizer_wrapper() {
    use crate::state::symbol::SymbolInterner;
    let src = b"var express = require('express');\nvar app = express();\n\nfunction cleanHtml(input) {\n return DOMPurify.sanitize(input);\n}\n\napp.get('/safe', function(req, res) {\n var name = req.query.name;\n var safe = cleanHtml(name);\n res.send(safe);\n});\n";
    let js = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", js);
    let the_cfg = &file_cfg.first_body().graph;
    // Run the full analysis and throw the result away.
    drop(analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    ));
    let interner = SymbolInterner::from_cfg(the_cfg);
    let ssa_summaries = super::extract_intra_file_ssa_summaries(
        the_cfg,
        &interner,
        Lang::JavaScript,
        "test.js",
        &file_cfg.summaries,
        None,
    );
    let clean_summary = match ssa_summaries
        .iter()
        .find(|(key, _)| key.name == "cleanHtml")
    {
        Some((_, found)) => found,
        None => panic!(
            "cleanHtml should have an SSA summary, got keys: {:?}",
            ssa_summaries.keys().map(|k| &k.name).collect::<Vec<_>>()
        ),
    };
    assert!(
        !clean_summary.param_to_return.is_empty(),
        "cleanHtml should propagate param to return"
    );
}
/// `storeInto(value, arr)` pushes `value` into `arr`: its SSA summary must
/// record `param_to_container_store == [(0, 1)]`, and analysing the whole file
/// must report at least one finding.
#[test]
fn ssa_interproc_container_store_summary() {
    use crate::state::symbol::SymbolInterner;
    let src = b"var express = require('express');\nvar app = express();\n\nfunction storeInto(value, arr) {\n arr.push(value);\n}\n\napp.get('/store', function(req, res) {\n var items = [];\n storeInto(req.query.input, items);\n res.send(items.join(''));\n});\n";
    let js = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", js);
    let the_cfg = &file_cfg.first_body().graph;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let ssa_summaries = super::extract_intra_file_ssa_summaries(
        the_cfg,
        &interner,
        Lang::JavaScript,
        "test.js",
        &file_cfg.summaries,
        None,
    );
    let store_summary = ssa_summaries
        .iter()
        .find_map(|(key, found)| {
            if key.name == "storeInto" {
                Some(found)
            } else {
                None
            }
        })
        .expect("storeInto should have an SSA summary");
    assert!(
        !store_summary.param_to_container_store.is_empty(),
        "storeInto should have param_to_container_store (value stored into arr)"
    );
    assert_eq!(
        store_summary.param_to_container_store,
        vec![(0, 1)],
        "param_to_container_store should map value(0) → arr(1)"
    );
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "inter-procedural container store should produce a finding"
    );
}
/// A counting `while` loop sitting between the source and the sink must not
/// change the result: exactly one finding is expected.
#[test]
fn ssa_induction_var_no_taint() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let data = env::var("INPUT").unwrap();
let mut i = 0;
while i < 10 {
i = i + 1;
}
Command::new("sh").arg(data).status().unwrap();
}"#;
    let finding_count = ssa_analyse_rust(src).len();
    assert_eq!(
        finding_count,
        1,
        "induction var optimization: tainted source should still produce 1 finding"
    );
}
/// A tainted variable that is mutated inside a loop must still reach the
/// sink: exactly one finding is expected.
#[test]
fn ssa_loop_tainted_var_not_induction() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let mut x = env::var("DANGEROUS").unwrap();
while x.len() < 100 {
x.push_str("a");
}
Command::new("sh").arg(x).status().unwrap();
}"#;
    let finding_count = ssa_analyse_rust(src).len();
    assert_eq!(
        finding_count,
        1,
        "tainted var in loop (not induction) should still propagate"
    );
}
/// A `for` loop that never touches the tainted value must not hide the flow
/// from `env::var` to the command sink: exactly one finding is expected.
#[test]
fn ssa_taint_through_loop_still_works() {
    let src = br#"
use std::{env, process::Command};
fn main() {
let x = env::var("DANGEROUS").unwrap();
for _i in 0..10 {
let _unused = 1;
}
Command::new("sh").arg(x).status().unwrap();
}"#;
    let finding_count = ssa_analyse_rust(src).len();
    assert_eq!(
        finding_count,
        1,
        "taint through loop should still produce 1 finding"
    );
}
/// The sink consumes `config` under an `if validate(x, config)` guard. The
/// test expects findings, and at least one of them must not be flagged
/// `path_validated`.
#[test]
fn ssa_validation_targets_specific_var() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
let config = env::var("CONFIG").unwrap();
if validate(x, config) {
Command::new("sh").arg(config).status().unwrap();
}
}"#;
    let rust_lang = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_lang).unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(!reported.is_empty(), "should detect taint flow for config");
    let has_unvalidated = reported.iter().any(|f| !f.path_validated);
    assert!(
        has_unvalidated,
        "config should NOT be marked as path_validated (only x is validated)"
    );
}
/// Classifying the condition text `x.isValid()` must yield a `ValidationCall`
/// predicate whose target is `x`.
#[test]
fn ssa_method_validation_target() {
    use crate::taint::path_state::classify_condition_with_target;
    let (predicate, subject) = classify_condition_with_target("x.isValid()");
    assert_eq!(predicate, PredicateKind::ValidationCall);
    assert_eq!(subject.as_deref(), Some("x"));
}
/// A sink placed inside `if validate(&x) { .. }` must produce no findings.
#[test]
fn ssa_phi_path_sensitive_both_branches_validated() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if validate(&x) {
Command::new("sh").arg(&x).status().unwrap();
}
}"#;
    let rust_lang = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_lang).unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 0, "validated finding should be suppressed");
}
/// A sink placed inside `if !validate(&x) { .. }` must produce exactly one
/// finding, and that finding must not be flagged `path_validated`.
#[test]
fn ssa_phi_path_sensitive_one_branch_not_validated() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if !validate(&x) {
Command::new("sh").arg(&x).status().unwrap();
}
}"#;
    let rust_lang = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_lang).unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(reported.len(), 1, "should detect taint flow");
    assert!(
        !reported[0].path_validated,
        "finding should NOT be path_validated (sink in failed-validation branch)"
    );
}
/// JavaScript: overwriting a tainted variable with a string constant before
/// the `eval` sink must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_js() {
    let src = b"var express = require('express');\nvar app = express();\napp.get('/r', function(req, res) {\n var name = req.query.input;\n name = \"Guest\";\n eval(name);\n});\n";
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "JS: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// TypeScript: overwriting a tainted variable with a string constant before
/// the `eval` sink must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_ts() {
    let src =
        b"function main() {\n let x = document.location();\n x = \"safe\";\n eval(x);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let parsed = parse_lang(src, "typescript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "TS: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// Python: overwriting a tainted variable with a string constant before
/// `os.system` must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_python() {
    let src = b"import os\ndef main():\n cmd = os.getenv(\"CMD\")\n cmd = \"safe\"\n os.system(cmd)\n";
    let grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let parsed = parse_lang(src, "python", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Python: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// Go: overwriting a tainted variable with a string constant before
/// `exec.Command` must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_go() {
    let src = b"package main\n\nimport \"os\"\nimport \"os/exec\"\n\nfunc main() {\n\tcmd := os.Getenv(\"CMD\")\n\tcmd = \"safe\"\n\texec.Command(cmd)\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let parsed = parse_lang(src, "go", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Go,
        "test.go",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Go: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// Java: overwriting a tainted variable with a string constant before
/// `Runtime.exec` must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_java() {
    let src = b"class Main {\n void main() {\n String cmd = System.getenv(\"CMD\");\n cmd = \"safe\";\n Runtime.exec(cmd);\n }\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let parsed = parse_lang(src, "java", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Java,
        "test.java",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Java: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// PHP: overwriting a tainted variable with a string constant before `eval`
/// must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_php() {
    let src = b"<?php\n$cmd = $_GET['cmd'];\n$cmd = \"safe\";\neval($cmd);\n";
    let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let parsed = parse_lang(src, "php", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Php,
        "test.php",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "PHP: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// Ruby: overwriting a tainted variable with a string constant before
/// `system` must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_ruby() {
    let src = b"def main\n cmd = gets()\n cmd = \"safe\"\n system(cmd)\nend\n";
    let grammar = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let parsed = parse_lang(src, "ruby", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Ruby,
        "test.rb",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Ruby: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// C: overwriting a tainted pointer with a string constant before `system`
/// must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_c() {
    let src = b"#include <stdlib.h>\nvoid main() {\n char* cmd = getenv(\"CMD\");\n cmd = \"safe\";\n system(cmd);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let parsed = parse_lang(src, "c", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "C: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// C++: overwriting a tainted pointer with a string constant before `system`
/// must leave no findings.
#[test]
fn ssa_reassignment_kills_taint_cpp() {
    let src = b"#include <cstdlib>\nvoid main() {\n char* cmd = std::getenv(\"CMD\");\n cmd = \"safe\";\n system(cmd);\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let parsed = parse_lang(src, "cpp", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Cpp,
        "test.cpp",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "C++: reassignment to constant should kill taint, got {} findings",
        reported.len()
    );
}
/// JavaScript: `name = name + " suffix"` keeps the tainted operand, so the
/// `eval` sink must still be reported.
#[test]
fn ssa_compound_preserves_taint_js() {
    let src = b"var express = require('express');\nvar app = express();\napp.get('/r', function(req, res) {\n var name = req.query.input;\n name = name + \" suffix\";\n eval(name);\n});\n";
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "JS: compound assignment should preserve taint"
    );
}
/// Python: `cmd = cmd + " safe"` keeps the tainted operand, so the
/// `os.system` sink must still be reported.
#[test]
fn ssa_compound_preserves_taint_python() {
    let src = b"import os\ndef main():\n cmd = os.getenv(\"CMD\")\n cmd = cmd + \" safe\"\n os.system(cmd)\n";
    let grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let parsed = parse_lang(src, "python", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "Python: compound assignment should preserve taint"
    );
}
/// Go: `cmd = cmd + " suffix"` keeps the tainted operand, so the
/// `exec.Command` sink must still be reported.
#[test]
fn ssa_compound_preserves_taint_go() {
    let src = b"package main\n\nimport \"os\"\nimport \"os/exec\"\n\nfunc main() {\n\tcmd := os.Getenv(\"CMD\")\n\tcmd = cmd + \" suffix\"\n\texec.Command(cmd)\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let parsed = parse_lang(src, "go", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Go,
        "test.go",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "Go: compound assignment should preserve taint"
    );
}
/// Java: `cmd = cmd + " safe"` keeps the tainted operand, so the
/// `Runtime.exec` sink must still be reported.
#[test]
fn ssa_compound_preserves_taint_java() {
    let src = b"class Main {\n void main() {\n String cmd = System.getenv(\"CMD\");\n cmd = cmd + \" safe\";\n Runtime.exec(cmd);\n }\n}\n";
    let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let parsed = parse_lang(src, "java", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Java,
        "test.java",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "Java: compound assignment should preserve taint"
    );
}
/// JavaScript: when only one branch overwrites the tainted variable, the
/// merge after the `if` must stay tainted and the `eval` sink must be
/// reported.
#[test]
fn ssa_phi_preserves_taint_on_non_reassigned_path_js() {
    let src = b"var express = require('express');\nvar app = express();\napp.get('/r', function(req, res) {\n var name = req.query.input;\n if (name.length > 10) {\n name = \"fallback\";\n }\n eval(name);\n});\n";
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "JS: PHI merge should preserve taint from non-reassigned path"
    );
}
/// Rust: when only one branch overwrites the tainted variable, the merge
/// after the `if` must stay tainted and the command sink must be reported.
#[test]
fn ssa_phi_preserves_taint_on_non_reassigned_path_rust() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let mut x = env::var("DANGEROUS").unwrap();
if x.len() > 5 {
x = "safe".to_string();
}
Command::new("sh").arg(&x).status().unwrap();
}"#;
    let reported = ssa_analyse_rust(src);
    assert!(
        !reported.is_empty(),
        "Rust: PHI merge should preserve taint from non-reassigned path"
    );
}
/// Concatenating a fixed `https://api.example.com/users/` prefix with tainted
/// input before `fetch` must produce no findings.
#[test]
fn abstract_ssrf_prefix_linear_suppression() {
    let src = b"var userId = document.location();\nvar prefix = 'https://api.example.com/users/';\nvar url = prefix + userId;\nfetch(url);\n";
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Linear SSRF prefix: 'https://api.example.com/users/' + userId should be \
suppressed by abstract string domain. Got {} findings.",
        reported.len()
    );
}
/// Both branches build the URL from a fixed `https://api.example.com/...`
/// prefix plus tainted input; the merged value passed to `fetch` must produce
/// no findings.
#[test]
fn abstract_phi_replay_ssrf_suppression() {
    let src = b"var userId = document.location();\nvar prefix1 = 'https://api.example.com/users/';\nvar prefix2 = 'https://api.example.com/admins/';\nvar url;\nif (userId.length > 5) {\n url = prefix1 + userId;\n} else {\n url = prefix2 + userId;\n}\nfetch(url);\n";
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Abstract phi replay: both branches produce safe SSRF prefixes, \
phi merge should preserve the common prefix 'https://api.example.com/' \
and suppress the SSRF finding. Got {} findings.",
        reported.len()
    );
}
/// Ruby: an early return guarded by `unless user_id.is_a?(Integer)` must
/// suppress the finding on the later `connection.execute` call.
#[test]
fn ruby_type_check_guard_suppresses_taint() {
    let src = b"def run_query(params)\n user_id = params[:id]\n unless user_id.is_a?(Integer)\n return \"bad input\"\n end\n connection.execute(\"SELECT * FROM users WHERE id = \" + user_id.to_s)\nend\n";
    let grammar = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let parsed = parse_lang(src, "ruby", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Ruby,
        "test.rb",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "Ruby: is_a?(Integer) type guard should suppress taint finding, got {} findings",
        reported.len()
    );
}
/// A function returning a struct literal whose field is initialised from
/// `env::var` must have a summary with non-zero `source_caps`.
#[test]
fn rust_struct_literal_with_source_produces_source_caps() {
    let src = br#"
use std::env;
struct Cfg { val: String }
fn make_cfg() -> Cfg {
Cfg { val: env::var("X").unwrap() }
}
"#;
    let extracted = extract_summaries_from_bytes(src, "test.rs");
    // First summary named `make_cfg`, if any.
    let mut matching = extracted
        .iter()
        .filter(|summary| summary.name == "make_cfg");
    let make_cfg_summary = matching.next().expect("make_cfg should have a summary");
    assert!(
        make_cfg_summary.source_caps != 0,
        "make_cfg should have source_caps from env::var inside struct literal, got 0"
    );
}
/// Values read from `env::var` inside `load_config` flow through struct
/// fields and `format!` into two sinks (`Command` and `fs::write`); at least
/// two findings are expected.
#[test]
fn rust_struct_constructor_source_flows_through_format_to_sink() {
    let src = br#"
use std::env;
use std::process::Command;
use std::fs;
struct AppConfig {
db_url: String,
upload_dir: String,
}
fn load_config() -> AppConfig {
AppConfig {
db_url: env::var("DATABASE_URL").unwrap(),
upload_dir: env::var("UPLOAD_DIR").unwrap(),
}
}
fn handle_export() {
let config = load_config();
let dump_cmd = format!("pg_dump {}", config.db_url);
Command::new("sh").arg("-c").arg(&dump_cmd).output().unwrap();
let dump_path = format!("{}/export.sql", config.upload_dir);
fs::write(&dump_path, "data").unwrap();
}
"#;
    let parsed = parse_rust(src);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    let finding_count = reported.len();
    assert!(
        finding_count >= 2,
        "Expected >= 2 taint findings (Command::new + fs::write), got {}",
        finding_count
    );
}
/// `format!` applied to a tainted value must carry the taint into the command
/// sink: exactly one finding is expected.
#[test]
fn ssa_format_macro_propagates_taint() {
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
let cmd = format!("echo {}", x);
Command::new("sh").arg("-c").arg(&cmd).output().unwrap();
}
"#;
    let finding_count = ssa_analyse_rust(src).len();
    assert_eq!(
        finding_count,
        1,
        "format! should propagate taint from env::var to Command::new sink"
    );
}
/// Regression test labelled "B-2": `validate(&x)` is called on only one
/// branch, so the sink after the merge must still be reported.
#[test]
fn phi_validated_must_requires_all_paths() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
use std::env; use std::process::Command;
fn main() {
let x = env::var("INPUT").unwrap();
if some_condition() {
validate(&x);
}
Command::new("sh").arg(&x).status().unwrap();
}"#;
    let rust_lang = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_lang).unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let reported = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "B-2 regression: phi must NOT promote to validated_must when only \
one branch validates — sink should still fire"
    );
}
/// Case "C-1": `transform` reads a source internally but returns a string
/// constant, so passing its result to `child_process.exec` must produce no
/// findings.
#[test]
fn inline_return_constant_with_internal_source_produces_no_finding() {
    use tree_sitter::Language;
    let src = b"var child_process = require('child_process');\n\
var express = require('express');\n\
var app = express();\n\
\n\
function transform(input) {\n\
var internal = document.location();\n\
return 'constant_value';\n\
}\n\
\n\
app.get('/safe', function(req, res) {\n\
var result = transform(req.query.data);\n\
child_process.exec(result);\n\
});\n";
    let grammar = Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    // The source→sink index pairs are only rendered if the assertion fails.
    assert_eq!(
        reported.len(),
        0,
        "C-1: transform() returns constant — internal source must not leak, got {} findings: {:?}",
        reported.len(),
        reported
            .iter()
            .map(|f| format!("{}→{}", f.source.index(), f.sink.index()))
            .collect::<Vec<_>>()
    );
}
/// Case "C-1" regression: a tainted argument routed through `passthrough`
/// into `child_process.exec` must produce exactly one finding.
#[test]
fn inline_return_taint_prefers_explicit_return_value() {
    use tree_sitter::Language;
    let src = b"var child_process = require('child_process');\n\
var express = require('express');\n\
var app = express();\n\
\n\
function passthrough(cmd) {\n\
return cmd;\n\
}\n\
\n\
app.get('/a', function(req, res) {\n\
var w = passthrough(req.query.cmd);\n\
child_process.exec(w);\n\
});\n";
    let grammar = Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "C-1 regression: passthrough(tainted) should produce exactly 1 finding, got {}",
        reported.len()
    );
}
/// Case "C-1" regression: `withSideEffect` reads an unrelated source but
/// returns its parameter, so exactly one finding (the parameter flow) is
/// expected.
#[test]
fn inline_return_taint_internal_source_does_not_widen_caps() {
    use tree_sitter::Language;
    let src = b"var child_process = require('child_process');\n\
var express = require('express');\n\
var app = express();\n\
\n\
function withSideEffect(cmd) {\n\
var leaked = document.location();\n\
return cmd;\n\
}\n\
\n\
app.get('/a', function(req, res) {\n\
var r = withSideEffect(req.query.cmd);\n\
child_process.exec(r);\n\
});\n";
    let grammar = Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert_eq!(
        reported.len(),
        1,
        "C-1 regression: withSideEffect should produce exactly 1 finding (param flow), got {}",
        reported.len()
    );
}
/// Two classes each define a `process` method. Both the summary map and the
/// callee-body collection must hold one `process` entry per container, and
/// every summary key must also appear among the bodies.
#[test]
fn same_name_methods_distinct_func_keys() {
    let src = br#"
class Sanitizer {
process(x) {
return escape(x);
}
}
class Worker {
process(x) {
eval(x);
}
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let (summaries, bodies) = super::extract_ssa_artifacts_from_file_cfg(
        &parsed,
        Lang::JavaScript,
        "test.js",
        &parsed.summaries,
        None,
        None,
    );
    let expected_containers = vec!["Sanitizer".to_string(), "Worker".to_string()];

    // Containers of all `process` summaries, sorted for comparison.
    let mut containers: Vec<String> = Vec::new();
    for summary_key in summaries.keys().filter(|k| k.name == "process") {
        containers.push(summary_key.container.clone());
    }
    containers.sort();
    assert_eq!(
        containers,
        expected_containers,
        "FuncKey-based keying must produce one `process` summary per container; \
got {containers:?} from {:?}",
        summaries.keys().collect::<Vec<_>>(),
    );

    // The same check against the callee bodies.
    let mut body_containers: Vec<String> = Vec::new();
    for (body_key, _) in bodies.iter().filter(|(k, _)| k.name == "process") {
        body_containers.push(body_key.container.clone());
    }
    body_containers.sort();
    assert_eq!(
        body_containers,
        expected_containers,
        "callee-body cache must keep both same-name methods distinct; got {body_containers:?}",
    );

    for key in summaries.keys() {
        let has_body = bodies.iter().any(|(bk, _)| bk == key);
        assert!(
            has_body,
            "summary key {key:?} missing from callee-body map"
        );
    }
}
/// Two top-level `helper(x)` definitions in one file must yield two summary
/// keys with different `disambig` values, and two callee bodies.
#[test]
fn same_name_same_arity_functions_distinct_func_keys() {
    let src = br#"
function helper(x) {
return escape(x);
}
function helper(x) {
eval(x);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(src, "javascript", grammar);
    let (summaries, bodies) = super::extract_ssa_artifacts_from_file_cfg(
        &parsed,
        Lang::JavaScript,
        "test.js",
        &parsed.summaries,
        None,
        None,
    );
    let colliding: Vec<_> = summaries.keys().filter(|k| k.name == "helper").collect();
    assert_eq!(
        colliding.len(),
        2,
        "two same-name same-arity definitions must produce two distinct summary entries; \
got {} keys: {:?}",
        colliding.len(),
        colliding,
    );
    let distinct_disambigs: std::collections::HashSet<_> =
        colliding.iter().map(|k| k.disambig).collect();
    assert_eq!(
        distinct_disambigs.len(),
        2,
        "FuncKey.disambig should differ for colliding same-name same-arity defs",
    );
    let helper_bodies = bodies.iter().filter(|(k, _)| k.name == "helper").count();
    assert_eq!(helper_bodies, 2, "callee-body cache must also keep both defs");
}
/// Builds a `Finding` fixture for the finding-ID and alternative-path tests.
///
/// Only the body ID, source/sink node indices, `path_hash` and
/// `path_validated` come from the arguments. Every other field is set to a
/// fixed empty, zero, `false` or `None` value, and `finding_id` starts as an
/// empty string.
fn make_finding_for_link_test(
    body_id: u32,
    source_idx: usize,
    sink_idx: usize,
    path_hash: u64,
    path_validated: bool,
) -> Finding {
    let body_id = crate::cfg::BodyId(body_id);
    let sink = petgraph::graph::NodeIndex::new(sink_idx);
    let source = petgraph::graph::NodeIndex::new(source_idx);
    Finding {
        body_id,
        sink,
        source,
        path: Vec::new(),
        source_kind: crate::labels::SourceKind::EnvironmentConfig,
        path_validated,
        guard_kind: None,
        hop_count: 0,
        cap_specificity: 0,
        uses_summary: false,
        flow_steps: Vec::new(),
        symbolic: None,
        source_span: None,
        primary_location: None,
        engine_notes: smallvec::SmallVec::new(),
        path_hash,
        finding_id: String::new(),
        alternative_finding_ids: smallvec::SmallVec::new(),
        effective_sink_caps: crate::labels::Cap::empty(),
    }
}
/// `make_finding_id` must end in `-v` or `-u` according to `path_validated`,
/// contain part of the path hash, and differ when either the validation flag
/// or the path hash differs.
#[test]
fn finding_id_encodes_validation_and_path_hash() {
    let mut validated = make_finding_for_link_test(1, 3, 7, 0xabcd_1234_0000_0001, true);
    validated.finding_id = super::make_finding_id(&validated);
    assert!(
        validated.finding_id.ends_with("-v"),
        "validated ID must end -v: {}",
        validated.finding_id
    );
    assert!(
        validated.finding_id.contains("abcd12340000"),
        "hash component missing: {}",
        validated.finding_id
    );

    // Same body/source/sink/hash, but not validated.
    let mut unvalidated = make_finding_for_link_test(1, 3, 7, 0xabcd_1234_0000_0001, false);
    unvalidated.finding_id = super::make_finding_id(&unvalidated);
    assert!(
        unvalidated.finding_id.ends_with("-u"),
        "unvalidated ID must end -u: {}",
        unvalidated.finding_id
    );
    assert_ne!(
        validated.finding_id, unvalidated.finding_id,
        "validation status must disambiguate IDs"
    );

    // Same as `unvalidated` except for the path hash.
    let mut other_path = make_finding_for_link_test(1, 3, 7, 0xdead_beef_0000_0002, false);
    other_path.finding_id = super::make_finding_id(&other_path);
    assert_ne!(
        unvalidated.finding_id, other_path.finding_id,
        "path_hash must disambiguate IDs"
    );
}
/// Two findings that share body, source and sink must each list the other's
/// ID as their only alternative after `link_alternative_paths`.
#[test]
fn link_alternative_paths_cross_references_same_body_sink_source() {
    let validated = make_finding_for_link_test(1, 3, 7, 0x1111, true);
    let unvalidated = make_finding_for_link_test(1, 3, 7, 0x2222, false);
    let mut findings = vec![validated, unvalidated];
    for finding in findings.iter_mut() {
        finding.finding_id = super::make_finding_id(finding);
    }
    // Snapshot the IDs before linking.
    let (v_id, u_id) = (
        findings[0].finding_id.clone(),
        findings[1].finding_id.clone(),
    );
    super::link_alternative_paths(&mut findings);
    assert_eq!(
        findings[0].alternative_finding_ids.as_slice(),
        std::slice::from_ref(&u_id),
        "validated finding must reference the unvalidated sibling",
    );
    assert_eq!(
        findings[1].alternative_finding_ids.as_slice(),
        std::slice::from_ref(&v_id),
        "unvalidated finding must reference the validated sibling",
    );
}
/// Findings that differ in body, source or sink must stay unlinked: every
/// `alternative_finding_ids` list must be empty after linking.
#[test]
fn link_alternative_paths_does_not_link_distinct_sink_source() {
    // (body_id, source_idx, sink_idx); hash and validation flag are shared.
    let shapes = [(1, 3, 7), (1, 3, 8), (1, 4, 7), (2, 3, 7)];
    let mut findings: Vec<_> = shapes
        .iter()
        .map(|&(body, source, sink)| make_finding_for_link_test(body, source, sink, 0x1111, false))
        .collect();
    for finding in findings.iter_mut() {
        finding.finding_id = super::make_finding_id(finding);
    }
    super::link_alternative_paths(&mut findings);
    for (i, finding) in findings.iter().enumerate() {
        assert!(
            finding.alternative_finding_ids.is_empty(),
            "finding {i} should have no alternatives; got {:?}",
            finding.alternative_finding_ids,
        );
    }
}
/// Three findings sharing body, source and sink: after linking, each must
/// list exactly the IDs of the other two (compared as sets).
#[test]
fn link_alternative_paths_three_way_group() {
    let mut findings = vec![
        make_finding_for_link_test(1, 3, 7, 0x1111, true),
        make_finding_for_link_test(1, 3, 7, 0x2222, false),
        make_finding_for_link_test(1, 3, 7, 0x3333, false),
    ];
    for finding in findings.iter_mut() {
        finding.finding_id = super::make_finding_id(finding);
    }
    let mut ids: Vec<String> = Vec::with_capacity(findings.len());
    for finding in &findings {
        ids.push(finding.finding_id.clone());
    }
    super::link_alternative_paths(&mut findings);
    for (i, finding) in findings.iter().enumerate() {
        // Every ID except the finding's own.
        let expected: std::collections::HashSet<&String> = ids
            .iter()
            .enumerate()
            .filter(|(j, _)| *j != i)
            .map(|(_, id)| id)
            .collect();
        let got: std::collections::HashSet<&String> =
            finding.alternative_finding_ids.iter().collect();
        assert_eq!(
            got, expected,
            "finding {i} must list every other sibling ID",
        );
    }
}
/// Java: calling `close()` on a local initialised with
/// `new FileInputStream(..)` must add a `typed_call_receivers` entry whose
/// container is `FileHandle` to the summary of `read`.
#[test]
fn typed_call_receivers_populated_for_constructor_typed_receiver() {
    let src = br#"
class Reader {
void read() {
FileInputStream f = new FileInputStream("/etc/passwd");
f.close();
}
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let parsed = parse_lang(src, "java", grammar);
    let (summaries, _bodies) = super::extract_ssa_artifacts_from_file_cfg(
        &parsed,
        Lang::Java,
        "Reader.java",
        &parsed.summaries,
        None,
        None,
    );
    let read_sum = summaries
        .iter()
        .find_map(|(key, found)| if key.name == "read" { Some(found) } else { None })
        .expect("read() summary must be extracted");
    let has_file_handle = read_sum
        .typed_call_receivers
        .iter()
        .any(|(_, container)| container.as_str() == "FileHandle");
    assert!(
        has_file_handle,
        "FileInputStream-typed receiver must surface as `FileHandle` container; got {:?}",
        read_sum.typed_call_receivers,
    );
}
/// Java: a bare `new FileInputStream(..)` expression must not add any
/// `typed_call_receivers` entry to the summary of `make`. A missing summary
/// counts as an empty list.
#[test]
fn typed_call_receivers_skips_free_function_calls() {
    let src = br#"
class Maker {
void make() {
new FileInputStream("/tmp/x");
}
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let parsed = parse_lang(src, "java", grammar);
    let (summaries, _) = super::extract_ssa_artifacts_from_file_cfg(
        &parsed,
        Lang::Java,
        "Maker.java",
        &parsed.summaries,
        None,
        None,
    );
    let typed = match summaries.iter().find(|(key, _)| key.name == "make") {
        Some((_, found)) => found.typed_call_receivers.clone(),
        None => Default::default(),
    };
    assert!(
        typed.is_empty(),
        "constructor-invocation Call has no receiver and must not surface a typed entry; \
got {typed:?}",
    );
}
/// Parses an arrow function that returns `new Promise((res, rej) => ...)` and
/// asserts the resulting file CFG holds at least three bodies, reporting the
/// body names when it does not.
#[test]
fn cve_2025_64430_promise_executor_extracted_as_body() {
    let js = br#"
const downloadFromUri = (uri) => {
return new Promise((res, rej) => {
http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
});
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);

    // Collected up front so the failure message can show what was extracted.
    let names: Vec<Option<String>> = parsed
        .bodies
        .iter()
        .map(|body| body.meta.name.clone())
        .collect();
    let body_count = parsed.bodies.len();
    assert!(
        body_count >= 3,
        "expected at least 3 bodies (top-level + downloadFromUri + Promise executor), \
         got {}: {:?}",
        body_count,
        names
    );
}
/// `handler` passes `req.body.uri` to a helper whose `http.get(uri, ...)` call
/// sits inside a Promise executor; the analysis must report at least one
/// finding for this snippet.
#[test]
fn cve_2025_64430_promise_wrapper_via_summary_param_to_sink() {
    let js = br#"
const downloadFromUri = uri => {
return new Promise((res, rej) => {
http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
});
};
const handler = (req) => {
downloadFromUri(req.body.uri);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF flow finding via Promise-wrapper summary; got 0",
    );
}
/// The Promise executor calls `http.get(input)` where `input` is a parameter
/// of the enclosing arrow function; the analysis must report at least one
/// finding for this snippet.
#[test]
fn cve_2025_64430_promise_executor_sink_via_lexical_containment() {
    let js = br#"
const f = (input) => {
return new Promise((res, rej) => {
http.get(input);
});
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF Sink finding in Promise executor capturing `input`; got 0",
    );
}
/// `handler` forwards the member expression `req.body.uri` to `helper`, which
/// calls `http.get(uri)`; the analysis must report at least one finding.
#[test]
fn cve_2025_64430_wrapper_with_member_source_arg_fires() {
    let js = br#"
const helper = (uri) => {
http.get(uri);
};
const handler = (req) => {
helper(req.body.uri);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected at least one SSRF flow finding through wrapper; got 0",
    );
}
/// Call chain `handler -> helper -> downloadFromUri`, with the `http.get` call
/// inside a Promise executor in the last function; the analysis must still
/// report at least one finding.
#[test]
fn cve_2025_64430_two_hop_transitive_summary_propagation() {
    let js = br#"
const downloadFromUri = uri => {
return new Promise((res, rej) => {
http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
});
};
const helper = file => {
downloadFromUri(file._source.uri);
};
const handler = (req) => {
helper(req.body);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF flow finding via two-hop transitive summary propagation; got 0",
    );
}
/// Same two-hop call chain as the single-line variant, but the
/// `http.get(...).on(...)` chain is split across three source lines; the
/// analysis must still report at least one finding.
#[test]
fn cve_2025_64430_multiline_chained_get_classifies_inner_sink() {
    let js = br#"
const downloadFromUri = uri => {
return new Promise((res, rej) => {
http
.get(uri, response => { response.on('data', () => {}); })
.on('error', e => rej(e));
});
};
const helper = file => {
downloadFromUri(file._source.uri);
};
const handler = (req) => {
helper(req.body);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF flow finding through multi-line chained http.get; got 0",
    );
}
/// `target` is passed to `validateUrlSsrf`, the handler throws when the result
/// is truthy, and only afterwards calls `axios.get(target)`; the analysis must
/// report no findings.
#[test]
fn indirect_validator_narrowing_marks_arg_validated() {
    let js = br#"
async function handler(req) {
const target = req.query.url;
const ssrfError = await validateUrlSsrf(target);
if (ssrfError) {
throw new Error('blocked');
}
await axios.get(target);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "validator narrowing should suppress direct-flow SSRF; got {} finding(s)",
        findings.len()
    );
}
/// The handler throws unless `SAFE_REGEX.test(userValue)` succeeds and only
/// then calls `db.execute(userValue)`; the analysis must report no findings.
#[test]
fn regex_test_allowlist_narrowing_clears_direct_flow() {
    let js = br#"
const SAFE_REGEX = /^[\w]+$/;
async function handler(req) {
const userValue = req.body.filter;
if (!SAFE_REGEX.test(userValue)) {
throw new Error('bad');
}
return await db.execute(userValue);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist narrowing should suppress direct-flow finding; got {} finding(s): {findings:?}",
        findings.len()
    );
}
/// The helper runs `validateUrlSsrf(child.webhookUrl)` and throws on a truthy
/// result before calling `axios.post`; calling that helper from `handler` must
/// produce no findings.
#[test]
fn helper_with_validator_does_not_propagate_to_caller_via_summary() {
    let js = br#"
async function getWebhookResponse(child) {
const ssrfError = await validateUrlSsrf(child.webhookUrl);
if (ssrfError) {
throw new Error('blocked');
}
return await axios.post(child.webhookUrl, {});
}
async function handler(req) {
const child = req.body.filter;
const r = await getWebhookResponse(child);
return r;
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "helper-with-validator should not propagate sink via summary; got {} finding(s)",
        findings.len()
    );
}
/// Counterpart to the validated-helper case: here the helper calls
/// `axios.post(child.webhookUrl, {})` with no validator call in front of it,
/// so the analysis must report at least one finding.
#[test]
fn helper_without_validator_still_propagates_to_caller_via_summary() {
    let js = br#"
async function getWebhookResponse(child) {
return await axios.post(child.webhookUrl, {});
}
async function handler(req) {
const child = req.body.filter;
const r = await getWebhookResponse(child);
return r;
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "helper-without-validator must still flag the cross-fn SSRF path",
    );
}
/// `sanitize` throws unless `SAFE_REGEX.test(value)` succeeds and then returns
/// a template string built from `value`; passing its result to `db.execute`
/// in `handler` must produce no findings.
#[test]
fn validated_params_to_return_suppresses_one_hop_helper_validator() {
    let js = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return `safe:${value}`;
};
async function handler(req) {
const userValue = req.body.filter;
const sql = sanitize(userValue);
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist inside helper must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}
/// `buildQuery` delegates to the regex-guarded `sanitize` and appends `'!'`;
/// passing `buildQuery`'s result to `db.execute` in `handler` must produce no
/// findings.
#[test]
fn validated_params_to_return_suppresses_two_hop_helper_validator() {
    let js = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = (value) => {
const s = sanitize(value);
return s + '!';
};
async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery(userValue);
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "two-hop helper-validator must propagate validated_params_to_return through both helpers; got {} finding(s)",
        findings.len()
    );
}
/// Negative control: this `sanitize` only wraps `value` in a template string
/// and performs no regex check, so the `db.execute(sql)` call in `handler`
/// must still produce at least one finding.
#[test]
fn validated_params_to_return_does_not_suppress_unvalidated_helper() {
    let js = br#"
const sanitize = (value) => {
return `safe:${value}`;
};
async function handler(req) {
const userValue = req.body.filter;
const sql = sanitize(userValue);
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "helper without regex guard must still flag the caller sink",
    );
}
/// `buildQuery` takes a destructured `{ value }` parameter and routes it
/// through the regex-guarded `sanitize`; calling it with `{ value: userValue }`
/// and passing the result to `db.execute` must produce no findings.
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper() {
    let js = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = ({ value }) => {
const s = sanitize(value);
return s + '!';
};
async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery({ value: userValue });
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "destructured object-pattern arg with regex.test allowlist inside the helper must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}
/// TypeScript variant of the destructured-argument helper case: the same code
/// with type annotations, parsed with the TypeScript grammar and analysed as
/// `Lang::TypeScript`, must produce no findings.
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper_ts() {
    let ts = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value: string): string => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = ({ value }: { value: string }): string => {
const s = sanitize(value);
return s + '!';
};
async function handler(req: any) {
const userValue = req.body.filter;
const sql = buildQuery({ value: userValue });
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let parsed = parse_lang(ts, "typescript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "TS destructured object-pattern arg with regex.test allowlist must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}
/// `buildQuery` destructures `{ column, operator, value }` and only `value`
/// goes through the regex-guarded `sanitize`; the caller supplies string
/// literals for the other two fields, and the analysis must report no findings.
#[test]
fn destructured_sibling_validation_propagates_through_summary() {
    let js = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = ({ column, operator, value }) => {
return `${column} ${operator} ${sanitize(value)}`;
};
async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery({ column: 'col', operator: '=', value: userValue });
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "destructured-sibling validation (validator binds non-primary slot binding) must propagate through summary; got {} finding(s)",
        findings.len()
    );
}
/// Table-driven check of `classify_input_validator_callee`: each callee name
/// is paired with the polarity (or `None`) the classifier must return for it.
#[test]
fn classify_input_validator_callee_polarity_buckets() {
    use crate::ssa::type_facts::{InputValidatorPolarity, classify_input_validator_callee};

    let cases = [
        // Expected to classify as error-returning.
        ("validateUrlSsrf", Some(InputValidatorPolarity::ErrorReturning)),
        ("verifyToken", Some(InputValidatorPolarity::ErrorReturning)),
        ("validate_url", Some(InputValidatorPolarity::ErrorReturning)),
        // Expected to classify as boolean, true meaning valid.
        ("isValidUrl", Some(InputValidatorPolarity::BooleanTrueIsValid)),
        ("is_valid_email", Some(InputValidatorPolarity::BooleanTrueIsValid)),
        ("isSafe", Some(InputValidatorPolarity::BooleanTrueIsValid)),
        // Expected not to classify as input validators.
        ("checkPermissions", None),
        ("is_authorized", None),
        ("randomThing", None),
        // A dotted, receiver-qualified name gets the same result as the bare name.
        (
            "validator.validateUrlSsrf",
            Some(InputValidatorPolarity::ErrorReturning),
        ),
    ];

    for (callee, expected) in cases {
        assert_eq!(
            classify_input_validator_callee(callee),
            expected,
            "callee: {callee}"
        );
    }
}
/// Runs the analysis over the call chain
/// `handler -> middle -> helper -> downloadFromUri`. The test is `#[ignore]`d
/// and makes no assertion: the findings are computed and then discarded.
#[test]
#[ignore]
fn cve_2025_64430_three_hop_transitive_documents_depth_limit() {
    let js = br#"
const downloadFromUri = uri => {
return new Promise((res, rej) => {
http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
});
};
const helper = file => {
downloadFromUri(file._source.uri);
};
const middle = data => {
helper(data);
};
const handler = (req) => {
middle(req.body);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    // Result intentionally unused; see the doc comment above.
    let _findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
}
/// Parses an arrow function whose three parameters all have `= {}` defaults,
/// locates the first `arrow_function` node in the tree, and asserts that
/// `extract_param_meta_for_test` yields the three parameter names in order.
#[test]
fn cve_2023_22621_js_default_params_extracted() {
    use crate::cfg::extract_param_meta_for_test;

    /// Pre-order search for the first `arrow_function` node at or below `node`.
    fn first_arrow<'a>(node: tree_sitter::Node<'a>) -> Option<tree_sitter::Node<'a>> {
        if node.kind() == "arrow_function" {
            return Some(node);
        }
        let mut cursor = node.walk();
        // Bound to a local so the iterator borrowing `cursor` is dropped
        // before `cursor` itself goes out of scope.
        let hit = node.named_children(&mut cursor).find_map(first_arrow);
        hit
    }

    let src = br#"
const sendTemplatedEmail = (emailOptions = {}, emailTemplate = {}, data = {}) => {
return emailTemplate;
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&lang).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();

    let arrow = first_arrow(tree.root_node()).expect("arrow function not found");
    let params = extract_param_meta_for_test(arrow, "javascript", src);
    let names: Vec<&str> = params.iter().map(|(name, _)| name.as_str()).collect();
    assert_eq!(
        names,
        ["emailOptions", "emailTemplate", "data"],
        "expected all 3 default-valued arrow params extracted; got {:?}",
        names
    );
}
/// `_.template(req.body.tpl)` with no options argument must produce at least
/// one finding.
#[test]
fn cve_2023_22621_lodash_template_fires_on_tainted_input() {
    let js = br#"
const _ = require('lodash');
const handler = (req, res) => {
_.template(req.body.tpl);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected taint flow on _.template(req.body.tpl); got 0 findings",
    );
}
/// `_.template(req.body.tpl, { evaluate: false })` must produce no findings.
#[test]
fn cve_2023_22621_lodash_template_suppressed_by_evaluate_false() {
    let js = br#"
const _ = require('lodash');
const handler = (req, res) => {
_.template(req.body.tpl, { evaluate: false });
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "expected no taint flow when evaluate:false is set; got {} findings",
        findings.len(),
    );
}
/// The template is compiled and invoked immediately, `_.template(tpl)({})`,
/// with `tpl` read from `req.body.tpl`; the analysis must report at least one
/// finding.
#[test]
fn cve_2023_22621_lodash_template_double_call_inner_rebinding() {
    let js = br#"
const _ = require('lodash');
const handler = (req, res) => {
const tpl = req.body.tpl;
_.template(tpl)({});
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js, "javascript", grammar);
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected taint flow via double-call chain rebinding; got 0 findings",
    );
}